"""
Utility functions for manipulating iterables, lists, and sequences.
The :func:`chunks` function splits a list into smaller parts. There are different strategies for how to do this.
The :func:`flatten` function takes a list of lists and removes the inner lists. This
only removes one level of nesting.
The :func:`iterable` function checks if an object is iterable or not. Similar to the
:func:`callable` builtin function.
The :func:`argmax`, :func:`argmin`, and :func:`argsort` work similarly to the
analogous :mod:`numpy` functions, except they operate on dictionaries and other
Python builtin types.
The :func:`take` and :func:`compress` are generators, and also similar to their
lesser known, but very useful numpy equivalents.
There are also other numpy inspired functions: :func:`unique`,
:func:`argunique`, :func:`unique_flags`, and :func:`boolmask`.
"""
import itertools as it
import math
import operator
from collections import abc as collections_abc
from itertools import zip_longest
from ubelt import util_const
from ubelt import util_dict
# Public API of this module: the names exported by a star-import.
__all__ = [
    'allsame', 'argmax', 'argmin', 'argsort', 'argunique', 'boolmask',
    'chunks', 'compress', 'flatten', 'iter_window', 'iterable', 'peek', 'take',
    'unique', 'unique_flags',
]
[docs]
class chunks(object):
    """
    Generates successive n-sized chunks from ``items``.

    If the last chunk has less than n elements, ``bordermode`` is used to
    determine fill values.

    Note:
        FIXME:
            When nchunks is given, that's how many chunks we should get
            but the issue is that chunksize is not well defined in that
            instance. For instance how do we turn a list with 4 elements into
            3 chunks where does the extra item go?

        In ubelt <= 0.10.3 there is a bug when specifying nchunks,
        where it chooses a chunksize that is too large. Specify
        ``legacy=True`` to get the old buggy behavior if needed.

    Notes:
        This is similar to functionality provided by
        :func:`more_itertools.chunked`,
        :func:`more_itertools.chunked_even`,
        :func:`more_itertools.sliced`,
        :func:`more_itertools.divide`,

    Yields:
        List[T]:
            subsequent non-overlapping chunks of the input items

    Attributes:
        remainder (int): number of leftover items that don't divide cleanly

    References:
        .. [SO_434287] http://stackoverflow.com/questions/434287/iterate-over-a-list-in-chunks

    Example:
        >>> import ubelt as ub
        >>> items = '1234567'
        >>> genresult = ub.chunks(items, chunksize=3)
        >>> list(genresult)
        [['1', '2', '3'], ['4', '5', '6'], ['7']]

    Example:
        >>> import ubelt as ub
        >>> items = [1, 2, 3, 4, 5, 6, 7]
        >>> genresult = ub.chunks(items, chunksize=3, bordermode='none')
        >>> assert list(genresult) == [[1, 2, 3], [4, 5, 6], [7]]
        >>> genresult = ub.chunks(items, chunksize=3, bordermode='cycle')
        >>> assert list(genresult) == [[1, 2, 3], [4, 5, 6], [7, 1, 2]]
        >>> genresult = ub.chunks(items, chunksize=3, bordermode='replicate')
        >>> assert list(genresult) == [[1, 2, 3], [4, 5, 6], [7, 7, 7]]

    Example:
        >>> import ubelt as ub
        >>> assert len(list(ub.chunks(range(2), nchunks=2))) == 2
        >>> assert len(list(ub.chunks(range(3), nchunks=2))) == 2
        >>> # Note: ub.chunks will not do the 2,1,1 split
        >>> assert len(list(ub.chunks(range(4), nchunks=3))) == 3
        >>> assert len(list(ub.chunks([], 2, bordermode='none'))) == 0
        >>> assert len(list(ub.chunks([], 2, bordermode='cycle'))) == 0
        >>> assert len(list(ub.chunks([], 2, None, bordermode='replicate'))) == 0

    Example:
        >>> from ubelt.util_list import *  # NOQA
        >>> def _check_len(self):
        ...     assert len(self) == len(list(self))
        >>> _check_len(chunks(list(range(3)), nchunks=2))
        >>> _check_len(chunks(list(range(2)), nchunks=2))
        >>> _check_len(chunks(list(range(2)), nchunks=3))

    Example:
        >>> from ubelt.util_list import *  # NOQA
        >>> import pytest
        >>> assert pytest.raises(ValueError, chunks, range(9))
        >>> assert pytest.raises(ValueError, chunks, range(9), chunksize=2, nchunks=2)
        >>> assert pytest.raises(TypeError, len, chunks((_ for _ in range(2)), 2))

    Example:
        >>> # Border modes tolerate empty inputs and one-shot iterators
        >>> from ubelt.util_list import *  # NOQA
        >>> assert list(chunks([], nchunks=2, bordermode='replicate')) == [[], []]
        >>> genresult = chunks(iter(range(7)), chunksize=3, bordermode='cycle')
        >>> assert list(genresult) == [[0, 1, 2], [3, 4, 5], [6, 0, 1]]

    Example:
        >>> from ubelt.util_list import *  # NOQA
        >>> import ubelt as ub
        >>> basis = {
        >>>     'legacy': [False, True],
        >>>     'chunker': [{'nchunks': 3}, {'nchunks': 4}, {'nchunks': 5}, {'nchunks': 7}, {'chunksize': 3}],
        >>>     'items': [range(2), range(4), range(5), range(7), range(9)],
        >>>     'bordermode': ['none', 'cycle', 'replicate'],
        >>> }
        >>> grid_items = list(ub.named_product(basis))
        >>> rows = []
        >>> for grid_item in ub.ProgIter(grid_items):
        >>>     chunker = grid_item.get('chunker')
        >>>     grid_item.update(chunker)
        >>>     kw = ub.dict_diff(grid_item, {'chunker'})
        >>>     self = chunk_iter = ub.chunks(**kw)
        >>>     chunked = list(chunk_iter)
        >>>     chunk_lens = list(map(len, chunked))
        >>>     row = ub.dict_union(grid_item, {'chunk_lens': chunk_lens, 'chunks': chunked})
        >>>     row['chunker'] = str(row['chunker'])
        >>>     if not row['legacy'] and 'nchunks' in kw:
        >>>         assert kw['nchunks'] == row['nchunks']
        >>>     row.update(chunk_iter.__dict__)
        >>>     rows.append(row)
        >>> # xdoctest: +SKIP
        >>> import pandas as pd
        >>> df = pd.DataFrame(rows)
        >>> for _, subdf in df.groupby('chunker'):
        >>>     print(subdf)
    """

    def __init__(self, items, chunksize=None, nchunks=None, total=None,
                 bordermode='none', legacy=False):
        """
        Args:
            items (Iterable): input to iterate over

            chunksize (int | None): size of each sublist yielded

            nchunks (int | None): number of chunks to create (
                cannot be specified if chunksize is specified)

            bordermode (str): determines how to handle the last case if the
                length of the input is not divisible by chunksize valid values
                are: {'none', 'cycle', 'replicate'}

            total (int | None): hints about the length of the input

            legacy (bool):
                if True use old behavior, defaults to False. This will be
                removed in the future.

        Raises:
            ValueError: if both or neither of ``chunksize`` and ``nchunks``
                are given, or if ``nchunks`` is given but the number of items
                cannot be determined.
        """
        if nchunks is not None and chunksize is not None:  # nocover
            raise ValueError('Cannot specify both chunksize and nchunks')
        if nchunks is None and chunksize is None:  # nocover
            raise ValueError('Must specify either chunksize or nchunks')
        if total is None:
            try:
                total = len(items)
            except TypeError:
                pass  # iterators dont know len
        if bordermode is None:  # nocover
            bordermode = 'none'

        if nchunks is None:
            # chunksize was given; the number of chunks is only known when
            # the total number of items is known.
            if total is not None:
                nchunks = int(math.ceil(total / chunksize))
            remainder = 0
        else:
            if total is None:
                raise ValueError(
                    'Need to specify total to use nchunks on an iterable '
                    'without length hints')
            if legacy:
                chunksize = int(math.ceil(total / nchunks))
                remainder = 0
            else:
                if bordermode == 'none':
                    # I feel like this could be simpler
                    # Use the largest chunksize that still produces nchunks
                    # chunks; the first ``remainder`` chunks get one extra
                    # item (see ``_new_iterator``).
                    chunksize = max(int(math.floor(total / nchunks)), 1)
                    nchunks = min(int(math.ceil(total / chunksize)), nchunks)
                    chunked_total = chunksize * nchunks
                    remainder = total - chunked_total
                else:
                    # not working
                    chunksize = max(int(math.ceil(total / nchunks)), 1)
                    # Can artificially extend the size in this case
                    # total = chunksize * nchunks
                    remainder = 0

        self.legacy = legacy
        self.remainder = remainder
        self.items = items
        self.total = total
        self.nchunks = nchunks
        self.chunksize = chunksize
        self.bordermode = bordermode

    def __len__(self):
        """
        Returns:
            int: the number of chunks that will be generated

        Raises:
            TypeError: if the number of chunks is unknown (the input has no
                length and no ``total`` hint was given).
        """
        if self.nchunks is None:
            raise TypeError('length is unknown')
        return self.nchunks

    def __iter__(self):
        """
        Returns:
            Iterator[List]: an iterator over the chunks

        Raises:
            ValueError: in the legacy / unknown-length code path if
                ``bordermode`` is not recognized.
        """
        bordermode = self.bordermode
        items = self.items
        chunksize = self.chunksize
        if not self.legacy and self.nchunks is not None:
            return self._new_iterator()
        else:
            # Legacy behavior, also used when the number of chunks is unknown
            if bordermode is None or bordermode == 'none':
                return self.noborder(items, chunksize)
            elif bordermode == 'cycle':
                return self.cycle(items, chunksize)
            elif bordermode == 'replicate':
                return self.replicate(items, chunksize)
            else:
                raise ValueError('unknown bordermode=%r' % (bordermode,))

    def _new_iterator(self):
        """
        Generate chunks when the number of chunks is known up front.

        Yields:
            List: the next chunk. The first ``self.remainder`` chunks contain
            one more item than ``self.chunksize``.

        Raises:
            KeyError: if ``self.bordermode`` is not recognized.
        """
        nchunks = self.nchunks
        chunksize = self.chunksize
        remainder = self.remainder

        if self.bordermode == 'cycle':
            iterator = it.cycle(iter(self.items))
        elif self.bordermode == 'replicate':
            def replicator(items):
                # Yield every item, then repeat the final one forever.
                missing = object()
                item = missing
                for item in items:
                    yield item
                if item is missing:
                    # Empty input: there is no last item to replicate, so
                    # stop instead of referencing an unbound variable.
                    return
                while True:
                    yield item
            iterator = replicator(iter(self.items))
        elif self.bordermode == 'none':
            iterator = iter(self.items)
        else:
            raise KeyError(self.bordermode)

        # Build an iterator that describes how big each chunk will be
        if remainder:
            # TODO:
            # handle replicate and cycle border modes
            # TODO:
            # benchmark different methods
            chunksize_iter = it.chain(
                it.repeat(chunksize + 1, remainder),
                it.repeat(chunksize, nchunks - remainder)
            )
        else:
            chunksize_iter = it.repeat(chunksize, nchunks)

        for _chunksize in chunksize_iter:
            chunk = list(it.islice(iterator, _chunksize))
            # if chunk:
            yield chunk

    @staticmethod
    def noborder(items, chunksize):
        """
        Yield lists of up to ``chunksize`` items; the last may be shorter.
        """
        # feed the same iter to zip_longest multiple times, this causes it to
        # consume successive values of the same sequence
        sentinel = object()
        copied_iters = [iter(items)] * chunksize
        chunks_with_sentinals = zip_longest(*copied_iters, fillvalue=sentinel)
        # Dont fill empty space in the last chunk, just return it as is
        for chunk in chunks_with_sentinals:
            yield [item for item in chunk if item is not sentinel]

    @staticmethod
    def cycle(items, chunksize):
        """
        Yield lists of exactly ``chunksize`` items, padding the last chunk
        with values taken from the beginning of the input.
        """
        sentinel = object()
        copied_iters = [iter(items)] * chunksize
        chunks_with_sentinals = zip_longest(*copied_iters, fillvalue=sentinel)
        # Fill empty space in the last chunk with values from the beginning.
        # Fewer than ``chunksize`` fill values are ever needed unless the
        # whole input fits in the first chunk, so cycling over the real items
        # of the first chunk is sufficient. Buffering them (rather than
        # calling ``iter(items)`` a second time) also works when ``items`` is
        # a one-shot iterator.
        bordervalues = None
        for chunk in chunks_with_sentinals:
            if bordervalues is None:
                bordervalues = it.cycle(
                    [item for item in chunk if item is not sentinel])
            yield [item if item is not sentinel else next(bordervalues)
                   for item in chunk]

    @staticmethod
    def replicate(items, chunksize):
        """
        Yield lists of exactly ``chunksize`` items, padding the last chunk
        by repeating its final item.
        """
        sentinel = object()
        copied_iters = [iter(items)] * chunksize
        # Fill empty space in the last chunk by replicating the last value
        chunks_with_sentinals = zip_longest(*copied_iters, fillvalue=sentinel)
        for chunk in chunks_with_sentinals:
            filt_chunk = [item for item in chunk if item is not sentinel]
            if len(filt_chunk) == chunksize:
                yield filt_chunk
            else:
                sizediff = (chunksize - len(filt_chunk))
                padded_chunk = filt_chunk + [filt_chunk[-1]] * sizediff
                yield padded_chunk
[docs]
def iterable(obj, strok=False):
    """
    Test whether ``obj`` can be iterated over.

    Strings are technically iterable, but are reported as non-iterable
    unless ``strok`` is True.

    Args:
        obj (object): a scalar or iterable input

        strok (bool):
            if True allow strings to be interpreted as iterable.
            Defaults to False.

    Returns:
        bool: True if the input is iterable

    Example:
        >>> import ubelt as ub
        >>> obj_list = [3, [3], '3', (3,), [3, 4, 5], {}]
        >>> result = [ub.iterable(obj) for obj in obj_list]
        >>> assert result == [False, True, False, True, True, True]
        >>> result = [ub.iterable(obj, strok=True) for obj in obj_list]
        >>> assert result == [False, True, True, True, True, True]
    """
    try:
        iter(obj)
    except Exception:
        # Anything that cannot produce an iterator is not iterable
        return False
    # The object is iterable; only strings need the extra opt-in.
    return strok or not isinstance(obj, str)
[docs]
def take(items, indices, default=util_const.NoParam):
    """
    Generate the entries of ``items`` selected by ``indices``.

    ``items`` is typically a list (indexed by integers) or a dictionary
    (indexed by keys). When ``default`` is given, lookups go through
    ``items.get`` so missing keys produce the default instead of an error.

    Args:
        items (Sequence[VT] | Mapping[KT, VT]):
            An indexable object to select items from.

        indices (Iterable[int | KT]):
            A sequence of indexes into ``items``.

        default (Any | NoParamType):
            if specified ``items`` must support the ``get`` method and
            this will be used as the default value.

    Yields:
        VT: a selected item within the list

    SeeAlso:
        :func:`ubelt.dict_subset`

    Note:
        ``ub.take(items, indices)`` is equivalent to
        ``(items[i] for i in indices)`` when ``default`` is unspecified.

    Notes:
        This is based on the :func:`numpy.take` function, but written in pure
        python.

        Do not confuse this with :func:`more_itertools.take`, the behavior is
        very different.

    Example:
        >>> import ubelt as ub
        >>> items = [0, 1, 2, 3]
        >>> indices = [2, 0]
        >>> list(ub.take(items, indices))
        [2, 0]

    Example:
        >>> import ubelt as ub
        >>> dict_ = {1: 'a', 2: 'b', 3: 'c'}
        >>> keys = [1, 2, 3, 4, 5]
        >>> result = list(ub.take(dict_, keys, None))
        >>> assert result == ['a', 'b', 'c', None, None]

    Example:
        >>> import ubelt as ub
        >>> dict_ = {1: 'a', 2: 'b', 3: 'c'}
        >>> keys = [1, 2, 3, 4, 5]
        >>> try:
        >>>     print(list(ub.take(dict_, keys)))
        >>>     raise AssertionError('did not get key error')
        >>> except KeyError:
        >>>     print('correctly got key error')
    """
    if default is not util_const.NoParam:
        # A default was requested: missing entries are replaced by it
        for key in indices:
            yield items.get(key, default)
        return
    # No default: plain indexing, lookup errors propagate to the caller
    for key in indices:
        yield items[key]
[docs]
def compress(items, flags):
    """
    Keep the entries of ``items`` whose matching entry in ``flags`` is truthy.

    Args:
        items (Iterable[Any]): a sequence to select items from

        flags (Iterable[bool]): corresponding sequence of bools

    Returns:
        Iterable[Any]: a subset of masked items

    Notes:
        This function is based on :func:`numpy.compress`, but is pure Python
        and swaps the condition and array argument to be consistent with
        :func:`ubelt.take`.

        This is equivalent to :func:`itertools.compress`.

    Example:
        >>> import ubelt as ub
        >>> items = [1, 2, 3, 4, 5]
        >>> flags = [False, True, True, False, True]
        >>> list(ub.compress(items, flags))
        [2, 3, 5]
    """
    # Delegate directly to the standard library implementation
    selected = it.compress(items, flags)
    return selected
[docs]
def flatten(nested):
    """
    Remove one level of nesting from an iterable of iterables.

    Args:
        nested (Iterable[Iterable[Any]]): list of lists

    Returns:
        Iterable[Any]: flattened items

    Notes:
        Equivalent to :func:`more_itertools.flatten` and
        :func:`itertools.chain.from_iterable`.

    Example:
        >>> import ubelt as ub
        >>> nested = [['a', 'b'], ['c', 'd']]
        >>> list(ub.flatten(nested))
        ['a', 'b', 'c', 'd']
    """
    # Lazily chain the inner iterables end to end
    chained = it.chain.from_iterable(nested)
    return chained
[docs]
def unique(items, key=None):
    """
    Yield each distinct item once, preserving first-seen order.

    Args:
        items (Iterable[T]): list of items

        key (Callable[[T], Any] | None):
            Custom normalization function.
            If specified, this function generates items where ``key(item)`` is
            unique.

    Yields:
        T:
            a unique item from the input sequence

    Notes:
        Functionally equivalent to :func:`more_itertools.unique_everseen`.

    Example:
        >>> import ubelt as ub
        >>> items = [4, 6, 6, 0, 6, 1, 0, 2, 2, 1]
        >>> unique_items = list(ub.unique(items))
        >>> assert unique_items == [4, 6, 0, 1, 2]

    Example:
        >>> import ubelt as ub
        >>> items = ['A', 'a', 'b', 'B', 'C', 'c', 'D', 'e', 'D', 'E']
        >>> unique_items = list(ub.unique(items, key=str.lower))
        >>> assert unique_items == ['A', 'b', 'C', 'D', 'e']
        >>> unique_items = list(ub.unique(items))
        >>> assert unique_items == ['A', 'a', 'b', 'B', 'C', 'c', 'D', 'e', 'E']
    """
    # Normalized values that were already emitted
    observed = set()
    for item in items:
        # Without a key the item is its own normalized form
        norm = item if key is None else key(item)
        if norm in observed:
            continue
        observed.add(norm)
        yield item
[docs]
def argunique(items, key=None):
    """
    Find the position of the first occurrence of every distinct item.

    Args:
        items (Sequence[VT]): indexable collection of items

        key (Callable[[VT], Any] | None):
            Custom normalization function.
            If specified, this function generates indexes where
            ``key(item[index])`` is unique.

    Returns:
        Iterator[int] : indices of the unique items

    Example:
        >>> import ubelt as ub
        >>> items = [0, 2, 5, 1, 1, 0, 2, 4]
        >>> indices = list(ub.argunique(items))
        >>> assert indices == [0, 1, 2, 3, 7]
        >>> indices = list(ub.argunique(items, key=lambda x: x % 2 == 0))
        >>> assert indices == [0, 2]
    """
    # Deduplicate the positions, comparing them by the item they refer to
    if key is None:
        def normalize(index):
            return items[index]
    else:
        def normalize(index):
            return key(items[index])
    positions = range(len(items))
    return unique(positions, key=normalize)
[docs]
def unique_flags(items, key=None):
    """
    Build a boolean mask marking the first occurrence of every distinct item.

    Args:
        items (Sequence[VT]): indexable collection of items

        key (Callable[[VT], Any] | None):
            Custom normalization function.
            If specified generates True if ``key(item)`` is unique and False
            otherwise.

    Returns:
        List[bool] : flags the items that are unique

    Example:
        >>> import ubelt as ub
        >>> items = [0, 2, 1, 1, 0, 9, 2]
        >>> flags = ub.unique_flags(items)
        >>> assert flags == [True, True, True, False, False, True, False]
        >>> flags = ub.unique_flags(items, key=lambda x: x % 2 == 0)
        >>> assert flags == [True, False, True, False, False, False, False]
    """
    num_items = len(items)
    if key is not None:
        first_indices = argunique(items, key=key)
    else:
        # Walk backwards so that the earliest position of each item is the
        # one that is written last and therefore kept.
        first_seen = {}
        for index, item in zip(reversed(range(num_items)), reversed(items)):
            first_seen[item] = index
        first_indices = first_seen.values()
    return boolmask(first_indices, num_items)
[docs]
def boolmask(indices, maxval=None):
    """
    Constructs a list of booleans where an item is True if its position is in
    ``indices`` otherwise it is False.

    Args:
        indices (List[int]): list of integer indices

        maxval (int | None):
            length of the returned list. If not specified this is inferred
            using ``max(indices)``. An empty ``indices`` then produces an
            empty mask.

    Returns:
        List[bool]:
            mask - a list of booleans. mask[idx] is True if idx in indices

    Raises:
        IndexError: if an index does not fit inside a mask of length
            ``maxval``.

    Note:
        In the future the arg ``maxval`` may change its name to ``shape``

    Example:
        >>> import ubelt as ub
        >>> indices = [0, 1, 4]
        >>> mask = ub.boolmask(indices, maxval=6)
        >>> assert mask == [True, True, False, False, True, False]
        >>> mask = ub.boolmask(indices)
        >>> assert mask == [True, True, False, False, True]
        >>> assert ub.boolmask([]) == []
    """
    if maxval is None:
        # Materialize first: indices may be a one-shot iterable and it is
        # traversed twice (once for the max, once to fill the mask).
        indices = list(indices)
        # default=-1 makes an empty input produce a zero-length mask instead
        # of raising ValueError from max().
        maxval = max(indices, default=-1) + 1
    mask = [False] * maxval
    for index in indices:
        mask[index] = True
    return mask
[docs]
def iter_window(iterable, size=2, step=1, wrap=False):
    """
    Slide a fixed-size window over ``iterable`` (a 1D sliding window).

    Args:
        iterable (Iterable[T]): an iterable sequence

        size (int): Sliding window size. Defaults to 2.

        step (int): Sliding step size. Default to 1.

        wrap (bool):
            If True, the last window will "wrap-around" to include items from
            the start of the input sequence in order to always produce
            consistently sized chunks. Otherwise, the last chunk may be smaller
            if there are not enough items in the sequence. Defaults to False.

    Returns:
        Iterable[T]: returns a possibly overlapping windows in a sequence

    Notes:
        Similar to :func:`more_itertools.windowed`,
        Similar to :func:`more_itertools.pairwise`,
        Similar to :func:`more_itertools.triplewise`,
        Similar to :func:`more_itertools.sliding_window`

    Example:
        >>> import ubelt as ub
        >>> iterable = [1, 2, 3, 4, 5, 6]
        >>> size, step, wrap = 3, 1, True
        >>> window_iter = ub.iter_window(iterable, size, step, wrap)
        >>> window_list = list(window_iter)
        >>> print('window_list = %r' % (window_list,))
        window_list = [(1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6), (5, 6, 1), (6, 1, 2)]

    Example:
        >>> import ubelt as ub
        >>> iterable = [1, 2, 3, 4, 5, 6]
        >>> size, step, wrap = 3, 2, True
        >>> window_iter = ub.iter_window(iterable, size, step, wrap)
        >>> window_list = list(window_iter)
        >>> print('window_list = {!r}'.format(window_list))
        window_list = [(1, 2, 3), (3, 4, 5), (5, 6, 1)]

    Example:
        >>> import ubelt as ub
        >>> iterable = [1, 2, 3, 4, 5, 6]
        >>> size, step, wrap = 3, 2, False
        >>> window_iter = ub.iter_window(iterable, size, step, wrap)
        >>> window_list = list(window_iter)
        >>> print('window_list = {!r}'.format(window_list))
        window_list = [(1, 2, 3), (3, 4, 5)]

    Example:
        >>> import ubelt as ub
        >>> iterable = []
        >>> size, step, wrap = 3, 2, False
        >>> window_iter = ub.iter_window(iterable, size, step, wrap)
        >>> window_list = list(window_iter)
        >>> print('window_list = {!r}'.format(window_list))
        window_list = []
    """
    # One independent copy of the input per window slot. it.tee may be slow,
    # but works on all iterables.
    slots = it.tee(iterable, size)
    if wrap:
        # Every slot except the first must cycle so it can wrap around
        slots = [slots[0]] + [it.cycle(slot) for slot in slots[1:]]
    # Offset the k-th slot by k items so the slots line up as a window
    try:
        for offset, slot in enumerate(slots[1:], start=1):
            for _ in range(offset):
                next(slot)
    except StopIteration:
        # Not enough items to form even a single window
        return iter(())
    # Zip the slots into windows and keep every ``step``-th one
    return it.islice(zip(*slots), 0, None, step)
[docs]
def allsame(iterable, eq=operator.eq):
    """
    Check whether every item equals the first item of the iterable.

    Args:
        iterable (Iterable[T]):
            items to determine if they are all the same

        eq (Callable[[T, T], bool]):
            function used to test for equality.
            Defaults to :func:`operator.eq`.

    Returns:
        bool: True if all items are equal, otherwise False

    Notes:
        Similar to :func:`more_itertools.all_equal`

    Example:
        >>> import ubelt as ub
        >>> ub.allsame([1, 1, 1, 1])
        True
        >>> ub.allsame([])
        True
        >>> ub.allsame([0, 1])
        False
        >>> iterable = iter([0, 1, 1, 1])
        >>> next(iterable)
        >>> ub.allsame(iterable)
        True
        >>> ub.allsame(range(10))
        False
        >>> ub.allsame(range(10), lambda a, b: True)
        True
    """
    remaining = iter(iterable)
    nothing = object()
    first = next(remaining, nothing)
    if first is nothing:
        # An empty input is trivially "all the same"
        return True
    # Compare every later item against the first; stop at the first mismatch
    for item in remaining:
        if not eq(first, item):
            return False
    return True
[docs]
def argsort(indexable, key=None, reverse=False):
    """
    Returns the indices that would sort a indexable object.

    This is similar to :func:`numpy.argsort`, but it is written in pure python
    and works on both lists and dictionaries.

    Args:
        indexable (Iterable[VT] | Mapping[KT, VT]): indexable to sort by

        key (Callable[[VT], VT] | None):
            If specified, customizes the ordering of the indexable.

        reverse (bool): if True returns in descending order. Default to False.

    Returns:
        List[int] | List[KT]:
            indices - list of indices that sorts the indexable

    Note:
        When ``key`` is unspecified, ties between equal values are broken by
        comparing the indices / keys. If those cannot be compared with each
        other (e.g. a dictionary mixing string and integer keys), the sort
        falls back to ordering by value only, keeping tied entries in their
        original relative order.

    Example:
        >>> import ubelt as ub
        >>> # argsort works on dicts by returning keys
        >>> dict_ = {'a': 3, 'b': 2, 'c': 100}
        >>> indices = ub.argsort(dict_)
        >>> assert list(ub.take(dict_, indices)) == sorted(dict_.values())
        >>> # argsort works on lists by returning indices
        >>> indexable = [100, 2, 432, 10]
        >>> indices = ub.argsort(indexable)
        >>> assert list(ub.take(indexable, indices)) == sorted(indexable)
        >>> # Can use iterators, but be careful. It exhausts them.
        >>> indexable = reversed(range(100))
        >>> indices = ub.argsort(indexable)
        >>> assert indices[0] == 99
        >>> # Can use key just like sorted
        >>> indexable = [[0, 1, 2], [3, 4], [5]]
        >>> indices = ub.argsort(indexable, key=len)
        >>> assert indices == [2, 1, 0]
        >>> # Can use reverse just like sorted
        >>> indexable = [0, 2, 1]
        >>> indices = ub.argsort(indexable, reverse=True)
        >>> assert indices == [1, 2, 0]
        >>> # Tied values with keys that cannot be compared to each other
        >>> assert ub.argsort({'a': 1, 3: 1, 'b': 0}) == ['b', 'a', 3]
    """
    # Create a list of value/key pairs. It is materialized because the
    # fallback below may need to sort it a second time.
    if isinstance(indexable, collections_abc.Mapping):
        vk_pairs = [(v, k) for k, v in indexable.items()]
    else:
        vk_pairs = [(v, k) for k, v in enumerate(indexable)]

    # Sort by values and extract the indices
    if key is None:
        try:
            ordered = sorted(vk_pairs, reverse=reverse)
        except TypeError:
            # Tied values force a comparison of the keys, which may not be
            # orderable. Retry on the values alone; if the values themselves
            # are the problem this raises the same TypeError again.
            ordered = sorted(vk_pairs, key=operator.itemgetter(0),
                             reverse=reverse)
    else:
        # If key is provided, call it using the value as input
        ordered = sorted(vk_pairs, key=lambda vk: key(vk[0]),
                         reverse=reverse)
    return [k for v, k in ordered]
[docs]
def argmax(indexable, key=None):
    """
    Find the index (or dictionary key) of the largest value.

    This is similar to :func:`numpy.argmax`, but it is written in pure python
    and works on both lists and dictionaries.

    Args:
        indexable (Iterable[VT] | Mapping[KT, VT]): indexable to sort by

        key (Callable[[VT], Any] | None):
            If specified, customizes the ordering of the indexable

    Returns:
        int | KT: the index of the item with the maximum value.

    Example:
        >>> import ubelt as ub
        >>> assert ub.argmax({'a': 3, 'b': 2, 'c': 100}) == 'c'
        >>> assert ub.argmax(['a', 'c', 'b', 'z', 'f']) == 3
        >>> assert ub.argmax([[0, 1], [2, 3, 4], [5]], key=len) == 1
        >>> assert ub.argmax({'a': 3, 'b': 2, 3: 100, 4: 4}) == 3
        >>> assert ub.argmax(iter(['a', 'c', 'b', 'z', 'f'])) == 3
    """
    if key is None and isinstance(indexable, collections_abc.Mapping):
        # Compare the (key, value) pairs by value and report the key
        best_pair = max(indexable.items(), key=operator.itemgetter(1))
        return best_pair[0]
    if not hasattr(indexable, 'index'):
        # less efficient, but catch all solution
        return argsort(indexable, key=key)[-1]
    # Sequence-like input: find the largest value, then locate it
    largest = max(indexable) if key is None else max(indexable, key=key)
    return indexable.index(largest)
[docs]
def argmin(indexable, key=None):
    """
    Find the index (or dictionary key) of the smallest value.

    This is similar to :func:`numpy.argmin`, but it is written in pure python
    and works on both lists and dictionaries.

    Args:
        indexable (Iterable[VT] | Mapping[KT, VT]): indexable to sort by

        key (Callable[[VT], VT] | None):
            If specified, customizes the ordering of the indexable.

    Returns:
        int | KT: the index of the item with the minimum value.

    Example:
        >>> import ubelt as ub
        >>> assert ub.argmin({'a': 3, 'b': 2, 'c': 100}) == 'b'
        >>> assert ub.argmin(['a', 'c', 'b', 'z', 'f']) == 0
        >>> assert ub.argmin([[0, 1], [2, 3, 4], [5]], key=len) == 2
        >>> assert ub.argmin({'a': 3, 'b': 2, 3: 100, 4: 4}) == 'b'
        >>> assert ub.argmin(iter(['a', 'c', 'A', 'z', 'f'])) == 2
    """
    if key is None and isinstance(indexable, collections_abc.Mapping):
        # Compare the (key, value) pairs by value and report the key
        best_pair = min(indexable.items(), key=operator.itemgetter(1))
        return best_pair[0]
    if not hasattr(indexable, 'index'):
        # less efficient, but catch all solution
        return argsort(indexable, key=key)[0]
    # Sequence-like input: find the smallest value, then locate it
    smallest = min(indexable) if key is None else min(indexable, key=key)
    return indexable.index(smallest)
[docs]
def peek(iterable, default=util_const.NoParam):
    """
    Return the first item of an iterable.

    If the input is itself an iterator, the returned element is consumed
    from it (i.e. this acts as a pop operation).

    Args:
        iterable (Iterable[T]): an iterable

        default (T): default item to return if the iterable is empty,
            otherwise a StopIteration error is raised

    Returns:
        T: item - the first item of ordered sequence, a popped item from an
            iterator, or an arbitrary item from an unordered collection.

    Notes:
        Similar to :func:`more_itertools.peekable`

    Example:
        >>> import ubelt as ub
        >>> data = [0, 1, 2]
        >>> ub.peek(data)
        0
        >>> iterator = iter(data)
        >>> print(ub.peek(iterator))
        0
        >>> print(ub.peek(iterator))
        1
        >>> print(ub.peek(iterator))
        2
        >>> ub.peek(range(3))
        0
        >>> ub.peek([], 3)
        3
    """
    iterator = iter(iterable)
    if default is util_const.NoParam:
        # No fallback given: an empty input raises StopIteration
        return next(iterator)
    return next(iterator, default)
# Stubs for potential future object oriented wrappers
class IterableMixin:
    """
    Mixin that exposes iterable helper functions as methods.

    The helpers are bound as plain class attributes, so the instance is
    passed to them as their first argument when they are called as methods.
    """
    unique = unique
    # chunks = chunks
    histogram = util_dict.dict_hist
    duplicates = util_dict.find_duplicates
    group = util_dict.group_items

    def chunks(self, size=None, num=None, bordermode='none'):
        """
        Split this collection into chunks.

        Args:
            size (int | None): forwarded as ``chunksize``
            num (int | None): forwarded as ``nchunks``
            bordermode (str): forwarded unchanged

        Returns:
            chunks: a chunk iterator over this collection, created with
            ``total`` set to ``len(self)``
        """
        num_items = len(self)
        chunker = chunks(self, chunksize=size, nchunks=num, total=num_items,
                         bordermode=bordermode)
        return chunker

    # def histogram(self, weights=None, ordered=False, labels=None):
    #     util_dict.dict_hist.__doc__
    #     return util_dict.dict_hist(self, weights=weights, ordered=ordered)

    # def duplicates(self, k=2, key=None):
    #     util_dict.find_duplicates.__doc__
    #     return util_dict.find_duplicates(self, k=k, key=key)

    # def group(self, key):
    #     util_dict.group_items.__doc__
    #     return util_dict.group_items(self, key=key)
class OrderedIterableMixin(IterableMixin):
    """
    Extends :class:`IterableMixin` with the :func:`compress`,
    :func:`argunique`, and :func:`iter_window` helpers bound as methods.
    """
    # Bound as plain class attributes: the instance is passed as the first
    # argument of each function when called as a method.
    compress = compress
    argunique = argunique
    window = iter_window
class UList(list, OrderedIterableMixin):
    """
    An extended list class that features additional helper methods.

    The module-level helper functions are bound as methods, so for example
    ``self.argsort()`` calls ``argsort(self)``.

    Example:
        >>> from ubelt.util_list import UList
        >>> self = UList()
        >>> self.append(1)
        >>> self += UList([1, 2, 3])
        >>> self += UList([5, 7])
        >>> #
        >>> print(f'unique: {list(self.unique())}')
        >>> print(f'argunique: {list(self.argunique())}')
        >>> #
        >>> print(f'chunks: {list(self.chunks(num=2))}')
        >>> print(f'chunks: {list(self.chunks(size=2))}')
        >>> #
        >>> print(f'window: {list(self.window(3))}')
        >>> #
        >>> print(f'take: {list(self.take([0, 2, 3]))}')
        >>> print(f'compress: {list(self.compress([0, 1, 0, 1]))}')
        >>> #
        >>> print(f'argsort: {self.argsort()}')
        >>> print(f'argmax: {self.argmax()}')
        >>> print(f'argmin: {self.argmin()}')
        >>> print(f'flatten: {list(UList([self, [2, 3, 3]]).flatten())}')
        >>> print(f'allsame: {self.allsame()}')
        >>> print(f'peek: {self.peek()}')
        >>> print(f'histogram: {self.histogram()}')
        >>> print(f'group: {self.group(key=lambda x: x % 2)}')
        >>> print(f'duplicates: {self.duplicates()}')
    """
    # Each function receives the list instance as its first argument.
    peek = peek
    take = take
    flatten = flatten
    allsame = allsame
    argsort = argsort
    argmax = argmax
    argmin = argmin
# class USet(set, IterableMixin):
# ...
# class Set(set, IterableMixin):
# ...