| |
| |
| |
| |
| |
| |
| |
| |
|
|
| """ |
| High-level access to HDF5 dataspace selections |
| """ |
|
|
| import numpy as np |
|
|
| from .base import product |
| from .. import h5s, h5r, _selector |
|
|
def select(shape, args, dataset=None):
    """ Turn arbitrary __getitem__ arguments into a selection object.

    shape
        Shape of the "source" dataspace.

    args
        Either a single argument or a tuple of arguments.  A non-tuple is
        treated as a one-element tuple.  See below for the supported
        classes of argument.

    dataset
        A h5py.Dataset instance representing the source dataset.  Needed
        to resolve region references; when given, its cached selector is
        used for ordinary indexing.

    Argument classes:

    Single Selection instance
        Returns the argument (its shape must equal "shape").

    numpy.ndarray
        Must be a boolean mask with the same shape as "shape".  Returns a
        PointSelection instance.

    RegionReference
        Returns a Selection instance.  Requires "dataset".

    Indices, slices, ellipses, MultiBlockSlices only
        Returns a SimpleSelection instance

    Indices, slices, ellipses, lists or boolean index arrays
        Returns a FancySelection instance.

    Raises TypeError when a special argument does not match "shape", or
    when a region reference is supplied without a dataset.
    """
    index = args if isinstance(args, tuple) else (args,)

    # A lone argument may be one of the "special" indexing objects
    if len(index) == 1:
        sole = index[0]

        if isinstance(sole, Selection):
            if sole.shape != shape:
                raise TypeError("Mismatched selection shape")
            return sole

        if isinstance(sole, np.ndarray) and sole.dtype.kind == 'b':
            if sole.shape != shape:
                raise TypeError("Boolean indexing array has incompatible shape")
            return PointSelection.from_mask(sole)

        if isinstance(sole, h5r.RegionReference):
            if dataset is None:
                raise TypeError("Cannot apply a region reference without a dataset")
            region_space = h5r.get_region(sole, dataset.id)
            if shape != region_space.shape:
                raise TypeError("Reference shape does not match dataset shape")
            return Selection(shape, spaceid=region_space)

    # Everything else is handed to a selector object
    if dataset is None:
        selector = _selector.Selector(h5s.create_simple(shape))
    else:
        selector = dataset._selector

    return selector.make_selection(index)
|
|
|
|
class Selection:

    """
        Root of the HDF5 dataspace selection hierarchy.  Every subclass
        implements the "selection protocol", i.e. provides at least:

        __init__(shape)   => Create a new selection on "shape"-tuple
        __getitem__(args) => Perform a selection with the range specified.
                             What args are allowed depends on the
                             particular subclass in use.

        id (read-only) =>      h5py.h5s.SpaceID instance
        shape (read-only) =>   The shape of the dataspace.
        mshape  (read-only) => The shape of the selection region.
                               Not guaranteed to fit within "shape", although
                               the total number of points is less than
                               product(shape).
        nselect (read-only) => Number of selected points.  Always equal to
                               product(mshape).

        broadcast(target_shape) => Return an iterable which yields dataspaces
                                   for read, based on target_shape.

        Instances of this base class are "unshaped": the selection is
        always reported as 1-D.
    """

    def __init__(self, shape, spaceid=None):
        """ Wrap "spaceid" if given (shape is then taken from it and the
        "shape" argument may be None); otherwise create a fresh dataspace
        of the given shape with everything selected.
        """
        if spaceid is None:
            dims = tuple(shape)
            self._shape = dims
            self._id = h5s.create_simple(dims, (h5s.UNLIMITED,)*len(dims))
            self._id.select_all()
        else:
            self._id = spaceid
            self._shape = spaceid.shape

    @property
    def id(self):
        """ The underlying SpaceID """
        return self._id

    @property
    def shape(self):
        """ Shape of the complete dataspace """
        return self._shape

    @property
    def nselect(self):
        """ How many elements are selected right now """
        return self._id.get_select_npoints()

    @property
    def mshape(self):
        """ Selection shape; this class always reports (nselect,) """
        npoints = self.nselect
        return (npoints,)

    @property
    def array_shape(self):
        """Shape of the array to read/write; same as mshape for this class"""
        return self.mshape

    # expand_shape and broadcast only really make sense for SimpleSelection
    def expand_shape(self, source_shape):
        """ Return source_shape unchanged, provided its element count
        equals the number of selected points; otherwise raise TypeError.
        """
        if product(source_shape) == self.nselect:
            return source_shape
        raise TypeError("Broadcasting is not supported for point-wise selections")

    def broadcast(self, source_shape):
        """ Yield the single dataspace to use for the transfer.

        Raises TypeError unless source_shape holds exactly nselect elements.
        """
        npoints = self.nselect
        if product(source_shape) != npoints:
            raise TypeError("Broadcasting is not supported for point-wise selections")
        yield self._id

    def __getitem__(self, args):
        """ Indexing is not available on the base class """
        raise NotImplementedError("This class does not support indexing")
|
|
class PointSelection(Selection):

    """
        Represents a point-wise selection.  You can supply sequences of
        points to the three methods append(), prepend() and set(), or
        instantiate it with a single boolean array using from_mask().
    """

    def __init__(self, shape, spaceid=None, points=None):
        """ Create the selection; if "points" is given, select exactly
        those points (replacing whatever the dataspace had selected).
        """
        super().__init__(shape, spaceid)
        if points is not None:
            self._perform_selection(points, h5s.SELECT_SET)

    def _perform_selection(self, points, op):
        """ Internal method which actually performs the selection

        points
            Sequence of coordinates; converted to a C-ordered uint64 array.
            A 1-D sequence is treated as a single point.

        op
            One of h5s.SELECT_SET, h5s.SELECT_APPEND, h5s.SELECT_PREPEND.
        """
        points = np.asarray(points, order='C', dtype='u8')
        if points.ndim == 1:
            # np.asarray may hand back the caller's own array; use reshape()
            # rather than assigning to .shape so that array is not modified.
            points = points.reshape((1, points.shape[0]))

        # Appending/prepending only makes sense on an existing point
        # selection; anything else is replaced outright.
        if self._id.get_select_type() != h5s.SEL_POINTS:
            op = h5s.SELECT_SET

        if len(points) == 0:
            # Replacing the selection with no points empties it.  Adding no
            # points to an existing point selection must leave it untouched.
            if op == h5s.SELECT_SET:
                self._id.select_none()
        else:
            self._id.select_elements(points, op)

    @classmethod
    def from_mask(cls, mask, spaceid=None):
        """Create a point-wise selection from a NumPy boolean array

        Raises TypeError if "mask" is not a boolean ndarray.
        """
        if not (isinstance(mask, np.ndarray) and mask.dtype.kind == 'b'):
            raise TypeError("PointSelection.from_mask only works with bool arrays")

        # One row of coordinates per True element
        points = np.transpose(mask.nonzero())
        return cls(mask.shape, spaceid, points=points)

    def append(self, points):
        """ Add the sequence of points to the end of the current selection """
        self._perform_selection(points, h5s.SELECT_APPEND)

    def prepend(self, points):
        """ Add the sequence of points to the beginning of the current selection """
        self._perform_selection(points, h5s.SELECT_PREPEND)

    def set(self, points):
        """ Replace the current selection with the given sequence of points"""
        self._perform_selection(points, h5s.SELECT_SET)
|
|
|
|
class SimpleSelection(Selection):

    """ A single "rectangular" (regular) selection composed of only slices
        and integer arguments.  Can participate in broadcasting.

        The selection is held in self._sel as a 4-tuple
        (start, count, step, scalar), each with one entry per axis; "scalar"
        flags the axes that are dropped from the array shape.
    """

    @property
    def mshape(self):
        """ Shape of current selection """
        return self._sel[1]

    @property
    def array_shape(self):
        """ Shape of the array to read/write: mshape without scalar axes """
        scalar = self._sel[3]
        return tuple(x for x, s in zip(self.mshape, scalar) if not s)

    def __init__(self, shape, spaceid=None, hyperslab=None):
        """ Create the selection.  "hyperslab", if given, is the
        (start, count, step, scalar) tuple; otherwise everything is selected.
        """
        super().__init__(shape, spaceid)
        if hyperslab is not None:
            self._sel = hyperslab
        else:
            # No hyperslab specified - select all
            rank = len(self.shape)
            self._sel = ((0,)*rank, self.shape, (1,)*rank, (False,)*rank)

    def expand_shape(self, source_shape):
        """Match the dimensions of an array to be broadcast to the selection

        The returned shape describes an array of the same size as the input
        shape, but its dimensions are padded with length-1 axes so that it
        has one entry per axis of the selection.

        E.g. with a dataset shape (10, 5, 4, 2), writing like this::

            ds[..., 0] = np.ones((5, 4))

        The source shape (5, 4) will expand to (1, 5, 4, 1).
        Then the broadcast method below repeats that chunk 10
        times to write to an effective shape of (10, 5, 4, 1).

        Raises TypeError if source_shape cannot be broadcast to the
        selection.
        """
        count, scalar = self._sel[1], self._sel[3]

        rank = len(count)
        remaining_src_dims = list(source_shape)

        # Walk the axes from last to first, consuming source dimensions
        eshape = []
        for idx in range(1, rank + 1):
            if len(remaining_src_dims) == 0 or scalar[-idx]:  # Skip scalar axes
                eshape.append(1)
            else:
                t = remaining_src_dims.pop()
                if t == 1 or count[-idx] == t:
                    eshape.append(t)
                else:
                    raise TypeError("Can't broadcast %s -> %s" % (source_shape, self.array_shape))

        if any(n > 1 for n in remaining_src_dims):
            # All dimensions from source_shape should either have been popped
            # to match the selection shape, or be 1.
            raise TypeError("Can't broadcast %s -> %s" % (source_shape, self.array_shape))

        # We have built eshape backwards, so now reverse it
        return tuple(eshape[::-1])

    def broadcast(self, source_shape):
        """ Return an iterator over target dataspaces for broadcasting.

        Follows the standard NumPy broadcasting rules against the current
        selection shape (self.mshape).

        Raises TypeError if source_shape cannot be broadcast.
        """
        if self.shape == ():
            if product(source_shape) != 1:
                # Wrap in a 1-tuple: source_shape is itself usually a tuple,
                # which % would otherwise unpack as multiple arguments.
                raise TypeError("Can't broadcast %s to scalar" % (source_shape,))
            self._id.select_all()
            yield self._id
            return

        start, count, step, _ = self._sel

        rank = len(count)
        tshape = self.expand_shape(source_shape)

        # Number of times the source block is repeated along each axis.
        # NOTE(review): a zero entry in tshape (empty source matching an
        # empty selection axis) would raise ZeroDivisionError here; callers
        # presumably skip empty selections before broadcasting - confirm.
        chunks = tuple(x//y for x, y in zip(count, tshape))
        nchunks = product(chunks)

        if nchunks == 1:
            yield self._id
        else:
            # Select one source-sized block at the origin, then slide it to
            # each chunk position.  The same SpaceID object is yielded each
            # time with a different offset applied.
            sid = self._id.copy()
            sid.select_hyperslab((0,)*rank, tshape, step)
            for idx in range(nchunks):
                offset = tuple(x*y*z + s for x, y, z, s in zip(np.unravel_index(idx, chunks), tshape, step, start))
                sid.offset_simple(offset)
                yield sid
|
|
|
|
class FancySelection(Selection):

    """
        Selection supporting advanced NumPy-style indexing on top of the
        usual slice-and-int behavior.

        Indexing arguments may be ints, slices, lists of indices, or
        per-axis (1D) boolean arrays.

        Broadcasting is not supported for these selections.
    """

    def __init__(self, shape, spaceid=None, mshape=None, array_shape=None):
        """ Create the selection.  "mshape" defaults to the dataspace shape
        and "array_shape" defaults to "mshape".
        """
        super().__init__(shape, spaceid)
        self._mshape = self.shape if mshape is None else mshape
        self._array_shape = self._mshape if array_shape is None else array_shape

    @property
    def mshape(self):
        """ Shape of the selection region """
        return self._mshape

    @property
    def array_shape(self):
        """ Shape of the array to read/write """
        return self._array_shape

    def expand_shape(self, source_shape):
        """ Return source_shape if it equals array_shape, else raise
        TypeError (no broadcasting for this class).
        """
        if source_shape == self.array_shape:
            return source_shape
        raise TypeError("Broadcasting is not supported for complex selections")

    def broadcast(self, source_shape):
        """ Yield the single dataspace for the transfer; source_shape must
        equal array_shape, otherwise TypeError is raised.
        """
        same_shape = source_shape == self.array_shape
        if not same_shape:
            raise TypeError("Broadcasting is not supported for complex selections")
        yield self._id
|
|
|
|
def guess_shape(sid):
    """ Given a dataspace, try to deduce the shape of the selection.

    Returns one of:
        * A tuple with the selection shape, same length as the dataspace
        * A 1D selection shape for point-based and multiple-hyperslab selections
        * None, for unselected scalars and for NULL dataspaces

    Raises TypeError for an unrecognized dataspace class or selection type.
    """

    sel_class = sid.get_simple_extent_type()    # Dataspace class
    sel_type = sid.get_select_type()            # Flavor of selection in use

    if sel_class == h5s.NULL:
        # NULL dataspaces don't support selections
        return None

    elif sel_class == h5s.SCALAR:
        # NumPy has no way of expressing empty 0-rank selections, so we use None
        if sel_type == h5s.SEL_NONE:
            return None
        if sel_type == h5s.SEL_ALL:
            return tuple()
        # Any other selection type falls through to the handling below

    elif sel_class != h5s.SIMPLE:
        raise TypeError("Unrecognized dataspace class %s" % sel_class)

    # We have a "simple" (rank >= 1) dataspace

    N = sid.get_select_npoints()
    rank = len(sid.shape)

    if sel_type == h5s.SEL_NONE:
        return (0,)*rank

    elif sel_type == h5s.SEL_ALL:
        return sid.shape

    elif sel_type == h5s.SEL_POINTS:
        # Like NumPy, point-based selections yield 1D arrays regardless of
        # the dataspace rank
        return (N,)

    elif sel_type != h5s.SEL_HYPERSLABS:
        raise TypeError("Unrecognized selection method %s" % sel_type)

    # We have a hyperslab-based selection

    if N == 0:
        return (0,)*rank

    bottomcorner, topcorner = (np.array(x) for x in sid.get_select_bounds())

    # Shape of full selection box.  Adding the scalar 1 (rather than a float
    # array of ones) keeps the extents as integers, so integer counts are
    # passed to select_hyperslab below.
    boxshape = topcorner - bottomcorner + 1

    def get_n_axis(sid, axis):
        """ Determine the number of elements selected along a particular axis.

        To do this, we "mask off" the axis by making a hyperslab selection
        which leaves only the first point along the axis.  For a 2D dataset
        with selection box shape (X, Y), for axis 1, this would leave a
        selection of shape (X, 1).  We count the number of points N_leftover
        remaining in the selection and compute the axis selection length by
        N_axis = N/N_leftover.
        """

        if boxshape[axis] == 1:
            return 1

        # Block covering everything in the bounding box except the first
        # position along "axis"
        start = bottomcorner.copy()
        start[axis] += 1
        count = boxshape.copy()
        count[axis] -= 1

        # Remove that block from a copy of the selection
        masked_sid = sid.copy()
        masked_sid.select_hyperslab(tuple(start), tuple(count), op=h5s.SELECT_NOTB)

        N_leftover = masked_sid.get_select_npoints()

        return N//N_leftover

    shape = tuple(get_n_axis(sid, x) for x in range(rank))

    # np.prod, not the np.product alias, which was removed in NumPy 2.0
    if np.prod(shape) != N:
        # This means multiple hyperslab selections are in effect,
        # so we fall back to a 1D shape
        return (N,)

    return shape
|
|