| |
| |
| |
| |
| |
| |
| |
| |
|
|
| """ |
| Implements high-level support for HDF5 file objects. |
| """ |
|
|
| import sys |
| import os |
| from warnings import warn |
|
|
| from .compat import filename_decode, filename_encode |
|
|
| from .base import phil, with_phil |
| from .group import Group |
| from .. import h5, h5f, h5p, h5i, h5fd, _objects |
| from .. import version |
|
|
# Build-time capabilities reported by the low-level h5 configuration object.
mpi = h5.get_config().mpi
ros3 = h5.get_config().ros3
direct_vfd = h5.get_config().direct_vfd

# (major, minor, release) of the HDF5 library in use.
hdf5_version = version.hdf5_version_tuple[:3]

# SWMR is usable only from the minimum HDF5 version the configuration names.
swmr_support = hdf5_version >= h5.get_config().swmr_min_hdf5_version
|
|
|
|
# User-facing library-version names -> h5f constants. Names beyond
# 'earliest'/'latest' are only offered when the HDF5 library is new enough.
libver_dict = {'earliest': h5f.LIBVER_EARLIEST, 'latest': h5f.LIBVER_LATEST}

if hdf5_version >= (1, 10, 2):
    libver_dict['v108'] = h5f.LIBVER_V18
    libver_dict['v110'] = h5f.LIBVER_V110

if hdf5_version >= (1, 11, 4):
    libver_dict['v112'] = h5f.LIBVER_V112

if hdf5_version >= (1, 13, 0):
    libver_dict['v114'] = h5f.LIBVER_V114

# Reverse lookup (constant -> name). Built in insertion order, so if two names
# share one constant the name added later is the one reported.
libver_dict_r = {constant: label for label, constant in libver_dict.items()}
|
|
|
|
def _set_fapl_mpio(plist, **kwargs):
    """Configure a file access property list for the mpio driver.

    Parameters
    ----------
    plist
        Property list whose ``set_fapl_mpio`` method is called.
    **kwargs
        Forwarded to ``plist.set_fapl_mpio``. If ``info`` is absent, a
        freshly created ``mpi4py.MPI.Info()`` is supplied.

    Raises
    ------
    ValueError
        If h5py was built without MPI support.
    """
    if not mpi:
        raise ValueError("h5py was built without MPI support, can't use mpio driver")

    # mpi4py is imported only once the mpio driver is actually requested.
    from mpi4py import MPI
    default_info = MPI.Info()
    kwargs.setdefault('info', default_info)
    plist.set_fapl_mpio(**kwargs)
|
|
|
|
def _set_fapl_fileobj(plist, **kwargs):
    """Install the Python file-object driver on a file access property list.

    The ``fileobj`` keyword (``None`` when missing) is handed to
    ``plist.set_fileobj_driver`` together with ``h5fd.fileobj_driver``.
    Any other keyword is ignored.
    """
    fileobj = kwargs.get('fileobj')
    plist.set_fileobj_driver(h5fd.fileobj_driver, fileobj)
|
|
|
|
def _fapl_setter(method_name):
    """Return a ``(plist, **kwargs)`` callable that forwards to ``plist.<method_name>``."""
    def set_fapl(plist, **kwargs):
        return getattr(plist, method_name)(**kwargs)
    return set_fapl


def _set_fapl_family(plist, **kwargs):
    """Select the family driver, using a copy of *plist* as the member FAPL."""
    return plist.set_fapl_family(memb_fapl=plist.copy(), **kwargs)


# Registry: driver name -> callable(plist, **kwargs) that configures the FAPL.
_drivers = {
    'sec2': _fapl_setter('set_fapl_sec2'),
    'stdio': _fapl_setter('set_fapl_stdio'),
    'core': _fapl_setter('set_fapl_core'),
    'family': _set_fapl_family,
    'mpio': _set_fapl_mpio,
    'fileobj': _set_fapl_fileobj,
    'split': _fapl_setter('set_fapl_split'),
}

# Optional drivers are registered only when the build reports support for them.
if ros3:
    _drivers['ros3'] = _fapl_setter('set_fapl_ros3')

if direct_vfd:
    _drivers['direct'] = _fapl_setter('set_fapl_direct')
|
|
|
|
def register_driver(name, set_fapl):
    """Add (or replace) a driver in the driver registry.

    Parameters
    ----------
    name : str
        Name under which the driver is selected.
    set_fapl : callable[PropFAID, **kwargs] -> NoneType
        Called with the file access property list and the driver keyword
        arguments; it must configure the property list for the driver.
    """
    _drivers.update({name: set_fapl})
|
|
|
|
def unregister_driver(name):
    """Remove a driver from the driver registry.

    Parameters
    ----------
    name : str
        Name of the driver to remove.

    Raises
    ------
    KeyError
        If no driver is registered under *name*.
    """
    _drivers.pop(name)
|
|
|
|
def registered_drivers():
    """Return the names of all currently registered drivers as a frozenset."""
    return frozenset(_drivers.keys())
|
|
|
|
def make_fapl(driver, libver, rdcc_nslots, rdcc_nbytes, rdcc_w0, locking,
              page_buf_size, min_meta_keep, min_raw_keep,
              alignment_threshold, alignment_interval, meta_block_size,
              **kwds):
    """Create and configure a file access property list (FAPL).

    Parameters
    ----------
    driver
        ``None`` (no driver is set explicitly), ``'windows'`` (treated like
        ``None`` on win32), or a name present in the driver registry.
    libver
        ``None`` (bounds earliest..latest), a single name from
        ``libver_dict`` (used as the low bound, high bound is latest), or an
        iterable of two such names ``(low, high)``.
    rdcc_nslots, rdcc_nbytes, rdcc_w0
        Chunk-cache overrides; ``None`` keeps the value already on the list.
    locking
        ``None``, ``False``/``"false"``, ``True``/``"true"`` or
        ``"best-effort"``.
    page_buf_size, min_meta_keep, min_raw_keep
        Page buffering settings; applied only when *page_buf_size* is truthy.
    alignment_threshold, alignment_interval
        Passed to ``set_alignment``.
    meta_block_size
        Passed to ``set_meta_block_size`` when not ``None``.
    **kwds
        Forwarded to the driver's FAPL setter.

    Returns
    -------
    The configured property list.

    Raises
    ------
    ValueError
        Unknown driver, unsupported locking value, or locking requested with
        an HDF5 version that is too old.
    TypeError
        Extra keywords were given although no driver setter will consume them.
    """
    plist = h5p.create(h5p.FILE_ACCESS)

    # Library version bounds.
    if libver is None:
        low, high = h5f.LIBVER_EARLIEST, h5f.LIBVER_LATEST
    elif libver in libver_dict:
        low, high = libver_dict[libver], h5f.LIBVER_LATEST
    else:
        low, high = (libver_dict[x] for x in libver)
    plist.set_libver_bounds(low, high)
    plist.set_alignment(alignment_threshold, alignment_interval)

    # Chunk cache: start from the current settings and overwrite only the
    # entries for which the caller supplied a value.
    cache = list(plist.get_cache())
    for position, override in ((1, rdcc_nslots), (2, rdcc_nbytes), (3, rdcc_w0)):
        if override is not None:
            cache[position] = override
    plist.set_cache(*cache)

    if page_buf_size:
        plist.set_page_buffer_size(
            int(page_buf_size), int(min_meta_keep), int(min_raw_keep))

    if meta_block_size is not None:
        plist.set_meta_block_size(int(meta_block_size))

    if locking is not None:
        locking_available = hdf5_version >= (1, 12, 1) or (
            hdf5_version[:2] == (1, 10) and hdf5_version[2] >= 7)
        if not locking_available:
            raise ValueError(
                "HDF5 version >= 1.12.1 or 1.10.x >= 1.10.7 required for file locking.")

        if locking in ("false", False):
            use_locking, ignore_disabled = False, False
        elif locking in ("true", True):
            use_locking, ignore_disabled = True, False
        elif locking == "best-effort":
            use_locking, ignore_disabled = True, True
        else:
            raise ValueError(f"Unsupported locking value: {locking}")
        plist.set_file_locking(use_locking, ignore_when_disabled=ignore_disabled)

    no_explicit_driver = driver is None or (
        driver == 'windows' and sys.platform == 'win32')
    if no_explicit_driver:
        # No setter will consume the extra keywords, so reject them instead of
        # silently dropping them.
        if kwds:
            unexpected = next(iter(kwds))
            raise TypeError(
                "'{key}' is an invalid keyword argument for this function"
                .format(key=unexpected))
        return plist

    try:
        set_fapl = _drivers[driver]
    except KeyError:
        raise ValueError('Unknown driver type "%s"' % driver)

    set_fapl(plist, **kwds)
    return plist
|
|
|
|
def make_fcpl(track_order=False, fs_strategy=None, fs_persist=False,
              fs_threshold=1, fs_page_size=None):
    """Create a file creation property list (FCPL), if one is needed.

    Parameters
    ----------
    track_order
        When truthy, link and attribute creation order are tracked and
        indexed.
    fs_strategy
        One of ``'fsm'``, ``'page'``, ``'aggregate'``, ``'none'``; falsy
        values leave the file space strategy untouched.
    fs_persist, fs_threshold
        Passed to ``set_file_space_strategy``; used only with *fs_strategy*.
    fs_page_size
        File space page size; applied only when truthy and
        ``fs_strategy == 'page'``.

    Returns
    -------
    The property list, or ``None`` when neither *track_order* nor
    *fs_strategy* is set.

    Raises
    ------
    ValueError
        If *fs_strategy* is not one of the recognised names.
    """
    if not (track_order or fs_strategy):
        return None

    plist = h5p.create(h5p.FILE_CREATE)

    if track_order:
        plist.set_link_creation_order(
            h5p.CRT_ORDER_TRACKED | h5p.CRT_ORDER_INDEXED)
        plist.set_attr_creation_order(
            h5p.CRT_ORDER_TRACKED | h5p.CRT_ORDER_INDEXED)

    if fs_strategy:
        strategy_codes = {
            'fsm': h5f.FSPACE_STRATEGY_FSM_AGGR,
            'page': h5f.FSPACE_STRATEGY_PAGE,
            'aggregate': h5f.FSPACE_STRATEGY_AGGR,
            'none': h5f.FSPACE_STRATEGY_NONE,
        }
        # -1 is the "not found" marker for an unrecognised strategy name.
        strategy_code = strategy_codes.get(fs_strategy, -1)
        if strategy_code == -1:
            raise ValueError("Invalid file space strategy type")

        plist.set_file_space_strategy(strategy_code, fs_persist, fs_threshold)
        if fs_page_size and fs_strategy == 'page':
            plist.set_file_space_page_size(int(fs_page_size))

    return plist
|
|
|
|
def make_fid(name, mode, userblock_size, fapl, fcpl=None, swmr=False):
    """Open or create a file and return its FileID; also validates *mode*.

    Parameters
    ----------
    name
        File name handed to ``h5f.open`` / ``h5f.create``.
    mode
        One of ``r``, ``r+``, ``w``, ``w-``, ``x``, ``a``.
    userblock_size
        ``None`` or a value convertible with ``int()``. Rejected for modes
        ``r`` and ``r+``. After opening, the file's actual user block size
        must equal it.
    fapl
        File access property list.
    fcpl
        File creation property list; one is created on demand when a user
        block size is given and *fcpl* is ``None``.
    swmr
        Adds the SWMR read flag in mode ``r`` when SWMR is supported.

    Raises
    ------
    ValueError
        Invalid mode, invalid user block request, or user block size
        mismatch with the opened file (the file is closed first).
    """
    if userblock_size is not None:
        if mode in ('r', 'r+'):
            raise ValueError(
                "User block may only be specified when creating a file")
        try:
            userblock_size = int(userblock_size)
        except (TypeError, ValueError):
            raise ValueError("User block size must be an integer")
        if fcpl is None:
            fcpl = h5p.create(h5p.FILE_CREATE)
        fcpl.set_userblock(userblock_size)

    if mode == 'r':
        read_flags = h5f.ACC_RDONLY
        if swmr and swmr_support:
            read_flags = read_flags | h5f.ACC_SWMR_READ
        fid = h5f.open(name, read_flags, fapl=fapl)
    elif mode == 'r+':
        fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl)
    elif mode in ('w-', 'x'):
        fid = h5f.create(name, h5f.ACC_EXCL, fapl=fapl, fcpl=fcpl)
    elif mode == 'w':
        fid = h5f.create(name, h5f.ACC_TRUNC, fapl=fapl, fcpl=fcpl)
    elif mode == 'a':
        # Try read/write first; if that fails, create the file with ACC_EXCL
        # so an existing file is never clobbered.
        try:
            fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl)
        # The exception class is chosen lazily, only once the open has failed:
        # for the drivers listed here only FileNotFoundError triggers the
        # create fallback; for every other driver any OSError does.
        # NOTE(review): presumably the unlisted drivers do not report a
        # missing file as FileNotFoundError -- confirm.
        except FileNotFoundError if fapl.get_driver() in (
            h5fd.SEC2,
            h5fd.DIRECT if direct_vfd else -1,
            h5fd.FAMILY,
            h5fd.WINDOWS,
            h5fd.fileobj_driver,
            h5fd.ROS3D if ros3 else -1,
        ) else OSError:
            fid = h5f.create(name, h5f.ACC_EXCL, fapl=fapl, fcpl=fcpl)
    else:
        raise ValueError("Invalid mode; must be one of r, r+, w, w-, x, a")

    if userblock_size is not None:
        # The file may have existed already (mode 'a'); make sure its user
        # block matches the request, and do not leak the FileID if it doesn't.
        try:
            opened_fcpl = fid.get_create_plist()
            if opened_fcpl.get_userblock() != userblock_size:
                raise ValueError("Requested userblock size (%d) does not match that of existing file (%d)" % (userblock_size, opened_fcpl.get_userblock()))
        except Exception as err:
            fid.close()
            raise err

    return fid
|
|
|
|
class File(Group):

    """
        Represents an HDF5 file.
    """

    @property
    def attrs(self):
        """ Attributes attached to this object """
        # The attribute manager is built on the root group ('/') rather than
        # on the file object itself.
        from .attrs import AttributeManager
        with phil:
            root = self['/']
            return AttributeManager(root)

    @property
    @with_phil
    def filename(self):
        """File name on disk"""
        raw_name = h5f.get_name(self.id)
        return filename_decode(raw_name)

    @property
    @with_phil
    def driver(self):
        """Low-level HDF5 file driver used to open file"""
        names_by_id = {
            h5fd.SEC2: 'sec2',
            h5fd.STDIO: 'stdio',
            h5fd.CORE: 'core',
            h5fd.FAMILY: 'family',
            h5fd.WINDOWS: 'windows',
            h5fd.MPIO: 'mpio',
            h5fd.MPIPOSIX: 'mpiposix',
            h5fd.fileobj_driver: 'fileobj',
        }
        if ros3:
            names_by_id[h5fd.ROS3D] = 'ros3'
        if direct_vfd:
            names_by_id[h5fd.DIRECT] = 'direct'

        driver_id = self.id.get_access_plist().get_driver()
        return names_by_id.get(driver_id, 'unknown')

    @property
    @with_phil
    def mode(self):
        """ Python mode used to open file ('r+' if writable, else 'r') """
        writable_mask = h5f.ACC_RDWR
        if swmr_support:
            writable_mask = writable_mask | h5f.ACC_SWMR_WRITE
        if self.id.get_intent() & writable_mask:
            return 'r+'
        return 'r'

    @property
    @with_phil
    def libver(self):
        """File format version bounds (2-tuple: low, high)"""
        access_plist = self.id.get_access_plist()
        return tuple(libver_dict_r[bound]
                     for bound in access_plist.get_libver_bounds())

    @property
    @with_phil
    def userblock_size(self):
        """ User block size (in bytes) """
        return self.id.get_create_plist().get_userblock()

    @property
    @with_phil
    def meta_block_size(self):
        """ Meta block size (in bytes) """
        return self.id.get_access_plist().get_meta_block_size()

    # The 'atomic' property exists only on MPI builds with HDF5 >= 1.8.9.
    if mpi and hdf5_version >= (1, 8, 9):

        @property
        @with_phil
        def atomic(self):
            """ Set/get MPI-IO atomic mode
            """
            return self.id.get_mpi_atomicity()

        @atomic.setter
        @with_phil
        def atomic(self, value):
            self.id.set_mpi_atomicity(value)

    @property
    @with_phil
    def swmr_mode(self):
        """ Controls single-writer multiple-reader mode """
        if not swmr_support:
            return False
        swmr_flags = h5f.ACC_SWMR_READ | h5f.ACC_SWMR_WRITE
        return bool(self.id.get_intent() & swmr_flags)

    @swmr_mode.setter
    @with_phil
    def swmr_mode(self, value):
        if not swmr_support:
            raise RuntimeError('SWMR support is not available in HDF5 version {}.{}.{}.'.format(*hdf5_version))
        if not value:
            raise ValueError("It is not possible to forcibly switch SWMR mode off.")
        self.id.start_swmr_write()

    def __init__(self, name, mode='r', driver=None, libver=None, userblock_size=None, swmr=False,
                 rdcc_nslots=None, rdcc_nbytes=None, rdcc_w0=None, track_order=None,
                 fs_strategy=None, fs_persist=False, fs_threshold=1, fs_page_size=None,
                 page_buf_size=None, min_meta_keep=0, min_raw_keep=0, locking=None,
                 alignment_threshold=1, alignment_interval=1, meta_block_size=None, **kwds):
        """Create a new file object.

        See the h5py user guide for a detailed explanation of the options.

        name
            Name of the file on disk, or a file-like object (anything with
            ``read`` and ``seek``). A low-level object identifier is also
            accepted, in which case the file identifier obtained from it is
            used and the opening options are not applied. Note: for files
            created with the 'core' driver, HDF5 still requires this be
            non-empty.
        mode
            r        Readonly, file must exist (default)
            r+       Read/write, file must exist
            w        Create file, truncate if exists
            w- or x  Create file, fail if exists
            a        Read/write if exists, create otherwise
        driver
            Name of the driver to use. Legal values are None (default,
            recommended), 'core', 'sec2', 'direct', 'stdio', 'mpio', 'ros3',
            or any other name present in the driver registry. For a
            file-like object only None or 'fileobj' is accepted.
        libver
            Library version bounds: a single name or a (low, high) tuple of
            names. Supported names: 'earliest', 'v108', 'v110', 'v112',
            'v114' and 'latest'. 'v108' and 'v110' require HDF5 1.10.2 or
            later, 'v112' requires 1.11.4 or later, 'v114' requires 1.13.0
            or later.
        userblock_size
            Desired size of user block. Not allowed in modes r and r+. In
            mode a, an existing file must already have a user block of
            exactly this size.
        swmr
            Open the file in SWMR read mode. Only used when mode = 'r'; a
            warning is issued for any other mode.
        rdcc_nbytes
            Total size of the dataset chunk cache in bytes. The default size
            is 1024**2 (1 MiB) per dataset. Applies to all datasets unless
            individually changed.
        rdcc_w0
            The chunk preemption policy for all datasets. This must be
            between 0 and 1 inclusive and indicates the weighting according
            to which chunks which have been fully read or written are
            penalized when determining which chunks to flush from cache. A
            value of 0 means fully read or written chunks are treated no
            differently than other chunks (the preemption is strictly LRU)
            while a value of 1 means fully read or written chunks are always
            preempted before other chunks. If your application only reads or
            writes data once, this can be safely set to 1. Otherwise, this
            should be set lower depending on how often you re-read or
            re-write the same data. The default value is 0.75. Applies to
            all datasets unless individually changed.
        rdcc_nslots
            The number of chunk slots in the raw data chunk cache for this
            file. Increasing this value reduces the number of cache
            collisions, but slightly increases the memory used. Due to the
            hashing strategy, this value should ideally be a prime number.
            As a rule of thumb, this value should be at least 10 times the
            number of chunks that can fit in rdcc_nbytes bytes. For maximum
            performance, this value should be set approximately 100 times
            that number of chunks. The default value is 521. Applies to all
            datasets unless individually changed.
        track_order
            Track dataset/group/attribute creation order under root group
            if True. If None use global default
            h5.get_config().track_order.
        fs_strategy
            The file space handling strategy to be used. Only allowed when
            creating a new file (mode w, w- or x). Defined as:
            "fsm"        FSM, Aggregators, VFD
            "page"       Paged FSM, VFD
            "aggregate"  Aggregators, VFD
            "none"       VFD
            If None use HDF5 defaults.
        fs_page_size
            File space page size in bytes. Only used when
            fs_strategy="page". If None use the HDF5 default (4096 bytes).
        fs_persist
            A boolean value to indicate whether free space should be
            persistent or not. Only takes effect together with fs_strategy.
            The default value is False.
        fs_threshold
            The smallest free-space section size that the free space manager
            will track. Only takes effect together with fs_strategy. The
            default value is 1.
        page_buf_size
            Page buffer size in bytes. Only allowed for HDF5 files created
            with fs_strategy="page". Must be a power of two value and
            greater or equal than the file space page size when creating the
            file. It is not used by default.
        min_meta_keep
            Minimum percentage of metadata to keep in the page buffer before
            allowing pages containing metadata to be evicted. Applicable
            only if page_buf_size is set. Default value is zero.
        min_raw_keep
            Minimum percentage of raw data to keep in the page buffer before
            allowing pages containing raw data to be evicted. Applicable
            only if page_buf_size is set. Default value is zero.
        locking
            The file locking behavior. Defined as:

            - False (or "false") --  Disable file locking
            - True (or "true")   --  Enable file locking
            - "best-effort"      --  Enable file locking but ignore some errors
            - None               --  Use HDF5 defaults

            .. warning::

                The HDF5_USE_FILE_LOCKING environment variable can override
                this parameter.

            Only available with HDF5 >= 1.12.1 or 1.10.x >= 1.10.7.

        alignment_threshold
            Together with ``alignment_interval``, this property ensures that
            any file object greater than or equal in size to the alignment
            threshold (in bytes) will be aligned on an address which is a
            multiple of alignment interval.

        alignment_interval
            This property should be used in conjunction with
            ``alignment_threshold``. See the description above. For more
            details, see
            https://portal.hdfgroup.org/display/HDF5/H5P_SET_ALIGNMENT

        meta_block_size
            Set the current minimum size, in bytes, of new metadata block
            allocations.
            See https://portal.hdfgroup.org/display/HDF5/H5P_SET_META_BLOCK_SIZE

        Additional keywords
            Passed on to the selected file driver. With the 'ros3' driver and
            an ``s3://`` name, ``aws_region`` (bytes) is required.
        """
        needs_1_10_1 = fs_strategy or page_buf_size
        if needs_1_10_1 and hdf5_version < (1, 10, 1):
            raise ValueError("HDF5 version 1.10.1 or greater required for file space strategy or page buffering support.")

        if swmr and not swmr_support:
            raise ValueError("The SWMR feature is not available in this version of the HDF5 library")

        if driver == 'ros3':
            if not ros3:
                raise ValueError(
                    "h5py was built without ROS3 support, can't use ros3 driver")

            from urllib.parse import urlparse
            url = urlparse(name)
            if url.scheme == 's3':
                # Rewrite s3://bucket/key into the regional HTTPS endpoint.
                aws_region = kwds.get('aws_region', b'').decode('ascii')
                if not aws_region:
                    raise ValueError('AWS region required for s3:// location')
                name = f'https://s3.{aws_region}.amazonaws.com/{url.netloc}{url.path}'
            elif url.scheme not in ('https', 'http'):
                raise ValueError(f'{name}: S3 location must begin with '
                                 'either "https://", "http://", or "s3://"')

        if locking is not None:
            locking_available = hdf5_version >= (1, 12, 1) or (
                hdf5_version[:2] == (1, 10) and hdf5_version[2] >= 7)
            if not locking_available:
                raise ValueError("HDF5 version >= 1.12.1 or 1.10.x >= 1.10.7 required for file locking options.")

        if isinstance(name, _objects.ObjectID):
            # An already-open low-level object: reuse the file it belongs to.
            if fs_strategy:
                raise ValueError("Unable to set file space strategy of an existing file")

            with phil:
                fid = h5i.get_file_id(name)
        else:
            is_file_like = hasattr(name, 'read') and hasattr(name, 'seek')
            if is_file_like:
                if driver not in (None, 'fileobj'):
                    raise ValueError("Driver must be 'fileobj' for file-like object if specified.")
                driver = 'fileobj'
                if kwds.get('fileobj', name) != name:
                    raise ValueError("Invalid value of 'fileobj' argument; "
                                     "must equal to file-like object if specified.")
                kwds.update(fileobj=name)
                # The file name handed on is the ASCII repr of the object.
                name = repr(name).encode('ASCII', 'replace')
            else:
                name = filename_encode(name)

            if track_order is None:
                track_order = h5.get_config().track_order

            if fs_strategy and mode not in ('w', 'w-', 'x'):
                raise ValueError("Unable to set file space strategy of an existing file")

            if swmr and mode != 'r':
                warn(
                    "swmr=True only affects read ('r') mode. For swmr write "
                    "mode, set f.swmr_mode = True after opening the file.",
                    stacklevel=2,
                )

            with phil:
                fapl = make_fapl(driver, libver, rdcc_nslots, rdcc_nbytes, rdcc_w0,
                                 locking, page_buf_size, min_meta_keep, min_raw_keep,
                                 alignment_threshold=alignment_threshold,
                                 alignment_interval=alignment_interval,
                                 meta_block_size=meta_block_size,
                                 **kwds)
                fcpl = make_fcpl(track_order=track_order, fs_strategy=fs_strategy,
                                 fs_persist=fs_persist, fs_threshold=fs_threshold,
                                 fs_page_size=fs_page_size)
                fid = make_fid(name, mode, userblock_size, fapl, fcpl, swmr=swmr)

        # Remember the requested bounds; a single value is paired with 'latest'.
        self._libver = libver if isinstance(libver, tuple) else (libver, 'latest')

        super().__init__(fid)

    def close(self):
        """ Close the file.  All open objects become invalid """
        with phil:
            # Nothing to do if the file identifier is no longer valid.
            if not self.id.valid:
                return

            # Close the non-file objects first, then file objects, and only
            # then the identifier itself.
            self.id._close_open_objects(h5f.OBJ_LOCAL | ~h5f.OBJ_FILE)
            self.id._close_open_objects(h5f.OBJ_LOCAL | h5f.OBJ_FILE)

            self.id.close()
            _objects.nonlocal_close()

    def flush(self):
        """ Tell the HDF5 library to flush its buffers.
        """
        with phil:
            h5f.flush(self.id)

    @with_phil
    def __enter__(self):
        return self

    @with_phil
    def __exit__(self, *args):
        if self.id:
            self.close()

    @with_phil
    def __repr__(self):
        if not self.id:
            return '<Closed HDF5 file>'

        # The file name may come back as bytes; decode it for display.
        filename = self.filename
        if isinstance(filename, bytes):
            filename = filename.decode('utf8', 'replace')
        basename = os.path.basename(filename)
        return f'<HDF5 file "{basename}" (mode {self.mode})>'
|
|