| import os |
| import h5py |
| import torch |
| import socket |
| import numpy as np |
| from time import time |
| from datetime import datetime |
| from src.utils.tensor import tensor_idx, cast_numpyfy |
| from src.utils.sparse import dense_to_csr, csr_to_dense |
|
|
|
|
# Names exported by `from <this module> import *`
__all__ = [
    # Dated output paths and host-specific data location
    'date_time_string',
    'dated_dir',
    'host_data_root',
    # Tensor <-> HDF5
    'save_tensor',
    'load_tensor',
    'save_tensor_dict',
    'load_tensor_dict',
    # CSR-compressed tensor <-> HDF5
    'save_dense_to_csr',
    'load_csr_to_dense',
]
|
|
|
|
def date_time_string():
    """Returns a string holding the current local date and time.
    Useful for creating an output file or directory.

    The string is formatted as 'year-month-day_hour-minute-second',
    without zero-padding (e.g. '2023-7-4_9-5-0').

    :return: str
    """
    # Read the clock only once, so that all fields describe the same
    # instant. Querying `datetime.now()` separately for each field could
    # mix values from either side of a second, minute or day rollover
    now = datetime.now()
    date = f'{now.year}-{now.month}-{now.day}'
    clock = f'{now.hour}-{now.minute}-{now.second}'
    return f'{date}_{clock}'
|
|
|
|
def dated_dir(root, create=False):
    """Builds the path of a sub-directory of `root` named after the
    current date and time (see `date_time_string`).

    :param root: str
        Parent directory
    :param create: bool
        If True, the directory is created, unless something already
        exists at the returned path
    :return: str
        Path to the dated directory
    """
    path = os.path.join(root, date_time_string())

    # Nothing to create: only the path was asked for
    if not create:
        return path

    # Leave any pre-existing entry at this path untouched
    if os.path.exists(path):
        return path

    os.makedirs(path, exist_ok=True)
    return path
|
|
|
|
| |
def host_data_root():
    """Looks up the name of the machine running the code and returns
    the dataset root directory registered for it.

    :return: str
        Path to the data root directory of the current host
    :raises NotImplementedError: if the host is not a known one
    """
    hostname = socket.gethostname()

    # Machines identified by their exact host name
    roots_by_name = {
        'DEL2001W017': '/media/drobert-admin/DATA2/datasets',
        'HP-2010S002': '/var/data/drobert/datasets',
        '9c81b1a54ad8': '/raid/dataset/pointcloud/data'}
    if hostname in roots_by_name:
        return roots_by_name[hostname]

    # Machines identified by the suffix of their host name
    if hostname.endswith('sis.cnes.fr'):
        return '/home/qt/robertda/scratch/datasets'

    raise NotImplementedError(
        f"Unknown host '{hostname}', cannot set DATA_ROOT")
|
|
|
|
def save_tensor(x, f, key, fp_dtype=torch.float):
    """Write a torch.Tensor as a dataset of an HDF5 file.

    :param x: torch.Tensor
        Tensor to save
    :param f: h5 file path or h5py.File or h5py.Group
        Where to write. A file path is opened in 'w' mode for the
        duration of the call
    :param key: str
        h5py.Dataset key under which to save the tensor
    :param fp_dtype: torch dtype
        Data type to which floating point tensors should be cast before
        saving. Passed as is to `cast_numpyfy`
    :return: None
    """
    if isinstance(f, (h5py.File, h5py.Group)):
        assert isinstance(x, torch.Tensor)

        # Convert to numpy and store with the resulting numpy dtype
        array = cast_numpyfy(x, fp_dtype=fp_dtype)
        f.create_dataset(key, data=array, dtype=array.dtype)
        return

    # `f` is a path: open the file and recurse on the h5py.File
    with h5py.File(f, 'w') as file:
        save_tensor(x, file, key, fp_dtype=fp_dtype)
|
|
|
|
def load_tensor(f, key=None, idx=None):
    """Read a torch.Tensor from an HDF5 file. See `save_tensor` for
    writing such file. An optional idx restricts the output to some
    rows only.

    :param f: h5 file path or h5py.File or h5py.Group or h5py.Dataset
        Where to read from. A file path is opened in 'r' mode for the
        duration of the call
    :param key: str
        h5py.Dataset key under which the tensor was saved. Must be
        provided if f is not already a h5py.Dataset object
    :param idx: int, list, numpy.ndarray, torch.Tensor
        Used to select only some rows of the tensor. Converted with
        `tensor_idx`. If None or empty, all rows are returned
    :return: torch.Tensor
        Non-floating-point tensors are returned as long
    """
    # `f` is a path: open the file and recurse on the h5py.File
    if not isinstance(f, (h5py.File, h5py.Group, h5py.Dataset)):
        with h5py.File(f, 'r') as file:
            return load_tensor(file, key=key, idx=idx)

    # Recover the dataset holding the tensor
    dataset = f if isinstance(f, h5py.Dataset) else f[key]

    idx = tensor_idx(idx)

    # The whole dataset is read into memory, the row selection is only
    # applied afterwards, on the torch side
    x = torch.from_numpy(dataset[:])
    if idx is not None and idx.shape[0] != 0:
        x = x[idx]

    # Integer-like outputs are always returned as long
    return x if x.is_floating_point() else x.long()
|
|
|
|
def save_tensor_dict(d, f, key, fp_dtype=torch.float):
    """Write a dictionary of torch.Tensor to an HDF5 file. A group is
    created under `key` and each tensor of the dictionary is saved in
    it with `save_tensor`, under its own dictionary key.

    :param d: dictionary of torch.Tensors
        Values which are not torch.Tensor are silently skipped
    :param f: h5 file path or h5py.File or h5py.Group
        Where to write. A file path is opened in 'w' mode for the
        duration of the call
    :param key: str
        h5py.Group key under which to save the tensor dictionary
    :param fp_dtype: torch dtype
        Data type to which floating point tensors should be cast before
        saving
    :return: None
    """
    if isinstance(f, (h5py.File, h5py.Group)):
        group = f.create_group(key)
        for name, value in d.items():
            if isinstance(value, torch.Tensor):
                save_tensor(value, group, name, fp_dtype=fp_dtype)
        return

    # `f` is a path: open the file and recurse on the h5py.File
    with h5py.File(f, 'w') as file:
        save_tensor_dict(d, file, key, fp_dtype=fp_dtype)
|
|
|
|
def load_tensor_dict(f, idx=None):
    """Load a dictionary of torch.Tensor from an HDF5 file.

    Each item found directly under `f` is read with `load_tensor` and
    stored in the output dictionary under its h5py key. Since
    `load_tensor` is called without a key, each of these items is
    expected to be a h5py.Dataset.

    :param f: h5 file path or h5py.File or h5py.Group
        Where to read from. A file path is opened in read-only mode for
        the duration of the call
    :param idx: int, list, numpy.ndarray, torch.Tensor
        Used to select and read only some rows of each tensor.
        Supports fancy indexing
    :return: dict
        Dictionary of torch.Tensor, indexed by h5py key
    """
    if not isinstance(f, (h5py.File, h5py.Group)):
        # The file must be opened read-only: 'w' mode would truncate
        # the very file we are trying to load. The row selection must
        # be forwarded and the loaded dictionary returned to the caller
        with h5py.File(f, 'r') as file:
            return load_tensor_dict(file, idx=idx)

    return {k: load_tensor(f[k], key=None, idx=idx) for k in f.keys()}
|
|
|
|
def save_dense_to_csr(x, f, fp_dtype=torch.float):
    """Compress a 2D tensor to CSR format with `dense_to_csr` and write
    the result to an HDF5 file, as four datasets: 'pointers',
    'columns', 'values' and 'shape'.

    :param x: 2D torch.Tensor
    :param f: h5 file path or h5py.File or h5py.Group
        Where to write. A file path is opened in 'w' mode for the
        duration of the call
    :param fp_dtype: torch dtype
        Data type to which floating point tensors should be cast before
        saving
    :return: None
    """
    if isinstance(f, (h5py.File, h5py.Group)):
        assert isinstance(x, torch.Tensor) and x.dim() == 2

        # Save each component of the CSR representation in its own
        # dataset
        pointers, columns, values = dense_to_csr(x)
        components = (
            ('pointers', pointers),
            ('columns', columns),
            ('values', values))
        for name, tensor in components:
            save_tensor(tensor, f, name, fp_dtype=fp_dtype)

        # The dense shape is stored alongside the CSR components
        f.create_dataset('shape', data=np.array(x.shape))
        return

    # `f` is a path: open the file and recurse on the h5py.File
    with h5py.File(f, 'w') as file:
        save_dense_to_csr(x, file, fp_dtype=fp_dtype)
|
|
|
|
def load_csr_to_dense(f, idx=None, verbose=False):
    """Read an HDF5 file or group produced using `save_dense_to_csr`
    and return the dense tensor. An optional idx can be passed to only
    recover the corresponding rows of the dense tensor.

    :param f: h5 file path or h5py.File or h5py.Group
        Must hold the 'pointers', 'columns', 'values' and 'shape'
        datasets. A file path is opened in 'r' mode for the duration
        of the call
    :param idx: int, list, numpy.ndarray, torch.Tensor
        Used to select and read only some rows of the dense tensor.
        Supports fancy indexing. If None or empty, all rows are
        returned
    :param verbose: bool
        If True, the time spent in each step is printed
    :return:
        Output of `csr_to_dense` -- presumably the dense 2D
        torch.Tensor, restricted to the rows in idx if provided
    """
    # Datasets expected in the file or group
    KEYS = ['pointers', 'columns', 'values', 'shape']

    # `f` is a path: open the file and recurse on the h5py.File
    if not isinstance(f, (h5py.File, h5py.Group)):
        with h5py.File(f, 'r') as file:
            out = load_csr_to_dense(file, idx=idx, verbose=verbose)
        return out

    assert all(k in f.keys() for k in KEYS)

    # NOTE(review): `tensor_idx` is defined elsewhere. The code below
    # relies on it returning either None or an object supporting
    # `.shape[0]` and `idx + 1` -- presumably a 1D LongTensor, confirm
    idx = tensor_idx(idx)

    # No row selection: read all CSR components and densify them
    if idx is None or idx.shape[0] == 0:
        start = time()
        pointers = load_tensor(f['pointers'])
        columns = load_tensor(f['columns'])
        values = load_tensor(f['values'])
        shape = load_tensor(f['shape'])
        if verbose:
            print(f'load_csr_to_dense read all : {time() - start:0.5f}s')
        start = time()
        out = csr_to_dense(pointers, columns, values, shape=shape)
        if verbose:
            print(f'load_csr_to_dense csr_to_dense : {time() - start:0.5f}s')
        return out

    # Row selection: for each selected row, read the pointers at
    # positions idx and idx + 1, which delimit the entries of the row
    # in 'columns' and 'values'
    start = time()
    ptr_start = load_tensor(f['pointers'], idx=idx)
    ptr_end = load_tensor(f['pointers'], idx=idx + 1)
    if verbose:
        print(f'load_csr_to_dense read ptr : {time() - start:0.5f}s')

    # Build the pointers of the output: a leading 0 followed by the
    # cumulative sizes (ptr_end - ptr_start) of the selected rows
    start = time()
    pointers = torch.cat([
        torch.zeros(1, dtype=ptr_start.dtype),
        torch.cumsum(ptr_end - ptr_start, 0)])
    if verbose:
        print(f'load_csr_to_dense pointers : {time() - start:0.5f}s')

    # Build the indices of the entries to read from 'columns' and
    # 'values'. Starting from a global range over all selected entries,
    # subtracting the output pointer of each entry's row gives its
    # offset inside the row, to which the row's start pointer in the
    # file is then added
    start = time()
    sizes = pointers[1:] - pointers[:-1]
    val_idx = torch.arange(pointers[-1])
    val_idx -= torch.arange(pointers[-1] + 1)[
        pointers[:-1]].repeat_interleave(sizes)
    val_idx += ptr_start.repeat_interleave(sizes)
    if verbose:
        print(f'load_csr_to_dense val_idx : {time() - start:0.5f}s')

    # Read the selected entries. The stored shape is overwritten so
    # that its first dimension is the number of selected rows
    start = time()
    columns = load_tensor(f['columns'], idx=val_idx)
    values = load_tensor(f['values'], idx=val_idx)
    shape = load_tensor(f['shape'])
    shape[0] = idx.shape[0]
    if verbose:
        print(f'load_csr_to_dense read values : {time() - start:0.5f}s')

    # Densify the selected rows
    start = time()
    out = csr_to_dense(pointers, columns, values, shape=shape)
    if verbose:
        print(f'load_csr_to_dense csr_to_dense : {time() - start:0.5f}s')

    return out
|
|