| | import h5py |
| | import sys |
| | import numpy as np |
| |
|
| | def _decode_h5_object(value): |
| | """Decode HDF5 object dtype (vlen str) to Python str for display.""" |
| | if value is None: |
| | return None |
| | if isinstance(value, np.ndarray): |
| | if value.size == 0: |
| | return None |
| | value = np.reshape(value, -1)[0] |
| | if isinstance(value, (bytes, np.bytes_)): |
| | try: |
| | return value.decode("utf-8") |
| | except Exception: |
| | return repr(value) |
| | if isinstance(value, str): |
| | return value |
| | return str(value) |
| |
|
| |
|
| | def print_hdf5_structure(name, obj, indent=0): |
| | """ |
| | Recursively print the structure of an HDF5 group or dataset. |
| | Only prints one 'episode_' and one 'timestep_' per level to avoid cluttering. |
| | """ |
| | base_name = name.split('/')[-1] |
| | |
| | |
| | parent_path = '/'.join(name.split('/')[:-1]) |
| | |
| | |
| | |
| | pass |
| |
|
| | def _format_value(obj, max_elems=20, max_str_len=200, max_array_size=10000): |
| | """Read dataset and format for display; handle scalars and arrays.""" |
| | try: |
| | shape = obj.shape |
| | size = int(np.prod(shape)) if shape else 0 |
| | if size > max_array_size: |
| | |
| | take = min(max_elems, size) |
| | if take == 0: |
| | return "[]" |
| | idx = np.unravel_index(take - 1, shape) |
| | slice_tuple = tuple(slice(0, int(i) + 1) for i in idx) |
| | raw = obj[slice_tuple] |
| | flat = np.asarray(raw).reshape(-1)[:take] |
| | n = len(flat) |
| | total = size |
| | else: |
| | raw = obj[()] |
| | if raw is None: |
| | return "None" |
| | if obj.shape == () or np.isscalar(raw): |
| | out = _decode_h5_object(raw) |
| | if out is None: |
| | out = str(raw) |
| | if isinstance(out, str) and len(out) > max_str_len: |
| | out = out[:max_str_len] + "..." |
| | return out |
| | arr = np.asarray(raw) |
| | flat = np.reshape(arr, -1) |
| | n = min(flat.size, max_elems) |
| | total = flat.size |
| | except Exception as e: |
| | return f"(read error: {e})" |
| |
|
| | if n == 0: |
| | return "[]" |
| | parts = [] |
| | for i in range(n): |
| | v = flat.flat[i] |
| | if isinstance(v, (bytes, np.bytes_)): |
| | try: |
| | v = v.decode("utf-8") |
| | except Exception: |
| | v = repr(v) |
| | parts.append(str(v)) |
| | s = "[" + ", ".join(parts) + "]" |
| | if total > max_elems: |
| | s += f" ... ({total} total)" |
| | return s |
| |
|
| |
|
| | def print_recursive(obj, indent=0): |
| | tab = " " * indent |
| | if isinstance(obj, h5py.Dataset): |
| | name = (obj.name or "").split("/")[-1] |
| | print(f"{tab}- [Dataset] {name}: shape={obj.shape}, dtype={obj.dtype}") |
| | |
| | value_str = _format_value(obj) |
| | if value_str: |
| | print(f"{tab} -> {value_str}") |
| | elif isinstance(obj, h5py.Group): |
| | print(f"{tab}+ [Group] {(obj.name or '').split('/')[-1]}") |
| | |
| | |
| | |
| | |
| | items = list(obj.items()) |
| | |
| | shown_episode = False |
| | shown_timestep = False |
| | |
| | for name, item in items: |
| | is_episode = name.startswith('episode_') |
| | is_timestep = name.startswith('timestep_') |
| | |
| | if is_episode: |
| | if not shown_episode: |
| | print_recursive(item, indent + 1) |
| | shown_episode = True |
| | continue |
| | |
| | if is_timestep: |
| | if not shown_timestep: |
| | print_recursive(item, indent + 1) |
| | shown_timestep = True |
| | continue |
| | |
| | |
| | print_recursive(item, indent + 1) |
| |
|
| | DEFAULT_PATH = "/data/hongzefu/data_0226/record_dataset_SwingXtimes.h5" |
| |
|
| | def main(): |
| | filepath = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_PATH |
| | print(f"Inspecting HDF5 file: {filepath}") |
| | |
| | try: |
| | with h5py.File(filepath, 'r') as f: |
| | |
| | print("/") |
| | |
| | items = list(f.items()) |
| | shown_episode = False |
| | |
| | for name, item in items: |
| | if name.startswith('episode_'): |
| | if not shown_episode: |
| | print_recursive(item, 1) |
| | shown_episode = True |
| | continue |
| | print_recursive(item, 1) |
| | |
| | except Exception as e: |
| | print(f"Error reading HDF5 file: {e}") |
| |
|
| | if __name__ == "__main__": |
| | main() |
| |
|