# File size: 5,115 Bytes | 06c11b0
import h5py
import sys
import numpy as np
def _decode_h5_object(value):
    """Decode an HDF5 object-dtype value (vlen str) to a Python str for display.

    Args:
        value: ``None``, a NumPy array, a bytes-like scalar (``bytes`` or
            ``np.bytes_``), a string, or any other object.

    Returns:
        ``None`` when *value* is ``None`` or an empty array; otherwise a
        plain ``str``.  For arrays only the first element (in flattened
        order) is decoded.  Bytes that are not valid UTF-8 are rendered
        with ``repr`` instead of raising.
    """
    if value is None:
        return None
    if isinstance(value, np.ndarray):
        if value.size == 0:
            return None
        # Only the first element is used as the display value.
        value = np.reshape(value, -1)[0]
    if isinstance(value, (bytes, np.bytes_)):
        try:
            return value.decode("utf-8")
        except UnicodeDecodeError:
            # Not valid UTF-8: fall back to the escaped bytes literal.
            return repr(value)
    # str() leaves a plain str untouched and normalises str subclasses
    # (e.g. np.str_) and every other object to a built-in str.
    return str(value)
def print_hdf5_structure(name, obj, indent=0):
    """Placeholder that intentionally does nothing.

    The ``(name, obj)`` signature was drafted for a flat, visititems-style
    traversal, where limiting output to one 'episode_' and one 'timestep_'
    per level is awkward.  That filtering is done by the custom recursive
    printer ``print_recursive`` instead, so this function prints nothing.

    Args:
        name: Path of the visited item (unused).
        obj: The visited HDF5 object (unused).
        indent: Indentation level (unused).

    Returns:
        Always ``None``.
    """
    return None
def _format_value(obj, max_elems=20, max_str_len=200, max_array_size=10000):
    """Read a dataset and format its contents as a short display string.

    Scalars are decoded with ``_decode_h5_object`` and cut to *max_str_len*
    characters.  Arrays are flattened in C order and at most *max_elems*
    elements are listed; when more exist the total count is appended.
    Arrays with more than *max_array_size* elements are not read in full:
    only the smallest leading slice containing the first *max_elems*
    elements is fetched.

    Args:
        obj: Dataset-like object exposing ``shape`` and NumPy-style
            ``__getitem__`` (e.g. ``h5py.Dataset``).
        max_elems: Maximum number of elements to list.
        max_str_len: Maximum length of a scalar's text before it is cut
            and suffixed with ``"..."``.
        max_array_size: Element count above which only a leading slice
            of the data is read.

    Returns:
        The formatted text, or ``"(read error: ...)"`` if reading fails.
    """
    try:
        shape = obj.shape
        size = int(np.prod(shape)) if shape else 0
        if size > max_array_size:
            # Large array: read only the first max_elems elements (C order).
            take = min(max_elems, size)
            if take == 0:
                return "[]"
            last = np.unravel_index(take - 1, shape)
            # Build the smallest box holding flat elements 0..take-1.  Up to
            # and including the first axis whose index is non-zero we stop at
            # that index; every axis after it has wrapped around at least
            # once, so it must be read in full.  Slicing all axes to
            # ``0..index`` would drop elements (e.g. shape (N, 3)).
            bounds = []
            wrapped = False
            for extent, pos in zip(shape, last):
                if wrapped:
                    bounds.append(slice(0, int(extent)))
                else:
                    bounds.append(slice(0, int(pos) + 1))
                    wrapped = bool(pos > 0)
            raw = obj[tuple(bounds)]
            flat = np.asarray(raw).reshape(-1)[:take]
            n = len(flat)
            total = size
        else:
            raw = obj[()]
            if raw is None:
                return "None"
            if obj.shape == () or np.isscalar(raw):
                out = _decode_h5_object(raw)
                if out is None:
                    out = str(raw)
                if isinstance(out, str) and len(out) > max_str_len:
                    out = out[:max_str_len] + "..."
                return out
            arr = np.asarray(raw)
            flat = np.reshape(arr, -1)
            n = min(flat.size, max_elems)
            total = flat.size
    except Exception as e:
        # Best effort: this is display-only, so report the failure inline.
        return f"(read error: {e})"
    if n == 0:
        return "[]"
    parts = []
    for v in flat[:n]:
        if isinstance(v, (bytes, np.bytes_)):
            try:
                v = v.decode("utf-8")
            except UnicodeDecodeError:
                v = repr(v)
        parts.append(str(v))
    s = "[" + ", ".join(parts) + "]"
    if total > max_elems:
        s += f" ... ({total} total)"
    return s
def print_recursive(obj, indent=0):
    """Print *obj* and, for groups, its children, one level deeper each time.

    Datasets are printed with their shape and dtype followed by a formatted
    preview of their value.  Groups are printed with their name and then
    their members; among members whose names start with 'episode_' or
    'timestep_', only the first of each kind is descended into.  Objects
    that are neither a dataset nor a group are ignored.

    Args:
        obj: An ``h5py.Dataset`` or ``h5py.Group`` (anything else is skipped).
        indent: Nesting depth; one space of padding is emitted per level.
    """
    pad = " " * indent

    if isinstance(obj, h5py.Dataset):
        leaf = (obj.name or "").split("/")[-1]
        print(f"{pad}- [Dataset] {leaf}: shape={obj.shape}, dtype={obj.dtype}")
        # Preview of the content: scalar, small array, or array summary.
        rendered = _format_value(obj)
        if rendered:
            print(f"{pad} -> {rendered}")
        return

    if not isinstance(obj, h5py.Group):
        return

    leaf = (obj.name or "").split("/")[-1]
    print(f"{pad}+ [Group] {leaf}")

    # Prefixes of repetitive members that have already been printed once.
    already_shown = set()
    for child_name, child in list(obj.items()):
        prefix = next(
            (p for p in ("episode_", "timestep_") if child_name.startswith(p)),
            None,
        )
        if prefix is not None:
            if prefix in already_shown:
                continue
            already_shown.add(prefix)
        # Either a regular member (meta, obs, action, info, ...) or the
        # first episode/timestep seen at this level.
        print_recursive(child, indent + 1)
# HDF5 file inspected by main() when no path is given on the command line.
DEFAULT_PATH = "/data/hongzefu/data_0226/record_dataset_SwingXtimes.h5"
def main():
    """Print the structure of an HDF5 file.

    The file path is taken from the first command-line argument, falling
    back to ``DEFAULT_PATH``.  Top-level members are printed one level
    below the root; of the members named 'episode_*' only the first is
    shown.  Any exception raised while opening or walking the file is
    reported as a single message instead of propagating.
    """
    cli_args = sys.argv[1:]
    filepath = cli_args[0] if cli_args else DEFAULT_PATH
    print(f"Inspecting HDF5 file: {filepath}")
    try:
        with h5py.File(filepath, "r") as f:
            # The root group is printed by hand as a bare "/".
            print("/")
            episode_printed = False
            for name, item in list(f.items()):
                if name.startswith("episode_"):
                    if episode_printed:
                        continue
                    episode_printed = True
                print_recursive(item, 1)
    except Exception as e:
        print(f"Error reading HDF5 file: {e}")
# Run the inspector only when this file is executed as a script.
if __name__ == "__main__":
    main()
|