Spaces:

HongzeFu
/

RoboMME

Running on T4

App Files Files Community

RoboMME / scripts /dev /inspect_hdf5.py

HongzeFu

HF Space: code-only (no binary assets)

06c11b0 6 days ago

raw

history blame contribute delete

5.12 kB

	import h5py
	import sys
	import numpy as np

	def _decode_h5_object(value):
	    """Convert a value read from HDF5 into text suitable for display.

	    ``None`` and empty arrays give ``None``. A non-empty array is reduced
	    to its first element (in flattened order) before conversion. Byte
	    strings are decoded as UTF-8, falling back to their ``repr`` when that
	    fails; ``str`` values pass through untouched; everything else is run
	    through ``str()``.
	    """
	    if value is None:
	        return None

	    if isinstance(value, np.ndarray):
	        if not value.size:
	            return None
	        # Array-valued input: only the leading element is displayed.
	        value = value.reshape(-1)[0]

	    if isinstance(value, str):
	        return value
	    if not isinstance(value, (bytes, np.bytes_)):
	        return str(value)

	    try:
	        return value.decode("utf-8")
	    except Exception:
	        # Not valid UTF-8: show the raw bytes literal instead of failing.
	        return repr(value)


	def print_hdf5_structure(name, obj, indent=0):
	    """Placeholder with a ``visititems``-style ``(name, obj)`` signature.

	    This function deliberately does nothing and always returns ``None``.
	    Limiting output to one ``episode_*`` / ``timestep_*`` entry per level
	    is awkward inside a flat ``visititems`` traversal, so the actual
	    printing is done by the custom recursive walker ``print_recursive``.

	    Args:
	        name: Path of the visited item (ignored).
	        obj: The visited group or dataset (ignored).
	        indent: Nesting depth (ignored).

	    Returns:
	        None.
	    """
	    # Intentionally a no-op: the previous body only computed unused locals.
	    return None

	def _format_value(obj, max_elems=20, max_str_len=200, max_array_size=10000):
	    """Read a dataset-like object and render its contents as a short string.

	    Args:
	        obj: Object exposing ``shape`` and NumPy-style ``__getitem__``
	            (an ``h5py.Dataset`` when called from this script).
	        max_elems: Maximum number of leading elements listed for arrays.
	        max_str_len: Scalars rendered as text are cut to this many
	            characters and suffixed with ``"..."``.
	        max_array_size: Arrays with more elements than this are read only
	            partially instead of being loaded in full.

	    Returns:
	        The formatted text. Arrays are shown as ``"[a, b, ...]"`` with a
	        ``" ... (N total)"`` suffix when elements were omitted, empty
	        arrays as ``"[]"``, and any failure while reading as
	        ``"(read error: <message>)"``.
	    """
	    try:
	        shape = obj.shape
	        size = int(np.prod(shape)) if shape else 0
	        if size > max_array_size:
	            # Large array: read just enough to cover the first `take`
	            # elements in C (row-major) order.
	            take = min(max_elems, size)
	            if take == 0:
	                return "[]"
	            last = np.unravel_index(take - 1, shape)
	            # First axis where the last wanted element leaves index 0.
	            # Axes before it need only index 0; axes after it must be read
	            # in full, otherwise elements of the C-order prefix are dropped.
	            # Their combined extent is below `take`, so the read stays
	            # under 2 * take elements.
	            pivot = next((ax for ax, i in enumerate(last) if i), len(shape) - 1)
	            window = tuple(
	                slice(0, 1) if ax < pivot
	                else slice(0, int(i) + 1) if ax == pivot
	                else slice(None)
	                for ax, i in enumerate(last)
	            )
	            raw = obj[window]
	            flat = np.asarray(raw).reshape(-1)[:take]
	            n = len(flat)
	            total = size
	        else:
	            raw = obj[()]
	            if raw is None:
	                return "None"
	            if obj.shape == () or np.isscalar(raw):
	                out = _decode_h5_object(raw)
	                if out is None:
	                    out = str(raw)
	                if isinstance(out, str) and len(out) > max_str_len:
	                    out = out[:max_str_len] + "..."
	                return out
	            flat = np.reshape(np.asarray(raw), -1)
	            n = min(flat.size, max_elems)
	            total = flat.size
	    except Exception as e:
	        # Best-effort display: report the failure inline rather than abort
	        # the whole file listing.
	        return f"(read error: {e})"

	    if n == 0:
	        return "[]"

	    parts = []
	    for v in flat[:n]:
	        if isinstance(v, (bytes, np.bytes_)):
	            try:
	                v = v.decode("utf-8")
	            except UnicodeDecodeError:
	                v = repr(v)
	        parts.append(str(v))

	    s = "[" + ", ".join(parts) + "]"
	    if total > max_elems:
	        s += f" ... ({total} total)"
	    return s


	def print_recursive(obj, indent=0):
	    """Print ``obj`` and, when it is a group, walk into its children.

	    A dataset is printed with its shape and dtype, followed by a value
	    preview from ``_format_value`` when that preview is non-empty. A group
	    is printed by name and then its children are visited one level deeper.
	    Among a group's children only the first one named ``episode_*`` and the
	    first one named ``timestep_*`` are visited; every other child is always
	    visited. Objects that are neither a dataset nor a group print nothing.
	    """
	    pad = " " * indent

	    if isinstance(obj, h5py.Dataset):
	        label = (obj.name or "").rsplit("/", 1)[-1]
	        print(f"{pad}- [Dataset] {label}: shape={obj.shape}, dtype={obj.dtype}")
	        preview = _format_value(obj)
	        if preview:
	            print(f"{pad} -> {preview}")
	        return

	    if not isinstance(obj, h5py.Group):
	        return

	    print(f"{pad}+ [Group] {(obj.name or '').rsplit('/', 1)[-1]}")

	    # Name prefixes of which only the first matching child is displayed.
	    collapsed_prefixes = ("episode_", "timestep_")
	    already_shown = set()

	    for child_name, child in list(obj.items()):
	        family = next(
	            (p for p in collapsed_prefixes if child_name.startswith(p)), None
	        )
	        if family is not None:
	            if family in already_shown:
	                continue
	            already_shown.add(family)
	        # Either a regular item (meta, obs, action, info, ...) or the first
	        # representative of a collapsed family.
	        print_recursive(child, indent + 1)

	# Fallback HDF5 file inspected when no path is passed on the command line.
	# NOTE(review): this is a machine-specific absolute path — confirm it is
	# still the intended default.
	DEFAULT_PATH = "/data/hongzefu/data_0226/record_dataset_SwingXtimes.h5"

	def main():
	    """Open an HDF5 file read-only and print an outline of its contents.

	    The path is taken from the first command-line argument, or from
	    ``DEFAULT_PATH`` when none is given. Every top-level item is printed
	    via ``print_recursive``, except that only the first item whose name
	    starts with ``episode_`` is shown. Any exception raised while opening
	    or reading the file is reported on standard output.
	    """
	    if len(sys.argv) > 1:
	        filepath = sys.argv[1]
	    else:
	        filepath = DEFAULT_PATH
	    print(f"Inspecting HDF5 file: {filepath}")

	    try:
	        with h5py.File(filepath, 'r') as h5_file:
	            # The root group is printed by hand as a bare "/".
	            print("/")

	            episode_printed = False
	            for key, child in list(h5_file.items()):
	                if key.startswith('episode_'):
	                    if episode_printed:
	                        continue
	                    print_recursive(child, 1)
	                    episode_printed = True
	                else:
	                    print_recursive(child, 1)
	    except Exception as err:
	        print(f"Error reading HDF5 file: {err}")

	# Script entry point: run the inspector only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	    main()