Spaces:

HongzeFu
/

RoboMME

Running on T4

File size: 5,115 Bytes

06c11b0

import h5py
import sys
import numpy as np

def _decode_h5_object(value):
    """Convert a value read from HDF5 into something printable.

    ``None`` and empty arrays yield ``None``. A non-empty array is reduced to
    its first element in flattened order before conversion. Byte strings are
    decoded as UTF-8, falling back to their ``repr`` when decoding fails;
    ``str`` values are returned unchanged; anything else goes through ``str()``.
    """
    if value is None:
        return None

    item = value
    if isinstance(value, np.ndarray):
        if not value.size:
            return None
        # Only the first element is shown for array-valued input.
        item = value.reshape(-1)[0]

    if isinstance(item, str):
        return item
    if not isinstance(item, (bytes, np.bytes_)):
        return str(item)

    try:
        return item.decode("utf-8")
    except Exception:
        # Undecodable bytes are still displayed, just in escaped form.
        return repr(item)


def print_hdf5_structure(name, obj, indent=0):
    """Placeholder with a ``(name, obj)`` callback signature; does nothing.

    The original intent was to print the HDF5 structure while limiting output
    to one ``episode_`` and one ``timestep_`` entry per level. That filtering
    is awkward to express in a flat ``visititems``-style traversal, so the
    printing is implemented by the explicit recursion in ``print_recursive``
    instead. This function is kept only so existing references to the name
    keep working.

    Args:
        name: Path of the visited object (unused).
        obj: The visited HDF5 object (unused).
        indent: Nesting depth (unused).

    Returns:
        None, always.
    """
    # Deliberately a no-op: nothing is printed and no argument is inspected.
    return None

def _leading_block_slices(shape, count):
    """Return slices for the smallest leading block holding the first *count* C-order elements."""
    last = np.unravel_index(count - 1, shape)
    slices = []
    wrapped = False  # becomes True once a more-major axis index is non-zero
    for pos, extent in zip(last, shape):
        if wrapped:
            # A more-major axis has advanced, so every full run of this axis
            # belongs to the C-order prefix and must be read entirely.
            slices.append(slice(0, int(extent)))
        else:
            slices.append(slice(0, int(pos) + 1))
            wrapped = int(pos) > 0
    return tuple(slices)


def _format_value(obj, max_elems=20, max_str_len=200, max_array_size=10000):
    """Read a dataset-like object and render a short preview string.

    Args:
        obj: Object exposing ``shape`` and ``obj[...]`` indexing (an HDF5
            dataset in this script).
        max_elems: Maximum number of elements listed for array values.
        max_str_len: Scalar values longer than this are cut and suffixed
            with ``"..."``.
        max_array_size: Arrays with more elements than this are not read in
            full; only a leading block containing the first ``max_elems``
            elements (C order) is read.

    Returns:
        For scalars, the decoded value (possibly truncated). For arrays,
        ``"[a, b, ...]"`` followed by ``" ... (N total)"`` when the array has
        more than ``max_elems`` elements. ``"[]"`` for empty arrays,
        ``"None"`` when the read yields ``None``, and ``"(read error: ...)"``
        if reading raises.
    """
    try:
        shape = obj.shape
        size = int(np.prod(shape)) if shape else 0
        if size > max_array_size:
            take = min(max_elems, size)
            if take == 0:
                return "[]"
            # Slicing every axis only up to the coordinate of the last wanted
            # element would drop values on the minor axes; the leading block
            # keeps the prefix contiguous while reading < 2 * take elements.
            raw = obj[_leading_block_slices(shape, take)]
            flat = np.asarray(raw).reshape(-1)[:take]
            n = len(flat)
            total = size
        else:
            raw = obj[()]
            if raw is None:
                return "None"
            if obj.shape == () or np.isscalar(raw):
                out = _decode_h5_object(raw)
                if out is None:
                    out = str(raw)
                if isinstance(out, str) and len(out) > max_str_len:
                    out = out[:max_str_len] + "..."
                return out
            flat = np.reshape(np.asarray(raw), -1)
            n = min(flat.size, max_elems)
            total = flat.size
    except Exception as e:
        # Best-effort preview: a failed read is reported inline, not raised.
        return f"(read error: {e})"

    if n == 0:
        return "[]"

    parts = []
    for v in flat[:n]:
        if isinstance(v, (bytes, np.bytes_)):
            try:
                v = v.decode("utf-8")
            except Exception:
                v = repr(v)
        parts.append(str(v))

    s = "[" + ", ".join(parts) + "]"
    if total > max_elems:
        s += f" ... ({total} total)"
    return s


def print_recursive(obj, indent=0):
    """Print an indented outline of an HDF5 object and everything below it.

    A dataset is printed as ``- [Dataset] name: shape=..., dtype=...``
    followed by a ``->`` line carrying the preview from ``_format_value``
    (omitted when the preview is empty). A group is printed as
    ``+ [Group] name`` and its members are visited one level deeper. Within
    a group only the first member whose name starts with ``episode_`` and the
    first whose name starts with ``timestep_`` are expanded; later members
    with the same prefix are skipped. Objects that are neither datasets nor
    groups produce no output.

    Args:
        obj: The ``h5py.Dataset`` or ``h5py.Group`` to describe.
        indent: Nesting depth; each level adds two spaces of indentation.
    """
    tab = "  " * indent
    if isinstance(obj, h5py.Dataset):
        name = (obj.name or "").split("/")[-1]
        print(f"{tab}- [Dataset] {name}: shape={obj.shape}, dtype={obj.dtype}")
        value_str = _format_value(obj)
        if value_str:
            print(f"{tab}    -> {value_str}")
    elif isinstance(obj, h5py.Group):
        print(f"{tab}+ [Group] {(obj.name or '').split('/')[-1]}")

        # Iterate member names and open only the members that get printed;
        # materialising items() would open every episode/timestep object
        # just to skip it.
        expanded = set()
        for name in obj:
            if name.startswith('episode_'):
                prefix = 'episode_'
            elif name.startswith('timestep_'):
                prefix = 'timestep_'
            else:
                prefix = None

            if prefix is not None:
                if prefix in expanded:
                    continue
                expanded.add(prefix)

            # Group.get yields None for a member that cannot be resolved,
            # which the type checks above then ignore.
            print_recursive(obj.get(name), indent + 1)

# Fallback HDF5 file inspected by main() when no path is passed on the command line.
DEFAULT_PATH = "/data/hongzefu/data_0226/record_dataset_SwingXtimes.h5"

def main():
    """Print the structure of an HDF5 file to stdout.

    The file path is taken from the first command-line argument, falling
    back to ``DEFAULT_PATH``. Every top-level entry is printed through
    ``print_recursive``, except that only the first entry whose name starts
    with ``episode_`` is expanded. Any exception raised while opening or
    traversing the file is reported as a single error line instead of
    propagating.
    """
    filepath = sys.argv[1] if len(sys.argv) > 1 else DEFAULT_PATH
    print(f"Inspecting HDF5 file: {filepath}")

    try:
        with h5py.File(filepath, 'r') as f:
            print("/")

            # Walk names and open only the entries that are printed, so the
            # skipped episodes are never opened.
            shown_episode = False
            for name in f:
                if name.startswith('episode_'):
                    if shown_episode:
                        continue
                    shown_episode = True
                print_recursive(f.get(name), 1)

    except Exception as e:
        # Top-level boundary of a diagnostic script: report and finish.
        print(f"Error reading HDF5 file: {e}")

# Run the inspector only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()