| | import argparse |
| | import h5py |
| | import sys |
| | import numpy as np |
| |
|
# HDF5 file inspected when no ``filepath`` argument is given on the command line.
DEFAULT_PATH = "/data/hongzefu/data_0225/record_dataset_VideoUnmaskSwap.h5"
| |
|
def _is_equal(a, b):
    """Return True when *a* and *b* hold the same value(s).

    Rules, in order:

    * Two ``None`` values are equal; ``None`` never equals a non-``None`` value.
    * Two NumPy arrays are compared with ``np.array_equal``.
    * Anything else is compared with ``==`` and reduced with ``np.all``.
      If that comparison raises an ordinary exception, the values are
      treated as different.
    """
    if a is None or b is None:
        # Equal only when both sides are missing.
        return a is None and b is None
    if isinstance(a, np.ndarray) and isinstance(b, np.ndarray):
        return np.array_equal(a, b)
    try:
        return bool(np.all(a == b))
    except Exception:
        # Incomparable values count as "not equal". Catching Exception rather
        # than using a bare ``except`` lets KeyboardInterrupt / SystemExit
        # propagate instead of being silently turned into False.
        return False
| |
|
def _numeric_suffix(name):
    """Return the integer after the first underscore, e.g. 'timestep_12' -> 12."""
    return int(name.split('_')[1])


def _read_field(ts_group, subgroup_name, field_name):
    """Read one per-timestep field as a NumPy array, or None if it is absent.

    The field is looked up first inside ``ts_group[subgroup_name]`` and, when
    it is not found there, directly under ``ts_group``.
    """
    if subgroup_name in ts_group:
        subgroup = ts_group[subgroup_name]
        if field_name in subgroup:
            return np.array(subgroup[field_name])
    if field_name in ts_group:
        return np.array(ts_group[field_name])
    return None


def inspect_actions(filepath, target_timestep=None, window=10):
    """Print action/info fields for every timestep of every episode in a file.

    Groups named ``episode_<n>`` are visited in numeric order, and within each
    one the ``timestep_<n>`` groups are visited in numeric order. For each
    timestep the fields ``choice_action``, ``waypoint_action``,
    ``is_subgoal_boundary`` and ``is_video_demo`` are read and printed.
    A timestep whose four fields all equal those of the last printed timestep
    is not printed; instead a single "... skipped ..." line reports how many
    were collapsed.

    Args:
        filepath: Path of the HDF5 file to open read-only.
        target_timestep: Optional timestep index. Timesteps whose index is
            within ``window`` of it are always printed, even if identical to
            the previous one.
        window: Half-width of the always-print range around
            ``target_timestep``.

    Returns:
        None. All output goes to stdout. Any exception raised while opening
        or reading the file is caught and reported as a printed message.
    """
    print(f"Inspecting HDF5 file: {filepath}")

    try:
        with h5py.File(filepath, 'r') as f:
            episodes = sorted(
                (k for k in f.keys() if k.startswith('episode_')),
                key=_numeric_suffix,
            )

            for ep_name in episodes:
                ep_group = f[ep_name]
                print(f"--- {ep_name} ---")

                timesteps = sorted(
                    (k for k in ep_group.keys() if k.startswith('timestep_')),
                    key=_numeric_suffix,
                )

                # Fields of the most recently printed timestep. It stays None
                # until the first timestep of the episode has been printed.
                # An explicit marker is used instead of testing one of the
                # fields for None, so duplicate collapsing still works when
                # a field (e.g. choice_action) is missing from the file.
                last_printed = None
                skip_count = 0

                for ts_name in timesteps:
                    ts_group = ep_group[ts_name]

                    choice_action = _read_field(
                        ts_group, 'action', 'choice_action')
                    waypoint_action = _read_field(
                        ts_group, 'action', 'waypoint_action')
                    is_subgoal_boundary = _read_field(
                        ts_group, 'info', 'is_subgoal_boundary')
                    is_video_demo = _read_field(
                        ts_group, 'info', 'is_video_demo')

                    current = (
                        choice_action,
                        is_subgoal_boundary,
                        is_video_demo,
                        waypoint_action,
                    )

                    should_skip = last_printed is not None and all(
                        _is_equal(cur, prev)
                        for cur, prev in zip(current, last_printed)
                    )

                    # Never hide timesteps close to the one the caller asked
                    # to look at.
                    if should_skip and target_timestep is not None:
                        ts_idx = _numeric_suffix(ts_name)
                        if abs(ts_idx - target_timestep) <= window:
                            should_skip = False

                    if should_skip:
                        skip_count += 1
                        continue

                    # Report the collapsed run before the next printed entry.
                    if skip_count > 0:
                        print(f" ... ({skip_count} identical timesteps skipped) ...")
                        skip_count = 0

                    print(f" {ts_name}:")
                    print(f" choice_action: {choice_action}")
                    print(f" is_subgoal_boundary: {is_subgoal_boundary}")
                    print(f" is_video_demo: {is_video_demo}")
                    print(f" waypoint_action: {waypoint_action}")

                    last_printed = current

                # Report a run of skipped timesteps that ends the episode.
                if skip_count > 0:
                    print(f" ... ({skip_count} identical timesteps skipped) ...")
                    skip_count = 0

    except Exception as e:
        # Top-level boundary of this inspection tool: report and return
        # rather than crash with a traceback.
        print(f"Error reading HDF5 file: {e}")
| |
|
if __name__ == "__main__":
    # Command-line entry point: parse the options, then run the inspection.
    cli = argparse.ArgumentParser(description="Inspect HDF5 dataset actions.")
    cli.add_argument(
        "filepath",
        nargs="?",
        type=str,
        default=DEFAULT_PATH,
        help="Path to the HDF5 file.",
    )
    cli.add_argument(
        "-t",
        "--timestep",
        type=int,
        default=None,
        help="Specific timestep to not omit even if identical.",
    )
    cli.add_argument(
        "-w",
        "--window",
        type=int,
        default=10,
        help="Window around specified timestep to not omit (default: 10).",
    )
    opts = cli.parse_args()

    inspect_actions(
        opts.filepath,
        target_timestep=opts.timestep,
        window=opts.window,
    )
| |
|