Spaces:

HongzeFu
/

RoboMME

Running on T4

App Files Files Community

RoboMME / scripts /dataset_replay.py

HongzeFu

HF Space: code-only (no binary assets)

06c11b0 6 days ago

raw

history blame contribute delete

8.45 kB

	"""
	Replay episodes from HDF5 datasets and save rollout videos.
	Loads recorded actions from record_dataset_<Task>.h5, steps the environment
	with them, and writes one video per replayed episode.
	"""

	import os
	# Restrict CUDA to device index 1. The assignment is made before the
	# `import torch` further down in this file.
	# NOTE(review): the index is hard-coded and overwrites any value already set
	# in the environment; confirm this is intended on hosts other than the
	# author's machine.
	os.environ["CUDA_VISIBLE_DEVICES"] = "1"

	import json
	from pathlib import Path
	from typing import Any, Dict, Literal, Union

	import cv2
	import h5py
	import imageio
	import numpy as np
	import torch

	from robomme.env_record_wrapper import BenchmarkEnvBuilder

	# When True, the environment is built with gui_render enabled and
	# env.render() is called after every successfully replayed step.
	GUI_RENDER = False
	# Root output directory; videos go into a per-action-space subdirectory.
	REPLAY_VIDEO_DIR = "replay_videos"
	# Frame rate passed to imageio when writing a video.
	VIDEO_FPS = 30
	# Color and thickness (in pixels) of the border drawn on video-demo frames.
	# NOTE(review): (255, 0, 0) is red only if frames are RGB; confirm channel order.
	VIDEO_BORDER_COLOR = (255, 0, 0)
	VIDEO_BORDER_THICKNESS = 10

	# Task identifiers; only the uncommented entry is part of the Literal type.
	# NOTE(review): TaskID is not referenced by the rest of this file as shown.
	TaskID = Literal[
	# "BinFill",
	# "PickXtimes",
	# "SwingXtimes",
	# "StopCube",
	# "VideoUnmask",
	"VideoUnmaskSwap",
	# "ButtonUnmask",
	# "ButtonUnmaskSwap",
	# "PickHighlight",
	# "VideoRepick",
	# "VideoPlaceButton",
	# "VideoPlaceOrder",
	# "MoveCube",
	# "InsertPeg",
	# "PatternLock",
	# "RouteStick",
	]


	# Action-space names handled by _build_action_sequence and forwarded to the
	# environment builder as `action_space`.
	ActionSpaceType = Literal["joint_angle", "ee_pose", "waypoint", "multi_choice"]

	def _to_numpy(t) -> np.ndarray:
	return t.cpu().numpy() if isinstance(t, torch.Tensor) else np.asarray(t)


	def _frame_from_obs(
	    front: np.ndarray | torch.Tensor,
	    wrist: np.ndarray | torch.Tensor,
	    is_video_demo: bool = False,
	) -> np.ndarray:
	    """Build one video frame from a front-camera and a wrist-camera image.

	    Both images are converted to NumPy, stacked horizontally (front on the
	    left) and cast to uint8. When ``is_video_demo`` is true, a rectangle
	    spanning the combined frame is drawn with VIDEO_BORDER_COLOR and
	    VIDEO_BORDER_THICKNESS before the frame is returned.
	    """
	    views = [_to_numpy(front), _to_numpy(wrist)]
	    canvas = np.hstack(views).astype(np.uint8)
	    if not is_video_demo:
	        return canvas

	    height, width = canvas.shape[:2]
	    # cv2.rectangle draws in place on `canvas`.
	    cv2.rectangle(
	        canvas,
	        (0, 0),
	        (width, height),
	        VIDEO_BORDER_COLOR,
	        VIDEO_BORDER_THICKNESS,
	    )
	    return canvas


	def _extract_frames(obs: dict, is_video_demo_fn=None) -> list[np.ndarray]:
	    """Turn the image lists of one observation into combined video frames.

	    One frame is produced per entry of ``obs["front_rgb_list"]``, paired with
	    the entry at the same index of ``obs["wrist_rgb_list"]``. If
	    ``is_video_demo_fn`` is given, it is called with the frame index and its
	    result decides whether that frame gets the video-demo border; without it
	    no frame is bordered.
	    """
	    front_images = obs["front_rgb_list"]
	    frames: list[np.ndarray] = []
	    for idx in range(len(front_images)):
	        front = front_images[idx]
	        wrist = obs["wrist_rgb_list"][idx]
	        bordered = is_video_demo_fn(idx) if is_video_demo_fn else False
	        frames.append(_frame_from_obs(front, wrist, is_video_demo=bordered))
	    return frames


	def _read_info_flag(ts: h5py.Group, name: str) -> bool:
	    """Read the boolean entry ``info/<name>`` of a timestep group.

	    Returns False when the ``info`` group or the entry is missing, or when
	    the entry holds no elements. Otherwise the first element of the stored
	    value (in flattened order) is converted to bool.
	    """
	    info = ts.get("info")
	    if info is None or name not in info:
	        return False
	    values = np.asarray(info[name][()]).reshape(-1)
	    # An empty dataset is treated like a missing flag instead of raising
	    # IndexError on values[0].
	    return bool(values[0]) if values.size else False


	def _is_video_demo(ts: h5py.Group) -> bool:
	    """Return True if the timestep's ``info/is_video_demo`` flag is set."""
	    return _read_info_flag(ts, "is_video_demo")


	def _is_subgoal_boundary(ts: h5py.Group) -> bool:
	    """Return True if the timestep's ``info/is_subgoal_boundary`` flag is set."""
	    return _read_info_flag(ts, "is_subgoal_boundary")


	def _decode_h5_str(raw) -> str:
	"""Uniformly decode bytes / numpy bytes / str from HDF5 to str."""
	if isinstance(raw, np.ndarray):
	raw = raw.flatten()[0]
	if isinstance(raw, (bytes, np.bytes_)):
	raw = raw.decode("utf-8")
	return raw


	def _build_action_sequence(
	    episode_data: h5py.Group, action_space_type: str
	) -> list[Union[np.ndarray, Dict[str, Any]]]:
	    """
	    Scan the entire episode and return the action sequence to replay.

	    Timesteps are visited in numeric order of their ``timestep_<N>`` key.
	    Timesteps flagged as video demo, timesteps without an ``action`` group,
	    and timesteps lacking the dataset for the requested action space are
	    skipped.

	    - joint_angle / ee_pose: ``joint_action`` / ``eef_action`` of every
	      remaining step as float32 arrays (sequential, not deduplicated)
	    - waypoint: ``waypoint_action`` flattened to float32; entries that are not
	      exactly 7 finite values are dropped and adjacent duplicates are removed
	      (like EpisodeDatasetResolver)
	    - multi_choice: ``choice_action`` (JSON dict) only for steps where
	      is_subgoal_boundary=True; kept as ``{"choice": ..., "point": ...}`` when
	      the payload is a dict with a non-blank string ``choice`` and a ``point``
	      key, otherwise skipped

	    Raises:
	        ValueError: if ``action_space_type`` is not one of the four names above.
	    """
	    # Action spaces whose actions are copied verbatim, mapped to their dataset name.
	    dense_dataset = {"joint_angle": "joint_action", "ee_pose": "eef_action"}
	    if action_space_type not in (*dense_dataset, "waypoint", "multi_choice"):
	        # Checked before touching the data so a wrong name is reported even
	        # when the episode contains no replayable timestep.
	        raise ValueError(f"Unknown action space type: {action_space_type}")

	    timestep_keys = sorted(
	        (k for k in episode_data.keys() if k.startswith("timestep_")),
	        key=lambda k: int(k.split("_")[1]),
	    )

	    actions: list[Union[np.ndarray, Dict[str, Any]]] = []
	    prev_waypoint: np.ndarray | None = None

	    for key in timestep_keys:
	        ts = episode_data[key]
	        if _is_video_demo(ts):
	            continue

	        action_grp = ts.get("action")
	        if action_grp is None:
	            continue

	        if action_space_type in dense_dataset:
	            name = dense_dataset[action_space_type]
	            if name in action_grp:
	                actions.append(np.asarray(action_grp[name][()], dtype=np.float32))

	        elif action_space_type == "waypoint":
	            if "waypoint_action" not in action_grp:
	                continue
	            wa = np.asarray(action_grp["waypoint_action"][()], dtype=np.float32).flatten()
	            if wa.shape != (7,) or not np.all(np.isfinite(wa)):
	                continue
	            # Remove adjacent duplicates; only appended waypoints are remembered.
	            if prev_waypoint is None or not np.array_equal(wa, prev_waypoint):
	                actions.append(wa)
	                prev_waypoint = wa

	        else:  # multi_choice
	            if not _is_subgoal_boundary(ts):
	                continue
	            if "choice_action" not in action_grp:
	                continue
	            try:
	                # Decoding is inside the try so invalid UTF-8 (a ValueError
	                # subclass) is skipped the same way as invalid JSON.
	                payload = json.loads(_decode_h5_str(action_grp["choice_action"][()]))
	            except (TypeError, ValueError):
	                continue
	            if not isinstance(payload, dict):
	                continue
	            choice = payload.get("choice")
	            if not isinstance(choice, str) or not choice.strip():
	                continue
	            if "point" not in payload:
	                continue
	            actions.append({"choice": choice, "point": payload.get("point")})

	    return actions


	def _filename_safe(text: object, max_len: int | None = None) -> str:
	    """Return ``text`` as a string that can be embedded in a single file name.

	    Path separators, the characters ``: * ? " < > |`` and control characters
	    are replaced with ``_``; the result is cut to ``max_len`` characters when
	    a limit is given.
	    """
	    cleaned = "".join(
	        "_" if ch in '/\\:*?"<>|' or ord(ch) < 32 else ch for ch in str(text)
	    )
	    return cleaned if max_len is None else cleaned[:max_len]


	def _save_video(
	    frames: list[np.ndarray],
	    task_id: str,
	    episode_idx: int,
	    task_goal: str,
	    outcome: str,
	    action_space_type: str,
	) -> Path:
	    """Write ``frames`` as a video and return the path of the written file.

	    The file is placed in ``REPLAY_VIDEO_DIR/<action_space_type>/`` (created
	    if needed) and named
	    ``<outcome>_<task_id>_ep<episode_idx>_<task_goal>.mp4``, written at
	    VIDEO_FPS frames per second.

	    ``task_goal`` and ``outcome`` are free text, so they are sanitized before
	    being used in the name: a ``/`` in the goal would otherwise point into a
	    non-existent subdirectory, and a long goal could exceed the file-name
	    length limit. Names built from short goals without such characters are
	    unchanged.
	    """
	    video_dir = Path(REPLAY_VIDEO_DIR) / action_space_type
	    video_dir.mkdir(parents=True, exist_ok=True)
	    # 150 characters leaves room for the other name parts under the common
	    # 255-byte limit. NOTE(review): non-ASCII goals use more bytes per character.
	    goal_part = _filename_safe(task_goal, max_len=150)
	    outcome_part = _filename_safe(outcome)
	    name = f"{outcome_part}_{task_id}_ep{episode_idx}_{goal_part}.mp4"
	    path = video_dir / name
	    imageio.mimsave(str(path), frames, fps=VIDEO_FPS)
	    return path


	def _get_episode_indices(data: h5py.File) -> list[int]:
	    """Return the episode numbers found in ``data``, in ascending order.

	    Every top-level key starting with ``episode_`` contributes the integer
	    value of its second ``_``-separated field.
	    """
	    indices = []
	    for name in data.keys():
	        if not name.startswith("episode_"):
	            continue
	        indices.append(int(name.split("_")[1]))
	    indices.sort()
	    return indices


	def process_episode(
	    env_data: h5py.File,
	    episode_idx: int,
	    task_id: str,
	    action_space_type: ActionSpaceType,
	) -> None:
	    """Replay one episode from HDF5 data, record frames, and save a video.

	    The recorded actions of ``episode_<episode_idx>`` are extracted for the
	    given action space and fed one by one to an environment built for the
	    same task and episode. Frames from ``reset()`` and from every step are
	    collected; all reset frames except the last are drawn with the
	    video-demo border. Replay stops at the first step that raises, or when
	    the environment reports terminated/truncated.

	    The saved file name carries the outcome: ``info["status"]`` of the
	    terminating step, or ``"unknown"`` if the episode never terminated
	    (including when a step raised or the actions ran out).

	    The environment is closed even if ``reset()`` or rendering raises.
	    """
	    episode_data = env_data[f"episode_{episode_idx}"]
	    # The shared decoder accepts bytes, numpy bytes, str and arrays of them,
	    # rather than assuming an indexable array of bytes.
	    task_goal = _decode_h5_str(episode_data["setup"]["task_goal"][()])
	    action_sequence = _build_action_sequence(episode_data, action_space_type)

	    env = BenchmarkEnvBuilder(
	        env_id=task_id,
	        dataset="train",
	        action_space=action_space_type,
	        gui_render=GUI_RENDER,
	    ).make_env_for_episode(episode_idx)

	    outcome = "unknown"
	    try:
	        print(f"\nTask: {task_id}, Episode: {episode_idx}, ",
	              f"Seed: {env.unwrapped.seed}, Difficulty: {env.unwrapped.difficulty}")
	        print(f"Task goal: {task_goal}")
	        print(f"Total actions after dedup: {len(action_sequence)}")

	        obs, _ = env.reset()
	        reset_frame_count = len(obs["front_rgb_list"])
	        frames = _extract_frames(
	            obs, is_video_demo_fn=lambda i: i < reset_frame_count - 1
	        )

	        for seq_idx, action in enumerate(action_sequence):
	            try:
	                obs, _, terminated, truncated, info = env.step(action)
	                frames.extend(_extract_frames(obs))
	            except Exception as e:
	                # Best effort: keep the frames gathered so far and still
	                # write the video.
	                print(f"Error at seq_idx {seq_idx}: {e}")
	                break

	            if GUI_RENDER:
	                env.render()
	            if terminated or truncated:
	                outcome = info.get("status", "unknown")
	                print(f"Outcome: {outcome}")
	                break
	    finally:
	        env.close()

	    path = _save_video(frames, task_id, episode_idx, task_goal, outcome, action_space_type)
	    print(f"Saved video to {path}\n")


	def replay(
	    h5_data_dir: str = "/data/hongzefu/data_0226",
	    action_space_type: ActionSpaceType = "ee_pose",
	    replay_number: int = 10,
	    task_ids: tuple[str, ...] = ("VideoUnmaskSwap",),
	) -> None:
	    """Replay episodes from HDF5 dataset files and save rollout videos.

	    Args:
	        h5_data_dir: Directory containing ``record_dataset_<task_id>.h5`` files.
	        action_space_type: Action space used to extract and replay the actions.
	        replay_number: Number of episodes replayed per task, taken from the
	            start of the sorted episode indices.
	        task_ids: Tasks to replay. Defaults to the single task this script
	            previously had hard-coded.

	    Tasks whose dataset file does not exist are reported and skipped.
	    """
	    for task_id in task_ids:
	        file_path = Path(h5_data_dir) / f"record_dataset_{task_id}.h5"

	        if not file_path.exists():
	            print(f"Skipping {task_id}: file not found: {file_path}")
	            continue

	        with h5py.File(file_path, "r") as data:
	            episode_indices = _get_episode_indices(data)
	            # Slicing already clamps to the list length, so no min() is needed.
	            for episode_idx in episode_indices[:replay_number]:
	                process_episode(data, episode_idx, task_id, action_space_type)


	if __name__ == "__main__":
	    # tyro is imported inside the guard, so it is only needed when this file
	    # is run as a script; it builds the command-line interface from replay().
	    from tyro import cli

	    cli(replay)