Spaces:
Runtime error
Runtime error
| # -------------------------------------------------------- | |
| # Licensed under The MIT License [see LICENSE for details] | |
| # -------------------------------------------------------- | |
| import numpy as np | |
| from robomimic.envs.env_robosuite import EnvRobosuite | |
| import os | |
| import IPython | |
| import robomimic.utils.file_utils as FileUtils | |
| import robomimic.utils.env_utils as EnvUtils | |
| import robomimic.utils.obs_utils as ObsUtils | |
| from matplotlib import pyplot as plt | |
| import torch | |
| import h5py | |
| from tqdm import tqdm | |
| from sim.robomimic.robomimic_wrapper import RobomimicLowdimWrapper | |
| import cv2 | |
| import sys | |
| from collections import OrderedDict | |
| import traceback | |
# Low-dimensional observation keys, concatenated in this order to build the
# flat state vector handed to the policy (see convert_img_dataset).
OBS_KEYS = ["object", "robot0_eef_pos", "robot0_eef_quat", "robot0_gripper_qpos"]
# Target (width, height) for every rendered frame passed to cv2.resize.
RESOLUTION = (128, 128)
# NOTE(review): not referenced anywhere in this chunk — possibly used by
# code outside this view; confirm before removing.
global_idx = 0
# Headless Linux without an X server: start a virtual framebuffer so that
# offscreen rendering works.  Display.start() can hang on some machines, so
# it is guarded by a 5-second SIGALRM timeout.
if sys.platform == "linux" and os.getenv("DISPLAY") is None:
    try:
        from pyvirtualdisplay import Display
        import signal

        def handler(signum, frame):
            raise TimeoutError("Timeout while starting virtual display")

        signal.signal(signal.SIGALRM, handler)
        signal.alarm(5)
        try:
            virtual_display = Display(visible=0, size=(1400, 900))
            virtual_display.start()
        finally:
            # Always cancel the pending alarm — the original only cleared it
            # on success, so a failure here could leave a stray SIGALRM that
            # raises TimeoutError at an arbitrary later point.
            signal.alarm(0)
    except Exception as e:
        # Best-effort: rendering may still work without the virtual display.
        print(f"When instantiate pyvirtualdisplay: {e}")
def render_step(env, state):
    """Restore the simulator to ``state`` and render one resized frame.

    ``state`` is a flattened MuJoCo state vector as stored in the robomimic
    hdf5 datasets.  Returns the rendered image resized to ``RESOLUTION``.
    """
    sim = env.env.env.sim
    sim.set_state_from_flattened(state)
    sim.forward()
    frame = env.render()
    return cv2.resize(frame, RESOLUTION)
def convert_img_dataset(
    dataset_dir="data/robomimic/datasets", env_names=None, gui=False, episode_num_pertask=2000, **kwargs
):
    """Yield episode dicts (suitable for a replay buffer) for each task.

    For every task in ``env_names`` this opens the proficient-human image
    dataset, replays each demo's stored sim states to re-render frames, and
    yields ``{"steps": [...]}`` where each step holds the flat state, the
    rendered image, the action, and the task name as language instruction.
    ``gui`` and ``kwargs`` are accepted for API compatibility but unused here.
    """
    for env_name in env_names:
        hdf5_path = os.path.join(dataset_dir, env_name, "ph", "image.hdf5")
        env_meta = FileUtils.get_env_metadata_from_dataset(hdf5_path)
        env_meta["use_image_obs"] = True
        env = RobomimicLowdimWrapper(env=create_env(env_meta=env_meta, obs_keys=OBS_KEYS))
        with h5py.File(hdf5_path) as file:
            demos = file["data"]
            for demo_idx in tqdm(range(episode_num_pertask), desc="Loading hdf5 to ReplayBuffer"):
                demo_key = f"demo_{demo_idx}"
                if demo_key not in demos:
                    continue
                demo = demos[demo_key]
                obs_group = demo["obs"]
                sim_states = demo["states"]
                actions = demo["actions"][:].astype(np.float32)
                # Concatenate the low-dim keys into one flat state per step.
                flat_obs = np.concatenate(
                    [obs_group[key] for key in OBS_KEYS], axis=-1
                ).astype(np.float32)
                steps = []
                for act, state_vec, sim_state in zip(actions, flat_obs, sim_states):
                    # Re-render the frame from the stored simulator state.
                    frame = render_step(env, sim_state)
                    steps.append(OrderedDict({
                        "observation": {"state": state_vec, "image": frame},
                        "action": act,
                        "language_instruction": f"{env_name}",
                    }))
                yield {"steps": steps}
def writer_for(tag, fps, res, src_folder="data/robomimic_policy_videos"):
    """Create an mp4 ``cv2.VideoWriter`` at ``{src_folder}/{tag}.mp4``.

    Args:
        tag: file stem of the output video.
        fps: frames per second of the output.
        res: (width, height) frame size — frames written must match it.
        src_folder: output directory, created if missing.
    """
    # makedirs creates missing parent directories and, with exist_ok=True,
    # is safe under concurrent creation — os.mkdir fails on both counts.
    os.makedirs(src_folder, exist_ok=True)
    return cv2.VideoWriter(
        f"{src_folder}/{tag}.mp4",
        cv2.VideoWriter_fourcc(*"mp4v"),
        fps,
        res,
    )
def create_env(env_meta, obs_keys):
    """Build an offscreen-rendering robomimic env from dataset metadata.

    Registers ``obs_keys`` under the "image" modality, then instantiates
    the environment with offscreen rendering and image observations on.
    """
    ObsUtils.initialize_obs_modality_mapping_from_dict({"image": obs_keys})
    return EnvUtils.create_env_from_metadata(
        env_meta=env_meta,
        render=False,
        render_offscreen=True,
        use_image_obs=True,
    )
def learner_trajectory_generator(env, policy, camera_name="view_1", env_name="lift"):
    """Roll out ``policy`` in ``env``, yielding [obs, reward, done, info, image].

    The policy is primed with its prompt horizon: the initial rendered frame
    tiled ``policy.prompt_horizon`` times, paired with zero actions.
    ``camera_name`` is currently unused but kept for API compatibility.
    The rollout is capped at 100 environment steps; because the policy emits
    action chunks, the cap is checked between chunks and may be slightly
    exceeded within the final chunk.
    """
    o = env.reset()
    # Prompt conditioning: repeat the first (full-resolution) frame.
    img = env.render()
    prompt_image = np.tile(
        img, (policy.prompt_horizon, 1, 1, 1)
    ).astype(np.uint8)
    prompt_action = np.zeros(
        (policy.prompt_horizon, policy.action_stride, 7)  # robomimic dof=7
    )
    policy.set_initial_state((prompt_image, prompt_action))
    policy.reset()
    img = cv2.resize(env.render(), RESOLUTION).astype(np.uint8)
    step_data = {"state": o, "image": img, "language_instruction": f"{env_name}"}  # B x T
    max_path_length = 100
    i = 0
    while i < max_path_length:
        # The policy returns a chunk of actions; execute all before re-planning.
        actions = policy.generate_action(step_data)
        for a in actions:
            i += 1
            o, r, done, info = env.step(a)
            img = cv2.resize(env.render(), RESOLUTION).astype(np.uint8)
            step_data = OrderedDict({"state": o, "image": img, "language_instruction": f"{env_name}"})  #
            yield [o, r, done, info, img]
def dp_learner_trajectory_generator(env, policy, max_length=100, camera_name="view_1", env_name="lift"):
    """Roll out a diffusion policy in ``env``, yielding [obs, reward, done, info, image].

    Maintains a sliding window of the last ``policy.n_obs_steps`` agentview
    frames (CHW, 84x84) as the policy's observation buffer, bootstrapped by
    repeating the initial frame.  ``camera_name`` is unused but kept for API
    compatibility.  Up to ``max_length`` action chunks are executed.
    """
    DP_RES = 84  # diffusion policy consumes 84x84 agentview crops
    policy.reset()
    o = env.reset()
    img = cv2.resize(env.render(), (DP_RES, DP_RES)).astype(np.uint8)
    obs_dict = {"agentview_image": img.transpose(2, 0, 1)}  # HWC -> (C, H, W)
    # Bootstrap the observation history by repeating the first frame.
    obs_buff = {
        k: v[np.newaxis].repeat(policy.n_obs_steps, axis=0)
        for k, v in obs_dict.items()
    }
    for _ in range(max_length):
        # Batch the buffered observations and query the policy for a chunk.
        traj = policy.generate_action({
            k: torch.from_numpy(v).to(device=policy.device, dtype=policy.dtype).unsqueeze(0)
            for k, v in obs_buff.items()
        })['action'].squeeze(0).detach().cpu().numpy()
        for a in traj:
            o, r, done, info = env.step(a)
            img = cv2.resize(env.render(), (DP_RES, DP_RES)).astype(np.uint8)
            obs_dict = {"agentview_image": img.transpose(2, 0, 1)}
            # Slide the window: drop the oldest frame, append the newest.
            obs_buff = {
                k: np.roll(v, shift=-1, axis=0)
                for k, v in obs_buff.items()
            }
            for k, v in obs_dict.items():
                obs_buff[k][-1] = v
            yield [o, r, done, info, img]
class RolloutRunner:
    """Evaluate policy rollouts on a set of robomimic environments."""

    def __init__(self, env_names, episode_num=200, save_video=False):
        """Build one wrapped environment per task name.

        Args:
            env_names: list of robomimic task names (dataset folder names).
            episode_num: default number of episodes per task in run().
            save_video: whether run() writes rollout videos to disk.
        """
        self.save_video = save_video
        self.episode_num = episode_num
        self.envs = []
        # Mirrors self.envs index-for-index.  (The original assigned
        # self.env_names = env_names first, then immediately overwrote it —
        # the first assignment was dead code and is removed.)
        self.env_names = []
        for env_name in env_names:
            dataset_dir = "data/robomimic/datasets"
            dataset_dir_task = os.path.join(dataset_dir, env_name, "ph", "image.hdf5")
            env_meta = FileUtils.get_env_metadata_from_dataset(dataset_dir_task)
            env = create_env(env_meta=env_meta, obs_keys=OBS_KEYS)
            env = RobomimicLowdimWrapper(env=env)  # should be kitchen all
            self.envs.append(env)
            self.env_names.append(env_name)

    def step_render(self, curr_state, actions, freq=1):
        """
        Render a sequence of frames given the current flattened sim state and
        a sequence of actions; returns the list of resized frames (the first
        frame is rendered before any action is applied).
        """
        images = []
        prompt_steps = len(actions)
        env = self.envs[0]
        env.env.env.sim.set_state_from_flattened(curr_state)
        env.env.env.sim.forward()
        img = env.render()
        img = cv2.resize(img, RESOLUTION)
        images.append(img)
        for action in actions:
            # Each action packs `freq` consecutive low-level actions.
            for sub_action in action.reshape(freq, -1):
                env.step(sub_action)
            img = cv2.resize(env.render(), RESOLUTION)
            images.append(img)
        # Visualize all frames side by side (figure left open for the caller).
        fig, ax = plt.subplots(1, prompt_steps + 1, figsize=(20, 20))
        for i, img in enumerate(images):
            ax[i].imshow(img)
            ax[i].axis("off")
        return images

    def run(
        self, policy, save_video=False, gui=False, video_postfix="", seed=233, env_name=None, episode_num=-1, **kwargs
    ):
        """Evaluate ``policy`` on the configured envs.

        Returns (success_rate, mean_reward) of the LAST evaluated env, or
        (0.0, 0.0) when no environment matched — the original raised
        NameError in that case because the totals were only bound inside
        the loop.  ``env_name``, when given, selects a single env by its
        first element; ``episode_num=-1`` means "use the constructor value".
        """
        if episode_num == -1:
            episode_num = self.episode_num
        success_rate, reward_rate = 0.0, 0.0  # safe defaults when no env runs
        for env_idx, env in enumerate(self.envs):
            env.seed(seed)
            curr_env_name = self.env_names[env_idx]
            if env_name is not None:
                if str(env_name[0]) != str(curr_env_name):
                    continue
                print(f"selected env name: {env_name} current env: {curr_env_name}")
            writer = None
            if self.save_video:
                writer = writer_for(
                    f"{self.env_names[env_idx]}_{video_postfix}",
                    10,
                    RESOLUTION,
                    src_folder="data/robomimic_policy_videos",
                )
                print(f"save to video file {self.env_names[env_idx]}_{video_postfix}")
            total_success = 0
            total_reward = 0
            pbar = tqdm(range(episode_num), position=1, leave=True)
            for i in pbar:
                try:
                    eps_reward = 0
                    for o, r, done, info, img in learner_trajectory_generator(env, policy):
                        eps_reward += r
                        # Only record videos for the first few episodes.
                        if self.save_video and i <= 10:
                            if gui:
                                cv2.imshow("img", img)
                                cv2.waitKey(1)
                            img = cv2.resize(img, RESOLUTION)
                            writer.write(img[..., [2, 1, 0]])  # RGB -> BGR
                        if done or eps_reward >= 1:
                            break
                    actual_reward = eps_reward >= 1
                    pbar.set_description(f"success: {actual_reward}")
                    total_success += actual_reward
                    total_reward += actual_reward
                except Exception:
                    # Per-episode failures are logged and skipped so one bad
                    # rollout does not abort the whole evaluation.
                    print(traceback.format_exc())
                    continue
            if writer is not None:
                writer.release()  # flush/close the mp4 container (was leaked)
            success_rate = total_success / episode_num
            reward_rate = total_reward / episode_num
        return success_rate, reward_rate
if __name__ == "__main__":
    # Build a rollout runner covering all tasks, 200 episodes each.
    rollout_runner = RolloutRunner(["all"], 200)