Neo-X committed
Commit · e18f7ef
1 Parent(s): 472e6ce

Fixing sim evals and adding action stacking support.

Files changed:
- README.md +1 -2
- app.py +20 -14
- sim_eval.py +78 -38
README.md CHANGED
@@ -14,5 +14,4 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
 Submit your files in the form
 
 miniGRP.pth
-conf/config.yaml
-grp_model.py
+conf/config.yaml
app.py CHANGED
@@ -19,7 +19,7 @@ api = HfApi()
 
 def evaluate_policy(model_id):
     """
-    Downloads a
+    Downloads a GRP model from HF Hub, runs it in the simulator, returns mean reward.
     """
     print(f"Starting evaluation for: {model_id}")
     try:
@@ -36,7 +36,7 @@ def evaluate_policy(model_id):
                 model_file = os.path.join(root, file)
             if file.endswith("model.py"):
                 grp_file_path = os.path.join(root, file)
-            if file.endswith(".yaml") or file.endswith(".yalm"):
+            if file.endswith("config.yaml") or file.endswith("config.yalm"):
                 hydra_config_file_path = os.path.join(root, file)
 
         if not model_file:
@@ -44,7 +44,7 @@ def evaluate_policy(model_id):
 
         # 2. Load the PPO Agent
         # custom_objects map may be needed if python versions differ, but usually fine for PPO
-        import torch
+        import torch, dill
        # ------------
        # Train and test splits
        # Loading data
@@ -54,13 +54,14 @@ def evaluate_policy(model_id):
         from omegaconf import OmegaConf
         cfg = OmegaConf.load(hydra_config_file_path)
         cfg.dataset.load_dataset = "skip"
+        cfg.testing = True
         ## load the GRP model from the file downloaded in the snapshot
         # Dynamically load the module
         import importlib.util, sys
         sys.path.insert(0, repo_path+"/") ## dangerous for security but ok for now.
         from grp_model import GRP
 
-        model_ = torch.load(model_file)
+        model_ = torch.load(model_file, pickle_module=dill)
         # model_._cgf = cfg
         # model = PPO.load(model_file)
         print("Memory used by the model:", torch.cuda.memory_allocated(cfg.device) / 1e6, "MB") ## This to the database later.
@@ -69,15 +70,19 @@ def evaluate_policy(model_id):
 
         tokenizer = None
         text_model = None
+        ## Time the evaluation run
+        start_time = time.time()
         if cfg.dataset.encode_with_t5: ## Load T5 model
             from transformers import T5Tokenizer, T5ForConditionalGeneration
             tokenizer = T5Tokenizer.from_pretrained(cfg.dataset.t5_version)
             text_model = T5ForConditionalGeneration.from_pretrained(cfg.dataset.t5_version)
 
         if "libero" in cfg.simEval:
+            from sim_eval import eval_libero
             results = eval_libero(model_.to(cfg.device), device=cfg.device, cfg=cfg,
                                   iter_=0, tokenizer=tokenizer, text_model=text_model, wandb=None,
                                   log_dir="./")
+            print("LIBERO results:", results)
         if "simple_env" in cfg.simEval:
             import simpler_env
             task_name = "widowx_carrot_on_plate" # @param ["google_robot_pick_coke_can", "google_robot_move_near", "google_robot_open_drawer", "google_robot_close_drawer", "widowx_spoon_on_towel", "widowx_carrot_on_plate", "widowx_stack_cube", "widowx_put_eggplant_in_basket"]
@@ -93,11 +98,11 @@ def evaluate_policy(model_id):
                               wandb=None, iter_=0, tokenizer=tokenizer, text_model=text_model)
             print("results:", results)
 
-
-
-
-
-        return results
+        # cbuffer.save(cfg.dataset.to_name)
+        env.close()
+        del env
+        results['time'] = time.time() - start_time
+        return results, "Success"
 
     except Exception as e:
         print(f"Evaluation failed: {e}")
@@ -136,23 +141,24 @@ def run_evaluation_loop():
         score, status_msg = evaluate_policy(model_id)
 
         # 4. Update the Dataframes
-
         # Update Requests (Mark as Done or Failed)
         requests_df.loc[row_index, "status"] = "Done" if score is not None else "Failed"
 
         # Prepare Results Row
         if score is not None:
             new_result = {
                 "model_id": model_id,
-                "mean_reward": score,
-                "
+                "mean_reward": score['rewards'],
+                "run_time": score["time"],
+                "status": "Success",
+                "completed_at": time.time()
             }
 
             # Load Results Dataset
             try:
                 results_df = pd.read_csv(f"hf://datasets/{RESULTS_DATASET}/results.csv")
             except:
-                results_df = pd.DataFrame(columns=["model_id", "mean_reward", "status"])
+                results_df = pd.DataFrame(columns=["model_id", "mean_reward", "run_time", "status", "completed_at"])
 
             # Append new result
             results_df = pd.concat([results_df, pd.DataFrame([new_result])], ignore_index=True)
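Note on the `torch.load(model_file, pickle_module=dill)` change: the checkpoint here is a whole-model pickle rather than a `state_dict`, so unpickling has to re-resolve the `GRP` class from the dynamically added `repo_path`. Below is a minimal sketch of that round-trip, assuming a full-model checkpoint; `TinyPolicy` is a hypothetical stand-in for `GRP`:

```python
# Sketch of a full-model pickle round-trip with dill (assumption: the
# checkpoint stores the whole module, not just a state_dict).
import dill
import torch
import torch.nn as nn

class TinyPolicy(nn.Module):  # hypothetical stand-in for GRP
    def __init__(self):
        super().__init__()
        self.net = nn.Linear(4, 7)

    def forward(self, x):
        return self.net(x)

torch.save(TinyPolicy(), "tiny.pth", pickle_module=dill)
# weights_only=False is needed on newer PyTorch, where torch.load defaults
# to weights-only loading and rejects full-model pickles.
policy = torch.load("tiny.pth", pickle_module=dill, weights_only=False)
print(policy(torch.zeros(1, 4)).shape)  # torch.Size([1, 7])
```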
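The bookkeeping in `run_evaluation_loop` is a read-append-write cycle on a CSV stored in a Hugging Face dataset repo. A small sketch of that pattern, assuming `huggingface_hub` is installed so pandas can resolve `hf://` paths; the repo id and result values are hypothetical:

```python
# Sketch of the results-table update pattern used in run_evaluation_loop.
import time
import pandas as pd

RESULTS_DATASET = "user/eval-results"  # hypothetical dataset repo id
columns = ["model_id", "mean_reward", "run_time", "status", "completed_at"]

try:
    results_df = pd.read_csv(f"hf://datasets/{RESULTS_DATASET}/results.csv")
except Exception:
    # First run (or unreadable file): start from an empty table,
    # mirroring the bare `except:` fallback in the app.
    results_df = pd.DataFrame(columns=columns)

new_result = {
    "model_id": "user/some-model",  # hypothetical values
    "mean_reward": 0.42,
    "run_time": 93.1,
    "status": "Success",
    "completed_at": time.time(),
}
results_df = pd.concat([results_df, pd.DataFrame([new_result])], ignore_index=True)
print(results_df.tail(1))
```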
sim_eval.py CHANGED
@@ -1,4 +1,7 @@
 
+import dill
+import numpy as np
+import torch
 
 def get_text_tokens(cfg, tokenizer, text_model, goal, model=None):
     """
@@ -43,28 +46,24 @@ def eval_model_in_sim(cfg, model, device, log_dir, env, env_unwrapped,
     obs, reset_info = env.reset()
     obs_ = get_image_from_maniskill2_obs_dict(env_unwrapped, obs)[:,:,:3]
     obs_hist = deque(maxlen=cfg.policy.obs_stacking)
-
-
-    obs_hist.append(obs_)
+    for _ in range(cfg.policy.obs_stacking):
+        obs_hist.append(obs_)
     instruction = env_unwrapped.get_language_instruction()
     # print("Reset info", reset_info)
     print("Instruction", instruction)
     frames = []
     done, truncated, timeLimit, t = False, False, 100, 0
     txt_goal = get_text_tokens(cfg, tokenizer, text_model, instruction, model=model)
+    # obs_hist.append(image) ## Add the new observation to the history buffer
     while not (done or truncated or (t > timeLimit)):
         # action[:3]: delta xyz; action[3:6]: delta rotation in axis-angle representation;
         # action[6:7]: gripper (the meaning of open / close depends on robot URDF)
-        image = get_image_from_maniskill2_obs_dict(env_unwrapped, obs)
-        image = image[:,:,:3] ## Remove last dimension of image color
-
-        obs_hist.append(image) ## Add the new observation to the history buffer
         # obs = [obs_["image"] for obs_ in obs] # obs is a list of dicts
         image = np.stack(obs_hist, axis=-1) # stack along the last dimension
         image = rearrange(image, 'h w c t -> h w (c t)') # add batch dimension
 
-        obs_state = model.preprocess_state(image).
-        goal_state = model.preprocess_goal_image(image[:,:,:3]).
+        obs_state = torch.tensor(model.preprocess_state(image), dtype=torch.float32)
+        goal_state = torch.tensor(model.preprocess_goal_image(image[:,:,:3]), dtype=torch.float32)
         action, loss = model.forward(torch.tensor(obs_state.unsqueeze(0), dtype=torch.float32).to(device)
             ,torch.tensor(txt_goal).to(device)
             ,torch.tensor(goal_state.unsqueeze(0), dtype=torch.float32).to(device),
@@ -73,24 +72,39 @@ def eval_model_in_sim(cfg, model, device, log_dir, env, env_unwrapped,
             )
 
         action = model.decode_action(action[0]).cpu().detach().numpy() ## Add in the gripper close action
-
-
-
-
-
+        ## If the actions are stacked into a longer vector, execute the sequence of actions
+        for step_ in range(cfg.policy.action_stacking):
+            act_ = action[cfg.action_dim*step_:(cfg.action_dim*(step_+1))]
+            obs, reward, done, truncated, info = env.step(act_)
+            image = get_image_from_maniskill2_obs_dict(env_unwrapped, obs)
+            image = image[:,:,:3] ## Remove last dimension of image color
+            # Store the original image for video before stacking/processing
+            frames.append(image)
+            reward = -(np.linalg.norm(info["eof_to_obj1_diff"]) + np.linalg.norm(info["eof_to_obj1_diff"])) ## Use a shaped reward as distance between gripper and objects
+            rewards.append(reward)
+            t=t+1
+            if done or truncated:
+                break
+
 
     episode_stats = info.get('episode_stats', {})
     episode_stats['rewards'] = np.mean(rewards)
     # print("Episode stats", episode_stats)
-
+    print(f"avg reward {np.mean(episode_stats['rewards']):.8f}")
     if not cfg.testing:
         wandb.log({"avg reward": np.mean(rewards)})
-
-
-    path_ = log_dir
-
+
+    import os
+    path_ = os.path.join(log_dir, f"simple-env-{iter_}.mp4")
+    import imageio
+    imageio.mimsave(path_, frames, fps=20)
+
     if not cfg.testing:
-
+        try:
+            wandb.log({"example": wandb.Video(path_)})
+        except Exception as e:
+            print(f"Warning: failed to log video to wandb: {e}")
+
     return episode_stats
 
 import gymnasium as gym
@@ -143,6 +157,7 @@ def eval_libero(model, device, cfg, iter_=0, log_dir="./",
     from libero.libero.utils import get_libero_path
     from gymnasium.wrappers import FrameStackObservation
     from einops import rearrange
+    from collections import deque
 
 
     benchmark_dict = benchmark.get_benchmark_dict()
@@ -172,6 +187,9 @@ def eval_libero(model, device, cfg, iter_=0, log_dir="./",
         env.set_init_state(init_states[init_state_id])
         env = FrameStackObservation(DictWrapper(env, obs_key="agentview_image"), cfg.policy.obs_stacking) ## Stacking the observations
         obs, info = env.reset()
+        # obs_hist = deque(maxlen=cfg.policy.obs_stacking)
+        # for _ in range(cfg.policy.obs_stacking):
+        #     obs_hist.append(obs)
 
         mask = get_blocked_mask(cfg, targets=None, T=0) ## Get the blocked mask
 
@@ -180,9 +198,15 @@ def eval_libero(model, device, cfg, iter_=0, log_dir="./",
         frames = []
         rewards = []
         infos = []
-
+        done, truncated, timeLimit, t, wait_steps = False, False, 400, 0, 10
+        while not (done or truncated or (t > (timeLimit + wait_steps))):
             ## Reshape the image to the correct size and stack the history on the last channel dimension
-        image = obs[0]
+            # image = obs[0]
+            if t < wait_steps: ## let the object stabilize before acting.
+                obs, reward, done, truncated, info = env.step([0,0,0,0,0,0,-1])
+                # obs_hist.append(obs)
+                t += 1
+                continue
             # obs = obs.reshape((128, 128, 3*cfg.policy.obs_stacking)) ## Assuming the observation is an image of size 128x128 with 3 color channels
             obs = rearrange(obs, 't h w c -> h w (t c)', c=3, t=cfg.policy.obs_stacking) ## Rearranging the image to have the stacked history in the last channel dimension
             # image = obs[:,:,:3] ## Remove the last dimension of the image color
@@ -195,35 +219,50 @@ def eval_libero(model, device, cfg, iter_=0, log_dir="./",
                 pose=torch.tensor([[np.concatenate( (info["robot0_eef_pos"],
                     info["robot0_eef_quat"][:3],
                     [(info["robot0_gripper_qpos"][0] - info["robot0_gripper_qpos"][0]) < 0.005 ]), axis=-1)]], dtype=torch.float32).to(device),
-                morphology=torch.tensor([0], dtype=torch.uint8).to(device) ## Morphology is 0 for arm, 1 for A1}
+                # morphology=torch.tensor([0], dtype=torch.uint8).to(device) ## Morphology is 0 for arm, 1 for A1}
                 )
 
-            action = model.decode_action(action[0
-
-
-
-
-
+            action = model.decode_action(action[0]).cpu().detach().numpy() ## Add in the gripper close action
+            ## If the actions are stacked into a longer vector, execute the sequence of actions
+            for step_ in range(cfg.policy.action_stacking):
+                act_ = action[cfg.action_dim*step_:(cfg.action_dim*(step_+1))]
+                ## Need to process LIBERO gripper action [0, 1] -> [-1, 1], then invert, https://github.com/moojink/openvla-oft/blob/e4287e94541f459edc4feabc4e181f537cd569a8/experiments/robot/libero/run_libero_eval.py#L265
+                act_[6] = ((act_[6] - 0.5) * 2) * -1.0
+
+                obs, reward, done, truncated, info = env.step(act_)
+                # image = get_image_from_maniskill2_obs_dict(env_unwrapped, obs)
+                # image = image[:,:,:3] ## Remove last dimension of image color
+                # Store the original image for video before stacking/processing
+                image = obs[0]
+                frames.append(image)
+                # reward = -(np.linalg.norm(info["eof_to_obj1_diff"]) + np.linalg.norm(info["eof_to_obj1_diff"])) ## Use a shaped reward as distance between gripper and objects
+                rewards.append(reward)
+                infos.append(info)
+                t=t+1
+                if done or truncated:
+                    break
             if done:
                 print("Episode finished after {} timesteps".format(step_))
                 break
-
+
+        episode_stats = info.get('episode_stats', {})
+        episode_stats['rewards'] = np.mean(rewards)
         print(f"avg reward {np.mean(rewards):.8f}")
         if not cfg.testing:
             wandb.log({"avg reward_"+str(task_id): np.mean(rewards)})
-        import
-
-
-
+        import os
+        path_ = os.path.join(log_dir, f"libero-{iter_}.mp4")
+        import imageio
+        imageio.mimsave(path_, frames, fps=20)
         if not cfg.testing:
             wandb.log({"example": wandb.Video(path_)})
         env.close()
+    return episode_stats
 
 import hydra
 from omegaconf import DictConfig
-from mini_grp import *
 
-@hydra.main(config_path="./conf", config_name="
+@hydra.main(config_path="./conf", config_name="64pix-pose")
 def my_main(cfg: DictConfig):
     from mini_shuffel_buffer import CircularBuffer
     import torch
@@ -237,7 +276,8 @@ def my_main(cfg: DictConfig):
     # model_ = torch.load("/home/gberseth/playground/mini_grp/miniGRP.pth")
     model_dir = hydra.utils.get_original_cwd()+"/mini-grp/miniGRP.pth"
     print ("Loading model from:", model_dir)
-
+    from grp_model import GRP
+    model_ = torch.load(model_dir, pickle_module=dill)
     # model_._cgf = cfg
 
     tokenizer = None
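Both eval paths fold the stacked observation history into the channel axis before the model sees it. A minimal sketch of the LIBERO-side `rearrange`, with a hypothetical stack depth and image size:

```python
# FrameStackObservation yields (t, h, w, c); the model expects the history
# folded into channels, i.e. (h, w, t*c).
import numpy as np
from einops import rearrange

t, h, w, c = 4, 128, 128, 3  # hypothetical stack depth and image size
obs = np.zeros((t, h, w, c), dtype=np.uint8)
stacked = rearrange(obs, 't h w c -> h w (t c)', c=3, t=4)
print(stacked.shape)  # (128, 128, 12)
```

The simple_env path does the same from a `deque`, stacking on a trailing time axis first (`'h w c t -> h w (c t)'`).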
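The heart of the new action-stacking support: the model returns one flat vector holding `cfg.policy.action_stacking` consecutive actions of `cfg.action_dim` entries each, and the eval loops slice and execute them in order, remapping the gripper channel for LIBERO. A self-contained sketch of that slicing, with hypothetical config values:

```python
# Sketch of action-chunk slicing plus the LIBERO gripper remap used above.
import numpy as np

action_dim, action_stacking = 7, 3  # hypothetical cfg.action_dim / cfg.policy.action_stacking
flat = np.linspace(0.0, 1.0, action_dim * action_stacking, dtype=np.float32)

for step_ in range(action_stacking):
    act_ = flat[action_dim * step_ : action_dim * (step_ + 1)].copy()
    # LIBERO gripper convention: map [0, 1] -> [-1, 1], then invert the sign
    act_[6] = ((act_[6] - 0.5) * 2) * -1.0
    print(step_, act_)  # each act_ is one 7-DoF action ready for env.step(act_)
```

Executing the chunk open-loop trades reactivity for fewer model calls; both loops still break on `done or truncated`, so a finished episode is not over-stepped.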