Neo-X committed on
Commit
f3d825f
·
1 Parent(s): daef8c2

Adding config.yaml to .gitignore and updating app.py to load GRP model and config correctly.

Browse files
Files changed (6) hide show
  1. .gitignore +1 -0
  2. README.md +7 -0
  3. app.py +69 -28
  4. requests.csv +2 -0
  5. results.csv +2 -0
  6. sim_eval.py +273 -0
.gitignore CHANGED
@@ -14,3 +14,4 @@ temp.gif
14
  miniGRP.pth
15
  __pycache__/
16
  mini_grp.egg-info/
 
 
14
  miniGRP.pth
15
  __pycache__/
16
  mini_grp.egg-info/
17
+ conf/config.yaml
README.md CHANGED
@@ -9,3 +9,10 @@ short_description: test robot models
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
12
+
13
+
14
+ Submit your files in the form
15
+
16
+ miniGRP.pth
17
+ conf/config.yaml
18
+ grp_model.py
app.py CHANGED
@@ -11,11 +11,10 @@ import time
11
  HF_TOKEN = os.environ.get("HF_TOKEN")
12
  REQUESTS_DATASET = "gberseth/rl-leaderboard-requests" # REPLACE THIS
13
  RESULTS_DATASET = "gberseth/rl-leaderboard-results" # REPLACE THIS
14
- ENV_NAME = "CartPole-v1" # The Gym environment to evaluate
15
  EVAL_EPISODES = 10 # How many times to run the agent
16
 
17
  # Authenticate
18
- login(token=HF_TOKEN)
19
  api = HfApi()
20
 
21
  def evaluate_policy(model_id):
@@ -27,35 +26,69 @@ def evaluate_policy(model_id):
27
  # 1. Download the model repository
28
  # We look for a file named "ppo_cartpole.zip" or just standard "model.zip"
29
  # Adjust 'allow_patterns' to match what you require users to submit.
30
- repo_path = snapshot_download(repo_id=model_id, allow_patterns=["*.zip"])
31
 
32
  # Find the .zip file in the downloaded folder
33
  model_file = None
34
  for root, dirs, files in os.walk(repo_path):
35
  for file in files:
36
- if file.endswith(".zip"):
37
  model_file = os.path.join(root, file)
38
- break
 
 
 
39
 
40
  if not model_file:
41
- return None, "Error: No .zip model file found in repo."
42
 
43
  # 2. Load the PPO Agent
44
  # custom_objects map may be needed if python versions differ, but usually fine for PPO
45
- from mini_shuffel_buffer import CircularBuffer
46
  import torch
47
  # ------------
48
  # Train and test splits
49
  # Loading data
50
  # create RLDS dataset builder
51
- log_dir = hydra.core.hydra_config.HydraConfig.get().runtime.output_dir
 
 
 
52
  cfg.dataset.load_dataset = "skip"
53
- cBuffer = CircularBuffer(cfg.dataset.buffer_size, cfg)
54
- model_dir = hydra.utils.get_original_cwd()+"/mini-grp/miniGRP.pth"
55
- print ("Loading model from:", model_dir)
56
- model_ = torch.load(model_dir)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  # model_._cgf = cfg
58
  # model = PPO.load(model_file)
 
59
 
60
  # 3. Run Evaluation Loop
61
 
@@ -67,9 +100,9 @@ def evaluate_policy(model_id):
67
  text_model = T5ForConditionalGeneration.from_pretrained(cfg.dataset.t5_version)
68
 
69
  if "libero" in cfg.simEval:
70
- results = eval_libero(cBuffer, model_.to(cfg.device), device=cfg.device, cfg=cfg,
71
  iter_=0, tokenizer=tokenizer, text_model=text_model, wandb=None,
72
- log_dir=log_dir)
73
  if "simple_env" in cfg.simEval:
74
  import simpler_env
75
  task_name = "widowx_carrot_on_plate" # @param ["google_robot_pick_coke_can", "google_robot_move_near", "google_robot_open_drawer", "google_robot_close_drawer", "widowx_spoon_on_towel", "widowx_carrot_on_plate", "widowx_stack_cube", "widowx_put_eggplant_in_basket"]
@@ -79,9 +112,10 @@ def evaluate_policy(model_id):
79
  del env
80
  env = simpler_env.make(task_name)
81
  env_unwrapped = env.env.env.env ## Updated gymnasium wrapper adds lots of wrappers.
82
- results = eval_model_in_sim(cfg, model_.to(cfg.device), device=cfg.device, log_dir=log_dir,
 
83
  env=env, env_unwrapped=env_unwrapped,
84
- buffer=cBuffer, wandb=None, iter_=0, tokenizer=tokenizer, text_model=text_model)
85
  print("results:", results)
86
 
87
  # cbuffer.save(cfg.dataset.to_name)
@@ -111,7 +145,8 @@ def run_evaluation_loop():
111
 
112
  # 2. Filter for Pending Submissions
113
  # Assuming columns: [model_id, status, submitted_by]
114
- pending_rows = requests_df[requests_df["status"] == "Pending"]
 
115
 
116
  if len(pending_rows) == 0:
117
  return "No pending submissions."
@@ -167,18 +202,24 @@ def run_evaluation_loop():
167
 
168
  return f"Processed {model_id}: Score {score}"
169
 
170
- # --- GRADIO UI (To keep the Space running) ---
171
- with gr.Blocks() as demo:
172
- gr.Markdown("# RL Evaluation Backend")
173
- gr.Markdown("This space runs in the background to evaluate new submissions.")
174
 
175
- # A button to manually trigger eval (useful for debugging)
176
- eval_btn = gr.Button("Run Evaluator Now")
177
- output = gr.Textbox(label="Logs")
178
 
179
- eval_btn.click(fn=run_evaluation_loop, outputs=output)
180
 
181
- # Auto-run every 60 seconds (requires Gradio 'live' updates or external scheduler)
182
- # In a real deployment, you might use a simplified cron loop or `gradio.Timer`
183
 
184
- demo.queue().launch()
 
 
 
 
 
 
 
11
  HF_TOKEN = os.environ.get("HF_TOKEN")
12
  REQUESTS_DATASET = "gberseth/rl-leaderboard-requests" # REPLACE THIS
13
  RESULTS_DATASET = "gberseth/rl-leaderboard-results" # REPLACE THIS
 
14
  EVAL_EPISODES = 10 # How many times to run the agent
15
 
16
  # Authenticate
17
+ # login(token=HF_TOKEN)
18
  api = HfApi()
19
 
20
  def evaluate_policy(model_id):
 
26
  # 1. Download the model repository
27
  # We look for a file named "ppo_cartpole.zip" or just standard "model.zip"
28
  # Adjust 'allow_patterns' to match what you require users to submit.
29
+ repo_path = snapshot_download(repo_id=model_id, allow_patterns=["*.pth", "*.pt", "*.zip", "*.yaml", "*.py"])
30
 
31
  # Find the .zip file in the downloaded folder
32
  model_file = None
33
  for root, dirs, files in os.walk(repo_path):
34
  for file in files:
35
+ if file.endswith(".pth"):
36
  model_file = os.path.join(root, file)
37
+ if file.endswith("model.py"):
38
+ grp_file_path = os.path.join(root, file)
39
+ if file.endswith(".yaml") or file.endswith(".yalm"):
40
+ hydra_config_file_path = os.path.join(root, file)
41
 
42
  if not model_file:
43
+ return None, "Error: No .pth model file found in repo."
44
 
45
  # 2. Load the PPO Agent
46
  # custom_objects map may be needed if python versions differ, but usually fine for PPO
 
47
  import torch
48
  # ------------
49
  # Train and test splits
50
  # Loading data
51
  # create RLDS dataset builder
52
+ # log_dir = hydra.core.hydra_config.HydraConfig.get().runtime.output_dir
53
+ ## Load the hydra config
54
+ from omegaconf import OmegaConf
55
+ cfg = OmegaConf.load(hydra_config_file_path)
56
  cfg.dataset.load_dataset = "skip"
57
+ ## load the GRP model from the file downloaded in the snapshot
58
+ # Dynamically load the module
59
+ import importlib.util, sys
60
+ # module_name = "GRP"
61
+ # spec = importlib.util.spec_from_file_location(module_name, grp_file_path)
62
+ # if spec is None:
63
+ # print(f"Could not find a spec for module {module_name} at {grp_file_path}")
64
+ # return None
65
+
66
+ # module = importlib.util.module_from_spec(spec)
67
+ # sys.modules[module_name] = module
68
+ # try:
69
+ # spec.loader.exec_module(module)
70
+ # print(f"Successfully loaded module: {module_name}")
71
+ # except Exception as e:
72
+ # print(f"Error executing module {module_name}: {e}") # module_name = "GRP"
73
+ # spec = importlib.util.spec_from_file_location(module_name, grp_file_path)
74
+ # if spec is None:
75
+ # print(f"Could not find a spec for module {module_name} at {grp_file_path}")
76
+ # return None
77
+
78
+ # module = importlib.util.module_from_spec(spec)
79
+ # sys.modules[module_name] = module
80
+ # try:
81
+ # spec.loader.exec_module(module)
82
+ # print(f"Successfully loaded module: {module_name}")
83
+ # except Exception as e:
84
+ # print(f"Error executing module {module_name}: {e}")
85
+ sys.path.insert(0, repo_path+"/")
86
+ from grp_model import GRP
87
+
88
+ model_ = torch.load(model_file)
89
  # model_._cgf = cfg
90
  # model = PPO.load(model_file)
91
+ print("Memory used by the model:", torch.cuda.memory_allocated(cfg.device) / 1e6, "MB") ## This to the database later.
92
 
93
  # 3. Run Evaluation Loop
94
 
 
100
  text_model = T5ForConditionalGeneration.from_pretrained(cfg.dataset.t5_version)
101
 
102
  if "libero" in cfg.simEval:
103
+ results = eval_libero(model_.to(cfg.device), device=cfg.device, cfg=cfg,
104
  iter_=0, tokenizer=tokenizer, text_model=text_model, wandb=None,
105
+ log_dir="./")
106
  if "simple_env" in cfg.simEval:
107
  import simpler_env
108
  task_name = "widowx_carrot_on_plate" # @param ["google_robot_pick_coke_can", "google_robot_move_near", "google_robot_open_drawer", "google_robot_close_drawer", "widowx_spoon_on_towel", "widowx_carrot_on_plate", "widowx_stack_cube", "widowx_put_eggplant_in_basket"]
 
112
  del env
113
  env = simpler_env.make(task_name)
114
  env_unwrapped = env.env.env.env ## Updated gymnasium wrapper adds lots of wrappers.
115
+ from sim_eval import eval_model_in_sim
116
+ results = eval_model_in_sim(cfg, model_.to(cfg.device), device=cfg.device, log_dir="./",
117
  env=env, env_unwrapped=env_unwrapped,
118
+ wandb=None, iter_=0, tokenizer=tokenizer, text_model=text_model)
119
  print("results:", results)
120
 
121
  # cbuffer.save(cfg.dataset.to_name)
 
145
 
146
  # 2. Filter for Pending Submissions
147
  # Assuming columns: [model_id, status, submitted_by]
148
+ # pending_rows = requests_df[requests_df["status"] == "Pending"]
149
+ pending_rows = requests_df[requests_df["status"].isin(["Pending", "In Progress", "Failed"])]
150
 
151
  if len(pending_rows) == 0:
152
  return "No pending submissions."
 
202
 
203
  return f"Processed {model_id}: Score {score}"
204
 
205
+ # # --- GRADIO UI (To keep the Space running) ---
206
+ # with gr.Blocks() as demo:
207
+ # gr.Markdown("# RL Evaluation Backend")
208
+ # gr.Markdown("This space runs in the background to evaluate new submissions.")
209
 
210
+ # # A button to manually trigger eval (useful for debugging)
211
+ # eval_btn = gr.Button("Run Evaluator Now")
212
+ # output = gr.Textbox(label="Logs")
213
 
214
+ # eval_btn.click(fn=run_evaluation_loop, outputs=output)
215
 
216
+ # # Auto-run every 60 seconds (requires Gradio 'live' updates or external scheduler)
217
+ # # In a real deployment, you might use a simplified cron loop or `gradio.Timer`
218
 
219
+ # demo.queue().launch()
220
+
221
+ if __name__ == "__main__":
222
+ # while True:
223
+ log = run_evaluation_loop()
224
+ print(log)
225
+ # time.sleep(60) # Check every 60 seconds
requests.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ model_id,status,created_at,submitted_by
2
+ gberseth/mini-grp,Done,2026-01-05 23:02:20,
results.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ model_id,mean_reward,status,completed_at
2
+ gberseth/mini-grp,-0.49810627,Success,
sim_eval.py ADDED
@@ -0,0 +1,273 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
def get_text_tokens(cfg, tokenizer, text_model, goal, model=None):
    """Return the text tokens/embeddings for a goal string.

    Prefers the model's own `encode_text_goal` when a `model` is provided, so
    callers don't need a replay buffer.  Otherwise falls back to encoding the
    goal directly with the T5 encoder.

    Args:
        cfg: config; reads `dataset.encode_with_t5`, `max_block_size`, `n_embd`.
        tokenizer: T5 tokenizer (fallback path only).
        text_model: T5 model whose `.encoder` produces embeddings (fallback path only).
        goal: natural-language instruction string.
        model: optional policy model exposing `encode_text_goal`.

    Returns:
        Whatever `model.encode_text_goal` returns, or a numpy array of shape
        (1, max_block_size, n_embd) holding the zero-padded T5 goal embedding.

    Raises:
        RuntimeError: when no model is given and T5 encoding is disabled — the
            legacy buffer-based string encoding is not available here.
    """
    if model is not None:
        return model.encode_text_goal(goal, tokenizer=tokenizer, text_model=text_model)
    # Fallback to legacy behaviour: encode with T5 directly.
    if not cfg.dataset.encode_with_t5:
        # Guard clause replaces the old else-branch, which built a padded goal
        # string and then unconditionally raised anyway (dead code removed).
        raise RuntimeError("Text encoding without model requires a buffer; pass model into get_text_tokens")
    goal_ = np.zeros((cfg.max_block_size, cfg.n_embd), dtype=np.float32)
    input_ids = tokenizer(goal, return_tensors="pt").input_ids
    goal_t = text_model.encoder(input_ids).last_hidden_state.detach().cpu().numpy()  ## Get the goal embedding
    # Overwrite just the zeros up to the size of this vector; shorter goals
    # remain zero-padded out to max_block_size.
    goal_[:len(goal_t[0]), :] = goal_t[0][:cfg.max_block_size]
    return np.expand_dims(goal_, axis=0)
22
+
23
def get_blocked_mask(cfg, targets=None, T=0):
    """Build a visibility mask over [CLS | stacked obs patches | text goal | goal image].

    During training (``targets`` is not None) either the text-goal segment or
    the goal-image segment is randomly zeroed so the model learns to act from
    either form of conditioning; during evaluation (``targets is None``)
    nothing is blocked.

    Args:
        cfg: config; reads ``policy.obs_stacking`` and ``device``.
        targets: training targets, or ``None`` for evaluation (no masking).
        T: length of the text-goal token segment.

    Returns:
        A 1-D tensor of length ``1 + 192*obs_stacking + T + 192`` with ones at
        visible positions and zeros over the blocked segment.
    """
    c = 192  ## Number of patches/channels in the image
    n_obs = c * cfg.policy.obs_stacking
    mask = torch.ones((1 + n_obs + T + c,), device=cfg.device)
    if targets is None:
        pass  # evaluation: keep every segment visible
    elif (torch.rand(1)[0] > 0.66):
        mask[1 + n_obs: 1 + n_obs + T] = torch.zeros((1, T), device=cfg.device)  ## Mask goal string
    elif (torch.rand(1)[0] > 0.33):
        # NOTE: this is a second, independent draw (preserved from the original),
        # so the goal image is masked with probability ~0.66 * 0.67 overall.
        mask[1 + n_obs + T: 1 + n_obs + T + c] = torch.zeros((1, c), device=cfg.device)  ## Mask goal image
    # BUG FIX: the original computed the mask but never returned it, so callers
    # (e.g. `mask = get_blocked_mask(...)` in eval_libero) always received None.
    return mask
33
+
34
def eval_model_in_sim(cfg, model, device, log_dir, env, env_unwrapped,
                      wandb, iter_, tokenizer=None, text_model=None):
    """Roll out the model in a SimplerEnv (ManiSkill2) task and collect rewards.

    Args:
        cfg: experiment config; reads `sim.eval_episodes`, `policy.obs_stacking`,
            and `testing` (when false, metrics/videos are logged to wandb).
        model: policy exposing `preprocess_state`, `preprocess_goal_image`,
            `forward`, `decode_action` (and `encode_text_goal` via get_text_tokens).
        device: torch device for model inputs.
        log_dir: directory used to build the rollout-video path.
        env: the (wrapped) simpler_env environment.
        env_unwrapped: raw underlying env, used for image extraction and the
            language instruction.
        wandb: wandb module or None; only touched when `cfg.testing` is false.
        iter_: iteration index embedded in the video filename.
        tokenizer, text_model: optional T5 pieces for text-goal encoding.

    Returns:
        The `episode_stats` dict of the LAST episode, with the mean reward under
        key 'rewards'.  NOTE(review): `rewards` accumulates across all episodes
        without being reset, so the mean mixes episodes — confirm whether a
        per-episode mean was intended.
    """
    from simpler_env.utils.env.observation_utils import get_image_from_maniskill2_obs_dict
    print("Evaluating model in sim environment")
    from collections import deque
    from einops import rearrange

    rewards = []
    for j in range(cfg.sim.eval_episodes): ## Better to eval over a few different goal configurations
        obs, reset_info = env.reset()
        # First camera frame; keep only the RGB channels.
        obs_ = get_image_from_maniskill2_obs_dict(env_unwrapped, obs)[:,:,:3]
        # Seed the observation-history buffer with copies of the first frame.
        obs_hist = deque(maxlen=cfg.policy.obs_stacking)
        obs_hist.append(obs_)
        obs_hist.append(obs_)
        obs_hist.append(obs_)  # NOTE(review): fills the deque only if obs_stacking <= 3 — confirm
        instruction = env_unwrapped.get_language_instruction()
        # print("Reset info", reset_info)
        print("Instruction", instruction)
        frames = []
        done, truncated, timeLimit, t = False, False, 100, 0
        txt_goal = get_text_tokens(cfg, tokenizer, text_model, instruction, model=model)
        while not (done or truncated or (t > timeLimit)):
            # action[:3]: delta xyz; action[3:6]: delta rotation in axis-angle representation;
            # action[6:7]: gripper (the meaning of open / close depends on robot URDF)
            image = get_image_from_maniskill2_obs_dict(env_unwrapped, obs)
            image = image[:,:,:3] ## Remove last dimension of image color

            obs_hist.append(image) ## Add the new observation to the history buffer
            # obs = [obs_["image"] for obs_ in obs] # obs is a list of dicts
            image = np.stack(obs_hist, axis=-1) # stack history along a new last dimension
            image = rearrange(image, 'h w c t -> h w (c t)') # fold the history into the channel dimension

            obs_state = model.preprocess_state(image).to(device)
            goal_state = model.preprocess_goal_image(image[:,:,:3]).to(device)
            action, loss = model.forward(torch.tensor(obs_state.unsqueeze(0), dtype=torch.float32).to(device)
                ,torch.tensor(txt_goal).to(device)
                ,torch.tensor(goal_state.unsqueeze(0), dtype=torch.float32).to(device),
                mask_=True, ## Masks goal image
                pose=torch.tensor([[obs["extra"]["tcp_pose"]]], dtype=torch.float32).to(device),
                )

            action = model.decode_action(action[0]).cpu().detach().numpy() ## Add in the gripper close action
            obs, reward, done, truncated, info = env.step(action)
            # Replace the env reward with negative end-effector-to-object distance (dense shaping).
            reward = -np.linalg.norm(info["eof_to_obj1_diff"])
            frames.append(image)
            rewards.append(reward)
            t=t+1

        episode_stats = info.get('episode_stats', {})
        episode_stats['rewards'] = np.mean(rewards)
        # print("Episode stats", episode_stats)
        # print(f"avg reward {np.mean(episode_stats['rewards']):.8f}")
        if not cfg.testing:
            wandb.log({"avg reward": np.mean(rewards)})
        import moviepy.editor as mpy
        clip = mpy.ImageSequenceClip(list(frames), fps=20)
        path_ = log_dir+"/sim-env-"+str(iter_)+".mp4"
        # clip.write_videofile(path_, fps=20, audio=False, logger=None) ## Getting weird Nonetype issues. Will need to fix version issue later.
        if not cfg.testing:
            wandb.log({"example": wandb.Video(path_)})
    return episode_stats
95
+
96
+ import gymnasium as gym
97
+ # --- History Stacking Wrapper ---
98
class DictWrapper(gym.ObservationWrapper):
    # from gymnasium.spaces import Box
    """
    A wrapper that grabs the observation from a specific key in the dict
    returned by the underlying (LIBERO) environment.

    NOTE(review): `super().__init__` is deliberately not called (see the
    commented-out line in __init__) and `self.env` is assigned directly, so
    gymnasium's usual wrapper plumbing is bypassed — confirm this is intended.
    """
    def __init__(self, env, obs_key=""):
        # gym.Wrapper.__init__(self, env)
        self.env = env
        # Advertise a plain image observation space in place of the env's dict space.
        self.observation_space = gym.spaces.Box(
            low=0,
            high=255,
            shape=(128,128,3), # Assuming the observation is an image of size 128x128 with 3 color channels
            dtype=np.uint8)
        self._obs_key = obs_key

    def observation(self, observation):
        """
        Extract the configured key from a dict observation.

        Normally called by gym.ObservationWrapper's default step/reset, but both
        are overridden below, so this hook is effectively unused here.
        """
        return observation[self._obs_key]

    def step(self, action):
        """
        Step the environment and return the observation from the specified key.

        Returns a gymnasium-style 5-tuple where `truncated` is always False and
        the FULL raw observation dict occupies the `info` slot — eval_libero
        relies on that dict for robot proprioception.
        """
        obs, reward, done, info = self.env.step(action) ## LIBERO does not return truncated
        return obs[self._obs_key][::-1, :, :], reward, done, False, obs ## Not sure why the image was upside down.

    def reset(self, **kwargs):
        """
        Reset the environment and return the observation from the specified key.

        `kwargs` are accepted for API compatibility but not forwarded.  Returns
        (vertically flipped image, raw observation dict); the dict stands in
        for gymnasium's `info`.
        """
        obs = self.env.reset()
        return obs[self._obs_key][::-1, :, :], obs
134
+
135
def eval_libero(model, device, cfg, iter_=0, log_dir="./",
                tokenizer=None, text_model=None, wandb=None):
    # cfg, model, device, log_dir, env, env_unwrapped, buffer,
    # wandb, iter_, tokenizer=None, text_model=None):
    """Evaluate the model on the LIBERO-90 task suite.

    For each task id in `cfg.sim.eval_tasks`: builds a dense-reward LIBERO env,
    wraps it for single-image + frame-stacked observations, rolls the model out
    for up to 250 steps, and writes a rollout video to `log_dir`.

    Args:
        model: policy exposing `preprocess_state`, `preprocess_goal_image`,
            `forward`, `decode_action` (and `encode_text_goal` via get_text_tokens).
        device: torch device for model inputs.
        cfg: config; reads `sim.eval_tasks`, `policy.obs_stacking`, `testing`.
        iter_: iteration index embedded in the video filename.
        log_dir: directory for rollout videos.
        tokenizer, text_model: optional T5 pieces for text-goal encoding.
        wandb: wandb module or None; only touched when `cfg.testing` is false.

    Returns:
        None.  NOTE(review): the caller does `results = eval_libero(...)` and
        prints it — consider returning per-task mean rewards.
    """
    from libero.libero import benchmark
    from libero.libero.envs import OffScreenRenderEnv, DenseRewardEnv
    import os
    from libero.libero.utils import get_libero_path
    from gymnasium.wrappers import FrameStackObservation
    from einops import rearrange


    benchmark_dict = benchmark.get_benchmark_dict()
    task_suite_name = "libero_90" # can also choose libero_spatial, libero_object, etc.
    task_suite = benchmark_dict[task_suite_name]()

    # retrieve a specific task
    tasks = cfg.sim.eval_tasks
    for task_id in tasks:
        task = task_suite.get_task(task_id)
        task_name = task.name
        instruction = task.language
        task_bddl_file = os.path.join(get_libero_path("bddl_files"), task.problem_folder, task.bddl_file)
        print(f"[info] retrieving task {task_id} from suite {task_suite_name}, the " + \
              f"language instruction is {instruction}, and the bddl file is {task_bddl_file}")

        # step over the environment
        env_args = {
            "bddl_file_name": task_bddl_file,
            "camera_heights": 128,
            "camera_widths": 128
        }
        env = DenseRewardEnv(**env_args)
        env.seed(0)
        init_states = task_suite.get_task_init_states(task_id) # for benchmarking purposes, we fix a set of initial states
        init_state_id = 0
        env.set_init_state(init_states[init_state_id])
        env = FrameStackObservation(DictWrapper(env, obs_key="agentview_image"), cfg.policy.obs_stacking) ## Stacking the observations
        obs, info = env.reset()

        # NOTE(review): `mask` is never used below (and get_blocked_mask as
        # written does not return a value) — confirm whether it should be
        # passed into model.forward instead of `mask_=True`.
        mask = get_blocked_mask(cfg, targets=None, T=0) ## Get the blocked mask

        txt_goal = get_text_tokens(cfg, tokenizer, text_model, instruction, model=model)
        image_goal = obs.reshape((128, 128, 3*cfg.policy.obs_stacking))[:,:,:3] ## Assuming the observation is an image of size 128x128 with 3 color channels
        frames = []
        rewards = []
        infos = []
        for step_ in range(250):
            ## Reshape the image to the correct size and stack the history on the last channel dimension
            image = obs[0]
            # obs = obs.reshape((128, 128, 3*cfg.policy.obs_stacking)) ## Assuming the observation is an image of size 128x128 with 3 color channels
            obs = rearrange(obs, 't h w c -> h w (t c)', c=3, t=cfg.policy.obs_stacking) ## Rearranging the image to have the stacked history in the last channel dimension
            # image = obs[:,:,:3] ## Remove the last dimension of the image color
            obs_state = model.preprocess_state(obs)
            goal_state = model.preprocess_goal_image(image_goal)
            # NOTE(review): the gripper term below subtracts an element from
            # itself (qpos[0] - qpos[0] == 0, so the comparison is always True);
            # it presumably should be qpos[0] - qpos[1] — confirm before fixing.
            action, loss = model.forward(torch.tensor(np.array([obs_state])).to(device)
                ,torch.tensor(txt_goal).to(device)
                ,torch.tensor(np.array([goal_state])).to(device), ## Not the correct goal image... Should mask this.
                mask_=True,
                pose=torch.tensor([[np.concatenate( (info["robot0_eef_pos"],
                                  info["robot0_eef_quat"][:3],
                                  [(info["robot0_gripper_qpos"][0] - info["robot0_gripper_qpos"][0]) < 0.005 ]), axis=-1)]], dtype=torch.float32).to(device),
                morphology=torch.tensor([0], dtype=torch.uint8).to(device) ## Morphology is 0 for arm, 1 for A1
                )

            action = model.decode_action(action[0,0,:7]).cpu().detach().numpy() ## Add in the gripper close action
            frames.append(image)
            x = env.step(action)
            # `info` here is the raw LIBERO observation dict (see DictWrapper.step),
            # which is why robot proprioception can be read from it next iteration.
            obs, reward, done, truncated, info = x
            rewards.append(reward)
            infos.append(info)
            if done:
                print("Episode finished after {} timesteps".format(step_))
                break

        print(f"avg reward {np.mean(rewards):.8f}")
        if not cfg.testing:
            wandb.log({"avg reward_"+str(task_id): np.mean(rewards)})
        import moviepy.editor as mpy
        clip = mpy.ImageSequenceClip(list(frames), fps=20)
        path_ = log_dir+"/sim-libero-90-"+str(task_id)+"-"+str(iter_)+".mp4"
        clip.write_videofile(path_, fps=20)
        if not cfg.testing:
            wandb.log({"example": wandb.Video(path_)})
        env.close()
221
+
222
+ import hydra
223
+ from omegaconf import DictConfig
224
+ from mini_grp import *
225
+
226
@hydra.main(config_path="./conf", config_name="libero-simpleEnv-64pix-pose")
def my_main(cfg: DictConfig):
    """Hydra entry point: load the saved GRP model and run the configured sim evals.

    Loads `miniGRP.pth` from the original working directory, optionally loads a
    T5 tokenizer/model for text-goal encoding, then dispatches to `eval_libero`
    and/or `eval_model_in_sim` depending on `cfg.simEval`.
    """
    from mini_shuffel_buffer import CircularBuffer  # NOTE(review): imported but unused here
    import torch
    # ------------
    # Train and test splits
    # Loading data
    # create RLDS dataset builder
    log_dir = hydra.core.hydra_config.HydraConfig.get().runtime.output_dir
    cfg.dataset.load_dataset = "skip"
    # model = GRP(cfg)
    # model_ = torch.load("/home/gberseth/playground/mini_grp/miniGRP.pth")
    model_dir = hydra.utils.get_original_cwd()+"/mini-grp/miniGRP.pth"
    print ("Loading model from:", model_dir)
    # SECURITY NOTE(review): torch.load unpickles arbitrary code from the file;
    # only load checkpoints from trusted sources (consider weights_only=True).
    model_ = torch.load(model_dir)
    # model_._cgf = cfg

    tokenizer = None
    text_model = None
    if cfg.dataset.encode_with_t5: ## Load T5 model
        from transformers import T5Tokenizer, T5ForConditionalGeneration
        tokenizer = T5Tokenizer.from_pretrained(cfg.dataset.t5_version)
        text_model = T5ForConditionalGeneration.from_pretrained(cfg.dataset.t5_version)

    if "libero" in cfg.simEval:
        results = eval_libero(model_.to(cfg.device), device=cfg.device, cfg=cfg,
                              iter_=0, tokenizer=tokenizer, text_model=text_model, wandb=None,
                              log_dir=log_dir)
    if "simple_env" in cfg.simEval:
        import simpler_env
        task_name = "widowx_carrot_on_plate" # @param ["google_robot_pick_coke_can", "google_robot_move_near", "google_robot_open_drawer", "google_robot_close_drawer", "widowx_spoon_on_towel", "widowx_carrot_on_plate", "widowx_stack_cube", "widowx_put_eggplant_in_basket"]
        if 'env' in locals():
            print("Closing existing env")
            env.close()
            del env
        env = simpler_env.make(task_name)
        env_unwrapped = env.env.env.env ## Updated gymnasium wrapper adds lots of wrappers.
        results = eval_model_in_sim(cfg, model_.to(cfg.device), device=cfg.device, log_dir=log_dir,
                                    env=env, env_unwrapped=env_unwrapped,
                                    wandb=None, iter_=0, tokenizer=tokenizer, text_model=text_model)
    # NOTE(review): `results` is unbound (NameError) if cfg.simEval matches
    # neither "libero" nor "simple_env"; eval_libero also returns None.
    print("results:", results)
267
+
268
+ # cbuffer.save(cfg.dataset.to_name)
269
+
270
+
271
+ if __name__ == "__main__":
272
+ results = my_main()
273
+ print("results:", results)