Neo-X committed on
Commit
3e934a0
·
1 Parent(s): e18f7ef

Updating simulation and adding live evaluation loop.

Browse files
Files changed (4) hide show
  1. DockerFile → Dockerfile +36 -10
  2. README.md +9 -1
  3. main.py +19 -0
  4. sim_eval.py +196 -80
DockerFile → Dockerfile RENAMED
@@ -1,11 +1,16 @@
1
  # Base container that includes all dependencies but not the actual repo
2
  # Updated from templates in the [softlearning (SAC) library](https://github.com/rail-berkeley/softlearning)
3
 
 
 
4
  FROM dsalvat1/cudagl:12.3.1-runtime-ubuntu22.04
5
-
 
 
6
 
7
  SHELL ["/bin/bash", "-c"]
8
 
 
9
  ENV DEBIAN_FRONTEND="noninteractive"
10
  # See http://bugs.python.org/issue19846
11
  ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
@@ -59,16 +64,19 @@ RUN source activate roble
59
  # Set the working directory for your application
60
  WORKDIR /playground
61
 
 
62
  ## Install the requirements for your learning code.
63
  COPY requirements.txt requirements.txt
64
  RUN pip install -r requirements.txt
 
 
65
 
66
  ## Install pytorch and cuda
67
  RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
68
 
69
  ## Install simulators simpleEnv
70
- RUN apt-get update && apt-get install -y --no-install-recommends git cmake build-essential libgl1 libglib2.0-0 libsm6 libxext6 libxrender1
71
- RUN pip install cmake==3.24.3
72
  RUN git clone https://github.com/milarobotlearningcourse/SimplerEnv --recurse-submodules
73
  ## Change directory to SimplerEnv and install ManiSkill2 and ManiSkill2_real2sim
74
  # RUN cd SimplerEnv/ManiSkill2
@@ -96,12 +104,30 @@ ENV NVIDIA_DRIVER_CAPABILITIES=all
96
  ENV SAP_NO_GUI=1
97
  ENV DISPLAY=:0
98
 
 
 
 
99
  ## Install Libero
100
  # RUN pip install cmake==3.24.3
101
- # RUN
102
-
103
- ## Check the file were copied
104
- RUN ls
105
- COPY --link . /playground
106
-
107
- ENTRYPOINT [ "python" ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # Base container that includes all dependencies but not the actual repo
2
  # Updated from templates in the [softlearning (SAC) library](https://github.com/rail-berkeley/softlearning)
3
 
4
+ # FROM nvidia/cuda:12.9.1-cudnn-runtime-ubuntu24.04
5
+ # FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
6
  FROM dsalvat1/cudagl:12.3.1-runtime-ubuntu22.04
7
+ # FROM nvidia/cuda:11.6.2-runtime-ubuntu20.04 as base
8
+ # ARCH and CUDA are specified again because the FROM directive resets ARGs
9
+ # (but their default value is retained if set previously)
10
 
11
  SHELL ["/bin/bash", "-c"]
12
 
13
+
14
  ENV DEBIAN_FRONTEND="noninteractive"
15
  # See http://bugs.python.org/issue19846
16
  ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
 
64
  # Set the working directory for your application
65
  WORKDIR /playground
66
 
67
+ RUN apt-get update && apt-get install -y --no-install-recommends git cmake build-essential libgl1 libglib2.0-0 libsm6 libxext6 libxrender1 ffmpeg libx264-dev
68
  ## Install the requirements for your learning code.
69
  COPY requirements.txt requirements.txt
70
  RUN pip install -r requirements.txt
71
+ # RUN pip freeze
72
+ # RUN python -c "import libero"
73
 
74
  ## Install pytorch and cuda
75
  RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
76
 
77
  ## Install simulators simpleEnv
78
+ # RUN pip install cmake==3.24.3
79
+ RUN conda install -c conda-forge cmake
80
  RUN git clone https://github.com/milarobotlearningcourse/SimplerEnv --recurse-submodules
81
  ## Change directory to SimplerEnv and install ManiSkill2 and ManiSkill2_real2sim
82
  # RUN cd SimplerEnv/ManiSkill2
 
104
  ENV SAP_NO_GUI=1
105
  ENV DISPLAY=:0
106
 
107
+ # Add LIBERO to Python path
108
+ # ENV PYTHONPATH=/playground/LIBERO:$PYTHONPATH
109
+
110
  ## Install Libero
111
  # RUN pip install cmake==3.24.3
112
+ RUN apt-get update && apt-get install -y --no-install-recommends \
113
+ cmake \
114
+ libglvnd-dev \
115
+ libgl1-mesa-dev \
116
+ libegl1-mesa-dev \
117
+ libgles2-mesa-dev \
118
+ libgbm-dev \
119
+ build-essential \
120
+ && rm -rf /var/lib/apt/lists/*
121
+
122
+
123
+ RUN git clone https://github.com/montrealrobotics/LIBERO.git
124
+ # COPY --link ./LIBERO /playground/LIBERO
125
+ RUN pip install -r ./LIBERO/requirements.txt
126
+ RUN pip install -e ./LIBERO
127
+ # ENV PYTHONPATH=/playground/LIBERO:$PYTHONPATH
128
+ RUN python -c "import libero"
129
+
130
+ # COPY --link . /playground
131
+
132
+ # ENTRYPOINT [ "python" ]
133
+ CMD
README.md CHANGED
@@ -14,4 +14,12 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
14
  Submit your files in the form
15
 
16
  miniGRP.pth
17
- conf/config.yaml
 
 
 
 
 
 
 
 
 
14
  Submit your files in the form
15
 
16
  miniGRP.pth
17
+ conf/config.yaml
18
+ grp_model.py
19
+
20
+
21
+ ## Build the Docker Image
22
+
23
+ ```
24
+ docker build -t ghcr.io/neo-x/mini-grp/roble:latest .
25
+ ```
main.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import gymnasium as gym
3
+ # from stable_baselines3 import PPO
4
+ from huggingface_hub import HfApi, snapshot_download, login
5
+ import pandas as pd
6
+ import os
7
+ import shutil
8
+ import time
9
+
10
+ # Authenticate
11
+ # login(token=HF_TOKEN)
12
+ api = HfApi()
13
+ from app import run_evaluation_loop
14
+
15
+ if __name__ == "__main__":
16
+ # while True:
17
+ log = run_evaluation_loop()
18
+ print(log)
19
+ # time.sleep(60) # Check every 60 seconds
sim_eval.py CHANGED
@@ -1,5 +1,6 @@
1
 
2
  import dill
 
3
  import numpy as np
4
  import torch
5
 
@@ -46,6 +47,7 @@ def eval_model_in_sim(cfg, model, device, log_dir, env, env_unwrapped,
46
  obs, reset_info = env.reset()
47
  obs_ = get_image_from_maniskill2_obs_dict(env_unwrapped, obs)[:,:,:3]
48
  obs_hist = deque(maxlen=cfg.policy.obs_stacking)
 
49
  for _ in range(cfg.policy.obs_stacking):
50
  obs_hist.append(obs_)
51
  instruction = env_unwrapped.get_language_instruction()
@@ -64,14 +66,22 @@ def eval_model_in_sim(cfg, model, device, log_dir, env, env_unwrapped,
64
 
65
  obs_state = torch.tensor(model.preprocess_state(image), dtype=torch.float32)
66
  goal_state = torch.tensor(model.preprocess_goal_image(image[:,:,:3]), dtype=torch.float32)
 
 
 
 
 
 
67
  action, loss = model.forward(torch.tensor(obs_state.unsqueeze(0), dtype=torch.float32).to(device)
68
  ,torch.tensor(txt_goal).to(device)
69
  ,torch.tensor(goal_state.unsqueeze(0), dtype=torch.float32).to(device),
70
  mask_=True, ## Masks goal image
71
  pose=torch.tensor([[obs["extra"]["tcp_pose"]]], dtype=torch.float32).to(device),
 
72
  )
73
 
74
  action = model.decode_action(action[0]).cpu().detach().numpy() ## Add in the gripper close action
 
75
  ## If the actions are stacked into a longer vector execute the sequence of actions
76
  for step_ in range(cfg.policy.action_stacking):
77
  act_ = action[cfg.action_dim*step_:(cfg.action_dim*(step_+1))]
@@ -120,7 +130,7 @@ class DictWrapper(gym.ObservationWrapper):
120
  self.observation_space = gym.spaces.Box(
121
  low=0,
122
  high=255,
123
- shape=(128,128,3), # Assuming the observation is an image of size 128x128 with 3 color channels
124
  dtype=np.uint8)
125
  self._obs_key = obs_key
126
 
@@ -161,8 +171,19 @@ def eval_libero(model, device, cfg, iter_=0, log_dir="./",
161
 
162
 
163
  benchmark_dict = benchmark.get_benchmark_dict()
164
- task_suite_name = "libero_90" # can also choose libero_spatial, libero_object, etc.
165
  task_suite = benchmark_dict[task_suite_name]()
 
 
 
 
 
 
 
 
 
 
 
166
 
167
  # retrieve a specific task
168
  tasks = cfg.sim.eval_tasks
@@ -177,94 +198,180 @@ def eval_libero(model, device, cfg, iter_=0, log_dir="./",
177
  # step over the environment
178
  env_args = {
179
  "bddl_file_name": task_bddl_file,
180
- "camera_heights": 128,
181
- "camera_widths": 128
182
  }
183
- env = DenseRewardEnv(**env_args)
184
  env.seed(0)
185
- init_states = task_suite.get_task_init_states(task_id) # for benchmarking purpose, we fix the a set of initial states
186
- init_state_id = 0
187
- env.set_init_state(init_states[init_state_id])
188
- env = FrameStackObservation(DictWrapper(env, obs_key="agentview_image"), cfg.policy.obs_stacking) ## Stacking the observations
189
- obs, info = env.reset()
190
- # obs_hist = deque(maxlen=cfg.policy.obs_stacking)
191
- # for _ in range(cfg.policy.obs_stacking):
192
- # obs_hist.append(obs)
193
-
194
- mask = get_blocked_mask(cfg, targets=None, T=0) ## Get the blocked mask
195
 
196
- txt_goal = get_text_tokens(cfg, tokenizer, text_model, instruction, model=model)
197
- image_goal = obs.reshape((128, 128, 3*cfg.policy.obs_stacking))[:,:,:3] ## Assuming the observation is an image of size 128x128 with 3 color channels
198
- frames = []
199
- rewards = []
200
- infos = []
201
- done, truncated, timeLimit, t, wait_steps = False, False, 400, 0, 10
202
- while not (done or truncated or (t > (timeLimit + wait_steps))):
203
- ## Reshape the image to the correct size and stack the hostory on the last channel dimension
204
- # image = obs[0]
205
- if t < wait_steps: ## let object stabalize before acting.
206
- obs, reward, done, truncated, info = env.step([0,0,0,0,0,0,-1])
207
- # obs_hist.append(obs)
208
- t += 1
209
- continue
210
- # obs = obs.reshape((128, 128, 3*cfg.policy.obs_stacking)) ## Assuming the observation is an image of size 128x128 with 3 color channels
211
- obs = rearrange(obs, 't h w c -> h w (t c)', c=3, t=cfg.policy.obs_stacking) ## Rearranging the image to have the stacked history in the last channel dimension
212
- # image = obs[:,:,:3] ## Remove the last dimension of the image color
213
- obs_state = model.preprocess_state(obs)
214
- goal_state = model.preprocess_goal_image(image_goal)
215
- action, loss = model.forward(torch.tensor(np.array([obs_state])).to(device)
216
- ,torch.tensor(txt_goal).to(device)
217
- ,torch.tensor(np.array([goal_state])).to(device), ## Not the correct goal image... Should mask this.
218
- mask_=True,
219
- pose=torch.tensor([[np.concatenate( (info["robot0_eef_pos"],
220
- info["robot0_eef_quat"][:3],
221
- [(info["robot0_gripper_qpos"][0] - info["robot0_gripper_qpos"][0]) < 0.005 ]), axis=-1)]], dtype=torch.float32).to(device),
222
- # morphology=torch.tensor([0], dtype=torch.uint8).to(device) ## Morphology is 0 for arm, 1 for A1}
223
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
 
225
- action = model.decode_action(action[0]).cpu().detach().numpy() ## Add in the gripper close action
226
- ## If the actions are stacked into a longer vector execute the sequence of actions
227
- for step_ in range(cfg.policy.action_stacking):
228
- act_ = action[cfg.action_dim*step_:(cfg.action_dim*(step_+1))]
229
- ## Need to process LIBERO gripper action [0, 1] -> [-1, 1], then invert, https://github.com/moojink/openvla-oft/blob/e4287e94541f459edc4feabc4e181f537cd569a8/experiments/robot/libero/run_libero_eval.py#L265
230
- act_[6] = ((act_[6] - 0.5) * 2) * -1.0
 
 
 
 
 
 
 
 
 
 
231
 
232
- obs, reward, done, truncated, info = env.step(act_)
233
- # image = get_image_from_maniskill2_obs_dict(env_unwrapped, obs)
234
- # image = image[:,:,:3] ## Remove last dimension of image color
235
- # Store the original image for video before stacking/processing
236
- image = obs[0]
237
- frames.append(image)
238
- # reward = -(np.linalg.norm(info["eof_to_obj1_diff"]) + np.linalg.norm(info["eof_to_obj1_diff"])) ## Use a shaped reward as distance between gripper and objects
239
- rewards.append(reward)
240
- infos.append(info)
241
- t=t+1
242
- if done or truncated:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
  break
244
- if done:
245
- print("Episode finished after {} timesteps".format(step_))
246
- break
247
-
248
- episode_stats = info.get('episode_stats', {})
249
- episode_stats['rewards'] = np.mean(rewards)
250
- print(f"avg reward {np.mean(rewards):.8f}")
251
- if not cfg.testing:
252
- wandb.log({"avg reward_"+str(task_id): np.mean(rewards)})
253
- import os
254
- path_ = os.path.join(log_dir, f"libero-{iter_}.mp4")
255
- import imageio
256
- imageio.mimsave(path_, frames, fps=20)
257
- if not cfg.testing:
258
- wandb.log({"example": wandb.Video(path_)})
259
- env.close()
260
- return episode_stats
 
 
 
261
 
262
  import hydra
263
  from omegaconf import DictConfig
264
 
265
  @hydra.main(config_path="./conf", config_name="64pix-pose")
266
  def my_main(cfg: DictConfig):
267
- from mini_shuffel_buffer import CircularBuffer
268
  import torch
269
  # ------------
270
  # Train and test splits
@@ -276,8 +383,17 @@ def my_main(cfg: DictConfig):
276
  # model_ = torch.load("/home/gberseth/playground/mini_grp/miniGRP.pth")
277
  model_dir = hydra.utils.get_original_cwd()+"/mini-grp/miniGRP.pth"
278
  print ("Loading model from:", model_dir)
279
- from grp_model import GRP
280
- model_ = torch.load(model_dir, pickle_module=dill)
 
 
 
 
 
 
 
 
 
281
  # model_._cgf = cfg
282
 
283
  tokenizer = None
 
1
 
2
  import dill
3
+ import h5py
4
  import numpy as np
5
  import torch
6
 
 
47
  obs, reset_info = env.reset()
48
  obs_ = get_image_from_maniskill2_obs_dict(env_unwrapped, obs)[:,:,:3]
49
  obs_hist = deque(maxlen=cfg.policy.obs_stacking)
50
+ last_action = np.zeros(cfg.action_dim) # Track last action taken
51
  for _ in range(cfg.policy.obs_stacking):
52
  obs_hist.append(obs_)
53
  instruction = env_unwrapped.get_language_instruction()
 
66
 
67
  obs_state = torch.tensor(model.preprocess_state(image), dtype=torch.float32)
68
  goal_state = torch.tensor(model.preprocess_goal_image(image[:,:,:3]), dtype=torch.float32)
69
+
70
+ # Prepare last_action tensor if available
71
+ last_action_tensor = None
72
+ if last_action is not None:
73
+ last_action_tensor = torch.tensor(last_action[:cfg.action_dim], dtype=torch.float32).unsqueeze(0).to(device)
74
+
75
  action, loss = model.forward(torch.tensor(obs_state.unsqueeze(0), dtype=torch.float32).to(device)
76
  ,torch.tensor(txt_goal).to(device)
77
  ,torch.tensor(goal_state.unsqueeze(0), dtype=torch.float32).to(device),
78
  mask_=True, ## Masks goal image
79
  pose=torch.tensor([[obs["extra"]["tcp_pose"]]], dtype=torch.float32).to(device),
80
+ last_action=last_action_tensor,
81
  )
82
 
83
  action = model.decode_action(action[0]).cpu().detach().numpy() ## Add in the gripper close action
84
+ last_action = action.copy() # Store for next iteration
85
  ## If the actions are stacked into a longer vector execute the sequence of actions
86
  for step_ in range(cfg.policy.action_stacking):
87
  act_ = action[cfg.action_dim*step_:(cfg.action_dim*(step_+1))]
 
130
  self.observation_space = gym.spaces.Box(
131
  low=0,
132
  high=255,
133
+ shape=(256,256,3), # Assuming the observation is an image of size 256x256 with 3 color channels
134
  dtype=np.uint8)
135
  self._obs_key = obs_key
136
 
 
171
 
172
 
173
  benchmark_dict = benchmark.get_benchmark_dict()
174
+ task_suite_name = cfg.sim.task_set # can also choose libero_spatial, libero_object, etc.
175
  task_suite = benchmark_dict[task_suite_name]()
176
+
177
+ # Load initial states and goal images from Hugging Face dataset if provided
178
+ init_states_dataset = None
179
+ if hasattr(cfg.sim, 'libero_init_state_hf_repo') and cfg.sim.libero_init_state_hf_repo:
180
+ print(f"Loading initial states from Hugging Face: {cfg.sim.libero_init_state_hf_repo}")
181
+ from datasets import load_dataset
182
+ init_states_dataset = load_dataset(cfg.sim.libero_init_state_hf_repo, split='train')
183
+ print(f"Loaded dataset with {len(init_states_dataset)} entries")
184
+ elif hasattr(cfg.sim, 'libero_init_state_file') and cfg.sim.libero_init_state_file:
185
+ print(f"Loading initial states from HDF5: {cfg.sim.libero_init_state_file}")
186
+ init_states_dataset = h5py.File(hydra.utils.get_original_cwd()+cfg.sim.libero_init_state_file, 'r')
187
 
188
  # retrieve a specific task
189
  tasks = cfg.sim.eval_tasks
 
198
  # step over the environment
199
  env_args = {
200
  "bddl_file_name": task_bddl_file,
201
+ "camera_heights": 256,
202
+ "camera_widths": 256
203
  }
204
+ env = DenseRewardEnv(**env_args) # env = OffScreenRenderEnv(**env_args)
205
  env.seed(0)
 
 
 
 
 
 
 
 
 
 
206
 
207
+ # Load initial states from dataset if available, otherwise use default
208
+ task_description = instruction.replace(" ", "_")
209
+ task_demos = None
210
+ if init_states_dataset is not None:
211
+ if isinstance(init_states_dataset, h5py.File):
212
+ # HDF5 format
213
+ if task_description in init_states_dataset:
214
+ task_grp = init_states_dataset[task_description]
215
+ num_init_states = len(task_grp.keys())
216
+ print(f"Loaded {num_init_states} initial states from HDF5 for task: {task_description}")
217
+ else:
218
+ task_grp = None
219
+ init_states = task_suite.get_task_init_states(task_id)
220
+ num_init_states = len(init_states)
221
+ print(f"Using default initial states for task: {task_description}")
222
+ else:
223
+ # Hugging Face dataset format
224
+ task_demos = [item for item in init_states_dataset if item.get('task_description') == task_description]
225
+ num_init_states = len(task_demos)
226
+ if num_init_states > 0:
227
+ print(f"Loaded {num_init_states} initial states from HF dataset for task: {task_description}")
228
+ else:
229
+ init_states = task_suite.get_task_init_states(task_id)
230
+ num_init_states = len(init_states)
231
+ print(f"Using default initial states for task: {task_description}")
232
+ else:
233
+ init_states = task_suite.get_task_init_states(task_id) # for benchmarking purposes, we fix a set of initial states
234
+ num_init_states = len(init_states)
235
+ print(f"Using default initial states for task: {task_description}")
236
+
237
+ # for init_state_id in range(len(init_states)):
238
+ for init_state_id in range(min(2, num_init_states)): ## Just do a couple different initializations for eval
239
+ # Load init_state and goal_img from dataset or use default
240
+ if init_states_dataset is not None:
241
+ if isinstance(init_states_dataset, h5py.File):
242
+ # HDF5 format
243
+ if task_grp is not None:
244
+ demo_key = f"demo_{init_state_id}"
245
+ if demo_key in task_grp:
246
+ init_state = task_grp[demo_key]['init_state'][()]
247
+ goal_img = task_grp[demo_key]['goal_img'][()] if 'goal_img' in task_grp[demo_key] else None
248
+ print(f"Loaded init_state and goal_img from HDF5 for {demo_key}")
249
+ else:
250
+ init_state = init_states[init_state_id]
251
+ goal_img = None
252
+ else:
253
+ init_state = init_states[init_state_id]
254
+ goal_img = None
255
+ else:
256
+ # Hugging Face dataset format
257
+ if task_demos and init_state_id < len(task_demos):
258
+ demo = task_demos[init_state_id]
259
+ init_state = np.array(demo['init_state'])
260
+ goal_img = np.array(demo['goal_img']) if 'goal_img' in demo and demo['goal_img'] is not None else None
261
+ print(f"Loaded init_state and goal_img from HF dataset for demo {init_state_id}")
262
+ else:
263
+ init_state = init_states[init_state_id]
264
+ goal_img = None
265
+ else:
266
+ init_state = init_states[init_state_id]
267
+ goal_img = None
268
+
269
+ env.reset()
270
+ env.set_init_state(init_state)
271
+ env_ = FrameStackObservation(DictWrapper(env, obs_key="agentview_image"), cfg.policy.obs_stacking) ## Stacking the observations
272
+ obs, info = env_.reset()
273
 
274
+ mask = get_blocked_mask(cfg, targets=None, T=0) ## Get the blocked mask
275
+
276
+ txt_goal = get_text_tokens(cfg, tokenizer, text_model, instruction, model=model)
277
+
278
+ # Use goal image from HDF5 if available, otherwise use first observation
279
+ if goal_img is not None:
280
+ image_goal = goal_img
281
+ print(f"Using goal image from HDF5, shape: {image_goal.shape}")
282
+ else:
283
+ image_goal = obs.reshape((256, 256, 3*cfg.policy.obs_stacking))[:,:,:3]
284
+ print("Using first observation as goal image")
285
+ frames = []
286
+ rewards = []
287
+ infos = []
288
+ last_action = np.zeros(cfg.action_dim) # Track last action taken
289
+ done, truncated, timeLimit, t, wait_steps = False, False, 400, 0, 00
290
 
291
+ while not (done or truncated or (t > (timeLimit + wait_steps))):
292
+ ## Reshape the image to the correct size and stack the history on the last channel dimension
293
+ # image = obs[0]
294
+ if t < wait_steps: ## let object stabilize before acting.
295
+ obs, reward, done, truncated, info = env_.step([0,0,0,0,0,0,0])
296
+ t += 1
297
+ continue
298
+ # obs = obs.reshape((128, 128, 3*cfg.policy.obs_stacking)) ## Assuming the observation is an image of size 128x128 with 3 color channels
299
+ obs = rearrange(obs, 't h w c -> h w (t c)', c=3, t=cfg.policy.obs_stacking) ## Rearranging the image to have the stacked history in the last channel dimension
300
+ # image = obs[:,:,:3] ## Remove the last dimension of the image color
301
+ obs_state = model.preprocess_state(obs)
302
+ goal_state = model.preprocess_goal_image(image_goal)
303
+ pose_ = model.encode_pose(torch.tensor([[np.concatenate(
304
+ (info["robot0_eef_pos"],
305
+ info["robot0_eef_quat"][:3],
306
+ [(info["robot0_gripper_qpos"][0])]), axis=-1)]],
307
+ dtype=torch.float32)).to(device)
308
+
309
+ # Prepare last_action tensor if available
310
+ last_action_tensor = None
311
+ if last_action is not None:
312
+ last_action_tensor = model.encode_action(torch.tensor([last_action[:cfg.action_dim]], dtype=torch.float32)).to(device)
313
+
314
+ action, loss = model.forward(torch.tensor(np.array([obs_state])).to(device)
315
+ ,torch.tensor(txt_goal).to(device)
316
+ ,torch.tensor(np.array([goal_state])).to(device),
317
+ mask_=True,
318
+ pose=pose_,
319
+ last_action=last_action_tensor,
320
+ )
321
+
322
+ action = model.decode_action(action[0]).cpu().detach().numpy()
323
+ last_action = action.copy() # Store for next iteration
324
+ ## If the actions are stacked into a longer vector execute the sequence of actions
325
+ for step_ in range(cfg.policy.action_stacking):
326
+ act_ = action[cfg.action_dim*step_:(cfg.action_dim*(step_+1))]
327
+ ## Need to process LIBERO gripper action [0, 1] -> [-1, 1], then invert, https://github.com/moojink/openvla-oft/blob/e4287e94541f459edc4feabc4e181f537cd569a8/experiments/robot/libero/run_libero_eval.py#L265
328
+ ## If the model is the RepayModel don't do this conversion
329
+ # if not hasattr(model, 'trajectory_loaded'):
330
+ # act_[6] = ((act_[6] - 0.5) * 2) # * -1.0
331
+
332
+ obs, reward, done, truncated, info = env_.step(act_)
333
+ # image = get_image_from_maniskill2_obs_dict(env_unwrapped, obs)
334
+ # image = image[:,:,:3] ## Remove last dimension of image color
335
+ # Store the original image for video before stacking/processing
336
+ image = obs[0]
337
+ frames.append(image)
338
+ # reward = -(np.linalg.norm(info["eof_to_obj1_diff"]) + np.linalg.norm(info["eof_to_obj1_diff"])) ## Use a shaped reward as distance between gripper and objects
339
+ rewards.append(reward)
340
+ infos.append(info)
341
+ t=t+1
342
+ # print(f"Step {t}, reward: {reward:.4f}, done: {done}, truncated: {truncated}")
343
+ if done or truncated:
344
+ print("Episode finished with success after {} timesteps".format(step_))
345
+ break
346
+ if done:
347
+ print("Episode finished with success after {} timesteps".format(step_))
348
  break
349
+
350
+ import os
351
+ path_ = os.path.join(log_dir, f"libero-{iter_}-task-id-{task_id}-init-id-{init_state_id}.mp4")
352
+ import imageio
353
+ imageio.mimsave(path_, frames, fps=20)
354
+ episode_stats = info.get('episode_stats', {})
355
+ episode_stats['rewards'] = np.mean(rewards)
356
+ print(f"avg reward {np.mean(rewards):.8f}")
357
+ if not cfg.testing:
358
+ wandb.log({"avg reward_"+str(task_id): np.mean(rewards)})
359
+ if not cfg.testing:
360
+ wandb.log({"example": wandb.Video(path_)})
361
+ env.close()
362
+
363
+ # Close HDF5 file if it was opened
364
+ if init_states_dataset is not None and isinstance(init_states_dataset, h5py.File):
365
+ init_states_dataset.close()
366
+ print("Closed HDF5 file")
367
+
368
+ return episode_stats
369
 
370
  import hydra
371
  from omegaconf import DictConfig
372
 
373
  @hydra.main(config_path="./conf", config_name="64pix-pose")
374
  def my_main(cfg: DictConfig):
 
375
  import torch
376
  # ------------
377
  # Train and test splits
 
383
  # model_ = torch.load("/home/gberseth/playground/mini_grp/miniGRP.pth")
384
  model_dir = hydra.utils.get_original_cwd()+"/mini-grp/miniGRP.pth"
385
  print ("Loading model from:", model_dir)
386
+ if "dataset" == cfg.model.type:
387
+ ## load the dataset
388
+ from mini_shuffel_buffer import CircularBuffer
389
+ from mock_grp_model import ReplayModel
390
+ cfg.dataset.load_dataset = True
391
+ model_ = ReplayModel(cfg)
392
+ dataset_buffer = CircularBuffer(cfg.dataset.buffer_size, cfg, model=model_)
393
+ model_.set_dataset(dataset_buffer)
394
+ else:
395
+ from grp_model import GRP
396
+ model_ = torch.load(model_dir, pickle_module=dill)
397
  # model_._cgf = cfg
398
 
399
  tokenizer = None