lsnu committed on
Commit
2a268bb
·
verified ·
1 Parent(s): b1ef16c

Add files using upload-large-folder tool

Browse files
Files changed (24) hide show
  1. artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/seed0/events.out.tfevents.1774229659.e52be9725b2e.1397.0 +3 -0
  2. artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/seed0/events.out.tfevents.1774229898.e52be9725b2e.3678.0 +3 -0
  3. artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/seed0/events.out.tfevents.1774229987.e52be9725b2e.3804.0 +3 -0
  4. artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/seed0/events.out.tfevents.1774230133.e52be9725b2e.3957.0 +3 -0
  5. artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/seed0/events.out.tfevents.1774230462.e52be9725b2e.4130.0 +3 -0
  6. artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/seed0/weights/0/QAttentionAgent_layer0.pt +3 -0
  7. artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/seed0/events.out.tfevents.1774230948.e52be9725b2e.4400.0 +3 -0
  8. artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/seed0/events.out.tfevents.1774231048.e52be9725b2e.4582.0 +3 -0
  9. artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/seed0/events.out.tfevents.1774231173.e52be9725b2e.4703.0 +3 -0
  10. artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/seed0/events.out.tfevents.1774231301.e52be9725b2e.4824.0 +3 -0
  11. artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/seed0/events.out.tfevents.1774231438.e52be9725b2e.4951.0 +3 -0
  12. artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/seed0/weights/0/QAttentionAgent_layer0.pt +3 -0
  13. artifacts/outputs/reveal_runs/proxy_backbone_only/checkpoint_best.pt +3 -0
  14. artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt +3 -0
  15. artifacts/outputs/reveal_runs/proxy_reveal_state/checkpoint_best.pt +3 -0
  16. artifacts/outputs/reveal_runs/proxy_reveal_state_clip/checkpoint_best.pt +3 -0
  17. artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/checkpoint_best.pt +3 -0
  18. artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/checkpoint_best.pt +3 -0
  19. artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/checkpoint_best.pt +3 -0
  20. artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/checkpoint_best.pt +3 -0
  21. code/reveal_vla_bimanual/sim_rlbench/__pycache__/dataset.cpython-310.pyc +0 -0
  22. code/reveal_vla_bimanual/sim_rlbench/__pycache__/generate_smoke_dataset.cpython-310.pyc +0 -0
  23. code/reveal_vla_bimanual/sim_rlbench/dataset.py +312 -0
  24. code/reveal_vla_bimanual/sim_rlbench/launch_smoke.py +93 -0
artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/seed0/events.out.tfevents.1774229659.e52be9725b2e.1397.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9de379324d03cd07826de977c88b5b0af260f56775bac9274dbf4a7fa137206d
3
+ size 88
artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/seed0/events.out.tfevents.1774229898.e52be9725b2e.3678.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfe9cbe8ab18dd6bebc242d4fa4f080708080db28723815106734ce828d59aeb
3
+ size 88
artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/seed0/events.out.tfevents.1774229987.e52be9725b2e.3804.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44120d28c2f86549b5eb1f094f75390a1d5c487ec9a9b23cf5dc592f06c4ac75
3
+ size 88
artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/seed0/events.out.tfevents.1774230133.e52be9725b2e.3957.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29ae48bb1181a3bf46fd265ebc5e3bda8ebb738bb60779fef5ac9f5f6e535ee8
3
+ size 88
artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/seed0/events.out.tfevents.1774230462.e52be9725b2e.4130.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdc64fc04a005fbeae272ae65861ba291f73255668aa8f124c028ccf89402f7d
3
+ size 88
artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/seed0/weights/0/QAttentionAgent_layer0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:145ce79566eef7d9dff9d15ec3cec6636eaed8c1a6981adb20279e14e0d9b141
3
+ size 332572639
artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/seed0/events.out.tfevents.1774230948.e52be9725b2e.4400.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4075460938d070e509edd6a261928c25ba15d7b707f086273ece97ac31fb4603
3
+ size 88
artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/seed0/events.out.tfevents.1774231048.e52be9725b2e.4582.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72479e8a2b6c9cade8ffeb37429a6d8ddc3c0bd9678f7f09a8efd11aceca6458
3
+ size 88
artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/seed0/events.out.tfevents.1774231173.e52be9725b2e.4703.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14a6e4307c460255d3251b4563350e9aa484ca6cfd06af44da2d12d487a28207
3
+ size 88
artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/seed0/events.out.tfevents.1774231301.e52be9725b2e.4824.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a01b9dbac814fdfb0cb8bc364204ad053dc9f00eb58a9995a76974537f8625f
3
+ size 88
artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/seed0/events.out.tfevents.1774231438.e52be9725b2e.4951.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:250476f994aca860bf7fa083340dc9303eb5d395949fd61ead6e3f2bb7b7eab3
3
+ size 140
artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/seed0/weights/0/QAttentionAgent_layer0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2658c0b80b93d2701635898fe5abcc4303809f200920656c3c9a0a2fd4a193cc
3
+ size 332572639
artifacts/outputs/reveal_runs/proxy_backbone_only/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb1090a52c89e840335b4736045d6153b8f1b827751d6b38fed2bd8494ed71e3
3
+ size 2720837
artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e575c499b6b1595e062d4a374af9fe91ae5104b5acff0fc7cf6e3c71e94b4601
3
+ size 723184260
artifacts/outputs/reveal_runs/proxy_reveal_state/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9aaca3280dc735adeac013f471183b09f9fffa203176e2a5a8cbb53f9a0a682
3
+ size 3958745
artifacts/outputs/reveal_runs/proxy_reveal_state_clip/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af4a8d84b187a34450230256d85c34229679fcdea9ec0c622c2e6a9f66405e41
3
+ size 734374962
artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f01315f74b4e069760ff32cd8660fc06236c29f0c2f1c64e2e95d899903c1001
3
+ size 723184644
artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3132aee5fbbff4e7c47cf8e870fabc99e8438623aab7de0b2f254cfc342cc81
3
+ size 2720488
artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8c2e1de7e4246c3da2dd18cc0b4b9227300db701a7ae1291cc9027ec527ad5a
3
+ size 734374898
artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e6f095148284e79db7b8b114dce45edbc0e98c679992eda8c933ce097eba4a4
3
+ size 3957308
code/reveal_vla_bimanual/sim_rlbench/__pycache__/dataset.cpython-310.pyc ADDED
Binary file (10.8 kB). View file
 
code/reveal_vla_bimanual/sim_rlbench/__pycache__/generate_smoke_dataset.cpython-310.pyc ADDED
Binary file (4.1 kB). View file
 
code/reveal_vla_bimanual/sim_rlbench/dataset.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+ import pickle
6
+ from typing import Any, Sequence
7
+
8
+ import numpy as np
9
+ import torch
10
+ from PIL import Image
11
+ from torch.utils.data import Dataset
12
+
13
+
14
# Canonical camera order used throughout this module: one static front view
# plus one wrist-mounted camera per arm.
THREE_CAMERAS: tuple[str, str, str] = ("front", "wrist_left", "wrist_right")
15
+
16
+
17
+ def _normalize_quaternion_wxyz(quaternion: np.ndarray) -> np.ndarray:
18
+ quaternion = np.asarray(quaternion, dtype=np.float32)
19
+ return quaternion / max(float(np.linalg.norm(quaternion)), 1e-8)
20
+
21
+
22
+ def _xyzw_to_wxyz(quaternion_xyzw: Sequence[float]) -> np.ndarray:
23
+ qx, qy, qz, qw = [float(value) for value in quaternion_xyzw]
24
+ return _normalize_quaternion_wxyz(np.array([qw, qx, qy, qz], dtype=np.float32))
25
+
26
+
27
+ def _wxyz_to_xyzw(quaternion_wxyz: Sequence[float]) -> np.ndarray:
28
+ qw, qx, qy, qz = [float(value) for value in quaternion_wxyz]
29
+ return _normalize_quaternion_wxyz(np.array([qx, qy, qz, qw], dtype=np.float32))
30
+
31
+
32
+ def _quat_multiply_wxyz(lhs: Sequence[float], rhs: Sequence[float]) -> np.ndarray:
33
+ lw, lx, ly, lz = [float(value) for value in lhs]
34
+ rw, rx, ry, rz = [float(value) for value in rhs]
35
+ return _normalize_quaternion_wxyz(
36
+ np.array(
37
+ [
38
+ lw * rw - lx * rx - ly * ry - lz * rz,
39
+ lw * rx + lx * rw + ly * rz - lz * ry,
40
+ lw * ry - lx * rz + ly * rw + lz * rx,
41
+ lw * rz + lx * ry - ly * rx + lz * rw,
42
+ ],
43
+ dtype=np.float32,
44
+ )
45
+ )
46
+
47
+
48
+ def _quat_inverse_wxyz(quaternion_wxyz: Sequence[float]) -> np.ndarray:
49
+ qw, qx, qy, qz = [float(value) for value in quaternion_wxyz]
50
+ return _normalize_quaternion_wxyz(np.array([qw, -qx, -qy, -qz], dtype=np.float32))
51
+
52
+
53
+ def _quat_to_rotvec_wxyz(quaternion_wxyz: Sequence[float]) -> np.ndarray:
54
+ quaternion_wxyz = _normalize_quaternion_wxyz(np.asarray(quaternion_wxyz, dtype=np.float32))
55
+ qw = float(np.clip(quaternion_wxyz[0], -1.0, 1.0))
56
+ xyz = quaternion_wxyz[1:]
57
+ sin_half = float(np.linalg.norm(xyz))
58
+ if sin_half < 1e-8:
59
+ return np.zeros(3, dtype=np.float32)
60
+ angle = 2.0 * np.arctan2(sin_half, qw)
61
+ axis = xyz / sin_half
62
+ return (axis * angle).astype(np.float32)
63
+
64
+
65
+ def _rotvec_to_quat_wxyz(rotvec: Sequence[float]) -> np.ndarray:
66
+ rotvec = np.asarray(rotvec, dtype=np.float32)
67
+ angle = float(np.linalg.norm(rotvec))
68
+ if angle < 1e-8:
69
+ return np.array([1.0, 0.0, 0.0, 0.0], dtype=np.float32)
70
+ axis = rotvec / angle
71
+ half = angle * 0.5
72
+ sin_half = np.sin(half)
73
+ return _normalize_quaternion_wxyz(
74
+ np.array(
75
+ [np.cos(half), axis[0] * sin_half, axis[1] * sin_half, axis[2] * sin_half],
76
+ dtype=np.float32,
77
+ )
78
+ )
79
+
80
+
81
def bimanual_proprio_from_obs(
    obs: Any,
    timestep: int,
    episode_length: int,
    target_dim: int = 32,
) -> np.ndarray:
    """Flatten a bimanual observation into a fixed-width proprioception vector.

    Layout: right gripper pose, left gripper pose, right joint positions,
    left joint positions, both gripper-open scalars, then a time feature
    ramping from +1 at the first step to -1 at the last.  The result is
    truncated or zero-padded to *target_dim* entries.
    """
    # Linear episode progress in [0, 1], remapped to [+1, -1].
    progress = timestep / float(max(1, episode_length - 1))
    time_feature = np.array([(1.0 - progress) * 2.0 - 1.0], dtype=np.float32)
    features = np.concatenate(
        [
            np.asarray(obs.right.gripper_pose, dtype=np.float32),
            np.asarray(obs.left.gripper_pose, dtype=np.float32),
            np.asarray(obs.right.joint_positions, dtype=np.float32),
            np.asarray(obs.left.joint_positions, dtype=np.float32),
            np.array([float(obs.right.gripper_open), float(obs.left.gripper_open)], dtype=np.float32),
            time_feature,
        ],
        axis=0,
    )
    if features.shape[0] >= target_dim:
        return features[:target_dim]
    tail = np.zeros(target_dim - features.shape[0], dtype=np.float32)
    return np.concatenate([features, tail], axis=0)
106
+
107
+
108
def delta_action_from_transition(current_obs: Any, next_obs: Any) -> np.ndarray:
    """Build a 14-D delta action (right arm first, then left) between two steps.

    Per arm: 3-D translation delta, 3-D rotation delta as a rotation vector,
    and the next step's gripper-open scalar.
    """
    per_arm: list[np.ndarray] = []
    for side in ("right", "left"):
        arm_now = getattr(current_obs, side)
        arm_next = getattr(next_obs, side)
        pose_now = np.asarray(arm_now.gripper_pose, dtype=np.float32)
        pose_next = np.asarray(arm_next.gripper_pose, dtype=np.float32)
        quat_now = _xyzw_to_wxyz(pose_now[3:])
        quat_next = _xyzw_to_wxyz(pose_next[3:])
        # Relative rotation taking the current orientation to the next one.
        rotation_delta = _quat_to_rotvec_wxyz(
            _quat_multiply_wxyz(quat_next, _quat_inverse_wxyz(quat_now))
        )
        per_arm.append(
            np.concatenate(
                [
                    (pose_next[:3] - pose_now[:3]).astype(np.float32),
                    rotation_delta.astype(np.float32),
                    np.array([float(arm_next.gripper_open)], dtype=np.float32),
                ],
                axis=0,
            )
        )
    return np.concatenate(per_arm, axis=0).astype(np.float32)
131
+
132
+
133
def absolute_action_from_delta(current_obs: Any, delta_action: Sequence[float], ignore_collisions: bool = True) -> np.ndarray:
    """Compose a 14-D delta action with current poses into an absolute action.

    Output layout: right pose (7) + right gripper, left pose (7) + left
    gripper, then the ignore-collisions flag — 18 values in total.  Gripper
    entries are binarized at a 0.5 threshold.

    Raises:
        ValueError: if *delta_action* is not exactly 14-dimensional.
    """
    delta = np.asarray(delta_action, dtype=np.float32)
    if delta.shape != (14,):
        raise ValueError(f"Expected delta action shape (14,), received {delta.shape}")

    components: list[float] = []
    for slot, side in enumerate(("right", "left")):
        pose = np.asarray(getattr(current_obs, side).gripper_pose, dtype=np.float32)
        base = slot * 7
        translation = delta[base : base + 3]
        rotation_vector = delta[base + 3 : base + 6]
        gripper = float(delta[base + 6] > 0.5)
        # Left-multiplying applies the delta rotation in the world frame,
        # mirroring how delta_action_from_transition computed it.
        target_quat = _quat_multiply_wxyz(
            _rotvec_to_quat_wxyz(rotation_vector), _xyzw_to_wxyz(pose[3:])
        )
        target_pose = np.concatenate(
            [pose[:3] + translation, _wxyz_to_xyzw(target_quat)],
            axis=0,
        )
        components.extend(target_pose.tolist())
        components.append(gripper)
    components.append(float(ignore_collisions))
    return np.asarray(components, dtype=np.float32)
160
+
161
+
162
def stack_live_rgb_obs(obs: Any, cameras: Sequence[str] = THREE_CAMERAS, resolution: int = 224) -> torch.Tensor:
    """Stack live RGB frames from ``obs.perception_data`` into one tensor.

    Returns a (num_cameras, 3, resolution, resolution) float tensor scaled
    to [0, 1]; frames are bilinearly resized when needed.
    """
    frames: list[np.ndarray] = []
    for name in cameras:
        frame = Image.fromarray(np.asarray(obs.perception_data[f"{name}_rgb"], dtype=np.uint8))
        target = (resolution, resolution)
        if frame.size != target:
            frame = frame.resize(target, Image.Resampling.BILINEAR)
        frames.append(np.asarray(frame, dtype=np.uint8))
    batch = torch.from_numpy(np.stack(frames, axis=0))
    return batch.permute(0, 3, 1, 2).float() / 255.0
172
+
173
+
174
def language_goal_from_episode(episode_dir: Path) -> str:
    """Read the first natural-language goal stored alongside an episode.

    Falls back to ``str()`` of the whole pickled payload when it is not a
    non-empty list or tuple.
    """
    with (episode_dir / "variation_descriptions.pkl").open("rb") as handle:
        payload = pickle.load(handle)
    if isinstance(payload, (list, tuple)) and payload:
        return str(payload[0])
    return str(payload)
181
+
182
+
183
def load_episode_observations(episode_dir: Path) -> Any:
    """Unpickle the per-step low-dimensional observations of one episode.

    NOTE(review): pickle.load is only safe on trusted, locally generated
    dataset files — never on untrusted input.
    """
    path = episode_dir / "low_dim_obs.pkl"
    with path.open("rb") as handle:
        return pickle.load(handle)
186
+
187
+
188
@dataclass(frozen=True)
class EpisodeRecord:
    """Immutable per-episode bundle cached in memory by the offline dataset."""

    # Task name, e.g. "bimanual_lift_ball".
    task: str
    # Index N of the episode{N} directory within the task.
    episode_index: int
    # Directory holding the episode's pickles and camera-frame subfolders.
    episode_dir: Path
    # First variation description for the episode (see language_goal_from_episode).
    language_goal: str
    # Unpickled low-dim observation sequence; project type, structure not visible here.
    observations: Any
195
+
196
+
197
@dataclass(frozen=True)
class SampleRecord:
    """Immutable pointer to one training sample: a single step of an episode."""

    # Key "{task}:episode{index}" into the dataset's episode cache.
    episode_key: str
    # Task name, duplicated here for convenient per-sample metadata.
    task: str
    # Episode index within the task.
    episode_index: int
    # Step within the episode this sample starts at.
    step_index: int
204
+
205
class RLBenchOfflineChunkDataset(Dataset[dict[str, Any]]):
    """Offline RLBench bimanual dataset yielding per-step action chunks.

    Episodes are discovered under ``dataset_root/<task>/all_variations/episodes``
    and their low-dim observations cached in memory; each sample pairs one
    observation step with a chunk of future delta actions.
    """

    def __init__(
        self,
        dataset_root: str | Path,
        tasks: Sequence[str],
        episode_indices: Sequence[int],
        resolution: int = 224,
        chunk_size: int = 8,
        proprio_dim: int = 32,
        cameras: Sequence[str] = THREE_CAMERAS,
        max_samples: int | None = None,
    ) -> None:
        self.dataset_root = Path(dataset_root)
        self.tasks = tuple(tasks)
        self.episode_indices = tuple(int(index) for index in episode_indices)
        self.resolution = int(resolution)
        self.chunk_size = int(chunk_size)
        self.proprio_dim = int(proprio_dim)
        self.cameras = tuple(cameras)
        self._episodes: dict[str, EpisodeRecord] = {}
        self._samples: list[SampleRecord] = []
        self._index_samples(max_samples)

    def _index_samples(self, max_samples: int | None) -> None:
        """Scan disk once, caching episodes and enumerating transition samples."""
        for task in self.tasks:
            for episode_index in self.episode_indices:
                episode_dir = (
                    self.dataset_root / task / "all_variations" / "episodes" / f"episode{episode_index}"
                )
                if not episode_dir.exists():
                    # Missing episodes are skipped silently so sparse subsets work.
                    continue
                observations = load_episode_observations(episode_dir)
                key = f"{task}:episode{episode_index}"
                self._episodes[key] = EpisodeRecord(
                    task=task,
                    episode_index=episode_index,
                    episode_dir=episode_dir,
                    language_goal=language_goal_from_episode(episode_dir),
                    observations=observations,
                )
                # One sample per transition; an episode with < 2 steps yields none.
                for step_index in range(max(0, len(observations) - 1)):
                    self._samples.append(
                        SampleRecord(
                            episode_key=key,
                            task=task,
                            episode_index=episode_index,
                            step_index=step_index,
                        )
                    )
                if max_samples is not None and len(self._samples) >= max_samples:
                    return

    def __len__(self) -> int:
        return len(self._samples)

    def _load_rgb_stack(self, episode_dir: Path, step_index: int) -> torch.Tensor:
        """Load the step's frame from every camera as a (C, 3, H, W) float tensor in [0, 1]."""
        frames: list[np.ndarray] = []
        target = (self.resolution, self.resolution)
        for camera_name in self.cameras:
            frame_path = episode_dir / f"{camera_name}_rgb" / f"rgb_{step_index:04d}.png"
            frame = Image.open(frame_path).convert("RGB")
            if frame.size != target:
                frame = frame.resize(target, Image.Resampling.BILINEAR)
            frames.append(np.asarray(frame, dtype=np.uint8))
        batch = torch.from_numpy(np.stack(frames, axis=0))
        return batch.permute(0, 3, 1, 2).float() / 255.0

    def _action_chunk(self, observations: Any, start_index: int) -> torch.Tensor:
        """Build a (chunk_size, 14) delta-action tensor starting at *start_index*.

        Past the end of the episode the most recent valid transition's action
        is repeated (initially the episode's final transition).
        """
        fallback = delta_action_from_transition(observations[-2], observations[-1])
        chunk: list[np.ndarray] = []
        for offset in range(self.chunk_size):
            step = start_index + offset
            if step < len(observations) - 1:
                fallback = delta_action_from_transition(observations[step], observations[step + 1])
            chunk.append(fallback)
        return torch.from_numpy(np.stack(chunk, axis=0))

    def __getitem__(self, index: int) -> dict[str, Any]:
        """Return images, proprioception, language goal, and action chunk for one step."""
        sample = self._samples[index]
        episode = self._episodes[sample.episode_key]
        observations = episode.observations
        proprio = bimanual_proprio_from_obs(
            observations[sample.step_index],
            timestep=sample.step_index,
            episode_length=len(observations),
            target_dim=self.proprio_dim,
        )
        return {
            "images": self._load_rgb_stack(episode.episode_dir, sample.step_index),
            "proprio": torch.from_numpy(proprio),
            "texts": episode.language_goal,
            "action_chunk": self._action_chunk(observations, sample.step_index),
            "task": sample.task,
            "episode_index": sample.episode_index,
            "step_index": sample.step_index,
        }

    def summary(self) -> dict[str, Any]:
        """Describe the dataset configuration and size for logging."""
        return {
            "dataset_root": str(self.dataset_root),
            "tasks": list(self.tasks),
            "episode_indices": list(self.episode_indices),
            "num_episodes": len(self._episodes),
            "num_samples": len(self._samples),
            "resolution": self.resolution,
            "chunk_size": self.chunk_size,
            "proprio_dim": self.proprio_dim,
        }
code/reveal_vla_bimanual/sim_rlbench/launch_smoke.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import os
6
+
7
+ import numpy as np
8
+ from helpers.observation_utils import create_obs_config
9
+ from rlbench.action_modes.action_mode import BimanualJointPositionActionMode
10
+ from rlbench.backend.utils import task_file_to_task_class
11
+ from rlbench.environment import Environment
12
+
13
+ from sim_rlbench.obs_adapter import extract_canonical_bimanual_obs
14
+
15
+
16
def main() -> None:
    """Smoke-test a bimanual RLBench task: launch, reset, take one hold-still
    joint-position step, and print a JSON report of observation/action shapes."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--task", default="bimanual_lift_ball")
    parser.add_argument("--resolution", type=int, default=224)
    parser.add_argument("--display", default=None)
    # NOTE(review): store_true with default=True makes --headless a no-op;
    # visibility is effectively controlled by --visible below. Also --display
    # is only echoed in the report, never applied to the environment — confirm
    # whether it should set DISPLAY.
    parser.add_argument("--headless", action="store_true", default=True)
    parser.add_argument("--visible", action="store_true")
    args = parser.parse_args()

    headless = args.headless and not args.visible
    cameras = ["front", "wrist_left", "wrist_right"]
    obs_config = create_obs_config(
        cameras,
        [args.resolution, args.resolution],
        "BIMANUAL_PERACT",
        "bimanual",
    )
    env = Environment(
        action_mode=BimanualJointPositionActionMode(),
        obs_config=obs_config,
        headless=headless,
        robot_setup="dual_panda",
    )

    try:
        env.launch()
        task = env.get_task(task_file_to_task_class(args.task, bimanual=True))
        descriptions, obs = task.reset()
        canonical = extract_canonical_bimanual_obs(
            obs,
            descriptions[0],
            include_point_cloud=True,
        )

        # Hold-still action: replay the current joint and gripper state of
        # both arms (right block first, then left).
        action = np.concatenate(
            [
                np.asarray(obs.right.joint_positions, dtype=np.float32),
                np.array([float(obs.right.gripper_open)], dtype=np.float32),
                np.asarray(obs.left.joint_positions, dtype=np.float32),
                np.array([float(obs.left.gripper_open)], dtype=np.float32),
            ],
            axis=0,
        )
        next_obs, reward, done = task.step(action)

        payload = {
            "display": args.display or os.environ.get("DISPLAY"),
            "headless": headless,
            "task": task.get_name(),
            "description": descriptions[0],
            "rgb_shapes": {k: list(v.shape) for k, v in canonical.rgb.items()},
            "intrinsic_shapes": {
                k: list(v.shape) for k, v in canonical.camera_intrinsics.items()
            },
            "extrinsic_shapes": {
                k: list(v.shape) for k, v in canonical.camera_extrinsics.items()
            },
            "point_cloud_shapes": {
                k: list(v.shape) for k, v in canonical.point_cloud.items()
            },
            "proprio_shape": list(canonical.proprio.shape),
            "action_shape": list(action.shape),
            "reward": float(reward),
            "done": bool(done),
            "front_rgb_shape_after_step": list(next_obs.perception_data["front_rgb"].shape),
        }
        print(json.dumps(payload, indent=2))
    finally:
        # Always tear the simulator down, even when launch/reset/step fails.
        env.shutdown()


if __name__ == "__main__":
    main()