lsnu committed on
Commit
31ade1f
·
verified ·
1 Parent(s): 9638f58

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/proposal_alignment_diagnostics_smoke/proposal_alignment_diagnostics.json +358 -0
  2. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/status.md +58 -0
  3. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/summary.json +42 -0
  4. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter3/default/reveal_benchmark.json +0 -0
  5. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter3/ignore_proposal_logits_in_shortlist/reveal_benchmark.json +0 -0
  6. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/bag_fixed_default/reveal_benchmark.json +0 -0
  7. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/bag_fixed_default/reveal_benchmark.md +15 -0
  8. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/cloth_fixed_default/reveal_benchmark.json +0 -0
  9. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/cloth_fixed_default/reveal_benchmark.md +15 -0
  10. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/foliage_fixed_default/reveal_benchmark.json +0 -0
  11. code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/foliage_fixed_default/reveal_benchmark.md +15 -0
  12. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/.gitignore +12 -0
  13. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/README.md +84 -0
  14. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/adapter_stack.md +67 -0
  15. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/upstream_pins.md +24 -0
  16. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/xorg.rtx6000.conf +32 -0
  17. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/envs/reveal310.yaml +38 -0
  18. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/envs/rlbench310.yaml +50 -0
  19. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__init__.py +3 -0
  20. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/ablations.py +7 -0
  21. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/compare_rlbench_sweeps.py +143 -0
  22. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/compose_task_routed_proxy_summary.py +100 -0
  23. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/protocols.py +41 -0
  24. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_anybimanual_anchor_eval.py +179 -0
  25. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/pyproject.toml +32 -0
  26. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/__init__.py +15 -0
  27. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/base.py +32 -0
  28. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/build_task_specialized_episode_specs.py +73 -0
  29. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/dataset.py +634 -0
  30. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/generate_dataset.py +50 -0
  31. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/isaac_smoke.py +29 -0
  32. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/isaac_wrapper.py +16 -0
  33. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/labels.py +61 -0
  34. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/procedural_envs.py +1389 -0
  35. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/teachers.py +41 -0
  36. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/base.yaml +20 -0
  37. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/model/backbone_only.yaml +26 -0
  38. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_adapter_wrapped_clip_base_fast.yaml +92 -0
  39. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_adapter_wrapped_clip_rank_only_rebuild128.yaml +91 -0
  40. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_backbone_only_clip.yaml +97 -0
  41. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_backbone_only_smoke.yaml +100 -0
  42. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_ablation_nodepth.yaml +16 -0
  43. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_ablation_noplanner.yaml +16 -0
  44. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_ablation_norolesym.yaml +16 -0
  45. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_ablation_nowm.yaml +16 -0
  46. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage1_clip.yaml +72 -0
  47. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage1_dummy.yaml +75 -0
  48. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage2_clip.yaml +18 -0
  49. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage2_dummy.yaml +17 -0
  50. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd.yaml +18 -0
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/proposal_alignment_diagnostics_smoke/proposal_alignment_diagnostics.json ADDED
@@ -0,0 +1,358 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt",
3
+ "dataset_path": "/workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3_phase.pt",
4
+ "teacher_macro_names_by_task": {
5
+ "foliage": [
6
+ "teacher",
7
+ "pin_canopy",
8
+ "maintain_gap",
9
+ "premature_retrieve",
10
+ "reveal_with_release",
11
+ "wrong_side_reveal",
12
+ "foliage_immediate_reocclusion",
13
+ "over_disturbance"
14
+ ]
15
+ },
16
+ "overall": {
17
+ "samples": 8,
18
+ "teacher_oracle_top1_accuracy": 1.0,
19
+ "proposal_teacher_utility_spearman": 0.9880235200593538,
20
+ "slotwise_reconstruction_mse": [
21
+ 0.030513444915413857,
22
+ 0.12480158358812332,
23
+ 0.12676900625228882,
24
+ 0.23568345606327057,
25
+ 0.1129273921251297,
26
+ 0.18383629620075226,
27
+ 0.11721545457839966,
28
+ 0.24060414731502533
29
+ ],
30
+ "slotwise_best_match_mse": [
31
+ 0.030513444915413857,
32
+ 0.08985880762338638,
33
+ 0.07267005741596222,
34
+ 0.08659016340970993,
35
+ 0.08734967559576035,
36
+ 0.08924143761396408,
37
+ 0.0867096558213234,
38
+ 0.09597492218017578
39
+ ],
40
+ "diagonal_reconstruction_mse": 0.14654386043548584,
41
+ "best_match_reconstruction_mse": 0.0798635184764862,
42
+ "teacher_slot_coverage_rate": [
43
+ 1.0,
44
+ 0.25,
45
+ 0.0,
46
+ 0.0,
47
+ 0.375,
48
+ 0.0,
49
+ 0.0,
50
+ 0.0
51
+ ],
52
+ "proposal_slot_teacher_confusion": [
53
+ [
54
+ 8,
55
+ 0,
56
+ 0,
57
+ 0,
58
+ 0,
59
+ 0,
60
+ 0,
61
+ 0
62
+ ],
63
+ [
64
+ 6,
65
+ 2,
66
+ 0,
67
+ 0,
68
+ 0,
69
+ 0,
70
+ 0,
71
+ 0
72
+ ],
73
+ [
74
+ 8,
75
+ 0,
76
+ 0,
77
+ 0,
78
+ 0,
79
+ 0,
80
+ 0,
81
+ 0
82
+ ],
83
+ [
84
+ 8,
85
+ 0,
86
+ 0,
87
+ 0,
88
+ 0,
89
+ 0,
90
+ 0,
91
+ 0
92
+ ],
93
+ [
94
+ 5,
95
+ 0,
96
+ 0,
97
+ 0,
98
+ 3,
99
+ 0,
100
+ 0,
101
+ 0
102
+ ],
103
+ [
104
+ 7,
105
+ 1,
106
+ 0,
107
+ 0,
108
+ 0,
109
+ 0,
110
+ 0,
111
+ 0
112
+ ],
113
+ [
114
+ 5,
115
+ 0,
116
+ 0,
117
+ 0,
118
+ 3,
119
+ 0,
120
+ 0,
121
+ 0
122
+ ],
123
+ [
124
+ 8,
125
+ 0,
126
+ 0,
127
+ 0,
128
+ 0,
129
+ 0,
130
+ 0,
131
+ 0
132
+ ]
133
+ ],
134
+ "proposal_slot_best_teacher_slot": [
135
+ {
136
+ "proposal_slot": 0,
137
+ "best_teacher_slot": 0,
138
+ "best_teacher_slot_rate": 1.0,
139
+ "support": 8
140
+ },
141
+ {
142
+ "proposal_slot": 1,
143
+ "best_teacher_slot": 0,
144
+ "best_teacher_slot_rate": 0.75,
145
+ "support": 8
146
+ },
147
+ {
148
+ "proposal_slot": 2,
149
+ "best_teacher_slot": 0,
150
+ "best_teacher_slot_rate": 1.0,
151
+ "support": 8
152
+ },
153
+ {
154
+ "proposal_slot": 3,
155
+ "best_teacher_slot": 0,
156
+ "best_teacher_slot_rate": 1.0,
157
+ "support": 8
158
+ },
159
+ {
160
+ "proposal_slot": 4,
161
+ "best_teacher_slot": 0,
162
+ "best_teacher_slot_rate": 0.625,
163
+ "support": 8
164
+ },
165
+ {
166
+ "proposal_slot": 5,
167
+ "best_teacher_slot": 0,
168
+ "best_teacher_slot_rate": 0.875,
169
+ "support": 8
170
+ },
171
+ {
172
+ "proposal_slot": 6,
173
+ "best_teacher_slot": 0,
174
+ "best_teacher_slot_rate": 0.625,
175
+ "support": 8
176
+ },
177
+ {
178
+ "proposal_slot": 7,
179
+ "best_teacher_slot": 0,
180
+ "best_teacher_slot_rate": 1.0,
181
+ "support": 8
182
+ }
183
+ ],
184
+ "proposal_candidate_pairwise_l2": 2.573041468858719
185
+ },
186
+ "by_task": {
187
+ "foliage": {
188
+ "samples": 8,
189
+ "teacher_oracle_top1_accuracy": 1.0,
190
+ "proposal_teacher_utility_spearman": 0.9880235200593538,
191
+ "slotwise_reconstruction_mse": [
192
+ 0.030513444915413857,
193
+ 0.12480158358812332,
194
+ 0.12676900625228882,
195
+ 0.23568345606327057,
196
+ 0.1129273921251297,
197
+ 0.18383629620075226,
198
+ 0.11721545457839966,
199
+ 0.24060414731502533
200
+ ],
201
+ "slotwise_best_match_mse": [
202
+ 0.030513444915413857,
203
+ 0.08985880762338638,
204
+ 0.07267005741596222,
205
+ 0.08659016340970993,
206
+ 0.08734967559576035,
207
+ 0.08924143761396408,
208
+ 0.0867096558213234,
209
+ 0.09597492218017578
210
+ ],
211
+ "diagonal_reconstruction_mse": 0.14654386043548584,
212
+ "best_match_reconstruction_mse": 0.0798635184764862,
213
+ "teacher_slot_coverage_rate": [
214
+ 1.0,
215
+ 0.25,
216
+ 0.0,
217
+ 0.0,
218
+ 0.375,
219
+ 0.0,
220
+ 0.0,
221
+ 0.0
222
+ ],
223
+ "proposal_slot_teacher_confusion": [
224
+ [
225
+ 8,
226
+ 0,
227
+ 0,
228
+ 0,
229
+ 0,
230
+ 0,
231
+ 0,
232
+ 0
233
+ ],
234
+ [
235
+ 6,
236
+ 2,
237
+ 0,
238
+ 0,
239
+ 0,
240
+ 0,
241
+ 0,
242
+ 0
243
+ ],
244
+ [
245
+ 8,
246
+ 0,
247
+ 0,
248
+ 0,
249
+ 0,
250
+ 0,
251
+ 0,
252
+ 0
253
+ ],
254
+ [
255
+ 8,
256
+ 0,
257
+ 0,
258
+ 0,
259
+ 0,
260
+ 0,
261
+ 0,
262
+ 0
263
+ ],
264
+ [
265
+ 5,
266
+ 0,
267
+ 0,
268
+ 0,
269
+ 3,
270
+ 0,
271
+ 0,
272
+ 0
273
+ ],
274
+ [
275
+ 7,
276
+ 1,
277
+ 0,
278
+ 0,
279
+ 0,
280
+ 0,
281
+ 0,
282
+ 0
283
+ ],
284
+ [
285
+ 5,
286
+ 0,
287
+ 0,
288
+ 0,
289
+ 3,
290
+ 0,
291
+ 0,
292
+ 0
293
+ ],
294
+ [
295
+ 8,
296
+ 0,
297
+ 0,
298
+ 0,
299
+ 0,
300
+ 0,
301
+ 0,
302
+ 0
303
+ ]
304
+ ],
305
+ "proposal_slot_best_teacher_slot": [
306
+ {
307
+ "proposal_slot": 0,
308
+ "best_teacher_slot": 0,
309
+ "best_teacher_slot_rate": 1.0,
310
+ "support": 8
311
+ },
312
+ {
313
+ "proposal_slot": 1,
314
+ "best_teacher_slot": 0,
315
+ "best_teacher_slot_rate": 0.75,
316
+ "support": 8
317
+ },
318
+ {
319
+ "proposal_slot": 2,
320
+ "best_teacher_slot": 0,
321
+ "best_teacher_slot_rate": 1.0,
322
+ "support": 8
323
+ },
324
+ {
325
+ "proposal_slot": 3,
326
+ "best_teacher_slot": 0,
327
+ "best_teacher_slot_rate": 1.0,
328
+ "support": 8
329
+ },
330
+ {
331
+ "proposal_slot": 4,
332
+ "best_teacher_slot": 0,
333
+ "best_teacher_slot_rate": 0.625,
334
+ "support": 8
335
+ },
336
+ {
337
+ "proposal_slot": 5,
338
+ "best_teacher_slot": 0,
339
+ "best_teacher_slot_rate": 0.875,
340
+ "support": 8
341
+ },
342
+ {
343
+ "proposal_slot": 6,
344
+ "best_teacher_slot": 0,
345
+ "best_teacher_slot_rate": 0.625,
346
+ "support": 8
347
+ },
348
+ {
349
+ "proposal_slot": 7,
350
+ "best_teacher_slot": 0,
351
+ "best_teacher_slot_rate": 1.0,
352
+ "support": 8
353
+ }
354
+ ],
355
+ "proposal_candidate_pairwise_l2": 2.573041468858719
356
+ }
357
+ }
358
+ }
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/status.md ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # RVT Overlap Branch Status
2
+
3
+ ## Code Changes
4
+
5
+ - added RVT residual output adapter in `/workspace/VLAarchtests/code/reveal_vla_bimanual/models/rvt_backbone.py`
6
+ - added RVT frozen-stage config in `/workspace/VLAarchtests/code/reveal_vla_bimanual/train/configs/rlbench_subset3_backbone_only_rvt_100demo_frozen_seed17.yaml`
7
+ - added RVT upper-layer stage config in `/workspace/VLAarchtests/code/reveal_vla_bimanual/train/configs/rlbench_subset3_backbone_only_rvt_100demo_unfreeze_top2_seed17.yaml`
8
+ - made RLBench init checkpoint loading skip incompatible shapes when `init_strict=false` in `/workspace/VLAarchtests/code/reveal_vla_bimanual/train/run_rlbench_experiment.py`
9
+ - added offline RLBench pickle bootstrap in `/workspace/VLAarchtests/code/reveal_vla_bimanual/sim_rlbench/dataset.py`
10
+ - added overlap task alias mapping in `/workspace/VLAarchtests/code/reveal_vla_bimanual/sim_rlbench/task_resolver.py`
11
+ - added branch summary script in `/workspace/VLAarchtests/code/reveal_vla_bimanual/eval/summarize_rvt_overlap_branch.py`
12
+ - added branch runner in `/workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_rvt_overlap_branch.sh`
13
+
14
+ ## Verification
15
+
16
+ - `python -m py_compile` passed for:
17
+ - `models/rvt_backbone.py`
18
+ - `train/run_rlbench_experiment.py`
19
+ - `sim_rlbench/dataset.py`
20
+ - `sim_rlbench/task_resolver.py`
21
+ - `eval/summarize_rvt_overlap_branch.py`
22
+ - `bash -n /workspace/VLAarchtests/code/reveal_vla_bimanual/scripts/run_rvt_overlap_branch.sh` passed
23
+ - `pytest -q` passed for:
24
+ - `/workspace/VLAarchtests/tests/test_rvt_backbone_forward.py`
25
+ - `/workspace/VLAarchtests/tests/test_rlbench_dataset_rgbd_geometry.py`
26
+ - `/workspace/VLAarchtests/tests/test_eval_toggle_paths_work.py`
27
+ - `/workspace/VLAarchtests/tests/test_rlbench_init_checkpoint.py`
28
+ - `/workspace/VLAarchtests/tests/test_rlbench_pickle_bootstrap.py`
29
+ - `/workspace/VLAarchtests/tests/test_rlbench_task_resolver_aliases.py`
30
+ - `/workspace/VLAarchtests/tests/test_summarize_rvt_overlap_branch.py`
31
+
32
+ ## Stage 1 Train
33
+
34
+ - checkpoint: `/workspace/outputs/rlbench_rvt_branch/rlbench_subset3_backbone_only_rvt_100demo_frozen_seed17/checkpoint_best.pt`
35
+ - train summary: `/workspace/outputs/rlbench_rvt_branch/rlbench_subset3_backbone_only_rvt_100demo_frozen_seed17/summary.json`
36
+ - final train total: `0.043179353826920445`
37
+ - final val total: `0.039591669984665984`
38
+ - train seconds: `2261.2839448451996`
39
+
40
+ ## Stage 1 Overlap Eval
41
+
42
+ - rollout summary: `/workspace/reports/rvt_overlap_branch_20260330/evals/rlbench_subset3_backbone_only_rvt_100demo_frozen_seed17_noplan_split/rollout_eval.json`
43
+ - `bimanual_push_box`: mean_success=`0.0`, mean_return=`0.0`
44
+ - `bimanual_lift_ball`: mean_success=`0.0`, mean_return=`0.0`
45
+ - `bimanual_dual_push_buttons`: mean_success=`0.0`, mean_return=`0.0`
46
+ - stage1 mean_success=`0.0`
47
+
48
+ ## Gate
49
+
50
+ - local AnyBimanual overlap floor: `0.16`
51
+ - public AnyBimanual overlap best: `0.6933333333333334`
52
+ - stage1 clears local floor: `false`
53
+ - stage2 run: `false`
54
+
55
+ ## Summary Artifact
56
+
57
+ - `/workspace/VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/summary.json`
58
+ - `/workspace/VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/summary.md`
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/rvt_overlap_branch_20260330/summary.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "anybimanual_local_overlap_floor": {
3
+ "path": "/workspace/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.json",
4
+ "step": 1000,
5
+ "mean_success": 0.16,
6
+ "per_task_success": {
7
+ "coordinated_push_box": 0.0,
8
+ "coordinated_lift_ball": 0.0,
9
+ "dual_push_buttons": 0.48
10
+ }
11
+ },
12
+ "anybimanual_public_best_overlap": {
13
+ "path": "/workspace/VLAarchtests/artifacts/reports/anybimanual_overlap_baseline_20260330/resume1000_summary/summary.json",
14
+ "step": 60000,
15
+ "mean_success": 0.6933333333333334,
16
+ "per_task_success": {
17
+ "coordinated_push_box": 0.8,
18
+ "coordinated_lift_ball": 0.32,
19
+ "dual_push_buttons": 0.96
20
+ }
21
+ },
22
+ "stage1_frozen": {
23
+ "path": "/workspace/reports/rvt_overlap_branch_20260330/evals/rlbench_subset3_backbone_only_rvt_100demo_frozen_seed17_noplan_split/rollout_eval.json",
24
+ "checkpoint": "/workspace/outputs/rlbench_rvt_branch/rlbench_subset3_backbone_only_rvt_100demo_frozen_seed17/checkpoint_best.pt",
25
+ "mean_success": 0.0,
26
+ "per_task_success": {
27
+ "bimanual_push_box": 0.0,
28
+ "bimanual_lift_ball": 0.0,
29
+ "bimanual_dual_push_buttons": 0.0
30
+ },
31
+ "per_task_return": {
32
+ "bimanual_push_box": 0.0,
33
+ "bimanual_lift_ball": 0.0,
34
+ "bimanual_dual_push_buttons": 0.0
35
+ }
36
+ },
37
+ "stage2_unfreeze_top2": null,
38
+ "gates": {
39
+ "stage1_clears_local_floor": false,
40
+ "stage2_clears_local_floor": false
41
+ }
42
+ }
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter3/default/reveal_benchmark.json ADDED
The diff for this file is too large to render. See raw diff
 
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter3/ignore_proposal_logits_in_shortlist/reveal_benchmark.json ADDED
The diff for this file is too large to render. See raw diff
 
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/bag_fixed_default/reveal_benchmark.json ADDED
The diff for this file is too large to render. See raw diff
 
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/bag_fixed_default/reveal_benchmark.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## selector_finetune_iter8
4
+ - controller: model
5
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_v7_selector_finetune_iter8_seed17/checkpoint_best.pt
6
+ - episodes: 100.000
7
+ - mean_success: 0.410
8
+ - visibility_integral: 38.913
9
+ - corridor_availability: 0.816
10
+ - reocclusion_rate: 0.020
11
+ - disturbance_cost: 0.515
12
+ - premature_retrieve_rate: 0.109
13
+ - reocclusion_after_reveal_rate: 0.780
14
+ - planner_regret: 0.127
15
+ - bag_success: 0.410
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/cloth_fixed_default/reveal_benchmark.json ADDED
The diff for this file is too large to render. See raw diff
 
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/cloth_fixed_default/reveal_benchmark.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## selector_finetune_iter8
4
+ - controller: model
5
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_v7_selector_finetune_iter8_seed17/checkpoint_best.pt
6
+ - episodes: 100.000
7
+ - mean_success: 0.590
8
+ - visibility_integral: 37.920
9
+ - corridor_availability: 0.928
10
+ - reocclusion_rate: 0.000
11
+ - disturbance_cost: 0.206
12
+ - premature_retrieve_rate: 0.113
13
+ - reocclusion_after_reveal_rate: 0.000
14
+ - planner_regret: 0.167
15
+ - cloth_success: 0.590
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/foliage_fixed_default/reveal_benchmark.json ADDED
The diff for this file is too large to render. See raw diff
 
code/VLAarchtests2_code/VLAarchtests/artifacts/reports/selector_finetune_v7_iter8/foliage_fixed_default/reveal_benchmark.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## selector_finetune_iter8
4
+ - controller: model
5
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_v7_selector_finetune_iter8_seed17/checkpoint_best.pt
6
+ - episodes: 100.000
7
+ - mean_success: 0.400
8
+ - visibility_integral: 44.134
9
+ - corridor_availability: 0.847
10
+ - reocclusion_rate: 0.034
11
+ - disturbance_cost: 0.302
12
+ - premature_retrieve_rate: 0.110
13
+ - reocclusion_after_reveal_rate: 0.570
14
+ - planner_regret: 0.093
15
+ - foliage_success: 0.400
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/.gitignore ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ .DS_Store
4
+ .mypy_cache/
5
+ .pytest_cache/
6
+ .ruff_cache/
7
+ .venv/
8
+ artifacts/
9
+ outputs/
10
+ logs/
11
+ wandb/
12
+ reports/
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/README.md ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # reveal_vla_bimanual
2
+
3
+ Simulation-first prototype for a language-conditioned bimanual reveal-and-retrieve policy under elastic occlusion.
4
+
5
+ This repo is not a generalist VLA backbone in the RT-2 / OpenVLA / Octo sense. The current contribution is the reveal-state machinery layered on top of a frozen vision-language encoder.
6
+
7
+ This repo is structured around five top-level modules:
8
+
9
+ - `sim_rlbench/`: RLBench2 / PerAct2 wrappers, dataset hooks, camera setup, and benchmark evaluation helpers.
10
+ - `sim_reveal/`: reveal-proxy environments, scripted teachers, and privileged label extraction.
11
+ - `models/`: shared backbone wrappers, multi-view fusion, bimanual decoder, reveal-state head, world model, and planner.
12
+ - `train/`: trainers, losses, checkpointing, and Hydra/YAML configs.
13
+ - `eval/`: benchmark scripts, ablations, metrics, plots, and report generation.
14
+
15
+ Current bootstrap priorities:
16
+
17
+ 1. Reproduce the RLBench2 / PerAct2 stack with a fixed 3-camera interface.
18
+ 2. Stand up a backbone-only 3-camera policy in the same training/eval harness.
19
+ 3. Add reveal-state supervision and short-horizon planning for synthetic reveal proxies.
20
+
21
+ Upstream dependencies are kept in `/workspace/third_party` and pinned in `docs/upstream_pins.md`.
22
+
23
+ ## RLBench env A
24
+
25
+ The RLBench / PerAct2 stack is pinned to Python 3.10 and lives in `/workspace/envs/rlbench`.
26
+
27
+ Bring it up with:
28
+
29
+ ```bash
30
+ /workspace/reveal_vla_bimanual/scripts/setup_env_a_rlbench.sh
31
+ /workspace/reveal_vla_bimanual/scripts/setup_rlbench_headless_x.sh
32
+ /workspace/reveal_vla_bimanual/scripts/start_rlbench_x.sh
33
+ ```
34
+
35
+ Verify GPU GL on the headless display:
36
+
37
+ ```bash
38
+ DISPLAY=:99 glxinfo -B
39
+ ```
40
+
41
+ Run the RLBench launch/reset/step smoke test:
42
+
43
+ ```bash
44
+ env \
45
+ DISPLAY=:99 \
46
+ XDG_RUNTIME_DIR=/tmp/runtime-root \
47
+ COPPELIASIM_ROOT=/workspace/assets/coppeliasim_v4_1_0 \
48
+ LD_LIBRARY_PATH=/workspace/system_shims/nvidia$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 | cut -d. -f1)/usr/lib/x86_64-linux-gnu:/workspace/system_shims/nvidia$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 | cut -d. -f1)/usr/lib/x86_64-linux-gnu/nvidia:/workspace/assets/coppeliasim_v4_1_0 \
49
+ QT_QPA_PLATFORM_PLUGIN_PATH=/workspace/assets/coppeliasim_v4_1_0 \
50
+ /workspace/.tools/micromamba/bin/micromamba run \
51
+ -r /workspace/.micromamba \
52
+ -p /workspace/envs/rlbench \
53
+ python -m sim_rlbench.launch_smoke --headless
54
+ ```
55
+
56
+ The working benchmark interface is fixed to three cameras only:
57
+
58
+ - `front`
59
+ - `wrist_left`
60
+ - `wrist_right`
61
+
62
+ The smoke test covers launch, bimanual task reset, canonical observation extraction, and one bimanual action step with `headless=True`, the same mode used by the upstream PerAct2-style training stack.
63
+
64
+ Generate the PerAct2-compatible train command for the fixed 3-camera interface with:
65
+
66
+ ```bash
67
+ micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
68
+ python -m sim_rlbench.smoke_test --print-train-command
69
+ ```
70
+
71
+ Download the published PerAct2 demos into `/workspace/data/rlbench2` with checksum verification:
72
+
73
+ ```bash
74
+ micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
75
+ python -m sim_rlbench.dataset_download --resolution 256 --splits train
76
+ ```
77
+
78
+ If you want the archives unpacked directly into the demo root expected by RLBench, install `squashfs-tools` first and add `--extract`:
79
+
80
+ ```bash
81
+ apt-get install -y squashfs-tools
82
+ micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
83
+ python -m sim_rlbench.dataset_download --resolution 256 --splits train --extract
84
+ ```
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/adapter_stack.md ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Adapter Stack
2
+
3
+ This repo now contains a no-op-safe `trunk + adapter` path alongside the legacy monolithic policies.
4
+
5
+ ## Main classes
6
+
7
+ - `models/policy.py`
8
+ - `FoundationTrunkPolicy`
9
+ - `ElasticOcclusionAdapter`
10
+ - `AdapterWrappedPolicy`
11
+
12
+ - `models/backbones.py`
13
+ - `NoOpAdapterCompatibleTrunkOutput`
14
+ - `TrunkInterface`
15
+
16
+ - `models/action_decoder.py`
17
+ - `TaskRoutedProposalPrior`
18
+
19
+ - `models/planner.py`
20
+ - `ElasticFeasibilityGate`
21
+ - `ResidualActionReranker`
22
+ - `AdapterPlanner`
23
+
24
+ - `models/world_model.py`
25
+ - `LightweightRevealStateTransitionModel`
26
+
27
+ - `models/observation_memory.py`
28
+ - `RevealStateCache`
29
+
30
+ ## Trainer modes
31
+
32
+ `train/trainer.py` now supports:
33
+
34
+ - `policy_type: adapter_wrapped`
35
+ - `policy_type: foundation_trunk`
36
+
37
+ Relevant trainer fields:
38
+
39
+ - `training_regime`
40
+ - `eval_mode`
41
+ - `adapter_mode`
42
+ - `adapter_use_transition_model`
43
+ - `adapter_use_task_conditioning`
44
+
45
+ ## Guardrail tests
46
+
47
+ New tests:
48
+
49
+ - `tests/test_trunk_noop_equivalence.py`
50
+ - `tests/test_adapter_gate_blocks_unsafe_retrieve.py`
51
+ - `tests/test_task_specific_loss_masking.py`
52
+ - `tests/test_cloth_specific_metrics_affect_selection.py`
53
+ - `tests/test_general_eval_protocol_is_identical.py`
54
+
55
+ ## Config templates
56
+
57
+ - `train/configs/proxy_adapter_wrapped_clip_base.yaml`
58
+ - `train/configs/proxy_adapter_wrapped_clip_rank_only.yaml`
59
+ - `train/configs/proxy_adapter_wrapped_clip_noop_eval.yaml`
60
+
61
+ ## Benchmark wrappers
62
+
63
+ - `scripts/run_anchor_adapter_ablations.sh`
64
+ - `scripts/run_proxy_adapter_ablations.sh`
65
+ - `scripts/run_target_like_adapter_subset.sh`
66
+
67
+ All new configs and scripts default to `~/workspace` outputs and reports.
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/upstream_pins.md ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Upstream Pins
2
+
3
+ Pinned on 2026-03-22 in `/workspace/third_party`.
4
+
5
+ Mandatory benchmark stack:
6
+
7
+ - `peract_bimanual`: `bb0232a6ba3fe116566e9568f0c7af980ed6703d`
8
+ - `RLBench`: `8af748c51287989294e00c9c670e3330a0e35ed5`
9
+ - `PyRep`: `b8bd1d7a3182adcd570d001649c0849047ebf197`
10
+ - `YARR`: `6822ff78602c77878b27d4cfe759ce029c67bffb`
11
+
12
+ Optional published baseline:
13
+
14
+ - `AnyBimanual`: `76024e48b0e9489101459e85bc909c126ec581b4`
15
+
16
+ Reveal-proxy stack candidate:
17
+
18
+ - `IsaacLab`: `v2.3.1` was cloned for inspection, but it targets Python 3.11 and Isaac Sim 5.x.
19
+ - For the frozen project scope of Python 3.10 on Ubuntu 22.04, env B should stay on an Isaac Sim 4.5-compatible Isaac Lab release instead of the latest branch.
20
+
21
+ Notes:
22
+
23
+ - `peract_bimanual` defaults to 6 cameras and older Python/Torch pins. This repo overrides camera selection and environment creation rather than running the upstream install scripts unchanged.
24
+ - RLBench headless execution on this RunPod host will require an X server setup because the base image does not currently ship `X`, `xvfb`, or `nvidia-xconfig`.
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/xorg.rtx6000.conf ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Section "ServerLayout"
2
+ Identifier "Layout0"
3
+ Screen 0 "Screen0"
4
+ EndSection
5
+
6
+ Section "Monitor"
7
+ Identifier "Monitor0"
8
+ VendorName "Unknown"
9
+ ModelName "Unknown"
10
+ Option "DPMS"
11
+ EndSection
12
+
13
+ Section "Device"
14
+ Identifier "Device0"
15
+ Driver "nvidia"
16
+ VendorName "NVIDIA Corporation"
17
+ BusID "PCI:65:0:0"
18
+ Option "AllowEmptyInitialConfiguration" "True"
19
+ Option "ProbeAllGpus" "False"
20
+ EndSection
21
+
22
+ Section "Screen"
23
+ Identifier "Screen0"
24
+ Device "Device0"
25
+ Monitor "Monitor0"
26
+ DefaultDepth 24
27
+ Option "AllowEmptyInitialConfiguration" "True"
28
+ SubSection "Display"
29
+ Depth 24
30
+ Virtual 1280 1024
31
+ EndSubSection
32
+ EndSection
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/envs/reveal310.yaml ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: reveal310
2
+ channels:
3
+ - pytorch
4
+ - nvidia
5
+ - conda-forge
6
+ dependencies:
7
+ - python=3.10
8
+ - pip
9
+ - git
10
+ - cmake
11
+ - ninja
12
+ - make
13
+ - gxx_linux-64
14
+ - pkg-config
15
+ - numpy=1.26.*
16
+ - pandas=2.2.*
17
+ - scipy=1.13.*
18
+ - matplotlib=3.8.*
19
+ - pyyaml=6.*
20
+ - imageio
21
+ - trimesh
22
+ - networkx
23
+ - psutil
24
+ - tqdm
25
+ - pytorch=2.3.1
26
+ - torchvision=0.18.1
27
+ - torchaudio=2.3.1
28
+ - pytorch-cuda=12.1
29
+ - pip:
30
+ - accelerate==0.31.0
31
+ - einops==0.8.0
32
+ - hydra-core==1.3.2
33
+ - omegaconf==2.3.0
34
+ - safetensors==0.4.3
35
+ - tensorboard==2.16.2
36
+ - timm==1.0.7
37
+ - transformers==4.41.2
38
+ - wandb==0.18.0
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/envs/rlbench310.yaml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: rlbench310
2
+ channels:
3
+ - pytorch
4
+ - nvidia
5
+ - conda-forge
6
+ dependencies:
7
+ - python=3.10
8
+ - pip
9
+ - git
10
+ - cmake
11
+ - cffi
12
+ - ninja
13
+ - make
14
+ - gxx_linux-64
15
+ - pkg-config
16
+ - numpy=1.26.*
17
+ - pandas=2.2.*
18
+ - scipy=1.13.*
19
+ - matplotlib=3.8.*
20
+ - pyyaml=6.*
21
+ - h5py
22
+ - imageio
23
+ - pillow
24
+ - psutil
25
+ - tqdm
26
+ - trimesh
27
+ - pytorch=2.3.1
28
+ - torchvision=0.18.1
29
+ - torchaudio=2.3.1
30
+ - pytorch-cuda=12.1
31
+ - pip:
32
+ - accelerate==0.31.0
33
+ - absl-py==2.1.0
34
+ - clip @ git+https://github.com/openai/CLIP.git
35
+ - einops==0.8.0
36
+ - ftfy==6.2.0
37
+ - gym==0.26.2
38
+ - hydra-core==1.3.2
39
+ - natsort==8.4.0
40
+ - omegaconf==2.3.0
41
+ - perceiver-pytorch==0.8.8
42
+ - pyrender==0.1.45
43
+ - pytorch-lamb==1.0.0
44
+ - regex==2024.5.15
45
+ - rich==13.9.4
46
+ - rich-click==1.8.9
47
+ - safetensors==0.4.3
48
+ - tensorboard==2.16.2
49
+ - transformers==4.41.2
50
+ - wandb==0.18.0
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from eval.metrics import BenchmarkMetrics
2
+
3
+ __all__ = ["BenchmarkMetrics"]
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/ablations.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
# Identifiers of the ablation variants this module marks as mandatory.
# The tuple is immutable and its order is fixed.
MANDATORY_ABLATIONS: tuple[str, ...] = (
    "no_planner", "no_spatial_memory", "no_task_head",
    "no_geometry", "no_camera_pose",
)
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/compare_rlbench_sweeps.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+
9
+ def _load_summary(path: Path) -> dict[str, Any]:
10
+ payload = json.loads(path.read_text(encoding="utf-8"))
11
+ task_scores = {
12
+ task_name: float(task_payload.get("mean_success", 0.0))
13
+ for task_name, task_payload in payload.get("tasks", {}).items()
14
+ }
15
+ task_returns = {
16
+ task_name: float(task_payload.get("mean_return", 0.0))
17
+ for task_name, task_payload in payload.get("tasks", {}).items()
18
+ }
19
+ task_path_recoveries = {
20
+ task_name: float(sum(task_payload.get("path_recoveries", [])) / max(1, len(task_payload.get("path_recoveries", []))))
21
+ for task_name, task_payload in payload.get("tasks", {}).items()
22
+ }
23
+ task_noop_fallbacks = {
24
+ task_name: float(sum(task_payload.get("noop_fallbacks", [])) / max(1, len(task_payload.get("noop_fallbacks", []))))
25
+ for task_name, task_payload in payload.get("tasks", {}).items()
26
+ }
27
+ return {
28
+ "path": str(path),
29
+ "checkpoint": payload.get("checkpoint"),
30
+ "mean_success": float(payload.get("mean_success", 0.0)),
31
+ "mean_return": float(sum(task_returns.values()) / max(1, len(task_returns))),
32
+ "mean_path_recoveries": float(sum(task_path_recoveries.values()) / max(1, len(task_path_recoveries))),
33
+ "mean_noop_fallbacks": float(sum(task_noop_fallbacks.values()) / max(1, len(task_noop_fallbacks))),
34
+ "plan_requested": bool(payload.get("plan_requested", False)),
35
+ "plan_applied": bool(payload.get("plan_applied", False)),
36
+ "no_planner": bool(payload.get("no_planner", False)),
37
+ "no_geometry": bool(payload.get("no_geometry", False)),
38
+ "disable_task_conditioning": bool(payload.get("disable_task_conditioning", False)),
39
+ "compact_world_model": bool(payload.get("compact_world_model", False)),
40
+ "task_scores": task_scores,
41
+ "task_returns": task_returns,
42
+ "task_path_recoveries": task_path_recoveries,
43
+ "task_noop_fallbacks": task_noop_fallbacks,
44
+ "error_tasks": list(payload.get("error_tasks", [])),
45
+ }
46
+
47
+
48
+ def _pairwise_delta(reference: dict[str, Any], candidate: dict[str, Any]) -> dict[str, Any]:
49
+ shared_tasks = sorted(set(reference["task_scores"]) & set(candidate["task_scores"]))
50
+ if not shared_tasks:
51
+ return {
52
+ "shared_task_count": 0,
53
+ "mean_success_delta": 0.0,
54
+ "mean_return_delta": 0.0,
55
+ "mean_path_recoveries_delta": 0.0,
56
+ "mean_noop_fallbacks_delta": 0.0,
57
+ "per_task_delta": {},
58
+ }
59
+ per_task_delta = {
60
+ task_name: float(candidate["task_scores"][task_name] - reference["task_scores"][task_name])
61
+ for task_name in shared_tasks
62
+ }
63
+ return {
64
+ "shared_task_count": len(shared_tasks),
65
+ "mean_success_delta": float(candidate["mean_success"] - reference["mean_success"]),
66
+ "mean_return_delta": float(candidate["mean_return"] - reference["mean_return"]),
67
+ "mean_path_recoveries_delta": float(candidate["mean_path_recoveries"] - reference["mean_path_recoveries"]),
68
+ "mean_noop_fallbacks_delta": float(candidate["mean_noop_fallbacks"] - reference["mean_noop_fallbacks"]),
69
+ "per_task_delta": per_task_delta,
70
+ }
71
+
72
+
73
+ def _markdown_lines(reference_label: str, comparison: dict[str, Any]) -> list[str]:
74
+ lines = [
75
+ "# RLBench Sweep Comparison",
76
+ "",
77
+ f"- Reference: `{reference_label}`",
78
+ "",
79
+ "## Runs",
80
+ "",
81
+ ]
82
+ for label, payload in comparison["runs"].items():
83
+ lines.append(
84
+ f"- `{label}`: mean_success={payload['mean_success']:.3f}, "
85
+ f"mean_return={payload['mean_return']:.3f}, "
86
+ f"mean_path_recoveries={payload['mean_path_recoveries']:.3f}, "
87
+ f"mean_noop_fallbacks={payload['mean_noop_fallbacks']:.3f}, "
88
+ f"plan_applied={payload['plan_applied']}, "
89
+ f"errors={len(payload['error_tasks'])}, "
90
+ f"path=`{payload['path']}`"
91
+ )
92
+ lines.extend(["", "## Pairwise Deltas", ""])
93
+ for label, payload in comparison["pairwise_against_reference"].items():
94
+ lines.append(
95
+ f"- `{label}`: mean_success_delta={payload['mean_success_delta']:.3f}, "
96
+ f"mean_return_delta={payload['mean_return_delta']:.3f}, "
97
+ f"mean_path_recoveries_delta={payload['mean_path_recoveries_delta']:.3f}, "
98
+ f"mean_noop_fallbacks_delta={payload['mean_noop_fallbacks_delta']:.3f}, "
99
+ f"shared_tasks={payload['shared_task_count']}"
100
+ )
101
+ return lines
102
+
103
+
104
def main() -> None:
    """CLI entry point: compare several rollout-eval JSON files against a reference run.

    Each ``--run`` argument has the form ``label=/path/to/rollout_eval.json``.
    The run named by ``--reference-label`` is the baseline; every other run gets
    a pairwise delta. Results are written to ``rlbench_comparison.json`` and
    ``rlbench_comparison.md`` inside ``--output-dir`` (created if missing).

    Raises:
        ValueError: If a ``--run`` value is not ``label=path``, if a label is
            given more than once, or if the reference label is not among the runs.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--run", action="append", required=True, help="label=/abs/path/to/rollout_eval.json")
    parser.add_argument("--reference-label", required=True)
    parser.add_argument("--output-dir", required=True)
    args = parser.parse_args()

    runs: dict[str, dict[str, Any]] = {}
    for item in args.run:
        # partition() never raises, so malformed items get an explicit message
        # instead of a bare "not enough values to unpack".
        label, separator, raw_path = item.partition("=")
        if not separator or not label or not raw_path:
            raise ValueError(f"Invalid --run value {item!r}; expected label=/path/to/rollout_eval.json.")
        if label in runs:
            # A repeated label would silently replace the earlier run.
            raise ValueError(f"Duplicate run label {label!r}; each --run label must be unique.")
        runs[label] = _load_summary(Path(raw_path).resolve())

    if args.reference_label not in runs:
        raise ValueError(f"Missing reference label {args.reference_label!r} in provided runs.")

    reference = runs[args.reference_label]
    comparison = {
        "reference_label": args.reference_label,
        "runs": runs,
        "pairwise_against_reference": {
            label: _pairwise_delta(reference, payload)
            for label, payload in runs.items()
            if label != args.reference_label
        },
    }

    output_dir = Path(args.output_dir).resolve()
    output_dir.mkdir(parents=True, exist_ok=True)
    (output_dir / "rlbench_comparison.json").write_text(
        json.dumps(comparison, indent=2),
        encoding="utf-8",
    )
    (output_dir / "rlbench_comparison.md").write_text(
        "\n".join(_markdown_lines(args.reference_label, comparison)) + "\n",
        encoding="utf-8",
    )


if __name__ == "__main__":
    main()
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/compose_task_routed_proxy_summary.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ from eval.metrics import summarize_episode_records
9
+
10
+
11
+ def _load_benchmark_payload(path: Path) -> dict[str, Any]:
12
+ return json.loads(path.read_text(encoding="utf-8"))
13
+
14
+
15
+ def _infer_label(payload: dict[str, Any]) -> str:
16
+ labels = [key for key in payload.keys() if key != "benchmark_config"]
17
+ if len(labels) != 1:
18
+ raise ValueError(f"Expected exactly one model label in benchmark JSON, found {labels}.")
19
+ return labels[0]
20
+
21
+
22
def _load_records(path: Path, label: str | None) -> tuple[str, list[dict[str, Any]]]:
    """Load the episode records stored under one model label of a benchmark JSON.

    Args:
        path: Benchmark JSON file to read.
        label: Model label to select. A falsy value means the label is inferred
            from the file via ``_infer_label``.

    Returns:
        ``(resolved_label, records)`` where ``records`` is a new list copied from
        the label's ``"episode_records"`` entry (empty if that key is absent).

    Raises:
        KeyError: If the resolved label is not a top-level key of the file.
    """
    benchmark = _load_benchmark_payload(path)
    resolved_label = label if label else _infer_label(benchmark)
    if resolved_label not in benchmark:
        raise KeyError(f"Missing label {resolved_label!r} in benchmark JSON {path}.")
    records = benchmark[resolved_label].get("episode_records", [])
    return resolved_label, list(records)
28
+
29
+
30
+ def _mean_metrics(summary: dict[str, Any]) -> dict[str, float]:
31
+ return {
32
+ "visibility_integral": float(summary.get("visibility_integral", 0.0)),
33
+ "corridor_availability": float(summary.get("corridor_availability", 0.0)),
34
+ "reocclusion_rate": float(summary.get("reocclusion_rate", 0.0)),
35
+ "disturbance_cost": float(summary.get("disturbance_cost", 0.0)),
36
+ "premature_retrieve_rate": float(summary.get("premature_retrieve_rate", 0.0)),
37
+ "reocclusion_after_reveal_rate": float(summary.get("reocclusion_after_reveal_rate", 0.0)),
38
+ "planner_regret": float(summary.get("planner_regret", 0.0)),
39
+ }
40
+
41
+
42
def main() -> None:
    """CLI entry point: merge per-task benchmark results into one routed summary.

    Each ``--source`` has the form ``task_name=/path/to/reveal_benchmark.json``
    and each optional ``--label`` the form ``task_name=model_label``. For every
    source, only the episode records whose ``task_name`` equals the source's
    task are kept. The combined records are summarized with
    ``summarize_episode_records`` and written to ``summary.json`` and
    ``summary.md`` inside ``--output-dir`` (created if missing).

    Raises:
        ValueError: If a ``--source``/``--label`` value is not ``task_name=value``
            or if the same task is given in more than one ``--source``.
    """

    def _split_assignment(option: str, item: str) -> tuple[str, str]:
        # Explicit validation instead of a bare tuple-unpacking error.
        key, separator, value = item.partition("=")
        if not separator or not key or not value:
            raise ValueError(f"Invalid {option} value {item!r}; expected task_name=value.")
        return key, value

    parser = argparse.ArgumentParser()
    parser.add_argument("--source", action="append", required=True, help="task_name=/abs/path/to/reveal_benchmark.json")
    parser.add_argument("--label", action="append", default=[], help="task_name=model_label within the benchmark JSON")
    parser.add_argument("--output-dir", required=True)
    args = parser.parse_args()

    labels_by_task: dict[str, str] = {}
    for item in args.label:
        task_name, label = _split_assignment("--label", item)
        labels_by_task[task_name] = label

    routing_policy: dict[str, str] = {}
    sources: dict[str, str] = {}
    combined_records: list[dict[str, Any]] = []

    for item in args.source:
        task_name, raw_path = _split_assignment("--source", item)
        if task_name in sources:
            # A second source for the same task would add its records to the
            # combined list while overwriting the routing/source entries.
            raise ValueError(f"Duplicate --source for task {task_name!r}; each task must be routed once.")
        path = Path(raw_path).resolve()
        label, records = _load_records(path, labels_by_task.get(task_name))
        task_records = [record for record in records if str(record.get("task_name")) == task_name]
        routing_policy[task_name] = label
        sources[task_name] = str(path)
        combined_records.extend(task_records)

    summary = summarize_episode_records(combined_records)
    per_task_success = summary.get("per_task_success", {})
    payload = {
        "controller": "task_routed_checkpoint_selection",
        "routing_policy": routing_policy,
        "per_task_success": per_task_success,
        # Unweighted mean over tasks; 0.0 when no task has a success entry.
        "mean_success": float(sum(per_task_success.values()) / max(1, len(per_task_success))),
        "mean_metrics": _mean_metrics(summary),
        "sources": sources,
    }

    output_dir = Path(args.output_dir).resolve()
    output_dir.mkdir(parents=True, exist_ok=True)
    (output_dir / "summary.json").write_text(json.dumps(payload, indent=2), encoding="utf-8")
    lines = [
        "# Task-Routed Proxy Controller",
        "",
        "- routing rule: "
        + ", ".join(f"`{task} -> {label}`" for task, label in routing_policy.items()),
        f"- mean success: `{payload['mean_success']:.4f}`",
        "",
        "## Per-Task Success",
        "",
    ]
    for task_name, score in per_task_success.items():
        lines.append(f"- {task_name}: `{score:.2f}`")
    lines.extend(["", "## Sources", ""])
    for task_name, source in sources.items():
        lines.append(f"- {task_name}: `{source}`")
    (output_dir / "summary.md").write_text("\n".join(lines) + "\n", encoding="utf-8")


if __name__ == "__main__":
    main()
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/protocols.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Sequence
4
+
5
+
6
def build_eval_protocol(
    *,
    eval_mode: str,
    task_subset: Sequence[str],
    seed: int = 17,
    episodes: int = 25,
    episode_length: int = 120,
    resolution: int = 256,
    cameras: Sequence[str] = ("front", "left_wrist", "right_wrist"),
) -> dict[str, object]:
    """Assemble an evaluation-protocol description as a plain dict.

    All arguments are keyword-only. Values are coerced to ``str``/``int`` and
    sequences are stored as tuples of strings.

    Returns:
        The caller-supplied settings plus three fixed entries:
        ``observation_stack``, ``action_horizon`` and ``action_space``.
    """
    protocol: dict[str, object] = {}
    protocol["eval_mode"] = str(eval_mode)
    protocol["task_subset"] = tuple(map(str, task_subset))
    protocol["seed"] = int(seed)
    protocol["episodes"] = int(episodes)
    protocol["episode_length"] = int(episode_length)
    protocol["resolution"] = int(resolution)
    protocol["cameras"] = tuple(map(str, cameras))
    # Constants that are not configurable through this builder.
    protocol["observation_stack"] = "rgbd_3cam"
    protocol["action_horizon"] = 8
    protocol["action_space"] = "bimanual_delta_pose"
    return protocol
28
+
29
+
30
def protocol_identity_signature(protocol: dict[str, object]) -> tuple[object, ...]:
    """Return the protocol values used to decide whether two protocols match.

    The tuple follows the order of ``identity_keys``. ``eval_mode`` is not part
    of the signature, so protocols that differ only in mode compare equal.

    Raises:
        KeyError: If ``protocol`` lacks one of the identity keys.
    """
    identity_keys = (
        "task_subset",
        "seed",
        "episodes",
        "episode_length",
        "resolution",
        "cameras",
        "observation_stack",
        "action_horizon",
        "action_space",
    )
    return tuple(protocol[key] for key in identity_keys)
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_anybimanual_anchor_eval.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import csv
5
+ import json
6
+ import os
7
+ import re
8
+ import shutil
9
+ import subprocess
10
+ import sys
11
+ from pathlib import Path
12
+
13
+ from models.action_decoder import infer_task_name_from_text
14
+
15
+
16
def _infer_task_families(tasks: list[str], task_name: str) -> list[str]:
    """Return the sorted, de-duplicated families inferred for ``task_name`` and every entry of ``tasks``.

    Each string is mapped through ``infer_task_name_from_text``; ``task_name``
    is mapped first, then the entries of ``tasks`` in order.
    """
    unique_families = {
        infer_task_name_from_text(task_name),
        *(infer_task_name_from_text(task) for task in tasks),
    }
    return sorted(unique_families)
20
+
21
+
22
+ def _parse_episode_scores(stdout: str) -> list[float]:
23
+ scores: list[float] = []
24
+ pattern = re.compile(r"Episode\s+\d+\s+\|\s+Score:\s*([0-9]+(?:\.[0-9]+)?)")
25
+ for match in pattern.finditer(stdout):
26
+ scores.append(float(match.group(1)))
27
+ return scores
28
+
29
+
30
+ def _parse_final_score(stdout: str) -> float | None:
31
+ match = re.search(r"Final Score:\s*([0-9]+(?:\.[0-9]+)?)", stdout)
32
+ if match is None:
33
+ return None
34
+ return float(match.group(1))
35
+
36
+
37
+ def _parse_eval_data_score(eval_csv_path: Path) -> float | None:
38
+ if not eval_csv_path.exists():
39
+ return None
40
+ with eval_csv_path.open("r", encoding="utf-8") as handle:
41
+ reader = csv.DictReader(handle)
42
+ last_row: dict[str, str] | None = None
43
+ for row in reader:
44
+ last_row = row
45
+ if last_row is None:
46
+ return None
47
+ value = last_row.get("eval_envs/return")
48
+ if value is None or value == "":
49
+ return None
50
+ return float(value)
51
+
52
+
53
+ def _bootstrap_eval_seed_dir(
54
+ *,
55
+ framework_logdir: Path,
56
+ task_name: str,
57
+ method: str,
58
+ ) -> None:
59
+ seed_dir = framework_logdir / task_name / method / "seed0"
60
+ if (seed_dir / "config.yaml").exists():
61
+ return
62
+ release_seed_dir = Path("/workspace/baselines/AnyBimanual_release_eval_live") / task_name / method / "seed0"
63
+ if not (release_seed_dir / "config.yaml").exists():
64
+ return
65
+ seed_dir.mkdir(parents=True, exist_ok=True)
66
+ shutil.copy2(release_seed_dir / "config.yaml", seed_dir / "config.yaml")
67
+ release_weights_dir = release_seed_dir / "weights"
68
+ if release_weights_dir.exists() and not (seed_dir / "weights").exists():
69
+ os.symlink(release_weights_dir, seed_dir / "weights", target_is_directory=True)
70
+
71
+
72
def main() -> None:
    """CLI entry point: run the AnyBimanual ``eval.py`` script and summarize its result.

    Steps: parse arguments, prepare the output and framework log directories,
    bootstrap the seed directory, launch the evaluation as a subprocess with
    stdout/stderr captured to files, then derive a score (preferring the eval
    CSV, falling back to the ``Final Score`` line in stdout) and write
    ``command.txt`` and ``summary.json`` to the output directory.

    Raises:
        NotImplementedError: If ``--adapter-mode adapter_active`` is requested
            and any resolved task family is not ``"generic"``.
        SystemExit: With the subprocess return code when it is non-zero.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--task-name", required=True, help="Existing AnyBimanual logdir task name, e.g. perlf_release_dual_push_buttons_smoke1")
    parser.add_argument("--tasks", nargs="+", required=True, help="RLBench task module names, e.g. dual_push_buttons")
    parser.add_argument("--adapter-mode", choices=("trunk_only", "adapter_noop", "adapter_active"), default="trunk_only")
    parser.add_argument("--episodes", type=int, default=1)
    parser.add_argument("--eval-type", default="60000")
    parser.add_argument("--output-dir", required=True)
    parser.add_argument("--framework-logdir", default=None)
    parser.add_argument("--demo-path", default="/workspace/baselines/AnyBimanual_subset3_demo_root")
    parser.add_argument("--method", default="PERACT_BC")
    parser.add_argument("--gpu", type=int, default=0)
    args = parser.parse_args()

    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    if args.framework_logdir:
        framework_logdir = Path(args.framework_logdir)
    else:
        framework_logdir = output_dir / "anybimanual_logdir"
    framework_logdir.mkdir(parents=True, exist_ok=True)
    _bootstrap_eval_seed_dir(
        framework_logdir=framework_logdir,
        task_name=args.task_name,
        method=args.method,
    )

    task_families = _infer_task_families(list(args.tasks), args.task_name)
    passthrough_only = all(family == "generic" for family in task_families)
    if args.adapter_mode == "adapter_active" and not passthrough_only:
        raise NotImplementedError(
            "The AnyBimanual anchor bridge only supports generic-task pass-through active mode for now. "
            f"Resolved task families: {task_families}"
        )

    # NOTE(review): adapter_mode is recorded in the summary and gates the check
    # above, but it is not part of the command line or environment built here.
    task_list = ",".join(args.tasks)
    command = [
        sys.executable,
        "/workspace/third_party/AnyBimanual/eval.py",
        f"method={args.method}",
        f"framework.logdir={framework_logdir}",
        "framework.start_seed=0",
        f"framework.eval_type={args.eval_type}",
        f"framework.eval_episodes={args.episodes}",
        "framework.eval_envs=1",
        f"framework.gpu={args.gpu}",
        f"rlbench.task_name={args.task_name}",
        f"rlbench.tasks=[{task_list}]",
        f"rlbench.demo_path={args.demo_path}",
        "rlbench.headless=True",
        "rlbench.gripper_mode=BimanualDiscrete",
        "rlbench.arm_action_mode=BimanualEndEffectorPoseViaPlanning",
        "rlbench.action_mode=BimanualMoveArmThenGripper",
    ]

    # Inherit the caller's environment; defaults only fill in unset variables.
    env = os.environ.copy()
    env.setdefault("DISPLAY", ":99")
    env.setdefault("XDG_RUNTIME_DIR", "/workspace/runtime")
    coppelia_root = env.setdefault("COPPELIASIM_ROOT", "/workspace/assets/coppeliasim_v4_1_0")
    env.setdefault("QT_QPA_PLATFORM_PLUGIN_PATH", coppelia_root)
    # rstrip drops the trailing ":" left when LD_LIBRARY_PATH was previously unset.
    env["LD_LIBRARY_PATH"] = f"{coppelia_root}:{env.get('LD_LIBRARY_PATH', '')}".rstrip(":")
    search_paths = [
        "/workspace/third_party/RLBench",
        "/workspace/third_party/YARR",
        "/workspace/third_party/AnyBimanual",
        "/workspace/reveal_vla_bimanual",
    ]
    inherited_pythonpath = env.get("PYTHONPATH", "")
    if inherited_pythonpath:
        search_paths.append(inherited_pythonpath)
    env["PYTHONPATH"] = ":".join(search_paths)

    stdout_path = output_dir / "stdout.txt"
    stderr_path = output_dir / "stderr.txt"
    with stdout_path.open("w", encoding="utf-8") as stdout_handle, stderr_path.open("w", encoding="utf-8") as stderr_handle:
        completed = subprocess.run(
            command,
            env=env,
            text=True,
            stdout=stdout_handle,
            stderr=stderr_handle,
            close_fds=True,
            check=False,
        )
    stdout = stdout_path.read_text(encoding="utf-8")
    # NOTE(review): the captured stderr text is read back but not used afterwards.
    stderr_path.read_text(encoding="utf-8")

    scores = _parse_episode_scores(stdout)
    eval_csv_path = framework_logdir / args.task_name / args.method / "seed0" / "eval_data.csv"
    final_score = _parse_eval_data_score(eval_csv_path)
    if final_score is None:
        final_score = _parse_final_score(stdout)

    # mean_score: final score if available, else the mean of per-episode scores, else 0.0.
    if final_score is not None:
        mean_score = final_score
    elif scores:
        mean_score = sum(scores) / float(len(scores))
    else:
        mean_score = 0.0

    payload = {
        "adapter_mode": args.adapter_mode,
        "task_name": args.task_name,
        "tasks": list(args.tasks),
        "task_families": task_families,
        "passthrough_only": passthrough_only,
        "passthrough_reason": "generic_task_family" if passthrough_only else "unsupported_active_family",
        "episodes_requested": int(args.episodes),
        "episode_scores": scores,
        "mean_score": mean_score,
        "final_score": final_score,
        "subprocess_returncode": int(completed.returncode),
        "eval_csv_path": str(eval_csv_path),
        "command": command,
    }
    rendered = json.dumps(payload, indent=2)
    (output_dir / "command.txt").write_text(" ".join(command) + "\n", encoding="utf-8")
    (output_dir / "summary.json").write_text(rendered + "\n", encoding="utf-8")
    print(rendered)
    # The summary is written first so a failed run still leaves a record behind.
    if completed.returncode != 0:
        raise SystemExit(completed.returncode)


if __name__ == "__main__":
    main()
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/pyproject.toml ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "reveal-vla-bimanual"
7
+ version = "0.1.0"
8
+ description = "Language-conditioned bimanual reveal-and-retrieve policy prototype"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10,<3.11"
11
+ dependencies = [
12
+ "accelerate>=0.31.0",
13
+ "einops>=0.7.0",
14
+ "hydra-core>=1.3.2",
15
+ "matplotlib>=3.8.0",
16
+ "numpy>=1.26,<2.0",
17
+ "omegaconf>=2.3.0",
18
+ "pandas>=2.2.0",
19
+ "pyyaml>=6.0.1",
20
+ "safetensors>=0.4.3",
21
+ "tensorboard>=2.16.2",
22
+ "timm>=1.0.7",
23
+ "torch>=2.3.0",
24
+ "torchvision>=0.18.0",
25
+ "transformers>=4.41.0",
26
+ ]
27
+
28
+ [tool.setuptools]
29
+ include-package-data = true
30
+
31
+ [tool.setuptools.packages.find]
32
+ include = ["sim_rlbench*", "sim_reveal*", "models*", "train*", "eval*", "pytorch3d*"]
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sim_reveal.base import RevealProxyConfig, RevealState, SupportMode
2
+ from sim_reveal.procedural_envs import ProceduralRevealEnv, available_proxy_names, make_proxy_env
3
+ from sim_reveal.proxy_specs import BAG_PROXY, CLOTH_PROXY, FOLIAGE_PROXY
4
+
5
+ __all__ = [
6
+ "BAG_PROXY",
7
+ "CLOTH_PROXY",
8
+ "FOLIAGE_PROXY",
9
+ "ProceduralRevealEnv",
10
+ "RevealProxyConfig",
11
+ "RevealState",
12
+ "SupportMode",
13
+ "available_proxy_names",
14
+ "make_proxy_env",
15
+ ]
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/base.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass, field
4
+ from enum import IntEnum
5
+
6
+ import numpy as np
7
+
8
+
9
class SupportMode(IntEnum):
    """Three support modes with fixed integer codes 0, 1 and 2.

    Being an ``IntEnum``, members compare equal to their integer values.
    """

    HOLD = 0
    TRANSFER = 1
    PASSIVE = 2
13
+
14
+
15
@dataclass
class RevealState:
    """Container grouping the array-valued fields of a reveal state.

    Four arrays are required; ``belief_map`` is optional and defaults to
    ``None``. Array shapes are not constrained by this class.
    """

    # Required fields (positional order matters for the generated __init__).
    support_mode_logits: np.ndarray
    corridor_logits: np.ndarray
    persistence_horizon: np.ndarray
    disturbance_cost: np.ndarray
    # Optional field.
    belief_map: np.ndarray | None = None
22
+
23
+
24
@dataclass
class RevealProxyConfig:
    """Configuration record for a reveal proxy.

    Only ``name`` is required; every other field has a default. ``metadata``
    uses a default factory so each instance gets its own dict.
    """

    name: str
    # Integer settings and their defaults.
    num_templates: int = 32
    rollout_horizon: int = 5
    max_steps: int = 80
    # Key names and their defaults.
    disturbance_key: str = "disturbance_cost"
    success_key: str = "retrieval_success"
    # Free-form string-to-string annotations.
    metadata: dict[str, str] = field(default_factory=dict)
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/build_task_specialized_episode_specs.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ from pathlib import Path
6
+
7
+ from sim_reveal.proxy_specs import TASK_ID_BY_NAME, TASK_NAME_BY_PROXY_NAME
8
+
9
+
10
def _proxy_name_for_task(task_name: str) -> str:
    """Reverse lookup in ``TASK_NAME_BY_PROXY_NAME``: return the first proxy mapped to ``task_name``.

    Raises:
        KeyError: If no proxy maps to ``task_name``.
    """
    not_found = object()
    matching_proxies = (
        proxy_name
        for proxy_name, mapped_task_name in TASK_NAME_BY_PROXY_NAME.items()
        if mapped_task_name == task_name
    )
    proxy = next(matching_proxies, not_found)
    if proxy is not_found:
        raise KeyError(f"Unknown task name: {task_name}")
    return proxy
15
+
16
+
17
def main() -> None:
    """CLI entry point: write a JSON list of episode specs for one task.

    Each ``--block`` has the form ``stress_slice,difficulty_bin,count`` and
    contributes ``count`` episode specs. Episode ids run consecutively across
    blocks; each seed is ``base_seed + block_index * 10_000 + sample_index``.
    The spec list is written to ``--output-path`` and a short JSON report is
    printed.

    Raises:
        KeyError: If ``--task-name`` is not a known task.
        ValueError: If a ``--block`` value does not have exactly three
            comma-separated parts, or its count is not a non-negative integer.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--task-name", required=True)
    parser.add_argument("--base-seed", type=int, default=0)
    parser.add_argument(
        "--block",
        action="append",
        required=True,
        help="stress_slice,difficulty_bin,count",
    )
    parser.add_argument("--output-path", required=True)
    args = parser.parse_args()

    task_name = str(args.task_name)
    if task_name not in TASK_ID_BY_NAME:
        raise KeyError(f"Unknown task name: {task_name}")
    proxy_name = _proxy_name_for_task(task_name)
    task_id = TASK_ID_BY_NAME[task_name]

    specs: list[dict[str, object]] = []
    episode_index = 0
    for block_index, raw_block in enumerate(args.block):
        parts = [part.strip() for part in raw_block.split(",")]
        if len(parts) != 3:
            raise ValueError(
                f"Invalid --block value {raw_block!r}; expected stress_slice,difficulty_bin,count."
            )
        stress_slice, difficulty_bin, raw_count = parts
        try:
            count = int(raw_count)
        except ValueError:
            raise ValueError(
                f"Invalid --block value {raw_block!r}; count {raw_count!r} is not an integer."
            ) from None
        if count < 0:
            # A negative count would otherwise silently produce no episodes.
            raise ValueError(f"Invalid --block value {raw_block!r}; count must be non-negative.")
        # NOTE(review): seeds of consecutive blocks are 10_000 apart, so a block
        # with more than 10_000 episodes would reuse seeds of the next block.
        for sample_index in range(count):
            specs.append(
                {
                    "proxy_name": proxy_name,
                    "task_name": task_name,
                    "task_id": int(task_id),
                    "stress_slice": stress_slice,
                    "difficulty_bin": difficulty_bin,
                    "episode_id": episode_index,
                    "episode_index": episode_index,
                    "seed": int(args.base_seed) + block_index * 10_000 + sample_index,
                }
            )
            episode_index += 1

    output_path = Path(args.output_path).resolve()
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(specs, indent=2), encoding="utf-8")
    print(
        json.dumps(
            {
                "output_path": str(output_path),
                "task_name": task_name,
                "episodes": len(specs),
                "blocks": args.block,
            },
            indent=2,
        )
    )


if __name__ == "__main__":
    main()
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/dataset.py ADDED
@@ -0,0 +1,634 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import pickle
4
+ from pathlib import Path
5
+ from typing import Any, Callable, Sequence
6
+
7
+ import torch
8
+ from torch import Tensor
9
+ from torch.utils.data import Dataset
10
+
11
+ import numpy as np
12
+
13
+ from sim_reveal.procedural_envs import available_proxy_names, default_camera_matrices, make_proxy_env, render_views_from_state
14
+ from sim_reveal.proxy_specs import task_id_from_task_name, task_name_from_proxy_name
15
+
16
# Dataset version tags. dataset_uses_rgbd() treats any version string that
# starts with the RGBD tag as an RGB-D dataset.
NOLEAK_PROXY_DATASET_VERSION = "reveal_proxy_v5_noleak_actionhist"
RGBD_PROXY_DATASET_VERSION = "reveal_proxy_v6_rgbd_elastic_state"

# Render-state keys whose presence marks a sample as a legacy "leaked" sample;
# _assert_noleak_sample() rejects any sample that contains one of them.
LEGACY_PRIVILEGED_RENDER_KEYS = frozenset(
    (
        "target_template",
        "support_mode",
        "visibility",
        "actor_template",
        "actor_progress",
        "corridor_current",
    )
)
28
+
29
+
30
def dataset_uses_rgbd(dataset_version: Any) -> bool:
    """Report whether ``dataset_version`` names an RGB-D proxy dataset.

    Falsy inputs (``None``, ``""``, ...) are treated as the empty string; any
    other value is converted with ``str`` and checked for the
    ``RGBD_PROXY_DATASET_VERSION`` prefix.
    """
    version_text = str(dataset_version) if dataset_version else ""
    return version_text.startswith(RGBD_PROXY_DATASET_VERSION)
33
+
34
+
35
def _assert_noleak_sample(sample: dict[str, Any]) -> None:
    """Reject samples whose render state still carries legacy privileged keys.

    Args:
        sample: Dataset sample; its optional ``"render_state"`` entry is checked.

    Raises:
        ValueError: If the render state contains any key listed in
            ``LEGACY_PRIVILEGED_RENDER_KEYS``. The message names the offending
            keys in sorted order.
    """
    leaked_keys = sorted(LEGACY_PRIVILEGED_RENDER_KEYS.intersection(sample.get("render_state", {})))
    if not leaked_keys:
        return
    raise ValueError(
        "Legacy leaked proxy sample detected. Rebuild the dataset with the current "
        f"sim_reveal/procedural_envs.py. Privileged render keys found: {', '.join(leaked_keys)}"
    )
44
+
45
+
46
+ def collect_teacher_dataset(
47
+ proxy_names: Sequence[str] | None = None,
48
+ episodes_per_proxy: int = 32,
49
+ resolution: int = 96,
50
+ seed: int = 0,
51
+ chunk_horizon: int = 8,
52
+ rollout_horizon: int = 5,
53
+ history_steps: int = 2,
54
+ planner_candidates: int = 4,
55
+ dataset_version: str = NOLEAK_PROXY_DATASET_VERSION,
56
+ episode_specs: Sequence[dict[str, Any]] | None = None,
57
+ proposal_target_builder: Callable[[Any, dict[str, Any], dict[str, Any]], dict[str, Any] | None] | None = None,
58
+ ) -> dict[str, Any]:
59
+ proxy_names = tuple(proxy_names or available_proxy_names())
60
+ samples: list[dict[str, Any]] = []
61
+ summary: dict[str, dict[str, float]] = {}
62
+ if episode_specs is None:
63
+ resolved_episode_specs = []
64
+ for proxy_offset, proxy_name in enumerate(proxy_names):
65
+ for episode_idx in range(episodes_per_proxy):
66
+ resolved_episode_specs.append(
67
+ {
68
+ "proxy_name": proxy_name,
69
+ "episode_id": episode_idx,
70
+ "seed": seed + proxy_offset * 10_000 + episode_idx,
71
+ "stress_slice": "nominal",
72
+ "difficulty_bin": "medium",
73
+ }
74
+ )
75
+ else:
76
+ resolved_episode_specs = list(episode_specs)
77
+
78
+ specs_by_proxy: dict[str, list[dict[str, Any]]] = {proxy_name: [] for proxy_name in proxy_names}
79
+ for spec in resolved_episode_specs:
80
+ proxy_name = str(spec["proxy_name"])
81
+ specs_by_proxy.setdefault(proxy_name, []).append(spec)
82
+
83
+ for proxy_name in proxy_names:
84
+ proxy_specs = specs_by_proxy.get(proxy_name, [])
85
+ proxy_samples = 0
86
+ proxy_success = 0
87
+ for episode_idx, episode_spec in enumerate(proxy_specs):
88
+ episode_seed = int(episode_spec.get("seed", seed + episode_idx))
89
+ env = make_proxy_env(
90
+ proxy_name=proxy_name,
91
+ resolution=resolution,
92
+ seed=episode_seed,
93
+ rollout_horizon=rollout_horizon,
94
+ stress_slice=str(episode_spec.get("stress_slice", "nominal")),
95
+ difficulty_bin=str(episode_spec.get("difficulty_bin", "medium")),
96
+ )
97
+ observation, privileged_state = env.reset(seed=episode_seed)
98
+ history_buffer: list[dict[str, Any]] = []
99
+ while True:
100
+ action_chunk, rollout = env.teacher_chunk_and_rollout(
101
+ chunk_horizon=chunk_horizon,
102
+ rollout_horizon=rollout_horizon,
103
+ )
104
+ observation = env.get_observation(privileged_state)
105
+ candidate_action_chunks, candidate_outcomes = env.sample_candidate_action_chunks(
106
+ teacher_chunk=action_chunk,
107
+ num_candidates=planner_candidates,
108
+ rollout_horizon=rollout_horizon,
109
+ )
110
+ padded_history_render_states = []
111
+ padded_history_proprio = []
112
+ padded_history_actions = []
113
+ padded_history_camera_intrinsics = []
114
+ padded_history_camera_extrinsics = []
115
+ padded_history_camera_valid_mask = []
116
+ history_count = min(history_steps, len(history_buffer))
117
+ pad_count = history_steps - history_count
118
+ if history_count > 0:
119
+ recent_history = history_buffer[-history_count:]
120
+ else:
121
+ recent_history = []
122
+ for _ in range(pad_count):
123
+ padded_history_render_states.append(env.render_state(privileged_state))
124
+ padded_history_proprio.append(np.zeros_like(observation["proprio"], dtype=np.float32))
125
+ padded_history_actions.append(np.zeros((action_chunk.shape[-1],), dtype=np.float32))
126
+ padded_history_camera_intrinsics.append(np.zeros((3, 3, 3), dtype=np.float32))
127
+ padded_history_camera_extrinsics.append(np.zeros((3, 4, 4), dtype=np.float32))
128
+ padded_history_camera_valid_mask.append(np.zeros((3,), dtype=np.float32))
129
+ for item in recent_history:
130
+ padded_history_render_states.append(item["render_state"])
131
+ padded_history_proprio.append(item["proprio"])
132
+ padded_history_actions.append(item["action"])
133
+ padded_history_camera_intrinsics.append(item["camera_intrinsics"])
134
+ padded_history_camera_extrinsics.append(item["camera_extrinsics"])
135
+ padded_history_camera_valid_mask.append(item["camera_valid_mask"])
136
+ task_name = str(observation.get("task_name", task_name_from_proxy_name(proxy_name)))
137
+ task_id = int(observation.get("task_id", task_id_from_task_name(task_name)))
138
+ sample = {
139
+ "dataset_version": dataset_version,
140
+ "proxy_name": proxy_name,
141
+ "episode_id": int(episode_spec.get("episode_id", episode_idx)),
142
+ "episode_seed": episode_seed,
143
+ "task_name": task_name,
144
+ "task_id": task_id,
145
+ "stress_slice": str(observation.get("stress_slice", episode_spec.get("stress_slice", "nominal"))),
146
+ "difficulty_bin": str(observation.get("difficulty_bin", episode_spec.get("difficulty_bin", "medium"))),
147
+ "episode_metadata": dict(observation.get("episode_metadata", {})),
148
+ "render_state": env.render_state(privileged_state),
149
+ "camera_intrinsics": observation.get("camera_intrinsics", default_camera_matrices()[0]).astype("float32"),
150
+ "camera_extrinsics": observation.get("camera_extrinsics", default_camera_matrices()[1]).astype("float32"),
151
+ "camera_valid_mask": observation.get("camera_valid_mask", np.ones((3,), dtype=np.float32)).astype("float32"),
152
+ "proprio": observation["proprio"].astype("float32"),
153
+ "language_goal": observation["text"],
154
+ "action_chunk": action_chunk.astype("float32"),
155
+ "support_mode": int(privileged_state["support_mode"]),
156
+ "phase": int(privileged_state.get("phase_label", 0)),
157
+ "subgoal_progress": float(privileged_state.get("subgoal_progress", 0.0)),
158
+ "corridor_feasible": privileged_state["corridor_feasible"].astype("float32"),
159
+ "persistence_horizon": privileged_state["persistence_horizon"].astype("float32"),
160
+ "disturbance_cost": float(privileged_state["disturbance_cost"]),
161
+ "belief_map": privileged_state["belief_map"].astype("float32"),
162
+ "visibility_map": privileged_state["visibility_map"].astype("float32"),
163
+ "clearance_map": privileged_state["clearance_map"].astype("float32"),
164
+ "occluder_contact_map": privileged_state["occluder_contact_map"].astype("float32"),
165
+ "grasp_affordance_map": privileged_state["grasp_affordance_map"].astype("float32"),
166
+ "support_stability": float(privileged_state["support_stability"]),
167
+ "support_stability_map": privileged_state["support_stability_map"].astype("float32"),
168
+ "reocclusion_target": float(privileged_state["reocclusion_target"]),
169
+ "reocclusion_map": privileged_state["reocclusion_map"].astype("float32"),
170
+ "gap_width": float(privileged_state.get("gap_width", 0.0)),
171
+ "damage_proxy": float(privileged_state.get("damage_proxy", 0.0)),
172
+ "release_collapse_rate": float(privileged_state.get("release_collapse_rate", 0.0)),
173
+ "target_visibility_confidence": float(privileged_state.get("target_visibility_confidence", 0.0)),
174
+ "mouth_aperture": float(privileged_state.get("mouth_aperture", 0.0)),
175
+ "hold_quality": float(privileged_state.get("hold_quality", 0.0)),
176
+ "rim_slip_risk": float(privileged_state.get("rim_slip_risk", 0.0)),
177
+ "insertable_actor_corridor": float(privileged_state.get("insertable_actor_corridor", 0.0)),
178
+ "layer_separation_quality": float(privileged_state.get("layer_separation_quality", 0.0)),
179
+ "fold_preservation": float(privileged_state.get("fold_preservation", 0.0)),
180
+ "insertion_corridor": float(privileged_state.get("insertion_corridor", 0.0)),
181
+ "top_layer_stability": float(privileged_state.get("top_layer_stability", 0.0)),
182
+ "lift_too_much_risk": float(privileged_state.get("lift_too_much_risk", 0.0)),
183
+ "rollout_support_mode": rollout["rollout_support_mode"].astype("int64"),
184
+ "rollout_phase": rollout.get("rollout_phase", np.zeros((rollout["rollout_support_mode"].shape[0],), dtype=np.int64)).astype("int64"),
185
+ "rollout_corridor_feasible": rollout["rollout_corridor_feasible"].astype("float32"),
186
+ "rollout_persistence_horizon": rollout["rollout_persistence_horizon"].astype("float32"),
187
+ "rollout_disturbance_cost": rollout["rollout_disturbance_cost"].astype("float32"),
188
+ "rollout_belief_map": rollout["rollout_belief_map"].astype("float32"),
189
+ "rollout_visibility_map": rollout["rollout_visibility_map"].astype("float32"),
190
+ "rollout_clearance_map": rollout["rollout_clearance_map"].astype("float32"),
191
+ "rollout_support_stability": rollout["rollout_support_stability"].astype("float32"),
192
+ "rollout_reocclusion_target": rollout["rollout_reocclusion_target"].astype("float32"),
193
+ "rollout_occluder_contact_map": rollout["rollout_occluder_contact_map"].astype("float32"),
194
+ "rollout_grasp_affordance_map": rollout["rollout_grasp_affordance_map"].astype("float32"),
195
+ "history_render_states": padded_history_render_states,
196
+ "history_proprio": np.stack(padded_history_proprio, axis=0).astype("float32")
197
+ if padded_history_proprio
198
+ else np.zeros((0, observation["proprio"].shape[0]), dtype=np.float32),
199
+ "history_actions": np.stack(padded_history_actions, axis=0).astype("float32")
200
+ if padded_history_actions
201
+ else np.zeros((0, action_chunk.shape[-1]), dtype=np.float32),
202
+ "history_camera_intrinsics": np.stack(padded_history_camera_intrinsics, axis=0).astype("float32")
203
+ if padded_history_camera_intrinsics
204
+ else np.zeros((0, 3, 3, 3), dtype=np.float32),
205
+ "history_camera_extrinsics": np.stack(padded_history_camera_extrinsics, axis=0).astype("float32")
206
+ if padded_history_camera_extrinsics
207
+ else np.zeros((0, 3, 4, 4), dtype=np.float32),
208
+ "history_camera_valid_mask": np.stack(padded_history_camera_valid_mask, axis=0).astype("float32")
209
+ if padded_history_camera_valid_mask
210
+ else np.zeros((0, 3), dtype=np.float32),
211
+ "candidate_action_chunks": candidate_action_chunks.astype("float32"),
212
+ **candidate_outcomes,
213
+ }
214
+ if proposal_target_builder is not None:
215
+ extra_fields = proposal_target_builder(env, observation, sample)
216
+ if extra_fields:
217
+ sample.update(extra_fields)
218
+ samples.append(sample)
219
+ proxy_samples += 1
220
+ executed_action = env.teacher_action().astype("float32")
221
+ _, _, terminated, truncated, privileged_state = env.step(executed_action)
222
+ history_buffer.append(
223
+ {
224
+ "render_state": env.render_state(privileged_state),
225
+ "proprio": env.get_observation(privileged_state)["proprio"].astype("float32"),
226
+ "action": executed_action,
227
+ "camera_intrinsics": env.get_observation(privileged_state).get("camera_intrinsics", default_camera_matrices()[0]).astype("float32"),
228
+ "camera_extrinsics": env.get_observation(privileged_state).get("camera_extrinsics", default_camera_matrices()[1]).astype("float32"),
229
+ "camera_valid_mask": env.get_observation(privileged_state).get("camera_valid_mask", np.ones((3,), dtype=np.float32)).astype("float32"),
230
+ }
231
+ )
232
+ if terminated:
233
+ proxy_success += 1
234
+ if terminated or truncated:
235
+ break
236
+ summary[proxy_name] = {
237
+ "episodes": float(len(proxy_specs)),
238
+ "samples": float(proxy_samples),
239
+ "teacher_success": proxy_success / float(max(1, len(proxy_specs))),
240
+ }
241
+ return {
242
+ "dataset_version": dataset_version,
243
+ "resolution": resolution,
244
+ "chunk_horizon": chunk_horizon,
245
+ "rollout_horizon": rollout_horizon,
246
+ "history_steps": history_steps,
247
+ "planner_candidates": planner_candidates,
248
+ "episode_specs": resolved_episode_specs,
249
+ "samples": samples,
250
+ "summary": summary,
251
+ }
252
+
253
+
254
def save_teacher_dataset(output_path: str | Path, dataset_bundle: dict[str, Any]) -> Path:
    """Serialize a teacher dataset bundle to disk with ``torch.save``.

    Args:
        output_path: Destination file; missing parent directories are created.
        dataset_bundle: The bundle dictionary to write.

    Returns:
        The destination as a ``Path``.
    """
    destination = Path(output_path)
    # Make sure the target directory exists before torch opens the file.
    destination.parent.mkdir(parents=True, exist_ok=True)
    torch.save(dataset_bundle, destination)
    return destination
259
+
260
+
261
def load_teacher_dataset(dataset_path: str | Path) -> dict[str, Any]:
    """Load a dataset bundle from ``dataset_path`` onto the CPU.

    Args:
        dataset_path: Location of the file to read.

    Returns:
        Whatever object ``torch.load`` reconstructs from the file.
    """
    bundle_file = Path(dataset_path)
    # NOTE(security): weights_only=False lets torch unpickle arbitrary Python
    # objects, which can execute code. Only load files from a trusted source.
    return torch.load(bundle_file, map_location="cpu", weights_only=False)
263
+
264
+
265
class RevealOfflineDataset(Dataset[dict[str, Any]]):
    """Map-style dataset that turns stored sample dicts into tensor dicts.

    Views are rendered on demand from each sample's stored render state and
    memoized; fully built items are memoized per index as well, so repeated
    indexing returns the very same dict object.
    """

    def __init__(self, samples: Sequence[dict[str, Any]], resolution: int = 96) -> None:
        """Copy ``samples`` into a list and start with empty caches.

        Args:
            samples: Sample dictionaries to serve.
            resolution: Resolution passed to the renderer and used for the
                spatial size of zero-filled placeholder tensors.
        """
        self.samples = list(samples)
        self.resolution = resolution
        # Rendered views keyed by the pickled (proxy, resolution, depth flag, state) tuple.
        self._render_cache: dict[bytes, dict[str, np.ndarray]] = {}
        # Finished items keyed by dataset index.
        self._item_cache: dict[int, dict[str, Any]] = {}

    def __len__(self) -> int:
        """Return the number of stored samples."""
        return len(self.samples)

    @staticmethod
    def _stack_views(rendered: dict[str, np.ndarray], suffix: str = "") -> torch.Tensor:
        """Stack the front / wrist_left / wrist_right arrays (with ``suffix``) along dim 0."""
        return torch.stack(
            [torch.from_numpy(rendered[view + suffix]) for view in ("front", "wrist_left", "wrist_right")],
            dim=0,
        )

    def _render_cache_key(self, sample: dict[str, Any], render_state: dict[str, Any]) -> bytes:
        """Build the render-cache key by pickling everything the renderer receives."""
        key_fields = (
            sample["proxy_name"],
            self.resolution,
            dataset_uses_rgbd(sample.get("dataset_version")),
            render_state,
        )
        return pickle.dumps(key_fields, protocol=4)

    def _render_sample(self, sample: dict[str, Any], render_state: dict[str, Any]) -> dict[str, np.ndarray]:
        """Render ``render_state`` for ``sample``, reusing a cached result when one exists."""
        key = self._render_cache_key(sample, render_state)
        views = self._render_cache.get(key)
        if views is None:
            views = render_views_from_state(
                proxy_name=sample["proxy_name"],
                render_state=render_state,
                resolution=self.resolution,
                include_depth=dataset_uses_rgbd(sample.get("dataset_version")),
            )
            self._render_cache[key] = views
        return views

    def __getitem__(self, index: int) -> dict[str, Any]:
        """Return the tensorized item for ``index``.

        Keys read with ``sample[...]`` are mandatory (a missing one raises
        ``KeyError``); keys read with ``sample.get`` fall back to zero-filled
        placeholders. ``proposal_target_*`` entries are only emitted when the
        sample contains ``"proposal_target_action_chunks"``.
        """
        hit = self._item_cache.get(index)
        if hit is not None:
            return hit

        sample = self.samples[index]
        _assert_noleak_sample(sample)

        res = self.resolution
        f32, i64 = torch.float32, torch.long

        def zeros(*shape: int, dtype: Any = np.float32) -> np.ndarray:
            return np.zeros(shape, dtype=dtype)

        def required(key: str, dtype: torch.dtype = f32) -> torch.Tensor:
            return torch.as_tensor(sample[key], dtype=dtype)

        def optional(key: str, fallback: Any, dtype: torch.dtype = f32) -> torch.Tensor:
            return torch.as_tensor(sample.get(key, fallback), dtype=dtype)

        candidate_count = int(sample.get("candidate_action_chunks", zeros(0, 0, 0)).shape[0])
        proposal_target_count = int(sample.get("proposal_target_action_chunks", zeros(0, 0, 0)).shape[0])
        with_depth = dataset_uses_rgbd(sample.get("dataset_version"))

        # ---- current and historical renders -------------------------------------
        current_views = self._render_sample(sample, sample["render_state"])
        history_states = sample.get("history_render_states", [])
        past_rgb: list[torch.Tensor] = []
        past_depth: list[torch.Tensor] = []
        past_depth_valid: list[torch.Tensor] = []
        for state in history_states:
            views = self._render_sample(sample, state)
            past_rgb.append(self._stack_views(views))
            if with_depth:
                past_depth.append(self._stack_views(views, "_depth"))
                past_depth_valid.append(self._stack_views(views, "_depth_valid"))

        # Channels move in front of the two spatial axes; values are divided by 255.
        stacked = self._stack_views(current_views).permute(0, 3, 1, 2).float() / 255.0
        if past_rgb:
            history_stacked = torch.stack(past_rgb, dim=0).permute(0, 1, 4, 2, 3).float() / 255.0
        else:
            history_stacked = torch.zeros((0, 3, 3, res, res), dtype=f32)

        # Depth tensors default to empty history; overwritten only for RGB-D samples with history.
        history_depths_tensor = torch.zeros((0, 3, 1, res, res), dtype=f32)
        history_depth_valid_tensor = torch.zeros_like(history_depths_tensor)
        if with_depth:
            depths = self._stack_views(current_views, "_depth").unsqueeze(1).float()
            depth_valid = self._stack_views(current_views, "_depth_valid").unsqueeze(1).float()
            if past_depth:
                history_depths_tensor = torch.stack(past_depth, dim=0).unsqueeze(2).float()
                history_depth_valid_tensor = torch.stack(past_depth_valid, dim=0).unsqueeze(2).float()
        else:
            depths = torch.zeros((3, 1, res, res), dtype=f32)
            depth_valid = torch.zeros_like(depths)

        # ---- camera metadata ----------------------------------------------------
        camera_intrinsics = sample.get("camera_intrinsics")
        camera_extrinsics = sample.get("camera_extrinsics")
        if camera_intrinsics is None or camera_extrinsics is None:
            # If either matrix set is absent, both are replaced by the defaults.
            camera_intrinsics, camera_extrinsics = default_camera_matrices()
        camera_valid_mask = sample.get("camera_valid_mask")
        if camera_valid_mask is None:
            camera_valid_mask = np.ones((3,), dtype=np.float32)

        history_length = len(history_states)
        action_dim = sample["action_chunk"].shape[-1]
        task_name = sample.get("task_name", task_name_from_proxy_name(sample["proxy_name"]))
        task_id = sample.get("task_id", task_id_from_task_name(task_name))

        # ---- item assembly (insertion order mirrors the stored layout) ----------
        item: dict[str, Any] = {
            "images": stacked,
            "depths": depths,
            "depth_valid": depth_valid,
            "history_images": history_stacked,
            "history_depths": history_depths_tensor,
            "history_depth_valid": history_depth_valid_tensor,
            "history_camera_intrinsics": optional("history_camera_intrinsics", zeros(history_length, 3, 3, 3)),
            "history_camera_extrinsics": optional("history_camera_extrinsics", zeros(history_length, 3, 4, 4)),
            "history_camera_valid_mask": optional("history_camera_valid_mask", zeros(history_length, 3)),
            "history_proprio": optional("history_proprio", []),
            "history_actions": optional("history_actions", zeros(history_length, action_dim)),
            "camera_intrinsics": torch.as_tensor(camera_intrinsics, dtype=f32),
            "camera_extrinsics": torch.as_tensor(camera_extrinsics, dtype=f32),
            "camera_valid_mask": torch.as_tensor(camera_valid_mask, dtype=f32),
            "proprio": required("proprio"),
            "texts": sample["language_goal"],
            "task_name": task_name,
            "task_id": torch.as_tensor(task_id, dtype=i64),
            "stress_slice": sample.get("stress_slice", "nominal"),
            "difficulty_bin": sample.get("difficulty_bin", "medium"),
            # NOTE: despite the key name this is the str() form of the dict, not json.dumps output.
            "episode_metadata_json": str(sample.get("episode_metadata", {})),
            "action_chunk": required("action_chunk"),
            "support_mode": required("support_mode", i64),
            "phase": optional("phase", 0, i64),
            "subgoal_progress": optional("subgoal_progress", 0.0),
            "corridor_feasible": required("corridor_feasible"),
            "persistence_horizon": required("persistence_horizon"),
            "disturbance_cost": required("disturbance_cost"),
            "belief_map": required("belief_map").unsqueeze(0),
            "visibility_map": optional("visibility_map", zeros(32, 32)).unsqueeze(0),
            "clearance_map": optional("clearance_map", zeros(2, 32, 32)),
            "occluder_contact_map": optional("occluder_contact_map", zeros(32, 32)).unsqueeze(0),
            "grasp_affordance_map": optional("grasp_affordance_map", zeros(32, 32)).unsqueeze(0),
            "support_stability": optional("support_stability", 0.0),
            "support_stability_map": optional("support_stability_map", zeros(32, 32)).unsqueeze(0),
            "reocclusion_target": optional("reocclusion_target", 0.0),
            "reocclusion_map": optional("reocclusion_map", zeros(32, 32)).unsqueeze(0),
        }

        for key in (
            "gap_width",
            "damage_proxy",
            "release_collapse_rate",
            "target_visibility_confidence",
            "mouth_aperture",
            "hold_quality",
            "rim_slip_risk",
            "insertable_actor_corridor",
            "layer_separation_quality",
            "fold_preservation",
            "insertion_corridor",
            "top_layer_stability",
            "lift_too_much_risk",
        ):
            item[key] = optional(key, 0.0)

        # Optional per-step fields with the trailing shape used for their zero placeholders.
        per_step_fields = (
            ("belief_map", (32, 32)),
            ("visibility_map", (32, 32)),
            ("clearance_map", (2, 32, 32)),
            ("support_stability", ()),
            ("reocclusion_target", ()),
            ("occluder_contact_map", (32, 32)),
            ("grasp_affordance_map", (32, 32)),
        )
        core_rollout_fields = ("corridor_feasible", "persistence_horizon", "disturbance_cost")

        item["rollout_support_mode"] = required("rollout_support_mode", i64)
        item["rollout_phase"] = optional("rollout_phase", zeros(0, dtype=np.int64), i64)
        for suffix in core_rollout_fields:
            item["rollout_" + suffix] = required("rollout_" + suffix)
        for suffix, tail in per_step_fields:
            item["rollout_" + suffix] = optional("rollout_" + suffix, zeros(0, *tail))

        item["candidate_action_chunks"] = required("candidate_action_chunks")
        item["candidate_rollout_support_mode"] = required("candidate_rollout_support_mode", i64)
        item["candidate_rollout_phase"] = optional("candidate_rollout_phase", zeros(0, 0, dtype=np.int64), i64)
        for suffix in core_rollout_fields:
            item["candidate_rollout_" + suffix] = required("candidate_rollout_" + suffix)
        for suffix, tail in per_step_fields:
            item["candidate_rollout_" + suffix] = optional("candidate_rollout_" + suffix, zeros(0, 0, *tail))

        for key in (
            "candidate_retrieval_success",
            "candidate_final_disturbance_cost",
            "candidate_reocclusion_rate",
            "candidate_visibility_integral",
        ):
            item[key] = required(key)
        for key in (
            "candidate_actor_feasibility_auc",
            "candidate_reveal_achieved",
            "candidate_hold_persistence",
            "candidate_support_stability_auc",
            "candidate_disturbance_auc",
        ):
            item[key] = optional(key, zeros(candidate_count))
        item["candidate_macro_ids"] = optional("candidate_macro_ids", zeros(candidate_count, dtype=np.int64), i64)
        item["candidate_is_hard_negative"] = optional("candidate_is_hard_negative", zeros(candidate_count))
        item["candidate_risk"] = required("candidate_risk")
        item["candidate_utility"] = required("candidate_utility")
        for key in (
            "candidate_gap_width",
            "candidate_damage_proxy",
            "candidate_mouth_aperture",
            "candidate_hold_quality",
            "candidate_rim_slip_risk",
            "candidate_fold_preservation",
            "candidate_layer_separation_quality",
            "candidate_lift_too_much_risk",
        ):
            item[key] = optional(key, zeros(candidate_count))
        item["proxy_name"] = sample["proxy_name"]
        item["episode_id"] = sample["episode_id"]

        # ---- optional proposal targets -----------------------------------------
        if "proposal_target_action_chunks" in sample:
            count = proposal_target_count
            horizon = int(np.asarray(sample.get("rollout_support_mode", zeros(0, dtype=np.int64))).shape[0])

            item["proposal_target_action_chunks"] = required("proposal_target_action_chunks")
            for suffix in ("retrieval_success", "risk", "utility"):
                key = "proposal_target_" + suffix
                item[key] = optional(key, zeros(count))
            for suffix in ("support_mode", "phase"):
                key = "proposal_target_rollout_" + suffix
                item[key] = optional(key, zeros(count, horizon, dtype=np.int64), i64)
            for suffix in ("corridor_feasible", "persistence_horizon"):
                key = "proposal_target_rollout_" + suffix
                item[key] = optional(key, zeros(count, horizon, 3))
            item["proposal_target_rollout_disturbance_cost"] = optional(
                "proposal_target_rollout_disturbance_cost", zeros(count, horizon)
            )
            for suffix, tail in per_step_fields:
                key = "proposal_target_rollout_" + suffix
                if tail:
                    # Map placeholders borrow their trailing shape from the matching rollout_* entry.
                    tail_shape = tuple(np.asarray(sample.get("rollout_" + suffix, zeros(0, *tail))).shape[1:])
                else:
                    tail_shape = ()
                item[key] = optional(key, zeros(count, horizon, *tail_shape))

        self._item_cache[index] = item
        return item
630
+
631
+
632
def dataset_from_bundle(dataset_bundle: dict[str, Any], resolution: int | None = None) -> RevealOfflineDataset:
    """Wrap the samples of a dataset bundle in a ``RevealOfflineDataset``.

    Args:
        dataset_bundle: Bundle providing ``"samples"`` and ``"resolution"``.
        resolution: Override for the render resolution. Any falsy value
            (``None`` or ``0``) selects the bundle's own ``"resolution"``.

    Returns:
        A dataset over ``dataset_bundle["samples"]``.
    """
    if not resolution:
        resolution = int(dataset_bundle["resolution"])
    return RevealOfflineDataset(dataset_bundle["samples"], resolution=resolution)
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/generate_dataset.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ from pathlib import Path
6
+
7
+ from sim_reveal.dataset import collect_teacher_dataset, save_teacher_dataset
8
+
9
+
10
def main() -> None:
    """Command-line entry point: collect a teacher dataset, save it, print a JSON summary.

    Command-line flags are forwarded to ``collect_teacher_dataset``. When
    ``--episode-spec-path`` is given, the file is read as UTF-8 JSON and passed
    along as ``episode_specs``; otherwise ``None`` is passed.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--proxies", nargs="*", default=None)
    integer_flags = (
        ("--episodes-per-proxy", 32),
        ("--resolution", 96),
        ("--seed", 0),
        ("--chunk-horizon", 8),
        ("--rollout-horizon", 5),
        ("--history-steps", 2),
        ("--planner-candidates", 4),
    )
    for flag, default in integer_flags:
        cli.add_argument(flag, type=int, default=default)
    cli.add_argument("--episode-spec-path", default=None)
    cli.add_argument("--output-path", default="/workspace/data/reveal_proxy/reveal_proxy_teacher.pt")
    options = cli.parse_args()

    spec_path = options.episode_spec_path
    episode_specs = json.loads(Path(spec_path).read_text(encoding="utf-8")) if spec_path else None

    bundle = collect_teacher_dataset(
        proxy_names=options.proxies,
        episodes_per_proxy=options.episodes_per_proxy,
        resolution=options.resolution,
        seed=options.seed,
        chunk_horizon=options.chunk_horizon,
        rollout_horizon=options.rollout_horizon,
        history_steps=options.history_steps,
        planner_candidates=options.planner_candidates,
        episode_specs=episode_specs,
    )
    saved_to = save_teacher_dataset(Path(options.output_path), bundle)

    # Short machine-readable report of what was written.
    report = {
        "output_path": str(saved_to),
        "resolution": bundle["resolution"],
        "num_samples": len(bundle["samples"]),
        "num_episode_specs": len(bundle.get("episode_specs", [])),
        "summary": bundle["summary"],
    }
    print(json.dumps(report, indent=2))


if __name__ == "__main__":
    main()
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/isaac_smoke.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+
6
+ from sim_reveal.isaac_wrapper import IsaacRevealRuntime
7
+
8
+
9
def main() -> None:
    """Smoke-test entry point: start the Isaac runtime, report its version, shut down.

    ``--visible`` disables headless mode. The runtime is always closed, even
    when importing ``isaacsim`` or printing the report fails.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--visible", action="store_true")
    opts = cli.parse_args()

    run_headless = not opts.visible
    sim_runtime = IsaacRevealRuntime(headless=run_headless)
    try:
        import isaacsim

        version = getattr(isaacsim, "__version__", "unknown")
        report = {
            "headless": run_headless,
            "isaacsim_version": version,
            "status": "ok",
        }
        print(json.dumps(report, indent=2))
    finally:
        sim_runtime.close()
26
+
27
+
28
+ if __name__ == "__main__":
29
+ main()
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/isaac_wrapper.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+
6
@dataclass
class IsaacRevealRuntime:
    """Thin lifetime wrapper around an Isaac Sim ``SimulationApp``.

    The application is created in ``__post_init__`` and released by
    :meth:`close`. The object can also be used as a context manager, in which
    case :meth:`close` runs on exit.
    """

    # Passed to SimulationApp as the "headless" launch option.
    headless: bool = True

    def __post_init__(self) -> None:
        # Imported lazily so that merely importing this module does not require isaacsim.
        from isaacsim import SimulationApp

        self._simulation_app = SimulationApp({"headless": self.headless})

    def close(self) -> None:
        """Close the simulation app; safe to call repeatedly or before startup."""
        app = getattr(self, "_simulation_app", None)
        if app is None:
            return
        try:
            app.close()
        finally:
            # Drop the handle so a second close() (e.g. from a finally block) is a no-op.
            self._simulation_app = None

    def __enter__(self) -> "IsaacRevealRuntime":
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/labels.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ import numpy as np
6
+
7
+ from sim_reveal.base import RevealState, SupportMode
8
+
9
+
10
def privileged_state_to_reveal_labels(
    state: dict[str, Any],
    num_modes: int = 3,
    num_templates: int = 32,
    rollout_horizon: int = 5,
) -> RevealState:
    """Convert a privileged simulator state dict into a ``RevealState`` label bundle.

    Args:
        state: Mapping with keys ``support_mode``, ``corridor_feasible``,
            ``persistence_horizon``, ``disturbance_cost`` and optionally
            ``belief_map``.
        num_modes: Number of support modes; length of the support logits.
        num_templates: Number of templates per mode in ``corridor_feasible``.
        rollout_horizon: Upper clip bound for the persistence horizon.

    Returns:
        A ``RevealState`` whose support/corridor logits are +4.0 for the
        active/feasible entries and -4.0 otherwise.

    Raises:
        ValueError: If ``support_mode`` is outside ``[0, num_modes)`` or an
            array does not have the expected shape.
    """
    support_mode = int(state["support_mode"])
    # Reject out-of-range modes explicitly: a negative value would otherwise
    # wrap around and silently mark the wrong mode as active.
    if not 0 <= support_mode < num_modes:
        raise ValueError(f"Expected support_mode in [0, {num_modes}), got {support_mode}")
    support_logits = np.full((num_modes,), -4.0, dtype=np.float32)
    support_logits[support_mode] = 4.0

    corridor = np.asarray(state["corridor_feasible"], dtype=np.float32)
    if corridor.shape != (num_modes, num_templates):
        raise ValueError(
            f"Expected corridor_feasible shape {(num_modes, num_templates)}, got {corridor.shape}"
        )
    # Feasibility is thresholded at 0.5 and mapped to saturated logits.
    corridor_logits = np.where(corridor > 0.5, 4.0, -4.0).astype(np.float32)

    persistence = np.asarray(state["persistence_horizon"], dtype=np.float32)
    if persistence.shape != (num_modes,):
        raise ValueError(f"Expected persistence_horizon shape {(num_modes,)}, got {persistence.shape}")
    persistence = np.clip(persistence, 0.0, float(rollout_horizon))

    disturbance = np.asarray([state["disturbance_cost"]], dtype=np.float32)
    belief_map = state.get("belief_map")
    if belief_map is not None:
        belief_map = np.asarray(belief_map, dtype=np.float32)

    return RevealState(
        support_mode_logits=support_logits,
        corridor_logits=corridor_logits,
        persistence_horizon=persistence,
        disturbance_cost=disturbance,
        belief_map=belief_map,
    )
44
+
45
+
46
def reocclusion_rate(corridor_open_history: np.ndarray) -> float:
    """Fraction of consecutive step pairs in which an open corridor closes.

    A step counts as "open" when its value is above 0.5. Histories with fewer
    than two entries yield 0.0.

    Raises:
        ValueError: If the history is not one-dimensional.
    """
    history = np.asarray(corridor_open_history, dtype=np.float32)
    if history.ndim != 1:
        raise ValueError("corridor_open_history must be 1D.")
    if history.size < 2:
        return 0.0
    was_open = history[:-1] > 0.5
    now_closed = history[1:] <= 0.5
    closures = was_open & now_closed
    return float(closures.mean())
54
+
55
+
56
def infer_support_mode_from_flags(holding: bool, transferred: bool) -> SupportMode:
    """Map the two support flags to a ``SupportMode``; ``holding`` takes precedence."""
    if not holding and not transferred:
        return SupportMode.PASSIVE
    return SupportMode.HOLD if holding else SupportMode.TRANSFER
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/procedural_envs.py ADDED
@@ -0,0 +1,1389 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any
5
+
6
+ import numpy as np
7
+
8
+ from sim_reveal.base import RevealProxyConfig, SupportMode
9
+ from sim_reveal.proxy_specs import (
10
+ BAG_PROXY,
11
+ CLOTH_PROXY,
12
+ CRITICAL_STRESS_BY_TASK_NAME,
13
+ FOLIAGE_PROXY,
14
+ SPRINT_DIFFICULTY_BINS,
15
+ task_id_from_task_name,
16
+ task_name_from_proxy_name,
17
+ )
18
+
19
+
20
@dataclass(frozen=True)
class ProxyDynamics:
    """Immutable per-proxy dynamics constants consumed by ``ProceduralRevealEnv``."""

    # Amount subtracted from the opening per simulated step, by support mode
    # (see ProceduralRevealEnv._mode_decay / _persistence_for_mode).
    hold_decay: float
    transfer_decay: float
    passive_decay: float
    # NOTE(review): not read in the part of the file reviewed here — confirm usage in step logic.
    disturbance_gain: float
    # Disturbance is multiplied by (1 - settle_rate) per simulated step in persistence rollouts.
    settle_rate: float
    # Reference opening used for readiness, stability and over-opening terms.
    desired_opening: float
    # Support mode that earns a stability bonus when it is the current mode.
    preferred_mode: SupportMode
    # Multipliers applied to the opening when computing the corridor for the
    # TRANSFER and PASSIVE modes (HOLD uses 1.0).
    transfer_support_factor: float
    passive_support_factor: float
    # Additive offset in the scalar visibility estimate.
    visibility_bias: float
    # Visibility needed for the retrieve phase; 70% of it gates the corridor.
    retrieve_visibility_threshold: float
    # Three floats; presumably an RGB base colour for rendering — TODO confirm.
    palette: tuple[float, float, float]
34
+
35
+
36
+ PROXY_CONFIGS: dict[str, RevealProxyConfig] = {
37
+ FOLIAGE_PROXY.name: FOLIAGE_PROXY,
38
+ BAG_PROXY.name: BAG_PROXY,
39
+ CLOTH_PROXY.name: CLOTH_PROXY,
40
+ }
41
+
42
+ PROXY_DYNAMICS: dict[str, ProxyDynamics] = {
43
+ FOLIAGE_PROXY.name: ProxyDynamics(
44
+ hold_decay=0.02,
45
+ transfer_decay=0.07,
46
+ passive_decay=0.15,
47
+ disturbance_gain=0.06,
48
+ settle_rate=0.03,
49
+ desired_opening=0.60,
50
+ preferred_mode=SupportMode.HOLD,
51
+ transfer_support_factor=0.76,
52
+ passive_support_factor=0.42,
53
+ visibility_bias=0.03,
54
+ retrieve_visibility_threshold=0.42,
55
+ palette=(0.16, 0.30, 0.12),
56
+ ),
57
+ BAG_PROXY.name: ProxyDynamics(
58
+ hold_decay=0.04,
59
+ transfer_decay=0.03,
60
+ passive_decay=0.12,
61
+ disturbance_gain=0.05,
62
+ settle_rate=0.02,
63
+ desired_opening=0.68,
64
+ preferred_mode=SupportMode.TRANSFER,
65
+ transfer_support_factor=0.96,
66
+ passive_support_factor=0.55,
67
+ visibility_bias=0.06,
68
+ retrieve_visibility_threshold=0.48,
69
+ palette=(0.26, 0.17, 0.10),
70
+ ),
71
+ CLOTH_PROXY.name: ProxyDynamics(
72
+ hold_decay=0.03,
73
+ transfer_decay=0.05,
74
+ passive_decay=0.04,
75
+ disturbance_gain=0.04,
76
+ settle_rate=0.04,
77
+ desired_opening=0.50,
78
+ preferred_mode=SupportMode.PASSIVE,
79
+ transfer_support_factor=0.82,
80
+ passive_support_factor=0.90,
81
+ visibility_bias=0.08,
82
+ retrieve_visibility_threshold=0.38,
83
+ palette=(0.24, 0.24, 0.29),
84
+ ),
85
+ }
86
+
87
+ PROXY_GOALS = {
88
+ FOLIAGE_PROXY.name: "create a gap in the foliage and retrieve the target",
89
+ BAG_PROXY.name: "open the bag mouth and retrieve the target object",
90
+ CLOTH_PROXY.name: "lift the top layer enough to retrieve the hidden object",
91
+ }
92
+
93
+
94
def default_camera_matrices(
    pose_jitter: float = 0.0,
    focal_jitter: float = 0.0,
    lateral_skew: float = 0.0,
) -> tuple[np.ndarray, np.ndarray]:
    """Build the intrinsic and extrinsic matrices for the three cameras.

    Args:
        pose_jitter: Scales the translation offsets applied to cameras 1 and 2.
        focal_jitter: Scales the focal-length and principal-point perturbations.
        lateral_skew: Extra y-translation (opposite sign for cameras 1 and 2)
            and principal-point x shift. It is applied whenever it is
            non-zero, independently of the two jitter arguments.

    Returns:
        ``(intrinsics, extrinsics)`` as float32 arrays of shape ``(3, 3, 3)``
        and ``(3, 4, 4)``. With all arguments at zero the nominal matrices are
        returned unchanged.
    """
    intrinsics = np.asarray(
        [
            [[140.0, 0.0, 48.0], [0.0, 140.0, 48.0], [0.0, 0.0, 1.0]],
            [[135.0, 0.0, 48.0], [0.0, 135.0, 48.0], [0.0, 0.0, 1.0]],
            [[135.0, 0.0, 48.0], [0.0, 135.0, 48.0], [0.0, 0.0, 1.0]],
        ],
        dtype=np.float32,
    )
    extrinsics = np.asarray(
        [
            np.eye(4, dtype=np.float32),
            [[1.0, 0.0, 0.0, -0.18], [0.0, 1.0, 0.0, 0.04], [0.0, 0.0, 1.0, 0.10], [0.0, 0.0, 0.0, 1.0]],
            [[1.0, 0.0, 0.0, 0.18], [0.0, 1.0, 0.0, 0.04], [0.0, 0.0, 1.0, 0.10], [0.0, 0.0, 0.0, 1.0]],
        ],
        dtype=np.float32,
    )
    # lateral_skew contributes to both matrix sets, so each guard also fires
    # when only the skew is non-zero; the jitter terms then add exactly zero.
    if pose_jitter != 0.0 or lateral_skew != 0.0:
        extrinsics[1, 0, 3] -= 0.6 * pose_jitter
        extrinsics[2, 0, 3] += 0.6 * pose_jitter
        extrinsics[1, 1, 3] += 0.25 * pose_jitter + lateral_skew
        extrinsics[2, 1, 3] += 0.25 * pose_jitter - lateral_skew
        extrinsics[1, 2, 3] += 0.15 * pose_jitter
        extrinsics[2, 2, 3] += 0.10 * pose_jitter
    if focal_jitter != 0.0 or lateral_skew != 0.0:
        intrinsics[0, 0, 0] *= 1.0 + 0.10 * focal_jitter
        intrinsics[0, 1, 1] *= 1.0 - 0.05 * focal_jitter
        intrinsics[1, 0, 0] *= 1.0 - 0.08 * focal_jitter
        intrinsics[2, 1, 1] *= 1.0 + 0.08 * focal_jitter
        intrinsics[1, 0, 2] += 3.0 * focal_jitter + 6.0 * lateral_skew
        intrinsics[2, 0, 2] -= 3.0 * focal_jitter - 6.0 * lateral_skew
    return intrinsics, extrinsics
130
+
131
+
132
def available_proxy_names() -> tuple[str, ...]:
    """Return the registered proxy names in ``PROXY_CONFIGS`` insertion order."""
    names = tuple(PROXY_CONFIGS)
    return names
134
+
135
+
136
def make_proxy_env(
    proxy_name: str,
    resolution: int = 96,
    seed: int = 0,
    num_templates: int = 32,
    rollout_horizon: int = 5,
    max_steps: int | None = None,
    stress_slice: str = "nominal",
    difficulty_bin: str = "medium",
) -> "ProceduralRevealEnv":
    """Factory that forwards every argument unchanged to ``ProceduralRevealEnv``."""
    settings = dict(
        proxy_name=proxy_name,
        resolution=resolution,
        seed=seed,
        num_templates=num_templates,
        rollout_horizon=rollout_horizon,
        max_steps=max_steps,
        stress_slice=stress_slice,
        difficulty_bin=difficulty_bin,
    )
    return ProceduralRevealEnv(**settings)
156
+
157
+
158
+ class ProceduralRevealEnv:
159
+ camera_names = ("front", "wrist_left", "wrist_right")
160
+
161
def __init__(
    self,
    proxy_name: str,
    resolution: int = 96,
    seed: int = 0,
    num_templates: int = 32,
    rollout_horizon: int = 5,
    max_steps: int | None = None,
    stress_slice: str = "nominal",
    difficulty_bin: str = "medium",
) -> None:
    """Configure the environment for one proxy and reset it with ``seed``.

    Raises:
        KeyError: If ``proxy_name`` is not registered in ``PROXY_CONFIGS``.
        ValueError: If ``difficulty_bin`` is not in ``SPRINT_DIFFICULTY_BINS``.
    """
    if proxy_name not in PROXY_CONFIGS:
        raise KeyError(f"Unknown proxy: {proxy_name}")
    if difficulty_bin not in SPRINT_DIFFICULTY_BINS:
        raise ValueError(f"Unsupported difficulty bin: {difficulty_bin}")

    # Static per-proxy configuration.
    self.proxy = PROXY_CONFIGS[proxy_name]
    self.dynamics = PROXY_DYNAMICS[proxy_name]
    self.proxy_name = proxy_name

    # Task identity derived from the proxy name.
    task_name = task_name_from_proxy_name(proxy_name)
    self.task_name = task_name
    self.task_id = task_id_from_task_name(task_name)
    self.critical_stress = CRITICAL_STRESS_BY_TASK_NAME[task_name]

    # Episode-shaping options.
    self.stress_slice = str(stress_slice)
    self.difficulty_bin = str(difficulty_bin)
    self.resolution = resolution
    self.num_templates = num_templates
    self.rollout_horizon = rollout_horizon
    # A falsy max_steps (None or 0) falls back to the proxy's own limit.
    self.max_steps = max_steps if max_steps else self.proxy.max_steps

    self.rng = np.random.default_rng(seed)
    self.reset(seed=seed)
190
+
191
def clone_state(self) -> dict[str, Any]:
    """Snapshot the mutable episode state into a plain dict.

    Scalar fields are stored as-is; the two trace lists are shallow-copied so
    later appends on the environment do not alter the snapshot.
    """
    scalar_fields = (
        "step_count",
        "opening",
        "disturbance",
        "target_template",
        "target_depth",
        "target_center",
        "target_radius",
        "texture_phase",
        "texture_scale",
        "view_bias",
        "target_intensity",
        "holding",
        "transferred",
        "retrieved",
        "actor_progress",
        "last_actor_template",
    )
    snapshot = {name: getattr(self, name) for name in scalar_fields}
    snapshot["visibility_trace"] = list(self.visibility_trace)
    snapshot["corridor_trace"] = list(self.corridor_trace)
    return snapshot
212
+
213
def restore_state(self, state: dict[str, Any]) -> None:
    """Load a snapshot (as produced by ``clone_state``) back onto the environment.

    Each value is coerced to its canonical type; the trace lists are copied.
    A missing key raises ``KeyError`` at the first absent field.
    """
    field_casts = (
        ("step_count", int),
        ("opening", float),
        ("disturbance", float),
        ("target_template", int),
        ("target_depth", float),
        ("target_center", float),
        ("target_radius", float),
        ("texture_phase", float),
        ("texture_scale", float),
        ("view_bias", float),
        ("target_intensity", float),
        ("holding", bool),
        ("transferred", bool),
        ("retrieved", bool),
        ("actor_progress", float),
        ("last_actor_template", int),
        ("visibility_trace", list),
        ("corridor_trace", list),
    )
    for name, cast in field_casts:
        setattr(self, name, cast(state[name]))
232
+
233
+ def _difficulty_ranges(self) -> dict[str, tuple[float, float]]:
234
+ if self.difficulty_bin == "hard":
235
+ return {
236
+ "opening": (0.07, 0.18),
237
+ "disturbance": (0.08, 0.18),
238
+ "target_depth": (0.28, 0.52),
239
+ "target_radius": (0.018, 0.030),
240
+ "view_bias": (-0.18, 0.18),
241
+ }
242
+ return {
243
+ "opening": (0.12, 0.24),
244
+ "disturbance": (0.03, 0.12),
245
+ "target_depth": (0.14, 0.40),
246
+ "target_radius": (0.022, 0.036),
247
+ "view_bias": (-0.10, 0.10),
248
+ }
249
+
250
+ def _stress_parameters(self) -> dict[str, float]:
251
+ params = {
252
+ "reocclusion_bias": 0.0,
253
+ "closure_scale": 1.0,
254
+ "disturbance_gain_scale": 1.0,
255
+ "corridor_scale": 1.0,
256
+ "support_stability_penalty": 0.0,
257
+ "camera_pose_jitter": 0.0,
258
+ "focal_jitter": 0.0,
259
+ "lateral_skew": 0.0,
260
+ "collateral_bias": 0.0,
261
+ "opening_shift": 0.0,
262
+ "disturbance_shift": 0.0,
263
+ "depth_shift": 0.0,
264
+ "view_bias_scale": 1.0,
265
+ }
266
+ if self.stress_slice == "high_reocclusion":
267
+ params.update(
268
+ {
269
+ "reocclusion_bias": 0.18,
270
+ "closure_scale": 1.22,
271
+ "disturbance_gain_scale": 1.12,
272
+ "opening_shift": -0.03,
273
+ }
274
+ )
275
+ elif self.stress_slice == "camera_perturbation":
276
+ params.update(
277
+ {
278
+ "camera_pose_jitter": 1.0 if self.difficulty_bin == "hard" else 0.65,
279
+ "focal_jitter": 0.9 if self.difficulty_bin == "hard" else 0.55,
280
+ "lateral_skew": 0.06 if self.proxy_name == BAG_PROXY.name else 0.02,
281
+ "view_bias_scale": 1.75 if self.difficulty_bin == "hard" else 1.4,
282
+ }
283
+ )
284
+ elif self.stress_slice == "tight_corridor_high_collateral":
285
+ params.update(
286
+ {
287
+ "corridor_scale": 0.76,
288
+ "disturbance_gain_scale": 1.25,
289
+ "collateral_bias": 0.14,
290
+ "support_stability_penalty": 0.08,
291
+ "opening_shift": -0.04,
292
+ }
293
+ )
294
+ elif self.stress_slice == "one_sided_slip":
295
+ params.update(
296
+ {
297
+ "corridor_scale": 0.84,
298
+ "disturbance_gain_scale": 1.10,
299
+ "reocclusion_bias": 0.10,
300
+ "camera_pose_jitter": 0.45,
301
+ "focal_jitter": 0.35,
302
+ "lateral_skew": 0.10 if self.difficulty_bin == "hard" else 0.06,
303
+ }
304
+ )
305
+ elif self.stress_slice == "fold_sensitive_long_persistence":
306
+ params.update(
307
+ {
308
+ "disturbance_gain_scale": 1.18,
309
+ "support_stability_penalty": 0.12,
310
+ "reocclusion_bias": 0.08,
311
+ "collateral_bias": 0.12,
312
+ "depth_shift": 0.04,
313
+ }
314
+ )
315
+ return params
316
+
317
def episode_metadata(self) -> dict[str, Any]:
    """Describe the episode: task identity plus the active stress parameters as floats."""
    info: dict[str, Any] = {
        "proxy_name": self.proxy_name,
        "task_name": self.task_name,
        "task_id": self.task_id,
        "stress_slice": self.stress_slice,
        "difficulty_bin": self.difficulty_bin,
    }
    numeric_fields = (
        "camera_pose_jitter",
        "focal_jitter",
        "lateral_skew",
        "reocclusion_bias",
        "closure_scale",
        "disturbance_gain_scale",
        "corridor_scale",
        "support_stability_penalty",
        "collateral_bias",
    )
    for name in numeric_fields:
        info[name] = float(getattr(self, name))
    return info
334
+
335
def reset(self, seed: int | None = None) -> tuple[dict[str, Any], dict[str, Any]]:
    """Start a new episode and return ``(observation, privileged_state)``.

    When ``seed`` is given the random generator is re-created from it;
    otherwise the existing generator continues. The order of the random
    draws below is part of the episode definition and must not change.
    """
    if seed is not None:
        self.rng = np.random.default_rng(seed)
    ranges = self._difficulty_ranges()
    stress = self._stress_parameters()

    # Copy the stress parameters that other methods read as attributes.
    for name in (
        "reocclusion_bias",
        "closure_scale",
        "disturbance_gain_scale",
        "corridor_scale",
        "support_stability_penalty",
        "camera_pose_jitter",
        "focal_jitter",
        "lateral_skew",
        "collateral_bias",
    ):
        setattr(self, name, float(stress[name]))
    self.step_count = 0

    def draw(key: str) -> float:
        # One uniform sample from the difficulty-dependent range for `key`.
        return self.rng.uniform(*ranges[key])

    # --- scene sampling (draw order: opening, disturbance, template, depth,
    # centre jitter, radius, texture phase, texture scale, view bias, intensity) ---
    self.opening = float(np.clip(draw("opening") + stress["opening_shift"], 0.03, 0.95))
    self.disturbance = float(np.clip(draw("disturbance") + stress["disturbance_shift"], 0.0, 1.0))
    # The target template is kept four slots away from either end.
    self.target_template = int(self.rng.integers(4, self.num_templates - 4))
    self.target_depth = float(np.clip(draw("target_depth") + stress["depth_shift"], 0.05, 0.85))
    nominal_center = self.target_template / float(max(1, self.num_templates - 1))
    self.target_center = float(np.clip(nominal_center + self.rng.uniform(-0.01, 0.01), 0.06, 0.94))
    self.target_radius = float(draw("target_radius"))
    self.texture_phase = float(self.rng.uniform(0.0, 2.0 * np.pi))
    self.texture_scale = float(self.rng.uniform(0.85, 1.25))
    self.view_bias = float(np.clip(draw("view_bias") * stress["view_bias_scale"], -0.30, 0.30))
    self.target_intensity = float(self.rng.uniform(0.45, 0.8))

    # --- interaction flags ---
    self.holding = False
    self.transferred = False
    self.retrieved = False
    self.actor_progress = 0.0
    self.last_actor_template = self.target_template

    # --- traces start with the initial privileged readings ---
    initial_state = self.get_privileged_state()
    self.visibility_trace = [float(initial_state["visibility"])]
    active_corridor = initial_state["corridor_feasible"][initial_state["support_mode"]]
    self.corridor_trace = [float(active_corridor.any())]
    return self.get_observation(initial_state), initial_state
394
+
395
+ def _normalized_template(self, template_index: int) -> float:
396
+ return (template_index / float(self.num_templates - 1)) * 2.0 - 1.0
397
+
398
def _current_support_mode(self) -> SupportMode:
    """Derive the support mode from the flags; ``holding`` wins over ``transferred``."""
    if self.holding:
        mode = SupportMode.HOLD
    elif self.transferred:
        mode = SupportMode.TRANSFER
    else:
        mode = SupportMode.PASSIVE
    return mode
404
+
405
def _mode_from_action(self, action: np.ndarray) -> SupportMode:
    """Pick the support mode requested by an action vector.

    Action entries 6, 1 and 2 are squashed to ``[0, 1]`` and read as the
    hold, transfer and passive scores. Hold wins ties; transfer additionally
    requires the current opening to be at least 0.32.
    """

    def squash(raw) -> float:
        # tanh to (-1, 1), then shift/scale to (0, 1).
        return (np.tanh(float(raw)) + 1.0) * 0.5

    hold_score = squash(action[6])
    transfer_score = squash(action[1])
    passive_score = squash(action[2])

    if hold_score >= max(transfer_score, passive_score):
        return SupportMode.HOLD
    transfer_allowed = transfer_score >= passive_score and self.opening >= 0.32
    return SupportMode.TRANSFER if transfer_allowed else SupportMode.PASSIVE
414
+
415
+ def _visibility(self, opening: float | None = None, disturbance: float | None = None) -> float:
416
+ opening = self.opening if opening is None else float(opening)
417
+ disturbance = self.disturbance if disturbance is None else float(disturbance)
418
+ visibility = (
419
+ 1.35 * opening
420
+ - 0.58 * disturbance
421
+ - 0.25 * self.target_depth
422
+ + self.dynamics.visibility_bias
423
+ )
424
+ return float(np.clip(visibility, 0.0, 1.0))
425
+
426
def _mode_factor(self, mode: SupportMode) -> float:
    """Opening multiplier for ``mode``: 1.0 for HOLD, per-proxy factors otherwise."""
    dynamics = self.dynamics
    if mode == SupportMode.HOLD:
        factor = 1.0
    elif mode == SupportMode.TRANSFER:
        factor = dynamics.transfer_support_factor
    else:
        factor = dynamics.passive_support_factor
    return factor
432
+
433
def _mode_decay(self, mode: SupportMode) -> float:
    """Per-step opening decay configured for ``mode`` (PASSIVE is the fallback)."""
    dynamics = self.dynamics
    if mode == SupportMode.HOLD:
        decay = dynamics.hold_decay
    elif mode == SupportMode.TRANSFER:
        decay = dynamics.transfer_decay
    else:
        decay = dynamics.passive_decay
    return decay
439
+
440
+ def _corridor_for_mode(
441
+ self,
442
+ mode: SupportMode,
443
+ opening: float | None = None,
444
+ disturbance: float | None = None,
445
+ ) -> np.ndarray:
446
+ opening = self.opening if opening is None else float(opening)
447
+ disturbance = self.disturbance if disturbance is None else float(disturbance)
448
+ visibility = self._visibility(opening, disturbance)
449
+ effective = (
450
+ self.corridor_scale * opening * self._mode_factor(mode)
451
+ - 0.35 * disturbance
452
+ - 0.18 * self.target_depth
453
+ - 0.10 * self.collateral_bias
454
+ )
455
+ width = int(np.floor(max(0.0, effective) * 8.0))
456
+ corridor = np.zeros((self.num_templates,), dtype=np.float32)
457
+ if visibility < self.dynamics.retrieve_visibility_threshold * 0.7 or width <= 0:
458
+ return corridor
459
+ low = max(0, self.target_template - width)
460
+ high = min(self.num_templates, self.target_template + width + 1)
461
+ corridor[low:high] = 1.0
462
+ return corridor
463
+
464
def _persistence_for_mode(self, mode: SupportMode) -> float:
    """Count how many consecutive simulated steps the corridor stays open under ``mode``.

    Starting from the current opening and disturbance, each step first checks
    the corridor and then applies the mode's opening decay (HOLD gets a
    +0.035 boost) and the disturbance settling. Counting stops at the first
    closed step or after ``rollout_horizon`` steps.
    """
    hold_boost = 0.035 if mode == SupportMode.HOLD else 0.0
    sim_opening = self.opening
    sim_disturbance = self.disturbance
    open_steps = 0
    for _ in range(self.rollout_horizon):
        if not self._corridor_for_mode(mode, sim_opening, sim_disturbance).any():
            break
        open_steps += 1
        sim_opening = float(np.clip(sim_opening - self._mode_decay(mode) + hold_boost, 0.0, 1.0))
        sim_disturbance = float(np.clip(sim_disturbance * (1.0 - self.dynamics.settle_rate), 0.0, 1.0))
    return float(open_steps)
476
+
477
+ def _belief_map(self, visibility: float) -> np.ndarray:
478
+ side = 32
479
+ x = np.linspace(0.0, 1.0, side, dtype=np.float32)
480
+ y = np.linspace(0.0, 1.0, side, dtype=np.float32)
481
+ yy, xx = np.meshgrid(y, x, indexing="ij")
482
+ center_x = self.target_template / float(self.num_templates - 1)
483
+ center_y = 0.72 - 0.25 * self.target_depth
484
+ sigma = 0.08 + 0.05 * (1.0 - visibility)
485
+ belief = np.exp(-(((xx - center_x) ** 2) + ((yy - center_y) ** 2)) / (2.0 * sigma**2))
486
+ belief *= visibility
487
+ return belief.astype(np.float32)
488
+
489
+ def _visibility_map(self, visibility: float) -> np.ndarray:
490
+ belief = self._belief_map(visibility)
491
+ gradient = np.linspace(0.65, 1.0, belief.shape[0], dtype=np.float32).reshape(-1, 1)
492
+ return np.clip(belief * gradient, 0.0, 1.0).astype(np.float32)
493
+
494
+ def _clearance_map(self, visibility: float) -> np.ndarray:
495
+ side = 32
496
+ x = np.linspace(0.0, 1.0, side, dtype=np.float32)
497
+ y = np.linspace(0.0, 1.0, side, dtype=np.float32)
498
+ yy, xx = np.meshgrid(y, x, indexing="ij")
499
+ corridor_width = np.clip(0.05 + 0.18 * self.opening - 0.10 * self.disturbance, 0.01, 0.28)
500
+ corridor = np.exp(-(((xx - self.target_center) ** 2) / max(1e-5, corridor_width**2)))
501
+ vertical = np.exp(-(((yy - (0.72 - 0.25 * self.target_depth)) ** 2) / 0.03))
502
+ left = np.clip(corridor * vertical * visibility * (0.92 - 0.15 * self.disturbance), 0.0, 1.0)
503
+ right = np.clip(corridor * vertical * visibility * (0.88 - 0.10 * self.disturbance), 0.0, 1.0)
504
+ return np.stack([left, right], axis=0).astype(np.float32)
505
+
506
+ def _occluder_contact_map(self) -> np.ndarray:
507
+ side = 32
508
+ x = np.linspace(0.0, 1.0, side, dtype=np.float32)
509
+ y = np.linspace(0.0, 1.0, side, dtype=np.float32)
510
+ yy, xx = np.meshgrid(y, x, indexing="ij")
511
+ gap_width = np.clip(0.03 + 0.16 * self.opening, 0.03, 0.24)
512
+ left_band = np.exp(-(((xx - (self.target_center - gap_width)) ** 2) / 0.0025))
513
+ right_band = np.exp(-(((xx - (self.target_center + gap_width)) ** 2) / 0.0025))
514
+ support = np.exp(-(((yy - 0.55) ** 2) / 0.04))
515
+ return np.clip((left_band + right_band) * support, 0.0, 1.0).astype(np.float32)
516
+
517
+ def _support_stability(self) -> float:
518
+ base = 1.0 - 0.45 * self.disturbance - 0.10 * max(0.0, self.opening - self.dynamics.desired_opening)
519
+ if self._current_support_mode() == self.dynamics.preferred_mode:
520
+ base += 0.08
521
+ base -= self.support_stability_penalty
522
+ return float(np.clip(base, 0.0, 1.0))
523
+
524
+ def _support_stability_map(self) -> np.ndarray:
525
+ return np.full((32, 32), self._support_stability(), dtype=np.float32)
526
+
527
+ def _reocclusion_target(self, persistence: np.ndarray) -> float:
528
+ current_mode = int(self._current_support_mode())
529
+ horizon_ratio = persistence[current_mode] / float(max(1, self.rollout_horizon))
530
+ return float(np.clip(1.0 - horizon_ratio + 0.35 * self.disturbance + self.reocclusion_bias, 0.0, 1.0))
531
+
532
+ def _phase_label(
533
+ self,
534
+ visibility: float,
535
+ corridor: np.ndarray,
536
+ persistence: np.ndarray,
537
+ disturbance_cost: float,
538
+ ) -> int:
539
+ support_mode = int(self._current_support_mode())
540
+ corridor_ready = bool(corridor[support_mode, self.target_template] > 0.5)
541
+ persistence_ratio = persistence[support_mode] / float(max(1, self.rollout_horizon))
542
+ opening_ready = self.opening >= (0.75 * self.dynamics.desired_opening)
543
+ retrieve_ready = (
544
+ corridor_ready
545
+ and visibility >= self.dynamics.retrieve_visibility_threshold
546
+ and self.actor_progress >= 0.55
547
+ )
548
+ recovering = disturbance_cost >= 0.55 or (opening_ready and persistence_ratio < 0.35)
549
+ if retrieve_ready:
550
+ return 3
551
+ if recovering:
552
+ return 4
553
+ if opening_ready and persistence_ratio >= 0.6:
554
+ return 2
555
+ if self.opening < self.dynamics.desired_opening or visibility < self.dynamics.retrieve_visibility_threshold:
556
+ return 1
557
+ return 0
558
+
559
+ def _subgoal_progress(
560
+ self,
561
+ visibility: float,
562
+ corridor: np.ndarray,
563
+ persistence: np.ndarray,
564
+ ) -> float:
565
+ support_mode = int(self._current_support_mode())
566
+ corridor_mass = float(corridor[support_mode].mean())
567
+ persistence_ratio = float(persistence[support_mode] / float(max(1, self.rollout_horizon)))
568
+ return float(
569
+ np.clip(
570
+ 0.35 * self.opening
571
+ + 0.25 * visibility
572
+ + 0.20 * corridor_mass
573
+ + 0.20 * persistence_ratio,
574
+ 0.0,
575
+ 1.0,
576
+ )
577
+ )
578
+
579
+ def _grasp_affordance_map(
580
+ self,
581
+ belief_map: np.ndarray,
582
+ visibility_map: np.ndarray,
583
+ clearance_map: np.ndarray,
584
+ ) -> np.ndarray:
585
+ combined = belief_map * visibility_map * clearance_map.mean(axis=0)
586
+ return np.clip(combined * (1.0 - 0.35 * self.disturbance), 0.0, 1.0).astype(np.float32)
587
+
588
def get_privileged_state(self) -> dict[str, Any]:
    """Assemble the full privileged (ground-truth) state dictionary.

    Contains the current support mode, per-mode corridor masks and
    persistence horizons, the scalar and map-valued labels, phase and
    progress labels, episode metadata and proxy-specific task metrics.
    """
    dynamics = self.dynamics
    active_mode = int(self._current_support_mode())
    all_modes = list(SupportMode)

    # Per-mode corridor masks (stacked along axis 0) and persistence horizons.
    corridor = np.stack([self._corridor_for_mode(mode) for mode in all_modes], axis=0)
    persistence = np.asarray(
        [self._persistence_for_mode(mode) for mode in all_modes],
        dtype=np.float32,
    )

    visibility = self._visibility()
    excess_opening = max(0.0, self.opening - dynamics.desired_opening)
    disturbance_cost = float(np.clip(self.disturbance + 0.08 * excess_opening, 0.0, 1.0))

    # Dense 32x32 label maps.
    belief_map = self._belief_map(visibility)
    visibility_map = self._visibility_map(visibility)
    clearance_map = self._clearance_map(visibility)
    occluder_contact_map = self._occluder_contact_map()
    support_stability = self._support_stability()
    support_stability_map = self._support_stability_map()
    reocclusion_target = self._reocclusion_target(persistence)
    reocclusion_map = np.full((32, 32), reocclusion_target, dtype=np.float32)
    grasp_affordance_map = self._grasp_affordance_map(belief_map, visibility_map, clearance_map)

    # Proxy-specific metrics; they are merged into the top level of the result.
    task_metrics: dict[str, float] = {}
    proxy_name = self.proxy_name
    if proxy_name == FOLIAGE_PROXY.name:
        task_metrics = {
            "gap_width": float(np.clip(0.03 + 0.16 * self.opening, 0.03, 0.24)),
            "damage_proxy": disturbance_cost,
            "release_collapse_rate": reocclusion_target,
            "target_visibility_confidence": visibility,
        }
    elif proxy_name == BAG_PROXY.name:
        task_metrics = {
            "mouth_aperture": float(self.opening),
            "hold_quality": support_stability,
            "rim_slip_risk": reocclusion_target,
            "insertable_actor_corridor": float(corridor[active_mode, self.target_template]),
        }
    elif proxy_name == CLOTH_PROXY.name:
        task_metrics = {
            "layer_separation_quality": float(np.clip(self.opening * (1.0 - 0.20 * self.disturbance), 0.0, 1.0)),
            "fold_preservation": float(np.clip(1.0 - disturbance_cost, 0.0, 1.0)),
            "insertion_corridor": float(corridor[active_mode, self.target_template]),
            "top_layer_stability": support_stability,
            "lift_too_much_risk": float(np.clip(excess_opening, 0.0, 1.0)),
        }

    phase_label = self._phase_label(
        visibility=visibility,
        corridor=corridor,
        persistence=persistence,
        disturbance_cost=disturbance_cost,
    )
    subgoal_progress = self._subgoal_progress(
        visibility=visibility,
        corridor=corridor,
        persistence=persistence,
    )

    result: dict[str, Any] = {
        "support_mode": active_mode,
        "corridor_feasible": corridor,
        "persistence_horizon": persistence,
        "disturbance_cost": disturbance_cost,
        "belief_map": belief_map,
        "visibility_map": visibility_map,
        "clearance_map": clearance_map,
        "occluder_contact_map": occluder_contact_map,
        "grasp_affordance_map": grasp_affordance_map,
        "support_stability": support_stability,
        "support_stability_map": support_stability_map,
        "reocclusion_target": reocclusion_target,
        "reocclusion_map": reocclusion_map,
        "visibility": visibility,
        "retrieval_success": bool(self.retrieved),
        "target_template": self.target_template,
        "phase_label": int(phase_label),
        "subgoal_progress": float(subgoal_progress),
        "task_name": self.task_name,
        "task_id": self.task_id,
        "stress_slice": self.stress_slice,
        "difficulty_bin": self.difficulty_bin,
        "episode_metadata": self.episode_metadata(),
    }
    result.update(task_metrics)
    return result
666
+
667
+ def render_state(self, privileged_state: dict[str, Any] | None = None) -> dict[str, Any]:
668
+ return {
669
+ "opening": float(self.opening),
670
+ "disturbance": float(self.disturbance),
671
+ "target_center": float(self.target_center),
672
+ "target_depth": float(self.target_depth),
673
+ "target_radius": float(self.target_radius),
674
+ "texture_phase": float(self.texture_phase),
675
+ "texture_scale": float(self.texture_scale),
676
+ "view_bias": float(self.view_bias),
677
+ "target_intensity": float(self.target_intensity),
678
+ "step_fraction": float(self.step_count / max(1, self.max_steps)),
679
+ }
680
+
681
+ def _proprio(self, privileged_state: dict[str, Any]) -> np.ndarray:
682
+ features = np.zeros((32,), dtype=np.float32)
683
+ step_fraction = self.step_count / float(max(1, self.max_steps))
684
+ features[0] = step_fraction
685
+ features[1] = np.sin(np.pi * step_fraction)
686
+ features[2] = np.cos(np.pi * step_fraction)
687
+ return features
688
+
689
def get_observation(self, privileged_state: dict[str, Any] | None = None) -> dict[str, Any]:
    """Assemble the observation dict for the current state.

    Args:
        privileged_state: Optional privileged state; when missing (or falsy)
            a fresh one is obtained from ``get_privileged_state()``.

    Returns:
        A dict with per-camera stacked ``images``, ``depths`` and
        ``depth_valid`` (the latter two get an extra axis inserted at
        position 1), the proprio vector, the goal text for this proxy,
        task / episode bookkeeping and the camera matrices.
    """
    if not privileged_state:
        privileged_state = self.get_privileged_state()

    scene = self.render_state(privileged_state)
    views = render_views_from_state(
        proxy_name=self.proxy_name,
        render_state=scene,
        resolution=self.resolution,
        num_templates=self.num_templates,
        include_depth=True,
    )
    intrinsics, extrinsics = default_camera_matrices(
        pose_jitter=self.camera_pose_jitter,
        focal_jitter=self.focal_jitter,
        lateral_skew=self.lateral_skew,
    )

    cameras = self.camera_names
    rgb = np.stack([views[name] for name in cameras], axis=0)
    depth = np.stack([views[f"{name}_depth"] for name in cameras], axis=0)
    depth_valid = np.stack([views[f"{name}_depth_valid"] for name in cameras], axis=0)

    observation: dict[str, Any] = {
        "images": rgb,
        "depths": depth[:, None, :, :],
        "depth_valid": depth_valid[:, None, :, :],
    }
    observation["proprio"] = self._proprio(privileged_state)
    observation["text"] = PROXY_GOALS[self.proxy_name]
    observation["task_name"] = self.task_name
    observation["task_id"] = self.task_id
    observation["stress_slice"] = self.stress_slice
    observation["difficulty_bin"] = self.difficulty_bin
    observation["episode_metadata"] = self.episode_metadata()
    observation["camera_names"] = cameras
    observation["camera_intrinsics"] = intrinsics
    observation["camera_extrinsics"] = extrinsics
    # Every camera is marked valid.
    observation["camera_valid_mask"] = np.ones((len(cameras),), dtype=np.float32)
    return observation
720
+
721
def teacher_action(self) -> np.ndarray:
    """Build the scripted 14-dim teacher action for the current state.

    Decision rule:
      * opening below ``dynamics.desired_opening`` -> HOLD and open hard (0.95);
      * otherwise, if the preferred mode's persistence horizon is >= 2.0 ->
        switch to the preferred mode with a small opening command (0.12);
      * otherwise -> HOLD with a moderate opening command (0.30).

    The retrieve flag is raised only when the corridor for the chosen mode is
    open at the target template, visibility meets the retrieve threshold and
    ``actor_progress`` is at least 0.55.

    The support-mode slots are written through ``_set_mode_bits`` so the
    encoding lives in one place instead of being duplicated here.

    Returns:
        A float32 array of shape ``(14,)``; slots 0, 1, 2, 6, 7, 8 and 13 are
        populated, the others stay zero.
    """
    privileged_state = self.get_privileged_state()
    dynamics = self.dynamics
    preferred_mode = dynamics.preferred_mode

    if self.opening < dynamics.desired_opening:
        chosen_mode, open_cmd = SupportMode.HOLD, 0.95
    elif privileged_state["persistence_horizon"][preferred_mode] >= 2.0:
        chosen_mode, open_cmd = preferred_mode, 0.12
    else:
        chosen_mode, open_cmd = SupportMode.HOLD, 0.30

    corridor = privileged_state["corridor_feasible"][int(chosen_mode)]
    actor_ready = bool(corridor[self.target_template] > 0.5)
    retrieve = (
        actor_ready
        and privileged_state["visibility"] >= dynamics.retrieve_visibility_threshold
        and self.actor_progress >= 0.55
    )

    action = np.zeros((14,), dtype=np.float32)
    action[0] = np.float32(open_cmd)
    # Slots 1 / 2 / 6 carry the TRANSFER / PASSIVE / HOLD flags.
    self._set_mode_bits(action, chosen_mode)
    action[7] = np.float32(self._normalized_template(self.target_template))
    action[8] = np.float32(1.0 if actor_ready else 0.2)
    action[13] = np.float32(1.0 if retrieve else -1.0)
    return action
750
+
751
+ def _set_mode_bits(self, action: np.ndarray, mode: SupportMode) -> None:
752
+ action[1] = np.float32(1.0 if mode == SupportMode.TRANSFER else -1.0)
753
+ action[2] = np.float32(1.0 if mode == SupportMode.PASSIVE else -1.0)
754
+ action[6] = np.float32(1.0 if mode == SupportMode.HOLD else -1.0)
755
+
756
def macro_action_chunk(self, macro_name: str, chunk_horizon: int = 8) -> np.ndarray:
    """Expand a named macro into a ``(chunk_horizon, 14)`` float32 action chunk.

    Every step starts from ``teacher_action()`` with the retrieve flag forced
    to -1.0, the actor-reach slot to 0.2 and the mode bits set to the
    preferred mode; the macro then overrides individual slots. An unknown
    macro name falls back to the unmodified teacher action. Each row is
    clipped to ``[-1, 1]``. The environment is not stepped here.
    """
    preferred = self.dynamics.preferred_mode
    hold = SupportMode.HOLD
    passive = SupportMode.PASSIVE
    target = self.target_template
    left_of_target = max(0, target - 4)
    right_of_target = min(self.num_templates - 1, target + 4)
    # Template on the opposite half of the range from the target.
    far_side = 0 if target > (self.num_templates // 2) else self.num_templates - 1

    def configure(action, mode, opening, *, reach=None, template=None, retrieve=False):
        # Apply one macro's overrides in slot order 1/2/6, 0, 7, 8, 13.
        self._set_mode_bits(action, mode)
        action[0] = np.float32(opening)
        if template is not None:
            action[7] = np.float32(self._normalized_template(template))
        if reach is not None:
            action[8] = np.float32(reach)
        if retrieve:
            action[13] = np.float32(1.0)

    plan = np.zeros((chunk_horizon, 14), dtype=np.float32)
    for t in range(chunk_horizon):
        action = self.teacher_action()
        action[13] = np.float32(-1.0)
        action[8] = np.float32(0.2)
        self._set_mode_bits(action, preferred)

        if macro_name in {"widen_gap", "widen_mouth", "lift_edge", "separate_layer"}:
            configure(action, hold, 0.95)
        elif macro_name in {"maintain_gap", "maintain_mouth", "maintain_lift", "stabilize_fold", "pin_canopy"}:
            configure(action, preferred, 0.12)
        elif macro_name in {"sweep_left", "pin_left_rim"}:
            configure(action, hold, 0.75, template=left_of_target)
        elif macro_name in {"sweep_right", "pin_right_rim"}:
            configure(action, hold, 0.75, template=right_of_target)
        elif macro_name == "probe_inside":
            configure(action, preferred, 0.10, reach=0.75)
        elif macro_name == "insert_actor":
            configure(action, preferred, 0.10, reach=1.0)
        elif macro_name == "retrieve":
            configure(action, preferred, 0.05, reach=1.0, retrieve=True)
        elif macro_name == "premature_retrieve":
            configure(action, passive, -0.20, reach=1.0, retrieve=True)
        elif macro_name in {"reveal_with_release", "foliage_immediate_reocclusion"}:
            # First half of the chunk reveals under HOLD, second half lets go.
            revealing = t < max(1, chunk_horizon // 2)
            configure(
                action,
                hold if revealing else passive,
                0.95 if revealing else -0.35,
                reach=0.2,
            )
        elif macro_name in {"wrong_side_reveal", "wrong_edge_reveal", "wrong_layer_reveal"}:
            configure(action, hold, 0.65, template=far_side)
        elif macro_name in {"over_disturbance", "cloth_lift_high"}:
            configure(action, passive, 1.0, reach=1.0 if macro_name == "over_disturbance" else 0.2)
        elif macro_name == "delayed_actor_entry":
            # The actor only commits on the very last step of the chunk.
            configure(action, preferred, 0.10, reach=0.2 if t < (chunk_horizon - 1) else 1.0)
        elif macro_name in {"weak_corridor_insert", "bag_fabric_probe"}:
            configure(action, passive, 0.02, reach=1.0)
        else:
            action = self.teacher_action()
        plan[t] = np.clip(action, -1.0, 1.0)
    return plan
829
+
830
def baseline_action_chunk(self, baseline_name: str, chunk_horizon: int = 8) -> np.ndarray:
    """Return the action chunk for a named baseline policy.

    Supported names: ``teacher``, ``reveal_only``, ``retrieve_only``,
    ``no_hold`` and ``random``.

    Raises:
        KeyError: for any other baseline name.
    """
    if baseline_name == "teacher":
        teacher_chunk, _rollout = self.teacher_chunk_and_rollout(
            chunk_horizon=chunk_horizon,
            rollout_horizon=self.rollout_horizon,
        )
        return teacher_chunk

    if baseline_name == "reveal_only":
        # The reveal macro depends on which proxy task is active.
        if self.proxy_name == FOLIAGE_PROXY.name:
            reveal_macro = "widen_gap"
        elif self.proxy_name == BAG_PROXY.name:
            reveal_macro = "widen_mouth"
        else:
            reveal_macro = "lift_edge"
        return self.macro_action_chunk(reveal_macro, chunk_horizon=chunk_horizon)

    for name, macro in (("retrieve_only", "premature_retrieve"), ("no_hold", "reveal_with_release")):
        if baseline_name == name:
            return self.macro_action_chunk(macro, chunk_horizon=chunk_horizon)

    if baseline_name == "random":
        noise = self.rng.uniform(-1.0, 1.0, size=(chunk_horizon, 14))
        return noise.astype(np.float32)

    raise KeyError(f"Unknown baseline chunk: {baseline_name}")
843
+
844
+ def teacher_chunk_and_rollout(
845
+ self,
846
+ chunk_horizon: int = 8,
847
+ rollout_horizon: int | None = None,
848
+ ) -> tuple[np.ndarray, dict[str, np.ndarray]]:
849
+ rollout_horizon = rollout_horizon or self.rollout_horizon
850
+ snapshot = self.clone_state()
851
+ action_chunk: list[np.ndarray] = []
852
+ rollout_support_mode = []
853
+ rollout_corridor = []
854
+ rollout_persistence = []
855
+ rollout_disturbance = []
856
+ rollout_belief = []
857
+ rollout_visibility = []
858
+ rollout_clearance = []
859
+ rollout_support_stability = []
860
+ rollout_reocclusion = []
861
+ rollout_occluder_contact = []
862
+ rollout_grasp_affordance = []
863
+ rollout_phase = []
864
+ for step in range(chunk_horizon):
865
+ action = self.teacher_action()
866
+ action_chunk.append(action)
867
+ _, _, terminated, truncated, privileged_state = self.step(action)
868
+ if step < rollout_horizon:
869
+ rollout_support_mode.append(privileged_state["support_mode"])
870
+ rollout_corridor.append(privileged_state["corridor_feasible"])
871
+ rollout_persistence.append(privileged_state["persistence_horizon"])
872
+ rollout_disturbance.append(privileged_state["disturbance_cost"])
873
+ rollout_belief.append(privileged_state["belief_map"])
874
+ rollout_visibility.append(privileged_state["visibility_map"])
875
+ rollout_clearance.append(privileged_state["clearance_map"])
876
+ rollout_support_stability.append(privileged_state["support_stability"])
877
+ rollout_reocclusion.append(privileged_state["reocclusion_target"])
878
+ rollout_occluder_contact.append(privileged_state["occluder_contact_map"])
879
+ rollout_grasp_affordance.append(privileged_state["grasp_affordance_map"])
880
+ rollout_phase.append(int(privileged_state["phase_label"]))
881
+ if terminated or truncated:
882
+ break
883
+ while len(action_chunk) < chunk_horizon:
884
+ action_chunk.append(np.zeros((14,), dtype=np.float32))
885
+ while len(rollout_support_mode) < rollout_horizon:
886
+ current = self.get_privileged_state()
887
+ rollout_support_mode.append(int(self._current_support_mode()))
888
+ rollout_corridor.append(current["corridor_feasible"])
889
+ rollout_persistence.append(current["persistence_horizon"])
890
+ rollout_disturbance.append(current["disturbance_cost"])
891
+ rollout_belief.append(current["belief_map"])
892
+ rollout_visibility.append(current["visibility_map"])
893
+ rollout_clearance.append(current["clearance_map"])
894
+ rollout_support_stability.append(current["support_stability"])
895
+ rollout_reocclusion.append(current["reocclusion_target"])
896
+ rollout_occluder_contact.append(current["occluder_contact_map"])
897
+ rollout_grasp_affordance.append(current["grasp_affordance_map"])
898
+ rollout_phase.append(int(current["phase_label"]))
899
+ self.restore_state(snapshot)
900
+ return np.stack(action_chunk, axis=0).astype(np.float32), {
901
+ "rollout_support_mode": np.asarray(rollout_support_mode, dtype=np.int64),
902
+ "rollout_corridor_feasible": np.asarray(rollout_corridor, dtype=np.float32),
903
+ "rollout_persistence_horizon": np.asarray(rollout_persistence, dtype=np.float32),
904
+ "rollout_disturbance_cost": np.asarray(rollout_disturbance, dtype=np.float32),
905
+ "rollout_belief_map": np.asarray(rollout_belief, dtype=np.float32),
906
+ "rollout_visibility_map": np.asarray(rollout_visibility, dtype=np.float32),
907
+ "rollout_clearance_map": np.asarray(rollout_clearance, dtype=np.float32),
908
+ "rollout_support_stability": np.asarray(rollout_support_stability, dtype=np.float32),
909
+ "rollout_reocclusion_target": np.asarray(rollout_reocclusion, dtype=np.float32),
910
+ "rollout_occluder_contact_map": np.asarray(rollout_occluder_contact, dtype=np.float32),
911
+ "rollout_grasp_affordance_map": np.asarray(rollout_grasp_affordance, dtype=np.float32),
912
+ "rollout_phase": np.asarray(rollout_phase, dtype=np.int64),
913
+ }
914
+
915
+ def evaluate_action_chunk(
916
+ self,
917
+ action_chunk: np.ndarray,
918
+ rollout_horizon: int | None = None,
919
+ ) -> dict[str, np.ndarray | float]:
920
+ rollout_horizon = rollout_horizon or self.rollout_horizon
921
+ snapshot = self.clone_state()
922
+ rollout_support_mode: list[int] = []
923
+ rollout_corridor: list[np.ndarray] = []
924
+ rollout_persistence: list[np.ndarray] = []
925
+ rollout_disturbance: list[float] = []
926
+ rollout_belief: list[np.ndarray] = []
927
+ rollout_visibility: list[np.ndarray] = []
928
+ rollout_clearance: list[np.ndarray] = []
929
+ rollout_support_stability: list[float] = []
930
+ rollout_reocclusion: list[float] = []
931
+ rollout_occluder_contact: list[np.ndarray] = []
932
+ rollout_grasp_affordance: list[np.ndarray] = []
933
+ rollout_phase: list[int] = []
934
+ corridor_open_trace = [float(self.get_privileged_state()["corridor_feasible"][self._current_support_mode()].any())]
935
+ visibility_trace = [float(self.get_privileged_state()["visibility"])]
936
+ disturbance_trace = [float(self.get_privileged_state()["disturbance_cost"])]
937
+ support_trace = [float(self.get_privileged_state()["support_stability"])]
938
+ opening_trace = [float(self.opening)]
939
+ terminated = False
940
+ truncated = False
941
+ privileged_state = self.get_privileged_state()
942
+ for step, action in enumerate(np.asarray(action_chunk, dtype=np.float32)):
943
+ _, _, terminated, truncated, privileged_state = self.step(action)
944
+ if step < rollout_horizon:
945
+ rollout_support_mode.append(int(privileged_state["support_mode"]))
946
+ rollout_corridor.append(privileged_state["corridor_feasible"].astype(np.float32))
947
+ rollout_persistence.append(privileged_state["persistence_horizon"].astype(np.float32))
948
+ rollout_disturbance.append(float(privileged_state["disturbance_cost"]))
949
+ rollout_belief.append(privileged_state["belief_map"].astype(np.float32))
950
+ rollout_visibility.append(privileged_state["visibility_map"].astype(np.float32))
951
+ rollout_clearance.append(privileged_state["clearance_map"].astype(np.float32))
952
+ rollout_support_stability.append(float(privileged_state["support_stability"]))
953
+ rollout_reocclusion.append(float(privileged_state["reocclusion_target"]))
954
+ rollout_occluder_contact.append(privileged_state["occluder_contact_map"].astype(np.float32))
955
+ rollout_grasp_affordance.append(privileged_state["grasp_affordance_map"].astype(np.float32))
956
+ rollout_phase.append(int(privileged_state["phase_label"]))
957
+ corridor_open_trace.append(float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any()))
958
+ visibility_trace.append(float(privileged_state["visibility"]))
959
+ disturbance_trace.append(float(privileged_state["disturbance_cost"]))
960
+ support_trace.append(float(privileged_state["support_stability"]))
961
+ opening_trace.append(float(self.opening))
962
+ if terminated or truncated:
963
+ break
964
+ while len(rollout_support_mode) < rollout_horizon:
965
+ current = self.get_privileged_state()
966
+ rollout_support_mode.append(int(current["support_mode"]))
967
+ rollout_corridor.append(current["corridor_feasible"].astype(np.float32))
968
+ rollout_persistence.append(current["persistence_horizon"].astype(np.float32))
969
+ rollout_disturbance.append(float(current["disturbance_cost"]))
970
+ rollout_belief.append(current["belief_map"].astype(np.float32))
971
+ rollout_visibility.append(current["visibility_map"].astype(np.float32))
972
+ rollout_clearance.append(current["clearance_map"].astype(np.float32))
973
+ rollout_support_stability.append(float(current["support_stability"]))
974
+ rollout_reocclusion.append(float(current["reocclusion_target"]))
975
+ rollout_occluder_contact.append(current["occluder_contact_map"].astype(np.float32))
976
+ rollout_grasp_affordance.append(current["grasp_affordance_map"].astype(np.float32))
977
+ rollout_phase.append(int(current["phase_label"]))
978
+ final_state = self.get_privileged_state()
979
+ corridor_curve = np.asarray(corridor_open_trace, dtype=np.float32)
980
+ visibility_curve = np.asarray(visibility_trace, dtype=np.float32)
981
+ disturbance_curve = np.asarray(disturbance_trace, dtype=np.float32)
982
+ support_curve = np.asarray(support_trace, dtype=np.float32)
983
+ opening_curve = np.asarray(opening_trace, dtype=np.float32)
984
+ reocclusion = float(
985
+ np.logical_and(
986
+ corridor_curve[:-1] > 0.5,
987
+ corridor_curve[1:] <= 0.5,
988
+ ).mean()
989
+ ) if len(corridor_open_trace) > 1 else 0.0
990
+ result: dict[str, np.ndarray | float] = {
991
+ "rollout_support_mode": np.asarray(rollout_support_mode, dtype=np.int64),
992
+ "rollout_corridor_feasible": np.asarray(rollout_corridor, dtype=np.float32),
993
+ "rollout_persistence_horizon": np.asarray(rollout_persistence, dtype=np.float32),
994
+ "rollout_disturbance_cost": np.asarray(rollout_disturbance, dtype=np.float32),
995
+ "rollout_belief_map": np.asarray(rollout_belief, dtype=np.float32),
996
+ "rollout_visibility_map": np.asarray(rollout_visibility, dtype=np.float32),
997
+ "rollout_clearance_map": np.asarray(rollout_clearance, dtype=np.float32),
998
+ "rollout_support_stability": np.asarray(rollout_support_stability, dtype=np.float32),
999
+ "rollout_reocclusion_target": np.asarray(rollout_reocclusion, dtype=np.float32),
1000
+ "rollout_occluder_contact_map": np.asarray(rollout_occluder_contact, dtype=np.float32),
1001
+ "rollout_grasp_affordance_map": np.asarray(rollout_grasp_affordance, dtype=np.float32),
1002
+ "rollout_phase": np.asarray(rollout_phase, dtype=np.int64),
1003
+ "retrieval_success": float(final_state["retrieval_success"]),
1004
+ "final_disturbance_cost": float(final_state["disturbance_cost"]),
1005
+ "reocclusion_rate": reocclusion,
1006
+ "visibility_integral": float(np.sum(visibility_curve)),
1007
+ "actor_feasibility_auc": float(corridor_curve.mean()),
1008
+ "reveal_achieved": float(visibility_curve.max() >= self.dynamics.retrieve_visibility_threshold),
1009
+ "hold_persistence": float(corridor_curve.mean()),
1010
+ "support_stability_auc": float(support_curve.mean()),
1011
+ "disturbance_auc": float(disturbance_curve.mean()),
1012
+ "opening_peak": float(opening_curve.max()),
1013
+ }
1014
+ if self.proxy_name == FOLIAGE_PROXY.name:
1015
+ result["candidate_gap_width"] = float(final_state.get("gap_width", opening_curve.max()))
1016
+ result["candidate_damage_proxy"] = float(final_state.get("damage_proxy", final_state["disturbance_cost"]))
1017
+ elif self.proxy_name == BAG_PROXY.name:
1018
+ result["candidate_mouth_aperture"] = float(final_state.get("mouth_aperture", opening_curve.max()))
1019
+ result["candidate_hold_quality"] = float(final_state.get("hold_quality", support_curve.mean()))
1020
+ result["candidate_rim_slip_risk"] = float(final_state.get("rim_slip_risk", reocclusion))
1021
+ elif self.proxy_name == CLOTH_PROXY.name:
1022
+ result["candidate_fold_preservation"] = float(final_state.get("fold_preservation", 1.0 - final_state["disturbance_cost"]))
1023
+ result["candidate_layer_separation_quality"] = float(final_state.get("layer_separation_quality", opening_curve.max()))
1024
+ result["candidate_lift_too_much_risk"] = float(final_state.get("lift_too_much_risk", max(0.0, opening_curve.max() - self.dynamics.desired_opening)))
1025
+ result["candidate_top_layer_stability"] = float(final_state.get("top_layer_stability", support_curve.mean()))
1026
+ self.restore_state(snapshot)
1027
+ return result
1028
+
1029
+ def candidate_outcome_utility(self, outcome: dict[str, np.ndarray | float]) -> float:
1030
+ retrieval_success = float(outcome["retrieval_success"])
1031
+ disturbance = float(outcome["final_disturbance_cost"])
1032
+ reocclusion = float(outcome["reocclusion_rate"])
1033
+ utility = retrieval_success - disturbance - reocclusion
1034
+ if self.proxy_name == CLOTH_PROXY.name:
1035
+ # Cloth success tracks layer separation more than strict fold/disturbance minimization.
1036
+ # Keep a lift-risk penalty, but stop over-penalizing the slightly aggressive actions
1037
+ # that actually create an insertable corridor on the proxy.
1038
+ layer_separation = float(outcome.get("candidate_layer_separation_quality", outcome.get("opening_peak", 0.0)))
1039
+ fold_preservation = float(outcome.get("candidate_fold_preservation", max(0.0, 1.0 - disturbance)))
1040
+ lift_risk = float(outcome.get("candidate_lift_too_much_risk", 0.0))
1041
+ utility = (
1042
+ retrieval_success
1043
+ + 0.80 * layer_separation
1044
+ + 0.20 * fold_preservation
1045
+ - 0.20 * disturbance
1046
+ - 0.20 * reocclusion
1047
+ - 0.35 * lift_risk
1048
+ )
1049
+ return float(utility)
1050
+
1051
+ def sample_candidate_action_chunks(
1052
+ self,
1053
+ teacher_chunk: np.ndarray,
1054
+ num_candidates: int = 4,
1055
+ rollout_horizon: int | None = None,
1056
+ ) -> tuple[np.ndarray, dict[str, np.ndarray]]:
1057
+ rollout_horizon = rollout_horizon or self.rollout_horizon
1058
+ teacher_chunk = np.asarray(teacher_chunk, dtype=np.float32)
1059
+ candidates = [teacher_chunk.astype(np.float32)]
1060
+ outcomes = [self.evaluate_action_chunk(teacher_chunk, rollout_horizon=rollout_horizon)]
1061
+ candidate_macro_ids = [0]
1062
+ candidate_is_hard_negative = [0.0]
1063
+ candidate_macro_names = ["teacher"]
1064
+ candidate_negative_families = ["teacher"]
1065
+ if self.proxy_name == FOLIAGE_PROXY.name:
1066
+ semantic_specs = [
1067
+ ("pin_canopy", "positive"),
1068
+ ("maintain_gap", "positive"),
1069
+ ("premature_retrieve", "premature_retrieve"),
1070
+ ("reveal_with_release", "reveal_with_release"),
1071
+ ("wrong_side_reveal", "wrong_side_reveal"),
1072
+ ("foliage_immediate_reocclusion", "immediate_reocclusion"),
1073
+ ("over_disturbance", "over_disturbance"),
1074
+ ("weak_corridor_insert", "weak_corridor_insert"),
1075
+ ("insert_actor", "positive"),
1076
+ ("retrieve", "positive"),
1077
+ ]
1078
+ elif self.proxy_name == BAG_PROXY.name:
1079
+ semantic_specs = [
1080
+ ("widen_mouth", "positive"),
1081
+ ("maintain_mouth", "positive"),
1082
+ ("premature_retrieve", "premature_retrieve"),
1083
+ ("reveal_with_release", "reveal_with_release"),
1084
+ ("wrong_edge_reveal", "wrong_side_reveal"),
1085
+ ("pin_left_rim", "one_rim_slip"),
1086
+ ("bag_fabric_probe", "fabric_probe"),
1087
+ ("weak_corridor_insert", "weak_corridor_insert"),
1088
+ ("insert_actor", "positive"),
1089
+ ("retrieve", "positive"),
1090
+ ]
1091
+ else:
1092
+ semantic_specs = [
1093
+ ("lift_edge", "positive"),
1094
+ ("stabilize_fold", "positive"),
1095
+ ("premature_retrieve", "premature_retrieve"),
1096
+ ("reveal_with_release", "reveal_with_release"),
1097
+ ("cloth_lift_high", "lift_too_high"),
1098
+ ("wrong_layer_reveal", "wrong_layer_reveal"),
1099
+ ("delayed_actor_entry", "delayed_actor_entry"),
1100
+ ("weak_corridor_insert", "weak_corridor_insert"),
1101
+ ("insert_actor", "positive"),
1102
+ ("retrieve", "positive"),
1103
+ ]
1104
+
1105
+ for spec_idx, (macro_name, family_name) in enumerate(semantic_specs[: max(0, num_candidates - 1)], start=1):
1106
+ candidate = self.macro_action_chunk(macro_name, chunk_horizon=teacher_chunk.shape[0])
1107
+ candidates.append(candidate.astype(np.float32))
1108
+ outcomes.append(self.evaluate_action_chunk(candidate, rollout_horizon=rollout_horizon))
1109
+ candidate_macro_ids.append(spec_idx)
1110
+ candidate_macro_names.append(macro_name)
1111
+ candidate_negative_families.append(family_name)
1112
+ candidate_is_hard_negative.append(0.0 if family_name == "positive" else 1.0)
1113
+
1114
+ while len(candidates) < num_candidates:
1115
+ random_chunk = self.rng.uniform(-1.0, 1.0, size=teacher_chunk.shape).astype(np.float32)
1116
+ candidates.append(random_chunk)
1117
+ outcomes.append(self.evaluate_action_chunk(random_chunk, rollout_horizon=rollout_horizon))
1118
+ candidate_macro_ids.append(len(candidate_macro_ids))
1119
+ candidate_macro_names.append("random")
1120
+ candidate_negative_families.append("random")
1121
+ candidate_is_hard_negative.append(1.0)
1122
+ stacked_outcomes = {
1123
+ "candidate_rollout_support_mode": np.stack([item["rollout_support_mode"] for item in outcomes], axis=0).astype(np.int64),
1124
+ "candidate_rollout_phase": np.stack([item["rollout_phase"] for item in outcomes], axis=0).astype(np.int64),
1125
+ "candidate_rollout_corridor_feasible": np.stack(
1126
+ [item["rollout_corridor_feasible"] for item in outcomes], axis=0
1127
+ ).astype(np.float32),
1128
+ "candidate_rollout_persistence_horizon": np.stack(
1129
+ [item["rollout_persistence_horizon"] for item in outcomes], axis=0
1130
+ ).astype(np.float32),
1131
+ "candidate_rollout_disturbance_cost": np.stack(
1132
+ [item["rollout_disturbance_cost"] for item in outcomes], axis=0
1133
+ ).astype(np.float32),
1134
+ "candidate_rollout_belief_map": np.stack(
1135
+ [item["rollout_belief_map"] for item in outcomes], axis=0
1136
+ ).astype(np.float32),
1137
+ "candidate_rollout_visibility_map": np.stack(
1138
+ [item["rollout_visibility_map"] for item in outcomes], axis=0
1139
+ ).astype(np.float32),
1140
+ "candidate_rollout_clearance_map": np.stack(
1141
+ [item["rollout_clearance_map"] for item in outcomes], axis=0
1142
+ ).astype(np.float32),
1143
+ "candidate_rollout_support_stability": np.stack(
1144
+ [item["rollout_support_stability"] for item in outcomes], axis=0
1145
+ ).astype(np.float32),
1146
+ "candidate_rollout_reocclusion_target": np.stack(
1147
+ [item["rollout_reocclusion_target"] for item in outcomes], axis=0
1148
+ ).astype(np.float32),
1149
+ "candidate_rollout_occluder_contact_map": np.stack(
1150
+ [item["rollout_occluder_contact_map"] for item in outcomes], axis=0
1151
+ ).astype(np.float32),
1152
+ "candidate_rollout_grasp_affordance_map": np.stack(
1153
+ [item["rollout_grasp_affordance_map"] for item in outcomes], axis=0
1154
+ ).astype(np.float32),
1155
+ "candidate_retrieval_success": np.asarray([item["retrieval_success"] for item in outcomes], dtype=np.float32),
1156
+ "candidate_final_disturbance_cost": np.asarray(
1157
+ [item["final_disturbance_cost"] for item in outcomes], dtype=np.float32
1158
+ ),
1159
+ "candidate_reocclusion_rate": np.asarray([item["reocclusion_rate"] for item in outcomes], dtype=np.float32),
1160
+ "candidate_visibility_integral": np.asarray([item["visibility_integral"] for item in outcomes], dtype=np.float32),
1161
+ "candidate_actor_feasibility_auc": np.asarray([item["actor_feasibility_auc"] for item in outcomes], dtype=np.float32),
1162
+ "candidate_reveal_achieved": np.asarray([item["reveal_achieved"] for item in outcomes], dtype=np.float32),
1163
+ "candidate_hold_persistence": np.asarray([item["hold_persistence"] for item in outcomes], dtype=np.float32),
1164
+ "candidate_support_stability_auc": np.asarray([item["support_stability_auc"] for item in outcomes], dtype=np.float32),
1165
+ "candidate_disturbance_auc": np.asarray([item["disturbance_auc"] for item in outcomes], dtype=np.float32),
1166
+ "candidate_macro_ids": np.asarray(candidate_macro_ids, dtype=np.int64),
1167
+ "candidate_is_hard_negative": np.asarray(candidate_is_hard_negative, dtype=np.float32),
1168
+ }
1169
+ stacked_outcomes["candidate_risk"] = np.clip(
1170
+ stacked_outcomes["candidate_final_disturbance_cost"] + stacked_outcomes["candidate_reocclusion_rate"],
1171
+ 0.0,
1172
+ 1.0,
1173
+ ).astype(np.float32)
1174
+ stacked_outcomes["candidate_utility"] = np.asarray(
1175
+ [self.candidate_outcome_utility(item) for item in outcomes],
1176
+ dtype=np.float32,
1177
+ )
1178
+ stacked_outcomes["candidate_macro_names"] = candidate_macro_names
1179
+ stacked_outcomes["candidate_negative_families"] = candidate_negative_families
1180
+ if self.proxy_name == FOLIAGE_PROXY.name:
1181
+ stacked_outcomes["candidate_gap_width"] = np.asarray([item["candidate_gap_width"] for item in outcomes], dtype=np.float32)
1182
+ stacked_outcomes["candidate_damage_proxy"] = np.asarray([item["candidate_damage_proxy"] for item in outcomes], dtype=np.float32)
1183
+ elif self.proxy_name == BAG_PROXY.name:
1184
+ stacked_outcomes["candidate_mouth_aperture"] = np.asarray([item["candidate_mouth_aperture"] for item in outcomes], dtype=np.float32)
1185
+ stacked_outcomes["candidate_hold_quality"] = np.asarray([item["candidate_hold_quality"] for item in outcomes], dtype=np.float32)
1186
+ stacked_outcomes["candidate_rim_slip_risk"] = np.asarray([item["candidate_rim_slip_risk"] for item in outcomes], dtype=np.float32)
1187
+ elif self.proxy_name == CLOTH_PROXY.name:
1188
+ stacked_outcomes["candidate_fold_preservation"] = np.asarray([item["candidate_fold_preservation"] for item in outcomes], dtype=np.float32)
1189
+ stacked_outcomes["candidate_layer_separation_quality"] = np.asarray([item["candidate_layer_separation_quality"] for item in outcomes], dtype=np.float32)
1190
+ stacked_outcomes["candidate_lift_too_much_risk"] = np.asarray([item["candidate_lift_too_much_risk"] for item in outcomes], dtype=np.float32)
1191
+ return np.stack(candidates, axis=0).astype(np.float32), stacked_outcomes
1192
+
1193
def step(self, action: np.ndarray) -> tuple[dict[str, Any], float, bool, bool, dict[str, Any]]:
    """Advance the proxy environment by one action.

    Action slots read here: 0 (opening command, clipped to [-1, 1]),
    7 (actor template, mapped from [-1, 1] onto a template index),
    8 (actor reach) and 13 (retrieve command); the support mode is decoded
    by ``_mode_from_action``. Slots 8 and 13 are squashed into (0, 1) with
    ``(tanh(x) + 1) / 2``.

    Returns:
        ``(observation, reward, terminated, truncated, privileged_state)``.
        ``terminated`` mirrors ``self.retrieved``; ``truncated`` is set once
        ``step_count`` reaches ``max_steps``.
    """
    action = np.asarray(action, dtype=np.float32)
    mode = self._mode_from_action(action)
    self.holding = mode == SupportMode.HOLD
    self.transferred = mode == SupportMode.TRANSFER
    open_cmd = float(np.clip(action[0], -1.0, 1.0))
    actor_reach = float((np.tanh(float(action[8])) + 1.0) * 0.5)
    retrieve_cmd = float((np.tanh(float(action[13])) + 1.0) * 0.5)
    # Map slot 7 from [-1, 1] onto an integer template index in
    # [0, num_templates - 1].
    self.last_actor_template = int(
        np.clip(
            round(((float(np.clip(action[7], -1.0, 1.0)) + 1.0) * 0.5) * (self.num_templates - 1)),
            0,
            self.num_templates - 1,
        )
    )

    # Opening update: commanded change plus a per-mode bonus, minus the
    # mode-dependent closure and a term proportional to current disturbance.
    support_bonus = {SupportMode.HOLD: 0.08, SupportMode.TRANSFER: 0.04, SupportMode.PASSIVE: 0.0}[mode]
    closure = self.closure_scale * self._mode_decay(mode)
    self.opening = float(
        np.clip(
            self.opening + 0.16 * open_cmd + support_bonus - closure - 0.05 * self.disturbance,
            0.0,
            1.0,
        )
    )
    # Disturbance update: grows with |open_cmd|, actor reach and any opening
    # beyond the desired level, and decays by the settle rate. Uses the
    # opening value that was just updated above.
    self.disturbance = float(
        np.clip(
            self.disturbance
            + self.disturbance_gain_scale * self.dynamics.disturbance_gain * abs(open_cmd)
            + 0.025 * actor_reach
            + 0.05 * max(0.0, self.opening - self.dynamics.desired_opening)
            + 0.03 * self.collateral_bias * actor_reach
            - self.dynamics.settle_rate,
            0.0,
            1.0,
        )
    )

    self.step_count += 1
    privileged_state = self.get_privileged_state()
    corridor = privileged_state["corridor_feasible"][privileged_state["support_mode"]]
    if corridor[self.last_actor_template] > 0.5 and actor_reach >= 0.55:
        # The actor is reaching through an open corridor: progress grows with
        # the persistence horizon relative to the rollout horizon.
        persistence_ratio = privileged_state["persistence_horizon"][privileged_state["support_mode"]] / float(
            max(1, self.rollout_horizon)
        )
        self.actor_progress = float(np.clip(self.actor_progress + 0.55 * persistence_ratio, 0.0, 1.0))
        # A persistence ratio below 0.8 shrinks the opening.
        shock = 0.16 * max(0.0, 0.8 - persistence_ratio)
        if shock > 0.0:
            self.opening = float(np.clip(self.opening - shock, 0.0, 1.0))
            # NOTE(review): the scraped source lost its indentation; the
            # refresh below is assumed to sit inside `if shock > 0.0`
            # (re-read the state after the opening changed) - confirm.
            privileged_state = self.get_privileged_state()
            corridor = privileged_state["corridor_feasible"][privileged_state["support_mode"]]
    else:
        # No valid reach this step: actor progress decays.
        self.actor_progress = float(np.clip(self.actor_progress - 0.20, 0.0, 1.0))
    # Retrieval succeeds only when all five conditions hold at once.
    success = bool(
        retrieve_cmd >= 0.55
        and self.actor_progress >= 0.80
        and corridor[self.last_actor_template] > 0.5
        and privileged_state["visibility"] >= self.dynamics.retrieve_visibility_threshold
        and self.disturbance < 0.9
    )
    if success:
        self.retrieved = True
        # The state dict was built before `retrieved` flipped; patch it so
        # the returned state reflects the success.
        privileged_state["retrieval_success"] = True

    self.visibility_trace.append(float(privileged_state["visibility"]))
    self.corridor_trace.append(float(corridor.any()))

    # Reward is 1.0 on success, otherwise a small shaping term.
    reward = 1.0 if success else (0.08 * privileged_state["visibility"] - 0.03 * privileged_state["disturbance_cost"])
    terminated = bool(self.retrieved)
    truncated = bool(self.step_count >= self.max_steps)
    return self.get_observation(privileged_state), float(reward), terminated, truncated, privileged_state
1264
+
1265
+
1266
def render_views_from_state(
    proxy_name: str,
    render_state: dict[str, Any],
    resolution: int,
    num_templates: int = 32,
    include_depth: bool = False,
) -> dict[str, np.ndarray]:
    """Render synthetic front / wrist camera images from a proxy scene state.

    Args:
        proxy_name: Key into ``PROXY_DYNAMICS``; the selected entry supplies
            ``palette`` (base RGB colour) and ``visibility_bias``.
        render_state: Mapping that must contain the float-convertible keys
            ``opening``, ``disturbance``, ``target_center``, ``target_depth``,
            ``target_radius``, ``texture_phase``, ``texture_scale``,
            ``view_bias``, ``target_intensity`` and ``step_fraction``.
        resolution: Side length in pixels; every view is square.
        num_templates: Accepted for interface compatibility; not used by the
            renderer itself.
        include_depth: When true, also return per-view depth maps and
            all-ones validity masks.

    Returns:
        A dict with ``front``, ``wrist_left`` and ``wrist_right`` as
        ``(resolution, resolution, 3)`` ``uint8`` images. With
        ``include_depth`` it additionally holds ``<view>_depth`` and
        ``<view>_depth_valid`` as ``(resolution, resolution)`` ``float32``
        arrays.

    Raises:
        KeyError: If ``proxy_name`` or a required ``render_state`` key is
            missing.
    """
    dynamics = PROXY_DYNAMICS[proxy_name]
    opening = float(render_state["opening"])
    disturbance = float(render_state["disturbance"])
    target_center = float(render_state["target_center"])
    target_depth = float(render_state["target_depth"])
    target_radius = float(render_state["target_radius"])
    texture_phase = float(render_state["texture_phase"])
    texture_scale = float(render_state["texture_scale"])
    view_bias = float(render_state["view_bias"])
    target_intensity = float(render_state["target_intensity"])
    step_fraction = float(render_state["step_fraction"])

    height = width = resolution
    base = np.ones((height, width, 3), dtype=np.float32)
    base *= np.asarray(dynamics.palette, dtype=np.float32)

    # Normalised pixel coordinates in [0, 1]; yy varies along rows, xx along columns.
    x = np.linspace(0.0, 1.0, width, dtype=np.float32)
    y = np.linspace(0.0, 1.0, height, dtype=np.float32)
    yy, xx = np.meshgrid(y, x, indexing="ij")
    visibility = np.clip(
        1.25 * opening - 0.68 * disturbance - 0.24 * target_depth + dynamics.visibility_bias,
        0.0,
        1.0,
    )
    target_y = 0.74 - 0.22 * target_depth
    gap_width = np.clip(0.05 + 0.16 * opening - 0.08 * disturbance, 0.02, 0.24)
    front_center = np.clip(target_center + 0.03 * view_bias, 0.06, 0.94)
    left_center = np.clip(0.34 + 0.12 * (target_center - 0.5) - 0.05 * view_bias, 0.18, 0.52)
    right_center = np.clip(0.66 + 0.18 * (target_center - 0.5) + 0.06 * view_bias, 0.42, 0.88)

    # Procedural texture shared by all views, plus a disturbance-scaled ripple.
    surface_wave = 0.5 + 0.5 * np.sin((xx * (14.0 * texture_scale) + yy * 7.0) * np.pi + texture_phase)
    weave_wave = 0.5 + 0.5 * np.cos((xx * 6.0 - yy * (10.0 + 2.0 * texture_scale)) * np.pi - 0.6 * texture_phase)
    clutter = 0.65 * surface_wave + 0.35 * weave_wave
    disturbance_map = disturbance * (
        0.55 + 0.45 * np.sin((xx * 9.0 + yy * (12.0 + texture_scale)) * np.pi + 1.3 * texture_phase)
    )
    # Elliptical target footprint (stretched by 1.2 along y), reused in every view.
    target_mask = ((xx - front_center) ** 2 + ((yy - target_y) / 1.2) ** 2) <= target_radius**2

    # ---- front camera -------------------------------------------------------
    front = base.copy()
    front *= (0.82 + 0.24 * clutter[..., None]).astype(np.float32)
    occluder_profile = np.abs(xx - front_center) / gap_width + 0.55 * np.abs(yy - (0.56 + 0.08 * view_bias))
    gap_mask = occluder_profile <= (1.15 + 0.35 * opening)
    front[gap_mask] = np.clip(front[gap_mask] + np.asarray([0.14, 0.16, 0.14], dtype=np.float32), 0.0, 1.0)
    target_rgb = np.asarray([0.78, 0.74, 0.58], dtype=np.float32) * target_intensity
    front[target_mask] = np.clip(
        front[target_mask] * (1.0 - 0.45 * visibility) + target_rgb * (0.25 + 0.75 * visibility),
        0.0,
        1.0,
    )
    front[..., 2] = np.clip(front[..., 2] + 0.12 * disturbance_map + 0.04 * step_fraction, 0.0, 1.0)

    # ---- left wrist camera --------------------------------------------------
    wrist_left = np.full((height, width, 3), 0.12, dtype=np.float32)
    wrist_left *= (0.8 + 0.18 * clutter[..., None]).astype(np.float32)
    left_slit_width = np.clip(0.04 + 0.18 * opening - 0.10 * disturbance, 0.015, 0.22)
    left_profile = ((xx - left_center) / left_slit_width) ** 2 + ((yy - 0.58) / (0.40 + 0.10 * opening)) ** 2
    left_open = left_profile <= 1.0
    wrist_left[left_open] = np.clip(wrist_left[left_open] + np.asarray([0.08, 0.22, 0.12], dtype=np.float32), 0.0, 1.0)
    wrist_left[..., 0] = np.clip(wrist_left[..., 0] + 0.18 * disturbance_map, 0.0, 1.0)
    wrist_left[target_mask] = np.clip(
        wrist_left[target_mask] * (1.0 - 0.35 * visibility) + target_rgb * (0.18 + 0.52 * visibility),
        0.0,
        1.0,
    )

    # ---- right wrist camera -------------------------------------------------
    wrist_right = np.full((height, width, 3), 0.08, dtype=np.float32)
    wrist_right *= (0.78 + 0.22 * clutter[..., None]).astype(np.float32)
    right_band = np.exp(-((xx - right_center) ** 2) / max(1e-4, (0.06 + gap_width) ** 2))
    right_clear = np.exp(-((yy - (0.52 - 0.12 * target_depth)) ** 2) / max(1e-4, (0.12 + 0.18 * opening) ** 2))
    wrist_right[..., 1] = np.clip(
        wrist_right[..., 1] + 0.28 * visibility * right_band * right_clear - 0.10 * disturbance_map,
        0.0,
        1.0,
    )
    wrist_right[target_mask] = np.clip(
        wrist_right[target_mask] * (1.0 - 0.40 * visibility) + target_rgb * (0.22 + 0.60 * visibility),
        0.0,
        1.0,
    )
    wrist_right[..., 2] = np.clip(wrist_right[..., 2] + 0.08 * step_fraction + 0.06 * right_band, 0.0, 1.0)

    # Clip every view before the uint8 cast. The front image's red/green
    # channels are palette * (0.82 + 0.24 * clutter) outside the masks, which
    # exceeds 1.0 for bright palettes; an out-of-range float -> uint8 cast
    # would wrap instead of saturating.
    front = np.clip(front, 0.0, 1.0)
    wrist_left = np.clip(wrist_left, 0.0, 1.0)
    wrist_right = np.clip(wrist_right, 0.0, 1.0)

    outputs = {
        "front": (front * 255.0).astype(np.uint8),
        "wrist_left": (wrist_left * 255.0).astype(np.uint8),
        "wrist_right": (wrist_right * 255.0).astype(np.uint8),
    }
    if not include_depth:
        return outputs

    # ---- depth maps: a constant background, lowered inside the open/target regions
    front_depth = np.clip(0.25 + 0.40 * target_depth + 0.15 * disturbance + 0.10 * (1.0 - visibility), 0.0, 1.0)
    target_depth_map = np.clip(0.10 + 0.55 * target_depth, 0.0, 1.0)
    occluder_depth = np.clip(0.30 + 0.20 * disturbance + 0.10 * (1.0 - opening), 0.0, 1.0)
    front_depth_map = np.full((height, width), front_depth, dtype=np.float32)
    front_depth_map[gap_mask] = np.minimum(front_depth_map[gap_mask], occluder_depth)
    front_depth_map[target_mask] = np.minimum(front_depth_map[target_mask], target_depth_map)

    wrist_left_depth = np.clip(0.35 + 0.25 * target_depth + 0.10 * disturbance, 0.0, 1.0)
    wrist_left_depth_map = np.full((height, width), wrist_left_depth, dtype=np.float32)
    wrist_left_depth_map[left_open] = np.minimum(wrist_left_depth_map[left_open], 0.22 + 0.25 * target_depth)
    wrist_left_depth_map[target_mask] = np.minimum(wrist_left_depth_map[target_mask], target_depth_map)

    wrist_right_depth = np.clip(0.35 + 0.20 * target_depth + 0.12 * disturbance, 0.0, 1.0)
    wrist_right_depth_map = np.full((height, width), wrist_right_depth, dtype=np.float32)
    right_focus = (right_band * right_clear) > 0.15
    wrist_right_depth_map[right_focus] = np.minimum(wrist_right_depth_map[right_focus], 0.20 + 0.25 * target_depth)
    wrist_right_depth_map[target_mask] = np.minimum(wrist_right_depth_map[target_mask], target_depth_map)

    outputs.update(
        {
            "front_depth": front_depth_map.astype(np.float32),
            "wrist_left_depth": wrist_left_depth_map.astype(np.float32),
            "wrist_right_depth": wrist_right_depth_map.astype(np.float32),
            "front_depth_valid": np.ones((height, width), dtype=np.float32),
            "wrist_left_depth_valid": np.ones((height, width), dtype=np.float32),
            "wrist_right_depth_valid": np.ones((height, width), dtype=np.float32),
        }
    )
    return outputs
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/sim_reveal/teachers.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any
5
+
6
+ import numpy as np
7
+
8
+
9
@dataclass
class TeacherAction:
    """Pair of per-arm commands produced by a scripted teacher step."""

    # Command for the revealing arm.
    revealer_action: np.ndarray
    # Command for the acting (retrieving) arm.
    actor_action: np.ndarray
13
+
14
+
15
def foliage_teacher_step(privileged_state: dict[str, Any]) -> TeacherAction:
    """Build the teacher action for the foliage proxy.

    The revealer command is ``lowest_cost_strip_direction`` (default
    ``[0, 0, 1]``). The actor command is ``retrieve_direction`` (default all
    zeros) and is replaced by zeros unless ``corridor_exists`` is truthy.
    """
    strip_direction = privileged_state.get("lowest_cost_strip_direction", [0.0, 0.0, 1.0])
    corridor_open = bool(privileged_state.get("corridor_exists", False))
    revealer_cmd = np.asarray(strip_direction, dtype=np.float32)
    retrieve_cmd = np.asarray(privileged_state.get("retrieve_direction", [0.0, 0.0, 0.0]), dtype=np.float32)
    # Hold the actor still until a corridor to the target exists.
    actor_cmd = retrieve_cmd if corridor_open else np.zeros_like(retrieve_cmd)
    return TeacherAction(revealer_action=revealer_cmd, actor_action=actor_cmd)
23
+
24
+
25
def bag_teacher_step(privileged_state: dict[str, Any]) -> TeacherAction:
    """Build the teacher action for the bag proxy.

    The revealer command concatenates ``expand_contact_a`` (default
    ``[1, 0, 0]``) and ``expand_contact_b`` (default ``[-1, 0, 0]``). The actor
    command is ``retrieve_direction`` and is zeroed while ``aperture``
    (default 0.0) is below ``aperture_threshold`` (default 1.0).
    """
    first_contact = np.asarray(privileged_state.get("expand_contact_a", [1.0, 0.0, 0.0]), dtype=np.float32)
    second_contact = np.asarray(privileged_state.get("expand_contact_b", [-1.0, 0.0, 0.0]), dtype=np.float32)
    aperture = float(privileged_state.get("aperture", 0.0))
    threshold = float(privileged_state.get("aperture_threshold", 1.0))
    retrieve_cmd = np.asarray(privileged_state.get("retrieve_direction", [0.0, 0.0, 0.0]), dtype=np.float32)
    if aperture >= threshold:
        actor_cmd = retrieve_cmd
    else:
        # Opening is not wide enough yet: keep the actor idle.
        actor_cmd = np.zeros_like(retrieve_cmd)
    revealer_cmd = np.concatenate([first_contact, second_contact])
    return TeacherAction(revealer_action=revealer_cmd, actor_action=actor_cmd)
33
+
34
+
35
def cloth_teacher_step(privileged_state: dict[str, Any]) -> TeacherAction:
    """Build the teacher action for the cloth proxy.

    The revealer command is ``minimal_lift_direction`` (default ``[0, 0, 1]``).
    The actor command is ``retrieve_direction`` (default all zeros) and is
    replaced by zeros unless ``target_exposed`` is truthy.
    """
    lift_cmd = np.asarray(privileged_state.get("minimal_lift_direction", [0.0, 0.0, 1.0]), dtype=np.float32)
    exposed = bool(privileged_state.get("target_exposed", False))
    retrieve_cmd = np.asarray(privileged_state.get("retrieve_direction", [0.0, 0.0, 0.0]), dtype=np.float32)
    # The actor only moves once the target has been uncovered.
    actor_cmd = retrieve_cmd if exposed else np.zeros_like(retrieve_cmd)
    return TeacherAction(revealer_action=lift_cmd, actor_action=actor_cmd)
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/base.yaml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base training configuration: composes a model group and a data group, then
# applies the values in this file (`_self_` is listed last).
defaults:
  - model: backbone_only
  - data: rlbench_3cam
  - _self_

# Trainer switches.
trainer:
  policy_type: backbone_only
  use_bf16: true
  grad_clip_norm: 1.0
  freeze_backbone: true
  gradient_checkpointing: true

# Optimizer hyper-parameters.
optim:
  lr: 1.0e-4
  weight_decay: 1.0e-4

# Data-loading and reproducibility settings.
runtime:
  batch_size: 8
  num_workers: 4
  seed: 0
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/model/backbone_only.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Model group "backbone_only": backbone, fusion and decoder settings.
policy:
  # Vision-language backbone.
  backbone:
    model_name: openai/clip-vit-base-patch32
    hidden_dim: 512
    max_text_tokens: 32
    freeze_backbone: true
    gradient_checkpointing: true
    use_dummy_backbone: false
  # Multi-camera / proprioception fusion module.
  fusion:
    hidden_dim: 512
    num_cameras: 3
    num_layers: 4
    num_heads: 8
    ff_dim: 2048
    dropout: 0.1
    proprio_dim: 32
    proprio_tokens: 1
  # Action-chunk decoder.
  decoder:
    hidden_dim: 512
    num_heads: 8
    num_layers: 4
    ff_dim: 2048
    dropout: 0.1
    chunk_size: 8
    action_dim: 14
    num_candidates: 8
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_adapter_wrapped_clip_base_fast.yaml ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Adapter-wrapped CLIP policy on the proxy tasks ("fast" variant, seed 17).
experiment_name: proxy_adapter_wrapped_clip_base_fast_seed17
output_dir: /workspace/workspace/outputs/adapter_proxy
device: cuda
seed: 17
# Checkpoint to initialise from; init_strict is false.
init_checkpoint: /workspace/workspace/VLAarchtests2/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt
init_strict: false

# Proxy dataset selection and cached dataset locations.
data:
  proxies: [foliage_proxy, bag_proxy, cloth_proxy]
  resolution: 224
  dataset_version: reveal_proxy_v6_rgbd_elastic_state_phase_fast
  train_episodes_per_proxy: 12
  val_episodes_per_proxy: 4
  train_dataset_path: /workspace/workspace/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage3_phase_fast.pt
  val_dataset_path: /workspace/workspace/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3_phase_fast.pt
  rebuild_dataset: false
  chunk_horizon: 8
  rollout_horizon: 5
  history_steps: 6
  planner_candidates: 8
  seed: 17

# Optimizer and loader settings.
optim:
  epochs: 2
  batch_size: 4
  num_workers: 8
  lr: 0.0001
  weight_decay: 0.0001

# Trainer switches.
trainer:
  policy_type: adapter_wrapped
  training_regime: adapter_train_frozen_trunk
  eval_mode: adapter_active
  adapter_mode: adapter_active
  adapter_use_transition_model: false
  adapter_use_task_conditioning: true
  use_bf16: true
  grad_clip_norm: 1.0
  freeze_backbone: true
  gradient_checkpointing: false
  plan_during_train: false
  plan_during_eval: false
  support_mode_conditioning: true
  # Quoted on purpose: a bare `off` is a boolean false under YAML 1.1
  # loaders, whereas this key is a mode name (other configs use `trainable`).
  planner_mode: "off"
  use_depth: true
  use_world_model: false
  use_role_tokens: true
  compute_equivariance_probe: false
  # Parameter-name prefixes listed as trainable.
  trainable_parameter_prefixes:
    - adapter.state_head
    - adapter.proposal_prior
    - adapter.planner

# Per-module architecture settings.
policy:
  backbone: {model_name: openai/clip-vit-base-patch32, hidden_dim: 512, max_text_tokens: 32, freeze_backbone: true, gradient_checkpointing: false, use_dummy_backbone: false}
  fusion: {hidden_dim: 512, num_cameras: 3, num_layers: 4, num_heads: 8, ff_dim: 2048, dropout: 0.1, proprio_dim: 32, proprio_tokens: 1}
  memory: {hidden_dim: 512, action_dim: 14, history_steps: 6, scene_history_steps: 3, belief_history_steps: 8, num_layers: 2, dropout: 0.1, memory_bank_size: 4, scene_bank_size: 2, belief_bank_size: 2, num_heads: 8, max_history_steps: 8, reveal_cache_steps: 4, reveal_cache_decay: 0.7}
  decoder: {hidden_dim: 512, num_heads: 8, num_layers: 4, ff_dim: 2048, dropout: 0.1, chunk_size: 8, action_dim: 14, arm_action_dim: 7, num_candidates: 8, num_phases: 5, num_arm_roles: 4, num_proposal_modes: 7, planner_top_k: 4, proposal_delta_scale: 0.2, proposal_slot_scale: 0.05}
  reveal_head: {hidden_dim: 512, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, belief_map_size: 32, field_size: 16, num_heads: 8, predict_belief_map: true, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, num_tasks: 4}
  world_model: {hidden_dim: 512, action_dim: 14, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, field_size: 16, num_heads: 8, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, belief_map_size: 32, predict_belief_map: true, scene_bank_size: 2, belief_bank_size: 2, rollout_mode: compact_rollout, num_tasks: 4, lightweight_field_size: 4}
  planner: {hidden_dim: 512, num_candidates: 8, action_dim: 14, num_support_modes: 3, utility_margin: 0.1, num_heads: 8, num_layers: 2, num_phases: 5, num_arm_roles: 4, top_k: 4, adapter_confidence_threshold: 0.45}

# Per-term loss weights.
loss_weights:
  action: 1.0
  phase: 0.08
  arm_role: 0.08
  support_mode: 0.08
  corridor: 0.12
  persistence: 0.06
  disturbance: 0.06
  world_model: 0.0
  transition: 0.0
  belief: 0.05
  visibility: 0.05
  clearance: 0.06
  support_stability: 0.06
  reocclusion: 0.06
  occluder_contact: 0.05
  grasp_affordance: 0.05
  planner_success: 0.15
  planner_risk: 0.08
  planner_ranking: 0.15
  proposal_reconstruction: 0.08
  proposal_success: 0.1
  proposal_ranking: 0.12
  proposal_mode: 0.08
  proposal_diversity: 0.05
  role_swap_consistency: 0.0
  task_metrics: 0.06
  gate: 0.05
  distillation: 0.05
  calibration: 0.02
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_adapter_wrapped_clip_rank_only_rebuild128.yaml ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Adapter-wrapped CLIP policy, rank-only regime, dataset rebuilt with
# 128 train / 32 val episodes per proxy (seed 17).
experiment_name: proxy_adapter_wrapped_clip_rank_only_rebuild128_seed17
output_dir: /workspace/workspace/outputs/adapter_proxy
device: cuda
seed: 17
# Checkpoint to initialise from; init_strict is false.
init_checkpoint: /workspace/workspace/VLAarchtests2/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt
init_strict: false

# Proxy dataset selection and cached dataset locations.
data:
  proxies: [foliage_proxy, bag_proxy, cloth_proxy]
  resolution: 224
  dataset_version: reveal_proxy_v6_rgbd_elastic_state_phase
  train_episodes_per_proxy: 128
  val_episodes_per_proxy: 32
  train_dataset_path: /workspace/workspace/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage3_phase_rebuild128_seed17.pt
  val_dataset_path: /workspace/workspace/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3_phase_rebuild128_seed17.pt
  rebuild_dataset: true
  chunk_horizon: 8
  rollout_horizon: 5
  history_steps: 6
  planner_candidates: 8
  seed: 17

# Optimizer and loader settings.
optim:
  epochs: 4
  batch_size: 8
  num_workers: 32
  lr: 0.00005
  weight_decay: 0.0001

# Trainer switches.
trainer:
  policy_type: adapter_wrapped
  training_regime: proxy_rank_only
  eval_mode: adapter_active
  adapter_mode: adapter_active
  adapter_use_transition_model: false
  adapter_use_task_conditioning: true
  use_bf16: true
  grad_clip_norm: 1.0
  freeze_backbone: true
  gradient_checkpointing: false
  plan_during_train: false
  plan_during_eval: false
  support_mode_conditioning: true
  # Quoted on purpose: a bare `off` is a boolean false under YAML 1.1
  # loaders, whereas this key is a mode name (other configs use `trainable`).
  planner_mode: "off"
  use_depth: true
  use_world_model: false
  use_role_tokens: true
  compute_equivariance_probe: false
  # Parameter-name prefixes listed as trainable.
  trainable_parameter_prefixes:
    - adapter.proposal_prior
    - adapter.planner

# Per-module architecture settings.
policy:
  backbone: {model_name: openai/clip-vit-base-patch32, hidden_dim: 512, max_text_tokens: 32, freeze_backbone: true, gradient_checkpointing: false, use_dummy_backbone: false}
  fusion: {hidden_dim: 512, num_cameras: 3, num_layers: 4, num_heads: 8, ff_dim: 2048, dropout: 0.1, proprio_dim: 32, proprio_tokens: 1}
  memory: {hidden_dim: 512, action_dim: 14, history_steps: 6, scene_history_steps: 3, belief_history_steps: 8, num_layers: 2, dropout: 0.1, memory_bank_size: 4, scene_bank_size: 2, belief_bank_size: 2, num_heads: 8, max_history_steps: 8}
  decoder: {hidden_dim: 512, num_heads: 8, num_layers: 4, ff_dim: 2048, dropout: 0.1, chunk_size: 8, action_dim: 14, arm_action_dim: 7, num_candidates: 8, num_phases: 5, num_arm_roles: 4, num_proposal_modes: 7, planner_top_k: 4, proposal_delta_scale: 0.2, proposal_slot_scale: 0.05}
  reveal_head: {hidden_dim: 512, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, belief_map_size: 32, field_size: 16, num_heads: 8, predict_belief_map: true, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, num_tasks: 4}
  world_model: {hidden_dim: 512, action_dim: 14, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, field_size: 16, num_heads: 8, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, belief_map_size: 32, predict_belief_map: true, scene_bank_size: 2, belief_bank_size: 2, rollout_mode: compact_rollout, num_tasks: 4, lightweight_field_size: 4}
  planner: {hidden_dim: 512, num_candidates: 8, action_dim: 14, num_support_modes: 3, utility_margin: 0.1, num_heads: 8, num_layers: 2, num_phases: 5, num_arm_roles: 4, top_k: 4, adapter_confidence_threshold: 0.55}

# Per-term loss weights; only the action, ranking, proposal and distillation
# terms are non-zero here.
loss_weights:
  action: 0.5
  phase: 0.0
  arm_role: 0.0
  support_mode: 0.0
  corridor: 0.0
  persistence: 0.0
  disturbance: 0.0
  world_model: 0.0
  transition: 0.0
  belief: 0.0
  visibility: 0.0
  clearance: 0.0
  support_stability: 0.0
  reocclusion: 0.0
  occluder_contact: 0.0
  grasp_affordance: 0.0
  planner_success: 0.0
  planner_risk: 0.0
  planner_ranking: 0.2
  proposal_reconstruction: 0.0
  proposal_success: 0.1
  proposal_ranking: 0.2
  proposal_mode: 0.1
  proposal_diversity: 0.02
  role_swap_consistency: 0.0
  task_metrics: 0.0
  gate: 0.0
  distillation: 0.05
  calibration: 0.0
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_backbone_only_clip.yaml ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Backbone-only CLIP policy on the proxy tasks (seed 7).
experiment_name: proxy_backbone_only_clip
output_dir: /workspace/outputs/reveal_runs
device: cuda
seed: 7

# Proxy dataset selection and cached dataset locations.
data:
  proxies:
    - foliage_proxy
    - bag_proxy
    - cloth_proxy
  resolution: 224
  train_episodes_per_proxy: 48
  val_episodes_per_proxy: 16
  train_dataset_path: /workspace/data/reveal_proxy/proxy_train_clip224_v4_noleak_counterfactual.pt
  val_dataset_path: /workspace/data/reveal_proxy/proxy_val_clip224_v4_noleak_counterfactual.pt
  rebuild_dataset: true
  chunk_horizon: 8
  rollout_horizon: 5
  history_steps: 2
  # NOTE(review): 4 here vs num_candidates: 8 in policy.decoder / policy.planner
  # below; confirm the mismatch is intended.
  planner_candidates: 4
  seed: 7

# Optimizer and loader settings.
optim:
  epochs: 4
  batch_size: 2
  num_workers: 0
  lr: 0.0003
  weight_decay: 0.0001

# Trainer switches.
trainer:
  policy_type: backbone_only
  use_bf16: true
  grad_clip_norm: 1.0
  freeze_backbone: true
  gradient_checkpointing: false
  plan_during_train: false
  plan_during_eval: false
  support_mode_conditioning: true

# Per-module architecture settings.
policy:
  backbone:
    model_name: openai/clip-vit-base-patch32
    hidden_dim: 512
    max_text_tokens: 32
    freeze_backbone: true
    gradient_checkpointing: false
    use_dummy_backbone: false
  fusion:
    hidden_dim: 512
    num_cameras: 3
    num_layers: 4
    num_heads: 8
    ff_dim: 2048
    dropout: 0.1
    proprio_dim: 32
    proprio_tokens: 1
  memory:
    hidden_dim: 512
    history_steps: 2
    num_layers: 1
    dropout: 0.1
  decoder:
    hidden_dim: 512
    num_heads: 8
    num_layers: 4
    ff_dim: 2048
    dropout: 0.1
    chunk_size: 8
    action_dim: 14
    num_candidates: 8
  reveal_head:
    hidden_dim: 512
    num_support_modes: 3
    num_approach_templates: 32
    rollout_horizon: 5
    belief_map_size: 32
    predict_belief_map: true
  world_model:
    hidden_dim: 512
    action_dim: 14
    num_support_modes: 3
    num_approach_templates: 32
    rollout_horizon: 5
  planner:
    hidden_dim: 512
    num_candidates: 8
    action_dim: 14
    utility_margin: 0.1

# Per-term loss weights; the three planner terms are 0.0.
loss_weights:
  action: 1.0
  support_mode: 0.1
  corridor: 0.1
  persistence: 0.05
  disturbance: 0.05
  world_model: 0.1
  belief: 0.05
  planner_success: 0.0
  planner_risk: 0.0
  planner_ranking: 0.0
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_backbone_only_smoke.yaml ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Smoke-test configuration: backbone-only policy with the dummy backbone,
# 64-pixel images and a handful of episodes (seed 11).
experiment_name: proxy_backbone_only_smoke
output_dir: /workspace/outputs/smoke
device: cuda
seed: 11

# Proxy dataset selection and cached dataset locations.
data:
  proxies:
    - foliage_proxy
    - bag_proxy
    - cloth_proxy
  resolution: 64
  train_episodes_per_proxy: 6
  val_episodes_per_proxy: 2
  train_dataset_path: /workspace/data/reveal_proxy/proxy_train_smoke_v4.pt
  val_dataset_path: /workspace/data/reveal_proxy/proxy_val_smoke_v4.pt
  rebuild_dataset: true
  chunk_horizon: 4
  rollout_horizon: 3
  history_steps: 2
  planner_candidates: 4
  seed: 11

# Optimizer and loader settings.
optim:
  epochs: 2
  batch_size: 8
  num_workers: 0
  lr: 0.001
  weight_decay: 0.0001

# Trainer switches.
trainer:
  policy_type: backbone_only
  use_bf16: true
  grad_clip_norm: 1.0
  freeze_backbone: true
  gradient_checkpointing: false
  plan_during_train: false
  plan_during_eval: false
  support_mode_conditioning: true

# Per-module architecture settings (hidden size 64 throughout).
policy:
  backbone:
    model_name: openai/clip-vit-base-patch32
    hidden_dim: 64
    max_text_tokens: 32
    freeze_backbone: true
    gradient_checkpointing: false
    use_dummy_backbone: true
  fusion:
    hidden_dim: 64
    num_cameras: 3
    num_layers: 2
    num_heads: 4
    ff_dim: 128
    dropout: 0.1
    proprio_dim: 32
    proprio_tokens: 1
  memory:
    hidden_dim: 64
    history_steps: 2
    num_layers: 1
    dropout: 0.1
  decoder:
    hidden_dim: 64
    num_heads: 4
    num_layers: 2
    ff_dim: 128
    dropout: 0.1
    chunk_size: 4
    action_dim: 14
    arm_action_dim: 7
    num_candidates: 4
  reveal_head:
    hidden_dim: 64
    num_support_modes: 3
    num_approach_templates: 32
    rollout_horizon: 3
    belief_map_size: 32
    field_size: 16
    num_heads: 4
    predict_belief_map: true
  world_model:
    hidden_dim: 64
    action_dim: 14
    num_support_modes: 3
    num_approach_templates: 32
    rollout_horizon: 3
  planner:
    hidden_dim: 64
    num_candidates: 4
    action_dim: 14
    utility_margin: 0.1

# Per-term loss weights; only the action term is non-zero.
loss_weights:
  action: 1.0
  support_mode: 0.0
  corridor: 0.0
  persistence: 0.0
  disturbance: 0.0
  world_model: 0.0
  belief: 0.0
  planner_success: 0.0
  planner_risk: 0.0
  planner_ranking: 0.0
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_ablation_nodepth.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Ablation of the r3d interaction policy with depth input disabled
# (trainer.use_depth is false).
experiment_name: proxy_interaction_r3d_ablation_nodepth
output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
device: cuda
seed: 17

# Proxy dataset selection and cached dataset locations.
data:
  proxies: [foliage_proxy, bag_proxy, cloth_proxy]
  resolution: 224
  dataset_version: reveal_proxy_v6_rgbd_elastic_state
  train_episodes_per_proxy: 48
  val_episodes_per_proxy: 16
  train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage3.pt
  val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3.pt
  rebuild_dataset: false
  chunk_horizon: 8
  rollout_horizon: 5
  history_steps: 6
  planner_candidates: 8
  seed: 17

# Optimizer and loader settings.
optim:
  epochs: 4
  batch_size: 2
  num_workers: 0
  lr: 0.0003
  weight_decay: 0.0001

# Trainer switches.
trainer:
  policy_type: elastic_reveal
  use_bf16: true
  grad_clip_norm: 1.0
  freeze_backbone: true
  gradient_checkpointing: false
  plan_during_train: true
  plan_during_eval: true
  support_mode_conditioning: true
  planner_mode: trainable
  use_depth: false
  use_world_model: true
  use_role_tokens: true
  compute_equivariance_probe: true

# Per-module architecture settings.
policy:
  backbone: {model_name: openai/clip-vit-base-patch32, hidden_dim: 512, max_text_tokens: 32, freeze_backbone: true, gradient_checkpointing: false, use_dummy_backbone: false}
  fusion: {hidden_dim: 512, num_cameras: 3, num_layers: 4, num_heads: 8, ff_dim: 2048, dropout: 0.1, proprio_dim: 32, proprio_tokens: 1}
  memory: {hidden_dim: 512, action_dim: 14, history_steps: 6, scene_history_steps: 3, belief_history_steps: 8, num_layers: 2, dropout: 0.1, memory_bank_size: 4, scene_bank_size: 2, belief_bank_size: 2, num_heads: 8, max_history_steps: 8}
  decoder: {hidden_dim: 512, num_heads: 8, num_layers: 4, ff_dim: 2048, dropout: 0.1, chunk_size: 8, action_dim: 14, arm_action_dim: 7, num_candidates: 8, num_phases: 5, num_arm_roles: 4, num_proposal_modes: 6, planner_top_k: 4}
  reveal_head: {hidden_dim: 512, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, belief_map_size: 32, field_size: 16, num_heads: 8, predict_belief_map: true, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8}
  world_model: {hidden_dim: 512, action_dim: 14, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, field_size: 16, num_heads: 8, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, belief_map_size: 32, predict_belief_map: true, scene_bank_size: 2, belief_bank_size: 2}
  planner: {hidden_dim: 512, num_candidates: 8, action_dim: 14, num_support_modes: 3, utility_margin: 0.1, num_heads: 8, num_layers: 2, num_phases: 5, num_arm_roles: 4, top_k: 4}

# Per-term loss weights.
loss_weights:
  action: 1.0
  phase: 0.1
  arm_role: 0.15
  support_mode: 0.1
  corridor: 0.15
  persistence: 0.05
  disturbance: 0.05
  world_model: 0.25
  belief: 0.05
  visibility: 0.05
  clearance: 0.05
  support_stability: 0.05
  reocclusion: 0.05
  occluder_contact: 0.05
  grasp_affordance: 0.05
  planner_success: 0.25
  planner_risk: 0.1
  planner_ranking: 0.2
  proposal_reconstruction: 0.1
  proposal_success: 0.15
  proposal_ranking: 0.2
  proposal_diversity: 0.05
  role_swap_consistency: 0.05
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_ablation_noplanner.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Ablation of the r3d interaction policy with planning switched off
# (plan_during_train / plan_during_eval are false, planner_mode is "off").
experiment_name: proxy_interaction_r3d_ablation_noplanner
output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
device: cuda
seed: 17

# Proxy dataset selection and cached dataset locations.
data:
  proxies: [foliage_proxy, bag_proxy, cloth_proxy]
  resolution: 96
  dataset_version: reveal_proxy_v6_rgbd_elastic_state
  train_episodes_per_proxy: 48
  val_episodes_per_proxy: 16
  train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v6_rgbd_stage2_dummy.pt
  val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v6_rgbd_stage2_dummy.pt
  rebuild_dataset: false
  chunk_horizon: 8
  rollout_horizon: 5
  history_steps: 6
  planner_candidates: 8
  seed: 17

# Optimizer and loader settings.
optim:
  epochs: 10
  batch_size: 16
  num_workers: 0
  lr: 0.001
  weight_decay: 0.0001

# Trainer switches.
trainer:
  policy_type: elastic_reveal
  use_bf16: false
  grad_clip_norm: 1.0
  freeze_backbone: true
  gradient_checkpointing: false
  plan_during_train: false
  plan_during_eval: false
  support_mode_conditioning: true
  # Quoted on purpose: a bare `off` is a boolean false under YAML 1.1
  # loaders, whereas this key is a mode name (other configs use `trainable`).
  planner_mode: "off"
  use_depth: false
  use_world_model: true
  use_role_tokens: true
  compute_equivariance_probe: true

# Per-module architecture settings (dummy backbone, hidden size 192).
policy:
  backbone: {model_name: openai/clip-vit-base-patch32, hidden_dim: 192, max_text_tokens: 32, freeze_backbone: true, gradient_checkpointing: false, use_dummy_backbone: true}
  fusion: {hidden_dim: 192, num_cameras: 3, num_layers: 2, num_heads: 4, ff_dim: 384, dropout: 0.1, proprio_dim: 32, proprio_tokens: 1}
  memory: {hidden_dim: 192, action_dim: 14, history_steps: 6, scene_history_steps: 3, belief_history_steps: 8, num_layers: 2, dropout: 0.1, memory_bank_size: 4, scene_bank_size: 2, belief_bank_size: 2, num_heads: 4, max_history_steps: 8}
  decoder: {hidden_dim: 192, num_heads: 4, num_layers: 2, ff_dim: 384, dropout: 0.1, chunk_size: 8, action_dim: 14, arm_action_dim: 7, num_candidates: 8, num_phases: 5, num_arm_roles: 4, num_proposal_modes: 6, planner_top_k: 4}
  reveal_head: {hidden_dim: 192, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, belief_map_size: 32, field_size: 16, num_heads: 4, predict_belief_map: true, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8}
  world_model: {hidden_dim: 192, action_dim: 14, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, field_size: 16, num_heads: 4, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, belief_map_size: 32, predict_belief_map: true, scene_bank_size: 2, belief_bank_size: 2}
  planner: {hidden_dim: 192, num_candidates: 8, action_dim: 14, num_support_modes: 3, utility_margin: 0.1, num_heads: 4, num_layers: 2, num_phases: 5, num_arm_roles: 4, top_k: 4}

# Only the action loss is weighted in this ablation.
loss_weights:
  action: 1.0
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_ablation_norolesym.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_r3d_ablation_norolesym
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
3
+ device: cuda
4
+ seed: 17
5
+ data: {proxies: [foliage_proxy, bag_proxy, cloth_proxy], resolution: 96, dataset_version: reveal_proxy_v6_rgbd_elastic_state, train_episodes_per_proxy: 48, val_episodes_per_proxy: 16, train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v6_rgbd_stage2_dummy.pt, val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v6_rgbd_stage2_dummy.pt, rebuild_dataset: false, chunk_horizon: 8, rollout_horizon: 5, history_steps: 6, planner_candidates: 8, seed: 17}
6
+ optim: {epochs: 10, batch_size: 16, num_workers: 0, lr: 0.001, weight_decay: 0.0001}
7
+ trainer: {policy_type: elastic_reveal, use_bf16: false, grad_clip_norm: 1.0, freeze_backbone: true, gradient_checkpointing: false, plan_during_train: true, plan_during_eval: true, support_mode_conditioning: true, planner_mode: trainable, use_depth: false, use_world_model: true, use_role_tokens: false, compute_equivariance_probe: false}
8
+ policy:
9
+ backbone: {model_name: openai/clip-vit-base-patch32, hidden_dim: 192, max_text_tokens: 32, freeze_backbone: true, gradient_checkpointing: false, use_dummy_backbone: true}
10
+ fusion: {hidden_dim: 192, num_cameras: 3, num_layers: 2, num_heads: 4, ff_dim: 384, dropout: 0.1, proprio_dim: 32, proprio_tokens: 1}
11
+ memory: {hidden_dim: 192, action_dim: 14, history_steps: 6, scene_history_steps: 3, belief_history_steps: 8, num_layers: 2, dropout: 0.1, memory_bank_size: 4, scene_bank_size: 2, belief_bank_size: 2, num_heads: 4, max_history_steps: 8}
12
+ decoder: {hidden_dim: 192, num_heads: 4, num_layers: 2, ff_dim: 384, dropout: 0.1, chunk_size: 8, action_dim: 14, arm_action_dim: 7, num_candidates: 8, num_phases: 5, num_arm_roles: 4, num_proposal_modes: 6, planner_top_k: 4}
13
+ reveal_head: {hidden_dim: 192, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, belief_map_size: 32, field_size: 16, num_heads: 4, predict_belief_map: true, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8}
14
+ world_model: {hidden_dim: 192, action_dim: 14, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, field_size: 16, num_heads: 4, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, belief_map_size: 32, predict_belief_map: true, scene_bank_size: 2, belief_bank_size: 2}
15
+ planner: {hidden_dim: 192, num_candidates: 8, action_dim: 14, num_support_modes: 3, utility_margin: 0.1, num_heads: 4, num_layers: 2, num_phases: 5, num_arm_roles: 4, top_k: 4}
16
+ loss_weights: {action: 1.0}
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_ablation_nowm.yaml ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_r3d_ablation_nowm
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
3
+ device: cuda
4
+ seed: 17
5
+ data: {proxies: [foliage_proxy, bag_proxy, cloth_proxy], resolution: 96, dataset_version: reveal_proxy_v6_rgbd_elastic_state, train_episodes_per_proxy: 48, val_episodes_per_proxy: 16, train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v6_rgbd_stage2_dummy.pt, val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v6_rgbd_stage2_dummy.pt, rebuild_dataset: false, chunk_horizon: 8, rollout_horizon: 5, history_steps: 6, planner_candidates: 8, seed: 17}
6
+ optim: {epochs: 10, batch_size: 16, num_workers: 0, lr: 0.001, weight_decay: 0.0001}
7
+ trainer: {policy_type: elastic_reveal, use_bf16: false, grad_clip_norm: 1.0, freeze_backbone: true, gradient_checkpointing: false, plan_during_train: true, plan_during_eval: true, support_mode_conditioning: true, planner_mode: trainable, use_depth: false, use_world_model: false, use_role_tokens: true, compute_equivariance_probe: true}
8
+ policy:
9
+ backbone: {model_name: openai/clip-vit-base-patch32, hidden_dim: 192, max_text_tokens: 32, freeze_backbone: true, gradient_checkpointing: false, use_dummy_backbone: true}
10
+ fusion: {hidden_dim: 192, num_cameras: 3, num_layers: 2, num_heads: 4, ff_dim: 384, dropout: 0.1, proprio_dim: 32, proprio_tokens: 1}
11
+ memory: {hidden_dim: 192, action_dim: 14, history_steps: 6, scene_history_steps: 3, belief_history_steps: 8, num_layers: 2, dropout: 0.1, memory_bank_size: 4, scene_bank_size: 2, belief_bank_size: 2, num_heads: 4, max_history_steps: 8}
12
+ decoder: {hidden_dim: 192, num_heads: 4, num_layers: 2, ff_dim: 384, dropout: 0.1, chunk_size: 8, action_dim: 14, arm_action_dim: 7, num_candidates: 8, num_phases: 5, num_arm_roles: 4, num_proposal_modes: 6, planner_top_k: 4}
13
+ reveal_head: {hidden_dim: 192, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, belief_map_size: 32, field_size: 16, num_heads: 4, predict_belief_map: true, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8}
14
+ world_model: {hidden_dim: 192, action_dim: 14, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, field_size: 16, num_heads: 4, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, belief_map_size: 32, predict_belief_map: true, scene_bank_size: 2, belief_bank_size: 2}
15
+ planner: {hidden_dim: 192, num_candidates: 8, action_dim: 14, num_support_modes: 3, utility_margin: 0.1, num_heads: 4, num_layers: 2, num_phases: 5, num_arm_roles: 4, top_k: 4}
16
+ loss_weights: {action: 1.0}
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage1_clip.yaml ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_r3d_stage1_clip
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
3
+ device: cuda
4
+ seed: 7
5
+ init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
6
+ init_strict: false
7
+
8
+ data:
9
+ proxies: [foliage_proxy, bag_proxy, cloth_proxy]
10
+ resolution: 224
11
+ dataset_version: reveal_proxy_v6_rgbd_elastic_state
12
+ train_episodes_per_proxy: 48
13
+ val_episodes_per_proxy: 16
14
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage1.pt
15
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage1.pt
16
+ rebuild_dataset: false
17
+ chunk_horizon: 8
18
+ rollout_horizon: 5
19
+ history_steps: 6
20
+ planner_candidates: 8
21
+ seed: 7
22
+
23
+ optim: {epochs: 4, batch_size: 2, num_workers: 4, lr: 0.0003, weight_decay: 0.0001}
24
+
25
+ trainer:
26
+ policy_type: elastic_reveal
27
+ use_bf16: true
28
+ grad_clip_norm: 1.0
29
+ freeze_backbone: true
30
+ gradient_checkpointing: false
31
+ plan_during_train: true
32
+ plan_during_eval: true
33
+ support_mode_conditioning: true
34
+ planner_mode: trainable
35
+ use_depth: false
36
+ use_world_model: true
37
+ use_role_tokens: true
38
+ compute_equivariance_probe: true
39
+
40
+ policy:
41
+ backbone: {model_name: openai/clip-vit-base-patch32, hidden_dim: 512, max_text_tokens: 32, freeze_backbone: true, gradient_checkpointing: false, use_dummy_backbone: false}
42
+ fusion: {hidden_dim: 512, num_cameras: 3, num_layers: 4, num_heads: 8, ff_dim: 2048, dropout: 0.1, proprio_dim: 32, proprio_tokens: 1}
43
+ memory: {hidden_dim: 512, action_dim: 14, history_steps: 6, scene_history_steps: 3, belief_history_steps: 8, num_layers: 2, dropout: 0.1, memory_bank_size: 4, scene_bank_size: 2, belief_bank_size: 2, num_heads: 8, max_history_steps: 8}
44
+ decoder: {hidden_dim: 512, num_heads: 8, num_layers: 4, ff_dim: 2048, dropout: 0.1, chunk_size: 8, action_dim: 14, arm_action_dim: 7, num_candidates: 8, num_phases: 5, num_arm_roles: 4, num_proposal_modes: 6, planner_top_k: 4}
45
+ reveal_head: {hidden_dim: 512, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, belief_map_size: 32, field_size: 16, num_heads: 8, predict_belief_map: true, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8}
46
+ world_model: {hidden_dim: 512, action_dim: 14, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, field_size: 16, num_heads: 8, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, belief_map_size: 32, predict_belief_map: true, scene_bank_size: 2, belief_bank_size: 2}
47
+ planner: {hidden_dim: 512, num_candidates: 8, action_dim: 14, num_support_modes: 3, utility_margin: 0.1, num_heads: 8, num_layers: 2, num_phases: 5, num_arm_roles: 4, top_k: 4}
48
+
49
+ loss_weights:
50
+ action: 1.0
51
+ phase: 0.1
52
+ arm_role: 0.15
53
+ support_mode: 0.1
54
+ corridor: 0.15
55
+ persistence: 0.05
56
+ disturbance: 0.05
57
+ world_model: 0.2
58
+ belief: 0.05
59
+ visibility: 0.05
60
+ clearance: 0.05
61
+ support_stability: 0.05
62
+ reocclusion: 0.05
63
+ occluder_contact: 0.05
64
+ grasp_affordance: 0.05
65
+ planner_success: 0.25
66
+ planner_risk: 0.1
67
+ planner_ranking: 0.2
68
+ proposal_reconstruction: 0.1
69
+ proposal_success: 0.15
70
+ proposal_ranking: 0.2
71
+ proposal_diversity: 0.05
72
+ role_swap_consistency: 0.05
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage1_dummy.yaml ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_r3d_stage1_dummy
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
3
+ device: cuda
4
+ seed: 13
5
+
6
+ data:
7
+ proxies: [foliage_proxy, bag_proxy, cloth_proxy]
8
+ resolution: 96
9
+ dataset_version: reveal_proxy_v6_rgbd_elastic_state
10
+ train_episodes_per_proxy: 48
11
+ val_episodes_per_proxy: 16
12
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v6_rgbd_stage1_dummy.pt
13
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v6_rgbd_stage1_dummy.pt
14
+ rebuild_dataset: false
15
+ chunk_horizon: 8
16
+ rollout_horizon: 5
17
+ history_steps: 6
18
+ planner_candidates: 8
19
+ seed: 13
20
+
21
+ optim:
22
+ epochs: 4
23
+ batch_size: 16
24
+ num_workers: 4
25
+ lr: 0.001
26
+ weight_decay: 0.0001
27
+
28
+ trainer:
29
+ policy_type: elastic_reveal
30
+ use_bf16: false
31
+ grad_clip_norm: 1.0
32
+ freeze_backbone: true
33
+ gradient_checkpointing: false
34
+ plan_during_train: true
35
+ plan_during_eval: true
36
+ support_mode_conditioning: true
37
+ planner_mode: trainable
38
+ use_depth: false
39
+ use_world_model: true
40
+ use_role_tokens: true
41
+ compute_equivariance_probe: true
42
+
43
+ policy:
44
+ backbone: {model_name: openai/clip-vit-base-patch32, hidden_dim: 192, max_text_tokens: 32, freeze_backbone: true, gradient_checkpointing: false, use_dummy_backbone: true}
45
+ fusion: {hidden_dim: 192, num_cameras: 3, num_layers: 2, num_heads: 4, ff_dim: 384, dropout: 0.1, proprio_dim: 32, proprio_tokens: 1}
46
+ memory: {hidden_dim: 192, action_dim: 14, history_steps: 6, scene_history_steps: 3, belief_history_steps: 8, num_layers: 2, dropout: 0.1, memory_bank_size: 4, scene_bank_size: 2, belief_bank_size: 2, num_heads: 4, max_history_steps: 8}
47
+ decoder: {hidden_dim: 192, num_heads: 4, num_layers: 2, ff_dim: 384, dropout: 0.1, chunk_size: 8, action_dim: 14, arm_action_dim: 7, num_candidates: 8, num_phases: 5, num_arm_roles: 4, num_proposal_modes: 6, planner_top_k: 4}
48
+ reveal_head: {hidden_dim: 192, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, belief_map_size: 32, field_size: 16, num_heads: 4, predict_belief_map: true, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8}
49
+ world_model: {hidden_dim: 192, action_dim: 14, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, field_size: 16, num_heads: 4, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, belief_map_size: 32, predict_belief_map: true, scene_bank_size: 2, belief_bank_size: 2}
50
+ planner: {hidden_dim: 192, num_candidates: 8, action_dim: 14, num_support_modes: 3, utility_margin: 0.1, num_heads: 4, num_layers: 2, num_phases: 5, num_arm_roles: 4, top_k: 4}
51
+
52
+ loss_weights:
53
+ action: 1.0
54
+ phase: 0.15
55
+ arm_role: 0.2
56
+ support_mode: 0.15
57
+ corridor: 0.2
58
+ persistence: 0.1
59
+ disturbance: 0.1
60
+ world_model: 0.25
61
+ belief: 0.05
62
+ visibility: 0.05
63
+ clearance: 0.05
64
+ support_stability: 0.05
65
+ reocclusion: 0.05
66
+ occluder_contact: 0.05
67
+ grasp_affordance: 0.05
68
+ planner_success: 0.2
69
+ planner_risk: 0.1
70
+ planner_ranking: 0.1
71
+ proposal_reconstruction: 0.2
72
+ proposal_success: 0.1
73
+ proposal_ranking: 0.1
74
+ proposal_diversity: 0.05
75
+ role_swap_consistency: 0.05
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage2_clip.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_r3d_stage2_clip
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
3
+ device: cuda
4
+ seed: 11
5
+ init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
6
+ init_strict: false
7
+ data: {proxies: [foliage_proxy, bag_proxy, cloth_proxy], resolution: 224, dataset_version: reveal_proxy_v6_rgbd_elastic_state, train_episodes_per_proxy: 48, val_episodes_per_proxy: 16, train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage2.pt, val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage2.pt, rebuild_dataset: false, chunk_horizon: 8, rollout_horizon: 5, history_steps: 6, planner_candidates: 8, seed: 11}
8
+ optim: {epochs: 4, batch_size: 2, num_workers: 4, lr: 0.0003, weight_decay: 0.0001}
9
+ trainer: {policy_type: elastic_reveal, use_bf16: true, grad_clip_norm: 1.0, freeze_backbone: true, gradient_checkpointing: false, plan_during_train: true, plan_during_eval: true, support_mode_conditioning: true, planner_mode: trainable, use_depth: false, use_world_model: true, use_role_tokens: true, compute_equivariance_probe: true}
10
+ policy:
11
+ backbone: {model_name: openai/clip-vit-base-patch32, hidden_dim: 512, max_text_tokens: 32, freeze_backbone: true, gradient_checkpointing: false, use_dummy_backbone: false}
12
+ fusion: {hidden_dim: 512, num_cameras: 3, num_layers: 4, num_heads: 8, ff_dim: 2048, dropout: 0.1, proprio_dim: 32, proprio_tokens: 1}
13
+ memory: {hidden_dim: 512, action_dim: 14, history_steps: 6, scene_history_steps: 3, belief_history_steps: 8, num_layers: 2, dropout: 0.1, memory_bank_size: 4, scene_bank_size: 2, belief_bank_size: 2, num_heads: 8, max_history_steps: 8}
14
+ decoder: {hidden_dim: 512, num_heads: 8, num_layers: 4, ff_dim: 2048, dropout: 0.1, chunk_size: 8, action_dim: 14, arm_action_dim: 7, num_candidates: 8, num_phases: 5, num_arm_roles: 4, num_proposal_modes: 6, planner_top_k: 4}
15
+ reveal_head: {hidden_dim: 512, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, belief_map_size: 32, field_size: 16, num_heads: 8, predict_belief_map: true, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8}
16
+ world_model: {hidden_dim: 512, action_dim: 14, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, field_size: 16, num_heads: 8, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, belief_map_size: 32, predict_belief_map: true, scene_bank_size: 2, belief_bank_size: 2}
17
+ planner: {hidden_dim: 512, num_candidates: 8, action_dim: 14, num_support_modes: 3, utility_margin: 0.1, num_heads: 8, num_layers: 2, num_phases: 5, num_arm_roles: 4, top_k: 4}
18
+ loss_weights: {action: 1.0, phase: 0.1, arm_role: 0.15, support_mode: 0.1, corridor: 0.15, persistence: 0.05, disturbance: 0.05, world_model: 0.25, belief: 0.05, visibility: 0.05, clearance: 0.05, support_stability: 0.05, reocclusion: 0.05, occluder_contact: 0.05, grasp_affordance: 0.05, planner_success: 0.25, planner_risk: 0.1, planner_ranking: 0.2, proposal_reconstruction: 0.1, proposal_success: 0.15, proposal_ranking: 0.2, proposal_diversity: 0.05, role_swap_consistency: 0.05}
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage2_dummy.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_r3d_stage2_dummy
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
3
+ device: cuda
4
+ seed: 21
5
+ defaults: []
6
+ data: {proxies: [foliage_proxy, bag_proxy, cloth_proxy], resolution: 96, dataset_version: reveal_proxy_v6_rgbd_elastic_state, train_episodes_per_proxy: 48, val_episodes_per_proxy: 16, train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v6_rgbd_stage2_dummy.pt, val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v6_rgbd_stage2_dummy.pt, rebuild_dataset: false, chunk_horizon: 8, rollout_horizon: 5, history_steps: 6, planner_candidates: 8, seed: 21}
7
+ optim: {epochs: 4, batch_size: 16, num_workers: 4, lr: 0.001, weight_decay: 0.0001}
8
+ trainer: {policy_type: elastic_reveal, use_bf16: false, grad_clip_norm: 1.0, freeze_backbone: true, gradient_checkpointing: false, plan_during_train: true, plan_during_eval: true, support_mode_conditioning: true, planner_mode: trainable, use_depth: false, use_world_model: true, use_role_tokens: true, compute_equivariance_probe: true}
9
+ policy:
10
+ backbone: {model_name: openai/clip-vit-base-patch32, hidden_dim: 192, max_text_tokens: 32, freeze_backbone: true, gradient_checkpointing: false, use_dummy_backbone: true}
11
+ fusion: {hidden_dim: 192, num_cameras: 3, num_layers: 2, num_heads: 4, ff_dim: 384, dropout: 0.1, proprio_dim: 32, proprio_tokens: 1}
12
+ memory: {hidden_dim: 192, action_dim: 14, history_steps: 6, scene_history_steps: 3, belief_history_steps: 8, num_layers: 2, dropout: 0.1, memory_bank_size: 4, scene_bank_size: 2, belief_bank_size: 2, num_heads: 4, max_history_steps: 8}
13
+ decoder: {hidden_dim: 192, num_heads: 4, num_layers: 2, ff_dim: 384, dropout: 0.1, chunk_size: 8, action_dim: 14, arm_action_dim: 7, num_candidates: 8, num_phases: 5, num_arm_roles: 4, num_proposal_modes: 6, planner_top_k: 4}
14
+ reveal_head: {hidden_dim: 192, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, belief_map_size: 32, field_size: 16, num_heads: 4, predict_belief_map: true, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8}
15
+ world_model: {hidden_dim: 192, action_dim: 14, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, field_size: 16, num_heads: 4, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, belief_map_size: 32, predict_belief_map: true, scene_bank_size: 2, belief_bank_size: 2}
16
+ planner: {hidden_dim: 192, num_candidates: 8, action_dim: 14, num_support_modes: 3, utility_margin: 0.1, num_heads: 4, num_layers: 2, num_phases: 5, num_arm_roles: 4, top_k: 4}
17
+ loss_weights: {action: 1.0, phase: 0.15, arm_role: 0.2, support_mode: 0.15, corridor: 0.2, persistence: 0.1, disturbance: 0.1, world_model: 0.3, belief: 0.05, visibility: 0.05, clearance: 0.05, support_stability: 0.05, reocclusion: 0.05, occluder_contact: 0.05, grasp_affordance: 0.05, planner_success: 0.2, planner_risk: 0.1, planner_ranking: 0.1, proposal_reconstruction: 0.2, proposal_success: 0.1, proposal_ranking: 0.1, proposal_diversity: 0.05, role_swap_consistency: 0.05}
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/configs/proxy_interaction_r3d_stage3_clip_rgbd.yaml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_r3d_stage3_clip_rgbd
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
3
+ device: cuda
4
+ seed: 17
5
+ init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
6
+ init_strict: false
7
+ data: {proxies: [foliage_proxy, bag_proxy, cloth_proxy], resolution: 224, dataset_version: reveal_proxy_v6_rgbd_elastic_state, train_episodes_per_proxy: 48, val_episodes_per_proxy: 16, train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage3.pt, val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3.pt, rebuild_dataset: false, chunk_horizon: 8, rollout_horizon: 5, history_steps: 6, planner_candidates: 8, seed: 17}
8
+ optim: {epochs: 4, batch_size: 2, num_workers: 4, lr: 0.0003, weight_decay: 0.0001}
9
+ trainer: {policy_type: elastic_reveal, use_bf16: true, grad_clip_norm: 1.0, freeze_backbone: true, gradient_checkpointing: false, plan_during_train: true, plan_during_eval: true, support_mode_conditioning: true, planner_mode: trainable, use_depth: true, use_world_model: true, use_role_tokens: true, compute_equivariance_probe: true}
10
+ policy:
11
+ backbone: {model_name: openai/clip-vit-base-patch32, hidden_dim: 512, max_text_tokens: 32, freeze_backbone: true, gradient_checkpointing: false, use_dummy_backbone: false}
12
+ fusion: {hidden_dim: 512, num_cameras: 3, num_layers: 4, num_heads: 8, ff_dim: 2048, dropout: 0.1, proprio_dim: 32, proprio_tokens: 1}
13
+ memory: {hidden_dim: 512, action_dim: 14, history_steps: 6, scene_history_steps: 3, belief_history_steps: 8, num_layers: 2, dropout: 0.1, memory_bank_size: 4, scene_bank_size: 2, belief_bank_size: 2, num_heads: 8, max_history_steps: 8}
14
+ decoder: {hidden_dim: 512, num_heads: 8, num_layers: 4, ff_dim: 2048, dropout: 0.1, chunk_size: 8, action_dim: 14, arm_action_dim: 7, num_candidates: 8, num_phases: 5, num_arm_roles: 4, num_proposal_modes: 6, planner_top_k: 4}
15
+ reveal_head: {hidden_dim: 512, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, belief_map_size: 32, field_size: 16, num_heads: 8, predict_belief_map: true, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8}
16
+ world_model: {hidden_dim: 512, action_dim: 14, num_support_modes: 3, num_approach_templates: 32, rollout_horizon: 5, field_size: 16, num_heads: 8, num_phases: 5, num_arm_roles: 4, num_interaction_tokens: 8, belief_map_size: 32, predict_belief_map: true, scene_bank_size: 2, belief_bank_size: 2}
17
+ planner: {hidden_dim: 512, num_candidates: 8, action_dim: 14, num_support_modes: 3, utility_margin: 0.1, num_heads: 8, num_layers: 2, num_phases: 5, num_arm_roles: 4, top_k: 4}
18
+ loss_weights: {action: 1.0, phase: 0.1, arm_role: 0.15, support_mode: 0.1, corridor: 0.15, persistence: 0.05, disturbance: 0.05, world_model: 0.25, belief: 0.05, visibility: 0.05, clearance: 0.05, support_stability: 0.05, reocclusion: 0.05, occluder_contact: 0.05, grasp_affordance: 0.05, planner_success: 0.25, planner_risk: 0.1, planner_ranking: 0.2, proposal_reconstruction: 0.1, proposal_success: 0.15, proposal_ranking: 0.2, proposal_diversity: 0.05, role_swap_consistency: 0.05}