lsnu committed on
Commit
d5d49c1
·
verified ·
1 Parent(s): 572d64a

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. FILE_MANIFEST.txt +72 -0
  2. MODEL_INDEX.md +75 -0
  3. README.md +76 -0
  4. artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.json +15 -0
  5. artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.md +13 -0
  6. artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.json +15 -0
  7. artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.md +13 -0
  8. artifacts/outputs/interaction_debug/ablation_no_world_model_actionhist/reveal_benchmark.md +13 -0
  9. artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.json +15 -0
  10. artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.md +13 -0
  11. artifacts/outputs/interaction_debug/chunk_debug_trace.json +140 -0
  12. artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/diagnostics/proxy_diagnostics.json +7 -0
  13. artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/metrics.json +174 -0
  14. artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/config_resolved.yaml +127 -0
  15. artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/diagnostics/proxy_diagnostics.json +7 -0
  16. artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/metrics.json +174 -0
  17. artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/summary.json +573 -0
  18. artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/config_resolved.yaml +125 -0
  19. artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/diagnostics/proxy_diagnostics.json +7 -0
  20. artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/metrics.json +346 -0
  21. artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/summary.json +16 -0
  22. artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.json +28 -0
  23. artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.md +25 -0
  24. artifacts/outputs/interaction_debug/reveal_eval_commit2_compare/reveal_benchmark.json +41 -0
  25. artifacts/outputs/interaction_debug/reveal_eval_commit2_compare/reveal_benchmark.md +37 -0
  26. artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_recency_evalonly/reveal_benchmark.json +15 -0
  27. artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_recency_evalonly/reveal_benchmark.md +13 -0
  28. artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.json +41 -0
  29. artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.md +37 -0
  30. artifacts/outputs/interaction_debug/reveal_eval_interaction_recency_oracleft_commit4_compare/reveal_benchmark.json +41 -0
  31. artifacts/outputs/interaction_debug/reveal_eval_interaction_recency_oracleft_commit4_compare/reveal_benchmark.md +37 -0
  32. artifacts/outputs/interaction_debug/smoke_checks_actionhist/smoke_checks.json +157 -0
  33. code/reveal_vla_bimanual/eval/run_proxy_diagnostics.py +1 -0
  34. code/reveal_vla_bimanual/eval/run_reveal_benchmark.py +45 -13
  35. code/reveal_vla_bimanual/eval/run_rlbench_rollout_eval.py +112 -18
  36. code/reveal_vla_bimanual/models/backbones.py +1 -1
  37. code/reveal_vla_bimanual/models/observation_memory.py +38 -3
  38. code/reveal_vla_bimanual/models/policy.py +14 -2
  39. code/reveal_vla_bimanual/sim_reveal/dataset.py +17 -2
  40. code/reveal_vla_bimanual/sim_rlbench/dataset.py +27 -2
  41. code/reveal_vla_bimanual/train/configs/proxy_interaction_state_actionhist.yaml +125 -0
  42. code/reveal_vla_bimanual/train/configs/proxy_interaction_state_actionhist_smoke.yaml +125 -0
  43. code/reveal_vla_bimanual/train/configs/proxy_interaction_state_clip_actionhist.yaml +129 -0
  44. code/reveal_vla_bimanual/train/configs/proxy_interaction_state_recency_oracleft.yaml +127 -0
  45. code/reveal_vla_bimanual/train/losses.py +58 -6
  46. code/reveal_vla_bimanual/train/run_experiment.py +31 -0
  47. code/reveal_vla_bimanual/train/run_rlbench_experiment.py +1 -0
  48. code/reveal_vla_bimanual/train/smoke_checks.py +4 -0
  49. code/reveal_vla_bimanual/train/trainer.py +1 -0
  50. environment/validate_same_machine.sh +23 -0
FILE_MANIFEST.txt CHANGED
@@ -3,11 +3,17 @@
3
  ./MODEL_INDEX.md
4
  ./README.md
5
  ./artifacts/data/reveal_proxy/proxy_train_clip224_v4_noleak_counterfactual.pt
 
6
  ./artifacts/data/reveal_proxy/proxy_train_smoke_v4.pt
 
7
  ./artifacts/data/reveal_proxy/proxy_train_v4_noleak_counterfactual.pt
 
8
  ./artifacts/data/reveal_proxy/proxy_val_clip224_v4_noleak_counterfactual.pt
 
9
  ./artifacts/data/reveal_proxy/proxy_val_smoke_v4.pt
 
10
  ./artifacts/data/reveal_proxy/proxy_val_v4_noleak_counterfactual.pt
 
11
  ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/front_rgb/rgb_0000.png
12
  ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/front_rgb/rgb_0001.png
13
  ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/low_dim_obs.pkl
@@ -98,6 +104,68 @@
98
  ./artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.json
99
  ./artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.md
100
  ./artifacts/outputs/interaction/smoke_checks/smoke_checks.json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  ./artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/checkpoint_best.pt
102
  ./artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/config_resolved.yaml
103
  ./artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/diagnostics/proxy_diagnostics.json
@@ -256,6 +324,10 @@
256
  ./code/reveal_vla_bimanual/train/configs/proxy_backbone_only_clip.yaml
257
  ./code/reveal_vla_bimanual/train/configs/proxy_backbone_only_smoke.yaml
258
  ./code/reveal_vla_bimanual/train/configs/proxy_interaction_state.yaml
 
 
 
 
259
  ./code/reveal_vla_bimanual/train/configs/proxy_interaction_state_smoke.yaml
260
  ./code/reveal_vla_bimanual/train/configs/proxy_reveal_state.yaml
261
  ./code/reveal_vla_bimanual/train/configs/proxy_reveal_state_clip.yaml
 
3
  ./MODEL_INDEX.md
4
  ./README.md
5
  ./artifacts/data/reveal_proxy/proxy_train_clip224_v4_noleak_counterfactual.pt
6
+ ./artifacts/data/reveal_proxy/proxy_train_clip224_v5_actionhist.pt
7
  ./artifacts/data/reveal_proxy/proxy_train_smoke_v4.pt
8
+ ./artifacts/data/reveal_proxy/proxy_train_smoke_v5_actionhist.pt
9
  ./artifacts/data/reveal_proxy/proxy_train_v4_noleak_counterfactual.pt
10
+ ./artifacts/data/reveal_proxy/proxy_train_v5_actionhist.pt
11
  ./artifacts/data/reveal_proxy/proxy_val_clip224_v4_noleak_counterfactual.pt
12
+ ./artifacts/data/reveal_proxy/proxy_val_clip224_v5_actionhist.pt
13
  ./artifacts/data/reveal_proxy/proxy_val_smoke_v4.pt
14
+ ./artifacts/data/reveal_proxy/proxy_val_smoke_v5_actionhist.pt
15
  ./artifacts/data/reveal_proxy/proxy_val_v4_noleak_counterfactual.pt
16
+ ./artifacts/data/reveal_proxy/proxy_val_v5_actionhist.pt
17
  ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/front_rgb/rgb_0000.png
18
  ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/front_rgb/rgb_0001.png
19
  ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/low_dim_obs.pkl
 
104
  ./artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.json
105
  ./artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.md
106
  ./artifacts/outputs/interaction/smoke_checks/smoke_checks.json
107
+ ./artifacts/outputs/interaction_debug/ablation_no_interaction_head_actionhist/reveal_benchmark.json
108
+ ./artifacts/outputs/interaction_debug/ablation_no_interaction_head_actionhist/reveal_benchmark.md
109
+ ./artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.json
110
+ ./artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.md
111
+ ./artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.json
112
+ ./artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.md
113
+ ./artifacts/outputs/interaction_debug/ablation_no_world_model_actionhist/reveal_benchmark.json
114
+ ./artifacts/outputs/interaction_debug/ablation_no_world_model_actionhist/reveal_benchmark.md
115
+ ./artifacts/outputs/interaction_debug/ablation_none_actionhist/reveal_benchmark.json
116
+ ./artifacts/outputs/interaction_debug/ablation_none_actionhist/reveal_benchmark.md
117
+ ./artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.json
118
+ ./artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.md
119
+ ./artifacts/outputs/interaction_debug/chunk_debug_trace.json
120
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
121
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/config_resolved.yaml
122
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/diagnostics/proxy_diagnostics.json
123
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/metrics.json
124
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/checkpoint_best.pt
125
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/config_resolved.yaml
126
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/diagnostics/proxy_diagnostics.json
127
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/metrics.json
128
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/checkpoint_best.pt
129
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/config_resolved.yaml
130
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/diagnostics/proxy_diagnostics.json
131
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/metrics.json
132
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/summary.json
133
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/checkpoint_best.pt
134
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/config_resolved.yaml
135
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/diagnostics/proxy_diagnostics.json
136
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/metrics.json
137
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/summary.json
138
+ ./artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.json
139
+ ./artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.md
140
+ ./artifacts/outputs/interaction_debug/reveal_eval_commit2_compare/reveal_benchmark.json
141
+ ./artifacts/outputs/interaction_debug/reveal_eval_commit2_compare/reveal_benchmark.md
142
+ ./artifacts/outputs/interaction_debug/reveal_eval_commit8_compare/reveal_benchmark.json
143
+ ./artifacts/outputs/interaction_debug/reveal_eval_commit8_compare/reveal_benchmark.md
144
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_commit4/reveal_benchmark.json
145
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_commit4/reveal_benchmark.md
146
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_recency_evalonly/reveal_benchmark.json
147
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_recency_evalonly/reveal_benchmark.md
148
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_smoke/reveal_benchmark.json
149
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_smoke/reveal_benchmark.md
150
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_smoke_commit4_short/reveal_benchmark.json
151
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_smoke_commit4_short/reveal_benchmark.md
152
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.json
153
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.md
154
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_recency_oracleft_commit4_compare/reveal_benchmark.json
155
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_recency_oracleft_commit4_compare/reveal_benchmark.md
156
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_rolefix_full_commit4/reveal_benchmark.json
157
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_rolefix_full_commit4/reveal_benchmark.md
158
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_rolefix_smoke_commit4_short/reveal_benchmark.json
159
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_rolefix_smoke_commit4_short/reveal_benchmark.md
160
+ ./artifacts/outputs/interaction_debug/reveal_eval_old_no_leak_baselines_commit4/reveal_benchmark.json
161
+ ./artifacts/outputs/interaction_debug/reveal_eval_old_no_leak_baselines_commit4/reveal_benchmark.md
162
+ ./artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_clipped/rollout_eval.json
163
+ ./artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_clipped/rollout_eval.md
164
+ ./artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_recovered/rollout_eval.json
165
+ ./artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_recovered/rollout_eval.md
166
+ ./artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_rerun/rollout_eval.json
167
+ ./artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_rerun/rollout_eval.md
168
+ ./artifacts/outputs/interaction_debug/smoke_checks_actionhist/smoke_checks.json
169
  ./artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/checkpoint_best.pt
170
  ./artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/config_resolved.yaml
171
  ./artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/diagnostics/proxy_diagnostics.json
 
324
  ./code/reveal_vla_bimanual/train/configs/proxy_backbone_only_clip.yaml
325
  ./code/reveal_vla_bimanual/train/configs/proxy_backbone_only_smoke.yaml
326
  ./code/reveal_vla_bimanual/train/configs/proxy_interaction_state.yaml
327
+ ./code/reveal_vla_bimanual/train/configs/proxy_interaction_state_actionhist.yaml
328
+ ./code/reveal_vla_bimanual/train/configs/proxy_interaction_state_actionhist_smoke.yaml
329
+ ./code/reveal_vla_bimanual/train/configs/proxy_interaction_state_clip_actionhist.yaml
330
+ ./code/reveal_vla_bimanual/train/configs/proxy_interaction_state_recency_oracleft.yaml
331
  ./code/reveal_vla_bimanual/train/configs/proxy_interaction_state_smoke.yaml
332
  ./code/reveal_vla_bimanual/train/configs/proxy_reveal_state.yaml
333
  ./code/reveal_vla_bimanual/train/configs/proxy_reveal_state_clip.yaml
MODEL_INDEX.md CHANGED
@@ -40,6 +40,18 @@ This file lists the uploaded checkpoints, datasets, and raw report files referen
40
  - `artifacts/data/reveal_proxy/proxy_train_smoke_v4.pt`
41
  - smoke val dataset
42
  - `artifacts/data/reveal_proxy/proxy_val_smoke_v4.pt`
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  ## Raw Benchmark Reports
45
 
@@ -95,6 +107,63 @@ This file lists the uploaded checkpoints, datasets, and raw report files referen
95
  - interaction-state rolefix full benchmark JSON
96
  - `artifacts/outputs/interaction_rolefix_full/reveal_eval_interaction/reveal_benchmark.json`
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  ## RLBench Two-Robot Smoke Outputs
99
 
100
  - import smoke JSON
@@ -115,6 +184,12 @@ This file lists the uploaded checkpoints, datasets, and raw report files referen
115
  - `artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.json`
116
  - RLBench open_drawer rollout eval Markdown
117
  - `artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.md`
 
 
 
 
 
 
118
  - RLBench smoke dataset root
119
  - `artifacts/data/rlbench_smoke_open_drawer/`
120
 
 
40
  - `artifacts/data/reveal_proxy/proxy_train_smoke_v4.pt`
41
  - smoke val dataset
42
  - `artifacts/data/reveal_proxy/proxy_val_smoke_v4.pt`
43
+ - actionhist train dataset
44
+ - `artifacts/data/reveal_proxy/proxy_train_v5_actionhist.pt`
45
+ - actionhist val dataset
46
+ - `artifacts/data/reveal_proxy/proxy_val_v5_actionhist.pt`
47
+ - actionhist smoke train dataset
48
+ - `artifacts/data/reveal_proxy/proxy_train_smoke_v5_actionhist.pt`
49
+ - actionhist smoke val dataset
50
+ - `artifacts/data/reveal_proxy/proxy_val_smoke_v5_actionhist.pt`
51
+ - CLIP actionhist train dataset
52
+ - `artifacts/data/reveal_proxy/proxy_train_clip224_v5_actionhist.pt`
53
+ - CLIP actionhist val dataset
54
+ - `artifacts/data/reveal_proxy/proxy_val_clip224_v5_actionhist.pt`
55
 
56
  ## Raw Benchmark Reports
57
 
 
107
  - interaction-state rolefix full benchmark JSON
108
  - `artifacts/outputs/interaction_rolefix_full/reveal_eval_interaction/reveal_benchmark.json`
109
 
110
+ ## Interaction Debug Outputs
111
+
112
+ - actionhist smoke checkpoint
113
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/checkpoint_best.pt`
114
+ - actionhist smoke metrics
115
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/metrics.json`
116
+ - actionhist smoke diagnostics
117
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/diagnostics/proxy_diagnostics.json`
118
+ - actionhist full checkpoint
119
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt`
120
+ - actionhist full metrics
121
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/metrics.json`
122
+ - actionhist full diagnostics
123
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/diagnostics/proxy_diagnostics.json`
124
+ - recency-oracleft full checkpoint
125
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/checkpoint_best.pt`
126
+ - recency-oracleft full metrics
127
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/metrics.json`
128
+ - recency-oracleft full summary
129
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/summary.json`
130
+ - recency-oracleft diagnostics
131
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/diagnostics/proxy_diagnostics.json`
132
+ - CLIP actionhist full checkpoint
133
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/checkpoint_best.pt`
134
+ - CLIP actionhist full metrics
135
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/metrics.json`
136
+ - CLIP actionhist full summary
137
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/summary.json`
138
+ - CLIP actionhist diagnostics
139
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/diagnostics/proxy_diagnostics.json`
140
+ - corrected interaction benchmark JSON
141
+ - `artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_commit4/reveal_benchmark.json`
142
+ - corrected baseline compare benchmark JSON
143
+ - `artifacts/outputs/interaction_debug/reveal_eval_old_no_leak_baselines_commit4/reveal_benchmark.json`
144
+ - corrected CLIP baseline benchmark JSON
145
+ - `artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.json`
146
+ - corrected CLIP interaction compare benchmark JSON
147
+ - `artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.json`
148
+ - corrected recency-oracleft compare benchmark JSON
149
+ - `artifacts/outputs/interaction_debug/reveal_eval_interaction_recency_oracleft_commit4_compare/reveal_benchmark.json`
150
+ - actionhist ablation full benchmark JSON
151
+ - `artifacts/outputs/interaction_debug/ablation_none_actionhist/reveal_benchmark.json`
152
+ - actionhist ablation no-interaction-head benchmark JSON
153
+ - `artifacts/outputs/interaction_debug/ablation_no_interaction_head_actionhist/reveal_benchmark.json`
154
+ - actionhist ablation no-world-model benchmark JSON
155
+ - `artifacts/outputs/interaction_debug/ablation_no_world_model_actionhist/reveal_benchmark.json`
156
+ - actionhist ablation no-planner benchmark JSON
157
+ - `artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.json`
158
+ - actionhist ablation no-role-tokens benchmark JSON
159
+ - `artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.json`
160
+ - actionhist ablation short-history benchmark JSON
161
+ - `artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.json`
162
+ - chunk debug trace
163
+ - `artifacts/outputs/interaction_debug/chunk_debug_trace.json`
164
+ - actionhist smoke checks
165
+ - `artifacts/outputs/interaction_debug/smoke_checks_actionhist/smoke_checks.json`
166
+
167
  ## RLBench Two-Robot Smoke Outputs
168
 
169
  - import smoke JSON
 
184
  - `artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.json`
185
  - RLBench open_drawer rollout eval Markdown
186
  - `artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.md`
187
+ - RLBench open_drawer rollout eval rerun JSON
188
+ - `artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_rerun/rollout_eval.json`
189
+ - RLBench open_drawer rollout eval clipped JSON
190
+ - `artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_clipped/rollout_eval.json`
191
+ - RLBench open_drawer rollout eval recovered JSON
192
+ - `artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_recovered/rollout_eval.json`
193
  - RLBench smoke dataset root
194
  - `artifacts/data/rlbench_smoke_open_drawer/`
195
 
README.md CHANGED
@@ -166,8 +166,84 @@ The smoke output file is:
166
  - `artifacts/data/reveal_proxy/proxy_val_v4_noleak_counterfactual.pt`
167
  - `artifacts/data/reveal_proxy/proxy_train_clip224_v4_noleak_counterfactual.pt`
168
  - `artifacts/data/reveal_proxy/proxy_val_clip224_v4_noleak_counterfactual.pt`
 
 
 
 
169
  - `artifacts/data/rlbench_smoke_open_drawer/`
170
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  ## Recreate The Same Software Layout
172
 
173
  Use:
 
166
  - `artifacts/data/reveal_proxy/proxy_val_v4_noleak_counterfactual.pt`
167
  - `artifacts/data/reveal_proxy/proxy_train_clip224_v4_noleak_counterfactual.pt`
168
  - `artifacts/data/reveal_proxy/proxy_val_clip224_v4_noleak_counterfactual.pt`
169
+ - `artifacts/data/reveal_proxy/proxy_train_v5_actionhist.pt`
170
+ - `artifacts/data/reveal_proxy/proxy_val_v5_actionhist.pt`
171
+ - `artifacts/data/reveal_proxy/proxy_train_clip224_v5_actionhist.pt`
172
+ - `artifacts/data/reveal_proxy/proxy_val_clip224_v5_actionhist.pt`
173
  - `artifacts/data/rlbench_smoke_open_drawer/`
174
 
175
+ ## Raw Follow-Up Interaction Runs
176
+
177
+ ### Proxy Training Endpoints
178
+
179
+ | Run | Checkpoint | Final train total | Final val total | Metrics or summary |
180
+ | --- | --- | ---: | ---: | --- |
181
+ | interaction-state actionhist smoke | `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/checkpoint_best.pt` | 1.229741208255291 | 1.1121365427970886 | `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/metrics.json` |
182
+ | interaction-state actionhist full | `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt` | 0.7432626067979089 | 0.8655468797630735 | `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/metrics.json` |
183
+ | interaction-state recency oracleft full | `artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/checkpoint_best.pt` | 0.9377426480253538 | 1.211510909928216 | `artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/summary.json` |
184
+ | interaction-state CLIP actionhist full | `artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/checkpoint_best.pt` | 1.2094011244349454 | 1.1205205075324527 | `artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/summary.json` |
185
+
186
+ ### Proxy Benchmark Results With Committed-Chunk Evaluator
187
+
188
+ Source files:
189
+
190
+ - `artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_commit4/reveal_benchmark.json`
191
+ - `artifacts/outputs/interaction_debug/reveal_eval_old_no_leak_baselines_commit4/reveal_benchmark.json`
192
+
193
+ | Model | Mean success | foliage_proxy | bag_proxy | cloth_proxy | visibility_integral | corridor_availability | reocclusion_rate | persistence_horizon_mae | disturbance_cost |
194
+ | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
195
+ | interaction | 0.5277777777777778 | 0.4166666666666667 | 0.5416666666666666 | 0.625 | 32.84789120488696 | 0.8711970953477753 | 0.003125 | 1.1544888946683267 | 0.4288607043110662 |
196
+ | backbone | 0.5555555555555555 | 0.4166666666666667 | 0.5833333333333334 | 0.6666666666666666 | 29.27436817354626 | 0.7935162136952082 | 0.07854136604136604 | 0.0 | 0.4006388829503622 |
197
+ | reveal | 0.5416666666666666 | 0.4166666666666667 | 0.5833333333333334 | 0.625 | 30.107333534293705 | 0.8134206715557311 | 0.05241552429052429 | 2.0996421982129196 | 0.42389288420478505 |
198
+
199
+ ### Frozen CLIP Proxy Benchmark Results With Committed-Chunk Evaluator
200
+
201
+ Source files:
202
+
203
+ - `artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.json`
204
+ - `artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.json`
205
+
206
+ | Model | Mean success | foliage_proxy | bag_proxy | cloth_proxy | visibility_integral | corridor_availability | reocclusion_rate | persistence_horizon_mae | disturbance_cost |
207
+ | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
208
+ | interaction_clip | 0.3055555555555556 | 0.2916666666666667 | 0.2916666666666667 | 0.3333333333333333 | 10.379729785852962 | 0.38910322284532917 | 0.026909722222222224 | 3.8014686041765726 | 0.392014082081409 |
209
+ | backbone_clip | 0.3333333333333333 | 0.2916666666666667 | 0.4166666666666667 | 0.2916666666666667 | 5.090670637786388 | 0.30186899772120845 | 0.013541666666666667 | 0.0 | 0.36051381931045196 |
210
+ | reveal_clip | 0.20833333333333334 | 0.20833333333333334 | 0.25 | 0.16666666666666666 | 48.426281129320465 | 0.8251730443702804 | 0.06718750000000001 | 0.9353624902194482 | 0.709741123020649 |
211
+
212
+ ### Proxy Diagnostics
213
+
214
+ | Run | Planner top-1 accuracy | Planner regret | Risk calibration MSE | Role collapse rate | Samples | JSON |
215
+ | --- | ---: | ---: | ---: | ---: | ---: | --- |
216
+ | interaction-state actionhist full | 0.1984732824427481 | 0.07150506228208542 | 0.009851997718214989 | 0.0 | 131 | `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/diagnostics/proxy_diagnostics.json` |
217
+ | interaction-state recency oracleft full | 0.2824427480916031 | 0.24119873344898224 | 0.009003574028611183 | 0.0 | 131 | `artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/diagnostics/proxy_diagnostics.json` |
218
+ | interaction-state CLIP actionhist full | 0.3253968253968254 | 0.1786193549633026 | 0.01645304262638092 | 0.0 | 126 | `artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/diagnostics/proxy_diagnostics.json` |
219
+
220
+ ### Proxy Ablation Results For Actionhist Checkpoint
221
+
222
+ Source files:
223
+
224
+ - `artifacts/outputs/interaction_debug/ablation_none_actionhist/reveal_benchmark.json`
225
+ - `artifacts/outputs/interaction_debug/ablation_no_interaction_head_actionhist/reveal_benchmark.json`
226
+ - `artifacts/outputs/interaction_debug/ablation_no_world_model_actionhist/reveal_benchmark.json`
227
+ - `artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.json`
228
+ - `artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.json`
229
+ - `artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.json`
230
+
231
+ | Ablation | Mean success | foliage_proxy | bag_proxy | cloth_proxy | visibility_integral | corridor_availability | reocclusion_rate | persistence_horizon_mae | disturbance_cost |
232
+ | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
233
+ | full_model | 0.5277777777777778 | 0.4166666666666667 | 0.5416666666666666 | 0.625 | 32.84789120488696 | 0.8711970953477753 | 0.003125 | 1.1544888946683267 | 0.4288607043110662 |
234
+ | no_interaction_head | 0.38888888888888884 | 0.16666666666666666 | 0.5 | 0.5 | 42.193298303418686 | 0.9207814501391517 | 0.016840277777777777 | 0.0 | 0.5719093395810988 |
235
+ | no_world_model | 0.5277777777777778 | 0.4166666666666667 | 0.5416666666666666 | 0.625 | 32.94181125528283 | 0.8710797395971086 | 0.003125 | 1.1577362408331497 | 0.42711537962572443 |
236
+ | no_planner | 0.5277777777777778 | 0.4166666666666667 | 0.5416666666666666 | 0.625 | 32.94181125528283 | 0.8710797395971086 | 0.003125 | 1.1577362408331497 | 0.42711537962572443 |
237
+ | no_role_tokens | 0.5277777777777778 | 0.4166666666666667 | 0.5416666666666666 | 0.625 | 33.69023843109608 | 0.8873094982571073 | 0.0 | 1.165569365169578 | 0.4185725698868434 |
238
+ | short_history | 0.5416666666666666 | 0.4166666666666667 | 0.5833333333333334 | 0.625 | 31.347230527136063 | 0.875287824206882 | 0.0 | 3.0816725173931325 | 0.459634010369579 |
239
+
240
+ ### RLBench Open Drawer Rollout Reruns
241
+
242
+ | Output | Raw values | File |
243
+ | --- | --- | --- |
244
+ | rollout rerun with path error | `plan_requested=true`, `plan_applied=true`, `tasks.open_drawer.error="A path could not be found because the target is outside of workspace."`, `mean_success=0.0` | `artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_rerun/rollout_eval.json` |
245
+ | rollout rerun after display and path recovery fixes | `plan_requested=true`, `plan_applied=true`, `tasks.open_drawer.path_recoveries=0`, `tasks.open_drawer.noop_fallbacks=0`, `mean_success=0.0` | `artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_recovered/rollout_eval.json` |
246
+
247
  ## Recreate The Same Software Layout
248
 
249
  Use:
artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "interaction": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4166666666666667,
5
+ "bag_proxy": 0.5416666666666666,
6
+ "cloth_proxy": 0.625
7
+ },
8
+ "mean_success": 0.5277777777777778,
9
+ "visibility_integral": 32.94181125528283,
10
+ "corridor_availability": 0.8710797395971086,
11
+ "reocclusion_rate": 0.003125,
12
+ "persistence_horizon_mae": 1.1577362408331497,
13
+ "disturbance_cost": 0.42711537962572443
14
+ }
15
+ }
artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
5
+ - mean_success: 0.528
6
+ - visibility_integral: 32.942
7
+ - corridor_availability: 0.871
8
+ - reocclusion_rate: 0.003
9
+ - persistence_horizon_mae: 1.158
10
+ - disturbance_cost: 0.427
11
+ - foliage_proxy_success: 0.417
12
+ - bag_proxy_success: 0.542
13
+ - cloth_proxy_success: 0.625
artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "interaction": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4166666666666667,
5
+ "bag_proxy": 0.5416666666666666,
6
+ "cloth_proxy": 0.625
7
+ },
8
+ "mean_success": 0.5277777777777778,
9
+ "visibility_integral": 33.69023843109608,
10
+ "corridor_availability": 0.8873094982571073,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 1.165569365169578,
13
+ "disturbance_cost": 0.4185725698868434
14
+ }
15
+ }
artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
5
+ - mean_success: 0.528
6
+ - visibility_integral: 33.690
7
+ - corridor_availability: 0.887
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 1.166
10
+ - disturbance_cost: 0.419
11
+ - foliage_proxy_success: 0.417
12
+ - bag_proxy_success: 0.542
13
+ - cloth_proxy_success: 0.625
artifacts/outputs/interaction_debug/ablation_no_world_model_actionhist/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
5
+ - mean_success: 0.528
6
+ - visibility_integral: 32.942
7
+ - corridor_availability: 0.871
8
+ - reocclusion_rate: 0.003
9
+ - persistence_horizon_mae: 1.158
10
+ - disturbance_cost: 0.427
11
+ - foliage_proxy_success: 0.417
12
+ - bag_proxy_success: 0.542
13
+ - cloth_proxy_success: 0.625
artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "interaction": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4166666666666667,
5
+ "bag_proxy": 0.5833333333333334,
6
+ "cloth_proxy": 0.625
7
+ },
8
+ "mean_success": 0.5416666666666666,
9
+ "visibility_integral": 31.347230527136063,
10
+ "corridor_availability": 0.875287824206882,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 3.0816725173931325,
13
+ "disturbance_cost": 0.459634010369579
14
+ }
15
+ }
artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
5
+ - mean_success: 0.542
6
+ - visibility_integral: 31.347
7
+ - corridor_availability: 0.875
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 3.082
10
+ - disturbance_cost: 0.460
11
+ - foliage_proxy_success: 0.417
12
+ - bag_proxy_success: 0.583
13
+ - cloth_proxy_success: 0.625
artifacts/outputs/interaction_debug/chunk_debug_trace.json ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "label": "rolefix_smoke_old",
4
+ "proxy": "foliage_proxy",
5
+ "best_candidate_index": 2,
6
+ "retrieve_sequence": [
7
+ 0.22872358560562134,
8
+ 0.7541071176528931,
9
+ 0.6303636431694031,
10
+ 0.4685209095478058
11
+ ],
12
+ "open_sequence": [
13
+ 1.2554516792297363,
14
+ 0.8975364565849304,
15
+ 0.5596103668212891,
16
+ 0.4779726266860962
17
+ ],
18
+ "template_sequence": [
19
+ 0.47042158246040344,
20
+ 0.6467143297195435,
21
+ 0.5085114240646362,
22
+ 0.478359580039978
23
+ ]
24
+ },
25
+ {
26
+ "label": "rolefix_smoke_old",
27
+ "proxy": "bag_proxy",
28
+ "best_candidate_index": 2,
29
+ "retrieve_sequence": [
30
+ 0.2374069094657898,
31
+ 0.7521002292633057,
32
+ 0.6305321455001831,
33
+ 0.4743019640445709
34
+ ],
35
+ "open_sequence": [
36
+ 1.257965326309204,
37
+ 0.896579384803772,
38
+ 0.5625595450401306,
39
+ 0.4776189923286438
40
+ ],
41
+ "template_sequence": [
42
+ 0.47550493478775024,
43
+ 0.6366342306137085,
44
+ 0.5038254261016846,
45
+ 0.4769764542579651
46
+ ]
47
+ },
48
+ {
49
+ "label": "rolefix_smoke_old",
50
+ "proxy": "cloth_proxy",
51
+ "best_candidate_index": 2,
52
+ "retrieve_sequence": [
53
+ 0.24050980806350708,
54
+ 0.7626074552536011,
55
+ 0.6310772895812988,
56
+ 0.47661182284355164
57
+ ],
58
+ "open_sequence": [
59
+ 1.2510802745819092,
60
+ 0.8940063714981079,
61
+ 0.5478025078773499,
62
+ 0.470864862203598
63
+ ],
64
+ "template_sequence": [
65
+ 0.46881186962127686,
66
+ 0.6378085613250732,
67
+ 0.504069447517395,
68
+ 0.4773429036140442
69
+ ]
70
+ },
71
+ {
72
+ "label": "actionhist_smoke_new",
73
+ "proxy": "foliage_proxy",
74
+ "best_candidate_index": 0,
75
+ "retrieve_sequence": [
76
+ 0.23512092232704163,
77
+ 0.5730606317520142,
78
+ 0.5967459678649902,
79
+ 0.4731495678424835
80
+ ],
81
+ "open_sequence": [
82
+ 0.6600309014320374,
83
+ 0.43168342113494873,
84
+ 0.15955285727977753,
85
+ -0.09488785266876221
86
+ ],
87
+ "template_sequence": [
88
+ -0.017185214906930923,
89
+ 0.017828624695539474,
90
+ 0.013375137001276016,
91
+ -0.01390126720070839
92
+ ]
93
+ },
94
+ {
95
+ "label": "actionhist_smoke_new",
96
+ "proxy": "bag_proxy",
97
+ "best_candidate_index": 0,
98
+ "retrieve_sequence": [
99
+ 0.2351658046245575,
100
+ 0.572963535785675,
101
+ 0.5971102714538574,
102
+ 0.4758695065975189
103
+ ],
104
+ "open_sequence": [
105
+ 0.6608113646507263,
106
+ 0.4318099617958069,
107
+ 0.16285540163516998,
108
+ -0.09124644100666046
109
+ ],
110
+ "template_sequence": [
111
+ -0.018705788999795914,
112
+ 0.016191553324460983,
113
+ 0.012765157967805862,
114
+ -0.016781020909547806
115
+ ]
116
+ },
117
+ {
118
+ "label": "actionhist_smoke_new",
119
+ "proxy": "cloth_proxy",
120
+ "best_candidate_index": 0,
121
+ "retrieve_sequence": [
122
+ 0.23625126481056213,
123
+ 0.5730390548706055,
124
+ 0.59672611951828,
125
+ 0.4727664887905121
126
+ ],
127
+ "open_sequence": [
128
+ 0.6570022106170654,
129
+ 0.4338717460632324,
130
+ 0.15934017300605774,
131
+ -0.09580504149198532
132
+ ],
133
+ "template_sequence": [
134
+ -0.028799299150705338,
135
+ 0.006899785250425339,
136
+ 0.004223380237817764,
137
+ -0.026467766612768173
138
+ ]
139
+ }
140
+ ]
artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/diagnostics/proxy_diagnostics.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "planner_top1_accuracy": 0.35294117647058826,
3
+ "planner_regret": 0.017080334946513176,
4
+ "risk_calibration_mse": 0.00906219333410263,
5
+ "role_collapse_rate": 0.0,
6
+ "num_samples": 17
7
+ }
artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/metrics.json ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.23455160359541574,
6
+ "arm_role": 1.2069129049777985,
7
+ "belief": 0.48631568253040314,
8
+ "corridor": 0.5782903432846069,
9
+ "disturbance": 0.17786112676064172,
10
+ "persistence": 1.815186083316803,
11
+ "phase": 1.3141004741191864,
12
+ "planner_ranking": 0.15019067749381065,
13
+ "planner_risk": 0.05527863139286637,
14
+ "planner_success": 0.6984443863232931,
15
+ "proposal_ranking": 0.10006876041491826,
16
+ "proposal_reconstruction": 0.3053521513938904,
17
+ "proposal_success": 0.6853575110435486,
18
+ "reocclusion": 0.6961739559968313,
19
+ "support_mode": 0.8659396668275198,
20
+ "total": 2.116169492403666,
21
+ "uncertainty": 0.6137877206007639,
22
+ "world_model": 2.6161614656448364
23
+ },
24
+ "val": {
25
+ "action": 0.07151262213786443,
26
+ "arm_role": 0.6764164765675863,
27
+ "belief": 0.36398513118426007,
28
+ "corridor": 0.4683004717032115,
29
+ "disturbance": 0.102281058828036,
30
+ "persistence": 2.114008625348409,
31
+ "phase": 0.9027760624885559,
32
+ "planner_ranking": 0.09026545286178589,
33
+ "planner_risk": 0.02189356298185885,
34
+ "planner_success": 0.6435574690500895,
35
+ "proposal_ranking": 0.16597949465115866,
36
+ "proposal_reconstruction": 0.11828663945198059,
37
+ "proposal_success": 0.6095772981643677,
38
+ "reocclusion": 0.7000808914502462,
39
+ "support_mode": 0.6359505653381348,
40
+ "total": 1.4241125186284382,
41
+ "uncertainty": 0.5725147326787313,
42
+ "world_model": 1.5686078071594238
43
+ }
44
+ },
45
+ {
46
+ "epoch": 1,
47
+ "train": {
48
+ "action": 0.07887393422424793,
49
+ "arm_role": 0.4496926615635554,
50
+ "belief": 0.28958051403363544,
51
+ "corridor": 0.3720829039812088,
52
+ "disturbance": 0.07337014439205329,
53
+ "persistence": 1.7143786152203877,
54
+ "phase": 0.777398000160853,
55
+ "planner_ranking": 0.14400668690601984,
56
+ "planner_risk": 0.016193983455499012,
57
+ "planner_success": 0.6361206471920013,
58
+ "proposal_ranking": 0.11434461300571759,
59
+ "proposal_reconstruction": 0.11045620342095692,
60
+ "proposal_success": 0.6260021726290385,
61
+ "reocclusion": 0.6881168782711029,
62
+ "support_mode": 0.784478078285853,
63
+ "total": 1.2963247100512187,
64
+ "uncertainty": 0.5047676662604014,
65
+ "world_model": 1.4695208072662354
66
+ },
67
+ "val": {
68
+ "action": 0.05061729749043783,
69
+ "arm_role": 0.2217621256907781,
70
+ "belief": 0.19144149124622345,
71
+ "corridor": 0.33698633313179016,
72
+ "disturbance": 0.019655164952079456,
73
+ "persistence": 2.276299834251404,
74
+ "phase": 0.7830212910970052,
75
+ "planner_ranking": 0.10330406576395035,
76
+ "planner_risk": 0.012047629677302515,
77
+ "planner_success": 0.46883141497770947,
78
+ "proposal_ranking": 0.16881480813026428,
79
+ "proposal_reconstruction": 0.08914910753568013,
80
+ "proposal_success": 0.5338547825813293,
81
+ "reocclusion": 0.7235203385353088,
82
+ "support_mode": 0.6643315752347311,
83
+ "total": 1.1495283842086792,
84
+ "uncertainty": 0.36858222881952923,
85
+ "world_model": 1.2773457169532776
86
+ }
87
+ },
88
+ {
89
+ "epoch": 2,
90
+ "train": {
91
+ "action": 0.0648206224044164,
92
+ "arm_role": 0.1347198486328125,
93
+ "belief": 0.14715169121821722,
94
+ "corridor": 0.2695915202299754,
95
+ "disturbance": 0.010349508646565178,
96
+ "persistence": 1.7063330213228862,
97
+ "phase": 0.726386179526647,
98
+ "planner_ranking": 0.11673471455772717,
99
+ "planner_risk": 0.009400874686737856,
100
+ "planner_success": 0.6698183119297028,
101
+ "proposal_ranking": 0.10080837706724803,
102
+ "proposal_reconstruction": 0.10316941390434901,
103
+ "proposal_success": 0.6286104818185171,
104
+ "reocclusion": 0.6681396464506785,
105
+ "support_mode": 0.6904432475566864,
106
+ "total": 1.1366514563560486,
107
+ "uncertainty": 0.27301351229349774,
108
+ "world_model": 1.372689664363861
109
+ },
110
+ "val": {
111
+ "action": 0.05020085473855337,
112
+ "arm_role": 0.054195716977119446,
113
+ "belief": 0.12719580034414926,
114
+ "corridor": 0.33358681698640186,
115
+ "disturbance": 0.0010723281108463805,
116
+ "persistence": 2.3125662008921304,
117
+ "phase": 0.7737143238385519,
118
+ "planner_ranking": 0.12118598818778992,
119
+ "planner_risk": 0.008284708329786858,
120
+ "planner_success": 0.6051804622014364,
121
+ "proposal_ranking": 0.1250954990585645,
122
+ "proposal_reconstruction": 0.08273230989774068,
123
+ "proposal_success": 0.5201686124006907,
124
+ "reocclusion": 0.6809982657432556,
125
+ "support_mode": 0.5777197976907095,
126
+ "total": 1.1349389950434368,
127
+ "uncertainty": 0.17320589224497476,
128
+ "world_model": 1.3453394174575806
129
+ }
130
+ },
131
+ {
132
+ "epoch": 3,
133
+ "train": {
134
+ "action": 0.055803545440236725,
135
+ "arm_role": 0.033050537109375,
136
+ "belief": 0.11564020191629727,
137
+ "corridor": 0.256190650165081,
138
+ "disturbance": 0.002490642402941982,
139
+ "persistence": 1.711540162563324,
140
+ "phase": 0.681098093589147,
141
+ "planner_ranking": 0.10920613507429759,
142
+ "planner_risk": 0.010532331497718891,
143
+ "planner_success": 0.6514300604661306,
144
+ "proposal_ranking": 0.08523762846986453,
145
+ "proposal_reconstruction": 0.08513934289415677,
146
+ "proposal_success": 0.6457574268182119,
147
+ "reocclusion": 0.6691893935203552,
148
+ "support_mode": 0.6864420572916666,
149
+ "total": 1.0746445059776306,
150
+ "uncertainty": 0.1379331536591053,
151
+ "world_model": 1.3261052171389263
152
+ },
153
+ "val": {
154
+ "action": 0.04372807095448176,
155
+ "arm_role": 0.014572909101843834,
156
+ "belief": 0.12325718998908997,
157
+ "corridor": 0.344586377342542,
158
+ "disturbance": 0.002586025783481697,
159
+ "persistence": 2.2659462292989097,
160
+ "phase": 0.712437629699707,
161
+ "planner_ranking": 0.1231433277328809,
162
+ "planner_risk": 0.00803024492536982,
163
+ "planner_success": 0.5179306268692017,
164
+ "proposal_ranking": 0.11125253637631734,
165
+ "proposal_reconstruction": 0.07622659454743068,
166
+ "proposal_success": 0.5146457950274149,
167
+ "reocclusion": 0.6703451077143351,
168
+ "support_mode": 0.6071783800919851,
169
+ "total": 1.0756589968999226,
170
+ "uncertainty": 0.10349630812803905,
171
+ "world_model": 1.2806402842203777
172
+ }
173
+ }
174
+ ]
artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/config_resolved.yaml ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_state_clip_actionhist
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction_debug
3
+ device: cuda
4
+ seed: 7
5
+ init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
6
+ init_strict: false
7
+ data:
8
+ proxies:
9
+ - foliage_proxy
10
+ - bag_proxy
11
+ - cloth_proxy
12
+ resolution: 224
13
+ train_episodes_per_proxy: 48
14
+ val_episodes_per_proxy: 16
15
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v5_actionhist.pt
16
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v5_actionhist.pt
17
+ rebuild_dataset: false
18
+ chunk_horizon: 8
19
+ rollout_horizon: 5
20
+ history_steps: 6
21
+ planner_candidates: 8
22
+ seed: 7
23
+ optim:
24
+ epochs: 4
25
+ batch_size: 2
26
+ num_workers: 0
27
+ lr: 0.0003
28
+ weight_decay: 0.0001
29
+ trainer:
30
+ policy_type: interaction_state
31
+ use_bf16: true
32
+ grad_clip_norm: 1.0
33
+ freeze_backbone: true
34
+ gradient_checkpointing: false
35
+ plan_during_train: true
36
+ plan_during_eval: true
37
+ support_mode_conditioning: true
38
+ planner_mode: trainable
39
+ policy:
40
+ backbone:
41
+ model_name: openai/clip-vit-base-patch32
42
+ hidden_dim: 512
43
+ max_text_tokens: 32
44
+ freeze_backbone: true
45
+ gradient_checkpointing: false
46
+ use_dummy_backbone: false
47
+ fusion:
48
+ hidden_dim: 512
49
+ num_cameras: 3
50
+ num_layers: 4
51
+ num_heads: 8
52
+ ff_dim: 2048
53
+ dropout: 0.1
54
+ proprio_dim: 32
55
+ proprio_tokens: 1
56
+ memory:
57
+ hidden_dim: 512
58
+ action_dim: 14
59
+ history_steps: 6
60
+ num_layers: 2
61
+ dropout: 0.1
62
+ memory_bank_size: 4
63
+ num_heads: 8
64
+ max_history_steps: 8
65
+ decoder:
66
+ hidden_dim: 512
67
+ num_heads: 8
68
+ num_layers: 4
69
+ ff_dim: 2048
70
+ dropout: 0.1
71
+ chunk_size: 8
72
+ action_dim: 14
73
+ arm_action_dim: 7
74
+ num_candidates: 8
75
+ num_phases: 5
76
+ num_arm_roles: 4
77
+ reveal_head:
78
+ hidden_dim: 512
79
+ num_support_modes: 3
80
+ num_approach_templates: 32
81
+ rollout_horizon: 5
82
+ belief_map_size: 32
83
+ field_size: 16
84
+ num_heads: 8
85
+ predict_belief_map: true
86
+ num_phases: 5
87
+ num_arm_roles: 4
88
+ num_interaction_tokens: 8
89
+ world_model:
90
+ hidden_dim: 512
91
+ action_dim: 14
92
+ num_support_modes: 3
93
+ num_approach_templates: 32
94
+ rollout_horizon: 5
95
+ field_size: 16
96
+ num_heads: 8
97
+ num_phases: 5
98
+ num_arm_roles: 4
99
+ num_interaction_tokens: 8
100
+ belief_map_size: 32
101
+ predict_belief_map: true
102
+ planner:
103
+ hidden_dim: 512
104
+ num_candidates: 8
105
+ action_dim: 14
106
+ num_support_modes: 3
107
+ utility_margin: 0.1
108
+ num_heads: 8
109
+ num_layers: 2
110
+ num_phases: 5
111
+ num_arm_roles: 4
112
+ loss_weights:
113
+ action: 1.0
114
+ phase: 0.1
115
+ arm_role: 0.15
116
+ support_mode: 0.1
117
+ corridor: 0.15
118
+ persistence: 0.05
119
+ disturbance: 0.05
120
+ world_model: 0.2
121
+ belief: 0.05
122
+ planner_success: 0.25
123
+ planner_risk: 0.1
124
+ planner_ranking: 0.2
125
+ proposal_reconstruction: 0.1
126
+ proposal_success: 0.15
127
+ proposal_ranking: 0.2
artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/diagnostics/proxy_diagnostics.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "planner_top1_accuracy": 0.3253968253968254,
3
+ "planner_regret": 0.1786193549633026,
4
+ "risk_calibration_mse": 0.01645304262638092,
5
+ "role_collapse_rate": 0.0,
6
+ "num_samples": 126
7
+ }
artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/metrics.json ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.17748002509016017,
6
+ "arm_role": 0.01832497369556527,
7
+ "belief": 0.12835299933612035,
8
+ "corridor": 0.2547702425456952,
9
+ "disturbance": 0.00850862705773346,
10
+ "persistence": 4.974573742500774,
11
+ "phase": 0.7463235106143652,
12
+ "planner_ranking": 1.3405994254881175,
13
+ "planner_risk": 0.024703218532160547,
14
+ "planner_success": 0.7777972318115035,
15
+ "proposal_ranking": 1.165930202494117,
16
+ "proposal_reconstruction": 0.2531185241035766,
17
+ "proposal_success": 0.6786430877540748,
18
+ "reocclusion": 0.7147265204584411,
19
+ "support_mode": 0.7602155595549738,
20
+ "total": 2.0788989903415063,
21
+ "uncertainty": 0.03309597126671469,
22
+ "world_model": 3.071348061112209
23
+ },
24
+ "val": {
25
+ "action": 0.03192901705938672,
26
+ "arm_role": 6.15250448592835e-06,
27
+ "belief": 0.10559089872099105,
28
+ "corridor": 0.23193429670636617,
29
+ "disturbance": 0.0022747389350750756,
30
+ "persistence": 3.85837465619284,
31
+ "phase": 0.6875752177503374,
32
+ "planner_ranking": 1.1088495595114571,
33
+ "planner_risk": 0.018587306145549057,
34
+ "planner_success": 0.6127710470131466,
35
+ "proposal_ranking": 1.1232511202494304,
36
+ "proposal_reconstruction": 0.08394021162438015,
37
+ "proposal_success": 0.681461288815453,
38
+ "reocclusion": 0.6769484205851479,
39
+ "support_mode": 0.6654504603809781,
40
+ "total": 1.5210873153474596,
41
+ "uncertainty": 0.011785898017623121,
42
+ "world_model": 1.9750548638994732
43
+ }
44
+ },
45
+ {
46
+ "epoch": 1,
47
+ "train": {
48
+ "action": 0.030109174476439102,
49
+ "arm_role": 8.612091004536414e-06,
50
+ "belief": 0.104316227781679,
51
+ "corridor": 0.23850143234689197,
52
+ "disturbance": 0.0025595128212472823,
53
+ "persistence": 3.9934506887540766,
54
+ "phase": 0.6901740428664922,
55
+ "planner_ranking": 1.239893207687358,
56
+ "planner_risk": 0.026462018369155793,
57
+ "planner_success": 0.664632208528319,
58
+ "proposal_ranking": 1.1259761543174065,
59
+ "proposal_reconstruction": 0.08132225903072907,
60
+ "proposal_success": 0.6764243753792728,
61
+ "reocclusion": 0.6790863540784227,
62
+ "support_mode": 0.6789359047774869,
63
+ "total": 1.550120969093283,
64
+ "uncertainty": 0.007208701525449128,
65
+ "world_model": 1.8854006223029491
66
+ },
67
+ "val": {
68
+ "action": 0.02197206175575654,
69
+ "arm_role": 2.089118947517977e-05,
70
+ "belief": 0.09741538857656812,
71
+ "corridor": 0.22761633885758265,
72
+ "disturbance": 0.0017140347070323067,
73
+ "persistence": 3.6565530148763505,
74
+ "phase": 0.6668311646060338,
75
+ "planner_ranking": 1.1634496355813646,
76
+ "planner_risk": 0.047890776559518324,
77
+ "planner_success": 0.5928089713293409,
78
+ "proposal_ranking": 1.1224727725225783,
79
+ "proposal_reconstruction": 0.06971718163953887,
80
+ "proposal_success": 0.6724110945822701,
81
+ "reocclusion": 0.6611922624565306,
82
+ "support_mode": 0.6766654224622817,
83
+ "total": 1.4845811980111259,
84
+ "uncertainty": 0.004251384046963519,
85
+ "world_model": 1.875271028942532
86
+ }
87
+ },
88
+ {
89
+ "epoch": 2,
90
+ "train": {
91
+ "action": 0.02331933839470928,
92
+ "arm_role": 8.285201656880802e-06,
93
+ "belief": 0.1041115006695243,
94
+ "corridor": 0.2380418391820258,
95
+ "disturbance": 0.002577872130260731,
96
+ "persistence": 3.555448654902543,
97
+ "phase": 0.6753773314790575,
98
+ "planner_ranking": 1.1668821538930163,
99
+ "planner_risk": 0.020309378087023242,
100
+ "planner_success": 0.623614322296612,
101
+ "proposal_ranking": 1.1245252312165905,
102
+ "proposal_reconstruction": 0.07289492924019929,
103
+ "proposal_success": 0.6749192248464255,
104
+ "reocclusion": 0.6692662537097931,
105
+ "support_mode": 0.6756738792539267,
106
+ "total": 1.4613653153025044,
107
+ "uncertainty": 0.012397505843296725,
108
+ "world_model": 1.7293687263084332
109
+ },
110
+ "val": {
111
+ "action": 0.03152821023785879,
112
+ "arm_role": 1.913968098564048e-06,
113
+ "belief": 0.10549203495657633,
114
+ "corridor": 0.20762673824552505,
115
+ "disturbance": 0.0014280516678275214,
116
+ "persistence": 2.0710838323547724,
117
+ "phase": 0.6628126601378123,
118
+ "planner_ranking": 1.0928319522312708,
119
+ "planner_risk": 0.021120590453464833,
120
+ "planner_success": 0.5570865495810433,
121
+ "proposal_ranking": 1.1183055109447904,
122
+ "proposal_reconstruction": 0.08380144739907885,
123
+ "proposal_success": 0.6772379392669314,
124
+ "reocclusion": 0.6509462926122878,
125
+ "support_mode": 0.6650945194183834,
126
+ "total": 1.3528291233002194,
127
+ "uncertainty": 0.0025819382726839403,
128
+ "world_model": 1.7092195824971275
129
+ }
130
+ },
131
+ {
132
+ "epoch": 3,
133
+ "train": {
134
+ "action": 0.021615957470698506,
135
+ "arm_role": 9.251202588306048e-07,
136
+ "belief": 0.10970319874818725,
137
+ "corridor": 0.2036819358732704,
138
+ "disturbance": 0.002751460597729129,
139
+ "persistence": 1.0053820329420464,
140
+ "phase": 0.4392661486620678,
141
+ "planner_ranking": 1.1170655027109915,
142
+ "planner_risk": 0.023540541935585323,
143
+ "planner_success": 0.574678816408387,
144
+ "proposal_ranking": 1.1232363391297027,
145
+ "proposal_reconstruction": 0.07163417897143289,
146
+ "proposal_success": 0.6759519848523964,
147
+ "reocclusion": 0.3594565280497986,
148
+ "support_mode": 0.1658484423971925,
149
+ "total": 1.2094011244349454,
150
+ "uncertainty": 0.001485606099231278,
151
+ "world_model": 1.6549255024076133
152
+ },
153
+ "val": {
154
+ "action": 0.01307902658092124,
155
+ "arm_role": 3.7938821602466983e-07,
156
+ "belief": 0.10557046154188732,
157
+ "corridor": 0.18899264949418249,
158
+ "disturbance": 0.003063943787498237,
159
+ "persistence": 0.6038030874915421,
160
+ "phase": 0.19549169234694944,
161
+ "planner_ranking": 1.1149483919143677,
162
+ "planner_risk": 0.01645888195424858,
163
+ "planner_success": 0.5231598180437845,
164
+ "proposal_ranking": 1.1176083068999032,
165
+ "proposal_reconstruction": 0.05967588533484747,
166
+ "proposal_success": 0.6721902480201115,
167
+ "reocclusion": 0.1391045902338293,
168
+ "support_mode": 0.0005700885616649415,
169
+ "total": 1.1205205075324527,
170
+ "uncertainty": 0.0005439088721946714,
171
+ "world_model": 1.6766679949230618
172
+ }
173
+ }
174
+ ]
artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/summary.json ADDED
@@ -0,0 +1,573 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "experiment_name": "proxy_interaction_state_clip_actionhist",
3
+ "device": "cuda",
4
+ "best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/checkpoint_best.pt",
5
+ "final_train_total": 1.2094011244349454,
6
+ "final_val_total": 1.1205205075324527,
7
+ "num_train_samples": 382,
8
+ "num_val_samples": 126,
9
+ "planner_mode": "trainable",
10
+ "frozen_modules": [],
11
+ "init_info": {
12
+ "path": "/workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt",
13
+ "loaded_keys": 467,
14
+ "skipped_shape_mismatch_keys": [
15
+ "memory.gru.weight_ih_l0",
16
+ "memory.gru.weight_hh_l0",
17
+ "memory.gru.bias_ih_l0",
18
+ "memory.gru.bias_hh_l0",
19
+ "decoder.actor_role_bias",
20
+ "decoder.revealer_decoder.layers.0.self_attn.in_proj_weight",
21
+ "decoder.revealer_decoder.layers.0.self_attn.in_proj_bias",
22
+ "decoder.revealer_decoder.layers.0.self_attn.out_proj.weight",
23
+ "decoder.revealer_decoder.layers.0.self_attn.out_proj.bias",
24
+ "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_weight",
25
+ "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_bias",
26
+ "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.weight",
27
+ "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.bias",
28
+ "decoder.revealer_decoder.layers.0.linear1.weight",
29
+ "decoder.revealer_decoder.layers.0.linear1.bias",
30
+ "decoder.revealer_decoder.layers.0.linear2.weight",
31
+ "decoder.revealer_decoder.layers.0.linear2.bias",
32
+ "decoder.revealer_decoder.layers.0.norm1.weight",
33
+ "decoder.revealer_decoder.layers.0.norm1.bias",
34
+ "decoder.revealer_decoder.layers.0.norm2.weight",
35
+ "decoder.revealer_decoder.layers.0.norm2.bias",
36
+ "decoder.revealer_decoder.layers.0.norm3.weight",
37
+ "decoder.revealer_decoder.layers.0.norm3.bias",
38
+ "decoder.revealer_decoder.layers.1.self_attn.in_proj_weight",
39
+ "decoder.revealer_decoder.layers.1.self_attn.in_proj_bias",
40
+ "decoder.revealer_decoder.layers.1.self_attn.out_proj.weight",
41
+ "decoder.revealer_decoder.layers.1.self_attn.out_proj.bias",
42
+ "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_weight",
43
+ "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_bias",
44
+ "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.weight",
45
+ "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.bias",
46
+ "decoder.revealer_decoder.layers.1.linear1.weight",
47
+ "decoder.revealer_decoder.layers.1.linear1.bias",
48
+ "decoder.revealer_decoder.layers.1.linear2.weight",
49
+ "decoder.revealer_decoder.layers.1.linear2.bias",
50
+ "decoder.revealer_decoder.layers.1.norm1.weight",
51
+ "decoder.revealer_decoder.layers.1.norm1.bias",
52
+ "decoder.revealer_decoder.layers.1.norm2.weight",
53
+ "decoder.revealer_decoder.layers.1.norm2.bias",
54
+ "decoder.revealer_decoder.layers.1.norm3.weight",
55
+ "decoder.revealer_decoder.layers.1.norm3.bias",
56
+ "decoder.revealer_decoder.layers.2.self_attn.in_proj_weight",
57
+ "decoder.revealer_decoder.layers.2.self_attn.in_proj_bias",
58
+ "decoder.revealer_decoder.layers.2.self_attn.out_proj.weight",
59
+ "decoder.revealer_decoder.layers.2.self_attn.out_proj.bias",
60
+ "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_weight",
61
+ "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_bias",
62
+ "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.weight",
63
+ "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.bias",
64
+ "decoder.revealer_decoder.layers.2.linear1.weight",
65
+ "decoder.revealer_decoder.layers.2.linear1.bias",
66
+ "decoder.revealer_decoder.layers.2.linear2.weight",
67
+ "decoder.revealer_decoder.layers.2.linear2.bias",
68
+ "decoder.revealer_decoder.layers.2.norm1.weight",
69
+ "decoder.revealer_decoder.layers.2.norm1.bias",
70
+ "decoder.revealer_decoder.layers.2.norm2.weight",
71
+ "decoder.revealer_decoder.layers.2.norm2.bias",
72
+ "decoder.revealer_decoder.layers.2.norm3.weight",
73
+ "decoder.revealer_decoder.layers.2.norm3.bias",
74
+ "decoder.revealer_decoder.layers.3.self_attn.in_proj_weight",
75
+ "decoder.revealer_decoder.layers.3.self_attn.in_proj_bias",
76
+ "decoder.revealer_decoder.layers.3.self_attn.out_proj.weight",
77
+ "decoder.revealer_decoder.layers.3.self_attn.out_proj.bias",
78
+ "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_weight",
79
+ "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_bias",
80
+ "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.weight",
81
+ "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.bias",
82
+ "decoder.revealer_decoder.layers.3.linear1.weight",
83
+ "decoder.revealer_decoder.layers.3.linear1.bias",
84
+ "decoder.revealer_decoder.layers.3.linear2.weight",
85
+ "decoder.revealer_decoder.layers.3.linear2.bias",
86
+ "decoder.revealer_decoder.layers.3.norm1.weight",
87
+ "decoder.revealer_decoder.layers.3.norm1.bias",
88
+ "decoder.revealer_decoder.layers.3.norm2.weight",
89
+ "decoder.revealer_decoder.layers.3.norm2.bias",
90
+ "decoder.revealer_decoder.layers.3.norm3.weight",
91
+ "decoder.revealer_decoder.layers.3.norm3.bias",
92
+ "decoder.actor_decoder.layers.0.self_attn.in_proj_weight",
93
+ "decoder.actor_decoder.layers.0.self_attn.in_proj_bias",
94
+ "decoder.actor_decoder.layers.0.self_attn.out_proj.weight",
95
+ "decoder.actor_decoder.layers.0.self_attn.out_proj.bias",
96
+ "decoder.actor_decoder.layers.0.multihead_attn.in_proj_weight",
97
+ "decoder.actor_decoder.layers.0.multihead_attn.in_proj_bias",
98
+ "decoder.actor_decoder.layers.0.multihead_attn.out_proj.weight",
99
+ "decoder.actor_decoder.layers.0.multihead_attn.out_proj.bias",
100
+ "decoder.actor_decoder.layers.0.linear1.weight",
101
+ "decoder.actor_decoder.layers.0.linear1.bias",
102
+ "decoder.actor_decoder.layers.0.linear2.weight",
103
+ "decoder.actor_decoder.layers.0.linear2.bias",
104
+ "decoder.actor_decoder.layers.0.norm1.weight",
105
+ "decoder.actor_decoder.layers.0.norm1.bias",
106
+ "decoder.actor_decoder.layers.0.norm2.weight",
107
+ "decoder.actor_decoder.layers.0.norm2.bias",
108
+ "decoder.actor_decoder.layers.0.norm3.weight",
109
+ "decoder.actor_decoder.layers.0.norm3.bias",
110
+ "decoder.actor_decoder.layers.1.self_attn.in_proj_weight",
111
+ "decoder.actor_decoder.layers.1.self_attn.in_proj_bias",
112
+ "decoder.actor_decoder.layers.1.self_attn.out_proj.weight",
113
+ "decoder.actor_decoder.layers.1.self_attn.out_proj.bias",
114
+ "decoder.actor_decoder.layers.1.multihead_attn.in_proj_weight",
115
+ "decoder.actor_decoder.layers.1.multihead_attn.in_proj_bias",
116
+ "decoder.actor_decoder.layers.1.multihead_attn.out_proj.weight",
117
+ "decoder.actor_decoder.layers.1.multihead_attn.out_proj.bias",
118
+ "decoder.actor_decoder.layers.1.linear1.weight",
119
+ "decoder.actor_decoder.layers.1.linear1.bias",
120
+ "decoder.actor_decoder.layers.1.linear2.weight",
121
+ "decoder.actor_decoder.layers.1.linear2.bias",
122
+ "decoder.actor_decoder.layers.1.norm1.weight",
123
+ "decoder.actor_decoder.layers.1.norm1.bias",
124
+ "decoder.actor_decoder.layers.1.norm2.weight",
125
+ "decoder.actor_decoder.layers.1.norm2.bias",
126
+ "decoder.actor_decoder.layers.1.norm3.weight",
127
+ "decoder.actor_decoder.layers.1.norm3.bias",
128
+ "decoder.actor_decoder.layers.2.self_attn.in_proj_weight",
129
+ "decoder.actor_decoder.layers.2.self_attn.in_proj_bias",
130
+ "decoder.actor_decoder.layers.2.self_attn.out_proj.weight",
131
+ "decoder.actor_decoder.layers.2.self_attn.out_proj.bias",
132
+ "decoder.actor_decoder.layers.2.multihead_attn.in_proj_weight",
133
+ "decoder.actor_decoder.layers.2.multihead_attn.in_proj_bias",
134
+ "decoder.actor_decoder.layers.2.multihead_attn.out_proj.weight",
135
+ "decoder.actor_decoder.layers.2.multihead_attn.out_proj.bias",
136
+ "decoder.actor_decoder.layers.2.linear1.weight",
137
+ "decoder.actor_decoder.layers.2.linear1.bias",
138
+ "decoder.actor_decoder.layers.2.linear2.weight",
139
+ "decoder.actor_decoder.layers.2.linear2.bias",
140
+ "decoder.actor_decoder.layers.2.norm1.weight",
141
+ "decoder.actor_decoder.layers.2.norm1.bias",
142
+ "decoder.actor_decoder.layers.2.norm2.weight",
143
+ "decoder.actor_decoder.layers.2.norm2.bias",
144
+ "decoder.actor_decoder.layers.2.norm3.weight",
145
+ "decoder.actor_decoder.layers.2.norm3.bias",
146
+ "decoder.actor_decoder.layers.3.self_attn.in_proj_weight",
147
+ "decoder.actor_decoder.layers.3.self_attn.in_proj_bias",
148
+ "decoder.actor_decoder.layers.3.self_attn.out_proj.weight",
149
+ "decoder.actor_decoder.layers.3.self_attn.out_proj.bias",
150
+ "decoder.actor_decoder.layers.3.multihead_attn.in_proj_weight",
151
+ "decoder.actor_decoder.layers.3.multihead_attn.in_proj_bias",
152
+ "decoder.actor_decoder.layers.3.multihead_attn.out_proj.weight",
153
+ "decoder.actor_decoder.layers.3.multihead_attn.out_proj.bias",
154
+ "decoder.actor_decoder.layers.3.linear1.weight",
155
+ "decoder.actor_decoder.layers.3.linear1.bias",
156
+ "decoder.actor_decoder.layers.3.linear2.weight",
157
+ "decoder.actor_decoder.layers.3.linear2.bias",
158
+ "decoder.actor_decoder.layers.3.norm1.weight",
159
+ "decoder.actor_decoder.layers.3.norm1.bias",
160
+ "decoder.actor_decoder.layers.3.norm2.weight",
161
+ "decoder.actor_decoder.layers.3.norm2.bias",
162
+ "decoder.actor_decoder.layers.3.norm3.weight",
163
+ "decoder.actor_decoder.layers.3.norm3.bias",
164
+ "decoder.revealer_mean.weight",
165
+ "decoder.revealer_mean.bias",
166
+ "decoder.revealer_log_std.weight",
167
+ "decoder.revealer_log_std.bias",
168
+ "decoder.actor_mean.weight",
169
+ "decoder.actor_mean.bias",
170
+ "decoder.actor_log_std.weight",
171
+ "decoder.actor_log_std.bias",
172
+ "decoder.proposal_score.1.weight",
173
+ "decoder.proposal_score.1.bias"
174
+ ],
175
+ "missing_keys": [
176
+ "memory.position_embedding",
177
+ "memory.bank_queries",
178
+ "memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
179
+ "memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
180
+ "memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
181
+ "memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
182
+ "memory.sequence_encoder.layers.0.linear1.weight",
183
+ "memory.sequence_encoder.layers.0.linear1.bias",
184
+ "memory.sequence_encoder.layers.0.linear2.weight",
185
+ "memory.sequence_encoder.layers.0.linear2.bias",
186
+ "memory.sequence_encoder.layers.0.norm1.weight",
187
+ "memory.sequence_encoder.layers.0.norm1.bias",
188
+ "memory.sequence_encoder.layers.0.norm2.weight",
189
+ "memory.sequence_encoder.layers.0.norm2.bias",
190
+ "memory.sequence_encoder.layers.1.self_attn.in_proj_weight",
191
+ "memory.sequence_encoder.layers.1.self_attn.in_proj_bias",
192
+ "memory.sequence_encoder.layers.1.self_attn.out_proj.weight",
193
+ "memory.sequence_encoder.layers.1.self_attn.out_proj.bias",
194
+ "memory.sequence_encoder.layers.1.linear1.weight",
195
+ "memory.sequence_encoder.layers.1.linear1.bias",
196
+ "memory.sequence_encoder.layers.1.linear2.weight",
197
+ "memory.sequence_encoder.layers.1.linear2.bias",
198
+ "memory.sequence_encoder.layers.1.norm1.weight",
199
+ "memory.sequence_encoder.layers.1.norm1.bias",
200
+ "memory.sequence_encoder.layers.1.norm2.weight",
201
+ "memory.sequence_encoder.layers.1.norm2.bias",
202
+ "memory.bank_attention.in_proj_weight",
203
+ "memory.bank_attention.in_proj_bias",
204
+ "memory.bank_attention.out_proj.weight",
205
+ "memory.bank_attention.out_proj.bias",
206
+ "memory.bank_mlp.0.weight",
207
+ "memory.bank_mlp.0.bias",
208
+ "memory.bank_mlp.1.weight",
209
+ "memory.bank_mlp.1.bias",
210
+ "memory.bank_mlp.3.weight",
211
+ "memory.bank_mlp.3.bias",
212
+ "memory.action_proj.0.weight",
213
+ "memory.action_proj.0.bias",
214
+ "memory.action_proj.1.weight",
215
+ "memory.action_proj.1.bias",
216
+ "decoder.right_decoder.layers.0.self_attn.in_proj_weight",
217
+ "decoder.right_decoder.layers.0.self_attn.in_proj_bias",
218
+ "decoder.right_decoder.layers.0.self_attn.out_proj.weight",
219
+ "decoder.right_decoder.layers.0.self_attn.out_proj.bias",
220
+ "decoder.right_decoder.layers.0.multihead_attn.in_proj_weight",
221
+ "decoder.right_decoder.layers.0.multihead_attn.in_proj_bias",
222
+ "decoder.right_decoder.layers.0.multihead_attn.out_proj.weight",
223
+ "decoder.right_decoder.layers.0.multihead_attn.out_proj.bias",
224
+ "decoder.right_decoder.layers.0.linear1.weight",
225
+ "decoder.right_decoder.layers.0.linear1.bias",
226
+ "decoder.right_decoder.layers.0.linear2.weight",
227
+ "decoder.right_decoder.layers.0.linear2.bias",
228
+ "decoder.right_decoder.layers.0.norm1.weight",
229
+ "decoder.right_decoder.layers.0.norm1.bias",
230
+ "decoder.right_decoder.layers.0.norm2.weight",
231
+ "decoder.right_decoder.layers.0.norm2.bias",
232
+ "decoder.right_decoder.layers.0.norm3.weight",
233
+ "decoder.right_decoder.layers.0.norm3.bias",
234
+ "decoder.right_decoder.layers.1.self_attn.in_proj_weight",
235
+ "decoder.right_decoder.layers.1.self_attn.in_proj_bias",
236
+ "decoder.right_decoder.layers.1.self_attn.out_proj.weight",
237
+ "decoder.right_decoder.layers.1.self_attn.out_proj.bias",
238
+ "decoder.right_decoder.layers.1.multihead_attn.in_proj_weight",
239
+ "decoder.right_decoder.layers.1.multihead_attn.in_proj_bias",
240
+ "decoder.right_decoder.layers.1.multihead_attn.out_proj.weight",
241
+ "decoder.right_decoder.layers.1.multihead_attn.out_proj.bias",
242
+ "decoder.right_decoder.layers.1.linear1.weight",
243
+ "decoder.right_decoder.layers.1.linear1.bias",
244
+ "decoder.right_decoder.layers.1.linear2.weight",
245
+ "decoder.right_decoder.layers.1.linear2.bias",
246
+ "decoder.right_decoder.layers.1.norm1.weight",
247
+ "decoder.right_decoder.layers.1.norm1.bias",
248
+ "decoder.right_decoder.layers.1.norm2.weight",
249
+ "decoder.right_decoder.layers.1.norm2.bias",
250
+ "decoder.right_decoder.layers.1.norm3.weight",
251
+ "decoder.right_decoder.layers.1.norm3.bias",
252
+ "decoder.right_decoder.layers.2.self_attn.in_proj_weight",
253
+ "decoder.right_decoder.layers.2.self_attn.in_proj_bias",
254
+ "decoder.right_decoder.layers.2.self_attn.out_proj.weight",
255
+ "decoder.right_decoder.layers.2.self_attn.out_proj.bias",
256
+ "decoder.right_decoder.layers.2.multihead_attn.in_proj_weight",
257
+ "decoder.right_decoder.layers.2.multihead_attn.in_proj_bias",
258
+ "decoder.right_decoder.layers.2.multihead_attn.out_proj.weight",
259
+ "decoder.right_decoder.layers.2.multihead_attn.out_proj.bias",
260
+ "decoder.right_decoder.layers.2.linear1.weight",
261
+ "decoder.right_decoder.layers.2.linear1.bias",
262
+ "decoder.right_decoder.layers.2.linear2.weight",
263
+ "decoder.right_decoder.layers.2.linear2.bias",
264
+ "decoder.right_decoder.layers.2.norm1.weight",
265
+ "decoder.right_decoder.layers.2.norm1.bias",
266
+ "decoder.right_decoder.layers.2.norm2.weight",
267
+ "decoder.right_decoder.layers.2.norm2.bias",
268
+ "decoder.right_decoder.layers.2.norm3.weight",
269
+ "decoder.right_decoder.layers.2.norm3.bias",
270
+ "decoder.right_decoder.layers.3.self_attn.in_proj_weight",
271
+ "decoder.right_decoder.layers.3.self_attn.in_proj_bias",
272
+ "decoder.right_decoder.layers.3.self_attn.out_proj.weight",
273
+ "decoder.right_decoder.layers.3.self_attn.out_proj.bias",
274
+ "decoder.right_decoder.layers.3.multihead_attn.in_proj_weight",
275
+ "decoder.right_decoder.layers.3.multihead_attn.in_proj_bias",
276
+ "decoder.right_decoder.layers.3.multihead_attn.out_proj.weight",
277
+ "decoder.right_decoder.layers.3.multihead_attn.out_proj.bias",
278
+ "decoder.right_decoder.layers.3.linear1.weight",
279
+ "decoder.right_decoder.layers.3.linear1.bias",
280
+ "decoder.right_decoder.layers.3.linear2.weight",
281
+ "decoder.right_decoder.layers.3.linear2.bias",
282
+ "decoder.right_decoder.layers.3.norm1.weight",
283
+ "decoder.right_decoder.layers.3.norm1.bias",
284
+ "decoder.right_decoder.layers.3.norm2.weight",
285
+ "decoder.right_decoder.layers.3.norm2.bias",
286
+ "decoder.right_decoder.layers.3.norm3.weight",
287
+ "decoder.right_decoder.layers.3.norm3.bias",
288
+ "decoder.left_decoder.layers.0.self_attn.in_proj_weight",
289
+ "decoder.left_decoder.layers.0.self_attn.in_proj_bias",
290
+ "decoder.left_decoder.layers.0.self_attn.out_proj.weight",
291
+ "decoder.left_decoder.layers.0.self_attn.out_proj.bias",
292
+ "decoder.left_decoder.layers.0.multihead_attn.in_proj_weight",
293
+ "decoder.left_decoder.layers.0.multihead_attn.in_proj_bias",
294
+ "decoder.left_decoder.layers.0.multihead_attn.out_proj.weight",
295
+ "decoder.left_decoder.layers.0.multihead_attn.out_proj.bias",
296
+ "decoder.left_decoder.layers.0.linear1.weight",
297
+ "decoder.left_decoder.layers.0.linear1.bias",
298
+ "decoder.left_decoder.layers.0.linear2.weight",
299
+ "decoder.left_decoder.layers.0.linear2.bias",
300
+ "decoder.left_decoder.layers.0.norm1.weight",
301
+ "decoder.left_decoder.layers.0.norm1.bias",
302
+ "decoder.left_decoder.layers.0.norm2.weight",
303
+ "decoder.left_decoder.layers.0.norm2.bias",
304
+ "decoder.left_decoder.layers.0.norm3.weight",
305
+ "decoder.left_decoder.layers.0.norm3.bias",
306
+ "decoder.left_decoder.layers.1.self_attn.in_proj_weight",
307
+ "decoder.left_decoder.layers.1.self_attn.in_proj_bias",
308
+ "decoder.left_decoder.layers.1.self_attn.out_proj.weight",
309
+ "decoder.left_decoder.layers.1.self_attn.out_proj.bias",
310
+ "decoder.left_decoder.layers.1.multihead_attn.in_proj_weight",
311
+ "decoder.left_decoder.layers.1.multihead_attn.in_proj_bias",
312
+ "decoder.left_decoder.layers.1.multihead_attn.out_proj.weight",
313
+ "decoder.left_decoder.layers.1.multihead_attn.out_proj.bias",
314
+ "decoder.left_decoder.layers.1.linear1.weight",
315
+ "decoder.left_decoder.layers.1.linear1.bias",
316
+ "decoder.left_decoder.layers.1.linear2.weight",
317
+ "decoder.left_decoder.layers.1.linear2.bias",
318
+ "decoder.left_decoder.layers.1.norm1.weight",
319
+ "decoder.left_decoder.layers.1.norm1.bias",
320
+ "decoder.left_decoder.layers.1.norm2.weight",
321
+ "decoder.left_decoder.layers.1.norm2.bias",
322
+ "decoder.left_decoder.layers.1.norm3.weight",
323
+ "decoder.left_decoder.layers.1.norm3.bias",
324
+ "decoder.left_decoder.layers.2.self_attn.in_proj_weight",
325
+ "decoder.left_decoder.layers.2.self_attn.in_proj_bias",
326
+ "decoder.left_decoder.layers.2.self_attn.out_proj.weight",
327
+ "decoder.left_decoder.layers.2.self_attn.out_proj.bias",
328
+ "decoder.left_decoder.layers.2.multihead_attn.in_proj_weight",
329
+ "decoder.left_decoder.layers.2.multihead_attn.in_proj_bias",
330
+ "decoder.left_decoder.layers.2.multihead_attn.out_proj.weight",
331
+ "decoder.left_decoder.layers.2.multihead_attn.out_proj.bias",
332
+ "decoder.left_decoder.layers.2.linear1.weight",
333
+ "decoder.left_decoder.layers.2.linear1.bias",
334
+ "decoder.left_decoder.layers.2.linear2.weight",
335
+ "decoder.left_decoder.layers.2.linear2.bias",
336
+ "decoder.left_decoder.layers.2.norm1.weight",
337
+ "decoder.left_decoder.layers.2.norm1.bias",
338
+ "decoder.left_decoder.layers.2.norm2.weight",
339
+ "decoder.left_decoder.layers.2.norm2.bias",
340
+ "decoder.left_decoder.layers.2.norm3.weight",
341
+ "decoder.left_decoder.layers.2.norm3.bias",
342
+ "decoder.left_decoder.layers.3.self_attn.in_proj_weight",
343
+ "decoder.left_decoder.layers.3.self_attn.in_proj_bias",
344
+ "decoder.left_decoder.layers.3.self_attn.out_proj.weight",
345
+ "decoder.left_decoder.layers.3.self_attn.out_proj.bias",
346
+ "decoder.left_decoder.layers.3.multihead_attn.in_proj_weight",
347
+ "decoder.left_decoder.layers.3.multihead_attn.in_proj_bias",
348
+ "decoder.left_decoder.layers.3.multihead_attn.out_proj.weight",
349
+ "decoder.left_decoder.layers.3.multihead_attn.out_proj.bias",
350
+ "decoder.left_decoder.layers.3.linear1.weight",
351
+ "decoder.left_decoder.layers.3.linear1.bias",
352
+ "decoder.left_decoder.layers.3.linear2.weight",
353
+ "decoder.left_decoder.layers.3.linear2.bias",
354
+ "decoder.left_decoder.layers.3.norm1.weight",
355
+ "decoder.left_decoder.layers.3.norm1.bias",
356
+ "decoder.left_decoder.layers.3.norm2.weight",
357
+ "decoder.left_decoder.layers.3.norm2.bias",
358
+ "decoder.left_decoder.layers.3.norm3.weight",
359
+ "decoder.left_decoder.layers.3.norm3.bias",
360
+ "decoder.proposal_queries.weight",
361
+ "decoder.arm_identity.weight",
362
+ "decoder.phase_adapter.weight",
363
+ "decoder.phase_adapter.bias",
364
+ "decoder.role_adapter.weight",
365
+ "decoder.role_adapter.bias",
366
+ "decoder.context_proj.0.weight",
367
+ "decoder.context_proj.0.bias",
368
+ "decoder.context_proj.1.weight",
369
+ "decoder.context_proj.1.bias",
370
+ "decoder.right_mean.weight",
371
+ "decoder.right_mean.bias",
372
+ "decoder.right_log_std.weight",
373
+ "decoder.right_log_std.bias",
374
+ "decoder.left_mean.weight",
375
+ "decoder.left_mean.bias",
376
+ "decoder.left_log_std.weight",
377
+ "decoder.left_log_std.bias",
378
+ "decoder.proposal_score.1.weight",
379
+ "decoder.proposal_score.1.bias",
380
+ "decoder.proposal_score.3.weight",
381
+ "decoder.proposal_score.3.bias",
382
+ "interaction_head.interaction_queries",
383
+ "interaction_head.interaction_attention.in_proj_weight",
384
+ "interaction_head.interaction_attention.in_proj_bias",
385
+ "interaction_head.interaction_attention.out_proj.weight",
386
+ "interaction_head.interaction_attention.out_proj.bias",
387
+ "interaction_head.interaction_mlp.0.weight",
388
+ "interaction_head.interaction_mlp.0.bias",
389
+ "interaction_head.interaction_mlp.1.weight",
390
+ "interaction_head.interaction_mlp.1.bias",
391
+ "interaction_head.interaction_mlp.3.weight",
392
+ "interaction_head.interaction_mlp.3.bias",
393
+ "interaction_head.decoder.field_queries",
394
+ "interaction_head.decoder.field_attention.in_proj_weight",
395
+ "interaction_head.decoder.field_attention.in_proj_bias",
396
+ "interaction_head.decoder.field_attention.out_proj.weight",
397
+ "interaction_head.decoder.field_attention.out_proj.bias",
398
+ "interaction_head.decoder.field_mlp.0.weight",
399
+ "interaction_head.decoder.field_mlp.0.bias",
400
+ "interaction_head.decoder.field_mlp.1.weight",
401
+ "interaction_head.decoder.field_mlp.1.bias",
402
+ "interaction_head.decoder.field_mlp.3.weight",
403
+ "interaction_head.decoder.field_mlp.3.bias",
404
+ "interaction_head.decoder.summary_proj.0.weight",
405
+ "interaction_head.decoder.summary_proj.0.bias",
406
+ "interaction_head.decoder.summary_proj.1.weight",
407
+ "interaction_head.decoder.summary_proj.1.bias",
408
+ "interaction_head.decoder.phase_head.0.weight",
409
+ "interaction_head.decoder.phase_head.0.bias",
410
+ "interaction_head.decoder.phase_head.1.weight",
411
+ "interaction_head.decoder.phase_head.1.bias",
412
+ "interaction_head.decoder.phase_head.3.weight",
413
+ "interaction_head.decoder.phase_head.3.bias",
414
+ "interaction_head.decoder.arm_role_head.0.weight",
415
+ "interaction_head.decoder.arm_role_head.0.bias",
416
+ "interaction_head.decoder.arm_role_head.1.weight",
417
+ "interaction_head.decoder.arm_role_head.1.bias",
418
+ "interaction_head.decoder.arm_role_head.3.weight",
419
+ "interaction_head.decoder.arm_role_head.3.bias",
420
+ "interaction_head.decoder.arm_identity.weight",
421
+ "interaction_head.decoder.support_mode.0.weight",
422
+ "interaction_head.decoder.support_mode.0.bias",
423
+ "interaction_head.decoder.support_mode.1.weight",
424
+ "interaction_head.decoder.support_mode.1.bias",
425
+ "interaction_head.decoder.support_mode.3.weight",
426
+ "interaction_head.decoder.support_mode.3.bias",
427
+ "interaction_head.decoder.target_field.weight",
428
+ "interaction_head.decoder.target_field.bias",
429
+ "interaction_head.decoder.actor_feasibility_field.weight",
430
+ "interaction_head.decoder.actor_feasibility_field.bias",
431
+ "interaction_head.decoder.persistence_field.weight",
432
+ "interaction_head.decoder.persistence_field.bias",
433
+ "interaction_head.decoder.risk_field.weight",
434
+ "interaction_head.decoder.risk_field.bias",
435
+ "interaction_head.decoder.uncertainty_field.weight",
436
+ "interaction_head.decoder.uncertainty_field.bias",
437
+ "interaction_head.decoder.compat_access_field.weight",
438
+ "interaction_head.decoder.compat_access_field.bias",
439
+ "interaction_head.decoder.compat_persistence.weight",
440
+ "interaction_head.decoder.compat_persistence.bias",
441
+ "interaction_head.decoder.reocclusion_head.0.weight",
442
+ "interaction_head.decoder.reocclusion_head.0.bias",
443
+ "interaction_head.decoder.reocclusion_head.1.weight",
444
+ "interaction_head.decoder.reocclusion_head.1.bias",
445
+ "interaction_head.decoder.reocclusion_head.3.weight",
446
+ "interaction_head.decoder.reocclusion_head.3.bias",
447
+ "world_model.action_encoder.0.weight",
448
+ "world_model.action_encoder.0.bias",
449
+ "world_model.action_encoder.1.weight",
450
+ "world_model.action_encoder.1.bias",
451
+ "world_model.transition.layers.0.self_attn.in_proj_weight",
452
+ "world_model.transition.layers.0.self_attn.in_proj_bias",
453
+ "world_model.transition.layers.0.self_attn.out_proj.weight",
454
+ "world_model.transition.layers.0.self_attn.out_proj.bias",
455
+ "world_model.transition.layers.0.linear1.weight",
456
+ "world_model.transition.layers.0.linear1.bias",
457
+ "world_model.transition.layers.0.linear2.weight",
458
+ "world_model.transition.layers.0.linear2.bias",
459
+ "world_model.transition.layers.0.norm1.weight",
460
+ "world_model.transition.layers.0.norm1.bias",
461
+ "world_model.transition.layers.0.norm2.weight",
462
+ "world_model.transition.layers.0.norm2.bias",
463
+ "world_model.transition.layers.1.self_attn.in_proj_weight",
464
+ "world_model.transition.layers.1.self_attn.in_proj_bias",
465
+ "world_model.transition.layers.1.self_attn.out_proj.weight",
466
+ "world_model.transition.layers.1.self_attn.out_proj.bias",
467
+ "world_model.transition.layers.1.linear1.weight",
468
+ "world_model.transition.layers.1.linear1.bias",
469
+ "world_model.transition.layers.1.linear2.weight",
470
+ "world_model.transition.layers.1.linear2.bias",
471
+ "world_model.transition.layers.1.norm1.weight",
472
+ "world_model.transition.layers.1.norm1.bias",
473
+ "world_model.transition.layers.1.norm2.weight",
474
+ "world_model.transition.layers.1.norm2.bias",
475
+ "world_model.token_update.0.weight",
476
+ "world_model.token_update.0.bias",
477
+ "world_model.token_update.1.weight",
478
+ "world_model.token_update.1.bias",
479
+ "world_model.token_update.3.weight",
480
+ "world_model.token_update.3.bias",
481
+ "world_model.decoder.field_queries",
482
+ "world_model.decoder.field_attention.in_proj_weight",
483
+ "world_model.decoder.field_attention.in_proj_bias",
484
+ "world_model.decoder.field_attention.out_proj.weight",
485
+ "world_model.decoder.field_attention.out_proj.bias",
486
+ "world_model.decoder.field_mlp.0.weight",
487
+ "world_model.decoder.field_mlp.0.bias",
488
+ "world_model.decoder.field_mlp.1.weight",
489
+ "world_model.decoder.field_mlp.1.bias",
490
+ "world_model.decoder.field_mlp.3.weight",
491
+ "world_model.decoder.field_mlp.3.bias",
492
+ "world_model.decoder.summary_proj.0.weight",
493
+ "world_model.decoder.summary_proj.0.bias",
494
+ "world_model.decoder.summary_proj.1.weight",
495
+ "world_model.decoder.summary_proj.1.bias",
496
+ "world_model.decoder.phase_head.0.weight",
497
+ "world_model.decoder.phase_head.0.bias",
498
+ "world_model.decoder.phase_head.1.weight",
499
+ "world_model.decoder.phase_head.1.bias",
500
+ "world_model.decoder.phase_head.3.weight",
501
+ "world_model.decoder.phase_head.3.bias",
502
+ "world_model.decoder.arm_role_head.0.weight",
503
+ "world_model.decoder.arm_role_head.0.bias",
504
+ "world_model.decoder.arm_role_head.1.weight",
505
+ "world_model.decoder.arm_role_head.1.bias",
506
+ "world_model.decoder.arm_role_head.3.weight",
507
+ "world_model.decoder.arm_role_head.3.bias",
508
+ "world_model.decoder.arm_identity.weight",
509
+ "world_model.decoder.support_mode.0.weight",
510
+ "world_model.decoder.support_mode.0.bias",
511
+ "world_model.decoder.support_mode.1.weight",
512
+ "world_model.decoder.support_mode.1.bias",
513
+ "world_model.decoder.support_mode.3.weight",
514
+ "world_model.decoder.support_mode.3.bias",
515
+ "world_model.decoder.target_field.weight",
516
+ "world_model.decoder.target_field.bias",
517
+ "world_model.decoder.actor_feasibility_field.weight",
518
+ "world_model.decoder.actor_feasibility_field.bias",
519
+ "world_model.decoder.persistence_field.weight",
520
+ "world_model.decoder.persistence_field.bias",
521
+ "world_model.decoder.risk_field.weight",
522
+ "world_model.decoder.risk_field.bias",
523
+ "world_model.decoder.uncertainty_field.weight",
524
+ "world_model.decoder.uncertainty_field.bias",
525
+ "world_model.decoder.compat_access_field.weight",
526
+ "world_model.decoder.compat_access_field.bias",
527
+ "world_model.decoder.compat_persistence.weight",
528
+ "world_model.decoder.compat_persistence.bias",
529
+ "world_model.decoder.reocclusion_head.0.weight",
530
+ "world_model.decoder.reocclusion_head.0.bias",
531
+ "world_model.decoder.reocclusion_head.1.weight",
532
+ "world_model.decoder.reocclusion_head.1.bias",
533
+ "world_model.decoder.reocclusion_head.3.weight",
534
+ "world_model.decoder.reocclusion_head.3.bias",
535
+ "planner.cls_token",
536
+ "planner.step_proj.0.weight",
537
+ "planner.step_proj.0.bias",
538
+ "planner.step_proj.1.weight",
539
+ "planner.step_proj.1.bias",
540
+ "planner.sequence_encoder.layers.0.self_attn.in_proj_weight",
541
+ "planner.sequence_encoder.layers.0.self_attn.in_proj_bias",
542
+ "planner.sequence_encoder.layers.0.self_attn.out_proj.weight",
543
+ "planner.sequence_encoder.layers.0.self_attn.out_proj.bias",
544
+ "planner.sequence_encoder.layers.0.linear1.weight",
545
+ "planner.sequence_encoder.layers.0.linear1.bias",
546
+ "planner.sequence_encoder.layers.0.linear2.weight",
547
+ "planner.sequence_encoder.layers.0.linear2.bias",
548
+ "planner.sequence_encoder.layers.0.norm1.weight",
549
+ "planner.sequence_encoder.layers.0.norm1.bias",
550
+ "planner.sequence_encoder.layers.0.norm2.weight",
551
+ "planner.sequence_encoder.layers.0.norm2.bias",
552
+ "planner.sequence_encoder.layers.1.self_attn.in_proj_weight",
553
+ "planner.sequence_encoder.layers.1.self_attn.in_proj_bias",
554
+ "planner.sequence_encoder.layers.1.self_attn.out_proj.weight",
555
+ "planner.sequence_encoder.layers.1.self_attn.out_proj.bias",
556
+ "planner.sequence_encoder.layers.1.linear1.weight",
557
+ "planner.sequence_encoder.layers.1.linear1.bias",
558
+ "planner.sequence_encoder.layers.1.linear2.weight",
559
+ "planner.sequence_encoder.layers.1.linear2.bias",
560
+ "planner.sequence_encoder.layers.1.norm1.weight",
561
+ "planner.sequence_encoder.layers.1.norm1.bias",
562
+ "planner.sequence_encoder.layers.1.norm2.weight",
563
+ "planner.sequence_encoder.layers.1.norm2.bias",
564
+ "planner.success_head.weight",
565
+ "planner.success_head.bias",
566
+ "planner.risk_head.weight",
567
+ "planner.risk_head.bias",
568
+ "planner.score_head.weight",
569
+ "planner.score_head.bias"
570
+ ],
571
+ "unexpected_keys": []
572
+ }
573
+ }
artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/config_resolved.yaml ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_state_recency_oracleft
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction_debug
3
+ device: cuda
4
+ seed: 13
5
+ init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
6
+ init_strict: true
7
+ data:
8
+ proxies:
9
+ - foliage_proxy
10
+ - bag_proxy
11
+ - cloth_proxy
12
+ resolution: 96
13
+ train_episodes_per_proxy: 48
14
+ val_episodes_per_proxy: 16
15
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v5_actionhist.pt
16
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v5_actionhist.pt
17
+ rebuild_dataset: false
18
+ chunk_horizon: 8
19
+ rollout_horizon: 5
20
+ history_steps: 6
21
+ planner_candidates: 8
22
+ seed: 13
23
+ optim:
24
+ epochs: 8
25
+ batch_size: 16
26
+ num_workers: 0
27
+ lr: 0.0003
28
+ weight_decay: 0.0001
29
+ trainer:
30
+ policy_type: interaction_state
31
+ use_bf16: true
32
+ grad_clip_norm: 1.0
33
+ freeze_backbone: true
34
+ gradient_checkpointing: false
35
+ plan_during_train: true
36
+ plan_during_eval: true
37
+ support_mode_conditioning: true
38
+ planner_mode: trainable
39
+ policy:
40
+ backbone:
41
+ model_name: openai/clip-vit-base-patch32
42
+ hidden_dim: 128
43
+ max_text_tokens: 32
44
+ freeze_backbone: true
45
+ gradient_checkpointing: false
46
+ use_dummy_backbone: true
47
+ fusion:
48
+ hidden_dim: 128
49
+ num_cameras: 3
50
+ num_layers: 2
51
+ num_heads: 4
52
+ ff_dim: 256
53
+ dropout: 0.1
54
+ proprio_dim: 32
55
+ proprio_tokens: 1
56
+ memory:
57
+ hidden_dim: 128
58
+ action_dim: 14
59
+ history_steps: 6
60
+ num_layers: 2
61
+ dropout: 0.1
62
+ memory_bank_size: 4
63
+ num_heads: 4
64
+ max_history_steps: 8
65
+ decoder:
66
+ hidden_dim: 128
67
+ num_heads: 4
68
+ num_layers: 2
69
+ ff_dim: 256
70
+ dropout: 0.1
71
+ chunk_size: 8
72
+ action_dim: 14
73
+ arm_action_dim: 7
74
+ num_candidates: 8
75
+ num_phases: 5
76
+ num_arm_roles: 4
77
+ reveal_head:
78
+ hidden_dim: 128
79
+ num_support_modes: 3
80
+ num_approach_templates: 32
81
+ rollout_horizon: 5
82
+ belief_map_size: 32
83
+ field_size: 16
84
+ num_heads: 4
85
+ predict_belief_map: true
86
+ num_phases: 5
87
+ num_arm_roles: 4
88
+ num_interaction_tokens: 8
89
+ world_model:
90
+ hidden_dim: 128
91
+ action_dim: 14
92
+ num_support_modes: 3
93
+ num_approach_templates: 32
94
+ rollout_horizon: 5
95
+ field_size: 16
96
+ num_heads: 4
97
+ num_phases: 5
98
+ num_arm_roles: 4
99
+ num_interaction_tokens: 8
100
+ planner:
101
+ hidden_dim: 128
102
+ num_candidates: 8
103
+ action_dim: 14
104
+ num_support_modes: 3
105
+ utility_margin: 0.1
106
+ num_heads: 4
107
+ num_layers: 2
108
+ num_phases: 5
109
+ num_arm_roles: 4
110
+ loss_weights:
111
+ action: 1.0
112
+ phase: 0.1
113
+ arm_role: 0.15
114
+ support_mode: 0.1
115
+ corridor: 0.15
116
+ persistence: 0.05
117
+ disturbance: 0.05
118
+ world_model: 0.2
119
+ belief: 0.05
120
+ planner_success: 0.25
121
+ planner_risk: 0.1
122
+ planner_ranking: 0.2
123
+ proposal_reconstruction: 0.1
124
+ proposal_success: 0.15
125
+ proposal_ranking: 0.2
artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/diagnostics/proxy_diagnostics.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "planner_top1_accuracy": 0.2824427480916031,
3
+ "planner_regret": 0.24119873344898224,
4
+ "risk_calibration_mse": 0.009003574028611183,
5
+ "role_collapse_rate": 0.0,
6
+ "num_samples": 131
7
+ }
artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/metrics.json ADDED
@@ -0,0 +1,346 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.019621784721190732,
6
+ "arm_role": 1.4238473445023677e-05,
7
+ "belief": 0.10273545235395432,
8
+ "corridor": 0.1970261943837007,
9
+ "disturbance": 0.0021691546814205744,
10
+ "persistence": 1.1530707913140457,
11
+ "phase": 0.40012874578436214,
12
+ "planner_ranking": 1.0832201441129048,
13
+ "planner_risk": 0.009513227792922407,
14
+ "planner_success": 0.44357747708757717,
15
+ "proposal_ranking": 1.1302440961201985,
16
+ "proposal_reconstruction": 0.06888884957879782,
17
+ "proposal_success": 0.6472248112161955,
18
+ "reocclusion": 0.23694051212320724,
19
+ "support_mode": 0.0007155667990446091,
20
+ "total": 1.1227939675251644,
21
+ "uncertainty": 0.00357946046278812,
22
+ "world_model": 1.5367356936136882
23
+ },
24
+ "val": {
25
+ "action": 0.01623468690862258,
26
+ "arm_role": 1.7815142427934916e-06,
27
+ "belief": 0.09838261952002843,
28
+ "corridor": 0.19904182685746086,
29
+ "disturbance": 0.0012887230906118122,
30
+ "persistence": 1.2435127298037212,
31
+ "phase": 0.451065621442265,
32
+ "planner_ranking": 1.1237382623884413,
33
+ "planner_risk": 0.00879605039436784,
34
+ "planner_success": 0.5527588526407877,
35
+ "proposal_ranking": 1.1329045295715332,
36
+ "proposal_reconstruction": 0.062247288723786674,
37
+ "proposal_success": 0.6402903331650628,
38
+ "reocclusion": 0.19368870432178178,
39
+ "support_mode": 4.385759530123323e-05,
40
+ "total": 1.1563972632090251,
41
+ "uncertainty": 0.003620341523653931,
42
+ "world_model": 1.507298681471083
43
+ }
44
+ },
45
+ {
46
+ "epoch": 1,
47
+ "train": {
48
+ "action": 0.015102950584453842,
49
+ "arm_role": 7.929694329315377e-07,
50
+ "belief": 0.09922042830536763,
51
+ "corridor": 0.1954052426541845,
52
+ "disturbance": 0.0012760455817139398,
53
+ "persistence": 1.1133080422878265,
54
+ "phase": 0.40078286826610565,
55
+ "planner_ranking": 1.0535631676514943,
56
+ "planner_risk": 0.009265869099181145,
57
+ "planner_success": 0.41223976016044617,
58
+ "proposal_ranking": 1.130059376358986,
59
+ "proposal_reconstruction": 0.061719981798281275,
60
+ "proposal_success": 0.6374408900737762,
61
+ "reocclusion": 0.22066612169146538,
62
+ "support_mode": 3.152040555202499e-05,
63
+ "total": 1.0627698848644893,
64
+ "uncertainty": 0.002360584529621216,
65
+ "world_model": 1.3532413293917973
66
+ },
67
+ "val": {
68
+ "action": 0.014689018225504292,
69
+ "arm_role": 5.302327663356563e-07,
70
+ "belief": 0.09588906251721913,
71
+ "corridor": 0.19485984411504534,
72
+ "disturbance": 0.0013201889879484144,
73
+ "persistence": 1.211418045891656,
74
+ "phase": 0.4520965864260991,
75
+ "planner_ranking": 1.1366683509614732,
76
+ "planner_risk": 0.009635515045374632,
77
+ "planner_success": 0.5696005490091112,
78
+ "proposal_ranking": 1.1199064254760742,
79
+ "proposal_reconstruction": 0.06043942438231574,
80
+ "proposal_success": 0.6389325261116028,
81
+ "reocclusion": 0.18208894692361355,
82
+ "support_mode": 1.9065460340546753e-05,
83
+ "total": 1.1511138545142279,
84
+ "uncertainty": 0.0020409094027450513,
85
+ "world_model": 1.481640590561761
86
+ }
87
+ },
88
+ {
89
+ "epoch": 2,
90
+ "train": {
91
+ "action": 0.013718575122766197,
92
+ "arm_role": 3.225997922129409e-07,
93
+ "belief": 0.09694493561983109,
94
+ "corridor": 0.19660565722733736,
95
+ "disturbance": 0.0012764433243622382,
96
+ "persistence": 1.1350401155650616,
97
+ "phase": 0.4035409850378831,
98
+ "planner_ranking": 1.0236077308654785,
99
+ "planner_risk": 0.009166777638408044,
100
+ "planner_success": 0.3850418192644914,
101
+ "proposal_ranking": 1.129315584897995,
102
+ "proposal_reconstruction": 0.06000282304982344,
103
+ "proposal_success": 0.6322548364599546,
104
+ "reocclusion": 0.22824073505277434,
105
+ "support_mode": 1.4410975078741709e-05,
106
+ "total": 1.0505772059162457,
107
+ "uncertainty": 0.001883886650224061,
108
+ "world_model": 1.3608256032069523
109
+ },
110
+ "val": {
111
+ "action": 0.015656203031539917,
112
+ "arm_role": 3.1802936541048945e-07,
113
+ "belief": 0.09277311464150746,
114
+ "corridor": 0.19478923082351685,
115
+ "disturbance": 0.001490643351442284,
116
+ "persistence": 1.2428188456429377,
117
+ "phase": 0.44441814886199105,
118
+ "planner_ranking": 1.1642935540941026,
119
+ "planner_risk": 0.008580206893384457,
120
+ "planner_success": 0.5712412032816145,
121
+ "proposal_ranking": 1.1173533731036716,
122
+ "proposal_reconstruction": 0.060967493802309036,
123
+ "proposal_success": 0.6362337801191542,
124
+ "reocclusion": 0.18877888905505338,
125
+ "support_mode": 8.590733412145508e-06,
126
+ "total": 1.164333701133728,
127
+ "uncertainty": 0.0018725828914385703,
128
+ "world_model": 1.5141921705669827
129
+ }
130
+ },
131
+ {
132
+ "epoch": 3,
133
+ "train": {
134
+ "action": 0.013463407677287856,
135
+ "arm_role": 2.1441115283238332e-07,
136
+ "belief": 0.09542769007384777,
137
+ "corridor": 0.19438757871588072,
138
+ "disturbance": 0.0012542814802145585,
139
+ "persistence": 1.0960917932291825,
140
+ "phase": 0.39501943811774254,
141
+ "planner_ranking": 0.9881478076179823,
142
+ "planner_risk": 0.008955476262296239,
143
+ "planner_success": 0.37380507588386536,
144
+ "proposal_ranking": 1.1260421325763066,
145
+ "proposal_reconstruction": 0.05954852948586146,
146
+ "proposal_success": 0.6345230092604955,
147
+ "reocclusion": 0.222653156456848,
148
+ "support_mode": 1.0468997061252594e-05,
149
+ "total": 1.029868942995866,
150
+ "uncertainty": 0.001529014749394264,
151
+ "world_model": 1.3265959272782009
152
+ },
153
+ "val": {
154
+ "action": 0.014502381595472494,
155
+ "arm_role": 1.8074554909554132e-07,
156
+ "belief": 0.09227573540475634,
157
+ "corridor": 0.19471332927544913,
158
+ "disturbance": 0.0014174091500333613,
159
+ "persistence": 1.2068392270141177,
160
+ "phase": 0.44181974563333726,
161
+ "planner_ranking": 1.1894211106830173,
162
+ "planner_risk": 0.008801783072865672,
163
+ "planner_success": 0.5882998870478736,
164
+ "proposal_ranking": 1.1234880420896742,
165
+ "proposal_reconstruction": 0.06003963781727685,
166
+ "proposal_success": 0.6316338512632582,
167
+ "reocclusion": 0.18501534023218685,
168
+ "support_mode": 1.0471259353532028e-05,
169
+ "total": 1.1484977669186063,
170
+ "uncertainty": 0.0011159069642114143,
171
+ "world_model": 1.402906020482381
172
+ }
173
+ },
174
+ {
175
+ "epoch": 4,
176
+ "train": {
177
+ "action": 0.013384843982445696,
178
+ "arm_role": 1.8212530328298726e-07,
179
+ "belief": 0.0940939641247193,
180
+ "corridor": 0.19484392801920572,
181
+ "disturbance": 0.0013209530419165578,
182
+ "persistence": 1.1009935376544793,
183
+ "phase": 0.39735961332917213,
184
+ "planner_ranking": 0.9340380703409513,
185
+ "planner_risk": 0.009769223863258958,
186
+ "planner_success": 0.35210378592212993,
187
+ "proposal_ranking": 1.1286269277334213,
188
+ "proposal_reconstruction": 0.05935003887861967,
189
+ "proposal_success": 0.6315460602442423,
190
+ "reocclusion": 0.22644051164388657,
191
+ "support_mode": 8.073221484513246e-06,
192
+ "total": 1.0135142927368481,
193
+ "uncertainty": 0.0014145106833893806,
194
+ "world_model": 1.3229995171229045
195
+ },
196
+ "val": {
197
+ "action": 0.014795408584177494,
198
+ "arm_role": 2.842257956893314e-07,
199
+ "belief": 0.09148034122255114,
200
+ "corridor": 0.1952296942472458,
201
+ "disturbance": 0.0014219412179146376,
202
+ "persistence": 1.2065883709324732,
203
+ "phase": 0.4573909127049976,
204
+ "planner_ranking": 1.264210171169705,
205
+ "planner_risk": 0.008240946154627535,
206
+ "planner_success": 0.6136878695752885,
207
+ "proposal_ranking": 1.1302801105711195,
208
+ "proposal_reconstruction": 0.06015601671404309,
209
+ "proposal_success": 0.6339429616928101,
210
+ "reocclusion": 0.18241143381843963,
211
+ "support_mode": 5.932560725341318e-06,
212
+ "total": 1.1784167952007718,
213
+ "uncertainty": 0.0014715428373569415,
214
+ "world_model": 1.4285426007376776
215
+ }
216
+ },
217
+ {
218
+ "epoch": 5,
219
+ "train": {
220
+ "action": 0.013003619310135642,
221
+ "arm_role": 1.6706892166003703e-07,
222
+ "belief": 0.09372370348622401,
223
+ "corridor": 0.19377528379360834,
224
+ "disturbance": 0.0012515889684436843,
225
+ "persistence": 1.087764959782362,
226
+ "phase": 0.39413714533050853,
227
+ "planner_ranking": 0.8574716374278069,
228
+ "planner_risk": 0.00931960518937558,
229
+ "planner_success": 0.32699467862645787,
230
+ "proposal_ranking": 1.1296403209368389,
231
+ "proposal_reconstruction": 0.058937749825417995,
232
+ "proposal_success": 0.6314020653565725,
233
+ "reocclusion": 0.22137584226826826,
234
+ "support_mode": 6.786340643808823e-06,
235
+ "total": 0.9859138304988543,
236
+ "uncertainty": 0.0011173486830860686,
237
+ "world_model": 1.3007333129644394
238
+ },
239
+ "val": {
240
+ "action": 0.014327830738491483,
241
+ "arm_role": 2.553892981538297e-07,
242
+ "belief": 0.0923299789428711,
243
+ "corridor": 0.19848757651117113,
244
+ "disturbance": 0.0011894687777385116,
245
+ "persistence": 1.2340974575943418,
246
+ "phase": 0.4644339034954707,
247
+ "planner_ranking": 1.3578486972384982,
248
+ "planner_risk": 0.009015874264554845,
249
+ "planner_success": 0.6275921530193753,
250
+ "proposal_ranking": 1.1163699362013075,
251
+ "proposal_reconstruction": 0.05984223840965165,
252
+ "proposal_success": 0.6348666879865859,
253
+ "reocclusion": 0.20307053801500136,
254
+ "support_mode": 3.7443181150188643e-06,
255
+ "total": 1.2028855217827692,
256
+ "uncertainty": 0.0018055843215228783,
257
+ "world_model": 1.4401142862108018
258
+ }
259
+ },
260
+ {
261
+ "epoch": 6,
262
+ "train": {
263
+ "action": 0.012725909279348949,
264
+ "arm_role": 1.4006056699618816e-07,
265
+ "belief": 0.09327782255907853,
266
+ "corridor": 0.19324024704595408,
267
+ "disturbance": 0.0013581588767313708,
268
+ "persistence": 1.0872996002435684,
269
+ "phase": 0.3942833219965299,
270
+ "planner_ranking": 0.8039915859699249,
271
+ "planner_risk": 0.009058927069418132,
272
+ "planner_success": 0.3132968743642171,
273
+ "proposal_ranking": 1.1225138505299885,
274
+ "proposal_reconstruction": 0.058770577888935804,
275
+ "proposal_success": 0.6332228208581606,
276
+ "reocclusion": 0.22015962299580374,
277
+ "support_mode": 4.966122408707936e-06,
278
+ "total": 0.9676197816928228,
279
+ "uncertainty": 0.0011598596538533457,
280
+ "world_model": 1.2878785928090413
281
+ },
282
+ "val": {
283
+ "action": 0.014496596633560128,
284
+ "arm_role": 2.192401922229692e-07,
285
+ "belief": 0.090823319223192,
286
+ "corridor": 0.19339712626404232,
287
+ "disturbance": 0.0016455024532559845,
288
+ "persistence": 1.2035431563854218,
289
+ "phase": 0.45077220764425063,
290
+ "planner_ranking": 1.4061412149005466,
291
+ "planner_risk": 0.008559927913463779,
292
+ "planner_success": 0.6576948232120938,
293
+ "proposal_ranking": 1.115302946832445,
294
+ "proposal_reconstruction": 0.059833423958884344,
295
+ "proposal_success": 0.6364065806070963,
296
+ "reocclusion": 0.1801526459554831,
297
+ "support_mode": 4.350292038503136e-06,
298
+ "total": 1.2042852375242445,
299
+ "uncertainty": 0.0007912304588697023,
300
+ "world_model": 1.3813848230573866
301
+ }
302
+ },
303
+ {
304
+ "epoch": 7,
305
+ "train": {
306
+ "action": 0.012622703972738236,
307
+ "arm_role": 1.0477378964424133e-07,
308
+ "belief": 0.09258855165292819,
309
+ "corridor": 0.19252262574930987,
310
+ "disturbance": 0.0013018598037888296,
311
+ "persistence": 1.066667130837838,
312
+ "phase": 0.3908200403675437,
313
+ "planner_ranking": 0.7140753443042437,
314
+ "planner_risk": 0.009592532160847137,
315
+ "planner_success": 0.2998263432333867,
316
+ "proposal_ranking": 1.125225270787875,
317
+ "proposal_reconstruction": 0.05837386598189672,
318
+ "proposal_success": 0.630388061205546,
319
+ "reocclusion": 0.21032434065515795,
320
+ "support_mode": 4.515569240008214e-06,
321
+ "total": 0.9377426480253538,
322
+ "uncertainty": 0.0009036514068914888,
323
+ "world_model": 1.2537205666303635
324
+ },
325
+ "val": {
326
+ "action": 0.01393873720533318,
327
+ "arm_role": 1.9219735413066195e-07,
328
+ "belief": 0.09070102870464325,
329
+ "corridor": 0.19287915196683672,
330
+ "disturbance": 0.0018688688416861826,
331
+ "persistence": 1.1937838825914595,
332
+ "phase": 0.44357551468743217,
333
+ "planner_ranking": 1.4278014368481107,
334
+ "planner_risk": 0.007952027747200595,
335
+ "planner_success": 0.6735637684663137,
336
+ "proposal_ranking": 1.121930678685506,
337
+ "proposal_reconstruction": 0.059713507278098,
338
+ "proposal_success": 0.6304158767064413,
339
+ "reocclusion": 0.17670889291912317,
340
+ "support_mode": 4.034886008715451e-06,
341
+ "total": 1.211510909928216,
342
+ "uncertainty": 0.0006605643041742345,
343
+ "world_model": 1.383787711461385
344
+ }
345
+ }
346
+ ]
artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/summary.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "experiment_name": "proxy_interaction_state_recency_oracleft",
3
+ "device": "cuda",
4
+ "best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/checkpoint_best.pt",
5
+ "final_train_total": 0.9377426480253538,
6
+ "final_val_total": 1.211510909928216,
7
+ "num_train_samples": 380,
8
+ "num_val_samples": 131,
9
+ "planner_mode": "trainable",
10
+ "frozen_modules": [],
11
+ "init_info": {
12
+ "path": "/workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt",
13
+ "missing_keys": [],
14
+ "unexpected_keys": []
15
+ }
16
+ }
artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backbone_clip": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.2916666666666667,
5
+ "bag_proxy": 0.4166666666666667,
6
+ "cloth_proxy": 0.2916666666666667
7
+ },
8
+ "mean_success": 0.3333333333333333,
9
+ "visibility_integral": 5.090650259620613,
10
+ "corridor_availability": 0.30186899772120845,
11
+ "reocclusion_rate": 0.013541666666666667,
12
+ "persistence_horizon_mae": 0.0,
13
+ "disturbance_cost": 0.36051484262053335
14
+ },
15
+ "reveal_clip": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.20833333333333334,
18
+ "bag_proxy": 0.25,
19
+ "cloth_proxy": 0.16666666666666666
20
+ },
21
+ "mean_success": 0.20833333333333334,
22
+ "visibility_integral": 48.42640474935373,
23
+ "corridor_availability": 0.8251730443702804,
24
+ "reocclusion_rate": 0.06718750000000001,
25
+ "persistence_horizon_mae": 0.9353625932762888,
26
+ "disturbance_cost": 0.7097413324647479
27
+ }
28
+ }
artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.md ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## backbone_clip
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
5
+ - mean_success: 0.333
6
+ - visibility_integral: 5.091
7
+ - corridor_availability: 0.302
8
+ - reocclusion_rate: 0.014
9
+ - persistence_horizon_mae: 0.000
10
+ - disturbance_cost: 0.361
11
+ - foliage_proxy_success: 0.292
12
+ - bag_proxy_success: 0.417
13
+ - cloth_proxy_success: 0.292
14
+
15
+ ## reveal_clip
16
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_reveal_state_clip/checkpoint_best.pt
17
+ - mean_success: 0.208
18
+ - visibility_integral: 48.426
19
+ - corridor_availability: 0.825
20
+ - reocclusion_rate: 0.067
21
+ - persistence_horizon_mae: 0.935
22
+ - disturbance_cost: 0.710
23
+ - foliage_proxy_success: 0.208
24
+ - bag_proxy_success: 0.250
25
+ - cloth_proxy_success: 0.167
artifacts/outputs/interaction_debug/reveal_eval_commit2_compare/reveal_benchmark.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "interaction": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4166666666666667,
5
+ "bag_proxy": 0.5416666666666666,
6
+ "cloth_proxy": 0.5833333333333334
7
+ },
8
+ "mean_success": 0.5138888888888888,
9
+ "visibility_integral": 32.35977659953965,
10
+ "corridor_availability": 0.8802236508991983,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 1.1419724687506017,
13
+ "disturbance_cost": 0.49480460506553453
14
+ },
15
+ "backbone": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.4166666666666667,
18
+ "bag_proxy": 0.5833333333333334,
19
+ "cloth_proxy": 0.625
20
+ },
21
+ "mean_success": 0.5416666666666666,
22
+ "visibility_integral": 30.58145251042313,
23
+ "corridor_availability": 0.8679845299985673,
24
+ "reocclusion_rate": 0.0,
25
+ "persistence_horizon_mae": 0.0,
26
+ "disturbance_cost": 0.47382067630274427
27
+ },
28
+ "reveal": {
29
+ "per_task_success": {
30
+ "foliage_proxy": 0.4166666666666667,
31
+ "bag_proxy": 0.5833333333333334,
32
+ "cloth_proxy": 0.6666666666666666
33
+ },
34
+ "mean_success": 0.5555555555555555,
35
+ "visibility_integral": 29.508656750122707,
36
+ "corridor_availability": 0.8612986240122054,
37
+ "reocclusion_rate": 0.0,
38
+ "persistence_horizon_mae": 2.3659667054579057,
39
+ "disturbance_cost": 0.47035404020506477
40
+ }
41
+ }
artifacts/outputs/interaction_debug/reveal_eval_commit2_compare/reveal_benchmark.md ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
5
+ - mean_success: 0.514
6
+ - visibility_integral: 32.360
7
+ - corridor_availability: 0.880
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 1.142
10
+ - disturbance_cost: 0.495
11
+ - foliage_proxy_success: 0.417
12
+ - bag_proxy_success: 0.542
13
+ - cloth_proxy_success: 0.583
14
+
15
+ ## backbone
16
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only/checkpoint_best.pt
17
+ - mean_success: 0.542
18
+ - visibility_integral: 30.581
19
+ - corridor_availability: 0.868
20
+ - reocclusion_rate: 0.000
21
+ - persistence_horizon_mae: 0.000
22
+ - disturbance_cost: 0.474
23
+ - foliage_proxy_success: 0.417
24
+ - bag_proxy_success: 0.583
25
+ - cloth_proxy_success: 0.625
26
+
27
+ ## reveal
28
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_reveal_state/checkpoint_best.pt
29
+ - mean_success: 0.556
30
+ - visibility_integral: 29.509
31
+ - corridor_availability: 0.861
32
+ - reocclusion_rate: 0.000
33
+ - persistence_horizon_mae: 2.366
34
+ - disturbance_cost: 0.470
35
+ - foliage_proxy_success: 0.417
36
+ - bag_proxy_success: 0.583
37
+ - cloth_proxy_success: 0.667
artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_recency_evalonly/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "interaction": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4166666666666667,
5
+ "bag_proxy": 0.5416666666666666,
6
+ "cloth_proxy": 0.625
7
+ },
8
+ "mean_success": 0.5277777777777778,
9
+ "visibility_integral": 32.95856812927458,
10
+ "corridor_availability": 0.8741476759314537,
11
+ "reocclusion_rate": 0.0006944444444444445,
12
+ "persistence_horizon_mae": 1.1703627435402033,
13
+ "disturbance_cost": 0.42908077666329014
14
+ }
15
+ }
artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_recency_evalonly/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
5
+ - mean_success: 0.528
6
+ - visibility_integral: 32.959
7
+ - corridor_availability: 0.874
8
+ - reocclusion_rate: 0.001
9
+ - persistence_horizon_mae: 1.170
10
+ - disturbance_cost: 0.429
11
+ - foliage_proxy_success: 0.417
12
+ - bag_proxy_success: 0.542
13
+ - cloth_proxy_success: 0.625
artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "interaction_clip": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.2916666666666667,
5
+ "bag_proxy": 0.2916666666666667,
6
+ "cloth_proxy": 0.3333333333333333
7
+ },
8
+ "mean_success": 0.3055555555555556,
9
+ "visibility_integral": 10.379729785852962,
10
+ "corridor_availability": 0.38910322284532917,
11
+ "reocclusion_rate": 0.026909722222222224,
12
+ "persistence_horizon_mae": 3.8014686041765726,
13
+ "disturbance_cost": 0.392014082081409
14
+ },
15
+ "backbone_clip": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.2916666666666667,
18
+ "bag_proxy": 0.4166666666666667,
19
+ "cloth_proxy": 0.2916666666666667
20
+ },
21
+ "mean_success": 0.3333333333333333,
22
+ "visibility_integral": 5.090670637786388,
23
+ "corridor_availability": 0.30186899772120845,
24
+ "reocclusion_rate": 0.013541666666666667,
25
+ "persistence_horizon_mae": 0.0,
26
+ "disturbance_cost": 0.36051381931045196
27
+ },
28
+ "reveal_clip": {
29
+ "per_task_success": {
30
+ "foliage_proxy": 0.20833333333333334,
31
+ "bag_proxy": 0.25,
32
+ "cloth_proxy": 0.16666666666666666
33
+ },
34
+ "mean_success": 0.20833333333333334,
35
+ "visibility_integral": 48.426281129320465,
36
+ "corridor_availability": 0.8251730443702804,
37
+ "reocclusion_rate": 0.06718750000000001,
38
+ "persistence_horizon_mae": 0.9353624902194482,
39
+ "disturbance_cost": 0.709741123020649
40
+ }
41
+ }
artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.md ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction_clip
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/checkpoint_best.pt
5
+ - mean_success: 0.306
6
+ - visibility_integral: 10.380
7
+ - corridor_availability: 0.389
8
+ - reocclusion_rate: 0.027
9
+ - persistence_horizon_mae: 3.801
10
+ - disturbance_cost: 0.392
11
+ - foliage_proxy_success: 0.292
12
+ - bag_proxy_success: 0.292
13
+ - cloth_proxy_success: 0.333
14
+
15
+ ## backbone_clip
16
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
17
+ - mean_success: 0.333
18
+ - visibility_integral: 5.091
19
+ - corridor_availability: 0.302
20
+ - reocclusion_rate: 0.014
21
+ - persistence_horizon_mae: 0.000
22
+ - disturbance_cost: 0.361
23
+ - foliage_proxy_success: 0.292
24
+ - bag_proxy_success: 0.417
25
+ - cloth_proxy_success: 0.292
26
+
27
+ ## reveal_clip
28
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_reveal_state_clip/checkpoint_best.pt
29
+ - mean_success: 0.208
30
+ - visibility_integral: 48.426
31
+ - corridor_availability: 0.825
32
+ - reocclusion_rate: 0.067
33
+ - persistence_horizon_mae: 0.935
34
+ - disturbance_cost: 0.710
35
+ - foliage_proxy_success: 0.208
36
+ - bag_proxy_success: 0.250
37
+ - cloth_proxy_success: 0.167
artifacts/outputs/interaction_debug/reveal_eval_interaction_recency_oracleft_commit4_compare/reveal_benchmark.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "interaction": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4166666666666667,
5
+ "bag_proxy": 0.5833333333333334,
6
+ "cloth_proxy": 0.5833333333333334
7
+ },
8
+ "mean_success": 0.5277777777777778,
9
+ "visibility_integral": 31.56379758318265,
10
+ "corridor_availability": 0.8745781282583872,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 1.0204093086471828,
13
+ "disturbance_cost": 0.4148087627771828
14
+ },
15
+ "backbone": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.4166666666666667,
18
+ "bag_proxy": 0.625,
19
+ "cloth_proxy": 0.6666666666666666
20
+ },
21
+ "mean_success": 0.5694444444444445,
22
+ "visibility_integral": 28.655961725446915,
23
+ "corridor_availability": 0.7943478326002756,
24
+ "reocclusion_rate": 0.07666819352674617,
25
+ "persistence_horizon_mae": 0.0,
26
+ "disturbance_cost": 0.3941483147856262
27
+ },
28
+ "reveal": {
29
+ "per_task_success": {
30
+ "foliage_proxy": 0.4166666666666667,
31
+ "bag_proxy": 0.5833333333333334,
32
+ "cloth_proxy": 0.625
33
+ },
34
+ "mean_success": 0.5416666666666666,
35
+ "visibility_integral": 30.121625943316353,
36
+ "corridor_availability": 0.8142780106928613,
37
+ "reocclusion_rate": 0.051547468734968724,
38
+ "persistence_horizon_mae": 2.102369644222497,
39
+ "disturbance_cost": 0.42389609825073016
40
+ }
41
+ }
artifacts/outputs/interaction_debug/reveal_eval_interaction_recency_oracleft_commit4_compare/reveal_benchmark.md ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/checkpoint_best.pt
5
+ - mean_success: 0.528
6
+ - visibility_integral: 31.564
7
+ - corridor_availability: 0.875
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 1.020
10
+ - disturbance_cost: 0.415
11
+ - foliage_proxy_success: 0.417
12
+ - bag_proxy_success: 0.583
13
+ - cloth_proxy_success: 0.583
14
+
15
+ ## backbone
16
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only/checkpoint_best.pt
17
+ - mean_success: 0.569
18
+ - visibility_integral: 28.656
19
+ - corridor_availability: 0.794
20
+ - reocclusion_rate: 0.077
21
+ - persistence_horizon_mae: 0.000
22
+ - disturbance_cost: 0.394
23
+ - foliage_proxy_success: 0.417
24
+ - bag_proxy_success: 0.625
25
+ - cloth_proxy_success: 0.667
26
+
27
+ ## reveal
28
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_reveal_state/checkpoint_best.pt
29
+ - mean_success: 0.542
30
+ - visibility_integral: 30.122
31
+ - corridor_availability: 0.814
32
+ - reocclusion_rate: 0.052
33
+ - persistence_horizon_mae: 2.102
34
+ - disturbance_cost: 0.424
35
+ - foliage_proxy_success: 0.417
36
+ - bag_proxy_success: 0.583
37
+ - cloth_proxy_success: 0.625
artifacts/outputs/interaction_debug/smoke_checks_actionhist/smoke_checks.json ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "proxy": {
3
+ "losses": {
4
+ "action": 0.7160568833351135,
5
+ "phase": 1.4088108539581299,
6
+ "arm_role": 1.5109761953353882,
7
+ "support_mode": 1.3240256309509277,
8
+ "corridor": 0.6577756404876709,
9
+ "persistence": 1.7929389476776123,
10
+ "disturbance": 0.17437878251075745,
11
+ "belief": 0.5924442410469055,
12
+ "reocclusion": 0.780390202999115,
13
+ "uncertainty": 0.7132201194763184,
14
+ "world_model": 3.2925755977630615,
15
+ "planner_success": 0.7400866150856018,
16
+ "planner_risk": 0.032505519688129425,
17
+ "planner_ranking": 0.2076394408941269,
18
+ "proposal_reconstruction": 0.8539058566093445,
19
+ "proposal_success": 0.6675869226455688,
20
+ "proposal_ranking": 0.12013991177082062,
21
+ "total": 1.9776512384414673
22
+ },
23
+ "grad_norm": 3.735501527786255,
24
+ "candidate_shape": [
25
+ 2,
26
+ 4,
27
+ 4,
28
+ 14
29
+ ],
30
+ "rollout_phase_shape": [
31
+ 2,
32
+ 4,
33
+ 4,
34
+ 5
35
+ ]
36
+ },
37
+ "rlbench": {
38
+ "losses": {
39
+ "action": 0.6058900356292725,
40
+ "world_model": 0.0,
41
+ "planner_success": 0.0,
42
+ "planner_risk": 0.0,
43
+ "planner_ranking": 0.0,
44
+ "proposal_reconstruction": 0.0,
45
+ "proposal_success": 0.0,
46
+ "proposal_ranking": 0.0,
47
+ "total": 0.6058900356292725
48
+ },
49
+ "grad_norm": 2.581531286239624,
50
+ "candidate_shape": [
51
+ 2,
52
+ 4,
53
+ 4,
54
+ 14
55
+ ],
56
+ "rollout_phase_shape": [
57
+ 2,
58
+ 4,
59
+ 4,
60
+ 5
61
+ ],
62
+ "planner_enabled_for_eval": true,
63
+ "frozen_modules": [
64
+ "interaction_head",
65
+ "world_model",
66
+ "planner"
67
+ ]
68
+ },
69
+ "policy_config": {
70
+ "backbone": {
71
+ "model_name": "openai/clip-vit-base-patch32",
72
+ "hidden_dim": 64,
73
+ "max_text_tokens": 32,
74
+ "freeze_backbone": true,
75
+ "gradient_checkpointing": false,
76
+ "use_dummy_backbone": true
77
+ },
78
+ "fusion": {
79
+ "hidden_dim": 64,
80
+ "num_cameras": 3,
81
+ "num_layers": 2,
82
+ "num_heads": 4,
83
+ "ff_dim": 128,
84
+ "dropout": 0.1,
85
+ "proprio_dim": 32,
86
+ "proprio_tokens": 1
87
+ },
88
+ "memory": {
89
+ "hidden_dim": 64,
90
+ "action_dim": 14,
91
+ "history_steps": 6,
92
+ "num_layers": 2,
93
+ "dropout": 0.1,
94
+ "memory_bank_size": 4,
95
+ "num_heads": 4,
96
+ "max_history_steps": 8
97
+ },
98
+ "decoder": {
99
+ "hidden_dim": 64,
100
+ "num_heads": 4,
101
+ "num_layers": 2,
102
+ "ff_dim": 128,
103
+ "dropout": 0.1,
104
+ "chunk_size": 4,
105
+ "action_dim": 14,
106
+ "arm_action_dim": 7,
107
+ "num_candidates": 4,
108
+ "num_phases": 5,
109
+ "num_arm_roles": 4
110
+ },
111
+ "reveal_head": {
112
+ "hidden_dim": 64,
113
+ "num_support_modes": 3,
114
+ "num_approach_templates": 32,
115
+ "rollout_horizon": 3,
116
+ "belief_map_size": 32,
117
+ "field_size": 16,
118
+ "num_heads": 4,
119
+ "predict_belief_map": true,
120
+ "num_phases": 5,
121
+ "num_arm_roles": 4,
122
+ "num_interaction_tokens": 8
123
+ },
124
+ "world_model": {
125
+ "hidden_dim": 64,
126
+ "action_dim": 14,
127
+ "num_support_modes": 3,
128
+ "num_approach_templates": 32,
129
+ "rollout_horizon": 3,
130
+ "field_size": 16,
131
+ "num_heads": 4,
132
+ "num_phases": 5,
133
+ "num_arm_roles": 4,
134
+ "num_interaction_tokens": 8,
135
+ "belief_map_size": 32,
136
+ "predict_belief_map": true
137
+ },
138
+ "planner": {
139
+ "hidden_dim": 64,
140
+ "num_candidates": 4,
141
+ "action_dim": 14,
142
+ "num_support_modes": 3,
143
+ "utility_margin": 0.1,
144
+ "corridor_weight": 1.0,
145
+ "persistence_weight": 0.5,
146
+ "proposal_weight": 0.5,
147
+ "task_progress_weight": 0.75,
148
+ "disturbance_weight": 0.75,
149
+ "reocclusion_weight": 0.5,
150
+ "visibility_weight": 0.25,
151
+ "num_heads": 4,
152
+ "num_layers": 2,
153
+ "num_phases": 5,
154
+ "num_arm_roles": 4
155
+ }
156
+ }
157
+ }
code/reveal_vla_bimanual/eval/run_proxy_diagnostics.py CHANGED
@@ -62,6 +62,7 @@ def main() -> None:
62
  texts=moved["texts"],
63
  history_images=moved.get("history_images"),
64
  history_proprio=moved.get("history_proprio"),
 
65
  plan=True,
66
  candidate_chunks_override=moved["candidate_action_chunks"],
67
  )
 
62
  texts=moved["texts"],
63
  history_images=moved.get("history_images"),
64
  history_proprio=moved.get("history_proprio"),
65
+ history_actions=moved.get("history_actions"),
66
  plan=True,
67
  candidate_chunks_override=moved["candidate_action_chunks"],
68
  )
code/reveal_vla_bimanual/eval/run_reveal_benchmark.py CHANGED
@@ -53,7 +53,18 @@ def load_model(checkpoint_path: str | Path, device: torch.device) -> tuple[torch
53
  policy_config = _policy_config_from_dict(checkpoint["policy_config"])
54
  trainer_config = _trainer_config_from_dict(checkpoint["trainer_config"])
55
  model = build_policy(policy_config, trainer_config).to(device)
56
- model.load_state_dict(checkpoint["state_dict"])
 
 
 
 
 
 
 
 
 
 
 
57
  model.eval()
58
  return model, checkpoint
59
 
@@ -63,11 +74,13 @@ def _prepare_batch(
63
  device: torch.device,
64
  history_images: list[np.ndarray] | None = None,
65
  history_proprio: list[np.ndarray] | None = None,
 
66
  ) -> dict[str, Any]:
67
  images = torch.from_numpy(observation["images"]).permute(0, 3, 1, 2).unsqueeze(0).float() / 255.0
68
  proprio = torch.from_numpy(observation["proprio"]).unsqueeze(0).float()
69
  history_images = history_images or []
70
  history_proprio = history_proprio or []
 
71
  if history_images:
72
  history_images_tensor = (
73
  torch.from_numpy(np.stack(history_images, axis=0)).permute(0, 1, 4, 2, 3).unsqueeze(0).float() / 255.0
@@ -81,10 +94,15 @@ def _prepare_batch(
81
  history_proprio_tensor = torch.from_numpy(np.stack(history_proprio, axis=0)).unsqueeze(0).float()
82
  else:
83
  history_proprio_tensor = torch.zeros((1, 0, proprio.shape[-1]), dtype=torch.float32)
 
 
 
 
84
  return {
85
  "images": images.to(device),
86
  "history_images": history_images_tensor.to(device),
87
  "history_proprio": history_proprio_tensor.to(device),
 
88
  "proprio": proprio.to(device),
89
  "texts": [observation["text"]],
90
  }
@@ -109,6 +127,7 @@ def select_chunk(
109
  "images": images,
110
  "history_images": batch.get("history_images"),
111
  "history_proprio": batch.get("history_proprio"),
 
112
  "proprio": batch["proprio"],
113
  "texts": batch["texts"],
114
  }
@@ -153,6 +172,7 @@ def evaluate_model(
153
  episodes: int,
154
  resolution: int,
155
  ablation: str | None = None,
 
156
  ) -> BenchmarkMetrics:
157
  per_task_success: dict[str, float] = {}
158
  visibility_scores = []
@@ -176,6 +196,7 @@ def evaluate_model(
176
  episode_disturbance = [float(privileged_state["disturbance_cost"])]
177
  history_images: list[np.ndarray] = []
178
  history_proprio: list[np.ndarray] = []
 
179
  done = False
180
  while not done:
181
  batch = _prepare_batch(
@@ -183,20 +204,10 @@ def evaluate_model(
183
  device=device,
184
  history_images=history_images,
185
  history_proprio=history_proprio,
 
186
  )
187
  with torch.no_grad():
188
  chunk, outputs = select_chunk(model, batch, ablation=ablation)
189
- action = chunk[0, 0].detach().cpu().numpy()
190
- if history_steps > 0:
191
- if len(history_images) >= history_steps:
192
- history_images = history_images[-history_steps + 1 :]
193
- history_proprio = history_proprio[-history_steps + 1 :]
194
- history_images.append(observation["images"])
195
- history_proprio.append(observation["proprio"])
196
- observation, _, terminated, truncated, privileged_state = env.step(action)
197
- episode_visibility.append(float(privileged_state["visibility"]))
198
- episode_corridor.append(float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any()))
199
- episode_disturbance.append(float(privileged_state["disturbance_cost"]))
200
  state_output = outputs.get("interaction_state")
201
  if state_output is None:
202
  state_output = outputs.get("reveal_state")
@@ -207,7 +218,26 @@ def evaluate_model(
207
  privileged_state["persistence_horizon"],
208
  )
209
  )
210
- done = bool(terminated or truncated)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  successes.append(float(privileged_state["retrieval_success"]))
212
  visibility_scores.append(visibility_integral(np.asarray(episode_visibility)))
213
  corridor_scores.append(corridor_availability(np.asarray(episode_corridor)))
@@ -246,6 +276,7 @@ def main() -> None:
246
  parser.add_argument("--ablation", default=None)
247
  parser.add_argument("--output-root", default="/workspace/reports/reveal_eval")
248
  parser.add_argument("--proxies", nargs="*", default=None)
 
249
  args = parser.parse_args()
250
 
251
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -266,6 +297,7 @@ def main() -> None:
266
  episodes=args.episodes,
267
  resolution=resolution,
268
  ablation=args.ablation,
 
269
  )
270
  raw_metrics[label] = _metrics_to_dict(metrics)
271
  sections[label] = {
 
53
  policy_config = _policy_config_from_dict(checkpoint["policy_config"])
54
  trainer_config = _trainer_config_from_dict(checkpoint["trainer_config"])
55
  model = build_policy(policy_config, trainer_config).to(device)
56
+ incompatible = model.load_state_dict(checkpoint["state_dict"], strict=False)
57
+ allowed_missing = {
58
+ key
59
+ for key in incompatible.missing_keys
60
+ if key.startswith("memory.action_proj.") or key.endswith("arm_identity.weight")
61
+ }
62
+ missing_other = sorted(set(incompatible.missing_keys) - allowed_missing)
63
+ if missing_other or incompatible.unexpected_keys:
64
+ raise RuntimeError(
65
+ "Checkpoint load failed due to incompatible weights. "
66
+ f"Missing keys: {missing_other}. Unexpected keys: {list(incompatible.unexpected_keys)}"
67
+ )
68
  model.eval()
69
  return model, checkpoint
70
 
 
74
  device: torch.device,
75
  history_images: list[np.ndarray] | None = None,
76
  history_proprio: list[np.ndarray] | None = None,
77
+ history_actions: list[np.ndarray] | None = None,
78
  ) -> dict[str, Any]:
79
  images = torch.from_numpy(observation["images"]).permute(0, 3, 1, 2).unsqueeze(0).float() / 255.0
80
  proprio = torch.from_numpy(observation["proprio"]).unsqueeze(0).float()
81
  history_images = history_images or []
82
  history_proprio = history_proprio or []
83
+ history_actions = history_actions or []
84
  if history_images:
85
  history_images_tensor = (
86
  torch.from_numpy(np.stack(history_images, axis=0)).permute(0, 1, 4, 2, 3).unsqueeze(0).float() / 255.0
 
94
  history_proprio_tensor = torch.from_numpy(np.stack(history_proprio, axis=0)).unsqueeze(0).float()
95
  else:
96
  history_proprio_tensor = torch.zeros((1, 0, proprio.shape[-1]), dtype=torch.float32)
97
+ if history_actions:
98
+ history_actions_tensor = torch.from_numpy(np.stack(history_actions, axis=0)).unsqueeze(0).float()
99
+ else:
100
+ history_actions_tensor = torch.zeros((1, 0, 14), dtype=torch.float32)
101
  return {
102
  "images": images.to(device),
103
  "history_images": history_images_tensor.to(device),
104
  "history_proprio": history_proprio_tensor.to(device),
105
+ "history_actions": history_actions_tensor.to(device),
106
  "proprio": proprio.to(device),
107
  "texts": [observation["text"]],
108
  }
 
127
  "images": images,
128
  "history_images": batch.get("history_images"),
129
  "history_proprio": batch.get("history_proprio"),
130
+ "history_actions": batch.get("history_actions"),
131
  "proprio": batch["proprio"],
132
  "texts": batch["texts"],
133
  }
 
172
  episodes: int,
173
  resolution: int,
174
  ablation: str | None = None,
175
+ chunk_commit_steps: int | None = None,
176
  ) -> BenchmarkMetrics:
177
  per_task_success: dict[str, float] = {}
178
  visibility_scores = []
 
196
  episode_disturbance = [float(privileged_state["disturbance_cost"])]
197
  history_images: list[np.ndarray] = []
198
  history_proprio: list[np.ndarray] = []
199
+ history_actions: list[np.ndarray] = []
200
  done = False
201
  while not done:
202
  batch = _prepare_batch(
 
204
  device=device,
205
  history_images=history_images,
206
  history_proprio=history_proprio,
207
+ history_actions=history_actions,
208
  )
209
  with torch.no_grad():
210
  chunk, outputs = select_chunk(model, batch, ablation=ablation)
 
 
 
 
 
 
 
 
 
 
 
211
  state_output = outputs.get("interaction_state")
212
  if state_output is None:
213
  state_output = outputs.get("reveal_state")
 
218
  privileged_state["persistence_horizon"],
219
  )
220
  )
221
+ chunk_np = chunk[0].detach().cpu().numpy()
222
+ commit_steps = chunk_np.shape[0] if chunk_commit_steps is None else min(chunk_commit_steps, chunk_np.shape[0])
223
+ for action in chunk_np[:commit_steps]:
224
+ if history_steps > 0:
225
+ if len(history_images) >= history_steps:
226
+ history_images = history_images[-history_steps + 1 :]
227
+ history_proprio = history_proprio[-history_steps + 1 :]
228
+ history_actions = history_actions[-history_steps + 1 :]
229
+ history_images.append(observation["images"])
230
+ history_proprio.append(observation["proprio"])
231
+ history_actions.append(action.astype(np.float32))
232
+ observation, _, terminated, truncated, privileged_state = env.step(action)
233
+ episode_visibility.append(float(privileged_state["visibility"]))
234
+ episode_corridor.append(
235
+ float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any())
236
+ )
237
+ episode_disturbance.append(float(privileged_state["disturbance_cost"]))
238
+ done = bool(terminated or truncated)
239
+ if done:
240
+ break
241
  successes.append(float(privileged_state["retrieval_success"]))
242
  visibility_scores.append(visibility_integral(np.asarray(episode_visibility)))
243
  corridor_scores.append(corridor_availability(np.asarray(episode_corridor)))
 
276
  parser.add_argument("--ablation", default=None)
277
  parser.add_argument("--output-root", default="/workspace/reports/reveal_eval")
278
  parser.add_argument("--proxies", nargs="*", default=None)
279
+ parser.add_argument("--chunk-commit-steps", type=int, default=0)
280
  args = parser.parse_args()
281
 
282
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
297
  episodes=args.episodes,
298
  resolution=resolution,
299
  ablation=args.ablation,
300
+ chunk_commit_steps=(None if args.chunk_commit_steps <= 0 else args.chunk_commit_steps),
301
  )
302
  raw_metrics[label] = _metrics_to_dict(metrics)
303
  sections[label] = {
code/reveal_vla_bimanual/eval/run_rlbench_rollout_eval.py CHANGED
@@ -52,17 +52,66 @@ def _episode_language_goal(descriptions: Sequence[str]) -> str:
52
  return str(descriptions[0]) if descriptions else ""
53
 
54
 
55
- def _step_bimanual_chunk(task: Any, obs: Any, delta_action: np.ndarray) -> tuple[Any, float, bool]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  total_reward = 0.0
57
  done = False
58
  next_obs = obs
 
 
59
  for arm_name in ("right", "left"):
60
- env_action = single_arm_absolute_action_from_delta(next_obs, delta_action, arm_name, ignore_collisions=True)
61
- next_obs, reward, done = task.step(env_action, arm_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  total_reward += float(reward)
63
  if reward >= 1.0 or done:
64
  break
65
- return next_obs, total_reward, done
66
 
67
 
68
  def main() -> None:
@@ -78,6 +127,7 @@ def main() -> None:
78
  parser.add_argument("--allow-unsupervised-planning", action="store_true")
79
  parser.add_argument("--disable-support-mode-conditioning", action="store_true")
80
  parser.add_argument("--headless", action="store_true", default=True)
 
81
  args = parser.parse_args()
82
 
83
  checkpoint = torch.load(Path(args.checkpoint), map_location="cpu", weights_only=False)
@@ -85,7 +135,18 @@ def main() -> None:
85
  trainer_config = _trainer_config_from_checkpoint(checkpoint)
86
  device = torch.device("cuda" if torch.cuda.is_available() and args.device == "cuda" else "cpu")
87
  model = build_policy(policy_config, trainer_config).to(device)
88
- model.load_state_dict(checkpoint["state_dict"], strict=True)
 
 
 
 
 
 
 
 
 
 
 
89
  model.eval()
90
  plan_requested = bool(args.plan)
91
  plan_applied = plan_requested and planner_enabled(trainer_config, during_eval=True)
@@ -144,10 +205,14 @@ def main() -> None:
144
  language_goal = _episode_language_goal(descriptions)
145
  total_reward = 0.0
146
  success = 0.0
 
 
147
  history_images: list[np.ndarray] = []
148
  history_proprio: list[np.ndarray] = []
 
149
  history_steps = int(getattr(policy_config.memory, "history_steps", 0))
150
- for timestep in range(args.episode_length):
 
151
  images = stack_live_rgb_obs(obs, resolution=args.resolution).unsqueeze(0).to(device)
152
  proprio = torch.from_numpy(
153
  bimanual_proprio_from_obs(
@@ -164,6 +229,9 @@ def main() -> None:
164
  history_proprio_tensor = (
165
  torch.from_numpy(np.stack(history_proprio, axis=0)).unsqueeze(0).to(device)
166
  )
 
 
 
167
  else:
168
  history_images_tensor = torch.zeros(
169
  (1, 0, images.shape[1], images.shape[2], images.shape[3], images.shape[4]),
@@ -175,6 +243,11 @@ def main() -> None:
175
  device=device,
176
  dtype=proprio.dtype,
177
  )
 
 
 
 
 
178
  with torch.no_grad():
179
  if policy_supports_planning(trainer_config.policy_type):
180
  outputs = model(
@@ -183,6 +256,7 @@ def main() -> None:
183
  texts=[language_goal],
184
  history_images=history_images_tensor,
185
  history_proprio=history_proprio_tensor,
 
186
  plan=plan_applied,
187
  support_mode_conditioning=not args.disable_support_mode_conditioning,
188
  )
@@ -193,22 +267,40 @@ def main() -> None:
193
  texts=[language_goal],
194
  history_images=history_images_tensor,
195
  history_proprio=history_proprio_tensor,
 
196
  )
197
  chosen_chunk = outputs["action_mean"]
198
  if plan_applied and "planned_chunk" in outputs:
199
  chosen_chunk = outputs["planned_chunk"]
200
- step_action = chosen_chunk[0, 0].detach().float().cpu().numpy()
201
- if history_steps > 0:
202
- if len(history_images) >= history_steps:
203
- keep = max(history_steps - 1, 0)
204
- history_images = history_images[-keep:] if keep > 0 else []
205
- history_proprio = history_proprio[-keep:] if keep > 0 else []
206
- history_images.append(images[0].detach().cpu().numpy())
207
- history_proprio.append(proprio[0].detach().cpu().numpy())
208
- obs, reward, done = _step_bimanual_chunk(task, obs, step_action)
209
- total_reward += float(reward)
210
- if reward >= 1.0:
211
- success = 1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  if done or success >= 1.0:
213
  break
214
  task_successes.append(success)
@@ -217,6 +309,8 @@ def main() -> None:
217
  "task_class": task_class.__name__,
218
  "successes": task_successes,
219
  "returns": task_returns,
 
 
220
  "mean_success": float(np.mean(task_successes)) if task_successes else 0.0,
221
  "mean_return": float(np.mean(task_returns)) if task_returns else 0.0,
222
  }
 
52
  return str(descriptions[0]) if descriptions else ""
53
 
54
 
55
+ def _noop_arm_action(obs: Any, arm_name: str) -> np.ndarray:
56
+ if arm_name == "right":
57
+ pose = np.asarray(obs.gripper_right_pose, dtype=np.float32)
58
+ gripper_open = float(obs.gripper_right_open)
59
+ elif arm_name == "left":
60
+ pose = np.asarray(obs.gripper_left_pose, dtype=np.float32)
61
+ gripper_open = float(obs.gripper_left_open)
62
+ else: # pragma: no cover - defensive guard
63
+ raise ValueError(f"Unsupported arm: {arm_name}")
64
+ return np.concatenate([pose, np.array([gripper_open, 1.0], dtype=np.float32)], axis=0)
65
+
66
+
67
+ def _scaled_single_arm_delta(delta_action: np.ndarray, arm_name: str, scale: float) -> np.ndarray:
68
+ scaled = np.asarray(delta_action, dtype=np.float32).copy()
69
+ arm_index = {"right": 0, "left": 1}[arm_name]
70
+ offset = arm_index * 7
71
+ scaled[offset : offset + 6] *= float(scale)
72
+ return scaled
73
+
74
+
75
+ def _step_bimanual_chunk(task: Any, obs: Any, delta_action: np.ndarray) -> tuple[Any, float, bool, int, int]:
76
  total_reward = 0.0
77
  done = False
78
  next_obs = obs
79
+ recovered_steps = 0
80
+ noop_fallbacks = 0
81
  for arm_name in ("right", "left"):
82
+ reward = 0.0
83
+ last_error: Exception | None = None
84
+ stepped = False
85
+ for scale in (1.0, 0.5, 0.25, 0.1):
86
+ try:
87
+ env_action = single_arm_absolute_action_from_delta(
88
+ next_obs,
89
+ _scaled_single_arm_delta(delta_action, arm_name, scale),
90
+ arm_name,
91
+ ignore_collisions=True,
92
+ )
93
+ next_obs, reward, done = task.step(env_action, arm_name)
94
+ if scale < 1.0:
95
+ recovered_steps += 1
96
+ stepped = True
97
+ break
98
+ except Exception as exc: # pragma: no cover - live RLBench failure path
99
+ last_error = exc
100
+ if not stepped:
101
+ try:
102
+ next_obs, reward, done = task.step(_noop_arm_action(next_obs, arm_name), arm_name)
103
+ noop_fallbacks += 1
104
+ stepped = True
105
+ except Exception as exc: # pragma: no cover - live RLBench failure path
106
+ last_error = exc
107
+ if not stepped:
108
+ if last_error is not None:
109
+ raise last_error
110
+ raise RuntimeError(f"Failed to step arm '{arm_name}' for unknown reasons.")
111
  total_reward += float(reward)
112
  if reward >= 1.0 or done:
113
  break
114
+ return next_obs, total_reward, done, recovered_steps, noop_fallbacks
115
 
116
 
117
  def main() -> None:
 
127
  parser.add_argument("--allow-unsupervised-planning", action="store_true")
128
  parser.add_argument("--disable-support-mode-conditioning", action="store_true")
129
  parser.add_argument("--headless", action="store_true", default=True)
130
+ parser.add_argument("--chunk-commit-steps", type=int, default=0)
131
  args = parser.parse_args()
132
 
133
  checkpoint = torch.load(Path(args.checkpoint), map_location="cpu", weights_only=False)
 
135
  trainer_config = _trainer_config_from_checkpoint(checkpoint)
136
  device = torch.device("cuda" if torch.cuda.is_available() and args.device == "cuda" else "cpu")
137
  model = build_policy(policy_config, trainer_config).to(device)
138
+ incompatible = model.load_state_dict(checkpoint["state_dict"], strict=False)
139
+ allowed_missing = {
140
+ key
141
+ for key in incompatible.missing_keys
142
+ if key.startswith("memory.action_proj.") or key.endswith("arm_identity.weight")
143
+ }
144
+ missing_other = sorted(set(incompatible.missing_keys) - allowed_missing)
145
+ if missing_other or incompatible.unexpected_keys:
146
+ raise RuntimeError(
147
+ "Checkpoint load failed due to incompatible weights. "
148
+ f"Missing keys: {missing_other}. Unexpected keys: {list(incompatible.unexpected_keys)}"
149
+ )
150
  model.eval()
151
  plan_requested = bool(args.plan)
152
  plan_applied = plan_requested and planner_enabled(trainer_config, during_eval=True)
 
205
  language_goal = _episode_language_goal(descriptions)
206
  total_reward = 0.0
207
  success = 0.0
208
+ episode_recoveries = 0
209
+ episode_noop_fallbacks = 0
210
  history_images: list[np.ndarray] = []
211
  history_proprio: list[np.ndarray] = []
212
+ history_actions: list[np.ndarray] = []
213
  history_steps = int(getattr(policy_config.memory, "history_steps", 0))
214
+ timestep = 0
215
+ while timestep < args.episode_length:
216
  images = stack_live_rgb_obs(obs, resolution=args.resolution).unsqueeze(0).to(device)
217
  proprio = torch.from_numpy(
218
  bimanual_proprio_from_obs(
 
229
  history_proprio_tensor = (
230
  torch.from_numpy(np.stack(history_proprio, axis=0)).unsqueeze(0).to(device)
231
  )
232
+ history_actions_tensor = (
233
+ torch.from_numpy(np.stack(history_actions, axis=0)).unsqueeze(0).to(device)
234
+ )
235
  else:
236
  history_images_tensor = torch.zeros(
237
  (1, 0, images.shape[1], images.shape[2], images.shape[3], images.shape[4]),
 
243
  device=device,
244
  dtype=proprio.dtype,
245
  )
246
+ history_actions_tensor = torch.zeros(
247
+ (1, 0, policy_config.decoder.action_dim),
248
+ device=device,
249
+ dtype=proprio.dtype,
250
+ )
251
  with torch.no_grad():
252
  if policy_supports_planning(trainer_config.policy_type):
253
  outputs = model(
 
256
  texts=[language_goal],
257
  history_images=history_images_tensor,
258
  history_proprio=history_proprio_tensor,
259
+ history_actions=history_actions_tensor,
260
  plan=plan_applied,
261
  support_mode_conditioning=not args.disable_support_mode_conditioning,
262
  )
 
267
  texts=[language_goal],
268
  history_images=history_images_tensor,
269
  history_proprio=history_proprio_tensor,
270
+ history_actions=history_actions_tensor,
271
  )
272
  chosen_chunk = outputs["action_mean"]
273
  if plan_applied and "planned_chunk" in outputs:
274
  chosen_chunk = outputs["planned_chunk"]
275
+ chunk_np = chosen_chunk[0].detach().float().cpu().numpy()
276
+ commit_steps = chunk_np.shape[0] if args.chunk_commit_steps <= 0 else min(args.chunk_commit_steps, chunk_np.shape[0])
277
+ done = False
278
+ for step_action in chunk_np[:commit_steps]:
279
+ live_images = stack_live_rgb_obs(obs, resolution=args.resolution).detach().cpu().numpy()
280
+ live_proprio = bimanual_proprio_from_obs(
281
+ obs,
282
+ timestep=timestep,
283
+ episode_length=args.episode_length,
284
+ target_dim=policy_config.fusion.proprio_dim,
285
+ ).astype(np.float32)
286
+ if history_steps > 0:
287
+ if len(history_images) >= history_steps:
288
+ keep = max(history_steps - 1, 0)
289
+ history_images = history_images[-keep:] if keep > 0 else []
290
+ history_proprio = history_proprio[-keep:] if keep > 0 else []
291
+ history_actions = history_actions[-keep:] if keep > 0 else []
292
+ history_images.append(live_images)
293
+ history_proprio.append(live_proprio)
294
+ history_actions.append(step_action.astype(np.float32))
295
+ obs, reward, done, recovered_steps, noop_fallbacks = _step_bimanual_chunk(task, obs, step_action)
296
+ episode_recoveries += recovered_steps
297
+ episode_noop_fallbacks += noop_fallbacks
298
+ total_reward += float(reward)
299
+ timestep += 1
300
+ if reward >= 1.0:
301
+ success = 1.0
302
+ if done or success >= 1.0 or timestep >= args.episode_length:
303
+ break
304
  if done or success >= 1.0:
305
  break
306
  task_successes.append(success)
 
309
  "task_class": task_class.__name__,
310
  "successes": task_successes,
311
  "returns": task_returns,
312
+ "path_recoveries": episode_recoveries if args.episodes_per_task == 1 else None,
313
+ "noop_fallbacks": episode_noop_fallbacks if args.episodes_per_task == 1 else None,
314
  "mean_success": float(np.mean(task_successes)) if task_successes else 0.0,
315
  "mean_return": float(np.mean(task_returns)) if task_returns else 0.0,
316
  }
code/reveal_vla_bimanual/models/backbones.py CHANGED
@@ -48,7 +48,7 @@ class FrozenVLBackbone(nn.Module):
48
  else:
49
  from transformers import AutoTokenizer, CLIPModel
50
 
51
- clip_model = CLIPModel.from_pretrained(config.model_name)
52
  self.vision_model = clip_model.vision_model
53
  self.text_model = clip_model.text_model
54
  self.visual_projection = clip_model.visual_projection
 
48
  else:
49
  from transformers import AutoTokenizer, CLIPModel
50
 
51
+ clip_model = CLIPModel.from_pretrained(config.model_name, use_safetensors=True)
52
  self.vision_model = clip_model.vision_model
53
  self.text_model = clip_model.text_model
54
  self.visual_projection = clip_model.visual_projection
code/reveal_vla_bimanual/models/observation_memory.py CHANGED
@@ -9,6 +9,7 @@ from torch import Tensor, nn
9
  @dataclass
10
  class ObservationMemoryConfig:
11
  hidden_dim: int = 512
 
12
  history_steps: int = 2
13
  num_layers: int = 1
14
  dropout: float = 0.1
@@ -33,6 +34,11 @@ class ObservationMemory(nn.Module):
33
  nn.Linear(config.hidden_dim, config.hidden_dim),
34
  nn.GELU(),
35
  )
 
 
 
 
 
36
  self.uncertainty_head = nn.Sequential(
37
  nn.LayerNorm(config.hidden_dim),
38
  nn.Linear(config.hidden_dim, 1),
@@ -42,10 +48,14 @@ class ObservationMemory(nn.Module):
42
  self,
43
  scene_tokens: Tensor,
44
  history_scene_tokens: Tensor | None = None,
 
45
  ) -> dict[str, Tensor]:
46
  pooled_current = scene_tokens.mean(dim=1, keepdim=True)
47
  if history_scene_tokens is not None and history_scene_tokens.numel() > 0:
48
  history_pooled = history_scene_tokens.mean(dim=2)
 
 
 
49
  sequence = torch.cat([history_pooled, pooled_current], dim=1)
50
  else:
51
  sequence = pooled_current
@@ -94,11 +104,24 @@ class InteractionObservationMemory(nn.Module):
94
  nn.Linear(config.hidden_dim, config.hidden_dim),
95
  nn.GELU(),
96
  )
 
 
 
 
 
97
  self.uncertainty_head = nn.Sequential(
98
  nn.LayerNorm(config.hidden_dim),
99
  nn.Linear(config.hidden_dim, 1),
100
  )
101
 
 
 
 
 
 
 
 
 
102
  def _truncate_history(self, history_scene_tokens: Tensor | None) -> Tensor | None:
103
  if history_scene_tokens is None or history_scene_tokens.numel() == 0:
104
  return history_scene_tokens
@@ -110,11 +133,21 @@ class InteractionObservationMemory(nn.Module):
110
  self,
111
  scene_tokens: Tensor,
112
  history_scene_tokens: Tensor | None = None,
 
113
  ) -> dict[str, Tensor]:
114
  pooled_current = scene_tokens.mean(dim=1, keepdim=True)
115
  history_scene_tokens = self._truncate_history(history_scene_tokens)
116
  if history_scene_tokens is not None and history_scene_tokens.numel() > 0:
117
  history_pooled = history_scene_tokens.mean(dim=2)
 
 
 
 
 
 
 
 
 
118
  sequence = torch.cat([history_pooled, pooled_current], dim=1)
119
  else:
120
  sequence = pooled_current
@@ -126,11 +159,13 @@ class InteractionObservationMemory(nn.Module):
126
  )
127
  encoded = self.sequence_encoder(sequence + self.position_embedding[:, :seq_len])
128
  batch_size = encoded.shape[0]
129
- queries = self.bank_queries.unsqueeze(0).expand(batch_size, -1, -1)
 
 
130
  bank_tokens, _ = self.bank_attention(queries, encoded, encoded)
131
  bank_tokens = bank_tokens + self.bank_mlp(bank_tokens)
132
- projected_bank = self.token_proj(bank_tokens)
133
- pooled_bank = projected_bank.mean(dim=1)
134
  return {
135
  "memory_sequence": encoded,
136
  "memory_state": encoded[:, -1],
 
9
  @dataclass
10
  class ObservationMemoryConfig:
11
  hidden_dim: int = 512
12
+ action_dim: int = 14
13
  history_steps: int = 2
14
  num_layers: int = 1
15
  dropout: float = 0.1
 
34
  nn.Linear(config.hidden_dim, config.hidden_dim),
35
  nn.GELU(),
36
  )
37
+ self.action_proj = nn.Sequential(
38
+ nn.LayerNorm(config.action_dim),
39
+ nn.Linear(config.action_dim, config.hidden_dim),
40
+ nn.GELU(),
41
+ )
42
  self.uncertainty_head = nn.Sequential(
43
  nn.LayerNorm(config.hidden_dim),
44
  nn.Linear(config.hidden_dim, 1),
 
48
  self,
49
  scene_tokens: Tensor,
50
  history_scene_tokens: Tensor | None = None,
51
+ history_actions: Tensor | None = None,
52
  ) -> dict[str, Tensor]:
53
  pooled_current = scene_tokens.mean(dim=1, keepdim=True)
54
  if history_scene_tokens is not None and history_scene_tokens.numel() > 0:
55
  history_pooled = history_scene_tokens.mean(dim=2)
56
+ if history_actions is not None and history_actions.numel() > 0:
57
+ history_action_tokens = self.action_proj(history_actions[:, -history_pooled.shape[1] :])
58
+ history_pooled = history_pooled + history_action_tokens
59
  sequence = torch.cat([history_pooled, pooled_current], dim=1)
60
  else:
61
  sequence = pooled_current
 
104
  nn.Linear(config.hidden_dim, config.hidden_dim),
105
  nn.GELU(),
106
  )
107
+ self.action_proj = nn.Sequential(
108
+ nn.LayerNorm(config.action_dim),
109
+ nn.Linear(config.action_dim, config.hidden_dim),
110
+ nn.GELU(),
111
+ )
112
  self.uncertainty_head = nn.Sequential(
113
  nn.LayerNorm(config.hidden_dim),
114
  nn.Linear(config.hidden_dim, 1),
115
  )
116
 
117
+ def _recency_weights(self, length: int, device: torch.device, dtype: torch.dtype) -> Tensor:
118
+ if length <= 0:
119
+ return torch.zeros((0,), device=device, dtype=dtype)
120
+ positions = torch.arange(length, device=device, dtype=dtype)
121
+ distances = (length - 1) - positions
122
+ weights = torch.exp(-0.5 * distances)
123
+ return weights / weights.sum().clamp_min(1e-6)
124
+
125
  def _truncate_history(self, history_scene_tokens: Tensor | None) -> Tensor | None:
126
  if history_scene_tokens is None or history_scene_tokens.numel() == 0:
127
  return history_scene_tokens
 
133
  self,
134
  scene_tokens: Tensor,
135
  history_scene_tokens: Tensor | None = None,
136
+ history_actions: Tensor | None = None,
137
  ) -> dict[str, Tensor]:
138
  pooled_current = scene_tokens.mean(dim=1, keepdim=True)
139
  history_scene_tokens = self._truncate_history(history_scene_tokens)
140
  if history_scene_tokens is not None and history_scene_tokens.numel() > 0:
141
  history_pooled = history_scene_tokens.mean(dim=2)
142
+ if history_actions is not None and history_actions.numel() > 0:
143
+ truncated_actions = history_actions[:, -history_pooled.shape[1] :]
144
+ history_pooled = history_pooled + self.action_proj(truncated_actions)
145
+ recency_weights = self._recency_weights(
146
+ history_pooled.shape[1],
147
+ device=history_pooled.device,
148
+ dtype=history_pooled.dtype,
149
+ ).view(1, -1, 1)
150
+ history_pooled = history_pooled * recency_weights * float(history_pooled.shape[1])
151
  sequence = torch.cat([history_pooled, pooled_current], dim=1)
152
  else:
153
  sequence = pooled_current
 
159
  )
160
  encoded = self.sequence_encoder(sequence + self.position_embedding[:, :seq_len])
161
  batch_size = encoded.shape[0]
162
+ recent_window = min(max(1, self.config.memory_bank_size // 2), encoded.shape[1])
163
+ recent_summary = encoded[:, -recent_window:].mean(dim=1, keepdim=True)
164
+ queries = self.bank_queries.unsqueeze(0).expand(batch_size, -1, -1) + recent_summary
165
  bank_tokens, _ = self.bank_attention(queries, encoded, encoded)
166
  bank_tokens = bank_tokens + self.bank_mlp(bank_tokens)
167
+ projected_bank = self.token_proj(bank_tokens + recent_summary)
168
+ pooled_bank = projected_bank.mean(dim=1) + 0.25 * recent_summary.squeeze(1)
169
  return {
170
  "memory_sequence": encoded,
171
  "memory_state": encoded[:, -1],
code/reveal_vla_bimanual/models/policy.py CHANGED
@@ -111,6 +111,7 @@ class BackboneOnlyPolicy(nn.Module):
111
  language_tokens: dict[str, Tensor] | None = None,
112
  history_images: Tensor | None = None,
113
  history_proprio: Tensor | None = None,
 
114
  ) -> dict[str, Tensor]:
115
  scene_tokens = self.encode_scene(images, proprio, texts=texts, language_tokens=language_tokens)
116
  history_scene_tokens = self.encode_history(
@@ -119,7 +120,11 @@ class BackboneOnlyPolicy(nn.Module):
119
  texts=texts,
120
  language_tokens=language_tokens,
121
  )
122
- memory_output = self.memory(scene_tokens, history_scene_tokens=history_scene_tokens)
 
 
 
 
123
  decoded = self.decoder(scene_tokens, memory_token=memory_output["memory_token"])
124
  decoded["scene_tokens"] = scene_tokens
125
  decoded["history_scene_tokens"] = history_scene_tokens
@@ -142,6 +147,7 @@ class RevealBimanualPolicy(BackboneOnlyPolicy):
142
  language_tokens: dict[str, Tensor] | None = None,
143
  history_images: Tensor | None = None,
144
  history_proprio: Tensor | None = None,
 
145
  plan: bool = True,
146
  support_mode_conditioning: bool = True,
147
  candidate_chunks_override: Tensor | None = None,
@@ -153,6 +159,7 @@ class RevealBimanualPolicy(BackboneOnlyPolicy):
153
  language_tokens=language_tokens,
154
  history_images=history_images,
155
  history_proprio=history_proprio,
 
156
  )
157
  reveal_state = self.reveal_head(
158
  outputs["scene_tokens"],
@@ -232,6 +239,7 @@ class InteractionBimanualPolicy(BackboneOnlyPolicy):
232
  language_tokens: dict[str, Tensor] | None = None,
233
  history_images: Tensor | None = None,
234
  history_proprio: Tensor | None = None,
 
235
  plan: bool = True,
236
  support_mode_conditioning: bool = True,
237
  candidate_chunks_override: Tensor | None = None,
@@ -248,7 +256,11 @@ class InteractionBimanualPolicy(BackboneOnlyPolicy):
248
  )
249
  if history_steps_override is not None and history_scene_tokens is not None and history_scene_tokens.numel() > 0:
250
  history_scene_tokens = history_scene_tokens[:, -history_steps_override:]
251
- memory_output = self.memory(scene_tokens, history_scene_tokens=history_scene_tokens)
 
 
 
 
252
 
253
  interaction_state = None
254
  if use_interaction_head:
 
111
  language_tokens: dict[str, Tensor] | None = None,
112
  history_images: Tensor | None = None,
113
  history_proprio: Tensor | None = None,
114
+ history_actions: Tensor | None = None,
115
  ) -> dict[str, Tensor]:
116
  scene_tokens = self.encode_scene(images, proprio, texts=texts, language_tokens=language_tokens)
117
  history_scene_tokens = self.encode_history(
 
120
  texts=texts,
121
  language_tokens=language_tokens,
122
  )
123
+ memory_output = self.memory(
124
+ scene_tokens,
125
+ history_scene_tokens=history_scene_tokens,
126
+ history_actions=history_actions,
127
+ )
128
  decoded = self.decoder(scene_tokens, memory_token=memory_output["memory_token"])
129
  decoded["scene_tokens"] = scene_tokens
130
  decoded["history_scene_tokens"] = history_scene_tokens
 
147
  language_tokens: dict[str, Tensor] | None = None,
148
  history_images: Tensor | None = None,
149
  history_proprio: Tensor | None = None,
150
+ history_actions: Tensor | None = None,
151
  plan: bool = True,
152
  support_mode_conditioning: bool = True,
153
  candidate_chunks_override: Tensor | None = None,
 
159
  language_tokens=language_tokens,
160
  history_images=history_images,
161
  history_proprio=history_proprio,
162
+ history_actions=history_actions,
163
  )
164
  reveal_state = self.reveal_head(
165
  outputs["scene_tokens"],
 
239
  language_tokens: dict[str, Tensor] | None = None,
240
  history_images: Tensor | None = None,
241
  history_proprio: Tensor | None = None,
242
+ history_actions: Tensor | None = None,
243
  plan: bool = True,
244
  support_mode_conditioning: bool = True,
245
  candidate_chunks_override: Tensor | None = None,
 
256
  )
257
  if history_steps_override is not None and history_scene_tokens is not None and history_scene_tokens.numel() > 0:
258
  history_scene_tokens = history_scene_tokens[:, -history_steps_override:]
259
+ memory_output = self.memory(
260
+ scene_tokens,
261
+ history_scene_tokens=history_scene_tokens,
262
+ history_actions=history_actions,
263
+ )
264
 
265
  interaction_state = None
266
  if use_interaction_head:
code/reveal_vla_bimanual/sim_reveal/dataset.py CHANGED
@@ -11,7 +11,7 @@ import numpy as np
11
 
12
  from sim_reveal.procedural_envs import available_proxy_names, make_proxy_env, render_views_from_state
13
 
14
- NOLEAK_PROXY_DATASET_VERSION = "reveal_proxy_v4_noleak_counterfactual"
15
  LEGACY_PRIVILEGED_RENDER_KEYS = frozenset(
16
  {
17
  "target_template",
@@ -74,6 +74,7 @@ def collect_teacher_dataset(
74
  )
75
  padded_history_render_states = []
76
  padded_history_proprio = []
 
77
  history_count = min(history_steps, len(history_buffer))
78
  pad_count = history_steps - history_count
79
  if history_count > 0:
@@ -83,9 +84,11 @@ def collect_teacher_dataset(
83
  for _ in range(pad_count):
84
  padded_history_render_states.append(env.render_state(privileged_state))
85
  padded_history_proprio.append(np.zeros_like(observation["proprio"], dtype=np.float32))
 
86
  for item in recent_history:
87
  padded_history_render_states.append(item["render_state"])
88
  padded_history_proprio.append(item["proprio"])
 
89
  samples.append(
90
  {
91
  "dataset_version": NOLEAK_PROXY_DATASET_VERSION,
@@ -108,16 +111,21 @@ def collect_teacher_dataset(
108
  "history_proprio": np.stack(padded_history_proprio, axis=0).astype("float32")
109
  if padded_history_proprio
110
  else np.zeros((0, observation["proprio"].shape[0]), dtype=np.float32),
 
 
 
111
  "candidate_action_chunks": candidate_action_chunks.astype("float32"),
112
  **candidate_outcomes,
113
  }
114
  )
115
  proxy_samples += 1
116
- _, _, terminated, truncated, privileged_state = env.step(env.teacher_action())
 
117
  history_buffer.append(
118
  {
119
  "render_state": env.render_state(privileged_state),
120
  "proprio": env.get_observation(privileged_state)["proprio"].astype("float32"),
 
121
  }
122
  )
123
  if terminated:
@@ -203,6 +211,13 @@ class RevealOfflineDataset(Dataset[dict[str, Any]]):
203
  "images": stacked,
204
  "history_images": history_stacked,
205
  "history_proprio": torch.as_tensor(sample.get("history_proprio", []), dtype=torch.float32),
 
 
 
 
 
 
 
206
  "proprio": torch.as_tensor(sample["proprio"], dtype=torch.float32),
207
  "texts": sample["language_goal"],
208
  "action_chunk": torch.as_tensor(sample["action_chunk"], dtype=torch.float32),
 
11
 
12
  from sim_reveal.procedural_envs import available_proxy_names, make_proxy_env, render_views_from_state
13
 
14
+ NOLEAK_PROXY_DATASET_VERSION = "reveal_proxy_v5_noleak_actionhist"
15
  LEGACY_PRIVILEGED_RENDER_KEYS = frozenset(
16
  {
17
  "target_template",
 
74
  )
75
  padded_history_render_states = []
76
  padded_history_proprio = []
77
+ padded_history_actions = []
78
  history_count = min(history_steps, len(history_buffer))
79
  pad_count = history_steps - history_count
80
  if history_count > 0:
 
84
  for _ in range(pad_count):
85
  padded_history_render_states.append(env.render_state(privileged_state))
86
  padded_history_proprio.append(np.zeros_like(observation["proprio"], dtype=np.float32))
87
+ padded_history_actions.append(np.zeros((action_chunk.shape[-1],), dtype=np.float32))
88
  for item in recent_history:
89
  padded_history_render_states.append(item["render_state"])
90
  padded_history_proprio.append(item["proprio"])
91
+ padded_history_actions.append(item["action"])
92
  samples.append(
93
  {
94
  "dataset_version": NOLEAK_PROXY_DATASET_VERSION,
 
111
  "history_proprio": np.stack(padded_history_proprio, axis=0).astype("float32")
112
  if padded_history_proprio
113
  else np.zeros((0, observation["proprio"].shape[0]), dtype=np.float32),
114
+ "history_actions": np.stack(padded_history_actions, axis=0).astype("float32")
115
+ if padded_history_actions
116
+ else np.zeros((0, action_chunk.shape[-1]), dtype=np.float32),
117
  "candidate_action_chunks": candidate_action_chunks.astype("float32"),
118
  **candidate_outcomes,
119
  }
120
  )
121
  proxy_samples += 1
122
+ executed_action = env.teacher_action().astype("float32")
123
+ _, _, terminated, truncated, privileged_state = env.step(executed_action)
124
  history_buffer.append(
125
  {
126
  "render_state": env.render_state(privileged_state),
127
  "proprio": env.get_observation(privileged_state)["proprio"].astype("float32"),
128
+ "action": executed_action,
129
  }
130
  )
131
  if terminated:
 
211
  "images": stacked,
212
  "history_images": history_stacked,
213
  "history_proprio": torch.as_tensor(sample.get("history_proprio", []), dtype=torch.float32),
214
+ "history_actions": torch.as_tensor(
215
+ sample.get(
216
+ "history_actions",
217
+ np.zeros((len(sample.get("history_render_states", [])), sample["action_chunk"].shape[-1]), dtype=np.float32),
218
+ ),
219
+ dtype=torch.float32,
220
+ ),
221
  "proprio": torch.as_tensor(sample["proprio"], dtype=torch.float32),
222
  "texts": sample["language_goal"],
223
  "action_chunk": torch.as_tensor(sample["action_chunk"], dtype=torch.float32),
code/reveal_vla_bimanual/sim_rlbench/dataset.py CHANGED
@@ -14,6 +14,16 @@ from sim_rlbench.camera_spec import canonical_to_upstream_camera
14
 
15
 
16
  THREE_CAMERAS: tuple[str, str, str] = ("front", "wrist_left", "wrist_right")
 
 
 
 
 
 
 
 
 
 
17
 
18
 
19
  def _camera_value(obs: Any, camera_name: str, suffix: str) -> Any:
@@ -196,8 +206,8 @@ def single_arm_absolute_action_from_delta(
196
  arm_index = {"right": 0, "left": 1}[arm_name]
197
  current_pose = _arm_pose(current_obs, arm_name)
198
  offset = arm_index * 7
199
- delta_position = delta_action[offset : offset + 3]
200
- delta_rotvec = delta_action[offset + 3 : offset + 6]
201
  gripper = float(delta_action[offset + 6] > 0.5)
202
  current_quat = _xyzw_to_wxyz(current_pose[3:])
203
  delta_quat = _rotvec_to_quat_wxyz(delta_rotvec)
@@ -379,6 +389,20 @@ class RLBenchOfflineChunkDataset(Dataset[dict[str, Any]]):
379
  )
380
  return torch.stack(history_features, dim=0)
381
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
  def __getitem__(self, index: int) -> dict[str, Any]:
383
  sample = self._samples[index]
384
  episode = self._episodes[sample.episode_key]
@@ -388,6 +412,7 @@ class RLBenchOfflineChunkDataset(Dataset[dict[str, Any]]):
388
  "images": self._load_rgb_stack(episode.episode_dir, sample.step_index),
389
  "history_images": self._history_rgb_stack(episode.episode_dir, sample.step_index),
390
  "history_proprio": self._history_proprio(observations, sample.step_index),
 
391
  "proprio": torch.from_numpy(
392
  bimanual_proprio_from_obs(
393
  obs,
 
14
 
15
 
16
  THREE_CAMERAS: tuple[str, str, str] = ("front", "wrist_left", "wrist_right")
17
+ MAX_RLBENCH_POSITION_DELTA = 0.05
18
+ MAX_RLBENCH_ROTATION_DELTA = 0.35
19
+
20
+
21
+ def _clip_vector_norm(vector: Sequence[float], max_norm: float) -> np.ndarray:
22
+ clipped = np.asarray(vector, dtype=np.float32)
23
+ norm = float(np.linalg.norm(clipped))
24
+ if norm <= max_norm or norm < 1e-8:
25
+ return clipped
26
+ return clipped * (max_norm / norm)
27
 
28
 
29
  def _camera_value(obs: Any, camera_name: str, suffix: str) -> Any:
 
206
  arm_index = {"right": 0, "left": 1}[arm_name]
207
  current_pose = _arm_pose(current_obs, arm_name)
208
  offset = arm_index * 7
209
+ delta_position = _clip_vector_norm(delta_action[offset : offset + 3], max_norm=MAX_RLBENCH_POSITION_DELTA)
210
+ delta_rotvec = _clip_vector_norm(delta_action[offset + 3 : offset + 6], max_norm=MAX_RLBENCH_ROTATION_DELTA)
211
  gripper = float(delta_action[offset + 6] > 0.5)
212
  current_quat = _xyzw_to_wxyz(current_pose[3:])
213
  delta_quat = _rotvec_to_quat_wxyz(delta_rotvec)
 
389
  )
390
  return torch.stack(history_features, dim=0)
391
 
392
+ def _history_actions(self, observations: Any, step_index: int) -> torch.Tensor:
393
+ if self.history_steps <= 0:
394
+ return torch.zeros((0, 14), dtype=torch.float32)
395
+ history_actions = []
396
+ for history_offset in range(self.history_steps, 0, -1):
397
+ history_index = step_index - history_offset
398
+ if history_index < 0:
399
+ history_actions.append(torch.zeros((14,), dtype=torch.float32))
400
+ else:
401
+ history_actions.append(
402
+ torch.from_numpy(delta_action_from_transition(observations[history_index], observations[history_index + 1]))
403
+ )
404
+ return torch.stack(history_actions, dim=0)
405
+
406
  def __getitem__(self, index: int) -> dict[str, Any]:
407
  sample = self._samples[index]
408
  episode = self._episodes[sample.episode_key]
 
412
  "images": self._load_rgb_stack(episode.episode_dir, sample.step_index),
413
  "history_images": self._history_rgb_stack(episode.episode_dir, sample.step_index),
414
  "history_proprio": self._history_proprio(observations, sample.step_index),
415
+ "history_actions": self._history_actions(observations, sample.step_index),
416
  "proprio": torch.from_numpy(
417
  bimanual_proprio_from_obs(
418
  obs,
code/reveal_vla_bimanual/train/configs/proxy_interaction_state_actionhist.yaml ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_state_actionhist
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction_debug
3
+ device: cuda
4
+ seed: 13
5
+
6
+ data:
7
+ proxies: [foliage_proxy, bag_proxy, cloth_proxy]
8
+ resolution: 96
9
+ train_episodes_per_proxy: 48
10
+ val_episodes_per_proxy: 16
11
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v5_actionhist.pt
12
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v5_actionhist.pt
13
+ rebuild_dataset: true
14
+ chunk_horizon: 8
15
+ rollout_horizon: 5
16
+ history_steps: 6
17
+ planner_candidates: 8
18
+ seed: 13
19
+
20
+ optim:
21
+ epochs: 10
22
+ batch_size: 16
23
+ num_workers: 0
24
+ lr: 0.001
25
+ weight_decay: 0.0001
26
+
27
+ trainer:
28
+ policy_type: interaction_state
29
+ use_bf16: true
30
+ grad_clip_norm: 1.0
31
+ freeze_backbone: true
32
+ gradient_checkpointing: false
33
+ plan_during_train: true
34
+ plan_during_eval: true
35
+ support_mode_conditioning: true
36
+ planner_mode: trainable
37
+
38
+ policy:
39
+ backbone:
40
+ model_name: openai/clip-vit-base-patch32
41
+ hidden_dim: 128
42
+ max_text_tokens: 32
43
+ freeze_backbone: true
44
+ gradient_checkpointing: false
45
+ use_dummy_backbone: true
46
+ fusion:
47
+ hidden_dim: 128
48
+ num_cameras: 3
49
+ num_layers: 2
50
+ num_heads: 4
51
+ ff_dim: 256
52
+ dropout: 0.1
53
+ proprio_dim: 32
54
+ proprio_tokens: 1
55
+ memory:
56
+ hidden_dim: 128
57
+ action_dim: 14
58
+ history_steps: 6
59
+ num_layers: 2
60
+ dropout: 0.1
61
+ memory_bank_size: 4
62
+ num_heads: 4
63
+ max_history_steps: 8
64
+ decoder:
65
+ hidden_dim: 128
66
+ num_heads: 4
67
+ num_layers: 2
68
+ ff_dim: 256
69
+ dropout: 0.1
70
+ chunk_size: 8
71
+ action_dim: 14
72
+ arm_action_dim: 7
73
+ num_candidates: 8
74
+ num_phases: 5
75
+ num_arm_roles: 4
76
+ reveal_head:
77
+ hidden_dim: 128
78
+ num_support_modes: 3
79
+ num_approach_templates: 32
80
+ rollout_horizon: 5
81
+ belief_map_size: 32
82
+ field_size: 16
83
+ num_heads: 4
84
+ predict_belief_map: true
85
+ num_phases: 5
86
+ num_arm_roles: 4
87
+ num_interaction_tokens: 8
88
+ world_model:
89
+ hidden_dim: 128
90
+ action_dim: 14
91
+ num_support_modes: 3
92
+ num_approach_templates: 32
93
+ rollout_horizon: 5
94
+ field_size: 16
95
+ num_heads: 4
96
+ num_phases: 5
97
+ num_arm_roles: 4
98
+ num_interaction_tokens: 8
99
+ planner:
100
+ hidden_dim: 128
101
+ num_candidates: 8
102
+ action_dim: 14
103
+ num_support_modes: 3
104
+ utility_margin: 0.1
105
+ num_heads: 4
106
+ num_layers: 2
107
+ num_phases: 5
108
+ num_arm_roles: 4
109
+
110
+ loss_weights:
111
+ action: 1.0
112
+ phase: 0.15
113
+ arm_role: 0.2
114
+ support_mode: 0.15
115
+ corridor: 0.2
116
+ persistence: 0.1
117
+ disturbance: 0.1
118
+ world_model: 0.25
119
+ belief: 0.05
120
+ planner_success: 0.2
121
+ planner_risk: 0.1
122
+ planner_ranking: 0.1
123
+ proposal_reconstruction: 0.2
124
+ proposal_success: 0.1
125
+ proposal_ranking: 0.1
code/reveal_vla_bimanual/train/configs/proxy_interaction_state_actionhist_smoke.yaml ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_state_actionhist_smoke
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction_debug
3
+ device: cuda
4
+ seed: 13
5
+
6
+ data:
7
+ proxies: [foliage_proxy, bag_proxy, cloth_proxy]
8
+ resolution: 64
9
+ train_episodes_per_proxy: 6
10
+ val_episodes_per_proxy: 2
11
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_smoke_v5_actionhist.pt
12
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_smoke_v5_actionhist.pt
13
+ rebuild_dataset: true
14
+ chunk_horizon: 4
15
+ rollout_horizon: 3
16
+ history_steps: 6
17
+ planner_candidates: 4
18
+ seed: 13
19
+
20
+ optim:
21
+ epochs: 4
22
+ batch_size: 8
23
+ num_workers: 0
24
+ lr: 0.001
25
+ weight_decay: 0.0001
26
+
27
+ trainer:
28
+ policy_type: interaction_state
29
+ use_bf16: true
30
+ grad_clip_norm: 1.0
31
+ freeze_backbone: true
32
+ gradient_checkpointing: false
33
+ plan_during_train: true
34
+ plan_during_eval: true
35
+ support_mode_conditioning: true
36
+ planner_mode: trainable
37
+
38
+ policy:
39
+ backbone:
40
+ model_name: openai/clip-vit-base-patch32
41
+ hidden_dim: 64
42
+ max_text_tokens: 32
43
+ freeze_backbone: true
44
+ gradient_checkpointing: false
45
+ use_dummy_backbone: true
46
+ fusion:
47
+ hidden_dim: 64
48
+ num_cameras: 3
49
+ num_layers: 2
50
+ num_heads: 4
51
+ ff_dim: 128
52
+ dropout: 0.1
53
+ proprio_dim: 32
54
+ proprio_tokens: 1
55
+ memory:
56
+ hidden_dim: 64
57
+ action_dim: 14
58
+ history_steps: 6
59
+ num_layers: 2
60
+ dropout: 0.1
61
+ memory_bank_size: 4
62
+ num_heads: 4
63
+ max_history_steps: 8
64
+ decoder:
65
+ hidden_dim: 64
66
+ num_heads: 4
67
+ num_layers: 2
68
+ ff_dim: 128
69
+ dropout: 0.1
70
+ chunk_size: 4
71
+ action_dim: 14
72
+ arm_action_dim: 7
73
+ num_candidates: 4
74
+ num_phases: 5
75
+ num_arm_roles: 4
76
+ reveal_head:
77
+ hidden_dim: 64
78
+ num_support_modes: 3
79
+ num_approach_templates: 32
80
+ rollout_horizon: 3
81
+ belief_map_size: 32
82
+ field_size: 16
83
+ num_heads: 4
84
+ predict_belief_map: true
85
+ num_phases: 5
86
+ num_arm_roles: 4
87
+ num_interaction_tokens: 8
88
+ world_model:
89
+ hidden_dim: 64
90
+ action_dim: 14
91
+ num_support_modes: 3
92
+ num_approach_templates: 32
93
+ rollout_horizon: 3
94
+ field_size: 16
95
+ num_heads: 4
96
+ num_phases: 5
97
+ num_arm_roles: 4
98
+ num_interaction_tokens: 8
99
+ planner:
100
+ hidden_dim: 64
101
+ num_candidates: 4
102
+ action_dim: 14
103
+ num_support_modes: 3
104
+ utility_margin: 0.1
105
+ num_heads: 4
106
+ num_layers: 2
107
+ num_phases: 5
108
+ num_arm_roles: 4
109
+
110
+ loss_weights:
111
+ action: 1.0
112
+ phase: 0.15
113
+ arm_role: 0.2
114
+ support_mode: 0.15
115
+ corridor: 0.2
116
+ persistence: 0.1
117
+ disturbance: 0.1
118
+ world_model: 0.25
119
+ belief: 0.05
120
+ planner_success: 0.2
121
+ planner_risk: 0.1
122
+ planner_ranking: 0.1
123
+ proposal_reconstruction: 0.2
124
+ proposal_success: 0.1
125
+ proposal_ranking: 0.1
code/reveal_vla_bimanual/train/configs/proxy_interaction_state_clip_actionhist.yaml ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_state_clip_actionhist
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction_debug
3
+ device: cuda
4
+ seed: 7
5
+ init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
6
+ init_strict: false
7
+
8
+ data:
9
+ proxies: [foliage_proxy, bag_proxy, cloth_proxy]
10
+ resolution: 224
11
+ train_episodes_per_proxy: 48
12
+ val_episodes_per_proxy: 16
13
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v5_actionhist.pt
14
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v5_actionhist.pt
15
+ rebuild_dataset: false
16
+ chunk_horizon: 8
17
+ rollout_horizon: 5
18
+ history_steps: 6
19
+ planner_candidates: 8
20
+ seed: 7
21
+
22
+ optim:
23
+ epochs: 4
24
+ batch_size: 2
25
+ num_workers: 0
26
+ lr: 0.0003
27
+ weight_decay: 0.0001
28
+
29
+ trainer:
30
+ policy_type: interaction_state
31
+ use_bf16: true
32
+ grad_clip_norm: 1.0
33
+ freeze_backbone: true
34
+ gradient_checkpointing: false
35
+ plan_during_train: true
36
+ plan_during_eval: true
37
+ support_mode_conditioning: true
38
+ planner_mode: trainable
39
+
40
+ policy:
41
+ backbone:
42
+ model_name: openai/clip-vit-base-patch32
43
+ hidden_dim: 512
44
+ max_text_tokens: 32
45
+ freeze_backbone: true
46
+ gradient_checkpointing: false
47
+ use_dummy_backbone: false
48
+ fusion:
49
+ hidden_dim: 512
50
+ num_cameras: 3
51
+ num_layers: 4
52
+ num_heads: 8
53
+ ff_dim: 2048
54
+ dropout: 0.1
55
+ proprio_dim: 32
56
+ proprio_tokens: 1
57
+ memory:
58
+ hidden_dim: 512
59
+ action_dim: 14
60
+ history_steps: 6
61
+ num_layers: 2
62
+ dropout: 0.1
63
+ memory_bank_size: 4
64
+ num_heads: 8
65
+ max_history_steps: 8
66
+ decoder:
67
+ hidden_dim: 512
68
+ num_heads: 8
69
+ num_layers: 4
70
+ ff_dim: 2048
71
+ dropout: 0.1
72
+ chunk_size: 8
73
+ action_dim: 14
74
+ arm_action_dim: 7
75
+ num_candidates: 8
76
+ num_phases: 5
77
+ num_arm_roles: 4
78
+ reveal_head:
79
+ hidden_dim: 512
80
+ num_support_modes: 3
81
+ num_approach_templates: 32
82
+ rollout_horizon: 5
83
+ belief_map_size: 32
84
+ field_size: 16
85
+ num_heads: 8
86
+ predict_belief_map: true
87
+ num_phases: 5
88
+ num_arm_roles: 4
89
+ num_interaction_tokens: 8
90
+ world_model:
91
+ hidden_dim: 512
92
+ action_dim: 14
93
+ num_support_modes: 3
94
+ num_approach_templates: 32
95
+ rollout_horizon: 5
96
+ field_size: 16
97
+ num_heads: 8
98
+ num_phases: 5
99
+ num_arm_roles: 4
100
+ num_interaction_tokens: 8
101
+ belief_map_size: 32
102
+ predict_belief_map: true
103
+ planner:
104
+ hidden_dim: 512
105
+ num_candidates: 8
106
+ action_dim: 14
107
+ num_support_modes: 3
108
+ utility_margin: 0.1
109
+ num_heads: 8
110
+ num_layers: 2
111
+ num_phases: 5
112
+ num_arm_roles: 4
113
+
114
+ loss_weights:
115
+ action: 1.0
116
+ phase: 0.1
117
+ arm_role: 0.15
118
+ support_mode: 0.1
119
+ corridor: 0.15
120
+ persistence: 0.05
121
+ disturbance: 0.05
122
+ world_model: 0.2
123
+ belief: 0.05
124
+ planner_success: 0.25
125
+ planner_risk: 0.1
126
+ planner_ranking: 0.2
127
+ proposal_reconstruction: 0.1
128
+ proposal_success: 0.15
129
+ proposal_ranking: 0.2
code/reveal_vla_bimanual/train/configs/proxy_interaction_state_recency_oracleft.yaml ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_state_recency_oracleft
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction_debug
3
+ device: cuda
4
+ seed: 13
5
+ init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
6
+ init_strict: true
7
+
8
+ data:
9
+ proxies: [foliage_proxy, bag_proxy, cloth_proxy]
10
+ resolution: 96
11
+ train_episodes_per_proxy: 48
12
+ val_episodes_per_proxy: 16
13
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v5_actionhist.pt
14
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v5_actionhist.pt
15
+ rebuild_dataset: false
16
+ chunk_horizon: 8
17
+ rollout_horizon: 5
18
+ history_steps: 6
19
+ planner_candidates: 8
20
+ seed: 13
21
+
22
+ optim:
23
+ epochs: 8
24
+ batch_size: 16
25
+ num_workers: 0
26
+ lr: 0.0003
27
+ weight_decay: 0.0001
28
+
29
+ trainer:
30
+ policy_type: interaction_state
31
+ use_bf16: true
32
+ grad_clip_norm: 1.0
33
+ freeze_backbone: true
34
+ gradient_checkpointing: false
35
+ plan_during_train: true
36
+ plan_during_eval: true
37
+ support_mode_conditioning: true
38
+ planner_mode: trainable
39
+
40
+ policy:
41
+ backbone:
42
+ model_name: openai/clip-vit-base-patch32
43
+ hidden_dim: 128
44
+ max_text_tokens: 32
45
+ freeze_backbone: true
46
+ gradient_checkpointing: false
47
+ use_dummy_backbone: true
48
+ fusion:
49
+ hidden_dim: 128
50
+ num_cameras: 3
51
+ num_layers: 2
52
+ num_heads: 4
53
+ ff_dim: 256
54
+ dropout: 0.1
55
+ proprio_dim: 32
56
+ proprio_tokens: 1
57
+ memory:
58
+ hidden_dim: 128
59
+ action_dim: 14
60
+ history_steps: 6
61
+ num_layers: 2
62
+ dropout: 0.1
63
+ memory_bank_size: 4
64
+ num_heads: 4
65
+ max_history_steps: 8
66
+ decoder:
67
+ hidden_dim: 128
68
+ num_heads: 4
69
+ num_layers: 2
70
+ ff_dim: 256
71
+ dropout: 0.1
72
+ chunk_size: 8
73
+ action_dim: 14
74
+ arm_action_dim: 7
75
+ num_candidates: 8
76
+ num_phases: 5
77
+ num_arm_roles: 4
78
+ reveal_head:
79
+ hidden_dim: 128
80
+ num_support_modes: 3
81
+ num_approach_templates: 32
82
+ rollout_horizon: 5
83
+ belief_map_size: 32
84
+ field_size: 16
85
+ num_heads: 4
86
+ predict_belief_map: true
87
+ num_phases: 5
88
+ num_arm_roles: 4
89
+ num_interaction_tokens: 8
90
+ world_model:
91
+ hidden_dim: 128
92
+ action_dim: 14
93
+ num_support_modes: 3
94
+ num_approach_templates: 32
95
+ rollout_horizon: 5
96
+ field_size: 16
97
+ num_heads: 4
98
+ num_phases: 5
99
+ num_arm_roles: 4
100
+ num_interaction_tokens: 8
101
+ planner:
102
+ hidden_dim: 128
103
+ num_candidates: 8
104
+ action_dim: 14
105
+ num_support_modes: 3
106
+ utility_margin: 0.1
107
+ num_heads: 4
108
+ num_layers: 2
109
+ num_phases: 5
110
+ num_arm_roles: 4
111
+
112
+ loss_weights:
113
+ action: 1.0
114
+ phase: 0.1
115
+ arm_role: 0.15
116
+ support_mode: 0.1
117
+ corridor: 0.15
118
+ persistence: 0.05
119
+ disturbance: 0.05
120
+ world_model: 0.2
121
+ belief: 0.05
122
+ planner_success: 0.25
123
+ planner_risk: 0.1
124
+ planner_ranking: 0.2
125
+ proposal_reconstruction: 0.1
126
+ proposal_success: 0.15
127
+ proposal_ranking: 0.2
code/reveal_vla_bimanual/train/losses.py CHANGED
@@ -34,18 +34,48 @@ def chunk_bc_loss(pred_actions: Tensor, target_actions: Tensor, mask: Tensor | N
34
  return loss.mean()
35
 
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  def reveal_state_loss(pred: dict[str, Tensor], target: dict[str, Tensor], weights: LossWeights) -> dict[str, Tensor]:
38
  losses = {}
39
  if "phase_logits" in pred:
40
- phase_map = torch.as_tensor([2, 3, 0], device=target["support_mode"].device, dtype=torch.long)
41
- phase_target = phase_map[target["support_mode"].long()]
 
 
 
 
42
  losses["phase"] = F.cross_entropy(pred["phase_logits"], phase_target)
43
  else:
44
  losses["phase"] = pred["support_mode_logits"].new_tensor(0.0)
45
  if "arm_role_logits" in pred:
46
- batch_size = pred["arm_role_logits"].shape[0]
47
- role_target = torch.as_tensor([1, 2], device=pred["arm_role_logits"].device, dtype=torch.long)
48
- role_target = role_target.unsqueeze(0).expand(batch_size, -1)
49
  role_ce = F.cross_entropy(
50
  pred["arm_role_logits"].reshape(-1, pred["arm_role_logits"].shape[-1]),
51
  role_target.reshape(-1),
@@ -106,8 +136,9 @@ def world_model_rollout_consistency_loss(pred_rollout: dict[str, Tensor], target
106
  "corridor_feasible": _expand_target(target_rollout["corridor_feasible"][..., :horizon, :, :]),
107
  "persistence_horizon": _expand_target(target_rollout["persistence_horizon"][..., :horizon, :]),
108
  "disturbance_cost": _expand_target(target_rollout["disturbance_cost"][..., :horizon]),
 
109
  }
110
- return (
111
  F.cross_entropy(
112
  pred_rollout["support_mode_logits"].reshape(-1, pred_rollout["support_mode_logits"].shape[-1]),
113
  target_rollout["support_mode"].reshape(-1).long(),
@@ -119,6 +150,19 @@ def world_model_rollout_consistency_loss(pred_rollout: dict[str, Tensor], target
119
  + F.mse_loss(pred_rollout["persistence_horizon"], target_rollout["persistence_horizon"].float())
120
  + F.mse_loss(pred_rollout["disturbance_cost"], target_rollout["disturbance_cost"].float())
121
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
 
124
  def compute_total_loss(
@@ -161,6 +205,7 @@ def compute_total_loss(
161
  "corridor_feasible": batch["candidate_rollout_corridor_feasible"],
162
  "persistence_horizon": batch["candidate_rollout_persistence_horizon"],
163
  "disturbance_cost": batch["candidate_rollout_disturbance_cost"],
 
164
  }
165
  else:
166
  rollout_target = {
@@ -168,6 +213,7 @@ def compute_total_loss(
168
  "corridor_feasible": batch["rollout_corridor_feasible"],
169
  "persistence_horizon": batch["rollout_persistence_horizon"],
170
  "disturbance_cost": batch["rollout_disturbance_cost"],
 
171
  }
172
  world_model_loss = world_model_rollout_consistency_loss(
173
  model_output["planned_rollout"],
@@ -199,6 +245,9 @@ def compute_total_loss(
199
  ranking_loss = torch.relu(0.1 - torch.sign(target_diff) * pred_diff)[ranking_mask].mean()
200
  else:
201
  ranking_loss = model_output["planner_scores"].new_tensor(0.0)
 
 
 
202
  losses["planner_success"] = success_loss
203
  losses["planner_risk"] = risk_loss
204
  losses["planner_ranking"] = ranking_loss
@@ -259,6 +308,9 @@ def compute_total_loss(
259
  ].mean()
260
  else:
261
  proposal_ranking_loss = model_output["proposal_logits"].new_tensor(0.0)
 
 
 
262
  losses["proposal_success"] = proposal_success_loss
263
  losses["proposal_ranking"] = proposal_ranking_loss
264
  total = (
 
34
  return loss.mean()
35
 
36
 
37
+ def _command_probability(command: Tensor) -> Tensor:
38
+ return (torch.tanh(command) + 1.0) * 0.5
39
+
40
+
41
+ def infer_phase_targets_from_actions(action_chunk: Tensor) -> Tensor:
42
+ open_cmd = action_chunk[..., 0]
43
+ actor_reach = _command_probability(action_chunk[..., 8])
44
+ retrieve_cmd = _command_probability(action_chunk[..., 13])
45
+
46
+ retrieve = retrieve_cmd >= 0.55
47
+ recover = open_cmd <= -0.10
48
+ reveal = open_cmd > 0.35
49
+ hold = (~retrieve) & (~recover) & (~reveal) & (actor_reach >= 0.55)
50
+
51
+ phase_target = torch.zeros_like(open_cmd, dtype=torch.long)
52
+ phase_target = torch.where(reveal, torch.ones_like(phase_target), phase_target)
53
+ phase_target = torch.where(hold, torch.full_like(phase_target, 2), phase_target)
54
+ phase_target = torch.where(retrieve, torch.full_like(phase_target, 3), phase_target)
55
+ phase_target = torch.where(recover, torch.full_like(phase_target, 4), phase_target)
56
+ return phase_target
57
+
58
+
59
+ def _role_targets_like(arm_role_logits: Tensor) -> Tensor:
60
+ role_target = torch.as_tensor([1, 2], device=arm_role_logits.device, dtype=torch.long)
61
+ expand_shape = [1] * (arm_role_logits.ndim - 2) + [2]
62
+ return role_target.view(*expand_shape).expand(*arm_role_logits.shape[:-1])
63
+
64
+
65
  def reveal_state_loss(pred: dict[str, Tensor], target: dict[str, Tensor], weights: LossWeights) -> dict[str, Tensor]:
66
  losses = {}
67
  if "phase_logits" in pred:
68
+ action_chunk = target.get("action_chunk")
69
+ if action_chunk is not None:
70
+ phase_target = infer_phase_targets_from_actions(action_chunk[:, 0])
71
+ else:
72
+ phase_map = torch.as_tensor([2, 3, 0], device=target["support_mode"].device, dtype=torch.long)
73
+ phase_target = phase_map[target["support_mode"].long()]
74
  losses["phase"] = F.cross_entropy(pred["phase_logits"], phase_target)
75
  else:
76
  losses["phase"] = pred["support_mode_logits"].new_tensor(0.0)
77
  if "arm_role_logits" in pred:
78
+ role_target = _role_targets_like(pred["arm_role_logits"])
 
 
79
  role_ce = F.cross_entropy(
80
  pred["arm_role_logits"].reshape(-1, pred["arm_role_logits"].shape[-1]),
81
  role_target.reshape(-1),
 
136
  "corridor_feasible": _expand_target(target_rollout["corridor_feasible"][..., :horizon, :, :]),
137
  "persistence_horizon": _expand_target(target_rollout["persistence_horizon"][..., :horizon, :]),
138
  "disturbance_cost": _expand_target(target_rollout["disturbance_cost"][..., :horizon]),
139
+ "action_chunk": _expand_target(target_rollout["action_chunk"][..., :horizon, :]),
140
  }
141
+ loss = (
142
  F.cross_entropy(
143
  pred_rollout["support_mode_logits"].reshape(-1, pred_rollout["support_mode_logits"].shape[-1]),
144
  target_rollout["support_mode"].reshape(-1).long(),
 
150
  + F.mse_loss(pred_rollout["persistence_horizon"], target_rollout["persistence_horizon"].float())
151
  + F.mse_loss(pred_rollout["disturbance_cost"], target_rollout["disturbance_cost"].float())
152
  )
153
+ if "phase_logits" in pred_rollout:
154
+ phase_target = infer_phase_targets_from_actions(target_rollout["action_chunk"])
155
+ loss = loss + 0.5 * F.cross_entropy(
156
+ pred_rollout["phase_logits"].reshape(-1, pred_rollout["phase_logits"].shape[-1]),
157
+ phase_target.reshape(-1),
158
+ )
159
+ if "arm_role_logits" in pred_rollout:
160
+ role_target = _role_targets_like(pred_rollout["arm_role_logits"])
161
+ loss = loss + 0.25 * F.cross_entropy(
162
+ pred_rollout["arm_role_logits"].reshape(-1, pred_rollout["arm_role_logits"].shape[-1]),
163
+ role_target.reshape(-1),
164
+ )
165
+ return loss
166
 
167
 
168
  def compute_total_loss(
 
205
  "corridor_feasible": batch["candidate_rollout_corridor_feasible"],
206
  "persistence_horizon": batch["candidate_rollout_persistence_horizon"],
207
  "disturbance_cost": batch["candidate_rollout_disturbance_cost"],
208
+ "action_chunk": batch["candidate_action_chunks"],
209
  }
210
  else:
211
  rollout_target = {
 
213
  "corridor_feasible": batch["rollout_corridor_feasible"],
214
  "persistence_horizon": batch["rollout_persistence_horizon"],
215
  "disturbance_cost": batch["rollout_disturbance_cost"],
216
+ "action_chunk": batch["action_chunk"],
217
  }
218
  world_model_loss = world_model_rollout_consistency_loss(
219
  model_output["planned_rollout"],
 
245
  ranking_loss = torch.relu(0.1 - torch.sign(target_diff) * pred_diff)[ranking_mask].mean()
246
  else:
247
  ranking_loss = model_output["planner_scores"].new_tensor(0.0)
248
+ oracle_target = utility_target.argmax(dim=-1)
249
+ oracle_loss = F.cross_entropy(model_output["planner_scores"], oracle_target)
250
+ ranking_loss = ranking_loss + 0.5 * oracle_loss
251
  losses["planner_success"] = success_loss
252
  losses["planner_risk"] = risk_loss
253
  losses["planner_ranking"] = ranking_loss
 
308
  ].mean()
309
  else:
310
  proposal_ranking_loss = model_output["proposal_logits"].new_tensor(0.0)
311
+ proposal_oracle_target = proposal_utility.argmax(dim=-1)
312
+ proposal_oracle_loss = F.cross_entropy(proposal_logits, proposal_oracle_target)
313
+ proposal_ranking_loss = proposal_ranking_loss + 0.5 * proposal_oracle_loss
314
  losses["proposal_success"] = proposal_success_loss
315
  losses["proposal_ranking"] = proposal_ranking_loss
316
  total = (
code/reveal_vla_bimanual/train/run_experiment.py CHANGED
@@ -61,6 +61,32 @@ def _loss_weights_from_omega(cfg: Any) -> LossWeights:
61
  return LossWeights(**OmegaConf.to_container(cfg, resolve=True))
62
 
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  def _build_dataset_from_config(data_cfg: Any, split: str) -> dict[str, Any]:
65
  dataset_path = data_cfg.get(f"{split}_dataset_path")
66
  if dataset_path and Path(dataset_path).exists() and not data_cfg.get("rebuild_dataset", False):
@@ -138,6 +164,7 @@ def main() -> None:
138
  trainer_config = _trainer_config_from_omega(cfg.trainer)
139
  loss_weights = _loss_weights_from_omega(cfg.loss_weights)
140
  model = build_policy(policy_config, trainer_config).to(device)
 
141
  frozen_modules = apply_planner_mode(model, trainer_config)
142
  trainable_parameters = [parameter for parameter in model.parameters() if parameter.requires_grad]
143
  optimizer = torch.optim.AdamW(trainable_parameters, lr=float(cfg.optim.lr), weight_decay=float(cfg.optim.weight_decay))
@@ -166,6 +193,7 @@ def main() -> None:
166
  "texts": moved["texts"],
167
  "history_images": moved.get("history_images"),
168
  "history_proprio": moved.get("history_proprio"),
 
169
  }
170
  if policy_supports_planning(trainer_config.policy_type):
171
  forward_kwargs["plan"] = planner_enabled(trainer_config, during_eval=True)
@@ -195,6 +223,7 @@ def main() -> None:
195
  "state_dict": model.state_dict(),
196
  "history": history,
197
  "data_resolution": int(cfg.data.resolution),
 
198
  },
199
  best_checkpoint,
200
  )
@@ -212,7 +241,9 @@ def main() -> None:
212
  "num_val_samples": len(val_bundle["samples"]),
213
  "planner_mode": trainer_config.planner_mode,
214
  "frozen_modules": frozen_modules,
 
215
  }
 
216
  print(json.dumps(summary, indent=2))
217
 
218
 
 
61
  return LossWeights(**OmegaConf.to_container(cfg, resolve=True))
62
 
63
 
64
+ def _load_init_checkpoint(model: torch.nn.Module, checkpoint_path: str | None, strict: bool) -> dict[str, Any] | None:
65
+ if not checkpoint_path:
66
+ return None
67
+ checkpoint = torch.load(Path(checkpoint_path), map_location="cpu", weights_only=False)
68
+ state_dict = checkpoint["state_dict"]
69
+ filtered_state_dict = state_dict
70
+ skipped_keys: list[str] = []
71
+ if not strict:
72
+ current_state = model.state_dict()
73
+ filtered_state_dict = {}
74
+ for key, value in state_dict.items():
75
+ current_value = current_state.get(key)
76
+ if current_value is None or current_value.shape != value.shape:
77
+ skipped_keys.append(key)
78
+ continue
79
+ filtered_state_dict[key] = value
80
+ incompatible = model.load_state_dict(filtered_state_dict, strict=strict)
81
+ return {
82
+ "path": str(checkpoint_path),
83
+ "loaded_keys": len(filtered_state_dict),
84
+ "skipped_shape_mismatch_keys": skipped_keys,
85
+ "missing_keys": list(incompatible.missing_keys),
86
+ "unexpected_keys": list(incompatible.unexpected_keys),
87
+ }
88
+
89
+
90
  def _build_dataset_from_config(data_cfg: Any, split: str) -> dict[str, Any]:
91
  dataset_path = data_cfg.get(f"{split}_dataset_path")
92
  if dataset_path and Path(dataset_path).exists() and not data_cfg.get("rebuild_dataset", False):
 
164
  trainer_config = _trainer_config_from_omega(cfg.trainer)
165
  loss_weights = _loss_weights_from_omega(cfg.loss_weights)
166
  model = build_policy(policy_config, trainer_config).to(device)
167
+ init_info = _load_init_checkpoint(model, cfg.get("init_checkpoint"), bool(cfg.get("init_strict", False)))
168
  frozen_modules = apply_planner_mode(model, trainer_config)
169
  trainable_parameters = [parameter for parameter in model.parameters() if parameter.requires_grad]
170
  optimizer = torch.optim.AdamW(trainable_parameters, lr=float(cfg.optim.lr), weight_decay=float(cfg.optim.weight_decay))
 
193
  "texts": moved["texts"],
194
  "history_images": moved.get("history_images"),
195
  "history_proprio": moved.get("history_proprio"),
196
+ "history_actions": moved.get("history_actions"),
197
  }
198
  if policy_supports_planning(trainer_config.policy_type):
199
  forward_kwargs["plan"] = planner_enabled(trainer_config, during_eval=True)
 
223
  "state_dict": model.state_dict(),
224
  "history": history,
225
  "data_resolution": int(cfg.data.resolution),
226
+ "init_info": init_info,
227
  },
228
  best_checkpoint,
229
  )
 
241
  "num_val_samples": len(val_bundle["samples"]),
242
  "planner_mode": trainer_config.planner_mode,
243
  "frozen_modules": frozen_modules,
244
+ "init_info": init_info,
245
  }
246
+ (output_dir / "summary.json").write_text(json.dumps(summary, indent=2), encoding="utf-8")
247
  print(json.dumps(summary, indent=2))
248
 
249
 
code/reveal_vla_bimanual/train/run_rlbench_experiment.py CHANGED
@@ -133,6 +133,7 @@ def main() -> None:
133
  "texts": moved["texts"],
134
  "history_images": moved.get("history_images"),
135
  "history_proprio": moved.get("history_proprio"),
 
136
  }
137
  if policy_supports_planning(trainer_config.policy_type):
138
  forward_kwargs["plan"] = planner_enabled(trainer_config, during_eval=True)
 
133
  "texts": moved["texts"],
134
  "history_images": moved.get("history_images"),
135
  "history_proprio": moved.get("history_proprio"),
136
+ "history_actions": moved.get("history_actions"),
137
  }
138
  if policy_supports_planning(trainer_config.policy_type):
139
  forward_kwargs["plan"] = planner_enabled(trainer_config, during_eval=True)
code/reveal_vla_bimanual/train/smoke_checks.py CHANGED
@@ -139,11 +139,13 @@ def _synthetic_rlbench_batch(
139
  history_images = torch.rand(batch_size, history_steps, 3, 3, resolution, resolution, device=device)
140
  proprio = torch.rand(batch_size, 32, device=device)
141
  history_proprio = torch.rand(batch_size, history_steps, 32, device=device)
 
142
  action_chunk = torch.rand(batch_size, chunk_size, 14, device=device)
143
  return {
144
  "images": images,
145
  "history_images": history_images,
146
  "history_proprio": history_proprio,
 
147
  "proprio": proprio,
148
  "texts": ["synthetic dual-arm RLBench smoke task"] * batch_size,
149
  "action_chunk": action_chunk,
@@ -207,6 +209,7 @@ def main() -> None:
207
  texts=proxy_batch["texts"],
208
  history_images=proxy_batch.get("history_images"),
209
  history_proprio=proxy_batch.get("history_proprio"),
 
210
  plan=True,
211
  candidate_chunks_override=proxy_batch["candidate_action_chunks"],
212
  )
@@ -245,6 +248,7 @@ def main() -> None:
245
  texts=rlbench_batch["texts"],
246
  history_images=rlbench_batch.get("history_images"),
247
  history_proprio=rlbench_batch.get("history_proprio"),
 
248
  plan=True,
249
  )
250
  _check_output_shapes(
 
139
  history_images = torch.rand(batch_size, history_steps, 3, 3, resolution, resolution, device=device)
140
  proprio = torch.rand(batch_size, 32, device=device)
141
  history_proprio = torch.rand(batch_size, history_steps, 32, device=device)
142
+ history_actions = torch.rand(batch_size, history_steps, 14, device=device)
143
  action_chunk = torch.rand(batch_size, chunk_size, 14, device=device)
144
  return {
145
  "images": images,
146
  "history_images": history_images,
147
  "history_proprio": history_proprio,
148
+ "history_actions": history_actions,
149
  "proprio": proprio,
150
  "texts": ["synthetic dual-arm RLBench smoke task"] * batch_size,
151
  "action_chunk": action_chunk,
 
209
  texts=proxy_batch["texts"],
210
  history_images=proxy_batch.get("history_images"),
211
  history_proprio=proxy_batch.get("history_proprio"),
212
+ history_actions=proxy_batch.get("history_actions"),
213
  plan=True,
214
  candidate_chunks_override=proxy_batch["candidate_action_chunks"],
215
  )
 
248
  texts=rlbench_batch["texts"],
249
  history_images=rlbench_batch.get("history_images"),
250
  history_proprio=rlbench_batch.get("history_proprio"),
251
+ history_actions=rlbench_batch.get("history_actions"),
252
  plan=True,
253
  )
254
  _check_output_shapes(
code/reveal_vla_bimanual/train/trainer.py CHANGED
@@ -86,6 +86,7 @@ class BimanualTrainer:
86
  "language_tokens": language_tokens if isinstance(language_tokens, dict) else None,
87
  "history_images": batch.get("history_images"),
88
  "history_proprio": batch.get("history_proprio"),
 
89
  }
90
  if policy_supports_planning(self.config.policy_type):
91
  forward_kwargs["plan"] = planner_enabled(self.config, during_eval=False)
 
86
  "language_tokens": language_tokens if isinstance(language_tokens, dict) else None,
87
  "history_images": batch.get("history_images"),
88
  "history_proprio": batch.get("history_proprio"),
89
+ "history_actions": batch.get("history_actions"),
90
  }
91
  if policy_supports_planning(self.config.policy_type):
92
  forward_kwargs["plan"] = planner_enabled(self.config, during_eval=False)
environment/validate_same_machine.sh CHANGED
@@ -14,6 +14,27 @@ RUNTIME_DIR="${ROOT_DIR}/runtime"
14
  mkdir -p "${RUNTIME_DIR}"
15
  chmod 700 "${RUNTIME_DIR}"
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  run_in_rlbench_env() {
18
  local driver_version=""
19
  local driver_branch=""
@@ -34,6 +55,8 @@ run_in_rlbench_env() {
34
  "${MAMBA_BIN}" run -r "${MAMBA_ROOT_PREFIX}" -p "${ENV_PREFIX}" "$@"
35
  }
36
 
 
 
37
  echo "Display check"
38
  DISPLAY="${DISPLAY}" glxinfo -B
39
 
 
14
  mkdir -p "${RUNTIME_DIR}"
15
  chmod 700 "${RUNTIME_DIR}"
16
 
17
+ ensure_rlbench_display() {
18
+ if DISPLAY="${DISPLAY}" xdpyinfo >/dev/null 2>&1; then
19
+ return 0
20
+ fi
21
+
22
+ local driver_version=""
23
+ local driver_branch=""
24
+ if command -v nvidia-smi >/dev/null 2>&1; then
25
+ driver_version="$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 || true)"
26
+ driver_branch="${driver_version%%.*}"
27
+ fi
28
+
29
+ if [[ -n "${driver_branch}" && ! -f "${ROOT_DIR}/system_shims/nvidia${driver_branch}/usr/lib/x86_64-linux-gnu/nvidia/xorg/libglxserver_nvidia.so" ]]; then
30
+ echo "RLBench X shims missing; installing headless X prerequisites"
31
+ ROOT_DIR="${ROOT_DIR}" "${PROJECT_DIR}/scripts/setup_rlbench_headless_x.sh"
32
+ fi
33
+
34
+ echo "Starting RLBench X server on ${DISPLAY}"
35
+ ROOT_DIR="${ROOT_DIR}" DISPLAY_NUM="${DISPLAY_NUM}" "${PROJECT_DIR}/scripts/start_rlbench_x.sh"
36
+ }
37
+
38
  run_in_rlbench_env() {
39
  local driver_version=""
40
  local driver_branch=""
 
55
  "${MAMBA_BIN}" run -r "${MAMBA_ROOT_PREFIX}" -p "${ENV_PREFIX}" "$@"
56
  }
57
 
58
+ ensure_rlbench_display
59
+
60
  echo "Display check"
61
  DISPLAY="${DISPLAY}" glxinfo -B
62