lsnu committed on
Commit
d5d49c1
·
verified ·
1 Parent(s): 572d64a

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. FILE_MANIFEST.txt +72 -0
  2. MODEL_INDEX.md +75 -0
  3. README.md +76 -0
  4. artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.json +15 -0
  5. artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.md +13 -0
  6. artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.json +15 -0
  7. artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.md +13 -0
  8. artifacts/outputs/interaction_debug/ablation_no_world_model_actionhist/reveal_benchmark.md +13 -0
  9. artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.json +15 -0
  10. artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.md +13 -0
  11. artifacts/outputs/interaction_debug/chunk_debug_trace.json +140 -0
  12. artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/diagnostics/proxy_diagnostics.json +7 -0
  13. artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/metrics.json +174 -0
  14. artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/config_resolved.yaml +127 -0
  15. artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/diagnostics/proxy_diagnostics.json +7 -0
  16. artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/metrics.json +174 -0
  17. artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/summary.json +573 -0
  18. artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/config_resolved.yaml +125 -0
  19. artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/diagnostics/proxy_diagnostics.json +7 -0
  20. artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/metrics.json +346 -0
  21. artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/summary.json +16 -0
  22. artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.json +28 -0
  23. artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.md +25 -0
  24. artifacts/outputs/interaction_debug/reveal_eval_commit2_compare/reveal_benchmark.json +41 -0
  25. artifacts/outputs/interaction_debug/reveal_eval_commit2_compare/reveal_benchmark.md +37 -0
  26. artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_recency_evalonly/reveal_benchmark.json +15 -0
  27. artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_recency_evalonly/reveal_benchmark.md +13 -0
  28. artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.json +41 -0
  29. artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.md +37 -0
  30. artifacts/outputs/interaction_debug/reveal_eval_interaction_recency_oracleft_commit4_compare/reveal_benchmark.json +41 -0
  31. artifacts/outputs/interaction_debug/reveal_eval_interaction_recency_oracleft_commit4_compare/reveal_benchmark.md +37 -0
  32. artifacts/outputs/interaction_debug/smoke_checks_actionhist/smoke_checks.json +157 -0
  33. code/reveal_vla_bimanual/eval/run_proxy_diagnostics.py +1 -0
  34. code/reveal_vla_bimanual/eval/run_reveal_benchmark.py +45 -13
  35. code/reveal_vla_bimanual/eval/run_rlbench_rollout_eval.py +112 -18
  36. code/reveal_vla_bimanual/models/backbones.py +1 -1
  37. code/reveal_vla_bimanual/models/observation_memory.py +38 -3
  38. code/reveal_vla_bimanual/models/policy.py +14 -2
  39. code/reveal_vla_bimanual/sim_reveal/dataset.py +17 -2
  40. code/reveal_vla_bimanual/sim_rlbench/dataset.py +27 -2
  41. code/reveal_vla_bimanual/train/configs/proxy_interaction_state_actionhist.yaml +125 -0
  42. code/reveal_vla_bimanual/train/configs/proxy_interaction_state_actionhist_smoke.yaml +125 -0
  43. code/reveal_vla_bimanual/train/configs/proxy_interaction_state_clip_actionhist.yaml +129 -0
  44. code/reveal_vla_bimanual/train/configs/proxy_interaction_state_recency_oracleft.yaml +127 -0
  45. code/reveal_vla_bimanual/train/losses.py +58 -6
  46. code/reveal_vla_bimanual/train/run_experiment.py +31 -0
  47. code/reveal_vla_bimanual/train/run_rlbench_experiment.py +1 -0
  48. code/reveal_vla_bimanual/train/smoke_checks.py +4 -0
  49. code/reveal_vla_bimanual/train/trainer.py +1 -0
  50. environment/validate_same_machine.sh +23 -0
FILE_MANIFEST.txt CHANGED
@@ -3,11 +3,17 @@
3
  ./MODEL_INDEX.md
4
  ./README.md
5
  ./artifacts/data/reveal_proxy/proxy_train_clip224_v4_noleak_counterfactual.pt
 
6
  ./artifacts/data/reveal_proxy/proxy_train_smoke_v4.pt
 
7
  ./artifacts/data/reveal_proxy/proxy_train_v4_noleak_counterfactual.pt
 
8
  ./artifacts/data/reveal_proxy/proxy_val_clip224_v4_noleak_counterfactual.pt
 
9
  ./artifacts/data/reveal_proxy/proxy_val_smoke_v4.pt
 
10
  ./artifacts/data/reveal_proxy/proxy_val_v4_noleak_counterfactual.pt
 
11
  ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/front_rgb/rgb_0000.png
12
  ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/front_rgb/rgb_0001.png
13
  ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/low_dim_obs.pkl
@@ -98,6 +104,68 @@
98
  ./artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.json
99
  ./artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.md
100
  ./artifacts/outputs/interaction/smoke_checks/smoke_checks.json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  ./artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/checkpoint_best.pt
102
  ./artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/config_resolved.yaml
103
  ./artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/diagnostics/proxy_diagnostics.json
@@ -256,6 +324,10 @@
256
  ./code/reveal_vla_bimanual/train/configs/proxy_backbone_only_clip.yaml
257
  ./code/reveal_vla_bimanual/train/configs/proxy_backbone_only_smoke.yaml
258
  ./code/reveal_vla_bimanual/train/configs/proxy_interaction_state.yaml
 
 
 
 
259
  ./code/reveal_vla_bimanual/train/configs/proxy_interaction_state_smoke.yaml
260
  ./code/reveal_vla_bimanual/train/configs/proxy_reveal_state.yaml
261
  ./code/reveal_vla_bimanual/train/configs/proxy_reveal_state_clip.yaml
 
3
  ./MODEL_INDEX.md
4
  ./README.md
5
  ./artifacts/data/reveal_proxy/proxy_train_clip224_v4_noleak_counterfactual.pt
6
+ ./artifacts/data/reveal_proxy/proxy_train_clip224_v5_actionhist.pt
7
  ./artifacts/data/reveal_proxy/proxy_train_smoke_v4.pt
8
+ ./artifacts/data/reveal_proxy/proxy_train_smoke_v5_actionhist.pt
9
  ./artifacts/data/reveal_proxy/proxy_train_v4_noleak_counterfactual.pt
10
+ ./artifacts/data/reveal_proxy/proxy_train_v5_actionhist.pt
11
  ./artifacts/data/reveal_proxy/proxy_val_clip224_v4_noleak_counterfactual.pt
12
+ ./artifacts/data/reveal_proxy/proxy_val_clip224_v5_actionhist.pt
13
  ./artifacts/data/reveal_proxy/proxy_val_smoke_v4.pt
14
+ ./artifacts/data/reveal_proxy/proxy_val_smoke_v5_actionhist.pt
15
  ./artifacts/data/reveal_proxy/proxy_val_v4_noleak_counterfactual.pt
16
+ ./artifacts/data/reveal_proxy/proxy_val_v5_actionhist.pt
17
  ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/front_rgb/rgb_0000.png
18
  ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/front_rgb/rgb_0001.png
19
  ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/low_dim_obs.pkl
 
104
  ./artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.json
105
  ./artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.md
106
  ./artifacts/outputs/interaction/smoke_checks/smoke_checks.json
107
+ ./artifacts/outputs/interaction_debug/ablation_no_interaction_head_actionhist/reveal_benchmark.json
108
+ ./artifacts/outputs/interaction_debug/ablation_no_interaction_head_actionhist/reveal_benchmark.md
109
+ ./artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.json
110
+ ./artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.md
111
+ ./artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.json
112
+ ./artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.md
113
+ ./artifacts/outputs/interaction_debug/ablation_no_world_model_actionhist/reveal_benchmark.json
114
+ ./artifacts/outputs/interaction_debug/ablation_no_world_model_actionhist/reveal_benchmark.md
115
+ ./artifacts/outputs/interaction_debug/ablation_none_actionhist/reveal_benchmark.json
116
+ ./artifacts/outputs/interaction_debug/ablation_none_actionhist/reveal_benchmark.md
117
+ ./artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.json
118
+ ./artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.md
119
+ ./artifacts/outputs/interaction_debug/chunk_debug_trace.json
120
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
121
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/config_resolved.yaml
122
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/diagnostics/proxy_diagnostics.json
123
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/metrics.json
124
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/checkpoint_best.pt
125
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/config_resolved.yaml
126
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/diagnostics/proxy_diagnostics.json
127
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/metrics.json
128
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/checkpoint_best.pt
129
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/config_resolved.yaml
130
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/diagnostics/proxy_diagnostics.json
131
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/metrics.json
132
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/summary.json
133
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/checkpoint_best.pt
134
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/config_resolved.yaml
135
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/diagnostics/proxy_diagnostics.json
136
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/metrics.json
137
+ ./artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/summary.json
138
+ ./artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.json
139
+ ./artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.md
140
+ ./artifacts/outputs/interaction_debug/reveal_eval_commit2_compare/reveal_benchmark.json
141
+ ./artifacts/outputs/interaction_debug/reveal_eval_commit2_compare/reveal_benchmark.md
142
+ ./artifacts/outputs/interaction_debug/reveal_eval_commit8_compare/reveal_benchmark.json
143
+ ./artifacts/outputs/interaction_debug/reveal_eval_commit8_compare/reveal_benchmark.md
144
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_commit4/reveal_benchmark.json
145
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_commit4/reveal_benchmark.md
146
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_recency_evalonly/reveal_benchmark.json
147
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_recency_evalonly/reveal_benchmark.md
148
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_smoke/reveal_benchmark.json
149
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_smoke/reveal_benchmark.md
150
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_smoke_commit4_short/reveal_benchmark.json
151
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_smoke_commit4_short/reveal_benchmark.md
152
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.json
153
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.md
154
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_recency_oracleft_commit4_compare/reveal_benchmark.json
155
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_recency_oracleft_commit4_compare/reveal_benchmark.md
156
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_rolefix_full_commit4/reveal_benchmark.json
157
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_rolefix_full_commit4/reveal_benchmark.md
158
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_rolefix_smoke_commit4_short/reveal_benchmark.json
159
+ ./artifacts/outputs/interaction_debug/reveal_eval_interaction_rolefix_smoke_commit4_short/reveal_benchmark.md
160
+ ./artifacts/outputs/interaction_debug/reveal_eval_old_no_leak_baselines_commit4/reveal_benchmark.json
161
+ ./artifacts/outputs/interaction_debug/reveal_eval_old_no_leak_baselines_commit4/reveal_benchmark.md
162
+ ./artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_clipped/rollout_eval.json
163
+ ./artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_clipped/rollout_eval.md
164
+ ./artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_recovered/rollout_eval.json
165
+ ./artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_recovered/rollout_eval.md
166
+ ./artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_rerun/rollout_eval.json
167
+ ./artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_rerun/rollout_eval.md
168
+ ./artifacts/outputs/interaction_debug/smoke_checks_actionhist/smoke_checks.json
169
  ./artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/checkpoint_best.pt
170
  ./artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/config_resolved.yaml
171
  ./artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/diagnostics/proxy_diagnostics.json
 
324
  ./code/reveal_vla_bimanual/train/configs/proxy_backbone_only_clip.yaml
325
  ./code/reveal_vla_bimanual/train/configs/proxy_backbone_only_smoke.yaml
326
  ./code/reveal_vla_bimanual/train/configs/proxy_interaction_state.yaml
327
+ ./code/reveal_vla_bimanual/train/configs/proxy_interaction_state_actionhist.yaml
328
+ ./code/reveal_vla_bimanual/train/configs/proxy_interaction_state_actionhist_smoke.yaml
329
+ ./code/reveal_vla_bimanual/train/configs/proxy_interaction_state_clip_actionhist.yaml
330
+ ./code/reveal_vla_bimanual/train/configs/proxy_interaction_state_recency_oracleft.yaml
331
  ./code/reveal_vla_bimanual/train/configs/proxy_interaction_state_smoke.yaml
332
  ./code/reveal_vla_bimanual/train/configs/proxy_reveal_state.yaml
333
  ./code/reveal_vla_bimanual/train/configs/proxy_reveal_state_clip.yaml
MODEL_INDEX.md CHANGED
@@ -40,6 +40,18 @@ This file lists the uploaded checkpoints, datasets, and raw report files referen
40
  - `artifacts/data/reveal_proxy/proxy_train_smoke_v4.pt`
41
  - smoke val dataset
42
  - `artifacts/data/reveal_proxy/proxy_val_smoke_v4.pt`
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  ## Raw Benchmark Reports
45
 
@@ -95,6 +107,63 @@ This file lists the uploaded checkpoints, datasets, and raw report files referen
95
  - interaction-state rolefix full benchmark JSON
96
  - `artifacts/outputs/interaction_rolefix_full/reveal_eval_interaction/reveal_benchmark.json`
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  ## RLBench Two-Robot Smoke Outputs
99
 
100
  - import smoke JSON
@@ -115,6 +184,12 @@ This file lists the uploaded checkpoints, datasets, and raw report files referen
115
  - `artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.json`
116
  - RLBench open_drawer rollout eval Markdown
117
  - `artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.md`
 
 
 
 
 
 
118
  - RLBench smoke dataset root
119
  - `artifacts/data/rlbench_smoke_open_drawer/`
120
 
 
40
  - `artifacts/data/reveal_proxy/proxy_train_smoke_v4.pt`
41
  - smoke val dataset
42
  - `artifacts/data/reveal_proxy/proxy_val_smoke_v4.pt`
43
+ - actionhist train dataset
44
+ - `artifacts/data/reveal_proxy/proxy_train_v5_actionhist.pt`
45
+ - actionhist val dataset
46
+ - `artifacts/data/reveal_proxy/proxy_val_v5_actionhist.pt`
47
+ - actionhist smoke train dataset
48
+ - `artifacts/data/reveal_proxy/proxy_train_smoke_v5_actionhist.pt`
49
+ - actionhist smoke val dataset
50
+ - `artifacts/data/reveal_proxy/proxy_val_smoke_v5_actionhist.pt`
51
+ - CLIP actionhist train dataset
52
+ - `artifacts/data/reveal_proxy/proxy_train_clip224_v5_actionhist.pt`
53
+ - CLIP actionhist val dataset
54
+ - `artifacts/data/reveal_proxy/proxy_val_clip224_v5_actionhist.pt`
55
 
56
  ## Raw Benchmark Reports
57
 
 
107
  - interaction-state rolefix full benchmark JSON
108
  - `artifacts/outputs/interaction_rolefix_full/reveal_eval_interaction/reveal_benchmark.json`
109
 
110
+ ## Interaction Debug Outputs
111
+
112
+ - actionhist smoke checkpoint
113
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/checkpoint_best.pt`
114
+ - actionhist smoke metrics
115
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/metrics.json`
116
+ - actionhist smoke diagnostics
117
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/diagnostics/proxy_diagnostics.json`
118
+ - actionhist full checkpoint
119
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt`
120
+ - actionhist full metrics
121
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/metrics.json`
122
+ - actionhist full diagnostics
123
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/diagnostics/proxy_diagnostics.json`
124
+ - recency-oracleft full checkpoint
125
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/checkpoint_best.pt`
126
+ - recency-oracleft full metrics
127
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/metrics.json`
128
+ - recency-oracleft full summary
129
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/summary.json`
130
+ - recency-oracleft diagnostics
131
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/diagnostics/proxy_diagnostics.json`
132
+ - CLIP actionhist full checkpoint
133
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/checkpoint_best.pt`
134
+ - CLIP actionhist full metrics
135
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/metrics.json`
136
+ - CLIP actionhist full summary
137
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/summary.json`
138
+ - CLIP actionhist diagnostics
139
+ - `artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/diagnostics/proxy_diagnostics.json`
140
+ - corrected interaction benchmark JSON
141
+ - `artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_commit4/reveal_benchmark.json`
142
+ - corrected baseline compare benchmark JSON
143
+ - `artifacts/outputs/interaction_debug/reveal_eval_old_no_leak_baselines_commit4/reveal_benchmark.json`
144
+ - corrected CLIP baseline benchmark JSON
145
+ - `artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.json`
146
+ - corrected CLIP interaction compare benchmark JSON
147
+ - `artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.json`
148
+ - corrected recency-oracleft compare benchmark JSON
149
+ - `artifacts/outputs/interaction_debug/reveal_eval_interaction_recency_oracleft_commit4_compare/reveal_benchmark.json`
150
+ - actionhist ablation full benchmark JSON
151
+ - `artifacts/outputs/interaction_debug/ablation_none_actionhist/reveal_benchmark.json`
152
+ - actionhist ablation no-interaction-head benchmark JSON
153
+ - `artifacts/outputs/interaction_debug/ablation_no_interaction_head_actionhist/reveal_benchmark.json`
154
+ - actionhist ablation no-world-model benchmark JSON
155
+ - `artifacts/outputs/interaction_debug/ablation_no_world_model_actionhist/reveal_benchmark.json`
156
+ - actionhist ablation no-planner benchmark JSON
157
+ - `artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.json`
158
+ - actionhist ablation no-role-tokens benchmark JSON
159
+ - `artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.json`
160
+ - actionhist ablation short-history benchmark JSON
161
+ - `artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.json`
162
+ - chunk debug trace
163
+ - `artifacts/outputs/interaction_debug/chunk_debug_trace.json`
164
+ - actionhist smoke checks
165
+ - `artifacts/outputs/interaction_debug/smoke_checks_actionhist/smoke_checks.json`
166
+
167
  ## RLBench Two-Robot Smoke Outputs
168
 
169
  - import smoke JSON
 
184
  - `artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.json`
185
  - RLBench open_drawer rollout eval Markdown
186
  - `artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.md`
187
+ - RLBench open_drawer rollout eval rerun JSON
188
+ - `artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_rerun/rollout_eval.json`
189
+ - RLBench open_drawer rollout eval clipped JSON
190
+ - `artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_clipped/rollout_eval.json`
191
+ - RLBench open_drawer rollout eval recovered JSON
192
+ - `artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_recovered/rollout_eval.json`
193
  - RLBench smoke dataset root
194
  - `artifacts/data/rlbench_smoke_open_drawer/`
195
 
README.md CHANGED
@@ -166,8 +166,84 @@ The smoke output file is:
166
  - `artifacts/data/reveal_proxy/proxy_val_v4_noleak_counterfactual.pt`
167
  - `artifacts/data/reveal_proxy/proxy_train_clip224_v4_noleak_counterfactual.pt`
168
  - `artifacts/data/reveal_proxy/proxy_val_clip224_v4_noleak_counterfactual.pt`
 
 
 
 
169
  - `artifacts/data/rlbench_smoke_open_drawer/`
170
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  ## Recreate The Same Software Layout
172
 
173
  Use:
 
166
  - `artifacts/data/reveal_proxy/proxy_val_v4_noleak_counterfactual.pt`
167
  - `artifacts/data/reveal_proxy/proxy_train_clip224_v4_noleak_counterfactual.pt`
168
  - `artifacts/data/reveal_proxy/proxy_val_clip224_v4_noleak_counterfactual.pt`
169
+ - `artifacts/data/reveal_proxy/proxy_train_v5_actionhist.pt`
170
+ - `artifacts/data/reveal_proxy/proxy_val_v5_actionhist.pt`
171
+ - `artifacts/data/reveal_proxy/proxy_train_clip224_v5_actionhist.pt`
172
+ - `artifacts/data/reveal_proxy/proxy_val_clip224_v5_actionhist.pt`
173
  - `artifacts/data/rlbench_smoke_open_drawer/`
174
 
175
+ ## Raw Follow-Up Interaction Runs
176
+
177
+ ### Proxy Training Endpoints
178
+
179
+ | Run | Checkpoint | Final train total | Final val total | Metrics or summary |
180
+ | --- | --- | ---: | ---: | --- |
181
+ | interaction-state actionhist smoke | `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/checkpoint_best.pt` | 1.229741208255291 | 1.1121365427970886 | `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/metrics.json` |
182
+ | interaction-state actionhist full | `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt` | 0.7432626067979089 | 0.8655468797630735 | `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/metrics.json` |
183
+ | interaction-state recency oracleft full | `artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/checkpoint_best.pt` | 0.9377426480253538 | 1.211510909928216 | `artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/summary.json` |
184
+ | interaction-state CLIP actionhist full | `artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/checkpoint_best.pt` | 1.2094011244349454 | 1.1205205075324527 | `artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/summary.json` |
185
+
186
+ ### Proxy Benchmark Results With Committed-Chunk Evaluator
187
+
188
+ Source files:
189
+
190
+ - `artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_commit4/reveal_benchmark.json`
191
+ - `artifacts/outputs/interaction_debug/reveal_eval_old_no_leak_baselines_commit4/reveal_benchmark.json`
192
+
193
+ | Model | Mean success | foliage_proxy | bag_proxy | cloth_proxy | visibility_integral | corridor_availability | reocclusion_rate | persistence_horizon_mae | disturbance_cost |
194
+ | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
195
+ | interaction | 0.5277777777777778 | 0.4166666666666667 | 0.5416666666666666 | 0.625 | 32.84789120488696 | 0.8711970953477753 | 0.003125 | 1.1544888946683267 | 0.4288607043110662 |
196
+ | backbone | 0.5555555555555555 | 0.4166666666666667 | 0.5833333333333334 | 0.6666666666666666 | 29.27436817354626 | 0.7935162136952082 | 0.07854136604136604 | 0.0 | 0.4006388829503622 |
197
+ | reveal | 0.5416666666666666 | 0.4166666666666667 | 0.5833333333333334 | 0.625 | 30.107333534293705 | 0.8134206715557311 | 0.05241552429052429 | 2.0996421982129196 | 0.42389288420478505 |
198
+
199
+ ### Frozen CLIP Proxy Benchmark Results With Committed-Chunk Evaluator
200
+
201
+ Source files:
202
+
203
+ - `artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.json`
204
+ - `artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.json`
205
+
206
+ | Model | Mean success | foliage_proxy | bag_proxy | cloth_proxy | visibility_integral | corridor_availability | reocclusion_rate | persistence_horizon_mae | disturbance_cost |
207
+ | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
208
+ | interaction_clip | 0.3055555555555556 | 0.2916666666666667 | 0.2916666666666667 | 0.3333333333333333 | 10.379729785852962 | 0.38910322284532917 | 0.026909722222222224 | 3.8014686041765726 | 0.392014082081409 |
209
+ | backbone_clip | 0.3333333333333333 | 0.2916666666666667 | 0.4166666666666667 | 0.2916666666666667 | 5.090670637786388 | 0.30186899772120845 | 0.013541666666666667 | 0.0 | 0.36051381931045196 |
210
+ | reveal_clip | 0.20833333333333334 | 0.20833333333333334 | 0.25 | 0.16666666666666666 | 48.426281129320465 | 0.8251730443702804 | 0.06718750000000001 | 0.9353624902194482 | 0.709741123020649 |
211
+
212
+ ### Proxy Diagnostics
213
+
214
+ | Run | Planner top-1 accuracy | Planner regret | Risk calibration MSE | Role collapse rate | Samples | JSON |
215
+ | --- | ---: | ---: | ---: | ---: | ---: | --- |
216
+ | interaction-state actionhist full | 0.1984732824427481 | 0.07150506228208542 | 0.009851997718214989 | 0.0 | 131 | `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/diagnostics/proxy_diagnostics.json` |
217
+ | interaction-state recency oracleft full | 0.2824427480916031 | 0.24119873344898224 | 0.009003574028611183 | 0.0 | 131 | `artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/diagnostics/proxy_diagnostics.json` |
218
+ | interaction-state CLIP actionhist full | 0.3253968253968254 | 0.1786193549633026 | 0.01645304262638092 | 0.0 | 126 | `artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/diagnostics/proxy_diagnostics.json` |
219
+
220
+ ### Proxy Ablation Results For Actionhist Checkpoint
221
+
222
+ Source files:
223
+
224
+ - `artifacts/outputs/interaction_debug/ablation_none_actionhist/reveal_benchmark.json`
225
+ - `artifacts/outputs/interaction_debug/ablation_no_interaction_head_actionhist/reveal_benchmark.json`
226
+ - `artifacts/outputs/interaction_debug/ablation_no_world_model_actionhist/reveal_benchmark.json`
227
+ - `artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.json`
228
+ - `artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.json`
229
+ - `artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.json`
230
+
231
+ | Ablation | Mean success | foliage_proxy | bag_proxy | cloth_proxy | visibility_integral | corridor_availability | reocclusion_rate | persistence_horizon_mae | disturbance_cost |
232
+ | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
233
+ | full_model | 0.5277777777777778 | 0.4166666666666667 | 0.5416666666666666 | 0.625 | 32.84789120488696 | 0.8711970953477753 | 0.003125 | 1.1544888946683267 | 0.4288607043110662 |
234
+ | no_interaction_head | 0.38888888888888884 | 0.16666666666666666 | 0.5 | 0.5 | 42.193298303418686 | 0.9207814501391517 | 0.016840277777777777 | 0.0 | 0.5719093395810988 |
235
+ | no_world_model | 0.5277777777777778 | 0.4166666666666667 | 0.5416666666666666 | 0.625 | 32.94181125528283 | 0.8710797395971086 | 0.003125 | 1.1577362408331497 | 0.42711537962572443 |
236
+ | no_planner | 0.5277777777777778 | 0.4166666666666667 | 0.5416666666666666 | 0.625 | 32.94181125528283 | 0.8710797395971086 | 0.003125 | 1.1577362408331497 | 0.42711537962572443 |
237
+ | no_role_tokens | 0.5277777777777778 | 0.4166666666666667 | 0.5416666666666666 | 0.625 | 33.69023843109608 | 0.8873094982571073 | 0.0 | 1.165569365169578 | 0.4185725698868434 |
238
+ | short_history | 0.5416666666666666 | 0.4166666666666667 | 0.5833333333333334 | 0.625 | 31.347230527136063 | 0.875287824206882 | 0.0 | 3.0816725173931325 | 0.459634010369579 |
239
+
240
+ ### RLBench Open Drawer Rollout Reruns
241
+
242
+ | Output | Raw values | File |
243
+ | --- | --- | --- |
244
+ | rollout rerun with path error | `plan_requested=true`, `plan_applied=true`, `tasks.open_drawer.error="A path could not be found because the target is outside of workspace."`, `mean_success=0.0` | `artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_rerun/rollout_eval.json` |
245
+ | rollout rerun after display and path recovery fixes | `plan_requested=true`, `plan_applied=true`, `tasks.open_drawer.path_recoveries=0`, `tasks.open_drawer.noop_fallbacks=0`, `mean_success=0.0` | `artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_recovered/rollout_eval.json` |
246
+
247
  ## Recreate The Same Software Layout
248
 
249
  Use:
artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "interaction": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4166666666666667,
5
+ "bag_proxy": 0.5416666666666666,
6
+ "cloth_proxy": 0.625
7
+ },
8
+ "mean_success": 0.5277777777777778,
9
+ "visibility_integral": 32.94181125528283,
10
+ "corridor_availability": 0.8710797395971086,
11
+ "reocclusion_rate": 0.003125,
12
+ "persistence_horizon_mae": 1.1577362408331497,
13
+ "disturbance_cost": 0.42711537962572443
14
+ }
15
+ }
artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
5
+ - mean_success: 0.528
6
+ - visibility_integral: 32.942
7
+ - corridor_availability: 0.871
8
+ - reocclusion_rate: 0.003
9
+ - persistence_horizon_mae: 1.158
10
+ - disturbance_cost: 0.427
11
+ - foliage_proxy_success: 0.417
12
+ - bag_proxy_success: 0.542
13
+ - cloth_proxy_success: 0.625
artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "interaction": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4166666666666667,
5
+ "bag_proxy": 0.5416666666666666,
6
+ "cloth_proxy": 0.625
7
+ },
8
+ "mean_success": 0.5277777777777778,
9
+ "visibility_integral": 33.69023843109608,
10
+ "corridor_availability": 0.8873094982571073,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 1.165569365169578,
13
+ "disturbance_cost": 0.4185725698868434
14
+ }
15
+ }
artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
5
+ - mean_success: 0.528
6
+ - visibility_integral: 33.690
7
+ - corridor_availability: 0.887
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 1.166
10
+ - disturbance_cost: 0.419
11
+ - foliage_proxy_success: 0.417
12
+ - bag_proxy_success: 0.542
13
+ - cloth_proxy_success: 0.625
artifacts/outputs/interaction_debug/ablation_no_world_model_actionhist/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
5
+ - mean_success: 0.528
6
+ - visibility_integral: 32.942
7
+ - corridor_availability: 0.871
8
+ - reocclusion_rate: 0.003
9
+ - persistence_horizon_mae: 1.158
10
+ - disturbance_cost: 0.427
11
+ - foliage_proxy_success: 0.417
12
+ - bag_proxy_success: 0.542
13
+ - cloth_proxy_success: 0.625
artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "interaction": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4166666666666667,
5
+ "bag_proxy": 0.5833333333333334,
6
+ "cloth_proxy": 0.625
7
+ },
8
+ "mean_success": 0.5416666666666666,
9
+ "visibility_integral": 31.347230527136063,
10
+ "corridor_availability": 0.875287824206882,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 3.0816725173931325,
13
+ "disturbance_cost": 0.459634010369579
14
+ }
15
+ }
artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
5
+ - mean_success: 0.542
6
+ - visibility_integral: 31.347
7
+ - corridor_availability: 0.875
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 3.082
10
+ - disturbance_cost: 0.460
11
+ - foliage_proxy_success: 0.417
12
+ - bag_proxy_success: 0.583
13
+ - cloth_proxy_success: 0.625
artifacts/outputs/interaction_debug/chunk_debug_trace.json ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "label": "rolefix_smoke_old",
4
+ "proxy": "foliage_proxy",
5
+ "best_candidate_index": 2,
6
+ "retrieve_sequence": [
7
+ 0.22872358560562134,
8
+ 0.7541071176528931,
9
+ 0.6303636431694031,
10
+ 0.4685209095478058
11
+ ],
12
+ "open_sequence": [
13
+ 1.2554516792297363,
14
+ 0.8975364565849304,
15
+ 0.5596103668212891,
16
+ 0.4779726266860962
17
+ ],
18
+ "template_sequence": [
19
+ 0.47042158246040344,
20
+ 0.6467143297195435,
21
+ 0.5085114240646362,
22
+ 0.478359580039978
23
+ ]
24
+ },
25
+ {
26
+ "label": "rolefix_smoke_old",
27
+ "proxy": "bag_proxy",
28
+ "best_candidate_index": 2,
29
+ "retrieve_sequence": [
30
+ 0.2374069094657898,
31
+ 0.7521002292633057,
32
+ 0.6305321455001831,
33
+ 0.4743019640445709
34
+ ],
35
+ "open_sequence": [
36
+ 1.257965326309204,
37
+ 0.896579384803772,
38
+ 0.5625595450401306,
39
+ 0.4776189923286438
40
+ ],
41
+ "template_sequence": [
42
+ 0.47550493478775024,
43
+ 0.6366342306137085,
44
+ 0.5038254261016846,
45
+ 0.4769764542579651
46
+ ]
47
+ },
48
+ {
49
+ "label": "rolefix_smoke_old",
50
+ "proxy": "cloth_proxy",
51
+ "best_candidate_index": 2,
52
+ "retrieve_sequence": [
53
+ 0.24050980806350708,
54
+ 0.7626074552536011,
55
+ 0.6310772895812988,
56
+ 0.47661182284355164
57
+ ],
58
+ "open_sequence": [
59
+ 1.2510802745819092,
60
+ 0.8940063714981079,
61
+ 0.5478025078773499,
62
+ 0.470864862203598
63
+ ],
64
+ "template_sequence": [
65
+ 0.46881186962127686,
66
+ 0.6378085613250732,
67
+ 0.504069447517395,
68
+ 0.4773429036140442
69
+ ]
70
+ },
71
+ {
72
+ "label": "actionhist_smoke_new",
73
+ "proxy": "foliage_proxy",
74
+ "best_candidate_index": 0,
75
+ "retrieve_sequence": [
76
+ 0.23512092232704163,
77
+ 0.5730606317520142,
78
+ 0.5967459678649902,
79
+ 0.4731495678424835
80
+ ],
81
+ "open_sequence": [
82
+ 0.6600309014320374,
83
+ 0.43168342113494873,
84
+ 0.15955285727977753,
85
+ -0.09488785266876221
86
+ ],
87
+ "template_sequence": [
88
+ -0.017185214906930923,
89
+ 0.017828624695539474,
90
+ 0.013375137001276016,
91
+ -0.01390126720070839
92
+ ]
93
+ },
94
+ {
95
+ "label": "actionhist_smoke_new",
96
+ "proxy": "bag_proxy",
97
+ "best_candidate_index": 0,
98
+ "retrieve_sequence": [
99
+ 0.2351658046245575,
100
+ 0.572963535785675,
101
+ 0.5971102714538574,
102
+ 0.4758695065975189
103
+ ],
104
+ "open_sequence": [
105
+ 0.6608113646507263,
106
+ 0.4318099617958069,
107
+ 0.16285540163516998,
108
+ -0.09124644100666046
109
+ ],
110
+ "template_sequence": [
111
+ -0.018705788999795914,
112
+ 0.016191553324460983,
113
+ 0.012765157967805862,
114
+ -0.016781020909547806
115
+ ]
116
+ },
117
+ {
118
+ "label": "actionhist_smoke_new",
119
+ "proxy": "cloth_proxy",
120
+ "best_candidate_index": 0,
121
+ "retrieve_sequence": [
122
+ 0.23625126481056213,
123
+ 0.5730390548706055,
124
+ 0.59672611951828,
125
+ 0.4727664887905121
126
+ ],
127
+ "open_sequence": [
128
+ 0.6570022106170654,
129
+ 0.4338717460632324,
130
+ 0.15934017300605774,
131
+ -0.09580504149198532
132
+ ],
133
+ "template_sequence": [
134
+ -0.028799299150705338,
135
+ 0.006899785250425339,
136
+ 0.004223380237817764,
137
+ -0.026467766612768173
138
+ ]
139
+ }
140
+ ]
artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/diagnostics/proxy_diagnostics.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "planner_top1_accuracy": 0.35294117647058826,
3
+ "planner_regret": 0.017080334946513176,
4
+ "risk_calibration_mse": 0.00906219333410263,
5
+ "role_collapse_rate": 0.0,
6
+ "num_samples": 17
7
+ }
artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/metrics.json ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.23455160359541574,
6
+ "arm_role": 1.2069129049777985,
7
+ "belief": 0.48631568253040314,
8
+ "corridor": 0.5782903432846069,
9
+ "disturbance": 0.17786112676064172,
10
+ "persistence": 1.815186083316803,
11
+ "phase": 1.3141004741191864,
12
+ "planner_ranking": 0.15019067749381065,
13
+ "planner_risk": 0.05527863139286637,
14
+ "planner_success": 0.6984443863232931,
15
+ "proposal_ranking": 0.10006876041491826,
16
+ "proposal_reconstruction": 0.3053521513938904,
17
+ "proposal_success": 0.6853575110435486,
18
+ "reocclusion": 0.6961739559968313,
19
+ "support_mode": 0.8659396668275198,
20
+ "total": 2.116169492403666,
21
+ "uncertainty": 0.6137877206007639,
22
+ "world_model": 2.6161614656448364
23
+ },
24
+ "val": {
25
+ "action": 0.07151262213786443,
26
+ "arm_role": 0.6764164765675863,
27
+ "belief": 0.36398513118426007,
28
+ "corridor": 0.4683004717032115,
29
+ "disturbance": 0.102281058828036,
30
+ "persistence": 2.114008625348409,
31
+ "phase": 0.9027760624885559,
32
+ "planner_ranking": 0.09026545286178589,
33
+ "planner_risk": 0.02189356298185885,
34
+ "planner_success": 0.6435574690500895,
35
+ "proposal_ranking": 0.16597949465115866,
36
+ "proposal_reconstruction": 0.11828663945198059,
37
+ "proposal_success": 0.6095772981643677,
38
+ "reocclusion": 0.7000808914502462,
39
+ "support_mode": 0.6359505653381348,
40
+ "total": 1.4241125186284382,
41
+ "uncertainty": 0.5725147326787313,
42
+ "world_model": 1.5686078071594238
43
+ }
44
+ },
45
+ {
46
+ "epoch": 1,
47
+ "train": {
48
+ "action": 0.07887393422424793,
49
+ "arm_role": 0.4496926615635554,
50
+ "belief": 0.28958051403363544,
51
+ "corridor": 0.3720829039812088,
52
+ "disturbance": 0.07337014439205329,
53
+ "persistence": 1.7143786152203877,
54
+ "phase": 0.777398000160853,
55
+ "planner_ranking": 0.14400668690601984,
56
+ "planner_risk": 0.016193983455499012,
57
+ "planner_success": 0.6361206471920013,
58
+ "proposal_ranking": 0.11434461300571759,
59
+ "proposal_reconstruction": 0.11045620342095692,
60
+ "proposal_success": 0.6260021726290385,
61
+ "reocclusion": 0.6881168782711029,
62
+ "support_mode": 0.784478078285853,
63
+ "total": 1.2963247100512187,
64
+ "uncertainty": 0.5047676662604014,
65
+ "world_model": 1.4695208072662354
66
+ },
67
+ "val": {
68
+ "action": 0.05061729749043783,
69
+ "arm_role": 0.2217621256907781,
70
+ "belief": 0.19144149124622345,
71
+ "corridor": 0.33698633313179016,
72
+ "disturbance": 0.019655164952079456,
73
+ "persistence": 2.276299834251404,
74
+ "phase": 0.7830212910970052,
75
+ "planner_ranking": 0.10330406576395035,
76
+ "planner_risk": 0.012047629677302515,
77
+ "planner_success": 0.46883141497770947,
78
+ "proposal_ranking": 0.16881480813026428,
79
+ "proposal_reconstruction": 0.08914910753568013,
80
+ "proposal_success": 0.5338547825813293,
81
+ "reocclusion": 0.7235203385353088,
82
+ "support_mode": 0.6643315752347311,
83
+ "total": 1.1495283842086792,
84
+ "uncertainty": 0.36858222881952923,
85
+ "world_model": 1.2773457169532776
86
+ }
87
+ },
88
+ {
89
+ "epoch": 2,
90
+ "train": {
91
+ "action": 0.0648206224044164,
92
+ "arm_role": 0.1347198486328125,
93
+ "belief": 0.14715169121821722,
94
+ "corridor": 0.2695915202299754,
95
+ "disturbance": 0.010349508646565178,
96
+ "persistence": 1.7063330213228862,
97
+ "phase": 0.726386179526647,
98
+ "planner_ranking": 0.11673471455772717,
99
+ "planner_risk": 0.009400874686737856,
100
+ "planner_success": 0.6698183119297028,
101
+ "proposal_ranking": 0.10080837706724803,
102
+ "proposal_reconstruction": 0.10316941390434901,
103
+ "proposal_success": 0.6286104818185171,
104
+ "reocclusion": 0.6681396464506785,
105
+ "support_mode": 0.6904432475566864,
106
+ "total": 1.1366514563560486,
107
+ "uncertainty": 0.27301351229349774,
108
+ "world_model": 1.372689664363861
109
+ },
110
+ "val": {
111
+ "action": 0.05020085473855337,
112
+ "arm_role": 0.054195716977119446,
113
+ "belief": 0.12719580034414926,
114
+ "corridor": 0.33358681698640186,
115
+ "disturbance": 0.0010723281108463805,
116
+ "persistence": 2.3125662008921304,
117
+ "phase": 0.7737143238385519,
118
+ "planner_ranking": 0.12118598818778992,
119
+ "planner_risk": 0.008284708329786858,
120
+ "planner_success": 0.6051804622014364,
121
+ "proposal_ranking": 0.1250954990585645,
122
+ "proposal_reconstruction": 0.08273230989774068,
123
+ "proposal_success": 0.5201686124006907,
124
+ "reocclusion": 0.6809982657432556,
125
+ "support_mode": 0.5777197976907095,
126
+ "total": 1.1349389950434368,
127
+ "uncertainty": 0.17320589224497476,
128
+ "world_model": 1.3453394174575806
129
+ }
130
+ },
131
+ {
132
+ "epoch": 3,
133
+ "train": {
134
+ "action": 0.055803545440236725,
135
+ "arm_role": 0.033050537109375,
136
+ "belief": 0.11564020191629727,
137
+ "corridor": 0.256190650165081,
138
+ "disturbance": 0.002490642402941982,
139
+ "persistence": 1.711540162563324,
140
+ "phase": 0.681098093589147,
141
+ "planner_ranking": 0.10920613507429759,
142
+ "planner_risk": 0.010532331497718891,
143
+ "planner_success": 0.6514300604661306,
144
+ "proposal_ranking": 0.08523762846986453,
145
+ "proposal_reconstruction": 0.08513934289415677,
146
+ "proposal_success": 0.6457574268182119,
147
+ "reocclusion": 0.6691893935203552,
148
+ "support_mode": 0.6864420572916666,
149
+ "total": 1.0746445059776306,
150
+ "uncertainty": 0.1379331536591053,
151
+ "world_model": 1.3261052171389263
152
+ },
153
+ "val": {
154
+ "action": 0.04372807095448176,
155
+ "arm_role": 0.014572909101843834,
156
+ "belief": 0.12325718998908997,
157
+ "corridor": 0.344586377342542,
158
+ "disturbance": 0.002586025783481697,
159
+ "persistence": 2.2659462292989097,
160
+ "phase": 0.712437629699707,
161
+ "planner_ranking": 0.1231433277328809,
162
+ "planner_risk": 0.00803024492536982,
163
+ "planner_success": 0.5179306268692017,
164
+ "proposal_ranking": 0.11125253637631734,
165
+ "proposal_reconstruction": 0.07622659454743068,
166
+ "proposal_success": 0.5146457950274149,
167
+ "reocclusion": 0.6703451077143351,
168
+ "support_mode": 0.6071783800919851,
169
+ "total": 1.0756589968999226,
170
+ "uncertainty": 0.10349630812803905,
171
+ "world_model": 1.2806402842203777
172
+ }
173
+ }
174
+ ]
artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/config_resolved.yaml ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_state_clip_actionhist
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction_debug
3
+ device: cuda
4
+ seed: 7
5
+ init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
6
+ init_strict: false
7
+ data:
8
+ proxies:
9
+ - foliage_proxy
10
+ - bag_proxy
11
+ - cloth_proxy
12
+ resolution: 224
13
+ train_episodes_per_proxy: 48
14
+ val_episodes_per_proxy: 16
15
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v5_actionhist.pt
16
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v5_actionhist.pt
17
+ rebuild_dataset: false
18
+ chunk_horizon: 8
19
+ rollout_horizon: 5
20
+ history_steps: 6
21
+ planner_candidates: 8
22
+ seed: 7
23
+ optim:
24
+ epochs: 4
25
+ batch_size: 2
26
+ num_workers: 0
27
+ lr: 0.0003
28
+ weight_decay: 0.0001
29
+ trainer:
30
+ policy_type: interaction_state
31
+ use_bf16: true
32
+ grad_clip_norm: 1.0
33
+ freeze_backbone: true
34
+ gradient_checkpointing: false
35
+ plan_during_train: true
36
+ plan_during_eval: true
37
+ support_mode_conditioning: true
38
+ planner_mode: trainable
39
+ policy:
40
+ backbone:
41
+ model_name: openai/clip-vit-base-patch32
42
+ hidden_dim: 512
43
+ max_text_tokens: 32
44
+ freeze_backbone: true
45
+ gradient_checkpointing: false
46
+ use_dummy_backbone: false
47
+ fusion:
48
+ hidden_dim: 512
49
+ num_cameras: 3
50
+ num_layers: 4
51
+ num_heads: 8
52
+ ff_dim: 2048
53
+ dropout: 0.1
54
+ proprio_dim: 32
55
+ proprio_tokens: 1
56
+ memory:
57
+ hidden_dim: 512
58
+ action_dim: 14
59
+ history_steps: 6
60
+ num_layers: 2
61
+ dropout: 0.1
62
+ memory_bank_size: 4
63
+ num_heads: 8
64
+ max_history_steps: 8
65
+ decoder:
66
+ hidden_dim: 512
67
+ num_heads: 8
68
+ num_layers: 4
69
+ ff_dim: 2048
70
+ dropout: 0.1
71
+ chunk_size: 8
72
+ action_dim: 14
73
+ arm_action_dim: 7
74
+ num_candidates: 8
75
+ num_phases: 5
76
+ num_arm_roles: 4
77
+ reveal_head:
78
+ hidden_dim: 512
79
+ num_support_modes: 3
80
+ num_approach_templates: 32
81
+ rollout_horizon: 5
82
+ belief_map_size: 32
83
+ field_size: 16
84
+ num_heads: 8
85
+ predict_belief_map: true
86
+ num_phases: 5
87
+ num_arm_roles: 4
88
+ num_interaction_tokens: 8
89
+ world_model:
90
+ hidden_dim: 512
91
+ action_dim: 14
92
+ num_support_modes: 3
93
+ num_approach_templates: 32
94
+ rollout_horizon: 5
95
+ field_size: 16
96
+ num_heads: 8
97
+ num_phases: 5
98
+ num_arm_roles: 4
99
+ num_interaction_tokens: 8
100
+ belief_map_size: 32
101
+ predict_belief_map: true
102
+ planner:
103
+ hidden_dim: 512
104
+ num_candidates: 8
105
+ action_dim: 14
106
+ num_support_modes: 3
107
+ utility_margin: 0.1
108
+ num_heads: 8
109
+ num_layers: 2
110
+ num_phases: 5
111
+ num_arm_roles: 4
112
+ loss_weights:
113
+ action: 1.0
114
+ phase: 0.1
115
+ arm_role: 0.15
116
+ support_mode: 0.1
117
+ corridor: 0.15
118
+ persistence: 0.05
119
+ disturbance: 0.05
120
+ world_model: 0.2
121
+ belief: 0.05
122
+ planner_success: 0.25
123
+ planner_risk: 0.1
124
+ planner_ranking: 0.2
125
+ proposal_reconstruction: 0.1
126
+ proposal_success: 0.15
127
+ proposal_ranking: 0.2
artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/diagnostics/proxy_diagnostics.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "planner_top1_accuracy": 0.3253968253968254,
3
+ "planner_regret": 0.1786193549633026,
4
+ "risk_calibration_mse": 0.01645304262638092,
5
+ "role_collapse_rate": 0.0,
6
+ "num_samples": 126
7
+ }
artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/metrics.json ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.17748002509016017,
6
+ "arm_role": 0.01832497369556527,
7
+ "belief": 0.12835299933612035,
8
+ "corridor": 0.2547702425456952,
9
+ "disturbance": 0.00850862705773346,
10
+ "persistence": 4.974573742500774,
11
+ "phase": 0.7463235106143652,
12
+ "planner_ranking": 1.3405994254881175,
13
+ "planner_risk": 0.024703218532160547,
14
+ "planner_success": 0.7777972318115035,
15
+ "proposal_ranking": 1.165930202494117,
16
+ "proposal_reconstruction": 0.2531185241035766,
17
+ "proposal_success": 0.6786430877540748,
18
+ "reocclusion": 0.7147265204584411,
19
+ "support_mode": 0.7602155595549738,
20
+ "total": 2.0788989903415063,
21
+ "uncertainty": 0.03309597126671469,
22
+ "world_model": 3.071348061112209
23
+ },
24
+ "val": {
25
+ "action": 0.03192901705938672,
26
+ "arm_role": 6.15250448592835e-06,
27
+ "belief": 0.10559089872099105,
28
+ "corridor": 0.23193429670636617,
29
+ "disturbance": 0.0022747389350750756,
30
+ "persistence": 3.85837465619284,
31
+ "phase": 0.6875752177503374,
32
+ "planner_ranking": 1.1088495595114571,
33
+ "planner_risk": 0.018587306145549057,
34
+ "planner_success": 0.6127710470131466,
35
+ "proposal_ranking": 1.1232511202494304,
36
+ "proposal_reconstruction": 0.08394021162438015,
37
+ "proposal_success": 0.681461288815453,
38
+ "reocclusion": 0.6769484205851479,
39
+ "support_mode": 0.6654504603809781,
40
+ "total": 1.5210873153474596,
41
+ "uncertainty": 0.011785898017623121,
42
+ "world_model": 1.9750548638994732
43
+ }
44
+ },
45
+ {
46
+ "epoch": 1,
47
+ "train": {
48
+ "action": 0.030109174476439102,
49
+ "arm_role": 8.612091004536414e-06,
50
+ "belief": 0.104316227781679,
51
+ "corridor": 0.23850143234689197,
52
+ "disturbance": 0.0025595128212472823,
53
+ "persistence": 3.9934506887540766,
54
+ "phase": 0.6901740428664922,
55
+ "planner_ranking": 1.239893207687358,
56
+ "planner_risk": 0.026462018369155793,
57
+ "planner_success": 0.664632208528319,
58
+ "proposal_ranking": 1.1259761543174065,
59
+ "proposal_reconstruction": 0.08132225903072907,
60
+ "proposal_success": 0.6764243753792728,
61
+ "reocclusion": 0.6790863540784227,
62
+ "support_mode": 0.6789359047774869,
63
+ "total": 1.550120969093283,
64
+ "uncertainty": 0.007208701525449128,
65
+ "world_model": 1.8854006223029491
66
+ },
67
+ "val": {
68
+ "action": 0.02197206175575654,
69
+ "arm_role": 2.089118947517977e-05,
70
+ "belief": 0.09741538857656812,
71
+ "corridor": 0.22761633885758265,
72
+ "disturbance": 0.0017140347070323067,
73
+ "persistence": 3.6565530148763505,
74
+ "phase": 0.6668311646060338,
75
+ "planner_ranking": 1.1634496355813646,
76
+ "planner_risk": 0.047890776559518324,
77
+ "planner_success": 0.5928089713293409,
78
+ "proposal_ranking": 1.1224727725225783,
79
+ "proposal_reconstruction": 0.06971718163953887,
80
+ "proposal_success": 0.6724110945822701,
81
+ "reocclusion": 0.6611922624565306,
82
+ "support_mode": 0.6766654224622817,
83
+ "total": 1.4845811980111259,
84
+ "uncertainty": 0.004251384046963519,
85
+ "world_model": 1.875271028942532
86
+ }
87
+ },
88
+ {
89
+ "epoch": 2,
90
+ "train": {
91
+ "action": 0.02331933839470928,
92
+ "arm_role": 8.285201656880802e-06,
93
+ "belief": 0.1041115006695243,
94
+ "corridor": 0.2380418391820258,
95
+ "disturbance": 0.002577872130260731,
96
+ "persistence": 3.555448654902543,
97
+ "phase": 0.6753773314790575,
98
+ "planner_ranking": 1.1668821538930163,
99
+ "planner_risk": 0.020309378087023242,
100
+ "planner_success": 0.623614322296612,
101
+ "proposal_ranking": 1.1245252312165905,
102
+ "proposal_reconstruction": 0.07289492924019929,
103
+ "proposal_success": 0.6749192248464255,
104
+ "reocclusion": 0.6692662537097931,
105
+ "support_mode": 0.6756738792539267,
106
+ "total": 1.4613653153025044,
107
+ "uncertainty": 0.012397505843296725,
108
+ "world_model": 1.7293687263084332
109
+ },
110
+ "val": {
111
+ "action": 0.03152821023785879,
112
+ "arm_role": 1.913968098564048e-06,
113
+ "belief": 0.10549203495657633,
114
+ "corridor": 0.20762673824552505,
115
+ "disturbance": 0.0014280516678275214,
116
+ "persistence": 2.0710838323547724,
117
+ "phase": 0.6628126601378123,
118
+ "planner_ranking": 1.0928319522312708,
119
+ "planner_risk": 0.021120590453464833,
120
+ "planner_success": 0.5570865495810433,
121
+ "proposal_ranking": 1.1183055109447904,
122
+ "proposal_reconstruction": 0.08380144739907885,
123
+ "proposal_success": 0.6772379392669314,
124
+ "reocclusion": 0.6509462926122878,
125
+ "support_mode": 0.6650945194183834,
126
+ "total": 1.3528291233002194,
127
+ "uncertainty": 0.0025819382726839403,
128
+ "world_model": 1.7092195824971275
129
+ }
130
+ },
131
+ {
132
+ "epoch": 3,
133
+ "train": {
134
+ "action": 0.021615957470698506,
135
+ "arm_role": 9.251202588306048e-07,
136
+ "belief": 0.10970319874818725,
137
+ "corridor": 0.2036819358732704,
138
+ "disturbance": 0.002751460597729129,
139
+ "persistence": 1.0053820329420464,
140
+ "phase": 0.4392661486620678,
141
+ "planner_ranking": 1.1170655027109915,
142
+ "planner_risk": 0.023540541935585323,
143
+ "planner_success": 0.574678816408387,
144
+ "proposal_ranking": 1.1232363391297027,
145
+ "proposal_reconstruction": 0.07163417897143289,
146
+ "proposal_success": 0.6759519848523964,
147
+ "reocclusion": 0.3594565280497986,
148
+ "support_mode": 0.1658484423971925,
149
+ "total": 1.2094011244349454,
150
+ "uncertainty": 0.001485606099231278,
151
+ "world_model": 1.6549255024076133
152
+ },
153
+ "val": {
154
+ "action": 0.01307902658092124,
155
+ "arm_role": 3.7938821602466983e-07,
156
+ "belief": 0.10557046154188732,
157
+ "corridor": 0.18899264949418249,
158
+ "disturbance": 0.003063943787498237,
159
+ "persistence": 0.6038030874915421,
160
+ "phase": 0.19549169234694944,
161
+ "planner_ranking": 1.1149483919143677,
162
+ "planner_risk": 0.01645888195424858,
163
+ "planner_success": 0.5231598180437845,
164
+ "proposal_ranking": 1.1176083068999032,
165
+ "proposal_reconstruction": 0.05967588533484747,
166
+ "proposal_success": 0.6721902480201115,
167
+ "reocclusion": 0.1391045902338293,
168
+ "support_mode": 0.0005700885616649415,
169
+ "total": 1.1205205075324527,
170
+ "uncertainty": 0.0005439088721946714,
171
+ "world_model": 1.6766679949230618
172
+ }
173
+ }
174
+ ]
artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/summary.json ADDED
@@ -0,0 +1,573 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "experiment_name": "proxy_interaction_state_clip_actionhist",
3
+ "device": "cuda",
4
+ "best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/checkpoint_best.pt",
5
+ "final_train_total": 1.2094011244349454,
6
+ "final_val_total": 1.1205205075324527,
7
+ "num_train_samples": 382,
8
+ "num_val_samples": 126,
9
+ "planner_mode": "trainable",
10
+ "frozen_modules": [],
11
+ "init_info": {
12
+ "path": "/workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt",
13
+ "loaded_keys": 467,
14
+ "skipped_shape_mismatch_keys": [
15
+ "memory.gru.weight_ih_l0",
16
+ "memory.gru.weight_hh_l0",
17
+ "memory.gru.bias_ih_l0",
18
+ "memory.gru.bias_hh_l0",
19
+ "decoder.actor_role_bias",
20
+ "decoder.revealer_decoder.layers.0.self_attn.in_proj_weight",
21
+ "decoder.revealer_decoder.layers.0.self_attn.in_proj_bias",
22
+ "decoder.revealer_decoder.layers.0.self_attn.out_proj.weight",
23
+ "decoder.revealer_decoder.layers.0.self_attn.out_proj.bias",
24
+ "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_weight",
25
+ "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_bias",
26
+ "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.weight",
27
+ "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.bias",
28
+ "decoder.revealer_decoder.layers.0.linear1.weight",
29
+ "decoder.revealer_decoder.layers.0.linear1.bias",
30
+ "decoder.revealer_decoder.layers.0.linear2.weight",
31
+ "decoder.revealer_decoder.layers.0.linear2.bias",
32
+ "decoder.revealer_decoder.layers.0.norm1.weight",
33
+ "decoder.revealer_decoder.layers.0.norm1.bias",
34
+ "decoder.revealer_decoder.layers.0.norm2.weight",
35
+ "decoder.revealer_decoder.layers.0.norm2.bias",
36
+ "decoder.revealer_decoder.layers.0.norm3.weight",
37
+ "decoder.revealer_decoder.layers.0.norm3.bias",
38
+ "decoder.revealer_decoder.layers.1.self_attn.in_proj_weight",
39
+ "decoder.revealer_decoder.layers.1.self_attn.in_proj_bias",
40
+ "decoder.revealer_decoder.layers.1.self_attn.out_proj.weight",
41
+ "decoder.revealer_decoder.layers.1.self_attn.out_proj.bias",
42
+ "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_weight",
43
+ "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_bias",
44
+ "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.weight",
45
+ "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.bias",
46
+ "decoder.revealer_decoder.layers.1.linear1.weight",
47
+ "decoder.revealer_decoder.layers.1.linear1.bias",
48
+ "decoder.revealer_decoder.layers.1.linear2.weight",
49
+ "decoder.revealer_decoder.layers.1.linear2.bias",
50
+ "decoder.revealer_decoder.layers.1.norm1.weight",
51
+ "decoder.revealer_decoder.layers.1.norm1.bias",
52
+ "decoder.revealer_decoder.layers.1.norm2.weight",
53
+ "decoder.revealer_decoder.layers.1.norm2.bias",
54
+ "decoder.revealer_decoder.layers.1.norm3.weight",
55
+ "decoder.revealer_decoder.layers.1.norm3.bias",
56
+ "decoder.revealer_decoder.layers.2.self_attn.in_proj_weight",
57
+ "decoder.revealer_decoder.layers.2.self_attn.in_proj_bias",
58
+ "decoder.revealer_decoder.layers.2.self_attn.out_proj.weight",
59
+ "decoder.revealer_decoder.layers.2.self_attn.out_proj.bias",
60
+ "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_weight",
61
+ "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_bias",
62
+ "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.weight",
63
+ "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.bias",
64
+ "decoder.revealer_decoder.layers.2.linear1.weight",
65
+ "decoder.revealer_decoder.layers.2.linear1.bias",
66
+ "decoder.revealer_decoder.layers.2.linear2.weight",
67
+ "decoder.revealer_decoder.layers.2.linear2.bias",
68
+ "decoder.revealer_decoder.layers.2.norm1.weight",
69
+ "decoder.revealer_decoder.layers.2.norm1.bias",
70
+ "decoder.revealer_decoder.layers.2.norm2.weight",
71
+ "decoder.revealer_decoder.layers.2.norm2.bias",
72
+ "decoder.revealer_decoder.layers.2.norm3.weight",
73
+ "decoder.revealer_decoder.layers.2.norm3.bias",
74
+ "decoder.revealer_decoder.layers.3.self_attn.in_proj_weight",
75
+ "decoder.revealer_decoder.layers.3.self_attn.in_proj_bias",
76
+ "decoder.revealer_decoder.layers.3.self_attn.out_proj.weight",
77
+ "decoder.revealer_decoder.layers.3.self_attn.out_proj.bias",
78
+ "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_weight",
79
+ "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_bias",
80
+ "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.weight",
81
+ "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.bias",
82
+ "decoder.revealer_decoder.layers.3.linear1.weight",
83
+ "decoder.revealer_decoder.layers.3.linear1.bias",
84
+ "decoder.revealer_decoder.layers.3.linear2.weight",
85
+ "decoder.revealer_decoder.layers.3.linear2.bias",
86
+ "decoder.revealer_decoder.layers.3.norm1.weight",
87
+ "decoder.revealer_decoder.layers.3.norm1.bias",
88
+ "decoder.revealer_decoder.layers.3.norm2.weight",
89
+ "decoder.revealer_decoder.layers.3.norm2.bias",
90
+ "decoder.revealer_decoder.layers.3.norm3.weight",
91
+ "decoder.revealer_decoder.layers.3.norm3.bias",
92
+ "decoder.actor_decoder.layers.0.self_attn.in_proj_weight",
93
+ "decoder.actor_decoder.layers.0.self_attn.in_proj_bias",
94
+ "decoder.actor_decoder.layers.0.self_attn.out_proj.weight",
95
+ "decoder.actor_decoder.layers.0.self_attn.out_proj.bias",
96
+ "decoder.actor_decoder.layers.0.multihead_attn.in_proj_weight",
97
+ "decoder.actor_decoder.layers.0.multihead_attn.in_proj_bias",
98
+ "decoder.actor_decoder.layers.0.multihead_attn.out_proj.weight",
99
+ "decoder.actor_decoder.layers.0.multihead_attn.out_proj.bias",
100
+ "decoder.actor_decoder.layers.0.linear1.weight",
101
+ "decoder.actor_decoder.layers.0.linear1.bias",
102
+ "decoder.actor_decoder.layers.0.linear2.weight",
103
+ "decoder.actor_decoder.layers.0.linear2.bias",
104
+ "decoder.actor_decoder.layers.0.norm1.weight",
105
+ "decoder.actor_decoder.layers.0.norm1.bias",
106
+ "decoder.actor_decoder.layers.0.norm2.weight",
107
+ "decoder.actor_decoder.layers.0.norm2.bias",
108
+ "decoder.actor_decoder.layers.0.norm3.weight",
109
+ "decoder.actor_decoder.layers.0.norm3.bias",
110
+ "decoder.actor_decoder.layers.1.self_attn.in_proj_weight",
111
+ "decoder.actor_decoder.layers.1.self_attn.in_proj_bias",
112
+ "decoder.actor_decoder.layers.1.self_attn.out_proj.weight",
113
+ "decoder.actor_decoder.layers.1.self_attn.out_proj.bias",
114
+ "decoder.actor_decoder.layers.1.multihead_attn.in_proj_weight",
115
+ "decoder.actor_decoder.layers.1.multihead_attn.in_proj_bias",
116
+ "decoder.actor_decoder.layers.1.multihead_attn.out_proj.weight",
117
+ "decoder.actor_decoder.layers.1.multihead_attn.out_proj.bias",
118
+ "decoder.actor_decoder.layers.1.linear1.weight",
119
+ "decoder.actor_decoder.layers.1.linear1.bias",
120
+ "decoder.actor_decoder.layers.1.linear2.weight",
121
+ "decoder.actor_decoder.layers.1.linear2.bias",
122
+ "decoder.actor_decoder.layers.1.norm1.weight",
123
+ "decoder.actor_decoder.layers.1.norm1.bias",
124
+ "decoder.actor_decoder.layers.1.norm2.weight",
125
+ "decoder.actor_decoder.layers.1.norm2.bias",
126
+ "decoder.actor_decoder.layers.1.norm3.weight",
127
+ "decoder.actor_decoder.layers.1.norm3.bias",
128
+ "decoder.actor_decoder.layers.2.self_attn.in_proj_weight",
129
+ "decoder.actor_decoder.layers.2.self_attn.in_proj_bias",
130
+ "decoder.actor_decoder.layers.2.self_attn.out_proj.weight",
131
+ "decoder.actor_decoder.layers.2.self_attn.out_proj.bias",
132
+ "decoder.actor_decoder.layers.2.multihead_attn.in_proj_weight",
133
+ "decoder.actor_decoder.layers.2.multihead_attn.in_proj_bias",
134
+ "decoder.actor_decoder.layers.2.multihead_attn.out_proj.weight",
135
+ "decoder.actor_decoder.layers.2.multihead_attn.out_proj.bias",
136
+ "decoder.actor_decoder.layers.2.linear1.weight",
137
+ "decoder.actor_decoder.layers.2.linear1.bias",
138
+ "decoder.actor_decoder.layers.2.linear2.weight",
139
+ "decoder.actor_decoder.layers.2.linear2.bias",
140
+ "decoder.actor_decoder.layers.2.norm1.weight",
141
+ "decoder.actor_decoder.layers.2.norm1.bias",
142
+ "decoder.actor_decoder.layers.2.norm2.weight",
143
+ "decoder.actor_decoder.layers.2.norm2.bias",
144
+ "decoder.actor_decoder.layers.2.norm3.weight",
145
+ "decoder.actor_decoder.layers.2.norm3.bias",
146
+ "decoder.actor_decoder.layers.3.self_attn.in_proj_weight",
147
+ "decoder.actor_decoder.layers.3.self_attn.in_proj_bias",
148
+ "decoder.actor_decoder.layers.3.self_attn.out_proj.weight",
149
+ "decoder.actor_decoder.layers.3.self_attn.out_proj.bias",
150
+ "decoder.actor_decoder.layers.3.multihead_attn.in_proj_weight",
151
+ "decoder.actor_decoder.layers.3.multihead_attn.in_proj_bias",
152
+ "decoder.actor_decoder.layers.3.multihead_attn.out_proj.weight",
153
+ "decoder.actor_decoder.layers.3.multihead_attn.out_proj.bias",
154
+ "decoder.actor_decoder.layers.3.linear1.weight",
155
+ "decoder.actor_decoder.layers.3.linear1.bias",
156
+ "decoder.actor_decoder.layers.3.linear2.weight",
157
+ "decoder.actor_decoder.layers.3.linear2.bias",
158
+ "decoder.actor_decoder.layers.3.norm1.weight",
159
+ "decoder.actor_decoder.layers.3.norm1.bias",
160
+ "decoder.actor_decoder.layers.3.norm2.weight",
161
+ "decoder.actor_decoder.layers.3.norm2.bias",
162
+ "decoder.actor_decoder.layers.3.norm3.weight",
163
+ "decoder.actor_decoder.layers.3.norm3.bias",
164
+ "decoder.revealer_mean.weight",
165
+ "decoder.revealer_mean.bias",
166
+ "decoder.revealer_log_std.weight",
167
+ "decoder.revealer_log_std.bias",
168
+ "decoder.actor_mean.weight",
169
+ "decoder.actor_mean.bias",
170
+ "decoder.actor_log_std.weight",
171
+ "decoder.actor_log_std.bias",
172
+ "decoder.proposal_score.1.weight",
173
+ "decoder.proposal_score.1.bias"
174
+ ],
175
+ "missing_keys": [
176
+ "memory.position_embedding",
177
+ "memory.bank_queries",
178
+ "memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
179
+ "memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
180
+ "memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
181
+ "memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
182
+ "memory.sequence_encoder.layers.0.linear1.weight",
183
+ "memory.sequence_encoder.layers.0.linear1.bias",
184
+ "memory.sequence_encoder.layers.0.linear2.weight",
185
+ "memory.sequence_encoder.layers.0.linear2.bias",
186
+ "memory.sequence_encoder.layers.0.norm1.weight",
187
+ "memory.sequence_encoder.layers.0.norm1.bias",
188
+ "memory.sequence_encoder.layers.0.norm2.weight",
189
+ "memory.sequence_encoder.layers.0.norm2.bias",
190
+ "memory.sequence_encoder.layers.1.self_attn.in_proj_weight",
191
+ "memory.sequence_encoder.layers.1.self_attn.in_proj_bias",
192
+ "memory.sequence_encoder.layers.1.self_attn.out_proj.weight",
193
+ "memory.sequence_encoder.layers.1.self_attn.out_proj.bias",
194
+ "memory.sequence_encoder.layers.1.linear1.weight",
195
+ "memory.sequence_encoder.layers.1.linear1.bias",
196
+ "memory.sequence_encoder.layers.1.linear2.weight",
197
+ "memory.sequence_encoder.layers.1.linear2.bias",
198
+ "memory.sequence_encoder.layers.1.norm1.weight",
199
+ "memory.sequence_encoder.layers.1.norm1.bias",
200
+ "memory.sequence_encoder.layers.1.norm2.weight",
201
+ "memory.sequence_encoder.layers.1.norm2.bias",
202
+ "memory.bank_attention.in_proj_weight",
203
+ "memory.bank_attention.in_proj_bias",
204
+ "memory.bank_attention.out_proj.weight",
205
+ "memory.bank_attention.out_proj.bias",
206
+ "memory.bank_mlp.0.weight",
207
+ "memory.bank_mlp.0.bias",
208
+ "memory.bank_mlp.1.weight",
209
+ "memory.bank_mlp.1.bias",
210
+ "memory.bank_mlp.3.weight",
211
+ "memory.bank_mlp.3.bias",
212
+ "memory.action_proj.0.weight",
213
+ "memory.action_proj.0.bias",
214
+ "memory.action_proj.1.weight",
215
+ "memory.action_proj.1.bias",
216
+ "decoder.right_decoder.layers.0.self_attn.in_proj_weight",
217
+ "decoder.right_decoder.layers.0.self_attn.in_proj_bias",
218
+ "decoder.right_decoder.layers.0.self_attn.out_proj.weight",
219
+ "decoder.right_decoder.layers.0.self_attn.out_proj.bias",
220
+ "decoder.right_decoder.layers.0.multihead_attn.in_proj_weight",
221
+ "decoder.right_decoder.layers.0.multihead_attn.in_proj_bias",
222
+ "decoder.right_decoder.layers.0.multihead_attn.out_proj.weight",
223
+ "decoder.right_decoder.layers.0.multihead_attn.out_proj.bias",
224
+ "decoder.right_decoder.layers.0.linear1.weight",
225
+ "decoder.right_decoder.layers.0.linear1.bias",
226
+ "decoder.right_decoder.layers.0.linear2.weight",
227
+ "decoder.right_decoder.layers.0.linear2.bias",
228
+ "decoder.right_decoder.layers.0.norm1.weight",
229
+ "decoder.right_decoder.layers.0.norm1.bias",
230
+ "decoder.right_decoder.layers.0.norm2.weight",
231
+ "decoder.right_decoder.layers.0.norm2.bias",
232
+ "decoder.right_decoder.layers.0.norm3.weight",
233
+ "decoder.right_decoder.layers.0.norm3.bias",
234
+ "decoder.right_decoder.layers.1.self_attn.in_proj_weight",
235
+ "decoder.right_decoder.layers.1.self_attn.in_proj_bias",
236
+ "decoder.right_decoder.layers.1.self_attn.out_proj.weight",
237
+ "decoder.right_decoder.layers.1.self_attn.out_proj.bias",
238
+ "decoder.right_decoder.layers.1.multihead_attn.in_proj_weight",
239
+ "decoder.right_decoder.layers.1.multihead_attn.in_proj_bias",
240
+ "decoder.right_decoder.layers.1.multihead_attn.out_proj.weight",
241
+ "decoder.right_decoder.layers.1.multihead_attn.out_proj.bias",
242
+ "decoder.right_decoder.layers.1.linear1.weight",
243
+ "decoder.right_decoder.layers.1.linear1.bias",
244
+ "decoder.right_decoder.layers.1.linear2.weight",
245
+ "decoder.right_decoder.layers.1.linear2.bias",
246
+ "decoder.right_decoder.layers.1.norm1.weight",
247
+ "decoder.right_decoder.layers.1.norm1.bias",
248
+ "decoder.right_decoder.layers.1.norm2.weight",
249
+ "decoder.right_decoder.layers.1.norm2.bias",
250
+ "decoder.right_decoder.layers.1.norm3.weight",
251
+ "decoder.right_decoder.layers.1.norm3.bias",
252
+ "decoder.right_decoder.layers.2.self_attn.in_proj_weight",
253
+ "decoder.right_decoder.layers.2.self_attn.in_proj_bias",
254
+ "decoder.right_decoder.layers.2.self_attn.out_proj.weight",
255
+ "decoder.right_decoder.layers.2.self_attn.out_proj.bias",
256
+ "decoder.right_decoder.layers.2.multihead_attn.in_proj_weight",
257
+ "decoder.right_decoder.layers.2.multihead_attn.in_proj_bias",
258
+ "decoder.right_decoder.layers.2.multihead_attn.out_proj.weight",
259
+ "decoder.right_decoder.layers.2.multihead_attn.out_proj.bias",
260
+ "decoder.right_decoder.layers.2.linear1.weight",
261
+ "decoder.right_decoder.layers.2.linear1.bias",
262
+ "decoder.right_decoder.layers.2.linear2.weight",
263
+ "decoder.right_decoder.layers.2.linear2.bias",
264
+ "decoder.right_decoder.layers.2.norm1.weight",
265
+ "decoder.right_decoder.layers.2.norm1.bias",
266
+ "decoder.right_decoder.layers.2.norm2.weight",
267
+ "decoder.right_decoder.layers.2.norm2.bias",
268
+ "decoder.right_decoder.layers.2.norm3.weight",
269
+ "decoder.right_decoder.layers.2.norm3.bias",
270
+ "decoder.right_decoder.layers.3.self_attn.in_proj_weight",
271
+ "decoder.right_decoder.layers.3.self_attn.in_proj_bias",
272
+ "decoder.right_decoder.layers.3.self_attn.out_proj.weight",
273
+ "decoder.right_decoder.layers.3.self_attn.out_proj.bias",
274
+ "decoder.right_decoder.layers.3.multihead_attn.in_proj_weight",
275
+ "decoder.right_decoder.layers.3.multihead_attn.in_proj_bias",
276
+ "decoder.right_decoder.layers.3.multihead_attn.out_proj.weight",
277
+ "decoder.right_decoder.layers.3.multihead_attn.out_proj.bias",
278
+ "decoder.right_decoder.layers.3.linear1.weight",
279
+ "decoder.right_decoder.layers.3.linear1.bias",
280
+ "decoder.right_decoder.layers.3.linear2.weight",
281
+ "decoder.right_decoder.layers.3.linear2.bias",
282
+ "decoder.right_decoder.layers.3.norm1.weight",
283
+ "decoder.right_decoder.layers.3.norm1.bias",
284
+ "decoder.right_decoder.layers.3.norm2.weight",
285
+ "decoder.right_decoder.layers.3.norm2.bias",
286
+ "decoder.right_decoder.layers.3.norm3.weight",
287
+ "decoder.right_decoder.layers.3.norm3.bias",
288
+ "decoder.left_decoder.layers.0.self_attn.in_proj_weight",
289
+ "decoder.left_decoder.layers.0.self_attn.in_proj_bias",
290
+ "decoder.left_decoder.layers.0.self_attn.out_proj.weight",
291
+ "decoder.left_decoder.layers.0.self_attn.out_proj.bias",
292
+ "decoder.left_decoder.layers.0.multihead_attn.in_proj_weight",
293
+ "decoder.left_decoder.layers.0.multihead_attn.in_proj_bias",
294
+ "decoder.left_decoder.layers.0.multihead_attn.out_proj.weight",
295
+ "decoder.left_decoder.layers.0.multihead_attn.out_proj.bias",
296
+ "decoder.left_decoder.layers.0.linear1.weight",
297
+ "decoder.left_decoder.layers.0.linear1.bias",
298
+ "decoder.left_decoder.layers.0.linear2.weight",
299
+ "decoder.left_decoder.layers.0.linear2.bias",
300
+ "decoder.left_decoder.layers.0.norm1.weight",
301
+ "decoder.left_decoder.layers.0.norm1.bias",
302
+ "decoder.left_decoder.layers.0.norm2.weight",
303
+ "decoder.left_decoder.layers.0.norm2.bias",
304
+ "decoder.left_decoder.layers.0.norm3.weight",
305
+ "decoder.left_decoder.layers.0.norm3.bias",
306
+ "decoder.left_decoder.layers.1.self_attn.in_proj_weight",
307
+ "decoder.left_decoder.layers.1.self_attn.in_proj_bias",
308
+ "decoder.left_decoder.layers.1.self_attn.out_proj.weight",
309
+ "decoder.left_decoder.layers.1.self_attn.out_proj.bias",
310
+ "decoder.left_decoder.layers.1.multihead_attn.in_proj_weight",
311
+ "decoder.left_decoder.layers.1.multihead_attn.in_proj_bias",
312
+ "decoder.left_decoder.layers.1.multihead_attn.out_proj.weight",
313
+ "decoder.left_decoder.layers.1.multihead_attn.out_proj.bias",
314
+ "decoder.left_decoder.layers.1.linear1.weight",
315
+ "decoder.left_decoder.layers.1.linear1.bias",
316
+ "decoder.left_decoder.layers.1.linear2.weight",
317
+ "decoder.left_decoder.layers.1.linear2.bias",
318
+ "decoder.left_decoder.layers.1.norm1.weight",
319
+ "decoder.left_decoder.layers.1.norm1.bias",
320
+ "decoder.left_decoder.layers.1.norm2.weight",
321
+ "decoder.left_decoder.layers.1.norm2.bias",
322
+ "decoder.left_decoder.layers.1.norm3.weight",
323
+ "decoder.left_decoder.layers.1.norm3.bias",
324
+ "decoder.left_decoder.layers.2.self_attn.in_proj_weight",
325
+ "decoder.left_decoder.layers.2.self_attn.in_proj_bias",
326
+ "decoder.left_decoder.layers.2.self_attn.out_proj.weight",
327
+ "decoder.left_decoder.layers.2.self_attn.out_proj.bias",
328
+ "decoder.left_decoder.layers.2.multihead_attn.in_proj_weight",
329
+ "decoder.left_decoder.layers.2.multihead_attn.in_proj_bias",
330
+ "decoder.left_decoder.layers.2.multihead_attn.out_proj.weight",
331
+ "decoder.left_decoder.layers.2.multihead_attn.out_proj.bias",
332
+ "decoder.left_decoder.layers.2.linear1.weight",
333
+ "decoder.left_decoder.layers.2.linear1.bias",
334
+ "decoder.left_decoder.layers.2.linear2.weight",
335
+ "decoder.left_decoder.layers.2.linear2.bias",
336
+ "decoder.left_decoder.layers.2.norm1.weight",
337
+ "decoder.left_decoder.layers.2.norm1.bias",
338
+ "decoder.left_decoder.layers.2.norm2.weight",
339
+ "decoder.left_decoder.layers.2.norm2.bias",
340
+ "decoder.left_decoder.layers.2.norm3.weight",
341
+ "decoder.left_decoder.layers.2.norm3.bias",
342
+ "decoder.left_decoder.layers.3.self_attn.in_proj_weight",
343
+ "decoder.left_decoder.layers.3.self_attn.in_proj_bias",
344
+ "decoder.left_decoder.layers.3.self_attn.out_proj.weight",
345
+ "decoder.left_decoder.layers.3.self_attn.out_proj.bias",
346
+ "decoder.left_decoder.layers.3.multihead_attn.in_proj_weight",
347
+ "decoder.left_decoder.layers.3.multihead_attn.in_proj_bias",
348
+ "decoder.left_decoder.layers.3.multihead_attn.out_proj.weight",
349
+ "decoder.left_decoder.layers.3.multihead_attn.out_proj.bias",
350
+ "decoder.left_decoder.layers.3.linear1.weight",
351
+ "decoder.left_decoder.layers.3.linear1.bias",
352
+ "decoder.left_decoder.layers.3.linear2.weight",
353
+ "decoder.left_decoder.layers.3.linear2.bias",
354
+ "decoder.left_decoder.layers.3.norm1.weight",
355
+ "decoder.left_decoder.layers.3.norm1.bias",
356
+ "decoder.left_decoder.layers.3.norm2.weight",
357
+ "decoder.left_decoder.layers.3.norm2.bias",
358
+ "decoder.left_decoder.layers.3.norm3.weight",
359
+ "decoder.left_decoder.layers.3.norm3.bias",
360
+ "decoder.proposal_queries.weight",
361
+ "decoder.arm_identity.weight",
362
+ "decoder.phase_adapter.weight",
363
+ "decoder.phase_adapter.bias",
364
+ "decoder.role_adapter.weight",
365
+ "decoder.role_adapter.bias",
366
+ "decoder.context_proj.0.weight",
367
+ "decoder.context_proj.0.bias",
368
+ "decoder.context_proj.1.weight",
369
+ "decoder.context_proj.1.bias",
370
+ "decoder.right_mean.weight",
371
+ "decoder.right_mean.bias",
372
+ "decoder.right_log_std.weight",
373
+ "decoder.right_log_std.bias",
374
+ "decoder.left_mean.weight",
375
+ "decoder.left_mean.bias",
376
+ "decoder.left_log_std.weight",
377
+ "decoder.left_log_std.bias",
378
+ "decoder.proposal_score.1.weight",
379
+ "decoder.proposal_score.1.bias",
380
+ "decoder.proposal_score.3.weight",
381
+ "decoder.proposal_score.3.bias",
382
+ "interaction_head.interaction_queries",
383
+ "interaction_head.interaction_attention.in_proj_weight",
384
+ "interaction_head.interaction_attention.in_proj_bias",
385
+ "interaction_head.interaction_attention.out_proj.weight",
386
+ "interaction_head.interaction_attention.out_proj.bias",
387
+ "interaction_head.interaction_mlp.0.weight",
388
+ "interaction_head.interaction_mlp.0.bias",
389
+ "interaction_head.interaction_mlp.1.weight",
390
+ "interaction_head.interaction_mlp.1.bias",
391
+ "interaction_head.interaction_mlp.3.weight",
392
+ "interaction_head.interaction_mlp.3.bias",
393
+ "interaction_head.decoder.field_queries",
394
+ "interaction_head.decoder.field_attention.in_proj_weight",
395
+ "interaction_head.decoder.field_attention.in_proj_bias",
396
+ "interaction_head.decoder.field_attention.out_proj.weight",
397
+ "interaction_head.decoder.field_attention.out_proj.bias",
398
+ "interaction_head.decoder.field_mlp.0.weight",
399
+ "interaction_head.decoder.field_mlp.0.bias",
400
+ "interaction_head.decoder.field_mlp.1.weight",
401
+ "interaction_head.decoder.field_mlp.1.bias",
402
+ "interaction_head.decoder.field_mlp.3.weight",
403
+ "interaction_head.decoder.field_mlp.3.bias",
404
+ "interaction_head.decoder.summary_proj.0.weight",
405
+ "interaction_head.decoder.summary_proj.0.bias",
406
+ "interaction_head.decoder.summary_proj.1.weight",
407
+ "interaction_head.decoder.summary_proj.1.bias",
408
+ "interaction_head.decoder.phase_head.0.weight",
409
+ "interaction_head.decoder.phase_head.0.bias",
410
+ "interaction_head.decoder.phase_head.1.weight",
411
+ "interaction_head.decoder.phase_head.1.bias",
412
+ "interaction_head.decoder.phase_head.3.weight",
413
+ "interaction_head.decoder.phase_head.3.bias",
414
+ "interaction_head.decoder.arm_role_head.0.weight",
415
+ "interaction_head.decoder.arm_role_head.0.bias",
416
+ "interaction_head.decoder.arm_role_head.1.weight",
417
+ "interaction_head.decoder.arm_role_head.1.bias",
418
+ "interaction_head.decoder.arm_role_head.3.weight",
419
+ "interaction_head.decoder.arm_role_head.3.bias",
420
+ "interaction_head.decoder.arm_identity.weight",
421
+ "interaction_head.decoder.support_mode.0.weight",
422
+ "interaction_head.decoder.support_mode.0.bias",
423
+ "interaction_head.decoder.support_mode.1.weight",
424
+ "interaction_head.decoder.support_mode.1.bias",
425
+ "interaction_head.decoder.support_mode.3.weight",
426
+ "interaction_head.decoder.support_mode.3.bias",
427
+ "interaction_head.decoder.target_field.weight",
428
+ "interaction_head.decoder.target_field.bias",
429
+ "interaction_head.decoder.actor_feasibility_field.weight",
430
+ "interaction_head.decoder.actor_feasibility_field.bias",
431
+ "interaction_head.decoder.persistence_field.weight",
432
+ "interaction_head.decoder.persistence_field.bias",
433
+ "interaction_head.decoder.risk_field.weight",
434
+ "interaction_head.decoder.risk_field.bias",
435
+ "interaction_head.decoder.uncertainty_field.weight",
436
+ "interaction_head.decoder.uncertainty_field.bias",
437
+ "interaction_head.decoder.compat_access_field.weight",
438
+ "interaction_head.decoder.compat_access_field.bias",
439
+ "interaction_head.decoder.compat_persistence.weight",
440
+ "interaction_head.decoder.compat_persistence.bias",
441
+ "interaction_head.decoder.reocclusion_head.0.weight",
442
+ "interaction_head.decoder.reocclusion_head.0.bias",
443
+ "interaction_head.decoder.reocclusion_head.1.weight",
444
+ "interaction_head.decoder.reocclusion_head.1.bias",
445
+ "interaction_head.decoder.reocclusion_head.3.weight",
446
+ "interaction_head.decoder.reocclusion_head.3.bias",
447
+ "world_model.action_encoder.0.weight",
448
+ "world_model.action_encoder.0.bias",
449
+ "world_model.action_encoder.1.weight",
450
+ "world_model.action_encoder.1.bias",
451
+ "world_model.transition.layers.0.self_attn.in_proj_weight",
452
+ "world_model.transition.layers.0.self_attn.in_proj_bias",
453
+ "world_model.transition.layers.0.self_attn.out_proj.weight",
454
+ "world_model.transition.layers.0.self_attn.out_proj.bias",
455
+ "world_model.transition.layers.0.linear1.weight",
456
+ "world_model.transition.layers.0.linear1.bias",
457
+ "world_model.transition.layers.0.linear2.weight",
458
+ "world_model.transition.layers.0.linear2.bias",
459
+ "world_model.transition.layers.0.norm1.weight",
460
+ "world_model.transition.layers.0.norm1.bias",
461
+ "world_model.transition.layers.0.norm2.weight",
462
+ "world_model.transition.layers.0.norm2.bias",
463
+ "world_model.transition.layers.1.self_attn.in_proj_weight",
464
+ "world_model.transition.layers.1.self_attn.in_proj_bias",
465
+ "world_model.transition.layers.1.self_attn.out_proj.weight",
466
+ "world_model.transition.layers.1.self_attn.out_proj.bias",
467
+ "world_model.transition.layers.1.linear1.weight",
468
+ "world_model.transition.layers.1.linear1.bias",
469
+ "world_model.transition.layers.1.linear2.weight",
470
+ "world_model.transition.layers.1.linear2.bias",
471
+ "world_model.transition.layers.1.norm1.weight",
472
+ "world_model.transition.layers.1.norm1.bias",
473
+ "world_model.transition.layers.1.norm2.weight",
474
+ "world_model.transition.layers.1.norm2.bias",
475
+ "world_model.token_update.0.weight",
476
+ "world_model.token_update.0.bias",
477
+ "world_model.token_update.1.weight",
478
+ "world_model.token_update.1.bias",
479
+ "world_model.token_update.3.weight",
480
+ "world_model.token_update.3.bias",
481
+ "world_model.decoder.field_queries",
482
+ "world_model.decoder.field_attention.in_proj_weight",
483
+ "world_model.decoder.field_attention.in_proj_bias",
484
+ "world_model.decoder.field_attention.out_proj.weight",
485
+ "world_model.decoder.field_attention.out_proj.bias",
486
+ "world_model.decoder.field_mlp.0.weight",
487
+ "world_model.decoder.field_mlp.0.bias",
488
+ "world_model.decoder.field_mlp.1.weight",
489
+ "world_model.decoder.field_mlp.1.bias",
490
+ "world_model.decoder.field_mlp.3.weight",
491
+ "world_model.decoder.field_mlp.3.bias",
492
+ "world_model.decoder.summary_proj.0.weight",
493
+ "world_model.decoder.summary_proj.0.bias",
494
+ "world_model.decoder.summary_proj.1.weight",
495
+ "world_model.decoder.summary_proj.1.bias",
496
+ "world_model.decoder.phase_head.0.weight",
497
+ "world_model.decoder.phase_head.0.bias",
498
+ "world_model.decoder.phase_head.1.weight",
499
+ "world_model.decoder.phase_head.1.bias",
500
+ "world_model.decoder.phase_head.3.weight",
501
+ "world_model.decoder.phase_head.3.bias",
502
+ "world_model.decoder.arm_role_head.0.weight",
503
+ "world_model.decoder.arm_role_head.0.bias",
504
+ "world_model.decoder.arm_role_head.1.weight",
505
+ "world_model.decoder.arm_role_head.1.bias",
506
+ "world_model.decoder.arm_role_head.3.weight",
507
+ "world_model.decoder.arm_role_head.3.bias",
508
+ "world_model.decoder.arm_identity.weight",
509
+ "world_model.decoder.support_mode.0.weight",
510
+ "world_model.decoder.support_mode.0.bias",
511
+ "world_model.decoder.support_mode.1.weight",
512
+ "world_model.decoder.support_mode.1.bias",
513
+ "world_model.decoder.support_mode.3.weight",
514
+ "world_model.decoder.support_mode.3.bias",
515
+ "world_model.decoder.target_field.weight",
516
+ "world_model.decoder.target_field.bias",
517
+ "world_model.decoder.actor_feasibility_field.weight",
518
+ "world_model.decoder.actor_feasibility_field.bias",
519
+ "world_model.decoder.persistence_field.weight",
520
+ "world_model.decoder.persistence_field.bias",
521
+ "world_model.decoder.risk_field.weight",
522
+ "world_model.decoder.risk_field.bias",
523
+ "world_model.decoder.uncertainty_field.weight",
524
+ "world_model.decoder.uncertainty_field.bias",
525
+ "world_model.decoder.compat_access_field.weight",
526
+ "world_model.decoder.compat_access_field.bias",
527
+ "world_model.decoder.compat_persistence.weight",
528
+ "world_model.decoder.compat_persistence.bias",
529
+ "world_model.decoder.reocclusion_head.0.weight",
530
+ "world_model.decoder.reocclusion_head.0.bias",
531
+ "world_model.decoder.reocclusion_head.1.weight",
532
+ "world_model.decoder.reocclusion_head.1.bias",
533
+ "world_model.decoder.reocclusion_head.3.weight",
534
+ "world_model.decoder.reocclusion_head.3.bias",
535
+ "planner.cls_token",
536
+ "planner.step_proj.0.weight",
537
+ "planner.step_proj.0.bias",
538
+ "planner.step_proj.1.weight",
539
+ "planner.step_proj.1.bias",
540
+ "planner.sequence_encoder.layers.0.self_attn.in_proj_weight",
541
+ "planner.sequence_encoder.layers.0.self_attn.in_proj_bias",
542
+ "planner.sequence_encoder.layers.0.self_attn.out_proj.weight",
543
+ "planner.sequence_encoder.layers.0.self_attn.out_proj.bias",
544
+ "planner.sequence_encoder.layers.0.linear1.weight",
545
+ "planner.sequence_encoder.layers.0.linear1.bias",
546
+ "planner.sequence_encoder.layers.0.linear2.weight",
547
+ "planner.sequence_encoder.layers.0.linear2.bias",
548
+ "planner.sequence_encoder.layers.0.norm1.weight",
549
+ "planner.sequence_encoder.layers.0.norm1.bias",
550
+ "planner.sequence_encoder.layers.0.norm2.weight",
551
+ "planner.sequence_encoder.layers.0.norm2.bias",
552
+ "planner.sequence_encoder.layers.1.self_attn.in_proj_weight",
553
+ "planner.sequence_encoder.layers.1.self_attn.in_proj_bias",
554
+ "planner.sequence_encoder.layers.1.self_attn.out_proj.weight",
555
+ "planner.sequence_encoder.layers.1.self_attn.out_proj.bias",
556
+ "planner.sequence_encoder.layers.1.linear1.weight",
557
+ "planner.sequence_encoder.layers.1.linear1.bias",
558
+ "planner.sequence_encoder.layers.1.linear2.weight",
559
+ "planner.sequence_encoder.layers.1.linear2.bias",
560
+ "planner.sequence_encoder.layers.1.norm1.weight",
561
+ "planner.sequence_encoder.layers.1.norm1.bias",
562
+ "planner.sequence_encoder.layers.1.norm2.weight",
563
+ "planner.sequence_encoder.layers.1.norm2.bias",
564
+ "planner.success_head.weight",
565
+ "planner.success_head.bias",
566
+ "planner.risk_head.weight",
567
+ "planner.risk_head.bias",
568
+ "planner.score_head.weight",
569
+ "planner.score_head.bias"
570
+ ],
571
+ "unexpected_keys": []
572
+ }
573
+ }
artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/config_resolved.yaml ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_state_recency_oracleft
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction_debug
3
+ device: cuda
4
+ seed: 13
5
+ init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
6
+ init_strict: true
7
+ data:
8
+ proxies:
9
+ - foliage_proxy
10
+ - bag_proxy
11
+ - cloth_proxy
12
+ resolution: 96
13
+ train_episodes_per_proxy: 48
14
+ val_episodes_per_proxy: 16
15
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v5_actionhist.pt
16
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v5_actionhist.pt
17
+ rebuild_dataset: false
18
+ chunk_horizon: 8
19
+ rollout_horizon: 5
20
+ history_steps: 6
21
+ planner_candidates: 8
22
+ seed: 13
23
+ optim:
24
+ epochs: 8
25
+ batch_size: 16
26
+ num_workers: 0
27
+ lr: 0.0003
28
+ weight_decay: 0.0001
29
+ trainer:
30
+ policy_type: interaction_state
31
+ use_bf16: true
32
+ grad_clip_norm: 1.0
33
+ freeze_backbone: true
34
+ gradient_checkpointing: false
35
+ plan_during_train: true
36
+ plan_during_eval: true
37
+ support_mode_conditioning: true
38
+ planner_mode: trainable
39
+ policy:
40
+ backbone:
41
+ model_name: openai/clip-vit-base-patch32
42
+ hidden_dim: 128
43
+ max_text_tokens: 32
44
+ freeze_backbone: true
45
+ gradient_checkpointing: false
46
+ use_dummy_backbone: true
47
+ fusion:
48
+ hidden_dim: 128
49
+ num_cameras: 3
50
+ num_layers: 2
51
+ num_heads: 4
52
+ ff_dim: 256
53
+ dropout: 0.1
54
+ proprio_dim: 32
55
+ proprio_tokens: 1
56
+ memory:
57
+ hidden_dim: 128
58
+ action_dim: 14
59
+ history_steps: 6
60
+ num_layers: 2
61
+ dropout: 0.1
62
+ memory_bank_size: 4
63
+ num_heads: 4
64
+ max_history_steps: 8
65
+ decoder:
66
+ hidden_dim: 128
67
+ num_heads: 4
68
+ num_layers: 2
69
+ ff_dim: 256
70
+ dropout: 0.1
71
+ chunk_size: 8
72
+ action_dim: 14
73
+ arm_action_dim: 7
74
+ num_candidates: 8
75
+ num_phases: 5
76
+ num_arm_roles: 4
77
+ reveal_head:
78
+ hidden_dim: 128
79
+ num_support_modes: 3
80
+ num_approach_templates: 32
81
+ rollout_horizon: 5
82
+ belief_map_size: 32
83
+ field_size: 16
84
+ num_heads: 4
85
+ predict_belief_map: true
86
+ num_phases: 5
87
+ num_arm_roles: 4
88
+ num_interaction_tokens: 8
89
+ world_model:
90
+ hidden_dim: 128
91
+ action_dim: 14
92
+ num_support_modes: 3
93
+ num_approach_templates: 32
94
+ rollout_horizon: 5
95
+ field_size: 16
96
+ num_heads: 4
97
+ num_phases: 5
98
+ num_arm_roles: 4
99
+ num_interaction_tokens: 8
100
+ planner:
101
+ hidden_dim: 128
102
+ num_candidates: 8
103
+ action_dim: 14
104
+ num_support_modes: 3
105
+ utility_margin: 0.1
106
+ num_heads: 4
107
+ num_layers: 2
108
+ num_phases: 5
109
+ num_arm_roles: 4
110
+ loss_weights:
111
+ action: 1.0
112
+ phase: 0.1
113
+ arm_role: 0.15
114
+ support_mode: 0.1
115
+ corridor: 0.15
116
+ persistence: 0.05
117
+ disturbance: 0.05
118
+ world_model: 0.2
119
+ belief: 0.05
120
+ planner_success: 0.25
121
+ planner_risk: 0.1
122
+ planner_ranking: 0.2
123
+ proposal_reconstruction: 0.1
124
+ proposal_success: 0.15
125
+ proposal_ranking: 0.2
artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/diagnostics/proxy_diagnostics.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "planner_top1_accuracy": 0.2824427480916031,
3
+ "planner_regret": 0.24119873344898224,
4
+ "risk_calibration_mse": 0.009003574028611183,
5
+ "role_collapse_rate": 0.0,
6
+ "num_samples": 131
7
+ }
artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/metrics.json ADDED
@@ -0,0 +1,346 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.019621784721190732,
6
+ "arm_role": 1.4238473445023677e-05,
7
+ "belief": 0.10273545235395432,
8
+ "corridor": 0.1970261943837007,
9
+ "disturbance": 0.0021691546814205744,
10
+ "persistence": 1.1530707913140457,
11
+ "phase": 0.40012874578436214,
12
+ "planner_ranking": 1.0832201441129048,
13
+ "planner_risk": 0.009513227792922407,
14
+ "planner_success": 0.44357747708757717,
15
+ "proposal_ranking": 1.1302440961201985,
16
+ "proposal_reconstruction": 0.06888884957879782,
17
+ "proposal_success": 0.6472248112161955,
18
+ "reocclusion": 0.23694051212320724,
19
+ "support_mode": 0.0007155667990446091,
20
+ "total": 1.1227939675251644,
21
+ "uncertainty": 0.00357946046278812,
22
+ "world_model": 1.5367356936136882
23
+ },
24
+ "val": {
25
+ "action": 0.01623468690862258,
26
+ "arm_role": 1.7815142427934916e-06,
27
+ "belief": 0.09838261952002843,
28
+ "corridor": 0.19904182685746086,
29
+ "disturbance": 0.0012887230906118122,
30
+ "persistence": 1.2435127298037212,
31
+ "phase": 0.451065621442265,
32
+ "planner_ranking": 1.1237382623884413,
33
+ "planner_risk": 0.00879605039436784,
34
+ "planner_success": 0.5527588526407877,
35
+ "proposal_ranking": 1.1329045295715332,
36
+ "proposal_reconstruction": 0.062247288723786674,
37
+ "proposal_success": 0.6402903331650628,
38
+ "reocclusion": 0.19368870432178178,
39
+ "support_mode": 4.385759530123323e-05,
40
+ "total": 1.1563972632090251,
41
+ "uncertainty": 0.003620341523653931,
42
+ "world_model": 1.507298681471083
43
+ }
44
+ },
45
+ {
46
+ "epoch": 1,
47
+ "train": {
48
+ "action": 0.015102950584453842,
49
+ "arm_role": 7.929694329315377e-07,
50
+ "belief": 0.09922042830536763,
51
+ "corridor": 0.1954052426541845,
52
+ "disturbance": 0.0012760455817139398,
53
+ "persistence": 1.1133080422878265,
54
+ "phase": 0.40078286826610565,
55
+ "planner_ranking": 1.0535631676514943,
56
+ "planner_risk": 0.009265869099181145,
57
+ "planner_success": 0.41223976016044617,
58
+ "proposal_ranking": 1.130059376358986,
59
+ "proposal_reconstruction": 0.061719981798281275,
60
+ "proposal_success": 0.6374408900737762,
61
+ "reocclusion": 0.22066612169146538,
62
+ "support_mode": 3.152040555202499e-05,
63
+ "total": 1.0627698848644893,
64
+ "uncertainty": 0.002360584529621216,
65
+ "world_model": 1.3532413293917973
66
+ },
67
+ "val": {
68
+ "action": 0.014689018225504292,
69
+ "arm_role": 5.302327663356563e-07,
70
+ "belief": 0.09588906251721913,
71
+ "corridor": 0.19485984411504534,
72
+ "disturbance": 0.0013201889879484144,
73
+ "persistence": 1.211418045891656,
74
+ "phase": 0.4520965864260991,
75
+ "planner_ranking": 1.1366683509614732,
76
+ "planner_risk": 0.009635515045374632,
77
+ "planner_success": 0.5696005490091112,
78
+ "proposal_ranking": 1.1199064254760742,
79
+ "proposal_reconstruction": 0.06043942438231574,
80
+ "proposal_success": 0.6389325261116028,
81
+ "reocclusion": 0.18208894692361355,
82
+ "support_mode": 1.9065460340546753e-05,
83
+ "total": 1.1511138545142279,
84
+ "uncertainty": 0.0020409094027450513,
85
+ "world_model": 1.481640590561761
86
+ }
87
+ },
88
+ {
89
+ "epoch": 2,
90
+ "train": {
91
+ "action": 0.013718575122766197,
92
+ "arm_role": 3.225997922129409e-07,
93
+ "belief": 0.09694493561983109,
94
+ "corridor": 0.19660565722733736,
95
+ "disturbance": 0.0012764433243622382,
96
+ "persistence": 1.1350401155650616,
97
+ "phase": 0.4035409850378831,
98
+ "planner_ranking": 1.0236077308654785,
99
+ "planner_risk": 0.009166777638408044,
100
+ "planner_success": 0.3850418192644914,
101
+ "proposal_ranking": 1.129315584897995,
102
+ "proposal_reconstruction": 0.06000282304982344,
103
+ "proposal_success": 0.6322548364599546,
104
+ "reocclusion": 0.22824073505277434,
105
+ "support_mode": 1.4410975078741709e-05,
106
+ "total": 1.0505772059162457,
107
+ "uncertainty": 0.001883886650224061,
108
+ "world_model": 1.3608256032069523
109
+ },
110
+ "val": {
111
+ "action": 0.015656203031539917,
112
+ "arm_role": 3.1802936541048945e-07,
113
+ "belief": 0.09277311464150746,
114
+ "corridor": 0.19478923082351685,
115
+ "disturbance": 0.001490643351442284,
116
+ "persistence": 1.2428188456429377,
117
+ "phase": 0.44441814886199105,
118
+ "planner_ranking": 1.1642935540941026,
119
+ "planner_risk": 0.008580206893384457,
120
+ "planner_success": 0.5712412032816145,
121
+ "proposal_ranking": 1.1173533731036716,
122
+ "proposal_reconstruction": 0.060967493802309036,
123
+ "proposal_success": 0.6362337801191542,
124
+ "reocclusion": 0.18877888905505338,
125
+ "support_mode": 8.590733412145508e-06,
126
+ "total": 1.164333701133728,
127
+ "uncertainty": 0.0018725828914385703,
128
+ "world_model": 1.5141921705669827
129
+ }
130
+ },
131
+ {
132
+ "epoch": 3,
133
+ "train": {
134
+ "action": 0.013463407677287856,
135
+ "arm_role": 2.1441115283238332e-07,
136
+ "belief": 0.09542769007384777,
137
+ "corridor": 0.19438757871588072,
138
+ "disturbance": 0.0012542814802145585,
139
+ "persistence": 1.0960917932291825,
140
+ "phase": 0.39501943811774254,
141
+ "planner_ranking": 0.9881478076179823,
142
+ "planner_risk": 0.008955476262296239,
143
+ "planner_success": 0.37380507588386536,
144
+ "proposal_ranking": 1.1260421325763066,
145
+ "proposal_reconstruction": 0.05954852948586146,
146
+ "proposal_success": 0.6345230092604955,
147
+ "reocclusion": 0.222653156456848,
148
+ "support_mode": 1.0468997061252594e-05,
149
+ "total": 1.029868942995866,
150
+ "uncertainty": 0.001529014749394264,
151
+ "world_model": 1.3265959272782009
152
+ },
153
+ "val": {
154
+ "action": 0.014502381595472494,
155
+ "arm_role": 1.8074554909554132e-07,
156
+ "belief": 0.09227573540475634,
157
+ "corridor": 0.19471332927544913,
158
+ "disturbance": 0.0014174091500333613,
159
+ "persistence": 1.2068392270141177,
160
+ "phase": 0.44181974563333726,
161
+ "planner_ranking": 1.1894211106830173,
162
+ "planner_risk": 0.008801783072865672,
163
+ "planner_success": 0.5882998870478736,
164
+ "proposal_ranking": 1.1234880420896742,
165
+ "proposal_reconstruction": 0.06003963781727685,
166
+ "proposal_success": 0.6316338512632582,
167
+ "reocclusion": 0.18501534023218685,
168
+ "support_mode": 1.0471259353532028e-05,
169
+ "total": 1.1484977669186063,
170
+ "uncertainty": 0.0011159069642114143,
171
+ "world_model": 1.402906020482381
172
+ }
173
+ },
174
+ {
175
+ "epoch": 4,
176
+ "train": {
177
+ "action": 0.013384843982445696,
178
+ "arm_role": 1.8212530328298726e-07,
179
+ "belief": 0.0940939641247193,
180
+ "corridor": 0.19484392801920572,
181
+ "disturbance": 0.0013209530419165578,
182
+ "persistence": 1.1009935376544793,
183
+ "phase": 0.39735961332917213,
184
+ "planner_ranking": 0.9340380703409513,
185
+ "planner_risk": 0.009769223863258958,
186
+ "planner_success": 0.35210378592212993,
187
+ "proposal_ranking": 1.1286269277334213,
188
+ "proposal_reconstruction": 0.05935003887861967,
189
+ "proposal_success": 0.6315460602442423,
190
+ "reocclusion": 0.22644051164388657,
191
+ "support_mode": 8.073221484513246e-06,
192
+ "total": 1.0135142927368481,
193
+ "uncertainty": 0.0014145106833893806,
194
+ "world_model": 1.3229995171229045
195
+ },
196
+ "val": {
197
+ "action": 0.014795408584177494,
198
+ "arm_role": 2.842257956893314e-07,
199
+ "belief": 0.09148034122255114,
200
+ "corridor": 0.1952296942472458,
201
+ "disturbance": 0.0014219412179146376,
202
+ "persistence": 1.2065883709324732,
203
+ "phase": 0.4573909127049976,
204
+ "planner_ranking": 1.264210171169705,
205
+ "planner_risk": 0.008240946154627535,
206
+ "planner_success": 0.6136878695752885,
207
+ "proposal_ranking": 1.1302801105711195,
208
+ "proposal_reconstruction": 0.06015601671404309,
209
+ "proposal_success": 0.6339429616928101,
210
+ "reocclusion": 0.18241143381843963,
211
+ "support_mode": 5.932560725341318e-06,
212
+ "total": 1.1784167952007718,
213
+ "uncertainty": 0.0014715428373569415,
214
+ "world_model": 1.4285426007376776
215
+ }
216
+ },
217
+ {
218
+ "epoch": 5,
219
+ "train": {
220
+ "action": 0.013003619310135642,
221
+ "arm_role": 1.6706892166003703e-07,
222
+ "belief": 0.09372370348622401,
223
+ "corridor": 0.19377528379360834,
224
+ "disturbance": 0.0012515889684436843,
225
+ "persistence": 1.087764959782362,
226
+ "phase": 0.39413714533050853,
227
+ "planner_ranking": 0.8574716374278069,
228
+ "planner_risk": 0.00931960518937558,
229
+ "planner_success": 0.32699467862645787,
230
+ "proposal_ranking": 1.1296403209368389,
231
+ "proposal_reconstruction": 0.058937749825417995,
232
+ "proposal_success": 0.6314020653565725,
233
+ "reocclusion": 0.22137584226826826,
234
+ "support_mode": 6.786340643808823e-06,
235
+ "total": 0.9859138304988543,
236
+ "uncertainty": 0.0011173486830860686,
237
+ "world_model": 1.3007333129644394
238
+ },
239
+ "val": {
240
+ "action": 0.014327830738491483,
241
+ "arm_role": 2.553892981538297e-07,
242
+ "belief": 0.0923299789428711,
243
+ "corridor": 0.19848757651117113,
244
+ "disturbance": 0.0011894687777385116,
245
+ "persistence": 1.2340974575943418,
246
+ "phase": 0.4644339034954707,
247
+ "planner_ranking": 1.3578486972384982,
248
+ "planner_risk": 0.009015874264554845,
249
+ "planner_success": 0.6275921530193753,
250
+ "proposal_ranking": 1.1163699362013075,
251
+ "proposal_reconstruction": 0.05984223840965165,
252
+ "proposal_success": 0.6348666879865859,
253
+ "reocclusion": 0.20307053801500136,
254
+ "support_mode": 3.7443181150188643e-06,
255
+ "total": 1.2028855217827692,
256
+ "uncertainty": 0.0018055843215228783,
257
+ "world_model": 1.4401142862108018
258
+ }
259
+ },
260
+ {
261
+ "epoch": 6,
262
+ "train": {
263
+ "action": 0.012725909279348949,
264
+ "arm_role": 1.4006056699618816e-07,
265
+ "belief": 0.09327782255907853,
266
+ "corridor": 0.19324024704595408,
267
+ "disturbance": 0.0013581588767313708,
268
+ "persistence": 1.0872996002435684,
269
+ "phase": 0.3942833219965299,
270
+ "planner_ranking": 0.8039915859699249,
271
+ "planner_risk": 0.009058927069418132,
272
+ "planner_success": 0.3132968743642171,
273
+ "proposal_ranking": 1.1225138505299885,
274
+ "proposal_reconstruction": 0.058770577888935804,
275
+ "proposal_success": 0.6332228208581606,
276
+ "reocclusion": 0.22015962299580374,
277
+ "support_mode": 4.966122408707936e-06,
278
+ "total": 0.9676197816928228,
279
+ "uncertainty": 0.0011598596538533457,
280
+ "world_model": 1.2878785928090413
281
+ },
282
+ "val": {
283
+ "action": 0.014496596633560128,
284
+ "arm_role": 2.192401922229692e-07,
285
+ "belief": 0.090823319223192,
286
+ "corridor": 0.19339712626404232,
287
+ "disturbance": 0.0016455024532559845,
288
+ "persistence": 1.2035431563854218,
289
+ "phase": 0.45077220764425063,
290
+ "planner_ranking": 1.4061412149005466,
291
+ "planner_risk": 0.008559927913463779,
292
+ "planner_success": 0.6576948232120938,
293
+ "proposal_ranking": 1.115302946832445,
294
+ "proposal_reconstruction": 0.059833423958884344,
295
+ "proposal_success": 0.6364065806070963,
296
+ "reocclusion": 0.1801526459554831,
297
+ "support_mode": 4.350292038503136e-06,
298
+ "total": 1.2042852375242445,
299
+ "uncertainty": 0.0007912304588697023,
300
+ "world_model": 1.3813848230573866
301
+ }
302
+ },
303
+ {
304
+ "epoch": 7,
305
+ "train": {
306
+ "action": 0.012622703972738236,
307
+ "arm_role": 1.0477378964424133e-07,
308
+ "belief": 0.09258855165292819,
309
+ "corridor": 0.19252262574930987,
310
+ "disturbance": 0.0013018598037888296,
311
+ "persistence": 1.066667130837838,
312
+ "phase": 0.3908200403675437,
313
+ "planner_ranking": 0.7140753443042437,
314
+ "planner_risk": 0.009592532160847137,
315
+ "planner_success": 0.2998263432333867,
316
+ "proposal_ranking": 1.125225270787875,
317
+ "proposal_reconstruction": 0.05837386598189672,
318
+ "proposal_success": 0.630388061205546,
319
+ "reocclusion": 0.21032434065515795,
320
+ "support_mode": 4.515569240008214e-06,
321
+ "total": 0.9377426480253538,
322
+ "uncertainty": 0.0009036514068914888,
323
+ "world_model": 1.2537205666303635
324
+ },
325
+ "val": {
326
+ "action": 0.01393873720533318,
327
+ "arm_role": 1.9219735413066195e-07,
328
+ "belief": 0.09070102870464325,
329
+ "corridor": 0.19287915196683672,
330
+ "disturbance": 0.0018688688416861826,
331
+ "persistence": 1.1937838825914595,
332
+ "phase": 0.44357551468743217,
333
+ "planner_ranking": 1.4278014368481107,
334
+ "planner_risk": 0.007952027747200595,
335
+ "planner_success": 0.6735637684663137,
336
+ "proposal_ranking": 1.121930678685506,
337
+ "proposal_reconstruction": 0.059713507278098,
338
+ "proposal_success": 0.6304158767064413,
339
+ "reocclusion": 0.17670889291912317,
340
+ "support_mode": 4.034886008715451e-06,
341
+ "total": 1.211510909928216,
342
+ "uncertainty": 0.0006605643041742345,
343
+ "world_model": 1.383787711461385
344
+ }
345
+ }
346
+ ]
artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/summary.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "experiment_name": "proxy_interaction_state_recency_oracleft",
3
+ "device": "cuda",
4
+ "best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/checkpoint_best.pt",
5
+ "final_train_total": 0.9377426480253538,
6
+ "final_val_total": 1.211510909928216,
7
+ "num_train_samples": 380,
8
+ "num_val_samples": 131,
9
+ "planner_mode": "trainable",
10
+ "frozen_modules": [],
11
+ "init_info": {
12
+ "path": "/workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt",
13
+ "missing_keys": [],
14
+ "unexpected_keys": []
15
+ }
16
+ }
artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backbone_clip": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.2916666666666667,
5
+ "bag_proxy": 0.4166666666666667,
6
+ "cloth_proxy": 0.2916666666666667
7
+ },
8
+ "mean_success": 0.3333333333333333,
9
+ "visibility_integral": 5.090650259620613,
10
+ "corridor_availability": 0.30186899772120845,
11
+ "reocclusion_rate": 0.013541666666666667,
12
+ "persistence_horizon_mae": 0.0,
13
+ "disturbance_cost": 0.36051484262053335
14
+ },
15
+ "reveal_clip": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.20833333333333334,
18
+ "bag_proxy": 0.25,
19
+ "cloth_proxy": 0.16666666666666666
20
+ },
21
+ "mean_success": 0.20833333333333334,
22
+ "visibility_integral": 48.42640474935373,
23
+ "corridor_availability": 0.8251730443702804,
24
+ "reocclusion_rate": 0.06718750000000001,
25
+ "persistence_horizon_mae": 0.9353625932762888,
26
+ "disturbance_cost": 0.7097413324647479
27
+ }
28
+ }
artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.md ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## backbone_clip
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
5
+ - mean_success: 0.333
6
+ - visibility_integral: 5.091
7
+ - corridor_availability: 0.302
8
+ - reocclusion_rate: 0.014
9
+ - persistence_horizon_mae: 0.000
10
+ - disturbance_cost: 0.361
11
+ - foliage_proxy_success: 0.292
12
+ - bag_proxy_success: 0.417
13
+ - cloth_proxy_success: 0.292
14
+
15
+ ## reveal_clip
16
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_reveal_state_clip/checkpoint_best.pt
17
+ - mean_success: 0.208
18
+ - visibility_integral: 48.426
19
+ - corridor_availability: 0.825
20
+ - reocclusion_rate: 0.067
21
+ - persistence_horizon_mae: 0.935
22
+ - disturbance_cost: 0.710
23
+ - foliage_proxy_success: 0.208
24
+ - bag_proxy_success: 0.250
25
+ - cloth_proxy_success: 0.167
artifacts/outputs/interaction_debug/reveal_eval_commit2_compare/reveal_benchmark.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "interaction": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4166666666666667,
5
+ "bag_proxy": 0.5416666666666666,
6
+ "cloth_proxy": 0.5833333333333334
7
+ },
8
+ "mean_success": 0.5138888888888888,
9
+ "visibility_integral": 32.35977659953965,
10
+ "corridor_availability": 0.8802236508991983,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 1.1419724687506017,
13
+ "disturbance_cost": 0.49480460506553453
14
+ },
15
+ "backbone": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.4166666666666667,
18
+ "bag_proxy": 0.5833333333333334,
19
+ "cloth_proxy": 0.625
20
+ },
21
+ "mean_success": 0.5416666666666666,
22
+ "visibility_integral": 30.58145251042313,
23
+ "corridor_availability": 0.8679845299985673,
24
+ "reocclusion_rate": 0.0,
25
+ "persistence_horizon_mae": 0.0,
26
+ "disturbance_cost": 0.47382067630274427
27
+ },
28
+ "reveal": {
29
+ "per_task_success": {
30
+ "foliage_proxy": 0.4166666666666667,
31
+ "bag_proxy": 0.5833333333333334,
32
+ "cloth_proxy": 0.6666666666666666
33
+ },
34
+ "mean_success": 0.5555555555555555,
35
+ "visibility_integral": 29.508656750122707,
36
+ "corridor_availability": 0.8612986240122054,
37
+ "reocclusion_rate": 0.0,
38
+ "persistence_horizon_mae": 2.3659667054579057,
39
+ "disturbance_cost": 0.47035404020506477
40
+ }
41
+ }
artifacts/outputs/interaction_debug/reveal_eval_commit2_compare/reveal_benchmark.md ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
5
+ - mean_success: 0.514
6
+ - visibility_integral: 32.360
7
+ - corridor_availability: 0.880
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 1.142
10
+ - disturbance_cost: 0.495
11
+ - foliage_proxy_success: 0.417
12
+ - bag_proxy_success: 0.542
13
+ - cloth_proxy_success: 0.583
14
+
15
+ ## backbone
16
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only/checkpoint_best.pt
17
+ - mean_success: 0.542
18
+ - visibility_integral: 30.581
19
+ - corridor_availability: 0.868
20
+ - reocclusion_rate: 0.000
21
+ - persistence_horizon_mae: 0.000
22
+ - disturbance_cost: 0.474
23
+ - foliage_proxy_success: 0.417
24
+ - bag_proxy_success: 0.583
25
+ - cloth_proxy_success: 0.625
26
+
27
+ ## reveal
28
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_reveal_state/checkpoint_best.pt
29
+ - mean_success: 0.556
30
+ - visibility_integral: 29.509
31
+ - corridor_availability: 0.861
32
+ - reocclusion_rate: 0.000
33
+ - persistence_horizon_mae: 2.366
34
+ - disturbance_cost: 0.470
35
+ - foliage_proxy_success: 0.417
36
+ - bag_proxy_success: 0.583
37
+ - cloth_proxy_success: 0.667
artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_recency_evalonly/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "interaction": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4166666666666667,
5
+ "bag_proxy": 0.5416666666666666,
6
+ "cloth_proxy": 0.625
7
+ },
8
+ "mean_success": 0.5277777777777778,
9
+ "visibility_integral": 32.95856812927458,
10
+ "corridor_availability": 0.8741476759314537,
11
+ "reocclusion_rate": 0.0006944444444444445,
12
+ "persistence_horizon_mae": 1.1703627435402033,
13
+ "disturbance_cost": 0.42908077666329014
14
+ }
15
+ }
artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_recency_evalonly/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
5
+ - mean_success: 0.528
6
+ - visibility_integral: 32.959
7
+ - corridor_availability: 0.874
8
+ - reocclusion_rate: 0.001
9
+ - persistence_horizon_mae: 1.170
10
+ - disturbance_cost: 0.429
11
+ - foliage_proxy_success: 0.417
12
+ - bag_proxy_success: 0.542
13
+ - cloth_proxy_success: 0.625
artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "interaction_clip": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.2916666666666667,
5
+ "bag_proxy": 0.2916666666666667,
6
+ "cloth_proxy": 0.3333333333333333
7
+ },
8
+ "mean_success": 0.3055555555555556,
9
+ "visibility_integral": 10.379729785852962,
10
+ "corridor_availability": 0.38910322284532917,
11
+ "reocclusion_rate": 0.026909722222222224,
12
+ "persistence_horizon_mae": 3.8014686041765726,
13
+ "disturbance_cost": 0.392014082081409
14
+ },
15
+ "backbone_clip": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.2916666666666667,
18
+ "bag_proxy": 0.4166666666666667,
19
+ "cloth_proxy": 0.2916666666666667
20
+ },
21
+ "mean_success": 0.3333333333333333,
22
+ "visibility_integral": 5.090670637786388,
23
+ "corridor_availability": 0.30186899772120845,
24
+ "reocclusion_rate": 0.013541666666666667,
25
+ "persistence_horizon_mae": 0.0,
26
+ "disturbance_cost": 0.36051381931045196
27
+ },
28
+ "reveal_clip": {
29
+ "per_task_success": {
30
+ "foliage_proxy": 0.20833333333333334,
31
+ "bag_proxy": 0.25,
32
+ "cloth_proxy": 0.16666666666666666
33
+ },
34
+ "mean_success": 0.20833333333333334,
35
+ "visibility_integral": 48.426281129320465,
36
+ "corridor_availability": 0.8251730443702804,
37
+ "reocclusion_rate": 0.06718750000000001,
38
+ "persistence_horizon_mae": 0.9353624902194482,
39
+ "disturbance_cost": 0.709741123020649
40
+ }
41
+ }
artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.md ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction_clip
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/checkpoint_best.pt
5
+ - mean_success: 0.306
6
+ - visibility_integral: 10.380
7
+ - corridor_availability: 0.389
8
+ - reocclusion_rate: 0.027
9
+ - persistence_horizon_mae: 3.801
10
+ - disturbance_cost: 0.392
11
+ - foliage_proxy_success: 0.292
12
+ - bag_proxy_success: 0.292
13
+ - cloth_proxy_success: 0.333
14
+
15
+ ## backbone_clip
16
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
17
+ - mean_success: 0.333
18
+ - visibility_integral: 5.091
19
+ - corridor_availability: 0.302
20
+ - reocclusion_rate: 0.014
21
+ - persistence_horizon_mae: 0.000
22
+ - disturbance_cost: 0.361
23
+ - foliage_proxy_success: 0.292
24
+ - bag_proxy_success: 0.417
25
+ - cloth_proxy_success: 0.292
26
+
27
+ ## reveal_clip
28
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_reveal_state_clip/checkpoint_best.pt
29
+ - mean_success: 0.208
30
+ - visibility_integral: 48.426
31
+ - corridor_availability: 0.825
32
+ - reocclusion_rate: 0.067
33
+ - persistence_horizon_mae: 0.935
34
+ - disturbance_cost: 0.710
35
+ - foliage_proxy_success: 0.208
36
+ - bag_proxy_success: 0.250
37
+ - cloth_proxy_success: 0.167
artifacts/outputs/interaction_debug/reveal_eval_interaction_recency_oracleft_commit4_compare/reveal_benchmark.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "interaction": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4166666666666667,
5
+ "bag_proxy": 0.5833333333333334,
6
+ "cloth_proxy": 0.5833333333333334
7
+ },
8
+ "mean_success": 0.5277777777777778,
9
+ "visibility_integral": 31.56379758318265,
10
+ "corridor_availability": 0.8745781282583872,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 1.0204093086471828,
13
+ "disturbance_cost": 0.4148087627771828
14
+ },
15
+ "backbone": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.4166666666666667,
18
+ "bag_proxy": 0.625,
19
+ "cloth_proxy": 0.6666666666666666
20
+ },
21
+ "mean_success": 0.5694444444444445,
22
+ "visibility_integral": 28.655961725446915,
23
+ "corridor_availability": 0.7943478326002756,
24
+ "reocclusion_rate": 0.07666819352674617,
25
+ "persistence_horizon_mae": 0.0,
26
+ "disturbance_cost": 0.3941483147856262
27
+ },
28
+ "reveal": {
29
+ "per_task_success": {
30
+ "foliage_proxy": 0.4166666666666667,
31
+ "bag_proxy": 0.5833333333333334,
32
+ "cloth_proxy": 0.625
33
+ },
34
+ "mean_success": 0.5416666666666666,
35
+ "visibility_integral": 30.121625943316353,
36
+ "corridor_availability": 0.8142780106928613,
37
+ "reocclusion_rate": 0.051547468734968724,
38
+ "persistence_horizon_mae": 2.102369644222497,
39
+ "disturbance_cost": 0.42389609825073016
40
+ }
41
+ }
artifacts/outputs/interaction_debug/reveal_eval_interaction_recency_oracleft_commit4_compare/reveal_benchmark.md ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## interaction
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/checkpoint_best.pt
5
+ - mean_success: 0.528
6
+ - visibility_integral: 31.564
7
+ - corridor_availability: 0.875
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 1.020
10
+ - disturbance_cost: 0.415
11
+ - foliage_proxy_success: 0.417
12
+ - bag_proxy_success: 0.583
13
+ - cloth_proxy_success: 0.583
14
+
15
+ ## backbone
16
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only/checkpoint_best.pt
17
+ - mean_success: 0.569
18
+ - visibility_integral: 28.656
19
+ - corridor_availability: 0.794
20
+ - reocclusion_rate: 0.077
21
+ - persistence_horizon_mae: 0.000
22
+ - disturbance_cost: 0.394
23
+ - foliage_proxy_success: 0.417
24
+ - bag_proxy_success: 0.625
25
+ - cloth_proxy_success: 0.667
26
+
27
+ ## reveal
28
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_reveal_state/checkpoint_best.pt
29
+ - mean_success: 0.542
30
+ - visibility_integral: 30.122
31
+ - corridor_availability: 0.814
32
+ - reocclusion_rate: 0.052
33
+ - persistence_horizon_mae: 2.102
34
+ - disturbance_cost: 0.424
35
+ - foliage_proxy_success: 0.417
36
+ - bag_proxy_success: 0.583
37
+ - cloth_proxy_success: 0.625
artifacts/outputs/interaction_debug/smoke_checks_actionhist/smoke_checks.json ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "proxy": {
3
+ "losses": {
4
+ "action": 0.7160568833351135,
5
+ "phase": 1.4088108539581299,
6
+ "arm_role": 1.5109761953353882,
7
+ "support_mode": 1.3240256309509277,
8
+ "corridor": 0.6577756404876709,
9
+ "persistence": 1.7929389476776123,
10
+ "disturbance": 0.17437878251075745,
11
+ "belief": 0.5924442410469055,
12
+ "reocclusion": 0.780390202999115,
13
+ "uncertainty": 0.7132201194763184,
14
+ "world_model": 3.2925755977630615,
15
+ "planner_success": 0.7400866150856018,
16
+ "planner_risk": 0.032505519688129425,
17
+ "planner_ranking": 0.2076394408941269,
18
+ "proposal_reconstruction": 0.8539058566093445,
19
+ "proposal_success": 0.6675869226455688,
20
+ "proposal_ranking": 0.12013991177082062,
21
+ "total": 1.9776512384414673
22
+ },
23
+ "grad_norm": 3.735501527786255,
24
+ "candidate_shape": [
25
+ 2,
26
+ 4,
27
+ 4,
28
+ 14
29
+ ],
30
+ "rollout_phase_shape": [
31
+ 2,
32
+ 4,
33
+ 4,
34
+ 5
35
+ ]
36
+ },
37
+ "rlbench": {
38
+ "losses": {
39
+ "action": 0.6058900356292725,
40
+ "world_model": 0.0,
41
+ "planner_success": 0.0,
42
+ "planner_risk": 0.0,
43
+ "planner_ranking": 0.0,
44
+ "proposal_reconstruction": 0.0,
45
+ "proposal_success": 0.0,
46
+ "proposal_ranking": 0.0,
47
+ "total": 0.6058900356292725
48
+ },
49
+ "grad_norm": 2.581531286239624,
50
+ "candidate_shape": [
51
+ 2,
52
+ 4,
53
+ 4,
54
+ 14
55
+ ],
56
+ "rollout_phase_shape": [
57
+ 2,
58
+ 4,
59
+ 4,
60
+ 5
61
+ ],
62
+ "planner_enabled_for_eval": true,
63
+ "frozen_modules": [
64
+ "interaction_head",
65
+ "world_model",
66
+ "planner"
67
+ ]
68
+ },
69
+ "policy_config": {
70
+ "backbone": {
71
+ "model_name": "openai/clip-vit-base-patch32",
72
+ "hidden_dim": 64,
73
+ "max_text_tokens": 32,
74
+ "freeze_backbone": true,
75
+ "gradient_checkpointing": false,
76
+ "use_dummy_backbone": true
77
+ },
78
+ "fusion": {
79
+ "hidden_dim": 64,
80
+ "num_cameras": 3,
81
+ "num_layers": 2,
82
+ "num_heads": 4,
83
+ "ff_dim": 128,
84
+ "dropout": 0.1,
85
+ "proprio_dim": 32,
86
+ "proprio_tokens": 1
87
+ },
88
+ "memory": {
89
+ "hidden_dim": 64,
90
+ "action_dim": 14,
91
+ "history_steps": 6,
92
+ "num_layers": 2,
93
+ "dropout": 0.1,
94
+ "memory_bank_size": 4,
95
+ "num_heads": 4,
96
+ "max_history_steps": 8
97
+ },
98
+ "decoder": {
99
+ "hidden_dim": 64,
100
+ "num_heads": 4,
101
+ "num_layers": 2,
102
+ "ff_dim": 128,
103
+ "dropout": 0.1,
104
+ "chunk_size": 4,
105
+ "action_dim": 14,
106
+ "arm_action_dim": 7,
107
+ "num_candidates": 4,
108
+ "num_phases": 5,
109
+ "num_arm_roles": 4
110
+ },
111
+ "reveal_head": {
112
+ "hidden_dim": 64,
113
+ "num_support_modes": 3,
114
+ "num_approach_templates": 32,
115
+ "rollout_horizon": 3,
116
+ "belief_map_size": 32,
117
+ "field_size": 16,
118
+ "num_heads": 4,
119
+ "predict_belief_map": true,
120
+ "num_phases": 5,
121
+ "num_arm_roles": 4,
122
+ "num_interaction_tokens": 8
123
+ },
124
+ "world_model": {
125
+ "hidden_dim": 64,
126
+ "action_dim": 14,
127
+ "num_support_modes": 3,
128
+ "num_approach_templates": 32,
129
+ "rollout_horizon": 3,
130
+ "field_size": 16,
131
+ "num_heads": 4,
132
+ "num_phases": 5,
133
+ "num_arm_roles": 4,
134
+ "num_interaction_tokens": 8,
135
+ "belief_map_size": 32,
136
+ "predict_belief_map": true
137
+ },
138
+ "planner": {
139
+ "hidden_dim": 64,
140
+ "num_candidates": 4,
141
+ "action_dim": 14,
142
+ "num_support_modes": 3,
143
+ "utility_margin": 0.1,
144
+ "corridor_weight": 1.0,
145
+ "persistence_weight": 0.5,
146
+ "proposal_weight": 0.5,
147
+ "task_progress_weight": 0.75,
148
+ "disturbance_weight": 0.75,
149
+ "reocclusion_weight": 0.5,
150
+ "visibility_weight": 0.25,
151
+ "num_heads": 4,
152
+ "num_layers": 2,
153
+ "num_phases": 5,
154
+ "num_arm_roles": 4
155
+ }
156
+ }
157
+ }
code/reveal_vla_bimanual/eval/run_proxy_diagnostics.py CHANGED
@@ -62,6 +62,7 @@ def main() -> None:
62
  texts=moved["texts"],
63
  history_images=moved.get("history_images"),
64
  history_proprio=moved.get("history_proprio"),
 
65
  plan=True,
66
  candidate_chunks_override=moved["candidate_action_chunks"],
67
  )
 
62
  texts=moved["texts"],
63
  history_images=moved.get("history_images"),
64
  history_proprio=moved.get("history_proprio"),
65
+ history_actions=moved.get("history_actions"),
66
  plan=True,
67
  candidate_chunks_override=moved["candidate_action_chunks"],
68
  )
code/reveal_vla_bimanual/eval/run_reveal_benchmark.py CHANGED
@@ -53,7 +53,18 @@ def load_model(checkpoint_path: str | Path, device: torch.device) -> tuple[torch
53
  policy_config = _policy_config_from_dict(checkpoint["policy_config"])
54
  trainer_config = _trainer_config_from_dict(checkpoint["trainer_config"])
55
  model = build_policy(policy_config, trainer_config).to(device)
56
- model.load_state_dict(checkpoint["state_dict"])
 
 
 
 
 
 
 
 
 
 
 
57
  model.eval()
58
  return model, checkpoint
59
 
@@ -63,11 +74,13 @@ def _prepare_batch(
63
  device: torch.device,
64
  history_images: list[np.ndarray] | None = None,
65
  history_proprio: list[np.ndarray] | None = None,
 
66
  ) -> dict[str, Any]:
67
  images = torch.from_numpy(observation["images"]).permute(0, 3, 1, 2).unsqueeze(0).float() / 255.0
68
  proprio = torch.from_numpy(observation["proprio"]).unsqueeze(0).float()
69
  history_images = history_images or []
70
  history_proprio = history_proprio or []
 
71
  if history_images:
72
  history_images_tensor = (
73
  torch.from_numpy(np.stack(history_images, axis=0)).permute(0, 1, 4, 2, 3).unsqueeze(0).float() / 255.0
@@ -81,10 +94,15 @@ def _prepare_batch(
81
  history_proprio_tensor = torch.from_numpy(np.stack(history_proprio, axis=0)).unsqueeze(0).float()
82
  else:
83
  history_proprio_tensor = torch.zeros((1, 0, proprio.shape[-1]), dtype=torch.float32)
 
 
 
 
84
  return {
85
  "images": images.to(device),
86
  "history_images": history_images_tensor.to(device),
87
  "history_proprio": history_proprio_tensor.to(device),
 
88
  "proprio": proprio.to(device),
89
  "texts": [observation["text"]],
90
  }
@@ -109,6 +127,7 @@ def select_chunk(
109
  "images": images,
110
  "history_images": batch.get("history_images"),
111
  "history_proprio": batch.get("history_proprio"),
 
112
  "proprio": batch["proprio"],
113
  "texts": batch["texts"],
114
  }
@@ -153,6 +172,7 @@ def evaluate_model(
153
  episodes: int,
154
  resolution: int,
155
  ablation: str | None = None,
 
156
  ) -> BenchmarkMetrics:
157
  per_task_success: dict[str, float] = {}
158
  visibility_scores = []
@@ -176,6 +196,7 @@ def evaluate_model(
176
  episode_disturbance = [float(privileged_state["disturbance_cost"])]
177
  history_images: list[np.ndarray] = []
178
  history_proprio: list[np.ndarray] = []
 
179
  done = False
180
  while not done:
181
  batch = _prepare_batch(
@@ -183,20 +204,10 @@ def evaluate_model(
183
  device=device,
184
  history_images=history_images,
185
  history_proprio=history_proprio,
 
186
  )
187
  with torch.no_grad():
188
  chunk, outputs = select_chunk(model, batch, ablation=ablation)
189
- action = chunk[0, 0].detach().cpu().numpy()
190
- if history_steps > 0:
191
- if len(history_images) >= history_steps:
192
- history_images = history_images[-history_steps + 1 :]
193
- history_proprio = history_proprio[-history_steps + 1 :]
194
- history_images.append(observation["images"])
195
- history_proprio.append(observation["proprio"])
196
- observation, _, terminated, truncated, privileged_state = env.step(action)
197
- episode_visibility.append(float(privileged_state["visibility"]))
198
- episode_corridor.append(float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any()))
199
- episode_disturbance.append(float(privileged_state["disturbance_cost"]))
200
  state_output = outputs.get("interaction_state")
201
  if state_output is None:
202
  state_output = outputs.get("reveal_state")
@@ -207,7 +218,26 @@ def evaluate_model(
207
  privileged_state["persistence_horizon"],
208
  )
209
  )
210
- done = bool(terminated or truncated)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  successes.append(float(privileged_state["retrieval_success"]))
212
  visibility_scores.append(visibility_integral(np.asarray(episode_visibility)))
213
  corridor_scores.append(corridor_availability(np.asarray(episode_corridor)))
@@ -246,6 +276,7 @@ def main() -> None:
246
  parser.add_argument("--ablation", default=None)
247
  parser.add_argument("--output-root", default="/workspace/reports/reveal_eval")
248
  parser.add_argument("--proxies", nargs="*", default=None)
 
249
  args = parser.parse_args()
250
 
251
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -266,6 +297,7 @@ def main() -> None:
266
  episodes=args.episodes,
267
  resolution=resolution,
268
  ablation=args.ablation,
 
269
  )
270
  raw_metrics[label] = _metrics_to_dict(metrics)
271
  sections[label] = {
 
53
  policy_config = _policy_config_from_dict(checkpoint["policy_config"])
54
  trainer_config = _trainer_config_from_dict(checkpoint["trainer_config"])
55
  model = build_policy(policy_config, trainer_config).to(device)
56
+ incompatible = model.load_state_dict(checkpoint["state_dict"], strict=False)
57
+ allowed_missing = {
58
+ key
59
+ for key in incompatible.missing_keys
60
+ if key.startswith("memory.action_proj.") or key.endswith("arm_identity.weight")
61
+ }
62
+ missing_other = sorted(set(incompatible.missing_keys) - allowed_missing)
63
+ if missing_other or incompatible.unexpected_keys:
64
+ raise RuntimeError(
65
+ "Checkpoint load failed due to incompatible weights. "
66
+ f"Missing keys: {missing_other}. Unexpected keys: {list(incompatible.unexpected_keys)}"
67
+ )
68
  model.eval()
69
  return model, checkpoint
70
 
 
74
  device: torch.device,
75
  history_images: list[np.ndarray] | None = None,
76
  history_proprio: list[np.ndarray] | None = None,
77
+ history_actions: list[np.ndarray] | None = None,
78
  ) -> dict[str, Any]:
79
  images = torch.from_numpy(observation["images"]).permute(0, 3, 1, 2).unsqueeze(0).float() / 255.0
80
  proprio = torch.from_numpy(observation["proprio"]).unsqueeze(0).float()
81
  history_images = history_images or []
82
  history_proprio = history_proprio or []
83
+ history_actions = history_actions or []
84
  if history_images:
85
  history_images_tensor = (
86
  torch.from_numpy(np.stack(history_images, axis=0)).permute(0, 1, 4, 2, 3).unsqueeze(0).float() / 255.0
 
94
  history_proprio_tensor = torch.from_numpy(np.stack(history_proprio, axis=0)).unsqueeze(0).float()
95
  else:
96
  history_proprio_tensor = torch.zeros((1, 0, proprio.shape[-1]), dtype=torch.float32)
97
+ if history_actions:
98
+ history_actions_tensor = torch.from_numpy(np.stack(history_actions, axis=0)).unsqueeze(0).float()
99
+ else:
100
+ history_actions_tensor = torch.zeros((1, 0, 14), dtype=torch.float32)
101
  return {
102
  "images": images.to(device),
103
  "history_images": history_images_tensor.to(device),
104
  "history_proprio": history_proprio_tensor.to(device),
105
+ "history_actions": history_actions_tensor.to(device),
106
  "proprio": proprio.to(device),
107
  "texts": [observation["text"]],
108
  }
 
127
  "images": images,
128
  "history_images": batch.get("history_images"),
129
  "history_proprio": batch.get("history_proprio"),
130
+ "history_actions": batch.get("history_actions"),
131
  "proprio": batch["proprio"],
132
  "texts": batch["texts"],
133
  }
 
172
  episodes: int,
173
  resolution: int,
174
  ablation: str | None = None,
175
+ chunk_commit_steps: int | None = None,
176
  ) -> BenchmarkMetrics:
177
  per_task_success: dict[str, float] = {}
178
  visibility_scores = []
 
196
  episode_disturbance = [float(privileged_state["disturbance_cost"])]
197
  history_images: list[np.ndarray] = []
198
  history_proprio: list[np.ndarray] = []
199
+ history_actions: list[np.ndarray] = []
200
  done = False
201
  while not done:
202
  batch = _prepare_batch(
 
204
  device=device,
205
  history_images=history_images,
206
  history_proprio=history_proprio,
207
+ history_actions=history_actions,
208
  )
209
  with torch.no_grad():
210
  chunk, outputs = select_chunk(model, batch, ablation=ablation)
 
 
 
 
 
 
 
 
 
 
 
211
  state_output = outputs.get("interaction_state")
212
  if state_output is None:
213
  state_output = outputs.get("reveal_state")
 
218
  privileged_state["persistence_horizon"],
219
  )
220
  )
221
+ chunk_np = chunk[0].detach().cpu().numpy()
222
+ commit_steps = chunk_np.shape[0] if chunk_commit_steps is None else min(chunk_commit_steps, chunk_np.shape[0])
223
+ for action in chunk_np[:commit_steps]:
224
+ if history_steps > 0:
225
+ if len(history_images) >= history_steps:
226
+ history_images = history_images[-history_steps + 1 :]
227
+ history_proprio = history_proprio[-history_steps + 1 :]
228
+ history_actions = history_actions[-history_steps + 1 :]
229
+ history_images.append(observation["images"])
230
+ history_proprio.append(observation["proprio"])
231
+ history_actions.append(action.astype(np.float32))
232
+ observation, _, terminated, truncated, privileged_state = env.step(action)
233
+ episode_visibility.append(float(privileged_state["visibility"]))
234
+ episode_corridor.append(
235
+ float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any())
236
+ )
237
+ episode_disturbance.append(float(privileged_state["disturbance_cost"]))
238
+ done = bool(terminated or truncated)
239
+ if done:
240
+ break
241
  successes.append(float(privileged_state["retrieval_success"]))
242
  visibility_scores.append(visibility_integral(np.asarray(episode_visibility)))
243
  corridor_scores.append(corridor_availability(np.asarray(episode_corridor)))
 
276
  parser.add_argument("--ablation", default=None)
277
  parser.add_argument("--output-root", default="/workspace/reports/reveal_eval")
278
  parser.add_argument("--proxies", nargs="*", default=None)
279
+ parser.add_argument("--chunk-commit-steps", type=int, default=0)
280
  args = parser.parse_args()
281
 
282
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
297
  episodes=args.episodes,
298
  resolution=resolution,
299
  ablation=args.ablation,
300
+ chunk_commit_steps=(None if args.chunk_commit_steps <= 0 else args.chunk_commit_steps),
301
  )
302
  raw_metrics[label] = _metrics_to_dict(metrics)
303
  sections[label] = {
code/reveal_vla_bimanual/eval/run_rlbench_rollout_eval.py CHANGED
@@ -52,17 +52,66 @@ def _episode_language_goal(descriptions: Sequence[str]) -> str:
52
  return str(descriptions[0]) if descriptions else ""
53
 
54
 
55
- def _step_bimanual_chunk(task: Any, obs: Any, delta_action: np.ndarray) -> tuple[Any, float, bool]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  total_reward = 0.0
57
  done = False
58
  next_obs = obs
 
 
59
  for arm_name in ("right", "left"):
60
- env_action = single_arm_absolute_action_from_delta(next_obs, delta_action, arm_name, ignore_collisions=True)
61
- next_obs, reward, done = task.step(env_action, arm_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  total_reward += float(reward)
63
  if reward >= 1.0 or done:
64
  break
65
- return next_obs, total_reward, done
66
 
67
 
68
  def main() -> None:
@@ -78,6 +127,7 @@ def main() -> None:
78
  parser.add_argument("--allow-unsupervised-planning", action="store_true")
79
  parser.add_argument("--disable-support-mode-conditioning", action="store_true")
80
  parser.add_argument("--headless", action="store_true", default=True)
 
81
  args = parser.parse_args()
82
 
83
  checkpoint = torch.load(Path(args.checkpoint), map_location="cpu", weights_only=False)
@@ -85,7 +135,18 @@ def main() -> None:
85
  trainer_config = _trainer_config_from_checkpoint(checkpoint)
86
  device = torch.device("cuda" if torch.cuda.is_available() and args.device == "cuda" else "cpu")
87
  model = build_policy(policy_config, trainer_config).to(device)
88
- model.load_state_dict(checkpoint["state_dict"], strict=True)
 
 
 
 
 
 
 
 
 
 
 
89
  model.eval()
90
  plan_requested = bool(args.plan)
91
  plan_applied = plan_requested and planner_enabled(trainer_config, during_eval=True)
@@ -144,10 +205,14 @@ def main() -> None:
144
  language_goal = _episode_language_goal(descriptions)
145
  total_reward = 0.0
146
  success = 0.0
 
 
147
  history_images: list[np.ndarray] = []
148
  history_proprio: list[np.ndarray] = []
 
149
  history_steps = int(getattr(policy_config.memory, "history_steps", 0))
150
- for timestep in range(args.episode_length):
 
151
  images = stack_live_rgb_obs(obs, resolution=args.resolution).unsqueeze(0).to(device)
152
  proprio = torch.from_numpy(
153
  bimanual_proprio_from_obs(
@@ -164,6 +229,9 @@ def main() -> None:
164
  history_proprio_tensor = (
165
  torch.from_numpy(np.stack(history_proprio, axis=0)).unsqueeze(0).to(device)
166
  )
 
 
 
167
  else:
168
  history_images_tensor = torch.zeros(
169
  (1, 0, images.shape[1], images.shape[2], images.shape[3], images.shape[4]),
@@ -175,6 +243,11 @@ def main() -> None:
175
  device=device,
176
  dtype=proprio.dtype,
177
  )
 
 
 
 
 
178
  with torch.no_grad():
179
  if policy_supports_planning(trainer_config.policy_type):
180
  outputs = model(
@@ -183,6 +256,7 @@ def main() -> None:
183
  texts=[language_goal],
184
  history_images=history_images_tensor,
185
  history_proprio=history_proprio_tensor,
 
186
  plan=plan_applied,
187
  support_mode_conditioning=not args.disable_support_mode_conditioning,
188
  )
@@ -193,22 +267,40 @@ def main() -> None:
193
  texts=[language_goal],
194
  history_images=history_images_tensor,
195
  history_proprio=history_proprio_tensor,
 
196
  )
197
  chosen_chunk = outputs["action_mean"]
198
  if plan_applied and "planned_chunk" in outputs:
199
  chosen_chunk = outputs["planned_chunk"]
200
- step_action = chosen_chunk[0, 0].detach().float().cpu().numpy()
201
- if history_steps > 0:
202
- if len(history_images) >= history_steps:
203
- keep = max(history_steps - 1, 0)
204
- history_images = history_images[-keep:] if keep > 0 else []
205
- history_proprio = history_proprio[-keep:] if keep > 0 else []
206
- history_images.append(images[0].detach().cpu().numpy())
207
- history_proprio.append(proprio[0].detach().cpu().numpy())
208
- obs, reward, done = _step_bimanual_chunk(task, obs, step_action)
209
- total_reward += float(reward)
210
- if reward >= 1.0:
211
- success = 1.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  if done or success >= 1.0:
213
  break
214
  task_successes.append(success)
@@ -217,6 +309,8 @@ def main() -> None:
217
  "task_class": task_class.__name__,
218
  "successes": task_successes,
219
  "returns": task_returns,
 
 
220
  "mean_success": float(np.mean(task_successes)) if task_successes else 0.0,
221
  "mean_return": float(np.mean(task_returns)) if task_returns else 0.0,
222
  }
 
52
  return str(descriptions[0]) if descriptions else ""
53
 
54
 
55
+ def _noop_arm_action(obs: Any, arm_name: str) -> np.ndarray:
56
+ if arm_name == "right":
57
+ pose = np.asarray(obs.gripper_right_pose, dtype=np.float32)
58
+ gripper_open = float(obs.gripper_right_open)
59
+ elif arm_name == "left":
60
+ pose = np.asarray(obs.gripper_left_pose, dtype=np.float32)
61
+ gripper_open = float(obs.gripper_left_open)
62
+ else: # pragma: no cover - defensive guard
63
+ raise ValueError(f"Unsupported arm: {arm_name}")
64
+ return np.concatenate([pose, np.array([gripper_open, 1.0], dtype=np.float32)], axis=0)
65
+
66
+
67
+ def _scaled_single_arm_delta(delta_action: np.ndarray, arm_name: str, scale: float) -> np.ndarray:
68
+ scaled = np.asarray(delta_action, dtype=np.float32).copy()
69
+ arm_index = {"right": 0, "left": 1}[arm_name]
70
+ offset = arm_index * 7
71
+ scaled[offset : offset + 6] *= float(scale)
72
+ return scaled
73
+
74
+
75
+ def _step_bimanual_chunk(task: Any, obs: Any, delta_action: np.ndarray) -> tuple[Any, float, bool, int, int]:
76
  total_reward = 0.0
77
  done = False
78
  next_obs = obs
79
+ recovered_steps = 0
80
+ noop_fallbacks = 0
81
  for arm_name in ("right", "left"):
82
+ reward = 0.0
83
+ last_error: Exception | None = None
84
+ stepped = False
85
+ for scale in (1.0, 0.5, 0.25, 0.1):
86
+ try:
87
+ env_action = single_arm_absolute_action_from_delta(
88
+ next_obs,
89
+ _scaled_single_arm_delta(delta_action, arm_name, scale),
90
+ arm_name,
91
+ ignore_collisions=True,
92
+ )
93
+ next_obs, reward, done = task.step(env_action, arm_name)
94
+ if scale < 1.0:
95
+ recovered_steps += 1
96
+ stepped = True
97
+ break
98
+ except Exception as exc: # pragma: no cover - live RLBench failure path
99
+ last_error = exc
100
+ if not stepped:
101
+ try:
102
+ next_obs, reward, done = task.step(_noop_arm_action(next_obs, arm_name), arm_name)
103
+ noop_fallbacks += 1
104
+ stepped = True
105
+ except Exception as exc: # pragma: no cover - live RLBench failure path
106
+ last_error = exc
107
+ if not stepped:
108
+ if last_error is not None:
109
+ raise last_error
110
+ raise RuntimeError(f"Failed to step arm '{arm_name}' for unknown reasons.")
111
  total_reward += float(reward)
112
  if reward >= 1.0 or done:
113
  break
114
+ return next_obs, total_reward, done, recovered_steps, noop_fallbacks
115
 
116
 
117
  def main() -> None:
 
127
  parser.add_argument("--allow-unsupervised-planning", action="store_true")
128
  parser.add_argument("--disable-support-mode-conditioning", action="store_true")
129
  parser.add_argument("--headless", action="store_true", default=True)
130
+ parser.add_argument("--chunk-commit-steps", type=int, default=0)
131
  args = parser.parse_args()
132
 
133
  checkpoint = torch.load(Path(args.checkpoint), map_location="cpu", weights_only=False)
 
135
  trainer_config = _trainer_config_from_checkpoint(checkpoint)
136
  device = torch.device("cuda" if torch.cuda.is_available() and args.device == "cuda" else "cpu")
137
  model = build_policy(policy_config, trainer_config).to(device)
138
+ incompatible = model.load_state_dict(checkpoint["state_dict"], strict=False)
139
+ allowed_missing = {
140
+ key
141
+ for key in incompatible.missing_keys
142
+ if key.startswith("memory.action_proj.") or key.endswith("arm_identity.weight")
143
+ }
144
+ missing_other = sorted(set(incompatible.missing_keys) - allowed_missing)
145
+ if missing_other or incompatible.unexpected_keys:
146
+ raise RuntimeError(
147
+ "Checkpoint load failed due to incompatible weights. "
148
+ f"Missing keys: {missing_other}. Unexpected keys: {list(incompatible.unexpected_keys)}"
149
+ )
150
  model.eval()
151
  plan_requested = bool(args.plan)
152
  plan_applied = plan_requested and planner_enabled(trainer_config, during_eval=True)
 
205
  language_goal = _episode_language_goal(descriptions)
206
  total_reward = 0.0
207
  success = 0.0
208
+ episode_recoveries = 0
209
+ episode_noop_fallbacks = 0
210
  history_images: list[np.ndarray] = []
211
  history_proprio: list[np.ndarray] = []
212
+ history_actions: list[np.ndarray] = []
213
  history_steps = int(getattr(policy_config.memory, "history_steps", 0))
214
+ timestep = 0
215
+ while timestep < args.episode_length:
216
  images = stack_live_rgb_obs(obs, resolution=args.resolution).unsqueeze(0).to(device)
217
  proprio = torch.from_numpy(
218
  bimanual_proprio_from_obs(
 
229
  history_proprio_tensor = (
230
  torch.from_numpy(np.stack(history_proprio, axis=0)).unsqueeze(0).to(device)
231
  )
232
+ history_actions_tensor = (
233
+ torch.from_numpy(np.stack(history_actions, axis=0)).unsqueeze(0).to(device)
234
+ )
235
  else:
236
  history_images_tensor = torch.zeros(
237
  (1, 0, images.shape[1], images.shape[2], images.shape[3], images.shape[4]),
 
243
  device=device,
244
  dtype=proprio.dtype,
245
  )
246
+ history_actions_tensor = torch.zeros(
247
+ (1, 0, policy_config.decoder.action_dim),
248
+ device=device,
249
+ dtype=proprio.dtype,
250
+ )
251
  with torch.no_grad():
252
  if policy_supports_planning(trainer_config.policy_type):
253
  outputs = model(
 
256
  texts=[language_goal],
257
  history_images=history_images_tensor,
258
  history_proprio=history_proprio_tensor,
259
+ history_actions=history_actions_tensor,
260
  plan=plan_applied,
261
  support_mode_conditioning=not args.disable_support_mode_conditioning,
262
  )
 
267
  texts=[language_goal],
268
  history_images=history_images_tensor,
269
  history_proprio=history_proprio_tensor,
270
+ history_actions=history_actions_tensor,
271
  )
272
  chosen_chunk = outputs["action_mean"]
273
  if plan_applied and "planned_chunk" in outputs:
274
  chosen_chunk = outputs["planned_chunk"]
275
+ chunk_np = chosen_chunk[0].detach().float().cpu().numpy()
276
+ commit_steps = chunk_np.shape[0] if args.chunk_commit_steps <= 0 else min(args.chunk_commit_steps, chunk_np.shape[0])
277
+ done = False
278
+ for step_action in chunk_np[:commit_steps]:
279
+ live_images = stack_live_rgb_obs(obs, resolution=args.resolution).detach().cpu().numpy()
280
+ live_proprio = bimanual_proprio_from_obs(
281
+ obs,
282
+ timestep=timestep,
283
+ episode_length=args.episode_length,
284
+ target_dim=policy_config.fusion.proprio_dim,
285
+ ).astype(np.float32)
286
+ if history_steps > 0:
287
+ if len(history_images) >= history_steps:
288
+ keep = max(history_steps - 1, 0)
289
+ history_images = history_images[-keep:] if keep > 0 else []
290
+ history_proprio = history_proprio[-keep:] if keep > 0 else []
291
+ history_actions = history_actions[-keep:] if keep > 0 else []
292
+ history_images.append(live_images)
293
+ history_proprio.append(live_proprio)
294
+ history_actions.append(step_action.astype(np.float32))
295
+ obs, reward, done, recovered_steps, noop_fallbacks = _step_bimanual_chunk(task, obs, step_action)
296
+ episode_recoveries += recovered_steps
297
+ episode_noop_fallbacks += noop_fallbacks
298
+ total_reward += float(reward)
299
+ timestep += 1
300
+ if reward >= 1.0:
301
+ success = 1.0
302
+ if done or success >= 1.0 or timestep >= args.episode_length:
303
+ break
304
  if done or success >= 1.0:
305
  break
306
  task_successes.append(success)
 
309
  "task_class": task_class.__name__,
310
  "successes": task_successes,
311
  "returns": task_returns,
312
+ "path_recoveries": episode_recoveries if args.episodes_per_task == 1 else None,
313
+ "noop_fallbacks": episode_noop_fallbacks if args.episodes_per_task == 1 else None,
314
  "mean_success": float(np.mean(task_successes)) if task_successes else 0.0,
315
  "mean_return": float(np.mean(task_returns)) if task_returns else 0.0,
316
  }
code/reveal_vla_bimanual/models/backbones.py CHANGED
@@ -48,7 +48,7 @@ class FrozenVLBackbone(nn.Module):
48
  else:
49
  from transformers import AutoTokenizer, CLIPModel
50
 
51
- clip_model = CLIPModel.from_pretrained(config.model_name)
52
  self.vision_model = clip_model.vision_model
53
  self.text_model = clip_model.text_model
54
  self.visual_projection = clip_model.visual_projection
 
48
  else:
49
  from transformers import AutoTokenizer, CLIPModel
50
 
51
+ clip_model = CLIPModel.from_pretrained(config.model_name, use_safetensors=True)
52
  self.vision_model = clip_model.vision_model
53
  self.text_model = clip_model.text_model
54
  self.visual_projection = clip_model.visual_projection
code/reveal_vla_bimanual/models/observation_memory.py CHANGED
@@ -9,6 +9,7 @@ from torch import Tensor, nn
9
  @dataclass
10
  class ObservationMemoryConfig:
11
  hidden_dim: int = 512
 
12
  history_steps: int = 2
13
  num_layers: int = 1
14
  dropout: float = 0.1
@@ -33,6 +34,11 @@ class ObservationMemory(nn.Module):
33
  nn.Linear(config.hidden_dim, config.hidden_dim),
34
  nn.GELU(),
35
  )
 
 
 
 
 
36
  self.uncertainty_head = nn.Sequential(
37
  nn.LayerNorm(config.hidden_dim),
38
  nn.Linear(config.hidden_dim, 1),
@@ -42,10 +48,14 @@ class ObservationMemory(nn.Module):
42
  self,
43
  scene_tokens: Tensor,
44
  history_scene_tokens: Tensor | None = None,
 
45
  ) -> dict[str, Tensor]:
46
  pooled_current = scene_tokens.mean(dim=1, keepdim=True)
47
  if history_scene_tokens is not None and history_scene_tokens.numel() > 0:
48
  history_pooled = history_scene_tokens.mean(dim=2)
 
 
 
49
  sequence = torch.cat([history_pooled, pooled_current], dim=1)
50
  else:
51
  sequence = pooled_current
@@ -94,11 +104,24 @@ class InteractionObservationMemory(nn.Module):
94
  nn.Linear(config.hidden_dim, config.hidden_dim),
95
  nn.GELU(),
96
  )
 
 
 
 
 
97
  self.uncertainty_head = nn.Sequential(
98
  nn.LayerNorm(config.hidden_dim),
99
  nn.Linear(config.hidden_dim, 1),
100
  )
101
 
 
 
 
 
 
 
 
 
102
  def _truncate_history(self, history_scene_tokens: Tensor | None) -> Tensor | None:
103
  if history_scene_tokens is None or history_scene_tokens.numel() == 0:
104
  return history_scene_tokens
@@ -110,11 +133,21 @@ class InteractionObservationMemory(nn.Module):
110
  self,
111
  scene_tokens: Tensor,
112
  history_scene_tokens: Tensor | None = None,
 
113
  ) -> dict[str, Tensor]:
114
  pooled_current = scene_tokens.mean(dim=1, keepdim=True)
115
  history_scene_tokens = self._truncate_history(history_scene_tokens)
116
  if history_scene_tokens is not None and history_scene_tokens.numel() > 0:
117
  history_pooled = history_scene_tokens.mean(dim=2)
 
 
 
 
 
 
 
 
 
118
  sequence = torch.cat([history_pooled, pooled_current], dim=1)
119
  else:
120
  sequence = pooled_current
@@ -126,11 +159,13 @@ class InteractionObservationMemory(nn.Module):
126
  )
127
  encoded = self.sequence_encoder(sequence + self.position_embedding[:, :seq_len])
128
  batch_size = encoded.shape[0]
129
- queries = self.bank_queries.unsqueeze(0).expand(batch_size, -1, -1)
 
 
130
  bank_tokens, _ = self.bank_attention(queries, encoded, encoded)
131
  bank_tokens = bank_tokens + self.bank_mlp(bank_tokens)
132
- projected_bank = self.token_proj(bank_tokens)
133
- pooled_bank = projected_bank.mean(dim=1)
134
  return {
135
  "memory_sequence": encoded,
136
  "memory_state": encoded[:, -1],
 
9
  @dataclass
10
  class ObservationMemoryConfig:
11
  hidden_dim: int = 512
12
+ action_dim: int = 14
13
  history_steps: int = 2
14
  num_layers: int = 1
15
  dropout: float = 0.1
 
34
  nn.Linear(config.hidden_dim, config.hidden_dim),
35
  nn.GELU(),
36
  )
37
+ self.action_proj = nn.Sequential(
38
+ nn.LayerNorm(config.action_dim),
39
+ nn.Linear(config.action_dim, config.hidden_dim),
40
+ nn.GELU(),
41
+ )
42
  self.uncertainty_head = nn.Sequential(
43
  nn.LayerNorm(config.hidden_dim),
44
  nn.Linear(config.hidden_dim, 1),
 
48
  self,
49
  scene_tokens: Tensor,
50
  history_scene_tokens: Tensor | None = None,
51
+ history_actions: Tensor | None = None,
52
  ) -> dict[str, Tensor]:
53
  pooled_current = scene_tokens.mean(dim=1, keepdim=True)
54
  if history_scene_tokens is not None and history_scene_tokens.numel() > 0:
55
  history_pooled = history_scene_tokens.mean(dim=2)
56
+ if history_actions is not None and history_actions.numel() > 0:
57
+ history_action_tokens = self.action_proj(history_actions[:, -history_pooled.shape[1] :])
58
+ history_pooled = history_pooled + history_action_tokens
59
  sequence = torch.cat([history_pooled, pooled_current], dim=1)
60
  else:
61
  sequence = pooled_current
 
104
  nn.Linear(config.hidden_dim, config.hidden_dim),
105
  nn.GELU(),
106
  )
107
+ self.action_proj = nn.Sequential(
108
+ nn.LayerNorm(config.action_dim),
109
+ nn.Linear(config.action_dim, config.hidden_dim),
110
+ nn.GELU(),
111
+ )
112
  self.uncertainty_head = nn.Sequential(
113
  nn.LayerNorm(config.hidden_dim),
114
  nn.Linear(config.hidden_dim, 1),
115
  )
116
 
117
+ def _recency_weights(self, length: int, device: torch.device, dtype: torch.dtype) -> Tensor:
118
+ if length <= 0:
119
+ return torch.zeros((0,), device=device, dtype=dtype)
120
+ positions = torch.arange(length, device=device, dtype=dtype)
121
+ distances = (length - 1) - positions
122
+ weights = torch.exp(-0.5 * distances)
123
+ return weights / weights.sum().clamp_min(1e-6)
124
+
125
  def _truncate_history(self, history_scene_tokens: Tensor | None) -> Tensor | None:
126
  if history_scene_tokens is None or history_scene_tokens.numel() == 0:
127
  return history_scene_tokens
 
133
  self,
134
  scene_tokens: Tensor,
135
  history_scene_tokens: Tensor | None = None,
136
+ history_actions: Tensor | None = None,
137
  ) -> dict[str, Tensor]:
138
  pooled_current = scene_tokens.mean(dim=1, keepdim=True)
139
  history_scene_tokens = self._truncate_history(history_scene_tokens)
140
  if history_scene_tokens is not None and history_scene_tokens.numel() > 0:
141
  history_pooled = history_scene_tokens.mean(dim=2)
142
+ if history_actions is not None and history_actions.numel() > 0:
143
+ truncated_actions = history_actions[:, -history_pooled.shape[1] :]
144
+ history_pooled = history_pooled + self.action_proj(truncated_actions)
145
+ recency_weights = self._recency_weights(
146
+ history_pooled.shape[1],
147
+ device=history_pooled.device,
148
+ dtype=history_pooled.dtype,
149
+ ).view(1, -1, 1)
150
+ history_pooled = history_pooled * recency_weights * float(history_pooled.shape[1])
151
  sequence = torch.cat([history_pooled, pooled_current], dim=1)
152
  else:
153
  sequence = pooled_current
 
159
  )
160
  encoded = self.sequence_encoder(sequence + self.position_embedding[:, :seq_len])
161
  batch_size = encoded.shape[0]
162
+ recent_window = min(max(1, self.config.memory_bank_size // 2), encoded.shape[1])
163
+ recent_summary = encoded[:, -recent_window:].mean(dim=1, keepdim=True)
164
+ queries = self.bank_queries.unsqueeze(0).expand(batch_size, -1, -1) + recent_summary
165
  bank_tokens, _ = self.bank_attention(queries, encoded, encoded)
166
  bank_tokens = bank_tokens + self.bank_mlp(bank_tokens)
167
+ projected_bank = self.token_proj(bank_tokens + recent_summary)
168
+ pooled_bank = projected_bank.mean(dim=1) + 0.25 * recent_summary.squeeze(1)
169
  return {
170
  "memory_sequence": encoded,
171
  "memory_state": encoded[:, -1],
code/reveal_vla_bimanual/models/policy.py CHANGED
@@ -111,6 +111,7 @@ class BackboneOnlyPolicy(nn.Module):
111
  language_tokens: dict[str, Tensor] | None = None,
112
  history_images: Tensor | None = None,
113
  history_proprio: Tensor | None = None,
 
114
  ) -> dict[str, Tensor]:
115
  scene_tokens = self.encode_scene(images, proprio, texts=texts, language_tokens=language_tokens)
116
  history_scene_tokens = self.encode_history(
@@ -119,7 +120,11 @@ class BackboneOnlyPolicy(nn.Module):
119
  texts=texts,
120
  language_tokens=language_tokens,
121
  )
122
- memory_output = self.memory(scene_tokens, history_scene_tokens=history_scene_tokens)
 
 
 
 
123
  decoded = self.decoder(scene_tokens, memory_token=memory_output["memory_token"])
124
  decoded["scene_tokens"] = scene_tokens
125
  decoded["history_scene_tokens"] = history_scene_tokens
@@ -142,6 +147,7 @@ class RevealBimanualPolicy(BackboneOnlyPolicy):
142
  language_tokens: dict[str, Tensor] | None = None,
143
  history_images: Tensor | None = None,
144
  history_proprio: Tensor | None = None,
 
145
  plan: bool = True,
146
  support_mode_conditioning: bool = True,
147
  candidate_chunks_override: Tensor | None = None,
@@ -153,6 +159,7 @@ class RevealBimanualPolicy(BackboneOnlyPolicy):
153
  language_tokens=language_tokens,
154
  history_images=history_images,
155
  history_proprio=history_proprio,
 
156
  )
157
  reveal_state = self.reveal_head(
158
  outputs["scene_tokens"],
@@ -232,6 +239,7 @@ class InteractionBimanualPolicy(BackboneOnlyPolicy):
232
  language_tokens: dict[str, Tensor] | None = None,
233
  history_images: Tensor | None = None,
234
  history_proprio: Tensor | None = None,
 
235
  plan: bool = True,
236
  support_mode_conditioning: bool = True,
237
  candidate_chunks_override: Tensor | None = None,
@@ -248,7 +256,11 @@ class InteractionBimanualPolicy(BackboneOnlyPolicy):
248
  )
249
  if history_steps_override is not None and history_scene_tokens is not None and history_scene_tokens.numel() > 0:
250
  history_scene_tokens = history_scene_tokens[:, -history_steps_override:]
251
- memory_output = self.memory(scene_tokens, history_scene_tokens=history_scene_tokens)
 
 
 
 
252
 
253
  interaction_state = None
254
  if use_interaction_head:
 
111
  language_tokens: dict[str, Tensor] | None = None,
112
  history_images: Tensor | None = None,
113
  history_proprio: Tensor | None = None,
114
+ history_actions: Tensor | None = None,
115
  ) -> dict[str, Tensor]:
116
  scene_tokens = self.encode_scene(images, proprio, texts=texts, language_tokens=language_tokens)
117
  history_scene_tokens = self.encode_history(
 
120
  texts=texts,
121
  language_tokens=language_tokens,
122
  )
123
+ memory_output = self.memory(
124
+ scene_tokens,
125
+ history_scene_tokens=history_scene_tokens,
126
+ history_actions=history_actions,
127
+ )
128
  decoded = self.decoder(scene_tokens, memory_token=memory_output["memory_token"])
129
  decoded["scene_tokens"] = scene_tokens
130
  decoded["history_scene_tokens"] = history_scene_tokens
 
147
  language_tokens: dict[str, Tensor] | None = None,
148
  history_images: Tensor | None = None,
149
  history_proprio: Tensor | None = None,
150
+ history_actions: Tensor | None = None,
151
  plan: bool = True,
152
  support_mode_conditioning: bool = True,
153
  candidate_chunks_override: Tensor | None = None,
 
159
  language_tokens=language_tokens,
160
  history_images=history_images,
161
  history_proprio=history_proprio,
162
+ history_actions=history_actions,
163
  )
164
  reveal_state = self.reveal_head(
165
  outputs["scene_tokens"],
 
239
  language_tokens: dict[str, Tensor] | None = None,
240
  history_images: Tensor | None = None,
241
  history_proprio: Tensor | None = None,
242
+ history_actions: Tensor | None = None,
243
  plan: bool = True,
244
  support_mode_conditioning: bool = True,
245
  candidate_chunks_override: Tensor | None = None,
 
256
  )
257
  if history_steps_override is not None and history_scene_tokens is not None and history_scene_tokens.numel() > 0:
258
  history_scene_tokens = history_scene_tokens[:, -history_steps_override:]
259
+ memory_output = self.memory(
260
+ scene_tokens,
261
+ history_scene_tokens=history_scene_tokens,
262
+ history_actions=history_actions,
263
+ )
264
 
265
  interaction_state = None
266
  if use_interaction_head:
code/reveal_vla_bimanual/sim_reveal/dataset.py CHANGED
@@ -11,7 +11,7 @@ import numpy as np
11
 
12
  from sim_reveal.procedural_envs import available_proxy_names, make_proxy_env, render_views_from_state
13
 
14
- NOLEAK_PROXY_DATASET_VERSION = "reveal_proxy_v4_noleak_counterfactual"
15
  LEGACY_PRIVILEGED_RENDER_KEYS = frozenset(
16
  {
17
  "target_template",
@@ -74,6 +74,7 @@ def collect_teacher_dataset(
74
  )
75
  padded_history_render_states = []
76
  padded_history_proprio = []
 
77
  history_count = min(history_steps, len(history_buffer))
78
  pad_count = history_steps - history_count
79
  if history_count > 0:
@@ -83,9 +84,11 @@ def collect_teacher_dataset(
83
  for _ in range(pad_count):
84
  padded_history_render_states.append(env.render_state(privileged_state))
85
  padded_history_proprio.append(np.zeros_like(observation["proprio"], dtype=np.float32))
 
86
  for item in recent_history:
87
  padded_history_render_states.append(item["render_state"])
88
  padded_history_proprio.append(item["proprio"])
 
89
  samples.append(
90
  {
91
  "dataset_version": NOLEAK_PROXY_DATASET_VERSION,
@@ -108,16 +111,21 @@ def collect_teacher_dataset(
108
  "history_proprio": np.stack(padded_history_proprio, axis=0).astype("float32")
109
  if padded_history_proprio
110
  else np.zeros((0, observation["proprio"].shape[0]), dtype=np.float32),
 
 
 
111
  "candidate_action_chunks": candidate_action_chunks.astype("float32"),
112
  **candidate_outcomes,
113
  }
114
  )
115
  proxy_samples += 1
116
- _, _, terminated, truncated, privileged_state = env.step(env.teacher_action())
 
117
  history_buffer.append(
118
  {
119
  "render_state": env.render_state(privileged_state),
120
  "proprio": env.get_observation(privileged_state)["proprio"].astype("float32"),
 
121
  }
122
  )
123
  if terminated:
@@ -203,6 +211,13 @@ class RevealOfflineDataset(Dataset[dict[str, Any]]):
203
  "images": stacked,
204
  "history_images": history_stacked,
205
  "history_proprio": torch.as_tensor(sample.get("history_proprio", []), dtype=torch.float32),
 
 
 
 
 
 
 
206
  "proprio": torch.as_tensor(sample["proprio"], dtype=torch.float32),
207
  "texts": sample["language_goal"],
208
  "action_chunk": torch.as_tensor(sample["action_chunk"], dtype=torch.float32),
 
11
 
12
  from sim_reveal.procedural_envs import available_proxy_names, make_proxy_env, render_views_from_state
13
 
14
+ NOLEAK_PROXY_DATASET_VERSION = "reveal_proxy_v5_noleak_actionhist"
15
  LEGACY_PRIVILEGED_RENDER_KEYS = frozenset(
16
  {
17
  "target_template",
 
74
  )
75
  padded_history_render_states = []
76
  padded_history_proprio = []
77
+ padded_history_actions = []
78
  history_count = min(history_steps, len(history_buffer))
79
  pad_count = history_steps - history_count
80
  if history_count > 0:
 
84
  for _ in range(pad_count):
85
  padded_history_render_states.append(env.render_state(privileged_state))
86
  padded_history_proprio.append(np.zeros_like(observation["proprio"], dtype=np.float32))
87
+ padded_history_actions.append(np.zeros((action_chunk.shape[-1],), dtype=np.float32))
88
  for item in recent_history:
89
  padded_history_render_states.append(item["render_state"])
90
  padded_history_proprio.append(item["proprio"])
91
+ padded_history_actions.append(item["action"])
92
  samples.append(
93
  {
94
  "dataset_version": NOLEAK_PROXY_DATASET_VERSION,
 
111
  "history_proprio": np.stack(padded_history_proprio, axis=0).astype("float32")
112
  if padded_history_proprio
113
  else np.zeros((0, observation["proprio"].shape[0]), dtype=np.float32),
114
+ "history_actions": np.stack(padded_history_actions, axis=0).astype("float32")
115
+ if padded_history_actions
116
+ else np.zeros((0, action_chunk.shape[-1]), dtype=np.float32),
117
  "candidate_action_chunks": candidate_action_chunks.astype("float32"),
118
  **candidate_outcomes,
119
  }
120
  )
121
  proxy_samples += 1
122
+ executed_action = env.teacher_action().astype("float32")
123
+ _, _, terminated, truncated, privileged_state = env.step(executed_action)
124
  history_buffer.append(
125
  {
126
  "render_state": env.render_state(privileged_state),
127
  "proprio": env.get_observation(privileged_state)["proprio"].astype("float32"),
128
+ "action": executed_action,
129
  }
130
  )
131
  if terminated:
 
211
  "images": stacked,
212
  "history_images": history_stacked,
213
  "history_proprio": torch.as_tensor(sample.get("history_proprio", []), dtype=torch.float32),
214
+ "history_actions": torch.as_tensor(
215
+ sample.get(
216
+ "history_actions",
217
+ np.zeros((len(sample.get("history_render_states", [])), sample["action_chunk"].shape[-1]), dtype=np.float32),
218
+ ),
219
+ dtype=torch.float32,
220
+ ),
221
  "proprio": torch.as_tensor(sample["proprio"], dtype=torch.float32),
222
  "texts": sample["language_goal"],
223
  "action_chunk": torch.as_tensor(sample["action_chunk"], dtype=torch.float32),
code/reveal_vla_bimanual/sim_rlbench/dataset.py CHANGED
@@ -14,6 +14,16 @@ from sim_rlbench.camera_spec import canonical_to_upstream_camera
14
 
15
 
16
  THREE_CAMERAS: tuple[str, str, str] = ("front", "wrist_left", "wrist_right")
 
 
 
 
 
 
 
 
 
 
17
 
18
 
19
  def _camera_value(obs: Any, camera_name: str, suffix: str) -> Any:
@@ -196,8 +206,8 @@ def single_arm_absolute_action_from_delta(
196
  arm_index = {"right": 0, "left": 1}[arm_name]
197
  current_pose = _arm_pose(current_obs, arm_name)
198
  offset = arm_index * 7
199
- delta_position = delta_action[offset : offset + 3]
200
- delta_rotvec = delta_action[offset + 3 : offset + 6]
201
  gripper = float(delta_action[offset + 6] > 0.5)
202
  current_quat = _xyzw_to_wxyz(current_pose[3:])
203
  delta_quat = _rotvec_to_quat_wxyz(delta_rotvec)
@@ -379,6 +389,20 @@ class RLBenchOfflineChunkDataset(Dataset[dict[str, Any]]):
379
  )
380
  return torch.stack(history_features, dim=0)
381
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
382
  def __getitem__(self, index: int) -> dict[str, Any]:
383
  sample = self._samples[index]
384
  episode = self._episodes[sample.episode_key]
@@ -388,6 +412,7 @@ class RLBenchOfflineChunkDataset(Dataset[dict[str, Any]]):
388
  "images": self._load_rgb_stack(episode.episode_dir, sample.step_index),
389
  "history_images": self._history_rgb_stack(episode.episode_dir, sample.step_index),
390
  "history_proprio": self._history_proprio(observations, sample.step_index),
 
391
  "proprio": torch.from_numpy(
392
  bimanual_proprio_from_obs(
393
  obs,
 
14
 
15
 
16
  THREE_CAMERAS: tuple[str, str, str] = ("front", "wrist_left", "wrist_right")
17
+ MAX_RLBENCH_POSITION_DELTA = 0.05
18
+ MAX_RLBENCH_ROTATION_DELTA = 0.35
19
+
20
+
21
+ def _clip_vector_norm(vector: Sequence[float], max_norm: float) -> np.ndarray:
22
+ clipped = np.asarray(vector, dtype=np.float32)
23
+ norm = float(np.linalg.norm(clipped))
24
+ if norm <= max_norm or norm < 1e-8:
25
+ return clipped
26
+ return clipped * (max_norm / norm)
27
 
28
 
29
  def _camera_value(obs: Any, camera_name: str, suffix: str) -> Any:
 
206
  arm_index = {"right": 0, "left": 1}[arm_name]
207
  current_pose = _arm_pose(current_obs, arm_name)
208
  offset = arm_index * 7
209
+ delta_position = _clip_vector_norm(delta_action[offset : offset + 3], max_norm=MAX_RLBENCH_POSITION_DELTA)
210
+ delta_rotvec = _clip_vector_norm(delta_action[offset + 3 : offset + 6], max_norm=MAX_RLBENCH_ROTATION_DELTA)
211
  gripper = float(delta_action[offset + 6] > 0.5)
212
  current_quat = _xyzw_to_wxyz(current_pose[3:])
213
  delta_quat = _rotvec_to_quat_wxyz(delta_rotvec)
 
389
  )
390
  return torch.stack(history_features, dim=0)
391
 
392
+ def _history_actions(self, observations: Any, step_index: int) -> torch.Tensor:
393
+ if self.history_steps <= 0:
394
+ return torch.zeros((0, 14), dtype=torch.float32)
395
+ history_actions = []
396
+ for history_offset in range(self.history_steps, 0, -1):
397
+ history_index = step_index - history_offset
398
+ if history_index < 0:
399
+ history_actions.append(torch.zeros((14,), dtype=torch.float32))
400
+ else:
401
+ history_actions.append(
402
+ torch.from_numpy(delta_action_from_transition(observations[history_index], observations[history_index + 1]))
403
+ )
404
+ return torch.stack(history_actions, dim=0)
405
+
406
  def __getitem__(self, index: int) -> dict[str, Any]:
407
  sample = self._samples[index]
408
  episode = self._episodes[sample.episode_key]
 
412
  "images": self._load_rgb_stack(episode.episode_dir, sample.step_index),
413
  "history_images": self._history_rgb_stack(episode.episode_dir, sample.step_index),
414
  "history_proprio": self._history_proprio(observations, sample.step_index),
415
+ "history_actions": self._history_actions(observations, sample.step_index),
416
  "proprio": torch.from_numpy(
417
  bimanual_proprio_from_obs(
418
  obs,
code/reveal_vla_bimanual/train/configs/proxy_interaction_state_actionhist.yaml ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_state_actionhist
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction_debug
3
+ device: cuda
4
+ seed: 13
5
+
6
+ data:
7
+ proxies: [foliage_proxy, bag_proxy, cloth_proxy]
8
+ resolution: 96
9
+ train_episodes_per_proxy: 48
10
+ val_episodes_per_proxy: 16
11
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v5_actionhist.pt
12
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v5_actionhist.pt
13
+ rebuild_dataset: true
14
+ chunk_horizon: 8
15
+ rollout_horizon: 5
16
+ history_steps: 6
17
+ planner_candidates: 8
18
+ seed: 13
19
+
20
+ optim:
21
+ epochs: 10
22
+ batch_size: 16
23
+ num_workers: 0
24
+ lr: 0.001
25
+ weight_decay: 0.0001
26
+
27
+ trainer:
28
+ policy_type: interaction_state
29
+ use_bf16: true
30
+ grad_clip_norm: 1.0
31
+ freeze_backbone: true
32
+ gradient_checkpointing: false
33
+ plan_during_train: true
34
+ plan_during_eval: true
35
+ support_mode_conditioning: true
36
+ planner_mode: trainable
37
+
38
+ policy:
39
+ backbone:
40
+ model_name: openai/clip-vit-base-patch32
41
+ hidden_dim: 128
42
+ max_text_tokens: 32
43
+ freeze_backbone: true
44
+ gradient_checkpointing: false
45
+ use_dummy_backbone: true
46
+ fusion:
47
+ hidden_dim: 128
48
+ num_cameras: 3
49
+ num_layers: 2
50
+ num_heads: 4
51
+ ff_dim: 256
52
+ dropout: 0.1
53
+ proprio_dim: 32
54
+ proprio_tokens: 1
55
+ memory:
56
+ hidden_dim: 128
57
+ action_dim: 14
58
+ history_steps: 6
59
+ num_layers: 2
60
+ dropout: 0.1
61
+ memory_bank_size: 4
62
+ num_heads: 4
63
+ max_history_steps: 8
64
+ decoder:
65
+ hidden_dim: 128
66
+ num_heads: 4
67
+ num_layers: 2
68
+ ff_dim: 256
69
+ dropout: 0.1
70
+ chunk_size: 8
71
+ action_dim: 14
72
+ arm_action_dim: 7
73
+ num_candidates: 8
74
+ num_phases: 5
75
+ num_arm_roles: 4
76
+ reveal_head:
77
+ hidden_dim: 128
78
+ num_support_modes: 3
79
+ num_approach_templates: 32
80
+ rollout_horizon: 5
81
+ belief_map_size: 32
82
+ field_size: 16
83
+ num_heads: 4
84
+ predict_belief_map: true
85
+ num_phases: 5
86
+ num_arm_roles: 4
87
+ num_interaction_tokens: 8
88
+ world_model:
89
+ hidden_dim: 128
90
+ action_dim: 14
91
+ num_support_modes: 3
92
+ num_approach_templates: 32
93
+ rollout_horizon: 5
94
+ field_size: 16
95
+ num_heads: 4
96
+ num_phases: 5
97
+ num_arm_roles: 4
98
+ num_interaction_tokens: 8
99
+ planner:
100
+ hidden_dim: 128
101
+ num_candidates: 8
102
+ action_dim: 14
103
+ num_support_modes: 3
104
+ utility_margin: 0.1
105
+ num_heads: 4
106
+ num_layers: 2
107
+ num_phases: 5
108
+ num_arm_roles: 4
109
+
110
+ loss_weights:
111
+ action: 1.0
112
+ phase: 0.15
113
+ arm_role: 0.2
114
+ support_mode: 0.15
115
+ corridor: 0.2
116
+ persistence: 0.1
117
+ disturbance: 0.1
118
+ world_model: 0.25
119
+ belief: 0.05
120
+ planner_success: 0.2
121
+ planner_risk: 0.1
122
+ planner_ranking: 0.1
123
+ proposal_reconstruction: 0.2
124
+ proposal_success: 0.1
125
+ proposal_ranking: 0.1
code/reveal_vla_bimanual/train/configs/proxy_interaction_state_actionhist_smoke.yaml ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_state_actionhist_smoke
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction_debug
3
+ device: cuda
4
+ seed: 13
5
+
6
+ data:
7
+ proxies: [foliage_proxy, bag_proxy, cloth_proxy]
8
+ resolution: 64
9
+ train_episodes_per_proxy: 6
10
+ val_episodes_per_proxy: 2
11
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_smoke_v5_actionhist.pt
12
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_smoke_v5_actionhist.pt
13
+ rebuild_dataset: true
14
+ chunk_horizon: 4
15
+ rollout_horizon: 3
16
+ history_steps: 6
17
+ planner_candidates: 4
18
+ seed: 13
19
+
20
+ optim:
21
+ epochs: 4
22
+ batch_size: 8
23
+ num_workers: 0
24
+ lr: 0.001
25
+ weight_decay: 0.0001
26
+
27
+ trainer:
28
+ policy_type: interaction_state
29
+ use_bf16: true
30
+ grad_clip_norm: 1.0
31
+ freeze_backbone: true
32
+ gradient_checkpointing: false
33
+ plan_during_train: true
34
+ plan_during_eval: true
35
+ support_mode_conditioning: true
36
+ planner_mode: trainable
37
+
38
+ policy:
39
+ backbone:
40
+ model_name: openai/clip-vit-base-patch32
41
+ hidden_dim: 64
42
+ max_text_tokens: 32
43
+ freeze_backbone: true
44
+ gradient_checkpointing: false
45
+ use_dummy_backbone: true
46
+ fusion:
47
+ hidden_dim: 64
48
+ num_cameras: 3
49
+ num_layers: 2
50
+ num_heads: 4
51
+ ff_dim: 128
52
+ dropout: 0.1
53
+ proprio_dim: 32
54
+ proprio_tokens: 1
55
+ memory:
56
+ hidden_dim: 64
57
+ action_dim: 14
58
+ history_steps: 6
59
+ num_layers: 2
60
+ dropout: 0.1
61
+ memory_bank_size: 4
62
+ num_heads: 4
63
+ max_history_steps: 8
64
+ decoder:
65
+ hidden_dim: 64
66
+ num_heads: 4
67
+ num_layers: 2
68
+ ff_dim: 128
69
+ dropout: 0.1
70
+ chunk_size: 4
71
+ action_dim: 14
72
+ arm_action_dim: 7
73
+ num_candidates: 4
74
+ num_phases: 5
75
+ num_arm_roles: 4
76
+ reveal_head:
77
+ hidden_dim: 64
78
+ num_support_modes: 3
79
+ num_approach_templates: 32
80
+ rollout_horizon: 3
81
+ belief_map_size: 32
82
+ field_size: 16
83
+ num_heads: 4
84
+ predict_belief_map: true
85
+ num_phases: 5
86
+ num_arm_roles: 4
87
+ num_interaction_tokens: 8
88
+ world_model:
89
+ hidden_dim: 64
90
+ action_dim: 14
91
+ num_support_modes: 3
92
+ num_approach_templates: 32
93
+ rollout_horizon: 3
94
+ field_size: 16
95
+ num_heads: 4
96
+ num_phases: 5
97
+ num_arm_roles: 4
98
+ num_interaction_tokens: 8
99
+ planner:
100
+ hidden_dim: 64
101
+ num_candidates: 4
102
+ action_dim: 14
103
+ num_support_modes: 3
104
+ utility_margin: 0.1
105
+ num_heads: 4
106
+ num_layers: 2
107
+ num_phases: 5
108
+ num_arm_roles: 4
109
+
110
+ loss_weights:
111
+ action: 1.0
112
+ phase: 0.15
113
+ arm_role: 0.2
114
+ support_mode: 0.15
115
+ corridor: 0.2
116
+ persistence: 0.1
117
+ disturbance: 0.1
118
+ world_model: 0.25
119
+ belief: 0.05
120
+ planner_success: 0.2
121
+ planner_risk: 0.1
122
+ planner_ranking: 0.1
123
+ proposal_reconstruction: 0.2
124
+ proposal_success: 0.1
125
+ proposal_ranking: 0.1
code/reveal_vla_bimanual/train/configs/proxy_interaction_state_clip_actionhist.yaml ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_state_clip_actionhist
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction_debug
3
+ device: cuda
4
+ seed: 7
5
+ init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
6
+ init_strict: false
7
+
8
+ data:
9
+ proxies: [foliage_proxy, bag_proxy, cloth_proxy]
10
+ resolution: 224
11
+ train_episodes_per_proxy: 48
12
+ val_episodes_per_proxy: 16
13
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v5_actionhist.pt
14
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v5_actionhist.pt
15
+ rebuild_dataset: false
16
+ chunk_horizon: 8
17
+ rollout_horizon: 5
18
+ history_steps: 6
19
+ planner_candidates: 8
20
+ seed: 7
21
+
22
+ optim:
23
+ epochs: 4
24
+ batch_size: 2
25
+ num_workers: 0
26
+ lr: 0.0003
27
+ weight_decay: 0.0001
28
+
29
+ trainer:
30
+ policy_type: interaction_state
31
+ use_bf16: true
32
+ grad_clip_norm: 1.0
33
+ freeze_backbone: true
34
+ gradient_checkpointing: false
35
+ plan_during_train: true
36
+ plan_during_eval: true
37
+ support_mode_conditioning: true
38
+ planner_mode: trainable
39
+
40
+ policy:
41
+ backbone:
42
+ model_name: openai/clip-vit-base-patch32
43
+ hidden_dim: 512
44
+ max_text_tokens: 32
45
+ freeze_backbone: true
46
+ gradient_checkpointing: false
47
+ use_dummy_backbone: false
48
+ fusion:
49
+ hidden_dim: 512
50
+ num_cameras: 3
51
+ num_layers: 4
52
+ num_heads: 8
53
+ ff_dim: 2048
54
+ dropout: 0.1
55
+ proprio_dim: 32
56
+ proprio_tokens: 1
57
+ memory:
58
+ hidden_dim: 512
59
+ action_dim: 14
60
+ history_steps: 6
61
+ num_layers: 2
62
+ dropout: 0.1
63
+ memory_bank_size: 4
64
+ num_heads: 8
65
+ max_history_steps: 8
66
+ decoder:
67
+ hidden_dim: 512
68
+ num_heads: 8
69
+ num_layers: 4
70
+ ff_dim: 2048
71
+ dropout: 0.1
72
+ chunk_size: 8
73
+ action_dim: 14
74
+ arm_action_dim: 7
75
+ num_candidates: 8
76
+ num_phases: 5
77
+ num_arm_roles: 4
78
+ reveal_head:
79
+ hidden_dim: 512
80
+ num_support_modes: 3
81
+ num_approach_templates: 32
82
+ rollout_horizon: 5
83
+ belief_map_size: 32
84
+ field_size: 16
85
+ num_heads: 8
86
+ predict_belief_map: true
87
+ num_phases: 5
88
+ num_arm_roles: 4
89
+ num_interaction_tokens: 8
90
+ world_model:
91
+ hidden_dim: 512
92
+ action_dim: 14
93
+ num_support_modes: 3
94
+ num_approach_templates: 32
95
+ rollout_horizon: 5
96
+ field_size: 16
97
+ num_heads: 8
98
+ num_phases: 5
99
+ num_arm_roles: 4
100
+ num_interaction_tokens: 8
101
+ belief_map_size: 32
102
+ predict_belief_map: true
103
+ planner:
104
+ hidden_dim: 512
105
+ num_candidates: 8
106
+ action_dim: 14
107
+ num_support_modes: 3
108
+ utility_margin: 0.1
109
+ num_heads: 8
110
+ num_layers: 2
111
+ num_phases: 5
112
+ num_arm_roles: 4
113
+
114
+ loss_weights:
115
+ action: 1.0
116
+ phase: 0.1
117
+ arm_role: 0.15
118
+ support_mode: 0.1
119
+ corridor: 0.15
120
+ persistence: 0.05
121
+ disturbance: 0.05
122
+ world_model: 0.2
123
+ belief: 0.05
124
+ planner_success: 0.25
125
+ planner_risk: 0.1
126
+ planner_ranking: 0.2
127
+ proposal_reconstruction: 0.1
128
+ proposal_success: 0.15
129
+ proposal_ranking: 0.2
code/reveal_vla_bimanual/train/configs/proxy_interaction_state_recency_oracleft.yaml ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_state_recency_oracleft
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction_debug
3
+ device: cuda
4
+ seed: 13
5
+ init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
6
+ init_strict: true
7
+
8
+ data:
9
+ proxies: [foliage_proxy, bag_proxy, cloth_proxy]
10
+ resolution: 96
11
+ train_episodes_per_proxy: 48
12
+ val_episodes_per_proxy: 16
13
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v5_actionhist.pt
14
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v5_actionhist.pt
15
+ rebuild_dataset: false
16
+ chunk_horizon: 8
17
+ rollout_horizon: 5
18
+ history_steps: 6
19
+ planner_candidates: 8
20
+ seed: 13
21
+
22
+ optim:
23
+ epochs: 8
24
+ batch_size: 16
25
+ num_workers: 0
26
+ lr: 0.0003
27
+ weight_decay: 0.0001
28
+
29
+ trainer:
30
+ policy_type: interaction_state
31
+ use_bf16: true
32
+ grad_clip_norm: 1.0
33
+ freeze_backbone: true
34
+ gradient_checkpointing: false
35
+ plan_during_train: true
36
+ plan_during_eval: true
37
+ support_mode_conditioning: true
38
+ planner_mode: trainable
39
+
40
+ policy:
41
+ backbone:
42
+ model_name: openai/clip-vit-base-patch32
43
+ hidden_dim: 128
44
+ max_text_tokens: 32
45
+ freeze_backbone: true
46
+ gradient_checkpointing: false
47
+ use_dummy_backbone: true
48
+ fusion:
49
+ hidden_dim: 128
50
+ num_cameras: 3
51
+ num_layers: 2
52
+ num_heads: 4
53
+ ff_dim: 256
54
+ dropout: 0.1
55
+ proprio_dim: 32
56
+ proprio_tokens: 1
57
+ memory:
58
+ hidden_dim: 128
59
+ action_dim: 14
60
+ history_steps: 6
61
+ num_layers: 2
62
+ dropout: 0.1
63
+ memory_bank_size: 4
64
+ num_heads: 4
65
+ max_history_steps: 8
66
+ decoder:
67
+ hidden_dim: 128
68
+ num_heads: 4
69
+ num_layers: 2
70
+ ff_dim: 256
71
+ dropout: 0.1
72
+ chunk_size: 8
73
+ action_dim: 14
74
+ arm_action_dim: 7
75
+ num_candidates: 8
76
+ num_phases: 5
77
+ num_arm_roles: 4
78
+ reveal_head:
79
+ hidden_dim: 128
80
+ num_support_modes: 3
81
+ num_approach_templates: 32
82
+ rollout_horizon: 5
83
+ belief_map_size: 32
84
+ field_size: 16
85
+ num_heads: 4
86
+ predict_belief_map: true
87
+ num_phases: 5
88
+ num_arm_roles: 4
89
+ num_interaction_tokens: 8
90
+ world_model:
91
+ hidden_dim: 128
92
+ action_dim: 14
93
+ num_support_modes: 3
94
+ num_approach_templates: 32
95
+ rollout_horizon: 5
96
+ field_size: 16
97
+ num_heads: 4
98
+ num_phases: 5
99
+ num_arm_roles: 4
100
+ num_interaction_tokens: 8
101
+ planner:
102
+ hidden_dim: 128
103
+ num_candidates: 8
104
+ action_dim: 14
105
+ num_support_modes: 3
106
+ utility_margin: 0.1
107
+ num_heads: 4
108
+ num_layers: 2
109
+ num_phases: 5
110
+ num_arm_roles: 4
111
+
112
+ loss_weights:
113
+ action: 1.0
114
+ phase: 0.1
115
+ arm_role: 0.15
116
+ support_mode: 0.1
117
+ corridor: 0.15
118
+ persistence: 0.05
119
+ disturbance: 0.05
120
+ world_model: 0.2
121
+ belief: 0.05
122
+ planner_success: 0.25
123
+ planner_risk: 0.1
124
+ planner_ranking: 0.2
125
+ proposal_reconstruction: 0.1
126
+ proposal_success: 0.15
127
+ proposal_ranking: 0.2
code/reveal_vla_bimanual/train/losses.py CHANGED
@@ -34,18 +34,48 @@ def chunk_bc_loss(pred_actions: Tensor, target_actions: Tensor, mask: Tensor | N
34
  return loss.mean()
35
 
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  def reveal_state_loss(pred: dict[str, Tensor], target: dict[str, Tensor], weights: LossWeights) -> dict[str, Tensor]:
38
  losses = {}
39
  if "phase_logits" in pred:
40
- phase_map = torch.as_tensor([2, 3, 0], device=target["support_mode"].device, dtype=torch.long)
41
- phase_target = phase_map[target["support_mode"].long()]
 
 
 
 
42
  losses["phase"] = F.cross_entropy(pred["phase_logits"], phase_target)
43
  else:
44
  losses["phase"] = pred["support_mode_logits"].new_tensor(0.0)
45
  if "arm_role_logits" in pred:
46
- batch_size = pred["arm_role_logits"].shape[0]
47
- role_target = torch.as_tensor([1, 2], device=pred["arm_role_logits"].device, dtype=torch.long)
48
- role_target = role_target.unsqueeze(0).expand(batch_size, -1)
49
  role_ce = F.cross_entropy(
50
  pred["arm_role_logits"].reshape(-1, pred["arm_role_logits"].shape[-1]),
51
  role_target.reshape(-1),
@@ -106,8 +136,9 @@ def world_model_rollout_consistency_loss(pred_rollout: dict[str, Tensor], target
106
  "corridor_feasible": _expand_target(target_rollout["corridor_feasible"][..., :horizon, :, :]),
107
  "persistence_horizon": _expand_target(target_rollout["persistence_horizon"][..., :horizon, :]),
108
  "disturbance_cost": _expand_target(target_rollout["disturbance_cost"][..., :horizon]),
 
109
  }
110
- return (
111
  F.cross_entropy(
112
  pred_rollout["support_mode_logits"].reshape(-1, pred_rollout["support_mode_logits"].shape[-1]),
113
  target_rollout["support_mode"].reshape(-1).long(),
@@ -119,6 +150,19 @@ def world_model_rollout_consistency_loss(pred_rollout: dict[str, Tensor], target
119
  + F.mse_loss(pred_rollout["persistence_horizon"], target_rollout["persistence_horizon"].float())
120
  + F.mse_loss(pred_rollout["disturbance_cost"], target_rollout["disturbance_cost"].float())
121
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
 
124
  def compute_total_loss(
@@ -161,6 +205,7 @@ def compute_total_loss(
161
  "corridor_feasible": batch["candidate_rollout_corridor_feasible"],
162
  "persistence_horizon": batch["candidate_rollout_persistence_horizon"],
163
  "disturbance_cost": batch["candidate_rollout_disturbance_cost"],
 
164
  }
165
  else:
166
  rollout_target = {
@@ -168,6 +213,7 @@ def compute_total_loss(
168
  "corridor_feasible": batch["rollout_corridor_feasible"],
169
  "persistence_horizon": batch["rollout_persistence_horizon"],
170
  "disturbance_cost": batch["rollout_disturbance_cost"],
 
171
  }
172
  world_model_loss = world_model_rollout_consistency_loss(
173
  model_output["planned_rollout"],
@@ -199,6 +245,9 @@ def compute_total_loss(
199
  ranking_loss = torch.relu(0.1 - torch.sign(target_diff) * pred_diff)[ranking_mask].mean()
200
  else:
201
  ranking_loss = model_output["planner_scores"].new_tensor(0.0)
 
 
 
202
  losses["planner_success"] = success_loss
203
  losses["planner_risk"] = risk_loss
204
  losses["planner_ranking"] = ranking_loss
@@ -259,6 +308,9 @@ def compute_total_loss(
259
  ].mean()
260
  else:
261
  proposal_ranking_loss = model_output["proposal_logits"].new_tensor(0.0)
 
 
 
262
  losses["proposal_success"] = proposal_success_loss
263
  losses["proposal_ranking"] = proposal_ranking_loss
264
  total = (
 
34
  return loss.mean()
35
 
36
 
37
+ def _command_probability(command: Tensor) -> Tensor:
38
+ return (torch.tanh(command) + 1.0) * 0.5
39
+
40
+
41
+ def infer_phase_targets_from_actions(action_chunk: Tensor) -> Tensor:
42
+ open_cmd = action_chunk[..., 0]
43
+ actor_reach = _command_probability(action_chunk[..., 8])
44
+ retrieve_cmd = _command_probability(action_chunk[..., 13])
45
+
46
+ retrieve = retrieve_cmd >= 0.55
47
+ recover = open_cmd <= -0.10
48
+ reveal = open_cmd > 0.35
49
+ hold = (~retrieve) & (~recover) & (~reveal) & (actor_reach >= 0.55)
50
+
51
+ phase_target = torch.zeros_like(open_cmd, dtype=torch.long)
52
+ phase_target = torch.where(reveal, torch.ones_like(phase_target), phase_target)
53
+ phase_target = torch.where(hold, torch.full_like(phase_target, 2), phase_target)
54
+ phase_target = torch.where(retrieve, torch.full_like(phase_target, 3), phase_target)
55
+ phase_target = torch.where(recover, torch.full_like(phase_target, 4), phase_target)
56
+ return phase_target
57
+
58
+
59
+ def _role_targets_like(arm_role_logits: Tensor) -> Tensor:
60
+ role_target = torch.as_tensor([1, 2], device=arm_role_logits.device, dtype=torch.long)
61
+ expand_shape = [1] * (arm_role_logits.ndim - 2) + [2]
62
+ return role_target.view(*expand_shape).expand(*arm_role_logits.shape[:-1])
63
+
64
+
65
  def reveal_state_loss(pred: dict[str, Tensor], target: dict[str, Tensor], weights: LossWeights) -> dict[str, Tensor]:
66
  losses = {}
67
  if "phase_logits" in pred:
68
+ action_chunk = target.get("action_chunk")
69
+ if action_chunk is not None:
70
+ phase_target = infer_phase_targets_from_actions(action_chunk[:, 0])
71
+ else:
72
+ phase_map = torch.as_tensor([2, 3, 0], device=target["support_mode"].device, dtype=torch.long)
73
+ phase_target = phase_map[target["support_mode"].long()]
74
  losses["phase"] = F.cross_entropy(pred["phase_logits"], phase_target)
75
  else:
76
  losses["phase"] = pred["support_mode_logits"].new_tensor(0.0)
77
  if "arm_role_logits" in pred:
78
+ role_target = _role_targets_like(pred["arm_role_logits"])
 
 
79
  role_ce = F.cross_entropy(
80
  pred["arm_role_logits"].reshape(-1, pred["arm_role_logits"].shape[-1]),
81
  role_target.reshape(-1),
 
136
  "corridor_feasible": _expand_target(target_rollout["corridor_feasible"][..., :horizon, :, :]),
137
  "persistence_horizon": _expand_target(target_rollout["persistence_horizon"][..., :horizon, :]),
138
  "disturbance_cost": _expand_target(target_rollout["disturbance_cost"][..., :horizon]),
139
+ "action_chunk": _expand_target(target_rollout["action_chunk"][..., :horizon, :]),
140
  }
141
+ loss = (
142
  F.cross_entropy(
143
  pred_rollout["support_mode_logits"].reshape(-1, pred_rollout["support_mode_logits"].shape[-1]),
144
  target_rollout["support_mode"].reshape(-1).long(),
 
150
  + F.mse_loss(pred_rollout["persistence_horizon"], target_rollout["persistence_horizon"].float())
151
  + F.mse_loss(pred_rollout["disturbance_cost"], target_rollout["disturbance_cost"].float())
152
  )
153
+ if "phase_logits" in pred_rollout:
154
+ phase_target = infer_phase_targets_from_actions(target_rollout["action_chunk"])
155
+ loss = loss + 0.5 * F.cross_entropy(
156
+ pred_rollout["phase_logits"].reshape(-1, pred_rollout["phase_logits"].shape[-1]),
157
+ phase_target.reshape(-1),
158
+ )
159
+ if "arm_role_logits" in pred_rollout:
160
+ role_target = _role_targets_like(pred_rollout["arm_role_logits"])
161
+ loss = loss + 0.25 * F.cross_entropy(
162
+ pred_rollout["arm_role_logits"].reshape(-1, pred_rollout["arm_role_logits"].shape[-1]),
163
+ role_target.reshape(-1),
164
+ )
165
+ return loss
166
 
167
 
168
  def compute_total_loss(
 
205
  "corridor_feasible": batch["candidate_rollout_corridor_feasible"],
206
  "persistence_horizon": batch["candidate_rollout_persistence_horizon"],
207
  "disturbance_cost": batch["candidate_rollout_disturbance_cost"],
208
+ "action_chunk": batch["candidate_action_chunks"],
209
  }
210
  else:
211
  rollout_target = {
 
213
  "corridor_feasible": batch["rollout_corridor_feasible"],
214
  "persistence_horizon": batch["rollout_persistence_horizon"],
215
  "disturbance_cost": batch["rollout_disturbance_cost"],
216
+ "action_chunk": batch["action_chunk"],
217
  }
218
  world_model_loss = world_model_rollout_consistency_loss(
219
  model_output["planned_rollout"],
 
245
  ranking_loss = torch.relu(0.1 - torch.sign(target_diff) * pred_diff)[ranking_mask].mean()
246
  else:
247
  ranking_loss = model_output["planner_scores"].new_tensor(0.0)
248
+ oracle_target = utility_target.argmax(dim=-1)
249
+ oracle_loss = F.cross_entropy(model_output["planner_scores"], oracle_target)
250
+ ranking_loss = ranking_loss + 0.5 * oracle_loss
251
  losses["planner_success"] = success_loss
252
  losses["planner_risk"] = risk_loss
253
  losses["planner_ranking"] = ranking_loss
 
308
  ].mean()
309
  else:
310
  proposal_ranking_loss = model_output["proposal_logits"].new_tensor(0.0)
311
+ proposal_oracle_target = proposal_utility.argmax(dim=-1)
312
+ proposal_oracle_loss = F.cross_entropy(proposal_logits, proposal_oracle_target)
313
+ proposal_ranking_loss = proposal_ranking_loss + 0.5 * proposal_oracle_loss
314
  losses["proposal_success"] = proposal_success_loss
315
  losses["proposal_ranking"] = proposal_ranking_loss
316
  total = (
code/reveal_vla_bimanual/train/run_experiment.py CHANGED
@@ -61,6 +61,32 @@ def _loss_weights_from_omega(cfg: Any) -> LossWeights:
61
  return LossWeights(**OmegaConf.to_container(cfg, resolve=True))
62
 
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  def _build_dataset_from_config(data_cfg: Any, split: str) -> dict[str, Any]:
65
  dataset_path = data_cfg.get(f"{split}_dataset_path")
66
  if dataset_path and Path(dataset_path).exists() and not data_cfg.get("rebuild_dataset", False):
@@ -138,6 +164,7 @@ def main() -> None:
138
  trainer_config = _trainer_config_from_omega(cfg.trainer)
139
  loss_weights = _loss_weights_from_omega(cfg.loss_weights)
140
  model = build_policy(policy_config, trainer_config).to(device)
 
141
  frozen_modules = apply_planner_mode(model, trainer_config)
142
  trainable_parameters = [parameter for parameter in model.parameters() if parameter.requires_grad]
143
  optimizer = torch.optim.AdamW(trainable_parameters, lr=float(cfg.optim.lr), weight_decay=float(cfg.optim.weight_decay))
@@ -166,6 +193,7 @@ def main() -> None:
166
  "texts": moved["texts"],
167
  "history_images": moved.get("history_images"),
168
  "history_proprio": moved.get("history_proprio"),
 
169
  }
170
  if policy_supports_planning(trainer_config.policy_type):
171
  forward_kwargs["plan"] = planner_enabled(trainer_config, during_eval=True)
@@ -195,6 +223,7 @@ def main() -> None:
195
  "state_dict": model.state_dict(),
196
  "history": history,
197
  "data_resolution": int(cfg.data.resolution),
 
198
  },
199
  best_checkpoint,
200
  )
@@ -212,7 +241,9 @@ def main() -> None:
212
  "num_val_samples": len(val_bundle["samples"]),
213
  "planner_mode": trainer_config.planner_mode,
214
  "frozen_modules": frozen_modules,
 
215
  }
 
216
  print(json.dumps(summary, indent=2))
217
 
218
 
 
61
  return LossWeights(**OmegaConf.to_container(cfg, resolve=True))
62
 
63
 
64
+ def _load_init_checkpoint(model: torch.nn.Module, checkpoint_path: str | None, strict: bool) -> dict[str, Any] | None:
65
+ if not checkpoint_path:
66
+ return None
67
+ checkpoint = torch.load(Path(checkpoint_path), map_location="cpu", weights_only=False)
68
+ state_dict = checkpoint["state_dict"]
69
+ filtered_state_dict = state_dict
70
+ skipped_keys: list[str] = []
71
+ if not strict:
72
+ current_state = model.state_dict()
73
+ filtered_state_dict = {}
74
+ for key, value in state_dict.items():
75
+ current_value = current_state.get(key)
76
+ if current_value is None or current_value.shape != value.shape:
77
+ skipped_keys.append(key)
78
+ continue
79
+ filtered_state_dict[key] = value
80
+ incompatible = model.load_state_dict(filtered_state_dict, strict=strict)
81
+ return {
82
+ "path": str(checkpoint_path),
83
+ "loaded_keys": len(filtered_state_dict),
84
+ "skipped_shape_mismatch_keys": skipped_keys,
85
+ "missing_keys": list(incompatible.missing_keys),
86
+ "unexpected_keys": list(incompatible.unexpected_keys),
87
+ }
88
+
89
+
90
  def _build_dataset_from_config(data_cfg: Any, split: str) -> dict[str, Any]:
91
  dataset_path = data_cfg.get(f"{split}_dataset_path")
92
  if dataset_path and Path(dataset_path).exists() and not data_cfg.get("rebuild_dataset", False):
 
164
  trainer_config = _trainer_config_from_omega(cfg.trainer)
165
  loss_weights = _loss_weights_from_omega(cfg.loss_weights)
166
  model = build_policy(policy_config, trainer_config).to(device)
167
+ init_info = _load_init_checkpoint(model, cfg.get("init_checkpoint"), bool(cfg.get("init_strict", False)))
168
  frozen_modules = apply_planner_mode(model, trainer_config)
169
  trainable_parameters = [parameter for parameter in model.parameters() if parameter.requires_grad]
170
  optimizer = torch.optim.AdamW(trainable_parameters, lr=float(cfg.optim.lr), weight_decay=float(cfg.optim.weight_decay))
 
193
  "texts": moved["texts"],
194
  "history_images": moved.get("history_images"),
195
  "history_proprio": moved.get("history_proprio"),
196
+ "history_actions": moved.get("history_actions"),
197
  }
198
  if policy_supports_planning(trainer_config.policy_type):
199
  forward_kwargs["plan"] = planner_enabled(trainer_config, during_eval=True)
 
223
  "state_dict": model.state_dict(),
224
  "history": history,
225
  "data_resolution": int(cfg.data.resolution),
226
+ "init_info": init_info,
227
  },
228
  best_checkpoint,
229
  )
 
241
  "num_val_samples": len(val_bundle["samples"]),
242
  "planner_mode": trainer_config.planner_mode,
243
  "frozen_modules": frozen_modules,
244
+ "init_info": init_info,
245
  }
246
+ (output_dir / "summary.json").write_text(json.dumps(summary, indent=2), encoding="utf-8")
247
  print(json.dumps(summary, indent=2))
248
 
249
 
code/reveal_vla_bimanual/train/run_rlbench_experiment.py CHANGED
@@ -133,6 +133,7 @@ def main() -> None:
133
  "texts": moved["texts"],
134
  "history_images": moved.get("history_images"),
135
  "history_proprio": moved.get("history_proprio"),
 
136
  }
137
  if policy_supports_planning(trainer_config.policy_type):
138
  forward_kwargs["plan"] = planner_enabled(trainer_config, during_eval=True)
 
133
  "texts": moved["texts"],
134
  "history_images": moved.get("history_images"),
135
  "history_proprio": moved.get("history_proprio"),
136
+ "history_actions": moved.get("history_actions"),
137
  }
138
  if policy_supports_planning(trainer_config.policy_type):
139
  forward_kwargs["plan"] = planner_enabled(trainer_config, during_eval=True)
code/reveal_vla_bimanual/train/smoke_checks.py CHANGED
@@ -139,11 +139,13 @@ def _synthetic_rlbench_batch(
139
  history_images = torch.rand(batch_size, history_steps, 3, 3, resolution, resolution, device=device)
140
  proprio = torch.rand(batch_size, 32, device=device)
141
  history_proprio = torch.rand(batch_size, history_steps, 32, device=device)
 
142
  action_chunk = torch.rand(batch_size, chunk_size, 14, device=device)
143
  return {
144
  "images": images,
145
  "history_images": history_images,
146
  "history_proprio": history_proprio,
 
147
  "proprio": proprio,
148
  "texts": ["synthetic dual-arm RLBench smoke task"] * batch_size,
149
  "action_chunk": action_chunk,
@@ -207,6 +209,7 @@ def main() -> None:
207
  texts=proxy_batch["texts"],
208
  history_images=proxy_batch.get("history_images"),
209
  history_proprio=proxy_batch.get("history_proprio"),
 
210
  plan=True,
211
  candidate_chunks_override=proxy_batch["candidate_action_chunks"],
212
  )
@@ -245,6 +248,7 @@ def main() -> None:
245
  texts=rlbench_batch["texts"],
246
  history_images=rlbench_batch.get("history_images"),
247
  history_proprio=rlbench_batch.get("history_proprio"),
 
248
  plan=True,
249
  )
250
  _check_output_shapes(
 
139
  history_images = torch.rand(batch_size, history_steps, 3, 3, resolution, resolution, device=device)
140
  proprio = torch.rand(batch_size, 32, device=device)
141
  history_proprio = torch.rand(batch_size, history_steps, 32, device=device)
142
+ history_actions = torch.rand(batch_size, history_steps, 14, device=device)
143
  action_chunk = torch.rand(batch_size, chunk_size, 14, device=device)
144
  return {
145
  "images": images,
146
  "history_images": history_images,
147
  "history_proprio": history_proprio,
148
+ "history_actions": history_actions,
149
  "proprio": proprio,
150
  "texts": ["synthetic dual-arm RLBench smoke task"] * batch_size,
151
  "action_chunk": action_chunk,
 
209
  texts=proxy_batch["texts"],
210
  history_images=proxy_batch.get("history_images"),
211
  history_proprio=proxy_batch.get("history_proprio"),
212
+ history_actions=proxy_batch.get("history_actions"),
213
  plan=True,
214
  candidate_chunks_override=proxy_batch["candidate_action_chunks"],
215
  )
 
248
  texts=rlbench_batch["texts"],
249
  history_images=rlbench_batch.get("history_images"),
250
  history_proprio=rlbench_batch.get("history_proprio"),
251
+ history_actions=rlbench_batch.get("history_actions"),
252
  plan=True,
253
  )
254
  _check_output_shapes(
code/reveal_vla_bimanual/train/trainer.py CHANGED
@@ -86,6 +86,7 @@ class BimanualTrainer:
86
  "language_tokens": language_tokens if isinstance(language_tokens, dict) else None,
87
  "history_images": batch.get("history_images"),
88
  "history_proprio": batch.get("history_proprio"),
 
89
  }
90
  if policy_supports_planning(self.config.policy_type):
91
  forward_kwargs["plan"] = planner_enabled(self.config, during_eval=False)
 
86
  "language_tokens": language_tokens if isinstance(language_tokens, dict) else None,
87
  "history_images": batch.get("history_images"),
88
  "history_proprio": batch.get("history_proprio"),
89
+ "history_actions": batch.get("history_actions"),
90
  }
91
  if policy_supports_planning(self.config.policy_type):
92
  forward_kwargs["plan"] = planner_enabled(self.config, during_eval=False)
environment/validate_same_machine.sh CHANGED
@@ -14,6 +14,27 @@ RUNTIME_DIR="${ROOT_DIR}/runtime"
14
  mkdir -p "${RUNTIME_DIR}"
15
  chmod 700 "${RUNTIME_DIR}"
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  run_in_rlbench_env() {
18
  local driver_version=""
19
  local driver_branch=""
@@ -34,6 +55,8 @@ run_in_rlbench_env() {
34
  "${MAMBA_BIN}" run -r "${MAMBA_ROOT_PREFIX}" -p "${ENV_PREFIX}" "$@"
35
  }
36
 
 
 
37
  echo "Display check"
38
  DISPLAY="${DISPLAY}" glxinfo -B
39
 
 
14
  mkdir -p "${RUNTIME_DIR}"
15
  chmod 700 "${RUNTIME_DIR}"
16
 
17
+ ensure_rlbench_display() {
18
+ if DISPLAY="${DISPLAY}" xdpyinfo >/dev/null 2>&1; then
19
+ return 0
20
+ fi
21
+
22
+ local driver_version=""
23
+ local driver_branch=""
24
+ if command -v nvidia-smi >/dev/null 2>&1; then
25
+ driver_version="$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 || true)"
26
+ driver_branch="${driver_version%%.*}"
27
+ fi
28
+
29
+ if [[ -n "${driver_branch}" && ! -f "${ROOT_DIR}/system_shims/nvidia${driver_branch}/usr/lib/x86_64-linux-gnu/nvidia/xorg/libglxserver_nvidia.so" ]]; then
30
+ echo "RLBench X shims missing; installing headless X prerequisites"
31
+ ROOT_DIR="${ROOT_DIR}" "${PROJECT_DIR}/scripts/setup_rlbench_headless_x.sh"
32
+ fi
33
+
34
+ echo "Starting RLBench X server on ${DISPLAY}"
35
+ ROOT_DIR="${ROOT_DIR}" DISPLAY_NUM="${DISPLAY_NUM}" "${PROJECT_DIR}/scripts/start_rlbench_x.sh"
36
+ }
37
+
38
  run_in_rlbench_env() {
39
  local driver_version=""
40
  local driver_branch=""
 
55
  "${MAMBA_BIN}" run -r "${MAMBA_ROOT_PREFIX}" -p "${ENV_PREFIX}" "$@"
56
  }
57
 
58
+ ensure_rlbench_display
59
+
60
  echo "Display check"
61
  DISPLAY="${DISPLAY}" glxinfo -B
62