Add files using upload-large-folder tool
Browse files
- FILE_MANIFEST.txt +72 -0
- MODEL_INDEX.md +75 -0
- README.md +76 -0
- artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.json +15 -0
- artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.md +13 -0
- artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.json +15 -0
- artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.md +13 -0
- artifacts/outputs/interaction_debug/ablation_no_world_model_actionhist/reveal_benchmark.md +13 -0
- artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.json +15 -0
- artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.md +13 -0
- artifacts/outputs/interaction_debug/chunk_debug_trace.json +140 -0
- artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/diagnostics/proxy_diagnostics.json +7 -0
- artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/metrics.json +174 -0
- artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/config_resolved.yaml +127 -0
- artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/diagnostics/proxy_diagnostics.json +7 -0
- artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/metrics.json +174 -0
- artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/summary.json +573 -0
- artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/config_resolved.yaml +125 -0
- artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/diagnostics/proxy_diagnostics.json +7 -0
- artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/metrics.json +346 -0
- artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/summary.json +16 -0
- artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.json +28 -0
- artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.md +25 -0
- artifacts/outputs/interaction_debug/reveal_eval_commit2_compare/reveal_benchmark.json +41 -0
- artifacts/outputs/interaction_debug/reveal_eval_commit2_compare/reveal_benchmark.md +37 -0
- artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_recency_evalonly/reveal_benchmark.json +15 -0
- artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_recency_evalonly/reveal_benchmark.md +13 -0
- artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.json +41 -0
- artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.md +37 -0
- artifacts/outputs/interaction_debug/reveal_eval_interaction_recency_oracleft_commit4_compare/reveal_benchmark.json +41 -0
- artifacts/outputs/interaction_debug/reveal_eval_interaction_recency_oracleft_commit4_compare/reveal_benchmark.md +37 -0
- artifacts/outputs/interaction_debug/smoke_checks_actionhist/smoke_checks.json +157 -0
- code/reveal_vla_bimanual/eval/run_proxy_diagnostics.py +1 -0
- code/reveal_vla_bimanual/eval/run_reveal_benchmark.py +45 -13
- code/reveal_vla_bimanual/eval/run_rlbench_rollout_eval.py +112 -18
- code/reveal_vla_bimanual/models/backbones.py +1 -1
- code/reveal_vla_bimanual/models/observation_memory.py +38 -3
- code/reveal_vla_bimanual/models/policy.py +14 -2
- code/reveal_vla_bimanual/sim_reveal/dataset.py +17 -2
- code/reveal_vla_bimanual/sim_rlbench/dataset.py +27 -2
- code/reveal_vla_bimanual/train/configs/proxy_interaction_state_actionhist.yaml +125 -0
- code/reveal_vla_bimanual/train/configs/proxy_interaction_state_actionhist_smoke.yaml +125 -0
- code/reveal_vla_bimanual/train/configs/proxy_interaction_state_clip_actionhist.yaml +129 -0
- code/reveal_vla_bimanual/train/configs/proxy_interaction_state_recency_oracleft.yaml +127 -0
- code/reveal_vla_bimanual/train/losses.py +58 -6
- code/reveal_vla_bimanual/train/run_experiment.py +31 -0
- code/reveal_vla_bimanual/train/run_rlbench_experiment.py +1 -0
- code/reveal_vla_bimanual/train/smoke_checks.py +4 -0
- code/reveal_vla_bimanual/train/trainer.py +1 -0
- environment/validate_same_machine.sh +23 -0
FILE_MANIFEST.txt
CHANGED
|
@@ -3,11 +3,17 @@
|
|
| 3 |
./MODEL_INDEX.md
|
| 4 |
./README.md
|
| 5 |
./artifacts/data/reveal_proxy/proxy_train_clip224_v4_noleak_counterfactual.pt
|
|
|
|
| 6 |
./artifacts/data/reveal_proxy/proxy_train_smoke_v4.pt
|
|
|
|
| 7 |
./artifacts/data/reveal_proxy/proxy_train_v4_noleak_counterfactual.pt
|
|
|
|
| 8 |
./artifacts/data/reveal_proxy/proxy_val_clip224_v4_noleak_counterfactual.pt
|
|
|
|
| 9 |
./artifacts/data/reveal_proxy/proxy_val_smoke_v4.pt
|
|
|
|
| 10 |
./artifacts/data/reveal_proxy/proxy_val_v4_noleak_counterfactual.pt
|
|
|
|
| 11 |
./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/front_rgb/rgb_0000.png
|
| 12 |
./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/front_rgb/rgb_0001.png
|
| 13 |
./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/low_dim_obs.pkl
|
|
@@ -98,6 +104,68 @@
|
|
| 98 |
./artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.json
|
| 99 |
./artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.md
|
| 100 |
./artifacts/outputs/interaction/smoke_checks/smoke_checks.json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
./artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/checkpoint_best.pt
|
| 102 |
./artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/config_resolved.yaml
|
| 103 |
./artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/diagnostics/proxy_diagnostics.json
|
|
@@ -256,6 +324,10 @@
|
|
| 256 |
./code/reveal_vla_bimanual/train/configs/proxy_backbone_only_clip.yaml
|
| 257 |
./code/reveal_vla_bimanual/train/configs/proxy_backbone_only_smoke.yaml
|
| 258 |
./code/reveal_vla_bimanual/train/configs/proxy_interaction_state.yaml
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
./code/reveal_vla_bimanual/train/configs/proxy_interaction_state_smoke.yaml
|
| 260 |
./code/reveal_vla_bimanual/train/configs/proxy_reveal_state.yaml
|
| 261 |
./code/reveal_vla_bimanual/train/configs/proxy_reveal_state_clip.yaml
|
|
|
|
| 3 |
./MODEL_INDEX.md
|
| 4 |
./README.md
|
| 5 |
./artifacts/data/reveal_proxy/proxy_train_clip224_v4_noleak_counterfactual.pt
|
| 6 |
+
./artifacts/data/reveal_proxy/proxy_train_clip224_v5_actionhist.pt
|
| 7 |
./artifacts/data/reveal_proxy/proxy_train_smoke_v4.pt
|
| 8 |
+
./artifacts/data/reveal_proxy/proxy_train_smoke_v5_actionhist.pt
|
| 9 |
./artifacts/data/reveal_proxy/proxy_train_v4_noleak_counterfactual.pt
|
| 10 |
+
./artifacts/data/reveal_proxy/proxy_train_v5_actionhist.pt
|
| 11 |
./artifacts/data/reveal_proxy/proxy_val_clip224_v4_noleak_counterfactual.pt
|
| 12 |
+
./artifacts/data/reveal_proxy/proxy_val_clip224_v5_actionhist.pt
|
| 13 |
./artifacts/data/reveal_proxy/proxy_val_smoke_v4.pt
|
| 14 |
+
./artifacts/data/reveal_proxy/proxy_val_smoke_v5_actionhist.pt
|
| 15 |
./artifacts/data/reveal_proxy/proxy_val_v4_noleak_counterfactual.pt
|
| 16 |
+
./artifacts/data/reveal_proxy/proxy_val_v5_actionhist.pt
|
| 17 |
./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/front_rgb/rgb_0000.png
|
| 18 |
./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/front_rgb/rgb_0001.png
|
| 19 |
./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/low_dim_obs.pkl
|
|
|
|
| 104 |
./artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.json
|
| 105 |
./artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.md
|
| 106 |
./artifacts/outputs/interaction/smoke_checks/smoke_checks.json
|
| 107 |
+
./artifacts/outputs/interaction_debug/ablation_no_interaction_head_actionhist/reveal_benchmark.json
|
| 108 |
+
./artifacts/outputs/interaction_debug/ablation_no_interaction_head_actionhist/reveal_benchmark.md
|
| 109 |
+
./artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.json
|
| 110 |
+
./artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.md
|
| 111 |
+
./artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.json
|
| 112 |
+
./artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.md
|
| 113 |
+
./artifacts/outputs/interaction_debug/ablation_no_world_model_actionhist/reveal_benchmark.json
|
| 114 |
+
./artifacts/outputs/interaction_debug/ablation_no_world_model_actionhist/reveal_benchmark.md
|
| 115 |
+
./artifacts/outputs/interaction_debug/ablation_none_actionhist/reveal_benchmark.json
|
| 116 |
+
./artifacts/outputs/interaction_debug/ablation_none_actionhist/reveal_benchmark.md
|
| 117 |
+
./artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.json
|
| 118 |
+
./artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.md
|
| 119 |
+
./artifacts/outputs/interaction_debug/chunk_debug_trace.json
|
| 120 |
+
./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
|
| 121 |
+
./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/config_resolved.yaml
|
| 122 |
+
./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/diagnostics/proxy_diagnostics.json
|
| 123 |
+
./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/metrics.json
|
| 124 |
+
./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/checkpoint_best.pt
|
| 125 |
+
./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/config_resolved.yaml
|
| 126 |
+
./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/diagnostics/proxy_diagnostics.json
|
| 127 |
+
./artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/metrics.json
|
| 128 |
+
./artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/checkpoint_best.pt
|
| 129 |
+
./artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/config_resolved.yaml
|
| 130 |
+
./artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/diagnostics/proxy_diagnostics.json
|
| 131 |
+
./artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/metrics.json
|
| 132 |
+
./artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/summary.json
|
| 133 |
+
./artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/checkpoint_best.pt
|
| 134 |
+
./artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/config_resolved.yaml
|
| 135 |
+
./artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/diagnostics/proxy_diagnostics.json
|
| 136 |
+
./artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/metrics.json
|
| 137 |
+
./artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/summary.json
|
| 138 |
+
./artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.json
|
| 139 |
+
./artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.md
|
| 140 |
+
./artifacts/outputs/interaction_debug/reveal_eval_commit2_compare/reveal_benchmark.json
|
| 141 |
+
./artifacts/outputs/interaction_debug/reveal_eval_commit2_compare/reveal_benchmark.md
|
| 142 |
+
./artifacts/outputs/interaction_debug/reveal_eval_commit8_compare/reveal_benchmark.json
|
| 143 |
+
./artifacts/outputs/interaction_debug/reveal_eval_commit8_compare/reveal_benchmark.md
|
| 144 |
+
./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_commit4/reveal_benchmark.json
|
| 145 |
+
./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_commit4/reveal_benchmark.md
|
| 146 |
+
./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_recency_evalonly/reveal_benchmark.json
|
| 147 |
+
./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_recency_evalonly/reveal_benchmark.md
|
| 148 |
+
./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_smoke/reveal_benchmark.json
|
| 149 |
+
./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_smoke/reveal_benchmark.md
|
| 150 |
+
./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_smoke_commit4_short/reveal_benchmark.json
|
| 151 |
+
./artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_smoke_commit4_short/reveal_benchmark.md
|
| 152 |
+
./artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.json
|
| 153 |
+
./artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.md
|
| 154 |
+
./artifacts/outputs/interaction_debug/reveal_eval_interaction_recency_oracleft_commit4_compare/reveal_benchmark.json
|
| 155 |
+
./artifacts/outputs/interaction_debug/reveal_eval_interaction_recency_oracleft_commit4_compare/reveal_benchmark.md
|
| 156 |
+
./artifacts/outputs/interaction_debug/reveal_eval_interaction_rolefix_full_commit4/reveal_benchmark.json
|
| 157 |
+
./artifacts/outputs/interaction_debug/reveal_eval_interaction_rolefix_full_commit4/reveal_benchmark.md
|
| 158 |
+
./artifacts/outputs/interaction_debug/reveal_eval_interaction_rolefix_smoke_commit4_short/reveal_benchmark.json
|
| 159 |
+
./artifacts/outputs/interaction_debug/reveal_eval_interaction_rolefix_smoke_commit4_short/reveal_benchmark.md
|
| 160 |
+
./artifacts/outputs/interaction_debug/reveal_eval_old_no_leak_baselines_commit4/reveal_benchmark.json
|
| 161 |
+
./artifacts/outputs/interaction_debug/reveal_eval_old_no_leak_baselines_commit4/reveal_benchmark.md
|
| 162 |
+
./artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_clipped/rollout_eval.json
|
| 163 |
+
./artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_clipped/rollout_eval.md
|
| 164 |
+
./artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_recovered/rollout_eval.json
|
| 165 |
+
./artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_recovered/rollout_eval.md
|
| 166 |
+
./artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_rerun/rollout_eval.json
|
| 167 |
+
./artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_rerun/rollout_eval.md
|
| 168 |
+
./artifacts/outputs/interaction_debug/smoke_checks_actionhist/smoke_checks.json
|
| 169 |
./artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/checkpoint_best.pt
|
| 170 |
./artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/config_resolved.yaml
|
| 171 |
./artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/diagnostics/proxy_diagnostics.json
|
|
|
|
| 324 |
./code/reveal_vla_bimanual/train/configs/proxy_backbone_only_clip.yaml
|
| 325 |
./code/reveal_vla_bimanual/train/configs/proxy_backbone_only_smoke.yaml
|
| 326 |
./code/reveal_vla_bimanual/train/configs/proxy_interaction_state.yaml
|
| 327 |
+
./code/reveal_vla_bimanual/train/configs/proxy_interaction_state_actionhist.yaml
|
| 328 |
+
./code/reveal_vla_bimanual/train/configs/proxy_interaction_state_actionhist_smoke.yaml
|
| 329 |
+
./code/reveal_vla_bimanual/train/configs/proxy_interaction_state_clip_actionhist.yaml
|
| 330 |
+
./code/reveal_vla_bimanual/train/configs/proxy_interaction_state_recency_oracleft.yaml
|
| 331 |
./code/reveal_vla_bimanual/train/configs/proxy_interaction_state_smoke.yaml
|
| 332 |
./code/reveal_vla_bimanual/train/configs/proxy_reveal_state.yaml
|
| 333 |
./code/reveal_vla_bimanual/train/configs/proxy_reveal_state_clip.yaml
|
MODEL_INDEX.md
CHANGED
|
@@ -40,6 +40,18 @@ This file lists the uploaded checkpoints, datasets, and raw report files referen
|
|
| 40 |
- `artifacts/data/reveal_proxy/proxy_train_smoke_v4.pt`
|
| 41 |
- smoke val dataset
|
| 42 |
- `artifacts/data/reveal_proxy/proxy_val_smoke_v4.pt`
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
## Raw Benchmark Reports
|
| 45 |
|
|
@@ -95,6 +107,63 @@ This file lists the uploaded checkpoints, datasets, and raw report files referen
|
|
| 95 |
- interaction-state rolefix full benchmark JSON
|
| 96 |
- `artifacts/outputs/interaction_rolefix_full/reveal_eval_interaction/reveal_benchmark.json`
|
| 97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
## RLBench Two-Robot Smoke Outputs
|
| 99 |
|
| 100 |
- import smoke JSON
|
|
@@ -115,6 +184,12 @@ This file lists the uploaded checkpoints, datasets, and raw report files referen
|
|
| 115 |
- `artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.json`
|
| 116 |
- RLBench open_drawer rollout eval Markdown
|
| 117 |
- `artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.md`
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
- RLBench smoke dataset root
|
| 119 |
- `artifacts/data/rlbench_smoke_open_drawer/`
|
| 120 |
|
|
|
|
| 40 |
- `artifacts/data/reveal_proxy/proxy_train_smoke_v4.pt`
|
| 41 |
- smoke val dataset
|
| 42 |
- `artifacts/data/reveal_proxy/proxy_val_smoke_v4.pt`
|
| 43 |
+
- actionhist train dataset
|
| 44 |
+
- `artifacts/data/reveal_proxy/proxy_train_v5_actionhist.pt`
|
| 45 |
+
- actionhist val dataset
|
| 46 |
+
- `artifacts/data/reveal_proxy/proxy_val_v5_actionhist.pt`
|
| 47 |
+
- actionhist smoke train dataset
|
| 48 |
+
- `artifacts/data/reveal_proxy/proxy_train_smoke_v5_actionhist.pt`
|
| 49 |
+
- actionhist smoke val dataset
|
| 50 |
+
- `artifacts/data/reveal_proxy/proxy_val_smoke_v5_actionhist.pt`
|
| 51 |
+
- CLIP actionhist train dataset
|
| 52 |
+
- `artifacts/data/reveal_proxy/proxy_train_clip224_v5_actionhist.pt`
|
| 53 |
+
- CLIP actionhist val dataset
|
| 54 |
+
- `artifacts/data/reveal_proxy/proxy_val_clip224_v5_actionhist.pt`
|
| 55 |
|
| 56 |
## Raw Benchmark Reports
|
| 57 |
|
|
|
|
| 107 |
- interaction-state rolefix full benchmark JSON
|
| 108 |
- `artifacts/outputs/interaction_rolefix_full/reveal_eval_interaction/reveal_benchmark.json`
|
| 109 |
|
| 110 |
+
## Interaction Debug Outputs
|
| 111 |
+
|
| 112 |
+
- actionhist smoke checkpoint
|
| 113 |
+
- `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/checkpoint_best.pt`
|
| 114 |
+
- actionhist smoke metrics
|
| 115 |
+
- `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/metrics.json`
|
| 116 |
+
- actionhist smoke diagnostics
|
| 117 |
+
- `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/diagnostics/proxy_diagnostics.json`
|
| 118 |
+
- actionhist full checkpoint
|
| 119 |
+
- `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt`
|
| 120 |
+
- actionhist full metrics
|
| 121 |
+
- `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/metrics.json`
|
| 122 |
+
- actionhist full diagnostics
|
| 123 |
+
- `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/diagnostics/proxy_diagnostics.json`
|
| 124 |
+
- recency-oracleft full checkpoint
|
| 125 |
+
- `artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/checkpoint_best.pt`
|
| 126 |
+
- recency-oracleft full metrics
|
| 127 |
+
- `artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/metrics.json`
|
| 128 |
+
- recency-oracleft full summary
|
| 129 |
+
- `artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/summary.json`
|
| 130 |
+
- recency-oracleft diagnostics
|
| 131 |
+
- `artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/diagnostics/proxy_diagnostics.json`
|
| 132 |
+
- CLIP actionhist full checkpoint
|
| 133 |
+
- `artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/checkpoint_best.pt`
|
| 134 |
+
- CLIP actionhist full metrics
|
| 135 |
+
- `artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/metrics.json`
|
| 136 |
+
- CLIP actionhist full summary
|
| 137 |
+
- `artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/summary.json`
|
| 138 |
+
- CLIP actionhist diagnostics
|
| 139 |
+
- `artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/diagnostics/proxy_diagnostics.json`
|
| 140 |
+
- corrected interaction benchmark JSON
|
| 141 |
+
- `artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_commit4/reveal_benchmark.json`
|
| 142 |
+
- corrected baseline compare benchmark JSON
|
| 143 |
+
- `artifacts/outputs/interaction_debug/reveal_eval_old_no_leak_baselines_commit4/reveal_benchmark.json`
|
| 144 |
+
- corrected CLIP baseline benchmark JSON
|
| 145 |
+
- `artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.json`
|
| 146 |
+
- corrected CLIP interaction compare benchmark JSON
|
| 147 |
+
- `artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.json`
|
| 148 |
+
- corrected recency-oracleft compare benchmark JSON
|
| 149 |
+
- `artifacts/outputs/interaction_debug/reveal_eval_interaction_recency_oracleft_commit4_compare/reveal_benchmark.json`
|
| 150 |
+
- actionhist ablation full benchmark JSON
|
| 151 |
+
- `artifacts/outputs/interaction_debug/ablation_none_actionhist/reveal_benchmark.json`
|
| 152 |
+
- actionhist ablation no-interaction-head benchmark JSON
|
| 153 |
+
- `artifacts/outputs/interaction_debug/ablation_no_interaction_head_actionhist/reveal_benchmark.json`
|
| 154 |
+
- actionhist ablation no-world-model benchmark JSON
|
| 155 |
+
- `artifacts/outputs/interaction_debug/ablation_no_world_model_actionhist/reveal_benchmark.json`
|
| 156 |
+
- actionhist ablation no-planner benchmark JSON
|
| 157 |
+
- `artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.json`
|
| 158 |
+
- actionhist ablation no-role-tokens benchmark JSON
|
| 159 |
+
- `artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.json`
|
| 160 |
+
- actionhist ablation short-history benchmark JSON
|
| 161 |
+
- `artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.json`
|
| 162 |
+
- chunk debug trace
|
| 163 |
+
- `artifacts/outputs/interaction_debug/chunk_debug_trace.json`
|
| 164 |
+
- actionhist smoke checks
|
| 165 |
+
- `artifacts/outputs/interaction_debug/smoke_checks_actionhist/smoke_checks.json`
|
| 166 |
+
|
| 167 |
## RLBench Two-Robot Smoke Outputs
|
| 168 |
|
| 169 |
- import smoke JSON
|
|
|
|
| 184 |
- `artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.json`
|
| 185 |
- RLBench open_drawer rollout eval Markdown
|
| 186 |
- `artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.md`
|
| 187 |
+
- RLBench open_drawer rollout eval rerun JSON
|
| 188 |
+
- `artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_rerun/rollout_eval.json`
|
| 189 |
+
- RLBench open_drawer rollout eval clipped JSON
|
| 190 |
+
- `artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_clipped/rollout_eval.json`
|
| 191 |
+
- RLBench open_drawer rollout eval recovered JSON
|
| 192 |
+
- `artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_recovered/rollout_eval.json`
|
| 193 |
- RLBench smoke dataset root
|
| 194 |
- `artifacts/data/rlbench_smoke_open_drawer/`
|
| 195 |
|
README.md
CHANGED
|
@@ -166,8 +166,84 @@ The smoke output file is:
|
|
| 166 |
- `artifacts/data/reveal_proxy/proxy_val_v4_noleak_counterfactual.pt`
|
| 167 |
- `artifacts/data/reveal_proxy/proxy_train_clip224_v4_noleak_counterfactual.pt`
|
| 168 |
- `artifacts/data/reveal_proxy/proxy_val_clip224_v4_noleak_counterfactual.pt`
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
- `artifacts/data/rlbench_smoke_open_drawer/`
|
| 170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
## Recreate The Same Software Layout
|
| 172 |
|
| 173 |
Use:
|
|
|
|
| 166 |
- `artifacts/data/reveal_proxy/proxy_val_v4_noleak_counterfactual.pt`
|
| 167 |
- `artifacts/data/reveal_proxy/proxy_train_clip224_v4_noleak_counterfactual.pt`
|
| 168 |
- `artifacts/data/reveal_proxy/proxy_val_clip224_v4_noleak_counterfactual.pt`
|
| 169 |
+
- `artifacts/data/reveal_proxy/proxy_train_v5_actionhist.pt`
|
| 170 |
+
- `artifacts/data/reveal_proxy/proxy_val_v5_actionhist.pt`
|
| 171 |
+
- `artifacts/data/reveal_proxy/proxy_train_clip224_v5_actionhist.pt`
|
| 172 |
+
- `artifacts/data/reveal_proxy/proxy_val_clip224_v5_actionhist.pt`
|
| 173 |
- `artifacts/data/rlbench_smoke_open_drawer/`
|
| 174 |
|
| 175 |
+
## Raw Follow-Up Interaction Runs
|
| 176 |
+
|
| 177 |
+
### Proxy Training Endpoints
|
| 178 |
+
|
| 179 |
+
| Run | Checkpoint | Final train total | Final val total | Metrics or summary |
|
| 180 |
+
| --- | --- | ---: | ---: | --- |
|
| 181 |
+
| interaction-state actionhist smoke | `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/checkpoint_best.pt` | 1.229741208255291 | 1.1121365427970886 | `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/metrics.json` |
|
| 182 |
+
| interaction-state actionhist full | `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt` | 0.7432626067979089 | 0.8655468797630735 | `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/metrics.json` |
|
| 183 |
+
| interaction-state recency oracleft full | `artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/checkpoint_best.pt` | 0.9377426480253538 | 1.211510909928216 | `artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/summary.json` |
|
| 184 |
+
| interaction-state CLIP actionhist full | `artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/checkpoint_best.pt` | 1.2094011244349454 | 1.1205205075324527 | `artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/summary.json` |
|
| 185 |
+
|
| 186 |
+
### Proxy Benchmark Results With Committed-Chunk Evaluator
|
| 187 |
+
|
| 188 |
+
Source files:
|
| 189 |
+
|
| 190 |
+
- `artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_commit4/reveal_benchmark.json`
|
| 191 |
+
- `artifacts/outputs/interaction_debug/reveal_eval_old_no_leak_baselines_commit4/reveal_benchmark.json`
|
| 192 |
+
|
| 193 |
+
| Model | Mean success | foliage_proxy | bag_proxy | cloth_proxy | visibility_integral | corridor_availability | reocclusion_rate | persistence_horizon_mae | disturbance_cost |
|
| 194 |
+
| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
|
| 195 |
+
| interaction | 0.5277777777777778 | 0.4166666666666667 | 0.5416666666666666 | 0.625 | 32.84789120488696 | 0.8711970953477753 | 0.003125 | 1.1544888946683267 | 0.4288607043110662 |
|
| 196 |
+
| backbone | 0.5555555555555555 | 0.4166666666666667 | 0.5833333333333334 | 0.6666666666666666 | 29.27436817354626 | 0.7935162136952082 | 0.07854136604136604 | 0.0 | 0.4006388829503622 |
|
| 197 |
+
| reveal | 0.5416666666666666 | 0.4166666666666667 | 0.5833333333333334 | 0.625 | 30.107333534293705 | 0.8134206715557311 | 0.05241552429052429 | 2.0996421982129196 | 0.42389288420478505 |
|
| 198 |
+
|
| 199 |
+
### Frozen CLIP Proxy Benchmark Results With Committed-Chunk Evaluator
|
| 200 |
+
|
| 201 |
+
Source files:
|
| 202 |
+
|
| 203 |
+
- `artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.json`
|
| 204 |
+
- `artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.json`
|
| 205 |
+
|
| 206 |
+
| Model | Mean success | foliage_proxy | bag_proxy | cloth_proxy | visibility_integral | corridor_availability | reocclusion_rate | persistence_horizon_mae | disturbance_cost |
|
| 207 |
+
| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
|
| 208 |
+
| interaction_clip | 0.3055555555555556 | 0.2916666666666667 | 0.2916666666666667 | 0.3333333333333333 | 10.379729785852962 | 0.38910322284532917 | 0.026909722222222224 | 3.8014686041765726 | 0.392014082081409 |
|
| 209 |
+
| backbone_clip | 0.3333333333333333 | 0.2916666666666667 | 0.4166666666666667 | 0.2916666666666667 | 5.090670637786388 | 0.30186899772120845 | 0.013541666666666667 | 0.0 | 0.36051381931045196 |
|
| 210 |
+
| reveal_clip | 0.20833333333333334 | 0.20833333333333334 | 0.25 | 0.16666666666666666 | 48.426281129320465 | 0.8251730443702804 | 0.06718750000000001 | 0.9353624902194482 | 0.709741123020649 |
|
| 211 |
+
|
| 212 |
+
### Proxy Diagnostics
|
| 213 |
+
|
| 214 |
+
| Run | Planner top-1 accuracy | Planner regret | Risk calibration MSE | Role collapse rate | Samples | JSON |
|
| 215 |
+
| --- | ---: | ---: | ---: | ---: | ---: | --- |
|
| 216 |
+
| interaction-state actionhist full | 0.1984732824427481 | 0.07150506228208542 | 0.009851997718214989 | 0.0 | 131 | `artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/diagnostics/proxy_diagnostics.json` |
|
| 217 |
+
| interaction-state recency oracleft full | 0.2824427480916031 | 0.24119873344898224 | 0.009003574028611183 | 0.0 | 131 | `artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/diagnostics/proxy_diagnostics.json` |
|
| 218 |
+
| interaction-state CLIP actionhist full | 0.3253968253968254 | 0.1786193549633026 | 0.01645304262638092 | 0.0 | 126 | `artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/diagnostics/proxy_diagnostics.json` |
|
| 219 |
+
|
| 220 |
+
### Proxy Ablation Results For Actionhist Checkpoint
|
| 221 |
+
|
| 222 |
+
Source files:
|
| 223 |
+
|
| 224 |
+
- `artifacts/outputs/interaction_debug/ablation_none_actionhist/reveal_benchmark.json`
|
| 225 |
+
- `artifacts/outputs/interaction_debug/ablation_no_interaction_head_actionhist/reveal_benchmark.json`
|
| 226 |
+
- `artifacts/outputs/interaction_debug/ablation_no_world_model_actionhist/reveal_benchmark.json`
|
| 227 |
+
- `artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.json`
|
| 228 |
+
- `artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.json`
|
| 229 |
+
- `artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.json`
|
| 230 |
+
|
| 231 |
+
| Ablation | Mean success | foliage_proxy | bag_proxy | cloth_proxy | visibility_integral | corridor_availability | reocclusion_rate | persistence_horizon_mae | disturbance_cost |
|
| 232 |
+
| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
|
| 233 |
+
| full_model | 0.5277777777777778 | 0.4166666666666667 | 0.5416666666666666 | 0.625 | 32.84789120488696 | 0.8711970953477753 | 0.003125 | 1.1544888946683267 | 0.4288607043110662 |
|
| 234 |
+
| no_interaction_head | 0.38888888888888884 | 0.16666666666666666 | 0.5 | 0.5 | 42.193298303418686 | 0.9207814501391517 | 0.016840277777777777 | 0.0 | 0.5719093395810988 |
|
| 235 |
+
| no_world_model | 0.5277777777777778 | 0.4166666666666667 | 0.5416666666666666 | 0.625 | 32.94181125528283 | 0.8710797395971086 | 0.003125 | 1.1577362408331497 | 0.42711537962572443 |
|
| 236 |
+
| no_planner | 0.5277777777777778 | 0.4166666666666667 | 0.5416666666666666 | 0.625 | 32.94181125528283 | 0.8710797395971086 | 0.003125 | 1.1577362408331497 | 0.42711537962572443 |
|
| 237 |
+
| no_role_tokens | 0.5277777777777778 | 0.4166666666666667 | 0.5416666666666666 | 0.625 | 33.69023843109608 | 0.8873094982571073 | 0.0 | 1.165569365169578 | 0.4185725698868434 |
|
| 238 |
+
| short_history | 0.5416666666666666 | 0.4166666666666667 | 0.5833333333333334 | 0.625 | 31.347230527136063 | 0.875287824206882 | 0.0 | 3.0816725173931325 | 0.459634010369579 |
|
| 239 |
+
|
| 240 |
+
### RLBench Open Drawer Rollout Reruns
|
| 241 |
+
|
| 242 |
+
| Output | Raw values | File |
|
| 243 |
+
| --- | --- | --- |
|
| 244 |
+
| rollout rerun with path error | `plan_requested=true`, `plan_applied=true`, `tasks.open_drawer.error="A path could not be found because the target is outside of workspace."`, `mean_success=0.0` | `artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_rerun/rollout_eval.json` |
|
| 245 |
+
| rollout rerun after display and path recovery fixes | `plan_requested=true`, `plan_applied=true`, `tasks.open_drawer.path_recoveries=0`, `tasks.open_drawer.noop_fallbacks=0`, `mean_success=0.0` | `artifacts/outputs/interaction_debug/rlbench_open_drawer_rollout_eval_commit4_recovered/rollout_eval.json` |
|
| 246 |
+
|
| 247 |
## Recreate The Same Software Layout
|
| 248 |
|
| 249 |
Use:
|
artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"interaction": {
|
| 3 |
+
"per_task_success": {
|
| 4 |
+
"foliage_proxy": 0.4166666666666667,
|
| 5 |
+
"bag_proxy": 0.5416666666666666,
|
| 6 |
+
"cloth_proxy": 0.625
|
| 7 |
+
},
|
| 8 |
+
"mean_success": 0.5277777777777778,
|
| 9 |
+
"visibility_integral": 32.94181125528283,
|
| 10 |
+
"corridor_availability": 0.8710797395971086,
|
| 11 |
+
"reocclusion_rate": 0.003125,
|
| 12 |
+
"persistence_horizon_mae": 1.1577362408331497,
|
| 13 |
+
"disturbance_cost": 0.42711537962572443
|
| 14 |
+
}
|
| 15 |
+
}
|
artifacts/outputs/interaction_debug/ablation_no_planner_actionhist/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## interaction
|
| 4 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
|
| 5 |
+
- mean_success: 0.528
|
| 6 |
+
- visibility_integral: 32.942
|
| 7 |
+
- corridor_availability: 0.871
|
| 8 |
+
- reocclusion_rate: 0.003
|
| 9 |
+
- persistence_horizon_mae: 1.158
|
| 10 |
+
- disturbance_cost: 0.427
|
| 11 |
+
- foliage_proxy_success: 0.417
|
| 12 |
+
- bag_proxy_success: 0.542
|
| 13 |
+
- cloth_proxy_success: 0.625
|
artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"interaction": {
|
| 3 |
+
"per_task_success": {
|
| 4 |
+
"foliage_proxy": 0.4166666666666667,
|
| 5 |
+
"bag_proxy": 0.5416666666666666,
|
| 6 |
+
"cloth_proxy": 0.625
|
| 7 |
+
},
|
| 8 |
+
"mean_success": 0.5277777777777778,
|
| 9 |
+
"visibility_integral": 33.69023843109608,
|
| 10 |
+
"corridor_availability": 0.8873094982571073,
|
| 11 |
+
"reocclusion_rate": 0.0,
|
| 12 |
+
"persistence_horizon_mae": 1.165569365169578,
|
| 13 |
+
"disturbance_cost": 0.4185725698868434
|
| 14 |
+
}
|
| 15 |
+
}
|
artifacts/outputs/interaction_debug/ablation_no_role_tokens_actionhist/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## interaction
|
| 4 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
|
| 5 |
+
- mean_success: 0.528
|
| 6 |
+
- visibility_integral: 33.690
|
| 7 |
+
- corridor_availability: 0.887
|
| 8 |
+
- reocclusion_rate: 0.000
|
| 9 |
+
- persistence_horizon_mae: 1.166
|
| 10 |
+
- disturbance_cost: 0.419
|
| 11 |
+
- foliage_proxy_success: 0.417
|
| 12 |
+
- bag_proxy_success: 0.542
|
| 13 |
+
- cloth_proxy_success: 0.625
|
artifacts/outputs/interaction_debug/ablation_no_world_model_actionhist/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## interaction
|
| 4 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
|
| 5 |
+
- mean_success: 0.528
|
| 6 |
+
- visibility_integral: 32.942
|
| 7 |
+
- corridor_availability: 0.871
|
| 8 |
+
- reocclusion_rate: 0.003
|
| 9 |
+
- persistence_horizon_mae: 1.158
|
| 10 |
+
- disturbance_cost: 0.427
|
| 11 |
+
- foliage_proxy_success: 0.417
|
| 12 |
+
- bag_proxy_success: 0.542
|
| 13 |
+
- cloth_proxy_success: 0.625
|
artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"interaction": {
|
| 3 |
+
"per_task_success": {
|
| 4 |
+
"foliage_proxy": 0.4166666666666667,
|
| 5 |
+
"bag_proxy": 0.5833333333333334,
|
| 6 |
+
"cloth_proxy": 0.625
|
| 7 |
+
},
|
| 8 |
+
"mean_success": 0.5416666666666666,
|
| 9 |
+
"visibility_integral": 31.347230527136063,
|
| 10 |
+
"corridor_availability": 0.875287824206882,
|
| 11 |
+
"reocclusion_rate": 0.0,
|
| 12 |
+
"persistence_horizon_mae": 3.0816725173931325,
|
| 13 |
+
"disturbance_cost": 0.459634010369579
|
| 14 |
+
}
|
| 15 |
+
}
|
artifacts/outputs/interaction_debug/ablation_short_history_actionhist/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## interaction
|
| 4 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
|
| 5 |
+
- mean_success: 0.542
|
| 6 |
+
- visibility_integral: 31.347
|
| 7 |
+
- corridor_availability: 0.875
|
| 8 |
+
- reocclusion_rate: 0.000
|
| 9 |
+
- persistence_horizon_mae: 3.082
|
| 10 |
+
- disturbance_cost: 0.460
|
| 11 |
+
- foliage_proxy_success: 0.417
|
| 12 |
+
- bag_proxy_success: 0.583
|
| 13 |
+
- cloth_proxy_success: 0.625
|
artifacts/outputs/interaction_debug/chunk_debug_trace.json
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"label": "rolefix_smoke_old",
|
| 4 |
+
"proxy": "foliage_proxy",
|
| 5 |
+
"best_candidate_index": 2,
|
| 6 |
+
"retrieve_sequence": [
|
| 7 |
+
0.22872358560562134,
|
| 8 |
+
0.7541071176528931,
|
| 9 |
+
0.6303636431694031,
|
| 10 |
+
0.4685209095478058
|
| 11 |
+
],
|
| 12 |
+
"open_sequence": [
|
| 13 |
+
1.2554516792297363,
|
| 14 |
+
0.8975364565849304,
|
| 15 |
+
0.5596103668212891,
|
| 16 |
+
0.4779726266860962
|
| 17 |
+
],
|
| 18 |
+
"template_sequence": [
|
| 19 |
+
0.47042158246040344,
|
| 20 |
+
0.6467143297195435,
|
| 21 |
+
0.5085114240646362,
|
| 22 |
+
0.478359580039978
|
| 23 |
+
]
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"label": "rolefix_smoke_old",
|
| 27 |
+
"proxy": "bag_proxy",
|
| 28 |
+
"best_candidate_index": 2,
|
| 29 |
+
"retrieve_sequence": [
|
| 30 |
+
0.2374069094657898,
|
| 31 |
+
0.7521002292633057,
|
| 32 |
+
0.6305321455001831,
|
| 33 |
+
0.4743019640445709
|
| 34 |
+
],
|
| 35 |
+
"open_sequence": [
|
| 36 |
+
1.257965326309204,
|
| 37 |
+
0.896579384803772,
|
| 38 |
+
0.5625595450401306,
|
| 39 |
+
0.4776189923286438
|
| 40 |
+
],
|
| 41 |
+
"template_sequence": [
|
| 42 |
+
0.47550493478775024,
|
| 43 |
+
0.6366342306137085,
|
| 44 |
+
0.5038254261016846,
|
| 45 |
+
0.4769764542579651
|
| 46 |
+
]
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"label": "rolefix_smoke_old",
|
| 50 |
+
"proxy": "cloth_proxy",
|
| 51 |
+
"best_candidate_index": 2,
|
| 52 |
+
"retrieve_sequence": [
|
| 53 |
+
0.24050980806350708,
|
| 54 |
+
0.7626074552536011,
|
| 55 |
+
0.6310772895812988,
|
| 56 |
+
0.47661182284355164
|
| 57 |
+
],
|
| 58 |
+
"open_sequence": [
|
| 59 |
+
1.2510802745819092,
|
| 60 |
+
0.8940063714981079,
|
| 61 |
+
0.5478025078773499,
|
| 62 |
+
0.470864862203598
|
| 63 |
+
],
|
| 64 |
+
"template_sequence": [
|
| 65 |
+
0.46881186962127686,
|
| 66 |
+
0.6378085613250732,
|
| 67 |
+
0.504069447517395,
|
| 68 |
+
0.4773429036140442
|
| 69 |
+
]
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"label": "actionhist_smoke_new",
|
| 73 |
+
"proxy": "foliage_proxy",
|
| 74 |
+
"best_candidate_index": 0,
|
| 75 |
+
"retrieve_sequence": [
|
| 76 |
+
0.23512092232704163,
|
| 77 |
+
0.5730606317520142,
|
| 78 |
+
0.5967459678649902,
|
| 79 |
+
0.4731495678424835
|
| 80 |
+
],
|
| 81 |
+
"open_sequence": [
|
| 82 |
+
0.6600309014320374,
|
| 83 |
+
0.43168342113494873,
|
| 84 |
+
0.15955285727977753,
|
| 85 |
+
-0.09488785266876221
|
| 86 |
+
],
|
| 87 |
+
"template_sequence": [
|
| 88 |
+
-0.017185214906930923,
|
| 89 |
+
0.017828624695539474,
|
| 90 |
+
0.013375137001276016,
|
| 91 |
+
-0.01390126720070839
|
| 92 |
+
]
|
| 93 |
+
},
|
| 94 |
+
{
|
| 95 |
+
"label": "actionhist_smoke_new",
|
| 96 |
+
"proxy": "bag_proxy",
|
| 97 |
+
"best_candidate_index": 0,
|
| 98 |
+
"retrieve_sequence": [
|
| 99 |
+
0.2351658046245575,
|
| 100 |
+
0.572963535785675,
|
| 101 |
+
0.5971102714538574,
|
| 102 |
+
0.4758695065975189
|
| 103 |
+
],
|
| 104 |
+
"open_sequence": [
|
| 105 |
+
0.6608113646507263,
|
| 106 |
+
0.4318099617958069,
|
| 107 |
+
0.16285540163516998,
|
| 108 |
+
-0.09124644100666046
|
| 109 |
+
],
|
| 110 |
+
"template_sequence": [
|
| 111 |
+
-0.018705788999795914,
|
| 112 |
+
0.016191553324460983,
|
| 113 |
+
0.012765157967805862,
|
| 114 |
+
-0.016781020909547806
|
| 115 |
+
]
|
| 116 |
+
},
|
| 117 |
+
{
|
| 118 |
+
"label": "actionhist_smoke_new",
|
| 119 |
+
"proxy": "cloth_proxy",
|
| 120 |
+
"best_candidate_index": 0,
|
| 121 |
+
"retrieve_sequence": [
|
| 122 |
+
0.23625126481056213,
|
| 123 |
+
0.5730390548706055,
|
| 124 |
+
0.59672611951828,
|
| 125 |
+
0.4727664887905121
|
| 126 |
+
],
|
| 127 |
+
"open_sequence": [
|
| 128 |
+
0.6570022106170654,
|
| 129 |
+
0.4338717460632324,
|
| 130 |
+
0.15934017300605774,
|
| 131 |
+
-0.09580504149198532
|
| 132 |
+
],
|
| 133 |
+
"template_sequence": [
|
| 134 |
+
-0.028799299150705338,
|
| 135 |
+
0.006899785250425339,
|
| 136 |
+
0.004223380237817764,
|
| 137 |
+
-0.026467766612768173
|
| 138 |
+
]
|
| 139 |
+
}
|
| 140 |
+
]
|
artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/diagnostics/proxy_diagnostics.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"planner_top1_accuracy": 0.35294117647058826,
|
| 3 |
+
"planner_regret": 0.017080334946513176,
|
| 4 |
+
"risk_calibration_mse": 0.00906219333410263,
|
| 5 |
+
"role_collapse_rate": 0.0,
|
| 6 |
+
"num_samples": 17
|
| 7 |
+
}
|
artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist_smoke/metrics.json
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"epoch": 0,
|
| 4 |
+
"train": {
|
| 5 |
+
"action": 0.23455160359541574,
|
| 6 |
+
"arm_role": 1.2069129049777985,
|
| 7 |
+
"belief": 0.48631568253040314,
|
| 8 |
+
"corridor": 0.5782903432846069,
|
| 9 |
+
"disturbance": 0.17786112676064172,
|
| 10 |
+
"persistence": 1.815186083316803,
|
| 11 |
+
"phase": 1.3141004741191864,
|
| 12 |
+
"planner_ranking": 0.15019067749381065,
|
| 13 |
+
"planner_risk": 0.05527863139286637,
|
| 14 |
+
"planner_success": 0.6984443863232931,
|
| 15 |
+
"proposal_ranking": 0.10006876041491826,
|
| 16 |
+
"proposal_reconstruction": 0.3053521513938904,
|
| 17 |
+
"proposal_success": 0.6853575110435486,
|
| 18 |
+
"reocclusion": 0.6961739559968313,
|
| 19 |
+
"support_mode": 0.8659396668275198,
|
| 20 |
+
"total": 2.116169492403666,
|
| 21 |
+
"uncertainty": 0.6137877206007639,
|
| 22 |
+
"world_model": 2.6161614656448364
|
| 23 |
+
},
|
| 24 |
+
"val": {
|
| 25 |
+
"action": 0.07151262213786443,
|
| 26 |
+
"arm_role": 0.6764164765675863,
|
| 27 |
+
"belief": 0.36398513118426007,
|
| 28 |
+
"corridor": 0.4683004717032115,
|
| 29 |
+
"disturbance": 0.102281058828036,
|
| 30 |
+
"persistence": 2.114008625348409,
|
| 31 |
+
"phase": 0.9027760624885559,
|
| 32 |
+
"planner_ranking": 0.09026545286178589,
|
| 33 |
+
"planner_risk": 0.02189356298185885,
|
| 34 |
+
"planner_success": 0.6435574690500895,
|
| 35 |
+
"proposal_ranking": 0.16597949465115866,
|
| 36 |
+
"proposal_reconstruction": 0.11828663945198059,
|
| 37 |
+
"proposal_success": 0.6095772981643677,
|
| 38 |
+
"reocclusion": 0.7000808914502462,
|
| 39 |
+
"support_mode": 0.6359505653381348,
|
| 40 |
+
"total": 1.4241125186284382,
|
| 41 |
+
"uncertainty": 0.5725147326787313,
|
| 42 |
+
"world_model": 1.5686078071594238
|
| 43 |
+
}
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
"epoch": 1,
|
| 47 |
+
"train": {
|
| 48 |
+
"action": 0.07887393422424793,
|
| 49 |
+
"arm_role": 0.4496926615635554,
|
| 50 |
+
"belief": 0.28958051403363544,
|
| 51 |
+
"corridor": 0.3720829039812088,
|
| 52 |
+
"disturbance": 0.07337014439205329,
|
| 53 |
+
"persistence": 1.7143786152203877,
|
| 54 |
+
"phase": 0.777398000160853,
|
| 55 |
+
"planner_ranking": 0.14400668690601984,
|
| 56 |
+
"planner_risk": 0.016193983455499012,
|
| 57 |
+
"planner_success": 0.6361206471920013,
|
| 58 |
+
"proposal_ranking": 0.11434461300571759,
|
| 59 |
+
"proposal_reconstruction": 0.11045620342095692,
|
| 60 |
+
"proposal_success": 0.6260021726290385,
|
| 61 |
+
"reocclusion": 0.6881168782711029,
|
| 62 |
+
"support_mode": 0.784478078285853,
|
| 63 |
+
"total": 1.2963247100512187,
|
| 64 |
+
"uncertainty": 0.5047676662604014,
|
| 65 |
+
"world_model": 1.4695208072662354
|
| 66 |
+
},
|
| 67 |
+
"val": {
|
| 68 |
+
"action": 0.05061729749043783,
|
| 69 |
+
"arm_role": 0.2217621256907781,
|
| 70 |
+
"belief": 0.19144149124622345,
|
| 71 |
+
"corridor": 0.33698633313179016,
|
| 72 |
+
"disturbance": 0.019655164952079456,
|
| 73 |
+
"persistence": 2.276299834251404,
|
| 74 |
+
"phase": 0.7830212910970052,
|
| 75 |
+
"planner_ranking": 0.10330406576395035,
|
| 76 |
+
"planner_risk": 0.012047629677302515,
|
| 77 |
+
"planner_success": 0.46883141497770947,
|
| 78 |
+
"proposal_ranking": 0.16881480813026428,
|
| 79 |
+
"proposal_reconstruction": 0.08914910753568013,
|
| 80 |
+
"proposal_success": 0.5338547825813293,
|
| 81 |
+
"reocclusion": 0.7235203385353088,
|
| 82 |
+
"support_mode": 0.6643315752347311,
|
| 83 |
+
"total": 1.1495283842086792,
|
| 84 |
+
"uncertainty": 0.36858222881952923,
|
| 85 |
+
"world_model": 1.2773457169532776
|
| 86 |
+
}
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 2,
|
| 90 |
+
"train": {
|
| 91 |
+
"action": 0.0648206224044164,
|
| 92 |
+
"arm_role": 0.1347198486328125,
|
| 93 |
+
"belief": 0.14715169121821722,
|
| 94 |
+
"corridor": 0.2695915202299754,
|
| 95 |
+
"disturbance": 0.010349508646565178,
|
| 96 |
+
"persistence": 1.7063330213228862,
|
| 97 |
+
"phase": 0.726386179526647,
|
| 98 |
+
"planner_ranking": 0.11673471455772717,
|
| 99 |
+
"planner_risk": 0.009400874686737856,
|
| 100 |
+
"planner_success": 0.6698183119297028,
|
| 101 |
+
"proposal_ranking": 0.10080837706724803,
|
| 102 |
+
"proposal_reconstruction": 0.10316941390434901,
|
| 103 |
+
"proposal_success": 0.6286104818185171,
|
| 104 |
+
"reocclusion": 0.6681396464506785,
|
| 105 |
+
"support_mode": 0.6904432475566864,
|
| 106 |
+
"total": 1.1366514563560486,
|
| 107 |
+
"uncertainty": 0.27301351229349774,
|
| 108 |
+
"world_model": 1.372689664363861
|
| 109 |
+
},
|
| 110 |
+
"val": {
|
| 111 |
+
"action": 0.05020085473855337,
|
| 112 |
+
"arm_role": 0.054195716977119446,
|
| 113 |
+
"belief": 0.12719580034414926,
|
| 114 |
+
"corridor": 0.33358681698640186,
|
| 115 |
+
"disturbance": 0.0010723281108463805,
|
| 116 |
+
"persistence": 2.3125662008921304,
|
| 117 |
+
"phase": 0.7737143238385519,
|
| 118 |
+
"planner_ranking": 0.12118598818778992,
|
| 119 |
+
"planner_risk": 0.008284708329786858,
|
| 120 |
+
"planner_success": 0.6051804622014364,
|
| 121 |
+
"proposal_ranking": 0.1250954990585645,
|
| 122 |
+
"proposal_reconstruction": 0.08273230989774068,
|
| 123 |
+
"proposal_success": 0.5201686124006907,
|
| 124 |
+
"reocclusion": 0.6809982657432556,
|
| 125 |
+
"support_mode": 0.5777197976907095,
|
| 126 |
+
"total": 1.1349389950434368,
|
| 127 |
+
"uncertainty": 0.17320589224497476,
|
| 128 |
+
"world_model": 1.3453394174575806
|
| 129 |
+
}
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"epoch": 3,
|
| 133 |
+
"train": {
|
| 134 |
+
"action": 0.055803545440236725,
|
| 135 |
+
"arm_role": 0.033050537109375,
|
| 136 |
+
"belief": 0.11564020191629727,
|
| 137 |
+
"corridor": 0.256190650165081,
|
| 138 |
+
"disturbance": 0.002490642402941982,
|
| 139 |
+
"persistence": 1.711540162563324,
|
| 140 |
+
"phase": 0.681098093589147,
|
| 141 |
+
"planner_ranking": 0.10920613507429759,
|
| 142 |
+
"planner_risk": 0.010532331497718891,
|
| 143 |
+
"planner_success": 0.6514300604661306,
|
| 144 |
+
"proposal_ranking": 0.08523762846986453,
|
| 145 |
+
"proposal_reconstruction": 0.08513934289415677,
|
| 146 |
+
"proposal_success": 0.6457574268182119,
|
| 147 |
+
"reocclusion": 0.6691893935203552,
|
| 148 |
+
"support_mode": 0.6864420572916666,
|
| 149 |
+
"total": 1.0746445059776306,
|
| 150 |
+
"uncertainty": 0.1379331536591053,
|
| 151 |
+
"world_model": 1.3261052171389263
|
| 152 |
+
},
|
| 153 |
+
"val": {
|
| 154 |
+
"action": 0.04372807095448176,
|
| 155 |
+
"arm_role": 0.014572909101843834,
|
| 156 |
+
"belief": 0.12325718998908997,
|
| 157 |
+
"corridor": 0.344586377342542,
|
| 158 |
+
"disturbance": 0.002586025783481697,
|
| 159 |
+
"persistence": 2.2659462292989097,
|
| 160 |
+
"phase": 0.712437629699707,
|
| 161 |
+
"planner_ranking": 0.1231433277328809,
|
| 162 |
+
"planner_risk": 0.00803024492536982,
|
| 163 |
+
"planner_success": 0.5179306268692017,
|
| 164 |
+
"proposal_ranking": 0.11125253637631734,
|
| 165 |
+
"proposal_reconstruction": 0.07622659454743068,
|
| 166 |
+
"proposal_success": 0.5146457950274149,
|
| 167 |
+
"reocclusion": 0.6703451077143351,
|
| 168 |
+
"support_mode": 0.6071783800919851,
|
| 169 |
+
"total": 1.0756589968999226,
|
| 170 |
+
"uncertainty": 0.10349630812803905,
|
| 171 |
+
"world_model": 1.2806402842203777
|
| 172 |
+
}
|
| 173 |
+
}
|
| 174 |
+
]
|
artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/config_resolved.yaml
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_interaction_state_clip_actionhist
|
| 2 |
+
output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction_debug
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 7
|
| 5 |
+
init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
|
| 6 |
+
init_strict: false
|
| 7 |
+
data:
|
| 8 |
+
proxies:
|
| 9 |
+
- foliage_proxy
|
| 10 |
+
- bag_proxy
|
| 11 |
+
- cloth_proxy
|
| 12 |
+
resolution: 224
|
| 13 |
+
train_episodes_per_proxy: 48
|
| 14 |
+
val_episodes_per_proxy: 16
|
| 15 |
+
train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v5_actionhist.pt
|
| 16 |
+
val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v5_actionhist.pt
|
| 17 |
+
rebuild_dataset: false
|
| 18 |
+
chunk_horizon: 8
|
| 19 |
+
rollout_horizon: 5
|
| 20 |
+
history_steps: 6
|
| 21 |
+
planner_candidates: 8
|
| 22 |
+
seed: 7
|
| 23 |
+
optim:
|
| 24 |
+
epochs: 4
|
| 25 |
+
batch_size: 2
|
| 26 |
+
num_workers: 0
|
| 27 |
+
lr: 0.0003
|
| 28 |
+
weight_decay: 0.0001
|
| 29 |
+
trainer:
|
| 30 |
+
policy_type: interaction_state
|
| 31 |
+
use_bf16: true
|
| 32 |
+
grad_clip_norm: 1.0
|
| 33 |
+
freeze_backbone: true
|
| 34 |
+
gradient_checkpointing: false
|
| 35 |
+
plan_during_train: true
|
| 36 |
+
plan_during_eval: true
|
| 37 |
+
support_mode_conditioning: true
|
| 38 |
+
planner_mode: trainable
|
| 39 |
+
policy:
|
| 40 |
+
backbone:
|
| 41 |
+
model_name: openai/clip-vit-base-patch32
|
| 42 |
+
hidden_dim: 512
|
| 43 |
+
max_text_tokens: 32
|
| 44 |
+
freeze_backbone: true
|
| 45 |
+
gradient_checkpointing: false
|
| 46 |
+
use_dummy_backbone: false
|
| 47 |
+
fusion:
|
| 48 |
+
hidden_dim: 512
|
| 49 |
+
num_cameras: 3
|
| 50 |
+
num_layers: 4
|
| 51 |
+
num_heads: 8
|
| 52 |
+
ff_dim: 2048
|
| 53 |
+
dropout: 0.1
|
| 54 |
+
proprio_dim: 32
|
| 55 |
+
proprio_tokens: 1
|
| 56 |
+
memory:
|
| 57 |
+
hidden_dim: 512
|
| 58 |
+
action_dim: 14
|
| 59 |
+
history_steps: 6
|
| 60 |
+
num_layers: 2
|
| 61 |
+
dropout: 0.1
|
| 62 |
+
memory_bank_size: 4
|
| 63 |
+
num_heads: 8
|
| 64 |
+
max_history_steps: 8
|
| 65 |
+
decoder:
|
| 66 |
+
hidden_dim: 512
|
| 67 |
+
num_heads: 8
|
| 68 |
+
num_layers: 4
|
| 69 |
+
ff_dim: 2048
|
| 70 |
+
dropout: 0.1
|
| 71 |
+
chunk_size: 8
|
| 72 |
+
action_dim: 14
|
| 73 |
+
arm_action_dim: 7
|
| 74 |
+
num_candidates: 8
|
| 75 |
+
num_phases: 5
|
| 76 |
+
num_arm_roles: 4
|
| 77 |
+
reveal_head:
|
| 78 |
+
hidden_dim: 512
|
| 79 |
+
num_support_modes: 3
|
| 80 |
+
num_approach_templates: 32
|
| 81 |
+
rollout_horizon: 5
|
| 82 |
+
belief_map_size: 32
|
| 83 |
+
field_size: 16
|
| 84 |
+
num_heads: 8
|
| 85 |
+
predict_belief_map: true
|
| 86 |
+
num_phases: 5
|
| 87 |
+
num_arm_roles: 4
|
| 88 |
+
num_interaction_tokens: 8
|
| 89 |
+
world_model:
|
| 90 |
+
hidden_dim: 512
|
| 91 |
+
action_dim: 14
|
| 92 |
+
num_support_modes: 3
|
| 93 |
+
num_approach_templates: 32
|
| 94 |
+
rollout_horizon: 5
|
| 95 |
+
field_size: 16
|
| 96 |
+
num_heads: 8
|
| 97 |
+
num_phases: 5
|
| 98 |
+
num_arm_roles: 4
|
| 99 |
+
num_interaction_tokens: 8
|
| 100 |
+
belief_map_size: 32
|
| 101 |
+
predict_belief_map: true
|
| 102 |
+
planner:
|
| 103 |
+
hidden_dim: 512
|
| 104 |
+
num_candidates: 8
|
| 105 |
+
action_dim: 14
|
| 106 |
+
num_support_modes: 3
|
| 107 |
+
utility_margin: 0.1
|
| 108 |
+
num_heads: 8
|
| 109 |
+
num_layers: 2
|
| 110 |
+
num_phases: 5
|
| 111 |
+
num_arm_roles: 4
|
| 112 |
+
loss_weights:
|
| 113 |
+
action: 1.0
|
| 114 |
+
phase: 0.1
|
| 115 |
+
arm_role: 0.15
|
| 116 |
+
support_mode: 0.1
|
| 117 |
+
corridor: 0.15
|
| 118 |
+
persistence: 0.05
|
| 119 |
+
disturbance: 0.05
|
| 120 |
+
world_model: 0.2
|
| 121 |
+
belief: 0.05
|
| 122 |
+
planner_success: 0.25
|
| 123 |
+
planner_risk: 0.1
|
| 124 |
+
planner_ranking: 0.2
|
| 125 |
+
proposal_reconstruction: 0.1
|
| 126 |
+
proposal_success: 0.15
|
| 127 |
+
proposal_ranking: 0.2
|
artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/diagnostics/proxy_diagnostics.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"planner_top1_accuracy": 0.3253968253968254,
|
| 3 |
+
"planner_regret": 0.1786193549633026,
|
| 4 |
+
"risk_calibration_mse": 0.01645304262638092,
|
| 5 |
+
"role_collapse_rate": 0.0,
|
| 6 |
+
"num_samples": 126
|
| 7 |
+
}
|
artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/metrics.json
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"epoch": 0,
|
| 4 |
+
"train": {
|
| 5 |
+
"action": 0.17748002509016017,
|
| 6 |
+
"arm_role": 0.01832497369556527,
|
| 7 |
+
"belief": 0.12835299933612035,
|
| 8 |
+
"corridor": 0.2547702425456952,
|
| 9 |
+
"disturbance": 0.00850862705773346,
|
| 10 |
+
"persistence": 4.974573742500774,
|
| 11 |
+
"phase": 0.7463235106143652,
|
| 12 |
+
"planner_ranking": 1.3405994254881175,
|
| 13 |
+
"planner_risk": 0.024703218532160547,
|
| 14 |
+
"planner_success": 0.7777972318115035,
|
| 15 |
+
"proposal_ranking": 1.165930202494117,
|
| 16 |
+
"proposal_reconstruction": 0.2531185241035766,
|
| 17 |
+
"proposal_success": 0.6786430877540748,
|
| 18 |
+
"reocclusion": 0.7147265204584411,
|
| 19 |
+
"support_mode": 0.7602155595549738,
|
| 20 |
+
"total": 2.0788989903415063,
|
| 21 |
+
"uncertainty": 0.03309597126671469,
|
| 22 |
+
"world_model": 3.071348061112209
|
| 23 |
+
},
|
| 24 |
+
"val": {
|
| 25 |
+
"action": 0.03192901705938672,
|
| 26 |
+
"arm_role": 6.15250448592835e-06,
|
| 27 |
+
"belief": 0.10559089872099105,
|
| 28 |
+
"corridor": 0.23193429670636617,
|
| 29 |
+
"disturbance": 0.0022747389350750756,
|
| 30 |
+
"persistence": 3.85837465619284,
|
| 31 |
+
"phase": 0.6875752177503374,
|
| 32 |
+
"planner_ranking": 1.1088495595114571,
|
| 33 |
+
"planner_risk": 0.018587306145549057,
|
| 34 |
+
"planner_success": 0.6127710470131466,
|
| 35 |
+
"proposal_ranking": 1.1232511202494304,
|
| 36 |
+
"proposal_reconstruction": 0.08394021162438015,
|
| 37 |
+
"proposal_success": 0.681461288815453,
|
| 38 |
+
"reocclusion": 0.6769484205851479,
|
| 39 |
+
"support_mode": 0.6654504603809781,
|
| 40 |
+
"total": 1.5210873153474596,
|
| 41 |
+
"uncertainty": 0.011785898017623121,
|
| 42 |
+
"world_model": 1.9750548638994732
|
| 43 |
+
}
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
"epoch": 1,
|
| 47 |
+
"train": {
|
| 48 |
+
"action": 0.030109174476439102,
|
| 49 |
+
"arm_role": 8.612091004536414e-06,
|
| 50 |
+
"belief": 0.104316227781679,
|
| 51 |
+
"corridor": 0.23850143234689197,
|
| 52 |
+
"disturbance": 0.0025595128212472823,
|
| 53 |
+
"persistence": 3.9934506887540766,
|
| 54 |
+
"phase": 0.6901740428664922,
|
| 55 |
+
"planner_ranking": 1.239893207687358,
|
| 56 |
+
"planner_risk": 0.026462018369155793,
|
| 57 |
+
"planner_success": 0.664632208528319,
|
| 58 |
+
"proposal_ranking": 1.1259761543174065,
|
| 59 |
+
"proposal_reconstruction": 0.08132225903072907,
|
| 60 |
+
"proposal_success": 0.6764243753792728,
|
| 61 |
+
"reocclusion": 0.6790863540784227,
|
| 62 |
+
"support_mode": 0.6789359047774869,
|
| 63 |
+
"total": 1.550120969093283,
|
| 64 |
+
"uncertainty": 0.007208701525449128,
|
| 65 |
+
"world_model": 1.8854006223029491
|
| 66 |
+
},
|
| 67 |
+
"val": {
|
| 68 |
+
"action": 0.02197206175575654,
|
| 69 |
+
"arm_role": 2.089118947517977e-05,
|
| 70 |
+
"belief": 0.09741538857656812,
|
| 71 |
+
"corridor": 0.22761633885758265,
|
| 72 |
+
"disturbance": 0.0017140347070323067,
|
| 73 |
+
"persistence": 3.6565530148763505,
|
| 74 |
+
"phase": 0.6668311646060338,
|
| 75 |
+
"planner_ranking": 1.1634496355813646,
|
| 76 |
+
"planner_risk": 0.047890776559518324,
|
| 77 |
+
"planner_success": 0.5928089713293409,
|
| 78 |
+
"proposal_ranking": 1.1224727725225783,
|
| 79 |
+
"proposal_reconstruction": 0.06971718163953887,
|
| 80 |
+
"proposal_success": 0.6724110945822701,
|
| 81 |
+
"reocclusion": 0.6611922624565306,
|
| 82 |
+
"support_mode": 0.6766654224622817,
|
| 83 |
+
"total": 1.4845811980111259,
|
| 84 |
+
"uncertainty": 0.004251384046963519,
|
| 85 |
+
"world_model": 1.875271028942532
|
| 86 |
+
}
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 2,
|
| 90 |
+
"train": {
|
| 91 |
+
"action": 0.02331933839470928,
|
| 92 |
+
"arm_role": 8.285201656880802e-06,
|
| 93 |
+
"belief": 0.1041115006695243,
|
| 94 |
+
"corridor": 0.2380418391820258,
|
| 95 |
+
"disturbance": 0.002577872130260731,
|
| 96 |
+
"persistence": 3.555448654902543,
|
| 97 |
+
"phase": 0.6753773314790575,
|
| 98 |
+
"planner_ranking": 1.1668821538930163,
|
| 99 |
+
"planner_risk": 0.020309378087023242,
|
| 100 |
+
"planner_success": 0.623614322296612,
|
| 101 |
+
"proposal_ranking": 1.1245252312165905,
|
| 102 |
+
"proposal_reconstruction": 0.07289492924019929,
|
| 103 |
+
"proposal_success": 0.6749192248464255,
|
| 104 |
+
"reocclusion": 0.6692662537097931,
|
| 105 |
+
"support_mode": 0.6756738792539267,
|
| 106 |
+
"total": 1.4613653153025044,
|
| 107 |
+
"uncertainty": 0.012397505843296725,
|
| 108 |
+
"world_model": 1.7293687263084332
|
| 109 |
+
},
|
| 110 |
+
"val": {
|
| 111 |
+
"action": 0.03152821023785879,
|
| 112 |
+
"arm_role": 1.913968098564048e-06,
|
| 113 |
+
"belief": 0.10549203495657633,
|
| 114 |
+
"corridor": 0.20762673824552505,
|
| 115 |
+
"disturbance": 0.0014280516678275214,
|
| 116 |
+
"persistence": 2.0710838323547724,
|
| 117 |
+
"phase": 0.6628126601378123,
|
| 118 |
+
"planner_ranking": 1.0928319522312708,
|
| 119 |
+
"planner_risk": 0.021120590453464833,
|
| 120 |
+
"planner_success": 0.5570865495810433,
|
| 121 |
+
"proposal_ranking": 1.1183055109447904,
|
| 122 |
+
"proposal_reconstruction": 0.08380144739907885,
|
| 123 |
+
"proposal_success": 0.6772379392669314,
|
| 124 |
+
"reocclusion": 0.6509462926122878,
|
| 125 |
+
"support_mode": 0.6650945194183834,
|
| 126 |
+
"total": 1.3528291233002194,
|
| 127 |
+
"uncertainty": 0.0025819382726839403,
|
| 128 |
+
"world_model": 1.7092195824971275
|
| 129 |
+
}
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"epoch": 3,
|
| 133 |
+
"train": {
|
| 134 |
+
"action": 0.021615957470698506,
|
| 135 |
+
"arm_role": 9.251202588306048e-07,
|
| 136 |
+
"belief": 0.10970319874818725,
|
| 137 |
+
"corridor": 0.2036819358732704,
|
| 138 |
+
"disturbance": 0.002751460597729129,
|
| 139 |
+
"persistence": 1.0053820329420464,
|
| 140 |
+
"phase": 0.4392661486620678,
|
| 141 |
+
"planner_ranking": 1.1170655027109915,
|
| 142 |
+
"planner_risk": 0.023540541935585323,
|
| 143 |
+
"planner_success": 0.574678816408387,
|
| 144 |
+
"proposal_ranking": 1.1232363391297027,
|
| 145 |
+
"proposal_reconstruction": 0.07163417897143289,
|
| 146 |
+
"proposal_success": 0.6759519848523964,
|
| 147 |
+
"reocclusion": 0.3594565280497986,
|
| 148 |
+
"support_mode": 0.1658484423971925,
|
| 149 |
+
"total": 1.2094011244349454,
|
| 150 |
+
"uncertainty": 0.001485606099231278,
|
| 151 |
+
"world_model": 1.6549255024076133
|
| 152 |
+
},
|
| 153 |
+
"val": {
|
| 154 |
+
"action": 0.01307902658092124,
|
| 155 |
+
"arm_role": 3.7938821602466983e-07,
|
| 156 |
+
"belief": 0.10557046154188732,
|
| 157 |
+
"corridor": 0.18899264949418249,
|
| 158 |
+
"disturbance": 0.003063943787498237,
|
| 159 |
+
"persistence": 0.6038030874915421,
|
| 160 |
+
"phase": 0.19549169234694944,
|
| 161 |
+
"planner_ranking": 1.1149483919143677,
|
| 162 |
+
"planner_risk": 0.01645888195424858,
|
| 163 |
+
"planner_success": 0.5231598180437845,
|
| 164 |
+
"proposal_ranking": 1.1176083068999032,
|
| 165 |
+
"proposal_reconstruction": 0.05967588533484747,
|
| 166 |
+
"proposal_success": 0.6721902480201115,
|
| 167 |
+
"reocclusion": 0.1391045902338293,
|
| 168 |
+
"support_mode": 0.0005700885616649415,
|
| 169 |
+
"total": 1.1205205075324527,
|
| 170 |
+
"uncertainty": 0.0005439088721946714,
|
| 171 |
+
"world_model": 1.6766679949230618
|
| 172 |
+
}
|
| 173 |
+
}
|
| 174 |
+
]
|
artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/summary.json
ADDED
|
@@ -0,0 +1,573 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"experiment_name": "proxy_interaction_state_clip_actionhist",
|
| 3 |
+
"device": "cuda",
|
| 4 |
+
"best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/checkpoint_best.pt",
|
| 5 |
+
"final_train_total": 1.2094011244349454,
|
| 6 |
+
"final_val_total": 1.1205205075324527,
|
| 7 |
+
"num_train_samples": 382,
|
| 8 |
+
"num_val_samples": 126,
|
| 9 |
+
"planner_mode": "trainable",
|
| 10 |
+
"frozen_modules": [],
|
| 11 |
+
"init_info": {
|
| 12 |
+
"path": "/workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt",
|
| 13 |
+
"loaded_keys": 467,
|
| 14 |
+
"skipped_shape_mismatch_keys": [
|
| 15 |
+
"memory.gru.weight_ih_l0",
|
| 16 |
+
"memory.gru.weight_hh_l0",
|
| 17 |
+
"memory.gru.bias_ih_l0",
|
| 18 |
+
"memory.gru.bias_hh_l0",
|
| 19 |
+
"decoder.actor_role_bias",
|
| 20 |
+
"decoder.revealer_decoder.layers.0.self_attn.in_proj_weight",
|
| 21 |
+
"decoder.revealer_decoder.layers.0.self_attn.in_proj_bias",
|
| 22 |
+
"decoder.revealer_decoder.layers.0.self_attn.out_proj.weight",
|
| 23 |
+
"decoder.revealer_decoder.layers.0.self_attn.out_proj.bias",
|
| 24 |
+
"decoder.revealer_decoder.layers.0.multihead_attn.in_proj_weight",
|
| 25 |
+
"decoder.revealer_decoder.layers.0.multihead_attn.in_proj_bias",
|
| 26 |
+
"decoder.revealer_decoder.layers.0.multihead_attn.out_proj.weight",
|
| 27 |
+
"decoder.revealer_decoder.layers.0.multihead_attn.out_proj.bias",
|
| 28 |
+
"decoder.revealer_decoder.layers.0.linear1.weight",
|
| 29 |
+
"decoder.revealer_decoder.layers.0.linear1.bias",
|
| 30 |
+
"decoder.revealer_decoder.layers.0.linear2.weight",
|
| 31 |
+
"decoder.revealer_decoder.layers.0.linear2.bias",
|
| 32 |
+
"decoder.revealer_decoder.layers.0.norm1.weight",
|
| 33 |
+
"decoder.revealer_decoder.layers.0.norm1.bias",
|
| 34 |
+
"decoder.revealer_decoder.layers.0.norm2.weight",
|
| 35 |
+
"decoder.revealer_decoder.layers.0.norm2.bias",
|
| 36 |
+
"decoder.revealer_decoder.layers.0.norm3.weight",
|
| 37 |
+
"decoder.revealer_decoder.layers.0.norm3.bias",
|
| 38 |
+
"decoder.revealer_decoder.layers.1.self_attn.in_proj_weight",
|
| 39 |
+
"decoder.revealer_decoder.layers.1.self_attn.in_proj_bias",
|
| 40 |
+
"decoder.revealer_decoder.layers.1.self_attn.out_proj.weight",
|
| 41 |
+
"decoder.revealer_decoder.layers.1.self_attn.out_proj.bias",
|
| 42 |
+
"decoder.revealer_decoder.layers.1.multihead_attn.in_proj_weight",
|
| 43 |
+
"decoder.revealer_decoder.layers.1.multihead_attn.in_proj_bias",
|
| 44 |
+
"decoder.revealer_decoder.layers.1.multihead_attn.out_proj.weight",
|
| 45 |
+
"decoder.revealer_decoder.layers.1.multihead_attn.out_proj.bias",
|
| 46 |
+
"decoder.revealer_decoder.layers.1.linear1.weight",
|
| 47 |
+
"decoder.revealer_decoder.layers.1.linear1.bias",
|
| 48 |
+
"decoder.revealer_decoder.layers.1.linear2.weight",
|
| 49 |
+
"decoder.revealer_decoder.layers.1.linear2.bias",
|
| 50 |
+
"decoder.revealer_decoder.layers.1.norm1.weight",
|
| 51 |
+
"decoder.revealer_decoder.layers.1.norm1.bias",
|
| 52 |
+
"decoder.revealer_decoder.layers.1.norm2.weight",
|
| 53 |
+
"decoder.revealer_decoder.layers.1.norm2.bias",
|
| 54 |
+
"decoder.revealer_decoder.layers.1.norm3.weight",
|
| 55 |
+
"decoder.revealer_decoder.layers.1.norm3.bias",
|
| 56 |
+
"decoder.revealer_decoder.layers.2.self_attn.in_proj_weight",
|
| 57 |
+
"decoder.revealer_decoder.layers.2.self_attn.in_proj_bias",
|
| 58 |
+
"decoder.revealer_decoder.layers.2.self_attn.out_proj.weight",
|
| 59 |
+
"decoder.revealer_decoder.layers.2.self_attn.out_proj.bias",
|
| 60 |
+
"decoder.revealer_decoder.layers.2.multihead_attn.in_proj_weight",
|
| 61 |
+
"decoder.revealer_decoder.layers.2.multihead_attn.in_proj_bias",
|
| 62 |
+
"decoder.revealer_decoder.layers.2.multihead_attn.out_proj.weight",
|
| 63 |
+
"decoder.revealer_decoder.layers.2.multihead_attn.out_proj.bias",
|
| 64 |
+
"decoder.revealer_decoder.layers.2.linear1.weight",
|
| 65 |
+
"decoder.revealer_decoder.layers.2.linear1.bias",
|
| 66 |
+
"decoder.revealer_decoder.layers.2.linear2.weight",
|
| 67 |
+
"decoder.revealer_decoder.layers.2.linear2.bias",
|
| 68 |
+
"decoder.revealer_decoder.layers.2.norm1.weight",
|
| 69 |
+
"decoder.revealer_decoder.layers.2.norm1.bias",
|
| 70 |
+
"decoder.revealer_decoder.layers.2.norm2.weight",
|
| 71 |
+
"decoder.revealer_decoder.layers.2.norm2.bias",
|
| 72 |
+
"decoder.revealer_decoder.layers.2.norm3.weight",
|
| 73 |
+
"decoder.revealer_decoder.layers.2.norm3.bias",
|
| 74 |
+
"decoder.revealer_decoder.layers.3.self_attn.in_proj_weight",
|
| 75 |
+
"decoder.revealer_decoder.layers.3.self_attn.in_proj_bias",
|
| 76 |
+
"decoder.revealer_decoder.layers.3.self_attn.out_proj.weight",
|
| 77 |
+
"decoder.revealer_decoder.layers.3.self_attn.out_proj.bias",
|
| 78 |
+
"decoder.revealer_decoder.layers.3.multihead_attn.in_proj_weight",
|
| 79 |
+
"decoder.revealer_decoder.layers.3.multihead_attn.in_proj_bias",
|
| 80 |
+
"decoder.revealer_decoder.layers.3.multihead_attn.out_proj.weight",
|
| 81 |
+
"decoder.revealer_decoder.layers.3.multihead_attn.out_proj.bias",
|
| 82 |
+
"decoder.revealer_decoder.layers.3.linear1.weight",
|
| 83 |
+
"decoder.revealer_decoder.layers.3.linear1.bias",
|
| 84 |
+
"decoder.revealer_decoder.layers.3.linear2.weight",
|
| 85 |
+
"decoder.revealer_decoder.layers.3.linear2.bias",
|
| 86 |
+
"decoder.revealer_decoder.layers.3.norm1.weight",
|
| 87 |
+
"decoder.revealer_decoder.layers.3.norm1.bias",
|
| 88 |
+
"decoder.revealer_decoder.layers.3.norm2.weight",
|
| 89 |
+
"decoder.revealer_decoder.layers.3.norm2.bias",
|
| 90 |
+
"decoder.revealer_decoder.layers.3.norm3.weight",
|
| 91 |
+
"decoder.revealer_decoder.layers.3.norm3.bias",
|
| 92 |
+
"decoder.actor_decoder.layers.0.self_attn.in_proj_weight",
|
| 93 |
+
"decoder.actor_decoder.layers.0.self_attn.in_proj_bias",
|
| 94 |
+
"decoder.actor_decoder.layers.0.self_attn.out_proj.weight",
|
| 95 |
+
"decoder.actor_decoder.layers.0.self_attn.out_proj.bias",
|
| 96 |
+
"decoder.actor_decoder.layers.0.multihead_attn.in_proj_weight",
|
| 97 |
+
"decoder.actor_decoder.layers.0.multihead_attn.in_proj_bias",
|
| 98 |
+
"decoder.actor_decoder.layers.0.multihead_attn.out_proj.weight",
|
| 99 |
+
"decoder.actor_decoder.layers.0.multihead_attn.out_proj.bias",
|
| 100 |
+
"decoder.actor_decoder.layers.0.linear1.weight",
|
| 101 |
+
"decoder.actor_decoder.layers.0.linear1.bias",
|
| 102 |
+
"decoder.actor_decoder.layers.0.linear2.weight",
|
| 103 |
+
"decoder.actor_decoder.layers.0.linear2.bias",
|
| 104 |
+
"decoder.actor_decoder.layers.0.norm1.weight",
|
| 105 |
+
"decoder.actor_decoder.layers.0.norm1.bias",
|
| 106 |
+
"decoder.actor_decoder.layers.0.norm2.weight",
|
| 107 |
+
"decoder.actor_decoder.layers.0.norm2.bias",
|
| 108 |
+
"decoder.actor_decoder.layers.0.norm3.weight",
|
| 109 |
+
"decoder.actor_decoder.layers.0.norm3.bias",
|
| 110 |
+
"decoder.actor_decoder.layers.1.self_attn.in_proj_weight",
|
| 111 |
+
"decoder.actor_decoder.layers.1.self_attn.in_proj_bias",
|
| 112 |
+
"decoder.actor_decoder.layers.1.self_attn.out_proj.weight",
|
| 113 |
+
"decoder.actor_decoder.layers.1.self_attn.out_proj.bias",
|
| 114 |
+
"decoder.actor_decoder.layers.1.multihead_attn.in_proj_weight",
|
| 115 |
+
"decoder.actor_decoder.layers.1.multihead_attn.in_proj_bias",
|
| 116 |
+
"decoder.actor_decoder.layers.1.multihead_attn.out_proj.weight",
|
| 117 |
+
"decoder.actor_decoder.layers.1.multihead_attn.out_proj.bias",
|
| 118 |
+
"decoder.actor_decoder.layers.1.linear1.weight",
|
| 119 |
+
"decoder.actor_decoder.layers.1.linear1.bias",
|
| 120 |
+
"decoder.actor_decoder.layers.1.linear2.weight",
|
| 121 |
+
"decoder.actor_decoder.layers.1.linear2.bias",
|
| 122 |
+
"decoder.actor_decoder.layers.1.norm1.weight",
|
| 123 |
+
"decoder.actor_decoder.layers.1.norm1.bias",
|
| 124 |
+
"decoder.actor_decoder.layers.1.norm2.weight",
|
| 125 |
+
"decoder.actor_decoder.layers.1.norm2.bias",
|
| 126 |
+
"decoder.actor_decoder.layers.1.norm3.weight",
|
| 127 |
+
"decoder.actor_decoder.layers.1.norm3.bias",
|
| 128 |
+
"decoder.actor_decoder.layers.2.self_attn.in_proj_weight",
|
| 129 |
+
"decoder.actor_decoder.layers.2.self_attn.in_proj_bias",
|
| 130 |
+
"decoder.actor_decoder.layers.2.self_attn.out_proj.weight",
|
| 131 |
+
"decoder.actor_decoder.layers.2.self_attn.out_proj.bias",
|
| 132 |
+
"decoder.actor_decoder.layers.2.multihead_attn.in_proj_weight",
|
| 133 |
+
"decoder.actor_decoder.layers.2.multihead_attn.in_proj_bias",
|
| 134 |
+
"decoder.actor_decoder.layers.2.multihead_attn.out_proj.weight",
|
| 135 |
+
"decoder.actor_decoder.layers.2.multihead_attn.out_proj.bias",
|
| 136 |
+
"decoder.actor_decoder.layers.2.linear1.weight",
|
| 137 |
+
"decoder.actor_decoder.layers.2.linear1.bias",
|
| 138 |
+
"decoder.actor_decoder.layers.2.linear2.weight",
|
| 139 |
+
"decoder.actor_decoder.layers.2.linear2.bias",
|
| 140 |
+
"decoder.actor_decoder.layers.2.norm1.weight",
|
| 141 |
+
"decoder.actor_decoder.layers.2.norm1.bias",
|
| 142 |
+
"decoder.actor_decoder.layers.2.norm2.weight",
|
| 143 |
+
"decoder.actor_decoder.layers.2.norm2.bias",
|
| 144 |
+
"decoder.actor_decoder.layers.2.norm3.weight",
|
| 145 |
+
"decoder.actor_decoder.layers.2.norm3.bias",
|
| 146 |
+
"decoder.actor_decoder.layers.3.self_attn.in_proj_weight",
|
| 147 |
+
"decoder.actor_decoder.layers.3.self_attn.in_proj_bias",
|
| 148 |
+
"decoder.actor_decoder.layers.3.self_attn.out_proj.weight",
|
| 149 |
+
"decoder.actor_decoder.layers.3.self_attn.out_proj.bias",
|
| 150 |
+
"decoder.actor_decoder.layers.3.multihead_attn.in_proj_weight",
|
| 151 |
+
"decoder.actor_decoder.layers.3.multihead_attn.in_proj_bias",
|
| 152 |
+
"decoder.actor_decoder.layers.3.multihead_attn.out_proj.weight",
|
| 153 |
+
"decoder.actor_decoder.layers.3.multihead_attn.out_proj.bias",
|
| 154 |
+
"decoder.actor_decoder.layers.3.linear1.weight",
|
| 155 |
+
"decoder.actor_decoder.layers.3.linear1.bias",
|
| 156 |
+
"decoder.actor_decoder.layers.3.linear2.weight",
|
| 157 |
+
"decoder.actor_decoder.layers.3.linear2.bias",
|
| 158 |
+
"decoder.actor_decoder.layers.3.norm1.weight",
|
| 159 |
+
"decoder.actor_decoder.layers.3.norm1.bias",
|
| 160 |
+
"decoder.actor_decoder.layers.3.norm2.weight",
|
| 161 |
+
"decoder.actor_decoder.layers.3.norm2.bias",
|
| 162 |
+
"decoder.actor_decoder.layers.3.norm3.weight",
|
| 163 |
+
"decoder.actor_decoder.layers.3.norm3.bias",
|
| 164 |
+
"decoder.revealer_mean.weight",
|
| 165 |
+
"decoder.revealer_mean.bias",
|
| 166 |
+
"decoder.revealer_log_std.weight",
|
| 167 |
+
"decoder.revealer_log_std.bias",
|
| 168 |
+
"decoder.actor_mean.weight",
|
| 169 |
+
"decoder.actor_mean.bias",
|
| 170 |
+
"decoder.actor_log_std.weight",
|
| 171 |
+
"decoder.actor_log_std.bias",
|
| 172 |
+
"decoder.proposal_score.1.weight",
|
| 173 |
+
"decoder.proposal_score.1.bias"
|
| 174 |
+
],
|
| 175 |
+
"missing_keys": [
|
| 176 |
+
"memory.position_embedding",
|
| 177 |
+
"memory.bank_queries",
|
| 178 |
+
"memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
|
| 179 |
+
"memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
|
| 180 |
+
"memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
|
| 181 |
+
"memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
|
| 182 |
+
"memory.sequence_encoder.layers.0.linear1.weight",
|
| 183 |
+
"memory.sequence_encoder.layers.0.linear1.bias",
|
| 184 |
+
"memory.sequence_encoder.layers.0.linear2.weight",
|
| 185 |
+
"memory.sequence_encoder.layers.0.linear2.bias",
|
| 186 |
+
"memory.sequence_encoder.layers.0.norm1.weight",
|
| 187 |
+
"memory.sequence_encoder.layers.0.norm1.bias",
|
| 188 |
+
"memory.sequence_encoder.layers.0.norm2.weight",
|
| 189 |
+
"memory.sequence_encoder.layers.0.norm2.bias",
|
| 190 |
+
"memory.sequence_encoder.layers.1.self_attn.in_proj_weight",
|
| 191 |
+
"memory.sequence_encoder.layers.1.self_attn.in_proj_bias",
|
| 192 |
+
"memory.sequence_encoder.layers.1.self_attn.out_proj.weight",
|
| 193 |
+
"memory.sequence_encoder.layers.1.self_attn.out_proj.bias",
|
| 194 |
+
"memory.sequence_encoder.layers.1.linear1.weight",
|
| 195 |
+
"memory.sequence_encoder.layers.1.linear1.bias",
|
| 196 |
+
"memory.sequence_encoder.layers.1.linear2.weight",
|
| 197 |
+
"memory.sequence_encoder.layers.1.linear2.bias",
|
| 198 |
+
"memory.sequence_encoder.layers.1.norm1.weight",
|
| 199 |
+
"memory.sequence_encoder.layers.1.norm1.bias",
|
| 200 |
+
"memory.sequence_encoder.layers.1.norm2.weight",
|
| 201 |
+
"memory.sequence_encoder.layers.1.norm2.bias",
|
| 202 |
+
"memory.bank_attention.in_proj_weight",
|
| 203 |
+
"memory.bank_attention.in_proj_bias",
|
| 204 |
+
"memory.bank_attention.out_proj.weight",
|
| 205 |
+
"memory.bank_attention.out_proj.bias",
|
| 206 |
+
"memory.bank_mlp.0.weight",
|
| 207 |
+
"memory.bank_mlp.0.bias",
|
| 208 |
+
"memory.bank_mlp.1.weight",
|
| 209 |
+
"memory.bank_mlp.1.bias",
|
| 210 |
+
"memory.bank_mlp.3.weight",
|
| 211 |
+
"memory.bank_mlp.3.bias",
|
| 212 |
+
"memory.action_proj.0.weight",
|
| 213 |
+
"memory.action_proj.0.bias",
|
| 214 |
+
"memory.action_proj.1.weight",
|
| 215 |
+
"memory.action_proj.1.bias",
|
| 216 |
+
"decoder.right_decoder.layers.0.self_attn.in_proj_weight",
|
| 217 |
+
"decoder.right_decoder.layers.0.self_attn.in_proj_bias",
|
| 218 |
+
"decoder.right_decoder.layers.0.self_attn.out_proj.weight",
|
| 219 |
+
"decoder.right_decoder.layers.0.self_attn.out_proj.bias",
|
| 220 |
+
"decoder.right_decoder.layers.0.multihead_attn.in_proj_weight",
|
| 221 |
+
"decoder.right_decoder.layers.0.multihead_attn.in_proj_bias",
|
| 222 |
+
"decoder.right_decoder.layers.0.multihead_attn.out_proj.weight",
|
| 223 |
+
"decoder.right_decoder.layers.0.multihead_attn.out_proj.bias",
|
| 224 |
+
"decoder.right_decoder.layers.0.linear1.weight",
|
| 225 |
+
"decoder.right_decoder.layers.0.linear1.bias",
|
| 226 |
+
"decoder.right_decoder.layers.0.linear2.weight",
|
| 227 |
+
"decoder.right_decoder.layers.0.linear2.bias",
|
| 228 |
+
"decoder.right_decoder.layers.0.norm1.weight",
|
| 229 |
+
"decoder.right_decoder.layers.0.norm1.bias",
|
| 230 |
+
"decoder.right_decoder.layers.0.norm2.weight",
|
| 231 |
+
"decoder.right_decoder.layers.0.norm2.bias",
|
| 232 |
+
"decoder.right_decoder.layers.0.norm3.weight",
|
| 233 |
+
"decoder.right_decoder.layers.0.norm3.bias",
|
| 234 |
+
"decoder.right_decoder.layers.1.self_attn.in_proj_weight",
|
| 235 |
+
"decoder.right_decoder.layers.1.self_attn.in_proj_bias",
|
| 236 |
+
"decoder.right_decoder.layers.1.self_attn.out_proj.weight",
|
| 237 |
+
"decoder.right_decoder.layers.1.self_attn.out_proj.bias",
|
| 238 |
+
"decoder.right_decoder.layers.1.multihead_attn.in_proj_weight",
|
| 239 |
+
"decoder.right_decoder.layers.1.multihead_attn.in_proj_bias",
|
| 240 |
+
"decoder.right_decoder.layers.1.multihead_attn.out_proj.weight",
|
| 241 |
+
"decoder.right_decoder.layers.1.multihead_attn.out_proj.bias",
|
| 242 |
+
"decoder.right_decoder.layers.1.linear1.weight",
|
| 243 |
+
"decoder.right_decoder.layers.1.linear1.bias",
|
| 244 |
+
"decoder.right_decoder.layers.1.linear2.weight",
|
| 245 |
+
"decoder.right_decoder.layers.1.linear2.bias",
|
| 246 |
+
"decoder.right_decoder.layers.1.norm1.weight",
|
| 247 |
+
"decoder.right_decoder.layers.1.norm1.bias",
|
| 248 |
+
"decoder.right_decoder.layers.1.norm2.weight",
|
| 249 |
+
"decoder.right_decoder.layers.1.norm2.bias",
|
| 250 |
+
"decoder.right_decoder.layers.1.norm3.weight",
|
| 251 |
+
"decoder.right_decoder.layers.1.norm3.bias",
|
| 252 |
+
"decoder.right_decoder.layers.2.self_attn.in_proj_weight",
|
| 253 |
+
"decoder.right_decoder.layers.2.self_attn.in_proj_bias",
|
| 254 |
+
"decoder.right_decoder.layers.2.self_attn.out_proj.weight",
|
| 255 |
+
"decoder.right_decoder.layers.2.self_attn.out_proj.bias",
|
| 256 |
+
"decoder.right_decoder.layers.2.multihead_attn.in_proj_weight",
|
| 257 |
+
"decoder.right_decoder.layers.2.multihead_attn.in_proj_bias",
|
| 258 |
+
"decoder.right_decoder.layers.2.multihead_attn.out_proj.weight",
|
| 259 |
+
"decoder.right_decoder.layers.2.multihead_attn.out_proj.bias",
|
| 260 |
+
"decoder.right_decoder.layers.2.linear1.weight",
|
| 261 |
+
"decoder.right_decoder.layers.2.linear1.bias",
|
| 262 |
+
"decoder.right_decoder.layers.2.linear2.weight",
|
| 263 |
+
"decoder.right_decoder.layers.2.linear2.bias",
|
| 264 |
+
"decoder.right_decoder.layers.2.norm1.weight",
|
| 265 |
+
"decoder.right_decoder.layers.2.norm1.bias",
|
| 266 |
+
"decoder.right_decoder.layers.2.norm2.weight",
|
| 267 |
+
"decoder.right_decoder.layers.2.norm2.bias",
|
| 268 |
+
"decoder.right_decoder.layers.2.norm3.weight",
|
| 269 |
+
"decoder.right_decoder.layers.2.norm3.bias",
|
| 270 |
+
"decoder.right_decoder.layers.3.self_attn.in_proj_weight",
|
| 271 |
+
"decoder.right_decoder.layers.3.self_attn.in_proj_bias",
|
| 272 |
+
"decoder.right_decoder.layers.3.self_attn.out_proj.weight",
|
| 273 |
+
"decoder.right_decoder.layers.3.self_attn.out_proj.bias",
|
| 274 |
+
"decoder.right_decoder.layers.3.multihead_attn.in_proj_weight",
|
| 275 |
+
"decoder.right_decoder.layers.3.multihead_attn.in_proj_bias",
|
| 276 |
+
"decoder.right_decoder.layers.3.multihead_attn.out_proj.weight",
|
| 277 |
+
"decoder.right_decoder.layers.3.multihead_attn.out_proj.bias",
|
| 278 |
+
"decoder.right_decoder.layers.3.linear1.weight",
|
| 279 |
+
"decoder.right_decoder.layers.3.linear1.bias",
|
| 280 |
+
"decoder.right_decoder.layers.3.linear2.weight",
|
| 281 |
+
"decoder.right_decoder.layers.3.linear2.bias",
|
| 282 |
+
"decoder.right_decoder.layers.3.norm1.weight",
|
| 283 |
+
"decoder.right_decoder.layers.3.norm1.bias",
|
| 284 |
+
"decoder.right_decoder.layers.3.norm2.weight",
|
| 285 |
+
"decoder.right_decoder.layers.3.norm2.bias",
|
| 286 |
+
"decoder.right_decoder.layers.3.norm3.weight",
|
| 287 |
+
"decoder.right_decoder.layers.3.norm3.bias",
|
| 288 |
+
"decoder.left_decoder.layers.0.self_attn.in_proj_weight",
|
| 289 |
+
"decoder.left_decoder.layers.0.self_attn.in_proj_bias",
|
| 290 |
+
"decoder.left_decoder.layers.0.self_attn.out_proj.weight",
|
| 291 |
+
"decoder.left_decoder.layers.0.self_attn.out_proj.bias",
|
| 292 |
+
"decoder.left_decoder.layers.0.multihead_attn.in_proj_weight",
|
| 293 |
+
"decoder.left_decoder.layers.0.multihead_attn.in_proj_bias",
|
| 294 |
+
"decoder.left_decoder.layers.0.multihead_attn.out_proj.weight",
|
| 295 |
+
"decoder.left_decoder.layers.0.multihead_attn.out_proj.bias",
|
| 296 |
+
"decoder.left_decoder.layers.0.linear1.weight",
|
| 297 |
+
"decoder.left_decoder.layers.0.linear1.bias",
|
| 298 |
+
"decoder.left_decoder.layers.0.linear2.weight",
|
| 299 |
+
"decoder.left_decoder.layers.0.linear2.bias",
|
| 300 |
+
"decoder.left_decoder.layers.0.norm1.weight",
|
| 301 |
+
"decoder.left_decoder.layers.0.norm1.bias",
|
| 302 |
+
"decoder.left_decoder.layers.0.norm2.weight",
|
| 303 |
+
"decoder.left_decoder.layers.0.norm2.bias",
|
| 304 |
+
"decoder.left_decoder.layers.0.norm3.weight",
|
| 305 |
+
"decoder.left_decoder.layers.0.norm3.bias",
|
| 306 |
+
"decoder.left_decoder.layers.1.self_attn.in_proj_weight",
|
| 307 |
+
"decoder.left_decoder.layers.1.self_attn.in_proj_bias",
|
| 308 |
+
"decoder.left_decoder.layers.1.self_attn.out_proj.weight",
|
| 309 |
+
"decoder.left_decoder.layers.1.self_attn.out_proj.bias",
|
| 310 |
+
"decoder.left_decoder.layers.1.multihead_attn.in_proj_weight",
|
| 311 |
+
"decoder.left_decoder.layers.1.multihead_attn.in_proj_bias",
|
| 312 |
+
"decoder.left_decoder.layers.1.multihead_attn.out_proj.weight",
|
| 313 |
+
"decoder.left_decoder.layers.1.multihead_attn.out_proj.bias",
|
| 314 |
+
"decoder.left_decoder.layers.1.linear1.weight",
|
| 315 |
+
"decoder.left_decoder.layers.1.linear1.bias",
|
| 316 |
+
"decoder.left_decoder.layers.1.linear2.weight",
|
| 317 |
+
"decoder.left_decoder.layers.1.linear2.bias",
|
| 318 |
+
"decoder.left_decoder.layers.1.norm1.weight",
|
| 319 |
+
"decoder.left_decoder.layers.1.norm1.bias",
|
| 320 |
+
"decoder.left_decoder.layers.1.norm2.weight",
|
| 321 |
+
"decoder.left_decoder.layers.1.norm2.bias",
|
| 322 |
+
"decoder.left_decoder.layers.1.norm3.weight",
|
| 323 |
+
"decoder.left_decoder.layers.1.norm3.bias",
|
| 324 |
+
"decoder.left_decoder.layers.2.self_attn.in_proj_weight",
|
| 325 |
+
"decoder.left_decoder.layers.2.self_attn.in_proj_bias",
|
| 326 |
+
"decoder.left_decoder.layers.2.self_attn.out_proj.weight",
|
| 327 |
+
"decoder.left_decoder.layers.2.self_attn.out_proj.bias",
|
| 328 |
+
"decoder.left_decoder.layers.2.multihead_attn.in_proj_weight",
|
| 329 |
+
"decoder.left_decoder.layers.2.multihead_attn.in_proj_bias",
|
| 330 |
+
"decoder.left_decoder.layers.2.multihead_attn.out_proj.weight",
|
| 331 |
+
"decoder.left_decoder.layers.2.multihead_attn.out_proj.bias",
|
| 332 |
+
"decoder.left_decoder.layers.2.linear1.weight",
|
| 333 |
+
"decoder.left_decoder.layers.2.linear1.bias",
|
| 334 |
+
"decoder.left_decoder.layers.2.linear2.weight",
|
| 335 |
+
"decoder.left_decoder.layers.2.linear2.bias",
|
| 336 |
+
"decoder.left_decoder.layers.2.norm1.weight",
|
| 337 |
+
"decoder.left_decoder.layers.2.norm1.bias",
|
| 338 |
+
"decoder.left_decoder.layers.2.norm2.weight",
|
| 339 |
+
"decoder.left_decoder.layers.2.norm2.bias",
|
| 340 |
+
"decoder.left_decoder.layers.2.norm3.weight",
|
| 341 |
+
"decoder.left_decoder.layers.2.norm3.bias",
|
| 342 |
+
"decoder.left_decoder.layers.3.self_attn.in_proj_weight",
|
| 343 |
+
"decoder.left_decoder.layers.3.self_attn.in_proj_bias",
|
| 344 |
+
"decoder.left_decoder.layers.3.self_attn.out_proj.weight",
|
| 345 |
+
"decoder.left_decoder.layers.3.self_attn.out_proj.bias",
|
| 346 |
+
"decoder.left_decoder.layers.3.multihead_attn.in_proj_weight",
|
| 347 |
+
"decoder.left_decoder.layers.3.multihead_attn.in_proj_bias",
|
| 348 |
+
"decoder.left_decoder.layers.3.multihead_attn.out_proj.weight",
|
| 349 |
+
"decoder.left_decoder.layers.3.multihead_attn.out_proj.bias",
|
| 350 |
+
"decoder.left_decoder.layers.3.linear1.weight",
|
| 351 |
+
"decoder.left_decoder.layers.3.linear1.bias",
|
| 352 |
+
"decoder.left_decoder.layers.3.linear2.weight",
|
| 353 |
+
"decoder.left_decoder.layers.3.linear2.bias",
|
| 354 |
+
"decoder.left_decoder.layers.3.norm1.weight",
|
| 355 |
+
"decoder.left_decoder.layers.3.norm1.bias",
|
| 356 |
+
"decoder.left_decoder.layers.3.norm2.weight",
|
| 357 |
+
"decoder.left_decoder.layers.3.norm2.bias",
|
| 358 |
+
"decoder.left_decoder.layers.3.norm3.weight",
|
| 359 |
+
"decoder.left_decoder.layers.3.norm3.bias",
|
| 360 |
+
"decoder.proposal_queries.weight",
|
| 361 |
+
"decoder.arm_identity.weight",
|
| 362 |
+
"decoder.phase_adapter.weight",
|
| 363 |
+
"decoder.phase_adapter.bias",
|
| 364 |
+
"decoder.role_adapter.weight",
|
| 365 |
+
"decoder.role_adapter.bias",
|
| 366 |
+
"decoder.context_proj.0.weight",
|
| 367 |
+
"decoder.context_proj.0.bias",
|
| 368 |
+
"decoder.context_proj.1.weight",
|
| 369 |
+
"decoder.context_proj.1.bias",
|
| 370 |
+
"decoder.right_mean.weight",
|
| 371 |
+
"decoder.right_mean.bias",
|
| 372 |
+
"decoder.right_log_std.weight",
|
| 373 |
+
"decoder.right_log_std.bias",
|
| 374 |
+
"decoder.left_mean.weight",
|
| 375 |
+
"decoder.left_mean.bias",
|
| 376 |
+
"decoder.left_log_std.weight",
|
| 377 |
+
"decoder.left_log_std.bias",
|
| 378 |
+
"decoder.proposal_score.1.weight",
|
| 379 |
+
"decoder.proposal_score.1.bias",
|
| 380 |
+
"decoder.proposal_score.3.weight",
|
| 381 |
+
"decoder.proposal_score.3.bias",
|
| 382 |
+
"interaction_head.interaction_queries",
|
| 383 |
+
"interaction_head.interaction_attention.in_proj_weight",
|
| 384 |
+
"interaction_head.interaction_attention.in_proj_bias",
|
| 385 |
+
"interaction_head.interaction_attention.out_proj.weight",
|
| 386 |
+
"interaction_head.interaction_attention.out_proj.bias",
|
| 387 |
+
"interaction_head.interaction_mlp.0.weight",
|
| 388 |
+
"interaction_head.interaction_mlp.0.bias",
|
| 389 |
+
"interaction_head.interaction_mlp.1.weight",
|
| 390 |
+
"interaction_head.interaction_mlp.1.bias",
|
| 391 |
+
"interaction_head.interaction_mlp.3.weight",
|
| 392 |
+
"interaction_head.interaction_mlp.3.bias",
|
| 393 |
+
"interaction_head.decoder.field_queries",
|
| 394 |
+
"interaction_head.decoder.field_attention.in_proj_weight",
|
| 395 |
+
"interaction_head.decoder.field_attention.in_proj_bias",
|
| 396 |
+
"interaction_head.decoder.field_attention.out_proj.weight",
|
| 397 |
+
"interaction_head.decoder.field_attention.out_proj.bias",
|
| 398 |
+
"interaction_head.decoder.field_mlp.0.weight",
|
| 399 |
+
"interaction_head.decoder.field_mlp.0.bias",
|
| 400 |
+
"interaction_head.decoder.field_mlp.1.weight",
|
| 401 |
+
"interaction_head.decoder.field_mlp.1.bias",
|
| 402 |
+
"interaction_head.decoder.field_mlp.3.weight",
|
| 403 |
+
"interaction_head.decoder.field_mlp.3.bias",
|
| 404 |
+
"interaction_head.decoder.summary_proj.0.weight",
|
| 405 |
+
"interaction_head.decoder.summary_proj.0.bias",
|
| 406 |
+
"interaction_head.decoder.summary_proj.1.weight",
|
| 407 |
+
"interaction_head.decoder.summary_proj.1.bias",
|
| 408 |
+
"interaction_head.decoder.phase_head.0.weight",
|
| 409 |
+
"interaction_head.decoder.phase_head.0.bias",
|
| 410 |
+
"interaction_head.decoder.phase_head.1.weight",
|
| 411 |
+
"interaction_head.decoder.phase_head.1.bias",
|
| 412 |
+
"interaction_head.decoder.phase_head.3.weight",
|
| 413 |
+
"interaction_head.decoder.phase_head.3.bias",
|
| 414 |
+
"interaction_head.decoder.arm_role_head.0.weight",
|
| 415 |
+
"interaction_head.decoder.arm_role_head.0.bias",
|
| 416 |
+
"interaction_head.decoder.arm_role_head.1.weight",
|
| 417 |
+
"interaction_head.decoder.arm_role_head.1.bias",
|
| 418 |
+
"interaction_head.decoder.arm_role_head.3.weight",
|
| 419 |
+
"interaction_head.decoder.arm_role_head.3.bias",
|
| 420 |
+
"interaction_head.decoder.arm_identity.weight",
|
| 421 |
+
"interaction_head.decoder.support_mode.0.weight",
|
| 422 |
+
"interaction_head.decoder.support_mode.0.bias",
|
| 423 |
+
"interaction_head.decoder.support_mode.1.weight",
|
| 424 |
+
"interaction_head.decoder.support_mode.1.bias",
|
| 425 |
+
"interaction_head.decoder.support_mode.3.weight",
|
| 426 |
+
"interaction_head.decoder.support_mode.3.bias",
|
| 427 |
+
"interaction_head.decoder.target_field.weight",
|
| 428 |
+
"interaction_head.decoder.target_field.bias",
|
| 429 |
+
"interaction_head.decoder.actor_feasibility_field.weight",
|
| 430 |
+
"interaction_head.decoder.actor_feasibility_field.bias",
|
| 431 |
+
"interaction_head.decoder.persistence_field.weight",
|
| 432 |
+
"interaction_head.decoder.persistence_field.bias",
|
| 433 |
+
"interaction_head.decoder.risk_field.weight",
|
| 434 |
+
"interaction_head.decoder.risk_field.bias",
|
| 435 |
+
"interaction_head.decoder.uncertainty_field.weight",
|
| 436 |
+
"interaction_head.decoder.uncertainty_field.bias",
|
| 437 |
+
"interaction_head.decoder.compat_access_field.weight",
|
| 438 |
+
"interaction_head.decoder.compat_access_field.bias",
|
| 439 |
+
"interaction_head.decoder.compat_persistence.weight",
|
| 440 |
+
"interaction_head.decoder.compat_persistence.bias",
|
| 441 |
+
"interaction_head.decoder.reocclusion_head.0.weight",
|
| 442 |
+
"interaction_head.decoder.reocclusion_head.0.bias",
|
| 443 |
+
"interaction_head.decoder.reocclusion_head.1.weight",
|
| 444 |
+
"interaction_head.decoder.reocclusion_head.1.bias",
|
| 445 |
+
"interaction_head.decoder.reocclusion_head.3.weight",
|
| 446 |
+
"interaction_head.decoder.reocclusion_head.3.bias",
|
| 447 |
+
"world_model.action_encoder.0.weight",
|
| 448 |
+
"world_model.action_encoder.0.bias",
|
| 449 |
+
"world_model.action_encoder.1.weight",
|
| 450 |
+
"world_model.action_encoder.1.bias",
|
| 451 |
+
"world_model.transition.layers.0.self_attn.in_proj_weight",
|
| 452 |
+
"world_model.transition.layers.0.self_attn.in_proj_bias",
|
| 453 |
+
"world_model.transition.layers.0.self_attn.out_proj.weight",
|
| 454 |
+
"world_model.transition.layers.0.self_attn.out_proj.bias",
|
| 455 |
+
"world_model.transition.layers.0.linear1.weight",
|
| 456 |
+
"world_model.transition.layers.0.linear1.bias",
|
| 457 |
+
"world_model.transition.layers.0.linear2.weight",
|
| 458 |
+
"world_model.transition.layers.0.linear2.bias",
|
| 459 |
+
"world_model.transition.layers.0.norm1.weight",
|
| 460 |
+
"world_model.transition.layers.0.norm1.bias",
|
| 461 |
+
"world_model.transition.layers.0.norm2.weight",
|
| 462 |
+
"world_model.transition.layers.0.norm2.bias",
|
| 463 |
+
"world_model.transition.layers.1.self_attn.in_proj_weight",
|
| 464 |
+
"world_model.transition.layers.1.self_attn.in_proj_bias",
|
| 465 |
+
"world_model.transition.layers.1.self_attn.out_proj.weight",
|
| 466 |
+
"world_model.transition.layers.1.self_attn.out_proj.bias",
|
| 467 |
+
"world_model.transition.layers.1.linear1.weight",
|
| 468 |
+
"world_model.transition.layers.1.linear1.bias",
|
| 469 |
+
"world_model.transition.layers.1.linear2.weight",
|
| 470 |
+
"world_model.transition.layers.1.linear2.bias",
|
| 471 |
+
"world_model.transition.layers.1.norm1.weight",
|
| 472 |
+
"world_model.transition.layers.1.norm1.bias",
|
| 473 |
+
"world_model.transition.layers.1.norm2.weight",
|
| 474 |
+
"world_model.transition.layers.1.norm2.bias",
|
| 475 |
+
"world_model.token_update.0.weight",
|
| 476 |
+
"world_model.token_update.0.bias",
|
| 477 |
+
"world_model.token_update.1.weight",
|
| 478 |
+
"world_model.token_update.1.bias",
|
| 479 |
+
"world_model.token_update.3.weight",
|
| 480 |
+
"world_model.token_update.3.bias",
|
| 481 |
+
"world_model.decoder.field_queries",
|
| 482 |
+
"world_model.decoder.field_attention.in_proj_weight",
|
| 483 |
+
"world_model.decoder.field_attention.in_proj_bias",
|
| 484 |
+
"world_model.decoder.field_attention.out_proj.weight",
|
| 485 |
+
"world_model.decoder.field_attention.out_proj.bias",
|
| 486 |
+
"world_model.decoder.field_mlp.0.weight",
|
| 487 |
+
"world_model.decoder.field_mlp.0.bias",
|
| 488 |
+
"world_model.decoder.field_mlp.1.weight",
|
| 489 |
+
"world_model.decoder.field_mlp.1.bias",
|
| 490 |
+
"world_model.decoder.field_mlp.3.weight",
|
| 491 |
+
"world_model.decoder.field_mlp.3.bias",
|
| 492 |
+
"world_model.decoder.summary_proj.0.weight",
|
| 493 |
+
"world_model.decoder.summary_proj.0.bias",
|
| 494 |
+
"world_model.decoder.summary_proj.1.weight",
|
| 495 |
+
"world_model.decoder.summary_proj.1.bias",
|
| 496 |
+
"world_model.decoder.phase_head.0.weight",
|
| 497 |
+
"world_model.decoder.phase_head.0.bias",
|
| 498 |
+
"world_model.decoder.phase_head.1.weight",
|
| 499 |
+
"world_model.decoder.phase_head.1.bias",
|
| 500 |
+
"world_model.decoder.phase_head.3.weight",
|
| 501 |
+
"world_model.decoder.phase_head.3.bias",
|
| 502 |
+
"world_model.decoder.arm_role_head.0.weight",
|
| 503 |
+
"world_model.decoder.arm_role_head.0.bias",
|
| 504 |
+
"world_model.decoder.arm_role_head.1.weight",
|
| 505 |
+
"world_model.decoder.arm_role_head.1.bias",
|
| 506 |
+
"world_model.decoder.arm_role_head.3.weight",
|
| 507 |
+
"world_model.decoder.arm_role_head.3.bias",
|
| 508 |
+
"world_model.decoder.arm_identity.weight",
|
| 509 |
+
"world_model.decoder.support_mode.0.weight",
|
| 510 |
+
"world_model.decoder.support_mode.0.bias",
|
| 511 |
+
"world_model.decoder.support_mode.1.weight",
|
| 512 |
+
"world_model.decoder.support_mode.1.bias",
|
| 513 |
+
"world_model.decoder.support_mode.3.weight",
|
| 514 |
+
"world_model.decoder.support_mode.3.bias",
|
| 515 |
+
"world_model.decoder.target_field.weight",
|
| 516 |
+
"world_model.decoder.target_field.bias",
|
| 517 |
+
"world_model.decoder.actor_feasibility_field.weight",
|
| 518 |
+
"world_model.decoder.actor_feasibility_field.bias",
|
| 519 |
+
"world_model.decoder.persistence_field.weight",
|
| 520 |
+
"world_model.decoder.persistence_field.bias",
|
| 521 |
+
"world_model.decoder.risk_field.weight",
|
| 522 |
+
"world_model.decoder.risk_field.bias",
|
| 523 |
+
"world_model.decoder.uncertainty_field.weight",
|
| 524 |
+
"world_model.decoder.uncertainty_field.bias",
|
| 525 |
+
"world_model.decoder.compat_access_field.weight",
|
| 526 |
+
"world_model.decoder.compat_access_field.bias",
|
| 527 |
+
"world_model.decoder.compat_persistence.weight",
|
| 528 |
+
"world_model.decoder.compat_persistence.bias",
|
| 529 |
+
"world_model.decoder.reocclusion_head.0.weight",
|
| 530 |
+
"world_model.decoder.reocclusion_head.0.bias",
|
| 531 |
+
"world_model.decoder.reocclusion_head.1.weight",
|
| 532 |
+
"world_model.decoder.reocclusion_head.1.bias",
|
| 533 |
+
"world_model.decoder.reocclusion_head.3.weight",
|
| 534 |
+
"world_model.decoder.reocclusion_head.3.bias",
|
| 535 |
+
"planner.cls_token",
|
| 536 |
+
"planner.step_proj.0.weight",
|
| 537 |
+
"planner.step_proj.0.bias",
|
| 538 |
+
"planner.step_proj.1.weight",
|
| 539 |
+
"planner.step_proj.1.bias",
|
| 540 |
+
"planner.sequence_encoder.layers.0.self_attn.in_proj_weight",
|
| 541 |
+
"planner.sequence_encoder.layers.0.self_attn.in_proj_bias",
|
| 542 |
+
"planner.sequence_encoder.layers.0.self_attn.out_proj.weight",
|
| 543 |
+
"planner.sequence_encoder.layers.0.self_attn.out_proj.bias",
|
| 544 |
+
"planner.sequence_encoder.layers.0.linear1.weight",
|
| 545 |
+
"planner.sequence_encoder.layers.0.linear1.bias",
|
| 546 |
+
"planner.sequence_encoder.layers.0.linear2.weight",
|
| 547 |
+
"planner.sequence_encoder.layers.0.linear2.bias",
|
| 548 |
+
"planner.sequence_encoder.layers.0.norm1.weight",
|
| 549 |
+
"planner.sequence_encoder.layers.0.norm1.bias",
|
| 550 |
+
"planner.sequence_encoder.layers.0.norm2.weight",
|
| 551 |
+
"planner.sequence_encoder.layers.0.norm2.bias",
|
| 552 |
+
"planner.sequence_encoder.layers.1.self_attn.in_proj_weight",
|
| 553 |
+
"planner.sequence_encoder.layers.1.self_attn.in_proj_bias",
|
| 554 |
+
"planner.sequence_encoder.layers.1.self_attn.out_proj.weight",
|
| 555 |
+
"planner.sequence_encoder.layers.1.self_attn.out_proj.bias",
|
| 556 |
+
"planner.sequence_encoder.layers.1.linear1.weight",
|
| 557 |
+
"planner.sequence_encoder.layers.1.linear1.bias",
|
| 558 |
+
"planner.sequence_encoder.layers.1.linear2.weight",
|
| 559 |
+
"planner.sequence_encoder.layers.1.linear2.bias",
|
| 560 |
+
"planner.sequence_encoder.layers.1.norm1.weight",
|
| 561 |
+
"planner.sequence_encoder.layers.1.norm1.bias",
|
| 562 |
+
"planner.sequence_encoder.layers.1.norm2.weight",
|
| 563 |
+
"planner.sequence_encoder.layers.1.norm2.bias",
|
| 564 |
+
"planner.success_head.weight",
|
| 565 |
+
"planner.success_head.bias",
|
| 566 |
+
"planner.risk_head.weight",
|
| 567 |
+
"planner.risk_head.bias",
|
| 568 |
+
"planner.score_head.weight",
|
| 569 |
+
"planner.score_head.bias"
|
| 570 |
+
],
|
| 571 |
+
"unexpected_keys": []
|
| 572 |
+
}
|
| 573 |
+
}
|
artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/config_resolved.yaml
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_interaction_state_recency_oracleft
|
| 2 |
+
output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction_debug
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 13
|
| 5 |
+
init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
|
| 6 |
+
init_strict: true
|
| 7 |
+
data:
|
| 8 |
+
proxies:
|
| 9 |
+
- foliage_proxy
|
| 10 |
+
- bag_proxy
|
| 11 |
+
- cloth_proxy
|
| 12 |
+
resolution: 96
|
| 13 |
+
train_episodes_per_proxy: 48
|
| 14 |
+
val_episodes_per_proxy: 16
|
| 15 |
+
train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v5_actionhist.pt
|
| 16 |
+
val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v5_actionhist.pt
|
| 17 |
+
rebuild_dataset: false
|
| 18 |
+
chunk_horizon: 8
|
| 19 |
+
rollout_horizon: 5
|
| 20 |
+
history_steps: 6
|
| 21 |
+
planner_candidates: 8
|
| 22 |
+
seed: 13
|
| 23 |
+
optim:
|
| 24 |
+
epochs: 8
|
| 25 |
+
batch_size: 16
|
| 26 |
+
num_workers: 0
|
| 27 |
+
lr: 0.0003
|
| 28 |
+
weight_decay: 0.0001
|
| 29 |
+
trainer:
|
| 30 |
+
policy_type: interaction_state
|
| 31 |
+
use_bf16: true
|
| 32 |
+
grad_clip_norm: 1.0
|
| 33 |
+
freeze_backbone: true
|
| 34 |
+
gradient_checkpointing: false
|
| 35 |
+
plan_during_train: true
|
| 36 |
+
plan_during_eval: true
|
| 37 |
+
support_mode_conditioning: true
|
| 38 |
+
planner_mode: trainable
|
| 39 |
+
policy:
|
| 40 |
+
backbone:
|
| 41 |
+
model_name: openai/clip-vit-base-patch32
|
| 42 |
+
hidden_dim: 128
|
| 43 |
+
max_text_tokens: 32
|
| 44 |
+
freeze_backbone: true
|
| 45 |
+
gradient_checkpointing: false
|
| 46 |
+
use_dummy_backbone: true
|
| 47 |
+
fusion:
|
| 48 |
+
hidden_dim: 128
|
| 49 |
+
num_cameras: 3
|
| 50 |
+
num_layers: 2
|
| 51 |
+
num_heads: 4
|
| 52 |
+
ff_dim: 256
|
| 53 |
+
dropout: 0.1
|
| 54 |
+
proprio_dim: 32
|
| 55 |
+
proprio_tokens: 1
|
| 56 |
+
memory:
|
| 57 |
+
hidden_dim: 128
|
| 58 |
+
action_dim: 14
|
| 59 |
+
history_steps: 6
|
| 60 |
+
num_layers: 2
|
| 61 |
+
dropout: 0.1
|
| 62 |
+
memory_bank_size: 4
|
| 63 |
+
num_heads: 4
|
| 64 |
+
max_history_steps: 8
|
| 65 |
+
decoder:
|
| 66 |
+
hidden_dim: 128
|
| 67 |
+
num_heads: 4
|
| 68 |
+
num_layers: 2
|
| 69 |
+
ff_dim: 256
|
| 70 |
+
dropout: 0.1
|
| 71 |
+
chunk_size: 8
|
| 72 |
+
action_dim: 14
|
| 73 |
+
arm_action_dim: 7
|
| 74 |
+
num_candidates: 8
|
| 75 |
+
num_phases: 5
|
| 76 |
+
num_arm_roles: 4
|
| 77 |
+
reveal_head:
|
| 78 |
+
hidden_dim: 128
|
| 79 |
+
num_support_modes: 3
|
| 80 |
+
num_approach_templates: 32
|
| 81 |
+
rollout_horizon: 5
|
| 82 |
+
belief_map_size: 32
|
| 83 |
+
field_size: 16
|
| 84 |
+
num_heads: 4
|
| 85 |
+
predict_belief_map: true
|
| 86 |
+
num_phases: 5
|
| 87 |
+
num_arm_roles: 4
|
| 88 |
+
num_interaction_tokens: 8
|
| 89 |
+
world_model:
|
| 90 |
+
hidden_dim: 128
|
| 91 |
+
action_dim: 14
|
| 92 |
+
num_support_modes: 3
|
| 93 |
+
num_approach_templates: 32
|
| 94 |
+
rollout_horizon: 5
|
| 95 |
+
field_size: 16
|
| 96 |
+
num_heads: 4
|
| 97 |
+
num_phases: 5
|
| 98 |
+
num_arm_roles: 4
|
| 99 |
+
num_interaction_tokens: 8
|
| 100 |
+
planner:
|
| 101 |
+
hidden_dim: 128
|
| 102 |
+
num_candidates: 8
|
| 103 |
+
action_dim: 14
|
| 104 |
+
num_support_modes: 3
|
| 105 |
+
utility_margin: 0.1
|
| 106 |
+
num_heads: 4
|
| 107 |
+
num_layers: 2
|
| 108 |
+
num_phases: 5
|
| 109 |
+
num_arm_roles: 4
|
| 110 |
+
loss_weights:
|
| 111 |
+
action: 1.0
|
| 112 |
+
phase: 0.1
|
| 113 |
+
arm_role: 0.15
|
| 114 |
+
support_mode: 0.1
|
| 115 |
+
corridor: 0.15
|
| 116 |
+
persistence: 0.05
|
| 117 |
+
disturbance: 0.05
|
| 118 |
+
world_model: 0.2
|
| 119 |
+
belief: 0.05
|
| 120 |
+
planner_success: 0.25
|
| 121 |
+
planner_risk: 0.1
|
| 122 |
+
planner_ranking: 0.2
|
| 123 |
+
proposal_reconstruction: 0.1
|
| 124 |
+
proposal_success: 0.15
|
| 125 |
+
proposal_ranking: 0.2
|
artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/diagnostics/proxy_diagnostics.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"planner_top1_accuracy": 0.2824427480916031,
|
| 3 |
+
"planner_regret": 0.24119873344898224,
|
| 4 |
+
"risk_calibration_mse": 0.009003574028611183,
|
| 5 |
+
"role_collapse_rate": 0.0,
|
| 6 |
+
"num_samples": 131
|
| 7 |
+
}
|
artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/metrics.json
ADDED
|
@@ -0,0 +1,346 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"epoch": 0,
|
| 4 |
+
"train": {
|
| 5 |
+
"action": 0.019621784721190732,
|
| 6 |
+
"arm_role": 1.4238473445023677e-05,
|
| 7 |
+
"belief": 0.10273545235395432,
|
| 8 |
+
"corridor": 0.1970261943837007,
|
| 9 |
+
"disturbance": 0.0021691546814205744,
|
| 10 |
+
"persistence": 1.1530707913140457,
|
| 11 |
+
"phase": 0.40012874578436214,
|
| 12 |
+
"planner_ranking": 1.0832201441129048,
|
| 13 |
+
"planner_risk": 0.009513227792922407,
|
| 14 |
+
"planner_success": 0.44357747708757717,
|
| 15 |
+
"proposal_ranking": 1.1302440961201985,
|
| 16 |
+
"proposal_reconstruction": 0.06888884957879782,
|
| 17 |
+
"proposal_success": 0.6472248112161955,
|
| 18 |
+
"reocclusion": 0.23694051212320724,
|
| 19 |
+
"support_mode": 0.0007155667990446091,
|
| 20 |
+
"total": 1.1227939675251644,
|
| 21 |
+
"uncertainty": 0.00357946046278812,
|
| 22 |
+
"world_model": 1.5367356936136882
|
| 23 |
+
},
|
| 24 |
+
"val": {
|
| 25 |
+
"action": 0.01623468690862258,
|
| 26 |
+
"arm_role": 1.7815142427934916e-06,
|
| 27 |
+
"belief": 0.09838261952002843,
|
| 28 |
+
"corridor": 0.19904182685746086,
|
| 29 |
+
"disturbance": 0.0012887230906118122,
|
| 30 |
+
"persistence": 1.2435127298037212,
|
| 31 |
+
"phase": 0.451065621442265,
|
| 32 |
+
"planner_ranking": 1.1237382623884413,
|
| 33 |
+
"planner_risk": 0.00879605039436784,
|
| 34 |
+
"planner_success": 0.5527588526407877,
|
| 35 |
+
"proposal_ranking": 1.1329045295715332,
|
| 36 |
+
"proposal_reconstruction": 0.062247288723786674,
|
| 37 |
+
"proposal_success": 0.6402903331650628,
|
| 38 |
+
"reocclusion": 0.19368870432178178,
|
| 39 |
+
"support_mode": 4.385759530123323e-05,
|
| 40 |
+
"total": 1.1563972632090251,
|
| 41 |
+
"uncertainty": 0.003620341523653931,
|
| 42 |
+
"world_model": 1.507298681471083
|
| 43 |
+
}
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
"epoch": 1,
|
| 47 |
+
"train": {
|
| 48 |
+
"action": 0.015102950584453842,
|
| 49 |
+
"arm_role": 7.929694329315377e-07,
|
| 50 |
+
"belief": 0.09922042830536763,
|
| 51 |
+
"corridor": 0.1954052426541845,
|
| 52 |
+
"disturbance": 0.0012760455817139398,
|
| 53 |
+
"persistence": 1.1133080422878265,
|
| 54 |
+
"phase": 0.40078286826610565,
|
| 55 |
+
"planner_ranking": 1.0535631676514943,
|
| 56 |
+
"planner_risk": 0.009265869099181145,
|
| 57 |
+
"planner_success": 0.41223976016044617,
|
| 58 |
+
"proposal_ranking": 1.130059376358986,
|
| 59 |
+
"proposal_reconstruction": 0.061719981798281275,
|
| 60 |
+
"proposal_success": 0.6374408900737762,
|
| 61 |
+
"reocclusion": 0.22066612169146538,
|
| 62 |
+
"support_mode": 3.152040555202499e-05,
|
| 63 |
+
"total": 1.0627698848644893,
|
| 64 |
+
"uncertainty": 0.002360584529621216,
|
| 65 |
+
"world_model": 1.3532413293917973
|
| 66 |
+
},
|
| 67 |
+
"val": {
|
| 68 |
+
"action": 0.014689018225504292,
|
| 69 |
+
"arm_role": 5.302327663356563e-07,
|
| 70 |
+
"belief": 0.09588906251721913,
|
| 71 |
+
"corridor": 0.19485984411504534,
|
| 72 |
+
"disturbance": 0.0013201889879484144,
|
| 73 |
+
"persistence": 1.211418045891656,
|
| 74 |
+
"phase": 0.4520965864260991,
|
| 75 |
+
"planner_ranking": 1.1366683509614732,
|
| 76 |
+
"planner_risk": 0.009635515045374632,
|
| 77 |
+
"planner_success": 0.5696005490091112,
|
| 78 |
+
"proposal_ranking": 1.1199064254760742,
|
| 79 |
+
"proposal_reconstruction": 0.06043942438231574,
|
| 80 |
+
"proposal_success": 0.6389325261116028,
|
| 81 |
+
"reocclusion": 0.18208894692361355,
|
| 82 |
+
"support_mode": 1.9065460340546753e-05,
|
| 83 |
+
"total": 1.1511138545142279,
|
| 84 |
+
"uncertainty": 0.0020409094027450513,
|
| 85 |
+
"world_model": 1.481640590561761
|
| 86 |
+
}
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"epoch": 2,
|
| 90 |
+
"train": {
|
| 91 |
+
"action": 0.013718575122766197,
|
| 92 |
+
"arm_role": 3.225997922129409e-07,
|
| 93 |
+
"belief": 0.09694493561983109,
|
| 94 |
+
"corridor": 0.19660565722733736,
|
| 95 |
+
"disturbance": 0.0012764433243622382,
|
| 96 |
+
"persistence": 1.1350401155650616,
|
| 97 |
+
"phase": 0.4035409850378831,
|
| 98 |
+
"planner_ranking": 1.0236077308654785,
|
| 99 |
+
"planner_risk": 0.009166777638408044,
|
| 100 |
+
"planner_success": 0.3850418192644914,
|
| 101 |
+
"proposal_ranking": 1.129315584897995,
|
| 102 |
+
"proposal_reconstruction": 0.06000282304982344,
|
| 103 |
+
"proposal_success": 0.6322548364599546,
|
| 104 |
+
"reocclusion": 0.22824073505277434,
|
| 105 |
+
"support_mode": 1.4410975078741709e-05,
|
| 106 |
+
"total": 1.0505772059162457,
|
| 107 |
+
"uncertainty": 0.001883886650224061,
|
| 108 |
+
"world_model": 1.3608256032069523
|
| 109 |
+
},
|
| 110 |
+
"val": {
|
| 111 |
+
"action": 0.015656203031539917,
|
| 112 |
+
"arm_role": 3.1802936541048945e-07,
|
| 113 |
+
"belief": 0.09277311464150746,
|
| 114 |
+
"corridor": 0.19478923082351685,
|
| 115 |
+
"disturbance": 0.001490643351442284,
|
| 116 |
+
"persistence": 1.2428188456429377,
|
| 117 |
+
"phase": 0.44441814886199105,
|
| 118 |
+
"planner_ranking": 1.1642935540941026,
|
| 119 |
+
"planner_risk": 0.008580206893384457,
|
| 120 |
+
"planner_success": 0.5712412032816145,
|
| 121 |
+
"proposal_ranking": 1.1173533731036716,
|
| 122 |
+
"proposal_reconstruction": 0.060967493802309036,
|
| 123 |
+
"proposal_success": 0.6362337801191542,
|
| 124 |
+
"reocclusion": 0.18877888905505338,
|
| 125 |
+
"support_mode": 8.590733412145508e-06,
|
| 126 |
+
"total": 1.164333701133728,
|
| 127 |
+
"uncertainty": 0.0018725828914385703,
|
| 128 |
+
"world_model": 1.5141921705669827
|
| 129 |
+
}
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"epoch": 3,
|
| 133 |
+
"train": {
|
| 134 |
+
"action": 0.013463407677287856,
|
| 135 |
+
"arm_role": 2.1441115283238332e-07,
|
| 136 |
+
"belief": 0.09542769007384777,
|
| 137 |
+
"corridor": 0.19438757871588072,
|
| 138 |
+
"disturbance": 0.0012542814802145585,
|
| 139 |
+
"persistence": 1.0960917932291825,
|
| 140 |
+
"phase": 0.39501943811774254,
|
| 141 |
+
"planner_ranking": 0.9881478076179823,
|
| 142 |
+
"planner_risk": 0.008955476262296239,
|
| 143 |
+
"planner_success": 0.37380507588386536,
|
| 144 |
+
"proposal_ranking": 1.1260421325763066,
|
| 145 |
+
"proposal_reconstruction": 0.05954852948586146,
|
| 146 |
+
"proposal_success": 0.6345230092604955,
|
| 147 |
+
"reocclusion": 0.222653156456848,
|
| 148 |
+
"support_mode": 1.0468997061252594e-05,
|
| 149 |
+
"total": 1.029868942995866,
|
| 150 |
+
"uncertainty": 0.001529014749394264,
|
| 151 |
+
"world_model": 1.3265959272782009
|
| 152 |
+
},
|
| 153 |
+
"val": {
|
| 154 |
+
"action": 0.014502381595472494,
|
| 155 |
+
"arm_role": 1.8074554909554132e-07,
|
| 156 |
+
"belief": 0.09227573540475634,
|
| 157 |
+
"corridor": 0.19471332927544913,
|
| 158 |
+
"disturbance": 0.0014174091500333613,
|
| 159 |
+
"persistence": 1.2068392270141177,
|
| 160 |
+
"phase": 0.44181974563333726,
|
| 161 |
+
"planner_ranking": 1.1894211106830173,
|
| 162 |
+
"planner_risk": 0.008801783072865672,
|
| 163 |
+
"planner_success": 0.5882998870478736,
|
| 164 |
+
"proposal_ranking": 1.1234880420896742,
|
| 165 |
+
"proposal_reconstruction": 0.06003963781727685,
|
| 166 |
+
"proposal_success": 0.6316338512632582,
|
| 167 |
+
"reocclusion": 0.18501534023218685,
|
| 168 |
+
"support_mode": 1.0471259353532028e-05,
|
| 169 |
+
"total": 1.1484977669186063,
|
| 170 |
+
"uncertainty": 0.0011159069642114143,
|
| 171 |
+
"world_model": 1.402906020482381
|
| 172 |
+
}
|
| 173 |
+
},
|
| 174 |
+
{
|
| 175 |
+
"epoch": 4,
|
| 176 |
+
"train": {
|
| 177 |
+
"action": 0.013384843982445696,
|
| 178 |
+
"arm_role": 1.8212530328298726e-07,
|
| 179 |
+
"belief": 0.0940939641247193,
|
| 180 |
+
"corridor": 0.19484392801920572,
|
| 181 |
+
"disturbance": 0.0013209530419165578,
|
| 182 |
+
"persistence": 1.1009935376544793,
|
| 183 |
+
"phase": 0.39735961332917213,
|
| 184 |
+
"planner_ranking": 0.9340380703409513,
|
| 185 |
+
"planner_risk": 0.009769223863258958,
|
| 186 |
+
"planner_success": 0.35210378592212993,
|
| 187 |
+
"proposal_ranking": 1.1286269277334213,
|
| 188 |
+
"proposal_reconstruction": 0.05935003887861967,
|
| 189 |
+
"proposal_success": 0.6315460602442423,
|
| 190 |
+
"reocclusion": 0.22644051164388657,
|
| 191 |
+
"support_mode": 8.073221484513246e-06,
|
| 192 |
+
"total": 1.0135142927368481,
|
| 193 |
+
"uncertainty": 0.0014145106833893806,
|
| 194 |
+
"world_model": 1.3229995171229045
|
| 195 |
+
},
|
| 196 |
+
"val": {
|
| 197 |
+
"action": 0.014795408584177494,
|
| 198 |
+
"arm_role": 2.842257956893314e-07,
|
| 199 |
+
"belief": 0.09148034122255114,
|
| 200 |
+
"corridor": 0.1952296942472458,
|
| 201 |
+
"disturbance": 0.0014219412179146376,
|
| 202 |
+
"persistence": 1.2065883709324732,
|
| 203 |
+
"phase": 0.4573909127049976,
|
| 204 |
+
"planner_ranking": 1.264210171169705,
|
| 205 |
+
"planner_risk": 0.008240946154627535,
|
| 206 |
+
"planner_success": 0.6136878695752885,
|
| 207 |
+
"proposal_ranking": 1.1302801105711195,
|
| 208 |
+
"proposal_reconstruction": 0.06015601671404309,
|
| 209 |
+
"proposal_success": 0.6339429616928101,
|
| 210 |
+
"reocclusion": 0.18241143381843963,
|
| 211 |
+
"support_mode": 5.932560725341318e-06,
|
| 212 |
+
"total": 1.1784167952007718,
|
| 213 |
+
"uncertainty": 0.0014715428373569415,
|
| 214 |
+
"world_model": 1.4285426007376776
|
| 215 |
+
}
|
| 216 |
+
},
|
| 217 |
+
{
|
| 218 |
+
"epoch": 5,
|
| 219 |
+
"train": {
|
| 220 |
+
"action": 0.013003619310135642,
|
| 221 |
+
"arm_role": 1.6706892166003703e-07,
|
| 222 |
+
"belief": 0.09372370348622401,
|
| 223 |
+
"corridor": 0.19377528379360834,
|
| 224 |
+
"disturbance": 0.0012515889684436843,
|
| 225 |
+
"persistence": 1.087764959782362,
|
| 226 |
+
"phase": 0.39413714533050853,
|
| 227 |
+
"planner_ranking": 0.8574716374278069,
|
| 228 |
+
"planner_risk": 0.00931960518937558,
|
| 229 |
+
"planner_success": 0.32699467862645787,
|
| 230 |
+
"proposal_ranking": 1.1296403209368389,
|
| 231 |
+
"proposal_reconstruction": 0.058937749825417995,
|
| 232 |
+
"proposal_success": 0.6314020653565725,
|
| 233 |
+
"reocclusion": 0.22137584226826826,
|
| 234 |
+
"support_mode": 6.786340643808823e-06,
|
| 235 |
+
"total": 0.9859138304988543,
|
| 236 |
+
"uncertainty": 0.0011173486830860686,
|
| 237 |
+
"world_model": 1.3007333129644394
|
| 238 |
+
},
|
| 239 |
+
"val": {
|
| 240 |
+
"action": 0.014327830738491483,
|
| 241 |
+
"arm_role": 2.553892981538297e-07,
|
| 242 |
+
"belief": 0.0923299789428711,
|
| 243 |
+
"corridor": 0.19848757651117113,
|
| 244 |
+
"disturbance": 0.0011894687777385116,
|
| 245 |
+
"persistence": 1.2340974575943418,
|
| 246 |
+
"phase": 0.4644339034954707,
|
| 247 |
+
"planner_ranking": 1.3578486972384982,
|
| 248 |
+
"planner_risk": 0.009015874264554845,
|
| 249 |
+
"planner_success": 0.6275921530193753,
|
| 250 |
+
"proposal_ranking": 1.1163699362013075,
|
| 251 |
+
"proposal_reconstruction": 0.05984223840965165,
|
| 252 |
+
"proposal_success": 0.6348666879865859,
|
| 253 |
+
"reocclusion": 0.20307053801500136,
|
| 254 |
+
"support_mode": 3.7443181150188643e-06,
|
| 255 |
+
"total": 1.2028855217827692,
|
| 256 |
+
"uncertainty": 0.0018055843215228783,
|
| 257 |
+
"world_model": 1.4401142862108018
|
| 258 |
+
}
|
| 259 |
+
},
|
| 260 |
+
{
|
| 261 |
+
"epoch": 6,
|
| 262 |
+
"train": {
|
| 263 |
+
"action": 0.012725909279348949,
|
| 264 |
+
"arm_role": 1.4006056699618816e-07,
|
| 265 |
+
"belief": 0.09327782255907853,
|
| 266 |
+
"corridor": 0.19324024704595408,
|
| 267 |
+
"disturbance": 0.0013581588767313708,
|
| 268 |
+
"persistence": 1.0872996002435684,
|
| 269 |
+
"phase": 0.3942833219965299,
|
| 270 |
+
"planner_ranking": 0.8039915859699249,
|
| 271 |
+
"planner_risk": 0.009058927069418132,
|
| 272 |
+
"planner_success": 0.3132968743642171,
|
| 273 |
+
"proposal_ranking": 1.1225138505299885,
|
| 274 |
+
"proposal_reconstruction": 0.058770577888935804,
|
| 275 |
+
"proposal_success": 0.6332228208581606,
|
| 276 |
+
"reocclusion": 0.22015962299580374,
|
| 277 |
+
"support_mode": 4.966122408707936e-06,
|
| 278 |
+
"total": 0.9676197816928228,
|
| 279 |
+
"uncertainty": 0.0011598596538533457,
|
| 280 |
+
"world_model": 1.2878785928090413
|
| 281 |
+
},
|
| 282 |
+
"val": {
|
| 283 |
+
"action": 0.014496596633560128,
|
| 284 |
+
"arm_role": 2.192401922229692e-07,
|
| 285 |
+
"belief": 0.090823319223192,
|
| 286 |
+
"corridor": 0.19339712626404232,
|
| 287 |
+
"disturbance": 0.0016455024532559845,
|
| 288 |
+
"persistence": 1.2035431563854218,
|
| 289 |
+
"phase": 0.45077220764425063,
|
| 290 |
+
"planner_ranking": 1.4061412149005466,
|
| 291 |
+
"planner_risk": 0.008559927913463779,
|
| 292 |
+
"planner_success": 0.6576948232120938,
|
| 293 |
+
"proposal_ranking": 1.115302946832445,
|
| 294 |
+
"proposal_reconstruction": 0.059833423958884344,
|
| 295 |
+
"proposal_success": 0.6364065806070963,
|
| 296 |
+
"reocclusion": 0.1801526459554831,
|
| 297 |
+
"support_mode": 4.350292038503136e-06,
|
| 298 |
+
"total": 1.2042852375242445,
|
| 299 |
+
"uncertainty": 0.0007912304588697023,
|
| 300 |
+
"world_model": 1.3813848230573866
|
| 301 |
+
}
|
| 302 |
+
},
|
| 303 |
+
{
|
| 304 |
+
"epoch": 7,
|
| 305 |
+
"train": {
|
| 306 |
+
"action": 0.012622703972738236,
|
| 307 |
+
"arm_role": 1.0477378964424133e-07,
|
| 308 |
+
"belief": 0.09258855165292819,
|
| 309 |
+
"corridor": 0.19252262574930987,
|
| 310 |
+
"disturbance": 0.0013018598037888296,
|
| 311 |
+
"persistence": 1.066667130837838,
|
| 312 |
+
"phase": 0.3908200403675437,
|
| 313 |
+
"planner_ranking": 0.7140753443042437,
|
| 314 |
+
"planner_risk": 0.009592532160847137,
|
| 315 |
+
"planner_success": 0.2998263432333867,
|
| 316 |
+
"proposal_ranking": 1.125225270787875,
|
| 317 |
+
"proposal_reconstruction": 0.05837386598189672,
|
| 318 |
+
"proposal_success": 0.630388061205546,
|
| 319 |
+
"reocclusion": 0.21032434065515795,
|
| 320 |
+
"support_mode": 4.515569240008214e-06,
|
| 321 |
+
"total": 0.9377426480253538,
|
| 322 |
+
"uncertainty": 0.0009036514068914888,
|
| 323 |
+
"world_model": 1.2537205666303635
|
| 324 |
+
},
|
| 325 |
+
"val": {
|
| 326 |
+
"action": 0.01393873720533318,
|
| 327 |
+
"arm_role": 1.9219735413066195e-07,
|
| 328 |
+
"belief": 0.09070102870464325,
|
| 329 |
+
"corridor": 0.19287915196683672,
|
| 330 |
+
"disturbance": 0.0018688688416861826,
|
| 331 |
+
"persistence": 1.1937838825914595,
|
| 332 |
+
"phase": 0.44357551468743217,
|
| 333 |
+
"planner_ranking": 1.4278014368481107,
|
| 334 |
+
"planner_risk": 0.007952027747200595,
|
| 335 |
+
"planner_success": 0.6735637684663137,
|
| 336 |
+
"proposal_ranking": 1.121930678685506,
|
| 337 |
+
"proposal_reconstruction": 0.059713507278098,
|
| 338 |
+
"proposal_success": 0.6304158767064413,
|
| 339 |
+
"reocclusion": 0.17670889291912317,
|
| 340 |
+
"support_mode": 4.034886008715451e-06,
|
| 341 |
+
"total": 1.211510909928216,
|
| 342 |
+
"uncertainty": 0.0006605643041742345,
|
| 343 |
+
"world_model": 1.383787711461385
|
| 344 |
+
}
|
| 345 |
+
}
|
| 346 |
+
]
|
artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/summary.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"experiment_name": "proxy_interaction_state_recency_oracleft",
|
| 3 |
+
"device": "cuda",
|
| 4 |
+
"best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/checkpoint_best.pt",
|
| 5 |
+
"final_train_total": 0.9377426480253538,
|
| 6 |
+
"final_val_total": 1.211510909928216,
|
| 7 |
+
"num_train_samples": 380,
|
| 8 |
+
"num_val_samples": 131,
|
| 9 |
+
"planner_mode": "trainable",
|
| 10 |
+
"frozen_modules": [],
|
| 11 |
+
"init_info": {
|
| 12 |
+
"path": "/workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt",
|
| 13 |
+
"missing_keys": [],
|
| 14 |
+
"unexpected_keys": []
|
| 15 |
+
}
|
| 16 |
+
}
|
artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.json
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"backbone_clip": {
|
| 3 |
+
"per_task_success": {
|
| 4 |
+
"foliage_proxy": 0.2916666666666667,
|
| 5 |
+
"bag_proxy": 0.4166666666666667,
|
| 6 |
+
"cloth_proxy": 0.2916666666666667
|
| 7 |
+
},
|
| 8 |
+
"mean_success": 0.3333333333333333,
|
| 9 |
+
"visibility_integral": 5.090650259620613,
|
| 10 |
+
"corridor_availability": 0.30186899772120845,
|
| 11 |
+
"reocclusion_rate": 0.013541666666666667,
|
| 12 |
+
"persistence_horizon_mae": 0.0,
|
| 13 |
+
"disturbance_cost": 0.36051484262053335
|
| 14 |
+
},
|
| 15 |
+
"reveal_clip": {
|
| 16 |
+
"per_task_success": {
|
| 17 |
+
"foliage_proxy": 0.20833333333333334,
|
| 18 |
+
"bag_proxy": 0.25,
|
| 19 |
+
"cloth_proxy": 0.16666666666666666
|
| 20 |
+
},
|
| 21 |
+
"mean_success": 0.20833333333333334,
|
| 22 |
+
"visibility_integral": 48.42640474935373,
|
| 23 |
+
"corridor_availability": 0.8251730443702804,
|
| 24 |
+
"reocclusion_rate": 0.06718750000000001,
|
| 25 |
+
"persistence_horizon_mae": 0.9353625932762888,
|
| 26 |
+
"disturbance_cost": 0.7097413324647479
|
| 27 |
+
}
|
| 28 |
+
}
|
artifacts/outputs/interaction_debug/reveal_eval_clip_baselines_commit4/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## backbone_clip
|
| 4 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
|
| 5 |
+
- mean_success: 0.333
|
| 6 |
+
- visibility_integral: 5.091
|
| 7 |
+
- corridor_availability: 0.302
|
| 8 |
+
- reocclusion_rate: 0.014
|
| 9 |
+
- persistence_horizon_mae: 0.000
|
| 10 |
+
- disturbance_cost: 0.361
|
| 11 |
+
- foliage_proxy_success: 0.292
|
| 12 |
+
- bag_proxy_success: 0.417
|
| 13 |
+
- cloth_proxy_success: 0.292
|
| 14 |
+
|
| 15 |
+
## reveal_clip
|
| 16 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_reveal_state_clip/checkpoint_best.pt
|
| 17 |
+
- mean_success: 0.208
|
| 18 |
+
- visibility_integral: 48.426
|
| 19 |
+
- corridor_availability: 0.825
|
| 20 |
+
- reocclusion_rate: 0.067
|
| 21 |
+
- persistence_horizon_mae: 0.935
|
| 22 |
+
- disturbance_cost: 0.710
|
| 23 |
+
- foliage_proxy_success: 0.208
|
| 24 |
+
- bag_proxy_success: 0.250
|
| 25 |
+
- cloth_proxy_success: 0.167
|
artifacts/outputs/interaction_debug/reveal_eval_commit2_compare/reveal_benchmark.json
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"interaction": {
|
| 3 |
+
"per_task_success": {
|
| 4 |
+
"foliage_proxy": 0.4166666666666667,
|
| 5 |
+
"bag_proxy": 0.5416666666666666,
|
| 6 |
+
"cloth_proxy": 0.5833333333333334
|
| 7 |
+
},
|
| 8 |
+
"mean_success": 0.5138888888888888,
|
| 9 |
+
"visibility_integral": 32.35977659953965,
|
| 10 |
+
"corridor_availability": 0.8802236508991983,
|
| 11 |
+
"reocclusion_rate": 0.0,
|
| 12 |
+
"persistence_horizon_mae": 1.1419724687506017,
|
| 13 |
+
"disturbance_cost": 0.49480460506553453
|
| 14 |
+
},
|
| 15 |
+
"backbone": {
|
| 16 |
+
"per_task_success": {
|
| 17 |
+
"foliage_proxy": 0.4166666666666667,
|
| 18 |
+
"bag_proxy": 0.5833333333333334,
|
| 19 |
+
"cloth_proxy": 0.625
|
| 20 |
+
},
|
| 21 |
+
"mean_success": 0.5416666666666666,
|
| 22 |
+
"visibility_integral": 30.58145251042313,
|
| 23 |
+
"corridor_availability": 0.8679845299985673,
|
| 24 |
+
"reocclusion_rate": 0.0,
|
| 25 |
+
"persistence_horizon_mae": 0.0,
|
| 26 |
+
"disturbance_cost": 0.47382067630274427
|
| 27 |
+
},
|
| 28 |
+
"reveal": {
|
| 29 |
+
"per_task_success": {
|
| 30 |
+
"foliage_proxy": 0.4166666666666667,
|
| 31 |
+
"bag_proxy": 0.5833333333333334,
|
| 32 |
+
"cloth_proxy": 0.6666666666666666
|
| 33 |
+
},
|
| 34 |
+
"mean_success": 0.5555555555555555,
|
| 35 |
+
"visibility_integral": 29.508656750122707,
|
| 36 |
+
"corridor_availability": 0.8612986240122054,
|
| 37 |
+
"reocclusion_rate": 0.0,
|
| 38 |
+
"persistence_horizon_mae": 2.3659667054579057,
|
| 39 |
+
"disturbance_cost": 0.47035404020506477
|
| 40 |
+
}
|
| 41 |
+
}
|
artifacts/outputs/interaction_debug/reveal_eval_commit2_compare/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## interaction
|
| 4 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
|
| 5 |
+
- mean_success: 0.514
|
| 6 |
+
- visibility_integral: 32.360
|
| 7 |
+
- corridor_availability: 0.880
|
| 8 |
+
- reocclusion_rate: 0.000
|
| 9 |
+
- persistence_horizon_mae: 1.142
|
| 10 |
+
- disturbance_cost: 0.495
|
| 11 |
+
- foliage_proxy_success: 0.417
|
| 12 |
+
- bag_proxy_success: 0.542
|
| 13 |
+
- cloth_proxy_success: 0.583
|
| 14 |
+
|
| 15 |
+
## backbone
|
| 16 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only/checkpoint_best.pt
|
| 17 |
+
- mean_success: 0.542
|
| 18 |
+
- visibility_integral: 30.581
|
| 19 |
+
- corridor_availability: 0.868
|
| 20 |
+
- reocclusion_rate: 0.000
|
| 21 |
+
- persistence_horizon_mae: 0.000
|
| 22 |
+
- disturbance_cost: 0.474
|
| 23 |
+
- foliage_proxy_success: 0.417
|
| 24 |
+
- bag_proxy_success: 0.583
|
| 25 |
+
- cloth_proxy_success: 0.625
|
| 26 |
+
|
| 27 |
+
## reveal
|
| 28 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_reveal_state/checkpoint_best.pt
|
| 29 |
+
- mean_success: 0.556
|
| 30 |
+
- visibility_integral: 29.509
|
| 31 |
+
- corridor_availability: 0.861
|
| 32 |
+
- reocclusion_rate: 0.000
|
| 33 |
+
- persistence_horizon_mae: 2.366
|
| 34 |
+
- disturbance_cost: 0.470
|
| 35 |
+
- foliage_proxy_success: 0.417
|
| 36 |
+
- bag_proxy_success: 0.583
|
| 37 |
+
- cloth_proxy_success: 0.667
|
artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_recency_evalonly/reveal_benchmark.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"interaction": {
|
| 3 |
+
"per_task_success": {
|
| 4 |
+
"foliage_proxy": 0.4166666666666667,
|
| 5 |
+
"bag_proxy": 0.5416666666666666,
|
| 6 |
+
"cloth_proxy": 0.625
|
| 7 |
+
},
|
| 8 |
+
"mean_success": 0.5277777777777778,
|
| 9 |
+
"visibility_integral": 32.95856812927458,
|
| 10 |
+
"corridor_availability": 0.8741476759314537,
|
| 11 |
+
"reocclusion_rate": 0.0006944444444444445,
|
| 12 |
+
"persistence_horizon_mae": 1.1703627435402033,
|
| 13 |
+
"disturbance_cost": 0.42908077666329014
|
| 14 |
+
}
|
| 15 |
+
}
|
artifacts/outputs/interaction_debug/reveal_eval_interaction_actionhist_recency_evalonly/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## interaction
|
| 4 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
|
| 5 |
+
- mean_success: 0.528
|
| 6 |
+
- visibility_integral: 32.959
|
| 7 |
+
- corridor_availability: 0.874
|
| 8 |
+
- reocclusion_rate: 0.001
|
| 9 |
+
- persistence_horizon_mae: 1.170
|
| 10 |
+
- disturbance_cost: 0.429
|
| 11 |
+
- foliage_proxy_success: 0.417
|
| 12 |
+
- bag_proxy_success: 0.542
|
| 13 |
+
- cloth_proxy_success: 0.625
|
artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.json
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"interaction_clip": {
|
| 3 |
+
"per_task_success": {
|
| 4 |
+
"foliage_proxy": 0.2916666666666667,
|
| 5 |
+
"bag_proxy": 0.2916666666666667,
|
| 6 |
+
"cloth_proxy": 0.3333333333333333
|
| 7 |
+
},
|
| 8 |
+
"mean_success": 0.3055555555555556,
|
| 9 |
+
"visibility_integral": 10.379729785852962,
|
| 10 |
+
"corridor_availability": 0.38910322284532917,
|
| 11 |
+
"reocclusion_rate": 0.026909722222222224,
|
| 12 |
+
"persistence_horizon_mae": 3.8014686041765726,
|
| 13 |
+
"disturbance_cost": 0.392014082081409
|
| 14 |
+
},
|
| 15 |
+
"backbone_clip": {
|
| 16 |
+
"per_task_success": {
|
| 17 |
+
"foliage_proxy": 0.2916666666666667,
|
| 18 |
+
"bag_proxy": 0.4166666666666667,
|
| 19 |
+
"cloth_proxy": 0.2916666666666667
|
| 20 |
+
},
|
| 21 |
+
"mean_success": 0.3333333333333333,
|
| 22 |
+
"visibility_integral": 5.090670637786388,
|
| 23 |
+
"corridor_availability": 0.30186899772120845,
|
| 24 |
+
"reocclusion_rate": 0.013541666666666667,
|
| 25 |
+
"persistence_horizon_mae": 0.0,
|
| 26 |
+
"disturbance_cost": 0.36051381931045196
|
| 27 |
+
},
|
| 28 |
+
"reveal_clip": {
|
| 29 |
+
"per_task_success": {
|
| 30 |
+
"foliage_proxy": 0.20833333333333334,
|
| 31 |
+
"bag_proxy": 0.25,
|
| 32 |
+
"cloth_proxy": 0.16666666666666666
|
| 33 |
+
},
|
| 34 |
+
"mean_success": 0.20833333333333334,
|
| 35 |
+
"visibility_integral": 48.426281129320465,
|
| 36 |
+
"corridor_availability": 0.8251730443702804,
|
| 37 |
+
"reocclusion_rate": 0.06718750000000001,
|
| 38 |
+
"persistence_horizon_mae": 0.9353624902194482,
|
| 39 |
+
"disturbance_cost": 0.709741123020649
|
| 40 |
+
}
|
| 41 |
+
}
|
artifacts/outputs/interaction_debug/reveal_eval_interaction_clip_commit4_compare/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## interaction_clip
|
| 4 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_clip_actionhist/checkpoint_best.pt
|
| 5 |
+
- mean_success: 0.306
|
| 6 |
+
- visibility_integral: 10.380
|
| 7 |
+
- corridor_availability: 0.389
|
| 8 |
+
- reocclusion_rate: 0.027
|
| 9 |
+
- persistence_horizon_mae: 3.801
|
| 10 |
+
- disturbance_cost: 0.392
|
| 11 |
+
- foliage_proxy_success: 0.292
|
| 12 |
+
- bag_proxy_success: 0.292
|
| 13 |
+
- cloth_proxy_success: 0.333
|
| 14 |
+
|
| 15 |
+
## backbone_clip
|
| 16 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
|
| 17 |
+
- mean_success: 0.333
|
| 18 |
+
- visibility_integral: 5.091
|
| 19 |
+
- corridor_availability: 0.302
|
| 20 |
+
- reocclusion_rate: 0.014
|
| 21 |
+
- persistence_horizon_mae: 0.000
|
| 22 |
+
- disturbance_cost: 0.361
|
| 23 |
+
- foliage_proxy_success: 0.292
|
| 24 |
+
- bag_proxy_success: 0.417
|
| 25 |
+
- cloth_proxy_success: 0.292
|
| 26 |
+
|
| 27 |
+
## reveal_clip
|
| 28 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_reveal_state_clip/checkpoint_best.pt
|
| 29 |
+
- mean_success: 0.208
|
| 30 |
+
- visibility_integral: 48.426
|
| 31 |
+
- corridor_availability: 0.825
|
| 32 |
+
- reocclusion_rate: 0.067
|
| 33 |
+
- persistence_horizon_mae: 0.935
|
| 34 |
+
- disturbance_cost: 0.710
|
| 35 |
+
- foliage_proxy_success: 0.208
|
| 36 |
+
- bag_proxy_success: 0.250
|
| 37 |
+
- cloth_proxy_success: 0.167
|
artifacts/outputs/interaction_debug/reveal_eval_interaction_recency_oracleft_commit4_compare/reveal_benchmark.json
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"interaction": {
|
| 3 |
+
"per_task_success": {
|
| 4 |
+
"foliage_proxy": 0.4166666666666667,
|
| 5 |
+
"bag_proxy": 0.5833333333333334,
|
| 6 |
+
"cloth_proxy": 0.5833333333333334
|
| 7 |
+
},
|
| 8 |
+
"mean_success": 0.5277777777777778,
|
| 9 |
+
"visibility_integral": 31.56379758318265,
|
| 10 |
+
"corridor_availability": 0.8745781282583872,
|
| 11 |
+
"reocclusion_rate": 0.0,
|
| 12 |
+
"persistence_horizon_mae": 1.0204093086471828,
|
| 13 |
+
"disturbance_cost": 0.4148087627771828
|
| 14 |
+
},
|
| 15 |
+
"backbone": {
|
| 16 |
+
"per_task_success": {
|
| 17 |
+
"foliage_proxy": 0.4166666666666667,
|
| 18 |
+
"bag_proxy": 0.625,
|
| 19 |
+
"cloth_proxy": 0.6666666666666666
|
| 20 |
+
},
|
| 21 |
+
"mean_success": 0.5694444444444445,
|
| 22 |
+
"visibility_integral": 28.655961725446915,
|
| 23 |
+
"corridor_availability": 0.7943478326002756,
|
| 24 |
+
"reocclusion_rate": 0.07666819352674617,
|
| 25 |
+
"persistence_horizon_mae": 0.0,
|
| 26 |
+
"disturbance_cost": 0.3941483147856262
|
| 27 |
+
},
|
| 28 |
+
"reveal": {
|
| 29 |
+
"per_task_success": {
|
| 30 |
+
"foliage_proxy": 0.4166666666666667,
|
| 31 |
+
"bag_proxy": 0.5833333333333334,
|
| 32 |
+
"cloth_proxy": 0.625
|
| 33 |
+
},
|
| 34 |
+
"mean_success": 0.5416666666666666,
|
| 35 |
+
"visibility_integral": 30.121625943316353,
|
| 36 |
+
"corridor_availability": 0.8142780106928613,
|
| 37 |
+
"reocclusion_rate": 0.051547468734968724,
|
| 38 |
+
"persistence_horizon_mae": 2.102369644222497,
|
| 39 |
+
"disturbance_cost": 0.42389609825073016
|
| 40 |
+
}
|
| 41 |
+
}
|
artifacts/outputs/interaction_debug/reveal_eval_interaction_recency_oracleft_commit4_compare/reveal_benchmark.md
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reveal Proxy Benchmark
|
| 2 |
+
|
| 3 |
+
## interaction
|
| 4 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_recency_oracleft/checkpoint_best.pt
|
| 5 |
+
- mean_success: 0.528
|
| 6 |
+
- visibility_integral: 31.564
|
| 7 |
+
- corridor_availability: 0.875
|
| 8 |
+
- reocclusion_rate: 0.000
|
| 9 |
+
- persistence_horizon_mae: 1.020
|
| 10 |
+
- disturbance_cost: 0.415
|
| 11 |
+
- foliage_proxy_success: 0.417
|
| 12 |
+
- bag_proxy_success: 0.583
|
| 13 |
+
- cloth_proxy_success: 0.583
|
| 14 |
+
|
| 15 |
+
## backbone
|
| 16 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only/checkpoint_best.pt
|
| 17 |
+
- mean_success: 0.569
|
| 18 |
+
- visibility_integral: 28.656
|
| 19 |
+
- corridor_availability: 0.794
|
| 20 |
+
- reocclusion_rate: 0.077
|
| 21 |
+
- persistence_horizon_mae: 0.000
|
| 22 |
+
- disturbance_cost: 0.394
|
| 23 |
+
- foliage_proxy_success: 0.417
|
| 24 |
+
- bag_proxy_success: 0.625
|
| 25 |
+
- cloth_proxy_success: 0.667
|
| 26 |
+
|
| 27 |
+
## reveal
|
| 28 |
+
- checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_reveal_state/checkpoint_best.pt
|
| 29 |
+
- mean_success: 0.542
|
| 30 |
+
- visibility_integral: 30.122
|
| 31 |
+
- corridor_availability: 0.814
|
| 32 |
+
- reocclusion_rate: 0.052
|
| 33 |
+
- persistence_horizon_mae: 2.102
|
| 34 |
+
- disturbance_cost: 0.424
|
| 35 |
+
- foliage_proxy_success: 0.417
|
| 36 |
+
- bag_proxy_success: 0.583
|
| 37 |
+
- cloth_proxy_success: 0.625
|
artifacts/outputs/interaction_debug/smoke_checks_actionhist/smoke_checks.json
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"proxy": {
|
| 3 |
+
"losses": {
|
| 4 |
+
"action": 0.7160568833351135,
|
| 5 |
+
"phase": 1.4088108539581299,
|
| 6 |
+
"arm_role": 1.5109761953353882,
|
| 7 |
+
"support_mode": 1.3240256309509277,
|
| 8 |
+
"corridor": 0.6577756404876709,
|
| 9 |
+
"persistence": 1.7929389476776123,
|
| 10 |
+
"disturbance": 0.17437878251075745,
|
| 11 |
+
"belief": 0.5924442410469055,
|
| 12 |
+
"reocclusion": 0.780390202999115,
|
| 13 |
+
"uncertainty": 0.7132201194763184,
|
| 14 |
+
"world_model": 3.2925755977630615,
|
| 15 |
+
"planner_success": 0.7400866150856018,
|
| 16 |
+
"planner_risk": 0.032505519688129425,
|
| 17 |
+
"planner_ranking": 0.2076394408941269,
|
| 18 |
+
"proposal_reconstruction": 0.8539058566093445,
|
| 19 |
+
"proposal_success": 0.6675869226455688,
|
| 20 |
+
"proposal_ranking": 0.12013991177082062,
|
| 21 |
+
"total": 1.9776512384414673
|
| 22 |
+
},
|
| 23 |
+
"grad_norm": 3.735501527786255,
|
| 24 |
+
"candidate_shape": [
|
| 25 |
+
2,
|
| 26 |
+
4,
|
| 27 |
+
4,
|
| 28 |
+
14
|
| 29 |
+
],
|
| 30 |
+
"rollout_phase_shape": [
|
| 31 |
+
2,
|
| 32 |
+
4,
|
| 33 |
+
4,
|
| 34 |
+
5
|
| 35 |
+
]
|
| 36 |
+
},
|
| 37 |
+
"rlbench": {
|
| 38 |
+
"losses": {
|
| 39 |
+
"action": 0.6058900356292725,
|
| 40 |
+
"world_model": 0.0,
|
| 41 |
+
"planner_success": 0.0,
|
| 42 |
+
"planner_risk": 0.0,
|
| 43 |
+
"planner_ranking": 0.0,
|
| 44 |
+
"proposal_reconstruction": 0.0,
|
| 45 |
+
"proposal_success": 0.0,
|
| 46 |
+
"proposal_ranking": 0.0,
|
| 47 |
+
"total": 0.6058900356292725
|
| 48 |
+
},
|
| 49 |
+
"grad_norm": 2.581531286239624,
|
| 50 |
+
"candidate_shape": [
|
| 51 |
+
2,
|
| 52 |
+
4,
|
| 53 |
+
4,
|
| 54 |
+
14
|
| 55 |
+
],
|
| 56 |
+
"rollout_phase_shape": [
|
| 57 |
+
2,
|
| 58 |
+
4,
|
| 59 |
+
4,
|
| 60 |
+
5
|
| 61 |
+
],
|
| 62 |
+
"planner_enabled_for_eval": true,
|
| 63 |
+
"frozen_modules": [
|
| 64 |
+
"interaction_head",
|
| 65 |
+
"world_model",
|
| 66 |
+
"planner"
|
| 67 |
+
]
|
| 68 |
+
},
|
| 69 |
+
"policy_config": {
|
| 70 |
+
"backbone": {
|
| 71 |
+
"model_name": "openai/clip-vit-base-patch32",
|
| 72 |
+
"hidden_dim": 64,
|
| 73 |
+
"max_text_tokens": 32,
|
| 74 |
+
"freeze_backbone": true,
|
| 75 |
+
"gradient_checkpointing": false,
|
| 76 |
+
"use_dummy_backbone": true
|
| 77 |
+
},
|
| 78 |
+
"fusion": {
|
| 79 |
+
"hidden_dim": 64,
|
| 80 |
+
"num_cameras": 3,
|
| 81 |
+
"num_layers": 2,
|
| 82 |
+
"num_heads": 4,
|
| 83 |
+
"ff_dim": 128,
|
| 84 |
+
"dropout": 0.1,
|
| 85 |
+
"proprio_dim": 32,
|
| 86 |
+
"proprio_tokens": 1
|
| 87 |
+
},
|
| 88 |
+
"memory": {
|
| 89 |
+
"hidden_dim": 64,
|
| 90 |
+
"action_dim": 14,
|
| 91 |
+
"history_steps": 6,
|
| 92 |
+
"num_layers": 2,
|
| 93 |
+
"dropout": 0.1,
|
| 94 |
+
"memory_bank_size": 4,
|
| 95 |
+
"num_heads": 4,
|
| 96 |
+
"max_history_steps": 8
|
| 97 |
+
},
|
| 98 |
+
"decoder": {
|
| 99 |
+
"hidden_dim": 64,
|
| 100 |
+
"num_heads": 4,
|
| 101 |
+
"num_layers": 2,
|
| 102 |
+
"ff_dim": 128,
|
| 103 |
+
"dropout": 0.1,
|
| 104 |
+
"chunk_size": 4,
|
| 105 |
+
"action_dim": 14,
|
| 106 |
+
"arm_action_dim": 7,
|
| 107 |
+
"num_candidates": 4,
|
| 108 |
+
"num_phases": 5,
|
| 109 |
+
"num_arm_roles": 4
|
| 110 |
+
},
|
| 111 |
+
"reveal_head": {
|
| 112 |
+
"hidden_dim": 64,
|
| 113 |
+
"num_support_modes": 3,
|
| 114 |
+
"num_approach_templates": 32,
|
| 115 |
+
"rollout_horizon": 3,
|
| 116 |
+
"belief_map_size": 32,
|
| 117 |
+
"field_size": 16,
|
| 118 |
+
"num_heads": 4,
|
| 119 |
+
"predict_belief_map": true,
|
| 120 |
+
"num_phases": 5,
|
| 121 |
+
"num_arm_roles": 4,
|
| 122 |
+
"num_interaction_tokens": 8
|
| 123 |
+
},
|
| 124 |
+
"world_model": {
|
| 125 |
+
"hidden_dim": 64,
|
| 126 |
+
"action_dim": 14,
|
| 127 |
+
"num_support_modes": 3,
|
| 128 |
+
"num_approach_templates": 32,
|
| 129 |
+
"rollout_horizon": 3,
|
| 130 |
+
"field_size": 16,
|
| 131 |
+
"num_heads": 4,
|
| 132 |
+
"num_phases": 5,
|
| 133 |
+
"num_arm_roles": 4,
|
| 134 |
+
"num_interaction_tokens": 8,
|
| 135 |
+
"belief_map_size": 32,
|
| 136 |
+
"predict_belief_map": true
|
| 137 |
+
},
|
| 138 |
+
"planner": {
|
| 139 |
+
"hidden_dim": 64,
|
| 140 |
+
"num_candidates": 4,
|
| 141 |
+
"action_dim": 14,
|
| 142 |
+
"num_support_modes": 3,
|
| 143 |
+
"utility_margin": 0.1,
|
| 144 |
+
"corridor_weight": 1.0,
|
| 145 |
+
"persistence_weight": 0.5,
|
| 146 |
+
"proposal_weight": 0.5,
|
| 147 |
+
"task_progress_weight": 0.75,
|
| 148 |
+
"disturbance_weight": 0.75,
|
| 149 |
+
"reocclusion_weight": 0.5,
|
| 150 |
+
"visibility_weight": 0.25,
|
| 151 |
+
"num_heads": 4,
|
| 152 |
+
"num_layers": 2,
|
| 153 |
+
"num_phases": 5,
|
| 154 |
+
"num_arm_roles": 4
|
| 155 |
+
}
|
| 156 |
+
}
|
| 157 |
+
}
|
code/reveal_vla_bimanual/eval/run_proxy_diagnostics.py
CHANGED
|
@@ -62,6 +62,7 @@ def main() -> None:
|
|
| 62 |
texts=moved["texts"],
|
| 63 |
history_images=moved.get("history_images"),
|
| 64 |
history_proprio=moved.get("history_proprio"),
|
|
|
|
| 65 |
plan=True,
|
| 66 |
candidate_chunks_override=moved["candidate_action_chunks"],
|
| 67 |
)
|
|
|
|
| 62 |
texts=moved["texts"],
|
| 63 |
history_images=moved.get("history_images"),
|
| 64 |
history_proprio=moved.get("history_proprio"),
|
| 65 |
+
history_actions=moved.get("history_actions"),
|
| 66 |
plan=True,
|
| 67 |
candidate_chunks_override=moved["candidate_action_chunks"],
|
| 68 |
)
|
code/reveal_vla_bimanual/eval/run_reveal_benchmark.py
CHANGED
|
@@ -53,7 +53,18 @@ def load_model(checkpoint_path: str | Path, device: torch.device) -> tuple[torch
|
|
| 53 |
policy_config = _policy_config_from_dict(checkpoint["policy_config"])
|
| 54 |
trainer_config = _trainer_config_from_dict(checkpoint["trainer_config"])
|
| 55 |
model = build_policy(policy_config, trainer_config).to(device)
|
| 56 |
-
model.load_state_dict(checkpoint["state_dict"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
model.eval()
|
| 58 |
return model, checkpoint
|
| 59 |
|
|
@@ -63,11 +74,13 @@ def _prepare_batch(
|
|
| 63 |
device: torch.device,
|
| 64 |
history_images: list[np.ndarray] | None = None,
|
| 65 |
history_proprio: list[np.ndarray] | None = None,
|
|
|
|
| 66 |
) -> dict[str, Any]:
|
| 67 |
images = torch.from_numpy(observation["images"]).permute(0, 3, 1, 2).unsqueeze(0).float() / 255.0
|
| 68 |
proprio = torch.from_numpy(observation["proprio"]).unsqueeze(0).float()
|
| 69 |
history_images = history_images or []
|
| 70 |
history_proprio = history_proprio or []
|
|
|
|
| 71 |
if history_images:
|
| 72 |
history_images_tensor = (
|
| 73 |
torch.from_numpy(np.stack(history_images, axis=0)).permute(0, 1, 4, 2, 3).unsqueeze(0).float() / 255.0
|
|
@@ -81,10 +94,15 @@ def _prepare_batch(
|
|
| 81 |
history_proprio_tensor = torch.from_numpy(np.stack(history_proprio, axis=0)).unsqueeze(0).float()
|
| 82 |
else:
|
| 83 |
history_proprio_tensor = torch.zeros((1, 0, proprio.shape[-1]), dtype=torch.float32)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
return {
|
| 85 |
"images": images.to(device),
|
| 86 |
"history_images": history_images_tensor.to(device),
|
| 87 |
"history_proprio": history_proprio_tensor.to(device),
|
|
|
|
| 88 |
"proprio": proprio.to(device),
|
| 89 |
"texts": [observation["text"]],
|
| 90 |
}
|
|
@@ -109,6 +127,7 @@ def select_chunk(
|
|
| 109 |
"images": images,
|
| 110 |
"history_images": batch.get("history_images"),
|
| 111 |
"history_proprio": batch.get("history_proprio"),
|
|
|
|
| 112 |
"proprio": batch["proprio"],
|
| 113 |
"texts": batch["texts"],
|
| 114 |
}
|
|
@@ -153,6 +172,7 @@ def evaluate_model(
|
|
| 153 |
episodes: int,
|
| 154 |
resolution: int,
|
| 155 |
ablation: str | None = None,
|
|
|
|
| 156 |
) -> BenchmarkMetrics:
|
| 157 |
per_task_success: dict[str, float] = {}
|
| 158 |
visibility_scores = []
|
|
@@ -176,6 +196,7 @@ def evaluate_model(
|
|
| 176 |
episode_disturbance = [float(privileged_state["disturbance_cost"])]
|
| 177 |
history_images: list[np.ndarray] = []
|
| 178 |
history_proprio: list[np.ndarray] = []
|
|
|
|
| 179 |
done = False
|
| 180 |
while not done:
|
| 181 |
batch = _prepare_batch(
|
|
@@ -183,20 +204,10 @@ def evaluate_model(
|
|
| 183 |
device=device,
|
| 184 |
history_images=history_images,
|
| 185 |
history_proprio=history_proprio,
|
|
|
|
| 186 |
)
|
| 187 |
with torch.no_grad():
|
| 188 |
chunk, outputs = select_chunk(model, batch, ablation=ablation)
|
| 189 |
-
action = chunk[0, 0].detach().cpu().numpy()
|
| 190 |
-
if history_steps > 0:
|
| 191 |
-
if len(history_images) >= history_steps:
|
| 192 |
-
history_images = history_images[-history_steps + 1 :]
|
| 193 |
-
history_proprio = history_proprio[-history_steps + 1 :]
|
| 194 |
-
history_images.append(observation["images"])
|
| 195 |
-
history_proprio.append(observation["proprio"])
|
| 196 |
-
observation, _, terminated, truncated, privileged_state = env.step(action)
|
| 197 |
-
episode_visibility.append(float(privileged_state["visibility"]))
|
| 198 |
-
episode_corridor.append(float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any()))
|
| 199 |
-
episode_disturbance.append(float(privileged_state["disturbance_cost"]))
|
| 200 |
state_output = outputs.get("interaction_state")
|
| 201 |
if state_output is None:
|
| 202 |
state_output = outputs.get("reveal_state")
|
|
@@ -207,7 +218,26 @@ def evaluate_model(
|
|
| 207 |
privileged_state["persistence_horizon"],
|
| 208 |
)
|
| 209 |
)
|
| 210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
successes.append(float(privileged_state["retrieval_success"]))
|
| 212 |
visibility_scores.append(visibility_integral(np.asarray(episode_visibility)))
|
| 213 |
corridor_scores.append(corridor_availability(np.asarray(episode_corridor)))
|
|
@@ -246,6 +276,7 @@ def main() -> None:
|
|
| 246 |
parser.add_argument("--ablation", default=None)
|
| 247 |
parser.add_argument("--output-root", default="/workspace/reports/reveal_eval")
|
| 248 |
parser.add_argument("--proxies", nargs="*", default=None)
|
|
|
|
| 249 |
args = parser.parse_args()
|
| 250 |
|
| 251 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
@@ -266,6 +297,7 @@ def main() -> None:
|
|
| 266 |
episodes=args.episodes,
|
| 267 |
resolution=resolution,
|
| 268 |
ablation=args.ablation,
|
|
|
|
| 269 |
)
|
| 270 |
raw_metrics[label] = _metrics_to_dict(metrics)
|
| 271 |
sections[label] = {
|
|
|
|
| 53 |
policy_config = _policy_config_from_dict(checkpoint["policy_config"])
|
| 54 |
trainer_config = _trainer_config_from_dict(checkpoint["trainer_config"])
|
| 55 |
model = build_policy(policy_config, trainer_config).to(device)
|
| 56 |
+
incompatible = model.load_state_dict(checkpoint["state_dict"], strict=False)
|
| 57 |
+
allowed_missing = {
|
| 58 |
+
key
|
| 59 |
+
for key in incompatible.missing_keys
|
| 60 |
+
if key.startswith("memory.action_proj.") or key.endswith("arm_identity.weight")
|
| 61 |
+
}
|
| 62 |
+
missing_other = sorted(set(incompatible.missing_keys) - allowed_missing)
|
| 63 |
+
if missing_other or incompatible.unexpected_keys:
|
| 64 |
+
raise RuntimeError(
|
| 65 |
+
"Checkpoint load failed due to incompatible weights. "
|
| 66 |
+
f"Missing keys: {missing_other}. Unexpected keys: {list(incompatible.unexpected_keys)}"
|
| 67 |
+
)
|
| 68 |
model.eval()
|
| 69 |
return model, checkpoint
|
| 70 |
|
|
|
|
| 74 |
device: torch.device,
|
| 75 |
history_images: list[np.ndarray] | None = None,
|
| 76 |
history_proprio: list[np.ndarray] | None = None,
|
| 77 |
+
history_actions: list[np.ndarray] | None = None,
|
| 78 |
) -> dict[str, Any]:
|
| 79 |
images = torch.from_numpy(observation["images"]).permute(0, 3, 1, 2).unsqueeze(0).float() / 255.0
|
| 80 |
proprio = torch.from_numpy(observation["proprio"]).unsqueeze(0).float()
|
| 81 |
history_images = history_images or []
|
| 82 |
history_proprio = history_proprio or []
|
| 83 |
+
history_actions = history_actions or []
|
| 84 |
if history_images:
|
| 85 |
history_images_tensor = (
|
| 86 |
torch.from_numpy(np.stack(history_images, axis=0)).permute(0, 1, 4, 2, 3).unsqueeze(0).float() / 255.0
|
|
|
|
| 94 |
history_proprio_tensor = torch.from_numpy(np.stack(history_proprio, axis=0)).unsqueeze(0).float()
|
| 95 |
else:
|
| 96 |
history_proprio_tensor = torch.zeros((1, 0, proprio.shape[-1]), dtype=torch.float32)
|
| 97 |
+
if history_actions:
|
| 98 |
+
history_actions_tensor = torch.from_numpy(np.stack(history_actions, axis=0)).unsqueeze(0).float()
|
| 99 |
+
else:
|
| 100 |
+
history_actions_tensor = torch.zeros((1, 0, 14), dtype=torch.float32)
|
| 101 |
return {
|
| 102 |
"images": images.to(device),
|
| 103 |
"history_images": history_images_tensor.to(device),
|
| 104 |
"history_proprio": history_proprio_tensor.to(device),
|
| 105 |
+
"history_actions": history_actions_tensor.to(device),
|
| 106 |
"proprio": proprio.to(device),
|
| 107 |
"texts": [observation["text"]],
|
| 108 |
}
|
|
|
|
| 127 |
"images": images,
|
| 128 |
"history_images": batch.get("history_images"),
|
| 129 |
"history_proprio": batch.get("history_proprio"),
|
| 130 |
+
"history_actions": batch.get("history_actions"),
|
| 131 |
"proprio": batch["proprio"],
|
| 132 |
"texts": batch["texts"],
|
| 133 |
}
|
|
|
|
| 172 |
episodes: int,
|
| 173 |
resolution: int,
|
| 174 |
ablation: str | None = None,
|
| 175 |
+
chunk_commit_steps: int | None = None,
|
| 176 |
) -> BenchmarkMetrics:
|
| 177 |
per_task_success: dict[str, float] = {}
|
| 178 |
visibility_scores = []
|
|
|
|
| 196 |
episode_disturbance = [float(privileged_state["disturbance_cost"])]
|
| 197 |
history_images: list[np.ndarray] = []
|
| 198 |
history_proprio: list[np.ndarray] = []
|
| 199 |
+
history_actions: list[np.ndarray] = []
|
| 200 |
done = False
|
| 201 |
while not done:
|
| 202 |
batch = _prepare_batch(
|
|
|
|
| 204 |
device=device,
|
| 205 |
history_images=history_images,
|
| 206 |
history_proprio=history_proprio,
|
| 207 |
+
history_actions=history_actions,
|
| 208 |
)
|
| 209 |
with torch.no_grad():
|
| 210 |
chunk, outputs = select_chunk(model, batch, ablation=ablation)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
state_output = outputs.get("interaction_state")
|
| 212 |
if state_output is None:
|
| 213 |
state_output = outputs.get("reveal_state")
|
|
|
|
| 218 |
privileged_state["persistence_horizon"],
|
| 219 |
)
|
| 220 |
)
|
| 221 |
+
chunk_np = chunk[0].detach().cpu().numpy()
|
| 222 |
+
commit_steps = chunk_np.shape[0] if chunk_commit_steps is None else min(chunk_commit_steps, chunk_np.shape[0])
|
| 223 |
+
for action in chunk_np[:commit_steps]:
|
| 224 |
+
if history_steps > 0:
|
| 225 |
+
if len(history_images) >= history_steps:
|
| 226 |
+
history_images = history_images[-history_steps + 1 :]
|
| 227 |
+
history_proprio = history_proprio[-history_steps + 1 :]
|
| 228 |
+
history_actions = history_actions[-history_steps + 1 :]
|
| 229 |
+
history_images.append(observation["images"])
|
| 230 |
+
history_proprio.append(observation["proprio"])
|
| 231 |
+
history_actions.append(action.astype(np.float32))
|
| 232 |
+
observation, _, terminated, truncated, privileged_state = env.step(action)
|
| 233 |
+
episode_visibility.append(float(privileged_state["visibility"]))
|
| 234 |
+
episode_corridor.append(
|
| 235 |
+
float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any())
|
| 236 |
+
)
|
| 237 |
+
episode_disturbance.append(float(privileged_state["disturbance_cost"]))
|
| 238 |
+
done = bool(terminated or truncated)
|
| 239 |
+
if done:
|
| 240 |
+
break
|
| 241 |
successes.append(float(privileged_state["retrieval_success"]))
|
| 242 |
visibility_scores.append(visibility_integral(np.asarray(episode_visibility)))
|
| 243 |
corridor_scores.append(corridor_availability(np.asarray(episode_corridor)))
|
|
|
|
| 276 |
parser.add_argument("--ablation", default=None)
|
| 277 |
parser.add_argument("--output-root", default="/workspace/reports/reveal_eval")
|
| 278 |
parser.add_argument("--proxies", nargs="*", default=None)
|
| 279 |
+
parser.add_argument("--chunk-commit-steps", type=int, default=0)
|
| 280 |
args = parser.parse_args()
|
| 281 |
|
| 282 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
|
| 297 |
episodes=args.episodes,
|
| 298 |
resolution=resolution,
|
| 299 |
ablation=args.ablation,
|
| 300 |
+
chunk_commit_steps=(None if args.chunk_commit_steps <= 0 else args.chunk_commit_steps),
|
| 301 |
)
|
| 302 |
raw_metrics[label] = _metrics_to_dict(metrics)
|
| 303 |
sections[label] = {
|
code/reveal_vla_bimanual/eval/run_rlbench_rollout_eval.py
CHANGED
|
@@ -52,17 +52,66 @@ def _episode_language_goal(descriptions: Sequence[str]) -> str:
|
|
| 52 |
return str(descriptions[0]) if descriptions else ""
|
| 53 |
|
| 54 |
|
| 55 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
total_reward = 0.0
|
| 57 |
done = False
|
| 58 |
next_obs = obs
|
|
|
|
|
|
|
| 59 |
for arm_name in ("right", "left"):
|
| 60 |
-
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
total_reward += float(reward)
|
| 63 |
if reward >= 1.0 or done:
|
| 64 |
break
|
| 65 |
-
return next_obs, total_reward, done
|
| 66 |
|
| 67 |
|
| 68 |
def main() -> None:
|
|
@@ -78,6 +127,7 @@ def main() -> None:
|
|
| 78 |
parser.add_argument("--allow-unsupervised-planning", action="store_true")
|
| 79 |
parser.add_argument("--disable-support-mode-conditioning", action="store_true")
|
| 80 |
parser.add_argument("--headless", action="store_true", default=True)
|
|
|
|
| 81 |
args = parser.parse_args()
|
| 82 |
|
| 83 |
checkpoint = torch.load(Path(args.checkpoint), map_location="cpu", weights_only=False)
|
|
@@ -85,7 +135,18 @@ def main() -> None:
|
|
| 85 |
trainer_config = _trainer_config_from_checkpoint(checkpoint)
|
| 86 |
device = torch.device("cuda" if torch.cuda.is_available() and args.device == "cuda" else "cpu")
|
| 87 |
model = build_policy(policy_config, trainer_config).to(device)
|
| 88 |
-
model.load_state_dict(checkpoint["state_dict"], strict=
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
model.eval()
|
| 90 |
plan_requested = bool(args.plan)
|
| 91 |
plan_applied = plan_requested and planner_enabled(trainer_config, during_eval=True)
|
|
@@ -144,10 +205,14 @@ def main() -> None:
|
|
| 144 |
language_goal = _episode_language_goal(descriptions)
|
| 145 |
total_reward = 0.0
|
| 146 |
success = 0.0
|
|
|
|
|
|
|
| 147 |
history_images: list[np.ndarray] = []
|
| 148 |
history_proprio: list[np.ndarray] = []
|
|
|
|
| 149 |
history_steps = int(getattr(policy_config.memory, "history_steps", 0))
|
| 150 |
-
|
|
|
|
| 151 |
images = stack_live_rgb_obs(obs, resolution=args.resolution).unsqueeze(0).to(device)
|
| 152 |
proprio = torch.from_numpy(
|
| 153 |
bimanual_proprio_from_obs(
|
|
@@ -164,6 +229,9 @@ def main() -> None:
|
|
| 164 |
history_proprio_tensor = (
|
| 165 |
torch.from_numpy(np.stack(history_proprio, axis=0)).unsqueeze(0).to(device)
|
| 166 |
)
|
|
|
|
|
|
|
|
|
|
| 167 |
else:
|
| 168 |
history_images_tensor = torch.zeros(
|
| 169 |
(1, 0, images.shape[1], images.shape[2], images.shape[3], images.shape[4]),
|
|
@@ -175,6 +243,11 @@ def main() -> None:
|
|
| 175 |
device=device,
|
| 176 |
dtype=proprio.dtype,
|
| 177 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
with torch.no_grad():
|
| 179 |
if policy_supports_planning(trainer_config.policy_type):
|
| 180 |
outputs = model(
|
|
@@ -183,6 +256,7 @@ def main() -> None:
|
|
| 183 |
texts=[language_goal],
|
| 184 |
history_images=history_images_tensor,
|
| 185 |
history_proprio=history_proprio_tensor,
|
|
|
|
| 186 |
plan=plan_applied,
|
| 187 |
support_mode_conditioning=not args.disable_support_mode_conditioning,
|
| 188 |
)
|
|
@@ -193,22 +267,40 @@ def main() -> None:
|
|
| 193 |
texts=[language_goal],
|
| 194 |
history_images=history_images_tensor,
|
| 195 |
history_proprio=history_proprio_tensor,
|
|
|
|
| 196 |
)
|
| 197 |
chosen_chunk = outputs["action_mean"]
|
| 198 |
if plan_applied and "planned_chunk" in outputs:
|
| 199 |
chosen_chunk = outputs["planned_chunk"]
|
| 200 |
-
|
| 201 |
-
if
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
if done or success >= 1.0:
|
| 213 |
break
|
| 214 |
task_successes.append(success)
|
|
@@ -217,6 +309,8 @@ def main() -> None:
|
|
| 217 |
"task_class": task_class.__name__,
|
| 218 |
"successes": task_successes,
|
| 219 |
"returns": task_returns,
|
|
|
|
|
|
|
| 220 |
"mean_success": float(np.mean(task_successes)) if task_successes else 0.0,
|
| 221 |
"mean_return": float(np.mean(task_returns)) if task_returns else 0.0,
|
| 222 |
}
|
|
|
|
| 52 |
return str(descriptions[0]) if descriptions else ""
|
| 53 |
|
| 54 |
|
| 55 |
+
def _noop_arm_action(obs: Any, arm_name: str) -> np.ndarray:
|
| 56 |
+
if arm_name == "right":
|
| 57 |
+
pose = np.asarray(obs.gripper_right_pose, dtype=np.float32)
|
| 58 |
+
gripper_open = float(obs.gripper_right_open)
|
| 59 |
+
elif arm_name == "left":
|
| 60 |
+
pose = np.asarray(obs.gripper_left_pose, dtype=np.float32)
|
| 61 |
+
gripper_open = float(obs.gripper_left_open)
|
| 62 |
+
else: # pragma: no cover - defensive guard
|
| 63 |
+
raise ValueError(f"Unsupported arm: {arm_name}")
|
| 64 |
+
return np.concatenate([pose, np.array([gripper_open, 1.0], dtype=np.float32)], axis=0)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def _scaled_single_arm_delta(delta_action: np.ndarray, arm_name: str, scale: float) -> np.ndarray:
|
| 68 |
+
scaled = np.asarray(delta_action, dtype=np.float32).copy()
|
| 69 |
+
arm_index = {"right": 0, "left": 1}[arm_name]
|
| 70 |
+
offset = arm_index * 7
|
| 71 |
+
scaled[offset : offset + 6] *= float(scale)
|
| 72 |
+
return scaled
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def _step_bimanual_chunk(task: Any, obs: Any, delta_action: np.ndarray) -> tuple[Any, float, bool, int, int]:
    """Apply one bimanual delta action by stepping the right arm, then the left.

    For each arm the delta is tried at full scale and then at progressively
    smaller scales (0.5, 0.25, 0.1). If every scaled attempt raises, a no-op
    action holding the arm at its observed pose is stepped instead. Stepping
    stops early once an arm's step reports ``reward >= 1.0`` or ``done``.

    Args:
        task: Environment handle exposing ``step(action, arm_name)`` that
            returns ``(obs, reward, done)``.
        obs: Observation the first arm's action is computed from.
        delta_action: Bimanual delta action for this step.

    Returns:
        ``(next_obs, total_reward, done, recovered_steps, noop_fallbacks)``
        where ``recovered_steps`` counts arms that only succeeded at a reduced
        scale and ``noop_fallbacks`` counts arms that needed the no-op action.

    Raises:
        Exception: Whatever the no-op fallback raised when it also fails; the
            last scaled-step failure is attached as its ``__cause__``.
    """
    total_reward = 0.0
    done = False
    next_obs = obs
    recovered_steps = 0
    noop_fallbacks = 0
    for arm_name in ("right", "left"):
        reward = 0.0
        last_error: Exception | None = None
        for scale in (1.0, 0.5, 0.25, 0.1):
            try:
                env_action = single_arm_absolute_action_from_delta(
                    next_obs,
                    _scaled_single_arm_delta(delta_action, arm_name, scale),
                    arm_name,
                    ignore_collisions=True,
                )
                next_obs, reward, done = task.step(env_action, arm_name)
            except Exception as exc:  # pragma: no cover - live RLBench failure path
                last_error = exc
                continue
            if scale < 1.0:
                recovered_steps += 1
            break
        else:
            # Every scaled attempt failed: hold the arm in place instead.
            try:
                next_obs, reward, done = task.step(_noop_arm_action(next_obs, arm_name), arm_name)
            except Exception as exc:  # pragma: no cover - live RLBench failure path
                # Keep the scaled-step failure visible rather than discarding it.
                raise exc from last_error
            noop_fallbacks += 1
        total_reward += float(reward)
        if reward >= 1.0 or done:
            break
    return next_obs, total_reward, done, recovered_steps, noop_fallbacks
|
| 115 |
|
| 116 |
|
| 117 |
def main() -> None:
|
|
|
|
| 127 |
parser.add_argument("--allow-unsupervised-planning", action="store_true")
|
| 128 |
parser.add_argument("--disable-support-mode-conditioning", action="store_true")
|
| 129 |
parser.add_argument("--headless", action="store_true", default=True)
|
| 130 |
+
parser.add_argument("--chunk-commit-steps", type=int, default=0)
|
| 131 |
args = parser.parse_args()
|
| 132 |
|
| 133 |
checkpoint = torch.load(Path(args.checkpoint), map_location="cpu", weights_only=False)
|
|
|
|
| 135 |
trainer_config = _trainer_config_from_checkpoint(checkpoint)
|
| 136 |
device = torch.device("cuda" if torch.cuda.is_available() and args.device == "cuda" else "cpu")
|
| 137 |
model = build_policy(policy_config, trainer_config).to(device)
|
| 138 |
+
incompatible = model.load_state_dict(checkpoint["state_dict"], strict=False)
|
| 139 |
+
allowed_missing = {
|
| 140 |
+
key
|
| 141 |
+
for key in incompatible.missing_keys
|
| 142 |
+
if key.startswith("memory.action_proj.") or key.endswith("arm_identity.weight")
|
| 143 |
+
}
|
| 144 |
+
missing_other = sorted(set(incompatible.missing_keys) - allowed_missing)
|
| 145 |
+
if missing_other or incompatible.unexpected_keys:
|
| 146 |
+
raise RuntimeError(
|
| 147 |
+
"Checkpoint load failed due to incompatible weights. "
|
| 148 |
+
f"Missing keys: {missing_other}. Unexpected keys: {list(incompatible.unexpected_keys)}"
|
| 149 |
+
)
|
| 150 |
model.eval()
|
| 151 |
plan_requested = bool(args.plan)
|
| 152 |
plan_applied = plan_requested and planner_enabled(trainer_config, during_eval=True)
|
|
|
|
| 205 |
language_goal = _episode_language_goal(descriptions)
|
| 206 |
total_reward = 0.0
|
| 207 |
success = 0.0
|
| 208 |
+
episode_recoveries = 0
|
| 209 |
+
episode_noop_fallbacks = 0
|
| 210 |
history_images: list[np.ndarray] = []
|
| 211 |
history_proprio: list[np.ndarray] = []
|
| 212 |
+
history_actions: list[np.ndarray] = []
|
| 213 |
history_steps = int(getattr(policy_config.memory, "history_steps", 0))
|
| 214 |
+
timestep = 0
|
| 215 |
+
while timestep < args.episode_length:
|
| 216 |
images = stack_live_rgb_obs(obs, resolution=args.resolution).unsqueeze(0).to(device)
|
| 217 |
proprio = torch.from_numpy(
|
| 218 |
bimanual_proprio_from_obs(
|
|
|
|
| 229 |
history_proprio_tensor = (
|
| 230 |
torch.from_numpy(np.stack(history_proprio, axis=0)).unsqueeze(0).to(device)
|
| 231 |
)
|
| 232 |
+
history_actions_tensor = (
|
| 233 |
+
torch.from_numpy(np.stack(history_actions, axis=0)).unsqueeze(0).to(device)
|
| 234 |
+
)
|
| 235 |
else:
|
| 236 |
history_images_tensor = torch.zeros(
|
| 237 |
(1, 0, images.shape[1], images.shape[2], images.shape[3], images.shape[4]),
|
|
|
|
| 243 |
device=device,
|
| 244 |
dtype=proprio.dtype,
|
| 245 |
)
|
| 246 |
+
history_actions_tensor = torch.zeros(
|
| 247 |
+
(1, 0, policy_config.decoder.action_dim),
|
| 248 |
+
device=device,
|
| 249 |
+
dtype=proprio.dtype,
|
| 250 |
+
)
|
| 251 |
with torch.no_grad():
|
| 252 |
if policy_supports_planning(trainer_config.policy_type):
|
| 253 |
outputs = model(
|
|
|
|
| 256 |
texts=[language_goal],
|
| 257 |
history_images=history_images_tensor,
|
| 258 |
history_proprio=history_proprio_tensor,
|
| 259 |
+
history_actions=history_actions_tensor,
|
| 260 |
plan=plan_applied,
|
| 261 |
support_mode_conditioning=not args.disable_support_mode_conditioning,
|
| 262 |
)
|
|
|
|
| 267 |
texts=[language_goal],
|
| 268 |
history_images=history_images_tensor,
|
| 269 |
history_proprio=history_proprio_tensor,
|
| 270 |
+
history_actions=history_actions_tensor,
|
| 271 |
)
|
| 272 |
chosen_chunk = outputs["action_mean"]
|
| 273 |
if plan_applied and "planned_chunk" in outputs:
|
| 274 |
chosen_chunk = outputs["planned_chunk"]
|
| 275 |
+
chunk_np = chosen_chunk[0].detach().float().cpu().numpy()
|
| 276 |
+
commit_steps = chunk_np.shape[0] if args.chunk_commit_steps <= 0 else min(args.chunk_commit_steps, chunk_np.shape[0])
|
| 277 |
+
done = False
|
| 278 |
+
for step_action in chunk_np[:commit_steps]:
|
| 279 |
+
live_images = stack_live_rgb_obs(obs, resolution=args.resolution).detach().cpu().numpy()
|
| 280 |
+
live_proprio = bimanual_proprio_from_obs(
|
| 281 |
+
obs,
|
| 282 |
+
timestep=timestep,
|
| 283 |
+
episode_length=args.episode_length,
|
| 284 |
+
target_dim=policy_config.fusion.proprio_dim,
|
| 285 |
+
).astype(np.float32)
|
| 286 |
+
if history_steps > 0:
|
| 287 |
+
if len(history_images) >= history_steps:
|
| 288 |
+
keep = max(history_steps - 1, 0)
|
| 289 |
+
history_images = history_images[-keep:] if keep > 0 else []
|
| 290 |
+
history_proprio = history_proprio[-keep:] if keep > 0 else []
|
| 291 |
+
history_actions = history_actions[-keep:] if keep > 0 else []
|
| 292 |
+
history_images.append(live_images)
|
| 293 |
+
history_proprio.append(live_proprio)
|
| 294 |
+
history_actions.append(step_action.astype(np.float32))
|
| 295 |
+
obs, reward, done, recovered_steps, noop_fallbacks = _step_bimanual_chunk(task, obs, step_action)
|
| 296 |
+
episode_recoveries += recovered_steps
|
| 297 |
+
episode_noop_fallbacks += noop_fallbacks
|
| 298 |
+
total_reward += float(reward)
|
| 299 |
+
timestep += 1
|
| 300 |
+
if reward >= 1.0:
|
| 301 |
+
success = 1.0
|
| 302 |
+
if done or success >= 1.0 or timestep >= args.episode_length:
|
| 303 |
+
break
|
| 304 |
if done or success >= 1.0:
|
| 305 |
break
|
| 306 |
task_successes.append(success)
|
|
|
|
| 309 |
"task_class": task_class.__name__,
|
| 310 |
"successes": task_successes,
|
| 311 |
"returns": task_returns,
|
| 312 |
+
"path_recoveries": episode_recoveries if args.episodes_per_task == 1 else None,
|
| 313 |
+
"noop_fallbacks": episode_noop_fallbacks if args.episodes_per_task == 1 else None,
|
| 314 |
"mean_success": float(np.mean(task_successes)) if task_successes else 0.0,
|
| 315 |
"mean_return": float(np.mean(task_returns)) if task_returns else 0.0,
|
| 316 |
}
|
code/reveal_vla_bimanual/models/backbones.py
CHANGED
|
@@ -48,7 +48,7 @@ class FrozenVLBackbone(nn.Module):
|
|
| 48 |
else:
|
| 49 |
from transformers import AutoTokenizer, CLIPModel
|
| 50 |
|
| 51 |
-
clip_model = CLIPModel.from_pretrained(config.model_name)
|
| 52 |
self.vision_model = clip_model.vision_model
|
| 53 |
self.text_model = clip_model.text_model
|
| 54 |
self.visual_projection = clip_model.visual_projection
|
|
|
|
| 48 |
else:
|
| 49 |
from transformers import AutoTokenizer, CLIPModel
|
| 50 |
|
| 51 |
+
clip_model = CLIPModel.from_pretrained(config.model_name, use_safetensors=True)
|
| 52 |
self.vision_model = clip_model.vision_model
|
| 53 |
self.text_model = clip_model.text_model
|
| 54 |
self.visual_projection = clip_model.visual_projection
|
code/reveal_vla_bimanual/models/observation_memory.py
CHANGED
|
@@ -9,6 +9,7 @@ from torch import Tensor, nn
|
|
| 9 |
@dataclass
|
| 10 |
class ObservationMemoryConfig:
|
| 11 |
hidden_dim: int = 512
|
|
|
|
| 12 |
history_steps: int = 2
|
| 13 |
num_layers: int = 1
|
| 14 |
dropout: float = 0.1
|
|
@@ -33,6 +34,11 @@ class ObservationMemory(nn.Module):
|
|
| 33 |
nn.Linear(config.hidden_dim, config.hidden_dim),
|
| 34 |
nn.GELU(),
|
| 35 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
self.uncertainty_head = nn.Sequential(
|
| 37 |
nn.LayerNorm(config.hidden_dim),
|
| 38 |
nn.Linear(config.hidden_dim, 1),
|
|
@@ -42,10 +48,14 @@ class ObservationMemory(nn.Module):
|
|
| 42 |
self,
|
| 43 |
scene_tokens: Tensor,
|
| 44 |
history_scene_tokens: Tensor | None = None,
|
|
|
|
| 45 |
) -> dict[str, Tensor]:
|
| 46 |
pooled_current = scene_tokens.mean(dim=1, keepdim=True)
|
| 47 |
if history_scene_tokens is not None and history_scene_tokens.numel() > 0:
|
| 48 |
history_pooled = history_scene_tokens.mean(dim=2)
|
|
|
|
|
|
|
|
|
|
| 49 |
sequence = torch.cat([history_pooled, pooled_current], dim=1)
|
| 50 |
else:
|
| 51 |
sequence = pooled_current
|
|
@@ -94,11 +104,24 @@ class InteractionObservationMemory(nn.Module):
|
|
| 94 |
nn.Linear(config.hidden_dim, config.hidden_dim),
|
| 95 |
nn.GELU(),
|
| 96 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
self.uncertainty_head = nn.Sequential(
|
| 98 |
nn.LayerNorm(config.hidden_dim),
|
| 99 |
nn.Linear(config.hidden_dim, 1),
|
| 100 |
)
|
| 101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
def _truncate_history(self, history_scene_tokens: Tensor | None) -> Tensor | None:
|
| 103 |
if history_scene_tokens is None or history_scene_tokens.numel() == 0:
|
| 104 |
return history_scene_tokens
|
|
@@ -110,11 +133,21 @@ class InteractionObservationMemory(nn.Module):
|
|
| 110 |
self,
|
| 111 |
scene_tokens: Tensor,
|
| 112 |
history_scene_tokens: Tensor | None = None,
|
|
|
|
| 113 |
) -> dict[str, Tensor]:
|
| 114 |
pooled_current = scene_tokens.mean(dim=1, keepdim=True)
|
| 115 |
history_scene_tokens = self._truncate_history(history_scene_tokens)
|
| 116 |
if history_scene_tokens is not None and history_scene_tokens.numel() > 0:
|
| 117 |
history_pooled = history_scene_tokens.mean(dim=2)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
sequence = torch.cat([history_pooled, pooled_current], dim=1)
|
| 119 |
else:
|
| 120 |
sequence = pooled_current
|
|
@@ -126,11 +159,13 @@ class InteractionObservationMemory(nn.Module):
|
|
| 126 |
)
|
| 127 |
encoded = self.sequence_encoder(sequence + self.position_embedding[:, :seq_len])
|
| 128 |
batch_size = encoded.shape[0]
|
| 129 |
-
|
|
|
|
|
|
|
| 130 |
bank_tokens, _ = self.bank_attention(queries, encoded, encoded)
|
| 131 |
bank_tokens = bank_tokens + self.bank_mlp(bank_tokens)
|
| 132 |
-
projected_bank = self.token_proj(bank_tokens)
|
| 133 |
-
pooled_bank = projected_bank.mean(dim=1)
|
| 134 |
return {
|
| 135 |
"memory_sequence": encoded,
|
| 136 |
"memory_state": encoded[:, -1],
|
|
|
|
| 9 |
@dataclass
|
| 10 |
class ObservationMemoryConfig:
|
| 11 |
hidden_dim: int = 512
|
| 12 |
+
action_dim: int = 14
|
| 13 |
history_steps: int = 2
|
| 14 |
num_layers: int = 1
|
| 15 |
dropout: float = 0.1
|
|
|
|
| 34 |
nn.Linear(config.hidden_dim, config.hidden_dim),
|
| 35 |
nn.GELU(),
|
| 36 |
)
|
| 37 |
+
self.action_proj = nn.Sequential(
|
| 38 |
+
nn.LayerNorm(config.action_dim),
|
| 39 |
+
nn.Linear(config.action_dim, config.hidden_dim),
|
| 40 |
+
nn.GELU(),
|
| 41 |
+
)
|
| 42 |
self.uncertainty_head = nn.Sequential(
|
| 43 |
nn.LayerNorm(config.hidden_dim),
|
| 44 |
nn.Linear(config.hidden_dim, 1),
|
|
|
|
| 48 |
self,
|
| 49 |
scene_tokens: Tensor,
|
| 50 |
history_scene_tokens: Tensor | None = None,
|
| 51 |
+
history_actions: Tensor | None = None,
|
| 52 |
) -> dict[str, Tensor]:
|
| 53 |
pooled_current = scene_tokens.mean(dim=1, keepdim=True)
|
| 54 |
if history_scene_tokens is not None and history_scene_tokens.numel() > 0:
|
| 55 |
history_pooled = history_scene_tokens.mean(dim=2)
|
| 56 |
+
if history_actions is not None and history_actions.numel() > 0:
|
| 57 |
+
history_action_tokens = self.action_proj(history_actions[:, -history_pooled.shape[1] :])
|
| 58 |
+
history_pooled = history_pooled + history_action_tokens
|
| 59 |
sequence = torch.cat([history_pooled, pooled_current], dim=1)
|
| 60 |
else:
|
| 61 |
sequence = pooled_current
|
|
|
|
| 104 |
nn.Linear(config.hidden_dim, config.hidden_dim),
|
| 105 |
nn.GELU(),
|
| 106 |
)
|
| 107 |
+
self.action_proj = nn.Sequential(
|
| 108 |
+
nn.LayerNorm(config.action_dim),
|
| 109 |
+
nn.Linear(config.action_dim, config.hidden_dim),
|
| 110 |
+
nn.GELU(),
|
| 111 |
+
)
|
| 112 |
self.uncertainty_head = nn.Sequential(
|
| 113 |
nn.LayerNorm(config.hidden_dim),
|
| 114 |
nn.Linear(config.hidden_dim, 1),
|
| 115 |
)
|
| 116 |
|
| 117 |
+
def _recency_weights(self, length: int, device: torch.device, dtype: torch.dtype) -> Tensor:
|
| 118 |
+
if length <= 0:
|
| 119 |
+
return torch.zeros((0,), device=device, dtype=dtype)
|
| 120 |
+
positions = torch.arange(length, device=device, dtype=dtype)
|
| 121 |
+
distances = (length - 1) - positions
|
| 122 |
+
weights = torch.exp(-0.5 * distances)
|
| 123 |
+
return weights / weights.sum().clamp_min(1e-6)
|
| 124 |
+
|
| 125 |
def _truncate_history(self, history_scene_tokens: Tensor | None) -> Tensor | None:
|
| 126 |
if history_scene_tokens is None or history_scene_tokens.numel() == 0:
|
| 127 |
return history_scene_tokens
|
|
|
|
| 133 |
self,
|
| 134 |
scene_tokens: Tensor,
|
| 135 |
history_scene_tokens: Tensor | None = None,
|
| 136 |
+
history_actions: Tensor | None = None,
|
| 137 |
) -> dict[str, Tensor]:
|
| 138 |
pooled_current = scene_tokens.mean(dim=1, keepdim=True)
|
| 139 |
history_scene_tokens = self._truncate_history(history_scene_tokens)
|
| 140 |
if history_scene_tokens is not None and history_scene_tokens.numel() > 0:
|
| 141 |
history_pooled = history_scene_tokens.mean(dim=2)
|
| 142 |
+
if history_actions is not None and history_actions.numel() > 0:
|
| 143 |
+
truncated_actions = history_actions[:, -history_pooled.shape[1] :]
|
| 144 |
+
history_pooled = history_pooled + self.action_proj(truncated_actions)
|
| 145 |
+
recency_weights = self._recency_weights(
|
| 146 |
+
history_pooled.shape[1],
|
| 147 |
+
device=history_pooled.device,
|
| 148 |
+
dtype=history_pooled.dtype,
|
| 149 |
+
).view(1, -1, 1)
|
| 150 |
+
history_pooled = history_pooled * recency_weights * float(history_pooled.shape[1])
|
| 151 |
sequence = torch.cat([history_pooled, pooled_current], dim=1)
|
| 152 |
else:
|
| 153 |
sequence = pooled_current
|
|
|
|
| 159 |
)
|
| 160 |
encoded = self.sequence_encoder(sequence + self.position_embedding[:, :seq_len])
|
| 161 |
batch_size = encoded.shape[0]
|
| 162 |
+
recent_window = min(max(1, self.config.memory_bank_size // 2), encoded.shape[1])
|
| 163 |
+
recent_summary = encoded[:, -recent_window:].mean(dim=1, keepdim=True)
|
| 164 |
+
queries = self.bank_queries.unsqueeze(0).expand(batch_size, -1, -1) + recent_summary
|
| 165 |
bank_tokens, _ = self.bank_attention(queries, encoded, encoded)
|
| 166 |
bank_tokens = bank_tokens + self.bank_mlp(bank_tokens)
|
| 167 |
+
projected_bank = self.token_proj(bank_tokens + recent_summary)
|
| 168 |
+
pooled_bank = projected_bank.mean(dim=1) + 0.25 * recent_summary.squeeze(1)
|
| 169 |
return {
|
| 170 |
"memory_sequence": encoded,
|
| 171 |
"memory_state": encoded[:, -1],
|
code/reveal_vla_bimanual/models/policy.py
CHANGED
|
@@ -111,6 +111,7 @@ class BackboneOnlyPolicy(nn.Module):
|
|
| 111 |
language_tokens: dict[str, Tensor] | None = None,
|
| 112 |
history_images: Tensor | None = None,
|
| 113 |
history_proprio: Tensor | None = None,
|
|
|
|
| 114 |
) -> dict[str, Tensor]:
|
| 115 |
scene_tokens = self.encode_scene(images, proprio, texts=texts, language_tokens=language_tokens)
|
| 116 |
history_scene_tokens = self.encode_history(
|
|
@@ -119,7 +120,11 @@ class BackboneOnlyPolicy(nn.Module):
|
|
| 119 |
texts=texts,
|
| 120 |
language_tokens=language_tokens,
|
| 121 |
)
|
| 122 |
-
memory_output = self.memory(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
decoded = self.decoder(scene_tokens, memory_token=memory_output["memory_token"])
|
| 124 |
decoded["scene_tokens"] = scene_tokens
|
| 125 |
decoded["history_scene_tokens"] = history_scene_tokens
|
|
@@ -142,6 +147,7 @@ class RevealBimanualPolicy(BackboneOnlyPolicy):
|
|
| 142 |
language_tokens: dict[str, Tensor] | None = None,
|
| 143 |
history_images: Tensor | None = None,
|
| 144 |
history_proprio: Tensor | None = None,
|
|
|
|
| 145 |
plan: bool = True,
|
| 146 |
support_mode_conditioning: bool = True,
|
| 147 |
candidate_chunks_override: Tensor | None = None,
|
|
@@ -153,6 +159,7 @@ class RevealBimanualPolicy(BackboneOnlyPolicy):
|
|
| 153 |
language_tokens=language_tokens,
|
| 154 |
history_images=history_images,
|
| 155 |
history_proprio=history_proprio,
|
|
|
|
| 156 |
)
|
| 157 |
reveal_state = self.reveal_head(
|
| 158 |
outputs["scene_tokens"],
|
|
@@ -232,6 +239,7 @@ class InteractionBimanualPolicy(BackboneOnlyPolicy):
|
|
| 232 |
language_tokens: dict[str, Tensor] | None = None,
|
| 233 |
history_images: Tensor | None = None,
|
| 234 |
history_proprio: Tensor | None = None,
|
|
|
|
| 235 |
plan: bool = True,
|
| 236 |
support_mode_conditioning: bool = True,
|
| 237 |
candidate_chunks_override: Tensor | None = None,
|
|
@@ -248,7 +256,11 @@ class InteractionBimanualPolicy(BackboneOnlyPolicy):
|
|
| 248 |
)
|
| 249 |
if history_steps_override is not None and history_scene_tokens is not None and history_scene_tokens.numel() > 0:
|
| 250 |
history_scene_tokens = history_scene_tokens[:, -history_steps_override:]
|
| 251 |
-
memory_output = self.memory(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
|
| 253 |
interaction_state = None
|
| 254 |
if use_interaction_head:
|
|
|
|
| 111 |
language_tokens: dict[str, Tensor] | None = None,
|
| 112 |
history_images: Tensor | None = None,
|
| 113 |
history_proprio: Tensor | None = None,
|
| 114 |
+
history_actions: Tensor | None = None,
|
| 115 |
) -> dict[str, Tensor]:
|
| 116 |
scene_tokens = self.encode_scene(images, proprio, texts=texts, language_tokens=language_tokens)
|
| 117 |
history_scene_tokens = self.encode_history(
|
|
|
|
| 120 |
texts=texts,
|
| 121 |
language_tokens=language_tokens,
|
| 122 |
)
|
| 123 |
+
memory_output = self.memory(
|
| 124 |
+
scene_tokens,
|
| 125 |
+
history_scene_tokens=history_scene_tokens,
|
| 126 |
+
history_actions=history_actions,
|
| 127 |
+
)
|
| 128 |
decoded = self.decoder(scene_tokens, memory_token=memory_output["memory_token"])
|
| 129 |
decoded["scene_tokens"] = scene_tokens
|
| 130 |
decoded["history_scene_tokens"] = history_scene_tokens
|
|
|
|
| 147 |
language_tokens: dict[str, Tensor] | None = None,
|
| 148 |
history_images: Tensor | None = None,
|
| 149 |
history_proprio: Tensor | None = None,
|
| 150 |
+
history_actions: Tensor | None = None,
|
| 151 |
plan: bool = True,
|
| 152 |
support_mode_conditioning: bool = True,
|
| 153 |
candidate_chunks_override: Tensor | None = None,
|
|
|
|
| 159 |
language_tokens=language_tokens,
|
| 160 |
history_images=history_images,
|
| 161 |
history_proprio=history_proprio,
|
| 162 |
+
history_actions=history_actions,
|
| 163 |
)
|
| 164 |
reveal_state = self.reveal_head(
|
| 165 |
outputs["scene_tokens"],
|
|
|
|
| 239 |
language_tokens: dict[str, Tensor] | None = None,
|
| 240 |
history_images: Tensor | None = None,
|
| 241 |
history_proprio: Tensor | None = None,
|
| 242 |
+
history_actions: Tensor | None = None,
|
| 243 |
plan: bool = True,
|
| 244 |
support_mode_conditioning: bool = True,
|
| 245 |
candidate_chunks_override: Tensor | None = None,
|
|
|
|
| 256 |
)
|
| 257 |
if history_steps_override is not None and history_scene_tokens is not None and history_scene_tokens.numel() > 0:
|
| 258 |
history_scene_tokens = history_scene_tokens[:, -history_steps_override:]
|
| 259 |
+
memory_output = self.memory(
|
| 260 |
+
scene_tokens,
|
| 261 |
+
history_scene_tokens=history_scene_tokens,
|
| 262 |
+
history_actions=history_actions,
|
| 263 |
+
)
|
| 264 |
|
| 265 |
interaction_state = None
|
| 266 |
if use_interaction_head:
|
code/reveal_vla_bimanual/sim_reveal/dataset.py
CHANGED
|
@@ -11,7 +11,7 @@ import numpy as np
|
|
| 11 |
|
| 12 |
from sim_reveal.procedural_envs import available_proxy_names, make_proxy_env, render_views_from_state
|
| 13 |
|
| 14 |
-
NOLEAK_PROXY_DATASET_VERSION = "
|
| 15 |
LEGACY_PRIVILEGED_RENDER_KEYS = frozenset(
|
| 16 |
{
|
| 17 |
"target_template",
|
|
@@ -74,6 +74,7 @@ def collect_teacher_dataset(
|
|
| 74 |
)
|
| 75 |
padded_history_render_states = []
|
| 76 |
padded_history_proprio = []
|
|
|
|
| 77 |
history_count = min(history_steps, len(history_buffer))
|
| 78 |
pad_count = history_steps - history_count
|
| 79 |
if history_count > 0:
|
|
@@ -83,9 +84,11 @@ def collect_teacher_dataset(
|
|
| 83 |
for _ in range(pad_count):
|
| 84 |
padded_history_render_states.append(env.render_state(privileged_state))
|
| 85 |
padded_history_proprio.append(np.zeros_like(observation["proprio"], dtype=np.float32))
|
|
|
|
| 86 |
for item in recent_history:
|
| 87 |
padded_history_render_states.append(item["render_state"])
|
| 88 |
padded_history_proprio.append(item["proprio"])
|
|
|
|
| 89 |
samples.append(
|
| 90 |
{
|
| 91 |
"dataset_version": NOLEAK_PROXY_DATASET_VERSION,
|
|
@@ -108,16 +111,21 @@ def collect_teacher_dataset(
|
|
| 108 |
"history_proprio": np.stack(padded_history_proprio, axis=0).astype("float32")
|
| 109 |
if padded_history_proprio
|
| 110 |
else np.zeros((0, observation["proprio"].shape[0]), dtype=np.float32),
|
|
|
|
|
|
|
|
|
|
| 111 |
"candidate_action_chunks": candidate_action_chunks.astype("float32"),
|
| 112 |
**candidate_outcomes,
|
| 113 |
}
|
| 114 |
)
|
| 115 |
proxy_samples += 1
|
| 116 |
-
|
|
|
|
| 117 |
history_buffer.append(
|
| 118 |
{
|
| 119 |
"render_state": env.render_state(privileged_state),
|
| 120 |
"proprio": env.get_observation(privileged_state)["proprio"].astype("float32"),
|
|
|
|
| 121 |
}
|
| 122 |
)
|
| 123 |
if terminated:
|
|
@@ -203,6 +211,13 @@ class RevealOfflineDataset(Dataset[dict[str, Any]]):
|
|
| 203 |
"images": stacked,
|
| 204 |
"history_images": history_stacked,
|
| 205 |
"history_proprio": torch.as_tensor(sample.get("history_proprio", []), dtype=torch.float32),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
"proprio": torch.as_tensor(sample["proprio"], dtype=torch.float32),
|
| 207 |
"texts": sample["language_goal"],
|
| 208 |
"action_chunk": torch.as_tensor(sample["action_chunk"], dtype=torch.float32),
|
|
|
|
| 11 |
|
| 12 |
from sim_reveal.procedural_envs import available_proxy_names, make_proxy_env, render_views_from_state
|
| 13 |
|
| 14 |
+
NOLEAK_PROXY_DATASET_VERSION = "reveal_proxy_v5_noleak_actionhist"
|
| 15 |
LEGACY_PRIVILEGED_RENDER_KEYS = frozenset(
|
| 16 |
{
|
| 17 |
"target_template",
|
|
|
|
| 74 |
)
|
| 75 |
padded_history_render_states = []
|
| 76 |
padded_history_proprio = []
|
| 77 |
+
padded_history_actions = []
|
| 78 |
history_count = min(history_steps, len(history_buffer))
|
| 79 |
pad_count = history_steps - history_count
|
| 80 |
if history_count > 0:
|
|
|
|
| 84 |
for _ in range(pad_count):
|
| 85 |
padded_history_render_states.append(env.render_state(privileged_state))
|
| 86 |
padded_history_proprio.append(np.zeros_like(observation["proprio"], dtype=np.float32))
|
| 87 |
+
padded_history_actions.append(np.zeros((action_chunk.shape[-1],), dtype=np.float32))
|
| 88 |
for item in recent_history:
|
| 89 |
padded_history_render_states.append(item["render_state"])
|
| 90 |
padded_history_proprio.append(item["proprio"])
|
| 91 |
+
padded_history_actions.append(item["action"])
|
| 92 |
samples.append(
|
| 93 |
{
|
| 94 |
"dataset_version": NOLEAK_PROXY_DATASET_VERSION,
|
|
|
|
| 111 |
"history_proprio": np.stack(padded_history_proprio, axis=0).astype("float32")
|
| 112 |
if padded_history_proprio
|
| 113 |
else np.zeros((0, observation["proprio"].shape[0]), dtype=np.float32),
|
| 114 |
+
"history_actions": np.stack(padded_history_actions, axis=0).astype("float32")
|
| 115 |
+
if padded_history_actions
|
| 116 |
+
else np.zeros((0, action_chunk.shape[-1]), dtype=np.float32),
|
| 117 |
"candidate_action_chunks": candidate_action_chunks.astype("float32"),
|
| 118 |
**candidate_outcomes,
|
| 119 |
}
|
| 120 |
)
|
| 121 |
proxy_samples += 1
|
| 122 |
+
executed_action = env.teacher_action().astype("float32")
|
| 123 |
+
_, _, terminated, truncated, privileged_state = env.step(executed_action)
|
| 124 |
history_buffer.append(
|
| 125 |
{
|
| 126 |
"render_state": env.render_state(privileged_state),
|
| 127 |
"proprio": env.get_observation(privileged_state)["proprio"].astype("float32"),
|
| 128 |
+
"action": executed_action,
|
| 129 |
}
|
| 130 |
)
|
| 131 |
if terminated:
|
|
|
|
| 211 |
"images": stacked,
|
| 212 |
"history_images": history_stacked,
|
| 213 |
"history_proprio": torch.as_tensor(sample.get("history_proprio", []), dtype=torch.float32),
|
| 214 |
+
"history_actions": torch.as_tensor(
|
| 215 |
+
sample.get(
|
| 216 |
+
"history_actions",
|
| 217 |
+
np.zeros((len(sample.get("history_render_states", [])), sample["action_chunk"].shape[-1]), dtype=np.float32),
|
| 218 |
+
),
|
| 219 |
+
dtype=torch.float32,
|
| 220 |
+
),
|
| 221 |
"proprio": torch.as_tensor(sample["proprio"], dtype=torch.float32),
|
| 222 |
"texts": sample["language_goal"],
|
| 223 |
"action_chunk": torch.as_tensor(sample["action_chunk"], dtype=torch.float32),
|
code/reveal_vla_bimanual/sim_rlbench/dataset.py
CHANGED
|
@@ -14,6 +14,16 @@ from sim_rlbench.camera_spec import canonical_to_upstream_camera
|
|
| 14 |
|
| 15 |
|
| 16 |
THREE_CAMERAS: tuple[str, str, str] = ("front", "wrist_left", "wrist_right")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
|
| 19 |
def _camera_value(obs: Any, camera_name: str, suffix: str) -> Any:
|
|
@@ -196,8 +206,8 @@ def single_arm_absolute_action_from_delta(
|
|
| 196 |
arm_index = {"right": 0, "left": 1}[arm_name]
|
| 197 |
current_pose = _arm_pose(current_obs, arm_name)
|
| 198 |
offset = arm_index * 7
|
| 199 |
-
delta_position = delta_action[offset : offset + 3]
|
| 200 |
-
delta_rotvec = delta_action[offset + 3 : offset + 6]
|
| 201 |
gripper = float(delta_action[offset + 6] > 0.5)
|
| 202 |
current_quat = _xyzw_to_wxyz(current_pose[3:])
|
| 203 |
delta_quat = _rotvec_to_quat_wxyz(delta_rotvec)
|
|
@@ -379,6 +389,20 @@ class RLBenchOfflineChunkDataset(Dataset[dict[str, Any]]):
|
|
| 379 |
)
|
| 380 |
return torch.stack(history_features, dim=0)
|
| 381 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 382 |
def __getitem__(self, index: int) -> dict[str, Any]:
|
| 383 |
sample = self._samples[index]
|
| 384 |
episode = self._episodes[sample.episode_key]
|
|
@@ -388,6 +412,7 @@ class RLBenchOfflineChunkDataset(Dataset[dict[str, Any]]):
|
|
| 388 |
"images": self._load_rgb_stack(episode.episode_dir, sample.step_index),
|
| 389 |
"history_images": self._history_rgb_stack(episode.episode_dir, sample.step_index),
|
| 390 |
"history_proprio": self._history_proprio(observations, sample.step_index),
|
|
|
|
| 391 |
"proprio": torch.from_numpy(
|
| 392 |
bimanual_proprio_from_obs(
|
| 393 |
obs,
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
THREE_CAMERAS: tuple[str, str, str] = ("front", "wrist_left", "wrist_right")
|
| 17 |
+
# Upper bound on the norm of a single-step position delta; applied via
# _clip_vector_norm when converting a delta action to an absolute action.
# NOTE(review): units are presumably meters - confirm.
MAX_RLBENCH_POSITION_DELTA = 0.05
|
| 18 |
+
# Upper bound on the norm of a single-step rotation-vector delta; applied via
# _clip_vector_norm when converting a delta action to an absolute action.
# NOTE(review): units are presumably radians - confirm.
MAX_RLBENCH_ROTATION_DELTA = 0.35
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def _clip_vector_norm(vector: Sequence[float], max_norm: float) -> np.ndarray:
|
| 22 |
+
clipped = np.asarray(vector, dtype=np.float32)
|
| 23 |
+
norm = float(np.linalg.norm(clipped))
|
| 24 |
+
if norm <= max_norm or norm < 1e-8:
|
| 25 |
+
return clipped
|
| 26 |
+
return clipped * (max_norm / norm)
|
| 27 |
|
| 28 |
|
| 29 |
def _camera_value(obs: Any, camera_name: str, suffix: str) -> Any:
|
|
|
|
| 206 |
arm_index = {"right": 0, "left": 1}[arm_name]
|
| 207 |
current_pose = _arm_pose(current_obs, arm_name)
|
| 208 |
offset = arm_index * 7
|
| 209 |
+
delta_position = _clip_vector_norm(delta_action[offset : offset + 3], max_norm=MAX_RLBENCH_POSITION_DELTA)
|
| 210 |
+
delta_rotvec = _clip_vector_norm(delta_action[offset + 3 : offset + 6], max_norm=MAX_RLBENCH_ROTATION_DELTA)
|
| 211 |
gripper = float(delta_action[offset + 6] > 0.5)
|
| 212 |
current_quat = _xyzw_to_wxyz(current_pose[3:])
|
| 213 |
delta_quat = _rotvec_to_quat_wxyz(delta_rotvec)
|
|
|
|
| 389 |
)
|
| 390 |
return torch.stack(history_features, dim=0)
|
| 391 |
|
| 392 |
+
def _history_actions(self, observations: Any, step_index: int) -> torch.Tensor:
|
| 393 |
+
if self.history_steps <= 0:
|
| 394 |
+
return torch.zeros((0, 14), dtype=torch.float32)
|
| 395 |
+
history_actions = []
|
| 396 |
+
for history_offset in range(self.history_steps, 0, -1):
|
| 397 |
+
history_index = step_index - history_offset
|
| 398 |
+
if history_index < 0:
|
| 399 |
+
history_actions.append(torch.zeros((14,), dtype=torch.float32))
|
| 400 |
+
else:
|
| 401 |
+
history_actions.append(
|
| 402 |
+
torch.from_numpy(delta_action_from_transition(observations[history_index], observations[history_index + 1]))
|
| 403 |
+
)
|
| 404 |
+
return torch.stack(history_actions, dim=0)
|
| 405 |
+
|
| 406 |
def __getitem__(self, index: int) -> dict[str, Any]:
|
| 407 |
sample = self._samples[index]
|
| 408 |
episode = self._episodes[sample.episode_key]
|
|
|
|
| 412 |
"images": self._load_rgb_stack(episode.episode_dir, sample.step_index),
|
| 413 |
"history_images": self._history_rgb_stack(episode.episode_dir, sample.step_index),
|
| 414 |
"history_proprio": self._history_proprio(observations, sample.step_index),
|
| 415 |
+
"history_actions": self._history_actions(observations, sample.step_index),
|
| 416 |
"proprio": torch.from_numpy(
|
| 417 |
bimanual_proprio_from_obs(
|
| 418 |
obs,
|
code/reveal_vla_bimanual/train/configs/proxy_interaction_state_actionhist.yaml
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_interaction_state_actionhist
|
| 2 |
+
output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction_debug
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 13
|
| 5 |
+
|
| 6 |
+
data:
|
| 7 |
+
proxies: [foliage_proxy, bag_proxy, cloth_proxy]
|
| 8 |
+
resolution: 96
|
| 9 |
+
train_episodes_per_proxy: 48
|
| 10 |
+
val_episodes_per_proxy: 16
|
| 11 |
+
train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v5_actionhist.pt
|
| 12 |
+
val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v5_actionhist.pt
|
| 13 |
+
rebuild_dataset: true
|
| 14 |
+
chunk_horizon: 8
|
| 15 |
+
rollout_horizon: 5
|
| 16 |
+
history_steps: 6
|
| 17 |
+
planner_candidates: 8
|
| 18 |
+
seed: 13
|
| 19 |
+
|
| 20 |
+
optim:
|
| 21 |
+
epochs: 10
|
| 22 |
+
batch_size: 16
|
| 23 |
+
num_workers: 0
|
| 24 |
+
lr: 0.001
|
| 25 |
+
weight_decay: 0.0001
|
| 26 |
+
|
| 27 |
+
trainer:
|
| 28 |
+
policy_type: interaction_state
|
| 29 |
+
use_bf16: true
|
| 30 |
+
grad_clip_norm: 1.0
|
| 31 |
+
freeze_backbone: true
|
| 32 |
+
gradient_checkpointing: false
|
| 33 |
+
plan_during_train: true
|
| 34 |
+
plan_during_eval: true
|
| 35 |
+
support_mode_conditioning: true
|
| 36 |
+
planner_mode: trainable
|
| 37 |
+
|
| 38 |
+
policy:
|
| 39 |
+
backbone:
|
| 40 |
+
model_name: openai/clip-vit-base-patch32
|
| 41 |
+
hidden_dim: 128
|
| 42 |
+
max_text_tokens: 32
|
| 43 |
+
freeze_backbone: true
|
| 44 |
+
gradient_checkpointing: false
|
| 45 |
+
use_dummy_backbone: true
|
| 46 |
+
fusion:
|
| 47 |
+
hidden_dim: 128
|
| 48 |
+
num_cameras: 3
|
| 49 |
+
num_layers: 2
|
| 50 |
+
num_heads: 4
|
| 51 |
+
ff_dim: 256
|
| 52 |
+
dropout: 0.1
|
| 53 |
+
proprio_dim: 32
|
| 54 |
+
proprio_tokens: 1
|
| 55 |
+
memory:
|
| 56 |
+
hidden_dim: 128
|
| 57 |
+
action_dim: 14
|
| 58 |
+
history_steps: 6
|
| 59 |
+
num_layers: 2
|
| 60 |
+
dropout: 0.1
|
| 61 |
+
memory_bank_size: 4
|
| 62 |
+
num_heads: 4
|
| 63 |
+
max_history_steps: 8
|
| 64 |
+
decoder:
|
| 65 |
+
hidden_dim: 128
|
| 66 |
+
num_heads: 4
|
| 67 |
+
num_layers: 2
|
| 68 |
+
ff_dim: 256
|
| 69 |
+
dropout: 0.1
|
| 70 |
+
chunk_size: 8
|
| 71 |
+
action_dim: 14
|
| 72 |
+
arm_action_dim: 7
|
| 73 |
+
num_candidates: 8
|
| 74 |
+
num_phases: 5
|
| 75 |
+
num_arm_roles: 4
|
| 76 |
+
reveal_head:
|
| 77 |
+
hidden_dim: 128
|
| 78 |
+
num_support_modes: 3
|
| 79 |
+
num_approach_templates: 32
|
| 80 |
+
rollout_horizon: 5
|
| 81 |
+
belief_map_size: 32
|
| 82 |
+
field_size: 16
|
| 83 |
+
num_heads: 4
|
| 84 |
+
predict_belief_map: true
|
| 85 |
+
num_phases: 5
|
| 86 |
+
num_arm_roles: 4
|
| 87 |
+
num_interaction_tokens: 8
|
| 88 |
+
world_model:
|
| 89 |
+
hidden_dim: 128
|
| 90 |
+
action_dim: 14
|
| 91 |
+
num_support_modes: 3
|
| 92 |
+
num_approach_templates: 32
|
| 93 |
+
rollout_horizon: 5
|
| 94 |
+
field_size: 16
|
| 95 |
+
num_heads: 4
|
| 96 |
+
num_phases: 5
|
| 97 |
+
num_arm_roles: 4
|
| 98 |
+
num_interaction_tokens: 8
|
| 99 |
+
planner:
|
| 100 |
+
hidden_dim: 128
|
| 101 |
+
num_candidates: 8
|
| 102 |
+
action_dim: 14
|
| 103 |
+
num_support_modes: 3
|
| 104 |
+
utility_margin: 0.1
|
| 105 |
+
num_heads: 4
|
| 106 |
+
num_layers: 2
|
| 107 |
+
num_phases: 5
|
| 108 |
+
num_arm_roles: 4
|
| 109 |
+
|
| 110 |
+
loss_weights:
|
| 111 |
+
action: 1.0
|
| 112 |
+
phase: 0.15
|
| 113 |
+
arm_role: 0.2
|
| 114 |
+
support_mode: 0.15
|
| 115 |
+
corridor: 0.2
|
| 116 |
+
persistence: 0.1
|
| 117 |
+
disturbance: 0.1
|
| 118 |
+
world_model: 0.25
|
| 119 |
+
belief: 0.05
|
| 120 |
+
planner_success: 0.2
|
| 121 |
+
planner_risk: 0.1
|
| 122 |
+
planner_ranking: 0.1
|
| 123 |
+
proposal_reconstruction: 0.2
|
| 124 |
+
proposal_success: 0.1
|
| 125 |
+
proposal_ranking: 0.1
|
code/reveal_vla_bimanual/train/configs/proxy_interaction_state_actionhist_smoke.yaml
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_interaction_state_actionhist_smoke
|
| 2 |
+
output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction_debug
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 13
|
| 5 |
+
|
| 6 |
+
data:
|
| 7 |
+
proxies: [foliage_proxy, bag_proxy, cloth_proxy]
|
| 8 |
+
resolution: 64
|
| 9 |
+
train_episodes_per_proxy: 6
|
| 10 |
+
val_episodes_per_proxy: 2
|
| 11 |
+
train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_smoke_v5_actionhist.pt
|
| 12 |
+
val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_smoke_v5_actionhist.pt
|
| 13 |
+
rebuild_dataset: true
|
| 14 |
+
chunk_horizon: 4
|
| 15 |
+
rollout_horizon: 3
|
| 16 |
+
history_steps: 6
|
| 17 |
+
planner_candidates: 4
|
| 18 |
+
seed: 13
|
| 19 |
+
|
| 20 |
+
optim:
|
| 21 |
+
epochs: 4
|
| 22 |
+
batch_size: 8
|
| 23 |
+
num_workers: 0
|
| 24 |
+
lr: 0.001
|
| 25 |
+
weight_decay: 0.0001
|
| 26 |
+
|
| 27 |
+
trainer:
|
| 28 |
+
policy_type: interaction_state
|
| 29 |
+
use_bf16: true
|
| 30 |
+
grad_clip_norm: 1.0
|
| 31 |
+
freeze_backbone: true
|
| 32 |
+
gradient_checkpointing: false
|
| 33 |
+
plan_during_train: true
|
| 34 |
+
plan_during_eval: true
|
| 35 |
+
support_mode_conditioning: true
|
| 36 |
+
planner_mode: trainable
|
| 37 |
+
|
| 38 |
+
policy:
|
| 39 |
+
backbone:
|
| 40 |
+
model_name: openai/clip-vit-base-patch32
|
| 41 |
+
hidden_dim: 64
|
| 42 |
+
max_text_tokens: 32
|
| 43 |
+
freeze_backbone: true
|
| 44 |
+
gradient_checkpointing: false
|
| 45 |
+
use_dummy_backbone: true
|
| 46 |
+
fusion:
|
| 47 |
+
hidden_dim: 64
|
| 48 |
+
num_cameras: 3
|
| 49 |
+
num_layers: 2
|
| 50 |
+
num_heads: 4
|
| 51 |
+
ff_dim: 128
|
| 52 |
+
dropout: 0.1
|
| 53 |
+
proprio_dim: 32
|
| 54 |
+
proprio_tokens: 1
|
| 55 |
+
memory:
|
| 56 |
+
hidden_dim: 64
|
| 57 |
+
action_dim: 14
|
| 58 |
+
history_steps: 6
|
| 59 |
+
num_layers: 2
|
| 60 |
+
dropout: 0.1
|
| 61 |
+
memory_bank_size: 4
|
| 62 |
+
num_heads: 4
|
| 63 |
+
max_history_steps: 8
|
| 64 |
+
decoder:
|
| 65 |
+
hidden_dim: 64
|
| 66 |
+
num_heads: 4
|
| 67 |
+
num_layers: 2
|
| 68 |
+
ff_dim: 128
|
| 69 |
+
dropout: 0.1
|
| 70 |
+
chunk_size: 4
|
| 71 |
+
action_dim: 14
|
| 72 |
+
arm_action_dim: 7
|
| 73 |
+
num_candidates: 4
|
| 74 |
+
num_phases: 5
|
| 75 |
+
num_arm_roles: 4
|
| 76 |
+
reveal_head:
|
| 77 |
+
hidden_dim: 64
|
| 78 |
+
num_support_modes: 3
|
| 79 |
+
num_approach_templates: 32
|
| 80 |
+
rollout_horizon: 3
|
| 81 |
+
belief_map_size: 32
|
| 82 |
+
field_size: 16
|
| 83 |
+
num_heads: 4
|
| 84 |
+
predict_belief_map: true
|
| 85 |
+
num_phases: 5
|
| 86 |
+
num_arm_roles: 4
|
| 87 |
+
num_interaction_tokens: 8
|
| 88 |
+
world_model:
|
| 89 |
+
hidden_dim: 64
|
| 90 |
+
action_dim: 14
|
| 91 |
+
num_support_modes: 3
|
| 92 |
+
num_approach_templates: 32
|
| 93 |
+
rollout_horizon: 3
|
| 94 |
+
field_size: 16
|
| 95 |
+
num_heads: 4
|
| 96 |
+
num_phases: 5
|
| 97 |
+
num_arm_roles: 4
|
| 98 |
+
num_interaction_tokens: 8
|
| 99 |
+
planner:
|
| 100 |
+
hidden_dim: 64
|
| 101 |
+
num_candidates: 4
|
| 102 |
+
action_dim: 14
|
| 103 |
+
num_support_modes: 3
|
| 104 |
+
utility_margin: 0.1
|
| 105 |
+
num_heads: 4
|
| 106 |
+
num_layers: 2
|
| 107 |
+
num_phases: 5
|
| 108 |
+
num_arm_roles: 4
|
| 109 |
+
|
| 110 |
+
loss_weights:
|
| 111 |
+
action: 1.0
|
| 112 |
+
phase: 0.15
|
| 113 |
+
arm_role: 0.2
|
| 114 |
+
support_mode: 0.15
|
| 115 |
+
corridor: 0.2
|
| 116 |
+
persistence: 0.1
|
| 117 |
+
disturbance: 0.1
|
| 118 |
+
world_model: 0.25
|
| 119 |
+
belief: 0.05
|
| 120 |
+
planner_success: 0.2
|
| 121 |
+
planner_risk: 0.1
|
| 122 |
+
planner_ranking: 0.1
|
| 123 |
+
proposal_reconstruction: 0.2
|
| 124 |
+
proposal_success: 0.1
|
| 125 |
+
proposal_ranking: 0.1
|
code/reveal_vla_bimanual/train/configs/proxy_interaction_state_clip_actionhist.yaml
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_interaction_state_clip_actionhist
|
| 2 |
+
output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction_debug
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 7
|
| 5 |
+
init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
|
| 6 |
+
init_strict: false
|
| 7 |
+
|
| 8 |
+
data:
|
| 9 |
+
proxies: [foliage_proxy, bag_proxy, cloth_proxy]
|
| 10 |
+
resolution: 224
|
| 11 |
+
train_episodes_per_proxy: 48
|
| 12 |
+
val_episodes_per_proxy: 16
|
| 13 |
+
train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v5_actionhist.pt
|
| 14 |
+
val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v5_actionhist.pt
|
| 15 |
+
rebuild_dataset: false
|
| 16 |
+
chunk_horizon: 8
|
| 17 |
+
rollout_horizon: 5
|
| 18 |
+
history_steps: 6
|
| 19 |
+
planner_candidates: 8
|
| 20 |
+
seed: 7
|
| 21 |
+
|
| 22 |
+
optim:
|
| 23 |
+
epochs: 4
|
| 24 |
+
batch_size: 2
|
| 25 |
+
num_workers: 0
|
| 26 |
+
lr: 0.0003
|
| 27 |
+
weight_decay: 0.0001
|
| 28 |
+
|
| 29 |
+
trainer:
|
| 30 |
+
policy_type: interaction_state
|
| 31 |
+
use_bf16: true
|
| 32 |
+
grad_clip_norm: 1.0
|
| 33 |
+
freeze_backbone: true
|
| 34 |
+
gradient_checkpointing: false
|
| 35 |
+
plan_during_train: true
|
| 36 |
+
plan_during_eval: true
|
| 37 |
+
support_mode_conditioning: true
|
| 38 |
+
planner_mode: trainable
|
| 39 |
+
|
| 40 |
+
policy:
|
| 41 |
+
backbone:
|
| 42 |
+
model_name: openai/clip-vit-base-patch32
|
| 43 |
+
hidden_dim: 512
|
| 44 |
+
max_text_tokens: 32
|
| 45 |
+
freeze_backbone: true
|
| 46 |
+
gradient_checkpointing: false
|
| 47 |
+
use_dummy_backbone: false
|
| 48 |
+
fusion:
|
| 49 |
+
hidden_dim: 512
|
| 50 |
+
num_cameras: 3
|
| 51 |
+
num_layers: 4
|
| 52 |
+
num_heads: 8
|
| 53 |
+
ff_dim: 2048
|
| 54 |
+
dropout: 0.1
|
| 55 |
+
proprio_dim: 32
|
| 56 |
+
proprio_tokens: 1
|
| 57 |
+
memory:
|
| 58 |
+
hidden_dim: 512
|
| 59 |
+
action_dim: 14
|
| 60 |
+
history_steps: 6
|
| 61 |
+
num_layers: 2
|
| 62 |
+
dropout: 0.1
|
| 63 |
+
memory_bank_size: 4
|
| 64 |
+
num_heads: 8
|
| 65 |
+
max_history_steps: 8
|
| 66 |
+
decoder:
|
| 67 |
+
hidden_dim: 512
|
| 68 |
+
num_heads: 8
|
| 69 |
+
num_layers: 4
|
| 70 |
+
ff_dim: 2048
|
| 71 |
+
dropout: 0.1
|
| 72 |
+
chunk_size: 8
|
| 73 |
+
action_dim: 14
|
| 74 |
+
arm_action_dim: 7
|
| 75 |
+
num_candidates: 8
|
| 76 |
+
num_phases: 5
|
| 77 |
+
num_arm_roles: 4
|
| 78 |
+
reveal_head:
|
| 79 |
+
hidden_dim: 512
|
| 80 |
+
num_support_modes: 3
|
| 81 |
+
num_approach_templates: 32
|
| 82 |
+
rollout_horizon: 5
|
| 83 |
+
belief_map_size: 32
|
| 84 |
+
field_size: 16
|
| 85 |
+
num_heads: 8
|
| 86 |
+
predict_belief_map: true
|
| 87 |
+
num_phases: 5
|
| 88 |
+
num_arm_roles: 4
|
| 89 |
+
num_interaction_tokens: 8
|
| 90 |
+
world_model:
|
| 91 |
+
hidden_dim: 512
|
| 92 |
+
action_dim: 14
|
| 93 |
+
num_support_modes: 3
|
| 94 |
+
num_approach_templates: 32
|
| 95 |
+
rollout_horizon: 5
|
| 96 |
+
field_size: 16
|
| 97 |
+
num_heads: 8
|
| 98 |
+
num_phases: 5
|
| 99 |
+
num_arm_roles: 4
|
| 100 |
+
num_interaction_tokens: 8
|
| 101 |
+
belief_map_size: 32
|
| 102 |
+
predict_belief_map: true
|
| 103 |
+
planner:
|
| 104 |
+
hidden_dim: 512
|
| 105 |
+
num_candidates: 8
|
| 106 |
+
action_dim: 14
|
| 107 |
+
num_support_modes: 3
|
| 108 |
+
utility_margin: 0.1
|
| 109 |
+
num_heads: 8
|
| 110 |
+
num_layers: 2
|
| 111 |
+
num_phases: 5
|
| 112 |
+
num_arm_roles: 4
|
| 113 |
+
|
| 114 |
+
loss_weights:
|
| 115 |
+
action: 1.0
|
| 116 |
+
phase: 0.1
|
| 117 |
+
arm_role: 0.15
|
| 118 |
+
support_mode: 0.1
|
| 119 |
+
corridor: 0.15
|
| 120 |
+
persistence: 0.05
|
| 121 |
+
disturbance: 0.05
|
| 122 |
+
world_model: 0.2
|
| 123 |
+
belief: 0.05
|
| 124 |
+
planner_success: 0.25
|
| 125 |
+
planner_risk: 0.1
|
| 126 |
+
planner_ranking: 0.2
|
| 127 |
+
proposal_reconstruction: 0.1
|
| 128 |
+
proposal_success: 0.15
|
| 129 |
+
proposal_ranking: 0.2
|
code/reveal_vla_bimanual/train/configs/proxy_interaction_state_recency_oracleft.yaml
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
experiment_name: proxy_interaction_state_recency_oracleft
|
| 2 |
+
output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction_debug
|
| 3 |
+
device: cuda
|
| 4 |
+
seed: 13
|
| 5 |
+
init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction_debug/proxy_interaction_state_actionhist/checkpoint_best.pt
|
| 6 |
+
init_strict: true
|
| 7 |
+
|
| 8 |
+
data:
|
| 9 |
+
proxies: [foliage_proxy, bag_proxy, cloth_proxy]
|
| 10 |
+
resolution: 96
|
| 11 |
+
train_episodes_per_proxy: 48
|
| 12 |
+
val_episodes_per_proxy: 16
|
| 13 |
+
train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v5_actionhist.pt
|
| 14 |
+
val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v5_actionhist.pt
|
| 15 |
+
rebuild_dataset: false
|
| 16 |
+
chunk_horizon: 8
|
| 17 |
+
rollout_horizon: 5
|
| 18 |
+
history_steps: 6
|
| 19 |
+
planner_candidates: 8
|
| 20 |
+
seed: 13
|
| 21 |
+
|
| 22 |
+
optim:
|
| 23 |
+
epochs: 8
|
| 24 |
+
batch_size: 16
|
| 25 |
+
num_workers: 0
|
| 26 |
+
lr: 0.0003
|
| 27 |
+
weight_decay: 0.0001
|
| 28 |
+
|
| 29 |
+
trainer:
|
| 30 |
+
policy_type: interaction_state
|
| 31 |
+
use_bf16: true
|
| 32 |
+
grad_clip_norm: 1.0
|
| 33 |
+
freeze_backbone: true
|
| 34 |
+
gradient_checkpointing: false
|
| 35 |
+
plan_during_train: true
|
| 36 |
+
plan_during_eval: true
|
| 37 |
+
support_mode_conditioning: true
|
| 38 |
+
planner_mode: trainable
|
| 39 |
+
|
| 40 |
+
policy:
|
| 41 |
+
backbone:
|
| 42 |
+
model_name: openai/clip-vit-base-patch32
|
| 43 |
+
hidden_dim: 128
|
| 44 |
+
max_text_tokens: 32
|
| 45 |
+
freeze_backbone: true
|
| 46 |
+
gradient_checkpointing: false
|
| 47 |
+
use_dummy_backbone: true
|
| 48 |
+
fusion:
|
| 49 |
+
hidden_dim: 128
|
| 50 |
+
num_cameras: 3
|
| 51 |
+
num_layers: 2
|
| 52 |
+
num_heads: 4
|
| 53 |
+
ff_dim: 256
|
| 54 |
+
dropout: 0.1
|
| 55 |
+
proprio_dim: 32
|
| 56 |
+
proprio_tokens: 1
|
| 57 |
+
memory:
|
| 58 |
+
hidden_dim: 128
|
| 59 |
+
action_dim: 14
|
| 60 |
+
history_steps: 6
|
| 61 |
+
num_layers: 2
|
| 62 |
+
dropout: 0.1
|
| 63 |
+
memory_bank_size: 4
|
| 64 |
+
num_heads: 4
|
| 65 |
+
max_history_steps: 8
|
| 66 |
+
decoder:
|
| 67 |
+
hidden_dim: 128
|
| 68 |
+
num_heads: 4
|
| 69 |
+
num_layers: 2
|
| 70 |
+
ff_dim: 256
|
| 71 |
+
dropout: 0.1
|
| 72 |
+
chunk_size: 8
|
| 73 |
+
action_dim: 14
|
| 74 |
+
arm_action_dim: 7
|
| 75 |
+
num_candidates: 8
|
| 76 |
+
num_phases: 5
|
| 77 |
+
num_arm_roles: 4
|
| 78 |
+
reveal_head:
|
| 79 |
+
hidden_dim: 128
|
| 80 |
+
num_support_modes: 3
|
| 81 |
+
num_approach_templates: 32
|
| 82 |
+
rollout_horizon: 5
|
| 83 |
+
belief_map_size: 32
|
| 84 |
+
field_size: 16
|
| 85 |
+
num_heads: 4
|
| 86 |
+
predict_belief_map: true
|
| 87 |
+
num_phases: 5
|
| 88 |
+
num_arm_roles: 4
|
| 89 |
+
num_interaction_tokens: 8
|
| 90 |
+
world_model:
|
| 91 |
+
hidden_dim: 128
|
| 92 |
+
action_dim: 14
|
| 93 |
+
num_support_modes: 3
|
| 94 |
+
num_approach_templates: 32
|
| 95 |
+
rollout_horizon: 5
|
| 96 |
+
field_size: 16
|
| 97 |
+
num_heads: 4
|
| 98 |
+
num_phases: 5
|
| 99 |
+
num_arm_roles: 4
|
| 100 |
+
num_interaction_tokens: 8
|
| 101 |
+
planner:
|
| 102 |
+
hidden_dim: 128
|
| 103 |
+
num_candidates: 8
|
| 104 |
+
action_dim: 14
|
| 105 |
+
num_support_modes: 3
|
| 106 |
+
utility_margin: 0.1
|
| 107 |
+
num_heads: 4
|
| 108 |
+
num_layers: 2
|
| 109 |
+
num_phases: 5
|
| 110 |
+
num_arm_roles: 4
|
| 111 |
+
|
| 112 |
+
loss_weights:
|
| 113 |
+
action: 1.0
|
| 114 |
+
phase: 0.1
|
| 115 |
+
arm_role: 0.15
|
| 116 |
+
support_mode: 0.1
|
| 117 |
+
corridor: 0.15
|
| 118 |
+
persistence: 0.05
|
| 119 |
+
disturbance: 0.05
|
| 120 |
+
world_model: 0.2
|
| 121 |
+
belief: 0.05
|
| 122 |
+
planner_success: 0.25
|
| 123 |
+
planner_risk: 0.1
|
| 124 |
+
planner_ranking: 0.2
|
| 125 |
+
proposal_reconstruction: 0.1
|
| 126 |
+
proposal_success: 0.15
|
| 127 |
+
proposal_ranking: 0.2
|
code/reveal_vla_bimanual/train/losses.py
CHANGED
|
@@ -34,18 +34,48 @@ def chunk_bc_loss(pred_actions: Tensor, target_actions: Tensor, mask: Tensor | N
|
|
| 34 |
return loss.mean()
|
| 35 |
|
| 36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
def reveal_state_loss(pred: dict[str, Tensor], target: dict[str, Tensor], weights: LossWeights) -> dict[str, Tensor]:
|
| 38 |
losses = {}
|
| 39 |
if "phase_logits" in pred:
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
losses["phase"] = F.cross_entropy(pred["phase_logits"], phase_target)
|
| 43 |
else:
|
| 44 |
losses["phase"] = pred["support_mode_logits"].new_tensor(0.0)
|
| 45 |
if "arm_role_logits" in pred:
|
| 46 |
-
|
| 47 |
-
role_target = torch.as_tensor([1, 2], device=pred["arm_role_logits"].device, dtype=torch.long)
|
| 48 |
-
role_target = role_target.unsqueeze(0).expand(batch_size, -1)
|
| 49 |
role_ce = F.cross_entropy(
|
| 50 |
pred["arm_role_logits"].reshape(-1, pred["arm_role_logits"].shape[-1]),
|
| 51 |
role_target.reshape(-1),
|
|
@@ -106,8 +136,9 @@ def world_model_rollout_consistency_loss(pred_rollout: dict[str, Tensor], target
|
|
| 106 |
"corridor_feasible": _expand_target(target_rollout["corridor_feasible"][..., :horizon, :, :]),
|
| 107 |
"persistence_horizon": _expand_target(target_rollout["persistence_horizon"][..., :horizon, :]),
|
| 108 |
"disturbance_cost": _expand_target(target_rollout["disturbance_cost"][..., :horizon]),
|
|
|
|
| 109 |
}
|
| 110 |
-
|
| 111 |
F.cross_entropy(
|
| 112 |
pred_rollout["support_mode_logits"].reshape(-1, pred_rollout["support_mode_logits"].shape[-1]),
|
| 113 |
target_rollout["support_mode"].reshape(-1).long(),
|
|
@@ -119,6 +150,19 @@ def world_model_rollout_consistency_loss(pred_rollout: dict[str, Tensor], target
|
|
| 119 |
+ F.mse_loss(pred_rollout["persistence_horizon"], target_rollout["persistence_horizon"].float())
|
| 120 |
+ F.mse_loss(pred_rollout["disturbance_cost"], target_rollout["disturbance_cost"].float())
|
| 121 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
|
| 123 |
|
| 124 |
def compute_total_loss(
|
|
@@ -161,6 +205,7 @@ def compute_total_loss(
|
|
| 161 |
"corridor_feasible": batch["candidate_rollout_corridor_feasible"],
|
| 162 |
"persistence_horizon": batch["candidate_rollout_persistence_horizon"],
|
| 163 |
"disturbance_cost": batch["candidate_rollout_disturbance_cost"],
|
|
|
|
| 164 |
}
|
| 165 |
else:
|
| 166 |
rollout_target = {
|
|
@@ -168,6 +213,7 @@ def compute_total_loss(
|
|
| 168 |
"corridor_feasible": batch["rollout_corridor_feasible"],
|
| 169 |
"persistence_horizon": batch["rollout_persistence_horizon"],
|
| 170 |
"disturbance_cost": batch["rollout_disturbance_cost"],
|
|
|
|
| 171 |
}
|
| 172 |
world_model_loss = world_model_rollout_consistency_loss(
|
| 173 |
model_output["planned_rollout"],
|
|
@@ -199,6 +245,9 @@ def compute_total_loss(
|
|
| 199 |
ranking_loss = torch.relu(0.1 - torch.sign(target_diff) * pred_diff)[ranking_mask].mean()
|
| 200 |
else:
|
| 201 |
ranking_loss = model_output["planner_scores"].new_tensor(0.0)
|
|
|
|
|
|
|
|
|
|
| 202 |
losses["planner_success"] = success_loss
|
| 203 |
losses["planner_risk"] = risk_loss
|
| 204 |
losses["planner_ranking"] = ranking_loss
|
|
@@ -259,6 +308,9 @@ def compute_total_loss(
|
|
| 259 |
].mean()
|
| 260 |
else:
|
| 261 |
proposal_ranking_loss = model_output["proposal_logits"].new_tensor(0.0)
|
|
|
|
|
|
|
|
|
|
| 262 |
losses["proposal_success"] = proposal_success_loss
|
| 263 |
losses["proposal_ranking"] = proposal_ranking_loss
|
| 264 |
total = (
|
|
|
|
| 34 |
return loss.mean()
|
| 35 |
|
| 36 |
|
| 37 |
+
def _command_probability(command: Tensor) -> Tensor:
|
| 38 |
+
return (torch.tanh(command) + 1.0) * 0.5
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def infer_phase_targets_from_actions(action_chunk: Tensor) -> Tensor:
|
| 42 |
+
open_cmd = action_chunk[..., 0]
|
| 43 |
+
actor_reach = _command_probability(action_chunk[..., 8])
|
| 44 |
+
retrieve_cmd = _command_probability(action_chunk[..., 13])
|
| 45 |
+
|
| 46 |
+
retrieve = retrieve_cmd >= 0.55
|
| 47 |
+
recover = open_cmd <= -0.10
|
| 48 |
+
reveal = open_cmd > 0.35
|
| 49 |
+
hold = (~retrieve) & (~recover) & (~reveal) & (actor_reach >= 0.55)
|
| 50 |
+
|
| 51 |
+
phase_target = torch.zeros_like(open_cmd, dtype=torch.long)
|
| 52 |
+
phase_target = torch.where(reveal, torch.ones_like(phase_target), phase_target)
|
| 53 |
+
phase_target = torch.where(hold, torch.full_like(phase_target, 2), phase_target)
|
| 54 |
+
phase_target = torch.where(retrieve, torch.full_like(phase_target, 3), phase_target)
|
| 55 |
+
phase_target = torch.where(recover, torch.full_like(phase_target, 4), phase_target)
|
| 56 |
+
return phase_target
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def _role_targets_like(arm_role_logits: Tensor) -> Tensor:
|
| 60 |
+
role_target = torch.as_tensor([1, 2], device=arm_role_logits.device, dtype=torch.long)
|
| 61 |
+
expand_shape = [1] * (arm_role_logits.ndim - 2) + [2]
|
| 62 |
+
return role_target.view(*expand_shape).expand(*arm_role_logits.shape[:-1])
|
| 63 |
+
|
| 64 |
+
|
| 65 |
def reveal_state_loss(pred: dict[str, Tensor], target: dict[str, Tensor], weights: LossWeights) -> dict[str, Tensor]:
|
| 66 |
losses = {}
|
| 67 |
if "phase_logits" in pred:
|
| 68 |
+
action_chunk = target.get("action_chunk")
|
| 69 |
+
if action_chunk is not None:
|
| 70 |
+
phase_target = infer_phase_targets_from_actions(action_chunk[:, 0])
|
| 71 |
+
else:
|
| 72 |
+
phase_map = torch.as_tensor([2, 3, 0], device=target["support_mode"].device, dtype=torch.long)
|
| 73 |
+
phase_target = phase_map[target["support_mode"].long()]
|
| 74 |
losses["phase"] = F.cross_entropy(pred["phase_logits"], phase_target)
|
| 75 |
else:
|
| 76 |
losses["phase"] = pred["support_mode_logits"].new_tensor(0.0)
|
| 77 |
if "arm_role_logits" in pred:
|
| 78 |
+
role_target = _role_targets_like(pred["arm_role_logits"])
|
|
|
|
|
|
|
| 79 |
role_ce = F.cross_entropy(
|
| 80 |
pred["arm_role_logits"].reshape(-1, pred["arm_role_logits"].shape[-1]),
|
| 81 |
role_target.reshape(-1),
|
|
|
|
| 136 |
"corridor_feasible": _expand_target(target_rollout["corridor_feasible"][..., :horizon, :, :]),
|
| 137 |
"persistence_horizon": _expand_target(target_rollout["persistence_horizon"][..., :horizon, :]),
|
| 138 |
"disturbance_cost": _expand_target(target_rollout["disturbance_cost"][..., :horizon]),
|
| 139 |
+
"action_chunk": _expand_target(target_rollout["action_chunk"][..., :horizon, :]),
|
| 140 |
}
|
| 141 |
+
loss = (
|
| 142 |
F.cross_entropy(
|
| 143 |
pred_rollout["support_mode_logits"].reshape(-1, pred_rollout["support_mode_logits"].shape[-1]),
|
| 144 |
target_rollout["support_mode"].reshape(-1).long(),
|
|
|
|
| 150 |
+ F.mse_loss(pred_rollout["persistence_horizon"], target_rollout["persistence_horizon"].float())
|
| 151 |
+ F.mse_loss(pred_rollout["disturbance_cost"], target_rollout["disturbance_cost"].float())
|
| 152 |
)
|
| 153 |
+
if "phase_logits" in pred_rollout:
|
| 154 |
+
phase_target = infer_phase_targets_from_actions(target_rollout["action_chunk"])
|
| 155 |
+
loss = loss + 0.5 * F.cross_entropy(
|
| 156 |
+
pred_rollout["phase_logits"].reshape(-1, pred_rollout["phase_logits"].shape[-1]),
|
| 157 |
+
phase_target.reshape(-1),
|
| 158 |
+
)
|
| 159 |
+
if "arm_role_logits" in pred_rollout:
|
| 160 |
+
role_target = _role_targets_like(pred_rollout["arm_role_logits"])
|
| 161 |
+
loss = loss + 0.25 * F.cross_entropy(
|
| 162 |
+
pred_rollout["arm_role_logits"].reshape(-1, pred_rollout["arm_role_logits"].shape[-1]),
|
| 163 |
+
role_target.reshape(-1),
|
| 164 |
+
)
|
| 165 |
+
return loss
|
| 166 |
|
| 167 |
|
| 168 |
def compute_total_loss(
|
|
|
|
| 205 |
"corridor_feasible": batch["candidate_rollout_corridor_feasible"],
|
| 206 |
"persistence_horizon": batch["candidate_rollout_persistence_horizon"],
|
| 207 |
"disturbance_cost": batch["candidate_rollout_disturbance_cost"],
|
| 208 |
+
"action_chunk": batch["candidate_action_chunks"],
|
| 209 |
}
|
| 210 |
else:
|
| 211 |
rollout_target = {
|
|
|
|
| 213 |
"corridor_feasible": batch["rollout_corridor_feasible"],
|
| 214 |
"persistence_horizon": batch["rollout_persistence_horizon"],
|
| 215 |
"disturbance_cost": batch["rollout_disturbance_cost"],
|
| 216 |
+
"action_chunk": batch["action_chunk"],
|
| 217 |
}
|
| 218 |
world_model_loss = world_model_rollout_consistency_loss(
|
| 219 |
model_output["planned_rollout"],
|
|
|
|
| 245 |
ranking_loss = torch.relu(0.1 - torch.sign(target_diff) * pred_diff)[ranking_mask].mean()
|
| 246 |
else:
|
| 247 |
ranking_loss = model_output["planner_scores"].new_tensor(0.0)
|
| 248 |
+
oracle_target = utility_target.argmax(dim=-1)
|
| 249 |
+
oracle_loss = F.cross_entropy(model_output["planner_scores"], oracle_target)
|
| 250 |
+
ranking_loss = ranking_loss + 0.5 * oracle_loss
|
| 251 |
losses["planner_success"] = success_loss
|
| 252 |
losses["planner_risk"] = risk_loss
|
| 253 |
losses["planner_ranking"] = ranking_loss
|
|
|
|
| 308 |
].mean()
|
| 309 |
else:
|
| 310 |
proposal_ranking_loss = model_output["proposal_logits"].new_tensor(0.0)
|
| 311 |
+
proposal_oracle_target = proposal_utility.argmax(dim=-1)
|
| 312 |
+
proposal_oracle_loss = F.cross_entropy(proposal_logits, proposal_oracle_target)
|
| 313 |
+
proposal_ranking_loss = proposal_ranking_loss + 0.5 * proposal_oracle_loss
|
| 314 |
losses["proposal_success"] = proposal_success_loss
|
| 315 |
losses["proposal_ranking"] = proposal_ranking_loss
|
| 316 |
total = (
|
code/reveal_vla_bimanual/train/run_experiment.py
CHANGED
|
@@ -61,6 +61,32 @@ def _loss_weights_from_omega(cfg: Any) -> LossWeights:
|
|
| 61 |
return LossWeights(**OmegaConf.to_container(cfg, resolve=True))
|
| 62 |
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
def _build_dataset_from_config(data_cfg: Any, split: str) -> dict[str, Any]:
|
| 65 |
dataset_path = data_cfg.get(f"{split}_dataset_path")
|
| 66 |
if dataset_path and Path(dataset_path).exists() and not data_cfg.get("rebuild_dataset", False):
|
|
@@ -138,6 +164,7 @@ def main() -> None:
|
|
| 138 |
trainer_config = _trainer_config_from_omega(cfg.trainer)
|
| 139 |
loss_weights = _loss_weights_from_omega(cfg.loss_weights)
|
| 140 |
model = build_policy(policy_config, trainer_config).to(device)
|
|
|
|
| 141 |
frozen_modules = apply_planner_mode(model, trainer_config)
|
| 142 |
trainable_parameters = [parameter for parameter in model.parameters() if parameter.requires_grad]
|
| 143 |
optimizer = torch.optim.AdamW(trainable_parameters, lr=float(cfg.optim.lr), weight_decay=float(cfg.optim.weight_decay))
|
|
@@ -166,6 +193,7 @@ def main() -> None:
|
|
| 166 |
"texts": moved["texts"],
|
| 167 |
"history_images": moved.get("history_images"),
|
| 168 |
"history_proprio": moved.get("history_proprio"),
|
|
|
|
| 169 |
}
|
| 170 |
if policy_supports_planning(trainer_config.policy_type):
|
| 171 |
forward_kwargs["plan"] = planner_enabled(trainer_config, during_eval=True)
|
|
@@ -195,6 +223,7 @@ def main() -> None:
|
|
| 195 |
"state_dict": model.state_dict(),
|
| 196 |
"history": history,
|
| 197 |
"data_resolution": int(cfg.data.resolution),
|
|
|
|
| 198 |
},
|
| 199 |
best_checkpoint,
|
| 200 |
)
|
|
@@ -212,7 +241,9 @@ def main() -> None:
|
|
| 212 |
"num_val_samples": len(val_bundle["samples"]),
|
| 213 |
"planner_mode": trainer_config.planner_mode,
|
| 214 |
"frozen_modules": frozen_modules,
|
|
|
|
| 215 |
}
|
|
|
|
| 216 |
print(json.dumps(summary, indent=2))
|
| 217 |
|
| 218 |
|
|
|
|
| 61 |
return LossWeights(**OmegaConf.to_container(cfg, resolve=True))
|
| 62 |
|
| 63 |
|
| 64 |
+
def _load_init_checkpoint(model: torch.nn.Module, checkpoint_path: str | None, strict: bool) -> dict[str, Any] | None:
|
| 65 |
+
if not checkpoint_path:
|
| 66 |
+
return None
|
| 67 |
+
checkpoint = torch.load(Path(checkpoint_path), map_location="cpu", weights_only=False)
|
| 68 |
+
state_dict = checkpoint["state_dict"]
|
| 69 |
+
filtered_state_dict = state_dict
|
| 70 |
+
skipped_keys: list[str] = []
|
| 71 |
+
if not strict:
|
| 72 |
+
current_state = model.state_dict()
|
| 73 |
+
filtered_state_dict = {}
|
| 74 |
+
for key, value in state_dict.items():
|
| 75 |
+
current_value = current_state.get(key)
|
| 76 |
+
if current_value is None or current_value.shape != value.shape:
|
| 77 |
+
skipped_keys.append(key)
|
| 78 |
+
continue
|
| 79 |
+
filtered_state_dict[key] = value
|
| 80 |
+
incompatible = model.load_state_dict(filtered_state_dict, strict=strict)
|
| 81 |
+
return {
|
| 82 |
+
"path": str(checkpoint_path),
|
| 83 |
+
"loaded_keys": len(filtered_state_dict),
|
| 84 |
+
"skipped_shape_mismatch_keys": skipped_keys,
|
| 85 |
+
"missing_keys": list(incompatible.missing_keys),
|
| 86 |
+
"unexpected_keys": list(incompatible.unexpected_keys),
|
| 87 |
+
}
|
| 88 |
+
|
| 89 |
+
|
| 90 |
def _build_dataset_from_config(data_cfg: Any, split: str) -> dict[str, Any]:
|
| 91 |
dataset_path = data_cfg.get(f"{split}_dataset_path")
|
| 92 |
if dataset_path and Path(dataset_path).exists() and not data_cfg.get("rebuild_dataset", False):
|
|
|
|
| 164 |
trainer_config = _trainer_config_from_omega(cfg.trainer)
|
| 165 |
loss_weights = _loss_weights_from_omega(cfg.loss_weights)
|
| 166 |
model = build_policy(policy_config, trainer_config).to(device)
|
| 167 |
+
init_info = _load_init_checkpoint(model, cfg.get("init_checkpoint"), bool(cfg.get("init_strict", False)))
|
| 168 |
frozen_modules = apply_planner_mode(model, trainer_config)
|
| 169 |
trainable_parameters = [parameter for parameter in model.parameters() if parameter.requires_grad]
|
| 170 |
optimizer = torch.optim.AdamW(trainable_parameters, lr=float(cfg.optim.lr), weight_decay=float(cfg.optim.weight_decay))
|
|
|
|
| 193 |
"texts": moved["texts"],
|
| 194 |
"history_images": moved.get("history_images"),
|
| 195 |
"history_proprio": moved.get("history_proprio"),
|
| 196 |
+
"history_actions": moved.get("history_actions"),
|
| 197 |
}
|
| 198 |
if policy_supports_planning(trainer_config.policy_type):
|
| 199 |
forward_kwargs["plan"] = planner_enabled(trainer_config, during_eval=True)
|
|
|
|
| 223 |
"state_dict": model.state_dict(),
|
| 224 |
"history": history,
|
| 225 |
"data_resolution": int(cfg.data.resolution),
|
| 226 |
+
"init_info": init_info,
|
| 227 |
},
|
| 228 |
best_checkpoint,
|
| 229 |
)
|
|
|
|
| 241 |
"num_val_samples": len(val_bundle["samples"]),
|
| 242 |
"planner_mode": trainer_config.planner_mode,
|
| 243 |
"frozen_modules": frozen_modules,
|
| 244 |
+
"init_info": init_info,
|
| 245 |
}
|
| 246 |
+
(output_dir / "summary.json").write_text(json.dumps(summary, indent=2), encoding="utf-8")
|
| 247 |
print(json.dumps(summary, indent=2))
|
| 248 |
|
| 249 |
|
code/reveal_vla_bimanual/train/run_rlbench_experiment.py
CHANGED
|
@@ -133,6 +133,7 @@ def main() -> None:
|
|
| 133 |
"texts": moved["texts"],
|
| 134 |
"history_images": moved.get("history_images"),
|
| 135 |
"history_proprio": moved.get("history_proprio"),
|
|
|
|
| 136 |
}
|
| 137 |
if policy_supports_planning(trainer_config.policy_type):
|
| 138 |
forward_kwargs["plan"] = planner_enabled(trainer_config, during_eval=True)
|
|
|
|
| 133 |
"texts": moved["texts"],
|
| 134 |
"history_images": moved.get("history_images"),
|
| 135 |
"history_proprio": moved.get("history_proprio"),
|
| 136 |
+
"history_actions": moved.get("history_actions"),
|
| 137 |
}
|
| 138 |
if policy_supports_planning(trainer_config.policy_type):
|
| 139 |
forward_kwargs["plan"] = planner_enabled(trainer_config, during_eval=True)
|
code/reveal_vla_bimanual/train/smoke_checks.py
CHANGED
|
@@ -139,11 +139,13 @@ def _synthetic_rlbench_batch(
|
|
| 139 |
history_images = torch.rand(batch_size, history_steps, 3, 3, resolution, resolution, device=device)
|
| 140 |
proprio = torch.rand(batch_size, 32, device=device)
|
| 141 |
history_proprio = torch.rand(batch_size, history_steps, 32, device=device)
|
|
|
|
| 142 |
action_chunk = torch.rand(batch_size, chunk_size, 14, device=device)
|
| 143 |
return {
|
| 144 |
"images": images,
|
| 145 |
"history_images": history_images,
|
| 146 |
"history_proprio": history_proprio,
|
|
|
|
| 147 |
"proprio": proprio,
|
| 148 |
"texts": ["synthetic dual-arm RLBench smoke task"] * batch_size,
|
| 149 |
"action_chunk": action_chunk,
|
|
@@ -207,6 +209,7 @@ def main() -> None:
|
|
| 207 |
texts=proxy_batch["texts"],
|
| 208 |
history_images=proxy_batch.get("history_images"),
|
| 209 |
history_proprio=proxy_batch.get("history_proprio"),
|
|
|
|
| 210 |
plan=True,
|
| 211 |
candidate_chunks_override=proxy_batch["candidate_action_chunks"],
|
| 212 |
)
|
|
@@ -245,6 +248,7 @@ def main() -> None:
|
|
| 245 |
texts=rlbench_batch["texts"],
|
| 246 |
history_images=rlbench_batch.get("history_images"),
|
| 247 |
history_proprio=rlbench_batch.get("history_proprio"),
|
|
|
|
| 248 |
plan=True,
|
| 249 |
)
|
| 250 |
_check_output_shapes(
|
|
|
|
| 139 |
history_images = torch.rand(batch_size, history_steps, 3, 3, resolution, resolution, device=device)
|
| 140 |
proprio = torch.rand(batch_size, 32, device=device)
|
| 141 |
history_proprio = torch.rand(batch_size, history_steps, 32, device=device)
|
| 142 |
+
history_actions = torch.rand(batch_size, history_steps, 14, device=device)
|
| 143 |
action_chunk = torch.rand(batch_size, chunk_size, 14, device=device)
|
| 144 |
return {
|
| 145 |
"images": images,
|
| 146 |
"history_images": history_images,
|
| 147 |
"history_proprio": history_proprio,
|
| 148 |
+
"history_actions": history_actions,
|
| 149 |
"proprio": proprio,
|
| 150 |
"texts": ["synthetic dual-arm RLBench smoke task"] * batch_size,
|
| 151 |
"action_chunk": action_chunk,
|
|
|
|
| 209 |
texts=proxy_batch["texts"],
|
| 210 |
history_images=proxy_batch.get("history_images"),
|
| 211 |
history_proprio=proxy_batch.get("history_proprio"),
|
| 212 |
+
history_actions=proxy_batch.get("history_actions"),
|
| 213 |
plan=True,
|
| 214 |
candidate_chunks_override=proxy_batch["candidate_action_chunks"],
|
| 215 |
)
|
|
|
|
| 248 |
texts=rlbench_batch["texts"],
|
| 249 |
history_images=rlbench_batch.get("history_images"),
|
| 250 |
history_proprio=rlbench_batch.get("history_proprio"),
|
| 251 |
+
history_actions=rlbench_batch.get("history_actions"),
|
| 252 |
plan=True,
|
| 253 |
)
|
| 254 |
_check_output_shapes(
|
code/reveal_vla_bimanual/train/trainer.py
CHANGED
|
@@ -86,6 +86,7 @@ class BimanualTrainer:
|
|
| 86 |
"language_tokens": language_tokens if isinstance(language_tokens, dict) else None,
|
| 87 |
"history_images": batch.get("history_images"),
|
| 88 |
"history_proprio": batch.get("history_proprio"),
|
|
|
|
| 89 |
}
|
| 90 |
if policy_supports_planning(self.config.policy_type):
|
| 91 |
forward_kwargs["plan"] = planner_enabled(self.config, during_eval=False)
|
|
|
|
| 86 |
"language_tokens": language_tokens if isinstance(language_tokens, dict) else None,
|
| 87 |
"history_images": batch.get("history_images"),
|
| 88 |
"history_proprio": batch.get("history_proprio"),
|
| 89 |
+
"history_actions": batch.get("history_actions"),
|
| 90 |
}
|
| 91 |
if policy_supports_planning(self.config.policy_type):
|
| 92 |
forward_kwargs["plan"] = planner_enabled(self.config, during_eval=False)
|
environment/validate_same_machine.sh
CHANGED
|
@@ -14,6 +14,27 @@ RUNTIME_DIR="${ROOT_DIR}/runtime"
|
|
| 14 |
mkdir -p "${RUNTIME_DIR}"
|
| 15 |
chmod 700 "${RUNTIME_DIR}"
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
run_in_rlbench_env() {
|
| 18 |
local driver_version=""
|
| 19 |
local driver_branch=""
|
|
@@ -34,6 +55,8 @@ run_in_rlbench_env() {
|
|
| 34 |
"${MAMBA_BIN}" run -r "${MAMBA_ROOT_PREFIX}" -p "${ENV_PREFIX}" "$@"
|
| 35 |
}
|
| 36 |
|
|
|
|
|
|
|
| 37 |
echo "Display check"
|
| 38 |
DISPLAY="${DISPLAY}" glxinfo -B
|
| 39 |
|
|
|
|
| 14 |
mkdir -p "${RUNTIME_DIR}"
|
| 15 |
chmod 700 "${RUNTIME_DIR}"
|
| 16 |
|
| 17 |
+
ensure_rlbench_display() {
|
| 18 |
+
if DISPLAY="${DISPLAY}" xdpyinfo >/dev/null 2>&1; then
|
| 19 |
+
return 0
|
| 20 |
+
fi
|
| 21 |
+
|
| 22 |
+
local driver_version=""
|
| 23 |
+
local driver_branch=""
|
| 24 |
+
if command -v nvidia-smi >/dev/null 2>&1; then
|
| 25 |
+
driver_version="$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 || true)"
|
| 26 |
+
driver_branch="${driver_version%%.*}"
|
| 27 |
+
fi
|
| 28 |
+
|
| 29 |
+
if [[ -n "${driver_branch}" && ! -f "${ROOT_DIR}/system_shims/nvidia${driver_branch}/usr/lib/x86_64-linux-gnu/nvidia/xorg/libglxserver_nvidia.so" ]]; then
|
| 30 |
+
echo "RLBench X shims missing; installing headless X prerequisites"
|
| 31 |
+
ROOT_DIR="${ROOT_DIR}" "${PROJECT_DIR}/scripts/setup_rlbench_headless_x.sh"
|
| 32 |
+
fi
|
| 33 |
+
|
| 34 |
+
echo "Starting RLBench X server on ${DISPLAY}"
|
| 35 |
+
ROOT_DIR="${ROOT_DIR}" DISPLAY_NUM="${DISPLAY_NUM}" "${PROJECT_DIR}/scripts/start_rlbench_x.sh"
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
run_in_rlbench_env() {
|
| 39 |
local driver_version=""
|
| 40 |
local driver_branch=""
|
|
|
|
| 55 |
"${MAMBA_BIN}" run -r "${MAMBA_ROOT_PREFIX}" -p "${ENV_PREFIX}" "$@"
|
| 56 |
}
|
| 57 |
|
| 58 |
+
ensure_rlbench_display
|
| 59 |
+
|
| 60 |
echo "Display check"
|
| 61 |
DISPLAY="${DISPLAY}" glxinfo -B
|
| 62 |
|