lsnu committed on
Commit
8944de3
·
verified ·
1 Parent(s): bdab26f

Add files using upload-large-folder tool

Browse files
FILE_MANIFEST.txt CHANGED
@@ -1,166 +1,290 @@
1
- 32283 FILE_MANIFEST.txt
2
- 3195 MODEL_INDEX.md
3
- 6379 README.md
4
- 8489332 artifacts/data/reveal_proxy/proxy_train_clip224_v4_noleak_counterfactual.pt
5
- 806868 artifacts/data/reveal_proxy/proxy_train_smoke_v4.pt
6
- 8489300 artifacts/data/reveal_proxy/proxy_train_v4_noleak_counterfactual.pt
7
- 2800044 artifacts/data/reveal_proxy/proxy_val_clip224_v4_noleak_counterfactual.pt
8
- 264396 artifacts/data/reveal_proxy/proxy_val_smoke_v4.pt
9
- 2800012 artifacts/data/reveal_proxy/proxy_val_v4_noleak_counterfactual.pt
10
- 1184 artifacts/logs/system/rlbench_launch_smoke.txt
11
- 835 artifacts/logs/system/x99.conf
12
- 1011 artifacts/logs/system/x99.log
13
- 5 artifacts/logs/system/x99.pid
14
- 5071390 artifacts/outputs/reveal_runs/proxy_backbone_only/checkpoint_best.pt
15
- 1960 artifacts/outputs/reveal_runs/proxy_backbone_only/config_resolved.yaml
16
- 3506 artifacts/outputs/reveal_runs/proxy_backbone_only/metrics.json
17
- 802090604 artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
18
- 2072 artifacts/outputs/reveal_runs/proxy_backbone_only_clip/config_resolved.yaml
19
- 1758 artifacts/outputs/reveal_runs/proxy_backbone_only_clip/metrics.json
20
- 6836318 artifacts/outputs/reveal_runs/proxy_reveal_state/checkpoint_best.pt
21
- 1993 artifacts/outputs/reveal_runs/proxy_reveal_state/config_resolved.yaml
22
- 9016 artifacts/outputs/reveal_runs/proxy_reveal_state/metrics.json
23
- 826368812 artifacts/outputs/reveal_runs/proxy_reveal_state_clip/checkpoint_best.pt
24
- 2103 artifacts/outputs/reveal_runs/proxy_reveal_state_clip/config_resolved.yaml
25
- 4515 artifacts/outputs/reveal_runs/proxy_reveal_state_clip/metrics.json
26
- 2914 artifacts/outputs/reveal_runs/reveal_ablation_v4/ablations.json
27
- 1354 artifacts/outputs/reveal_runs/reveal_ablation_v4/ablations.md
28
- 3219 artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep18/ablations.json
29
- 1351 artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep18/ablations.md
30
- 5831 artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep18/ablations.partial.json
31
- 2824 artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep4/ablations.json
32
- 1351 artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep4/ablations.md
33
- 2670 artifacts/outputs/reveal_runs/reveal_ablation_v4_det/ablations.json
34
- 1354 artifacts/outputs/reveal_runs/reveal_ablation_v4_det/ablations.md
35
- 798 artifacts/outputs/reveal_runs/reveal_eval_v4/reveal_benchmark.json
36
- 726 artifacts/outputs/reveal_runs/reveal_eval_v4/reveal_benchmark.md
37
- 848 artifacts/outputs/reveal_runs/reveal_eval_v4_clip_det/reveal_benchmark.json
38
- 736 artifacts/outputs/reveal_runs/reveal_eval_v4_clip_det/reveal_benchmark.md
39
- 732 artifacts/outputs/reveal_runs/reveal_eval_v4_det/reveal_benchmark.json
40
- 726 artifacts/outputs/reveal_runs/reveal_eval_v4_det/reveal_benchmark.md
41
- 1331678 artifacts/outputs/smoke/proxy_backbone_only_smoke/checkpoint_best.pt
42
- 2062 artifacts/outputs/smoke/proxy_backbone_only_smoke/config_resolved.yaml
43
- 874 artifacts/outputs/smoke/proxy_backbone_only_smoke/metrics.json
44
- 1865438 artifacts/outputs/smoke/proxy_reveal_state_smoke/checkpoint_best.pt
45
- 2061 artifacts/outputs/smoke/proxy_reveal_state_smoke/config_resolved.yaml
46
- 2243 artifacts/outputs/smoke/proxy_reveal_state_smoke/metrics.json
47
- 2713 artifacts/outputs/smoke/reveal_ablation_ep2/ablations.json
48
- 1354 artifacts/outputs/smoke/reveal_ablation_ep2/ablations.md
49
- 732 artifacts/outputs/smoke/reveal_eval_ep2/reveal_benchmark.json
50
- 726 artifacts/outputs/smoke/reveal_eval_ep2/reveal_benchmark.md
51
- 3033 artifacts/reports/reveal_ablation/ablations.json
52
- 1354 artifacts/reports/reveal_ablation/ablations.md
53
- 796 artifacts/reports/reveal_eval/reveal_benchmark.json
54
- 724 artifacts/reports/reveal_eval/reveal_benchmark.md
55
- 414 artifacts/reports/reveal_eval_noplan/reveal_benchmark.json
56
- 381 artifacts/reports/reveal_eval_noplan/reveal_benchmark.md
57
- 781 artifacts/reports/reveal_eval_progress/reveal_benchmark.json
58
- 724 artifacts/reports/reveal_eval_progress/reveal_benchmark.md
59
- 828 artifacts/reports/reveal_eval_v2/reveal_benchmark.json
60
- 725 artifacts/reports/reveal_eval_v2/reveal_benchmark.md
61
- 788 artifacts/reports/rlbench_custom/backbone_only_rollout/rollout_eval.json
62
- 408 artifacts/reports/rlbench_custom/backbone_only_rollout/rollout_eval.md
63
- 787 artifacts/reports/rlbench_custom/reveal_state_rollout_noplan/rollout_eval.json
64
- 407 artifacts/reports/rlbench_custom/reveal_state_rollout_noplan/rollout_eval.md
65
- 786 artifacts/reports/rlbench_custom/reveal_state_rollout_plan/rollout_eval.json
66
- 406 artifacts/reports/rlbench_custom/reveal_state_rollout_plan/rollout_eval.md
67
- 2239 artifacts/reports/rlbench_custom/rlbench_subset3_custom_eval.md
68
- 787 artifacts/reports/rlbench_custom_clip/backbone_only_rollout/rollout_eval.json
69
- 407 artifacts/reports/rlbench_custom_clip/backbone_only_rollout/rollout_eval.md
70
- 786 artifacts/reports/rlbench_custom_clip/reveal_state_rollout_noplan/rollout_eval.json
71
- 406 artifacts/reports/rlbench_custom_clip/reveal_state_rollout_noplan/rollout_eval.md
72
- 785 artifacts/reports/rlbench_custom_clip/reveal_state_rollout_plan/rollout_eval.json
73
- 405 artifacts/reports/rlbench_custom_clip/reveal_state_rollout_plan/rollout_eval.md
74
- 2423 artifacts/reports/rlbench_custom_clip/rlbench_subset3_clip_eval.md
75
- 2019 artifacts/reports/rlbench_subset3_baseline_sanity.md
76
- 119 code/reveal_vla_bimanual/.gitignore
77
- 3500 code/reveal_vla_bimanual/README.md
78
- 1065 code/reveal_vla_bimanual/docs/upstream_pins.md
79
- 836 code/reveal_vla_bimanual/docs/xorg.rtx6000.conf
80
- 154 code/reveal_vla_bimanual/envs/mambafi1vu4sqxyk
81
- 635 code/reveal_vla_bimanual/envs/reveal310.yaml
82
- 930 code/reveal_vla_bimanual/envs/rlbench310.yaml
83
- 74 code/reveal_vla_bimanual/eval/__init__.py
84
- 205 code/reveal_vla_bimanual/eval/ablations.py
85
- 1583 code/reveal_vla_bimanual/eval/metrics.py
86
- 1617 code/reveal_vla_bimanual/eval/report.py
87
- 3886 code/reveal_vla_bimanual/eval/run_ablations.py
88
- 11796 code/reveal_vla_bimanual/eval/run_reveal_benchmark.py
89
- 10854 code/reveal_vla_bimanual/eval/run_rlbench_rollout_eval.py
90
- 826 code/reveal_vla_bimanual/models/__init__.py
91
- 6628 code/reveal_vla_bimanual/models/action_decoder.py
92
- 5422 code/reveal_vla_bimanual/models/backbones.py
93
- 2077 code/reveal_vla_bimanual/models/multiview_fusion.py
94
- 1870 code/reveal_vla_bimanual/models/observation_memory.py
95
- 3278 code/reveal_vla_bimanual/models/planner.py
96
- 9128 code/reveal_vla_bimanual/models/policy.py
97
- 5373 code/reveal_vla_bimanual/models/reveal_head.py
98
- 3220 code/reveal_vla_bimanual/models/world_model.py
99
- 780 code/reveal_vla_bimanual/pyproject.toml
100
- 59 code/reveal_vla_bimanual/pytorch3d/__init__.py
101
- 3940 code/reveal_vla_bimanual/pytorch3d/transforms.py
102
- 1660 code/reveal_vla_bimanual/reveal_vla_bimanual.egg-info/PKG-INFO
103
- 844 code/reveal_vla_bimanual/reveal_vla_bimanual.egg-info/SOURCES.txt
104
- 1 code/reveal_vla_bimanual/reveal_vla_bimanual.egg-info/dependency_links.txt
105
- 236 code/reveal_vla_bimanual/reveal_vla_bimanual.egg-info/requires.txt
106
- 51 code/reveal_vla_bimanual/reveal_vla_bimanual.egg-info/top_level.txt
107
- 967 code/reveal_vla_bimanual/scripts/download_coppeliasim.sh
108
- 800 code/reveal_vla_bimanual/scripts/install_micromamba.sh
109
- 3400 code/reveal_vla_bimanual/scripts/setup_env_a_rlbench.sh
110
- 1909 code/reveal_vla_bimanual/scripts/setup_env_b_reveal.sh
111
- 2086 code/reveal_vla_bimanual/scripts/setup_rlbench_headless_x.sh
112
- 3116 code/reveal_vla_bimanual/scripts/start_rlbench_x.sh
113
- 456 code/reveal_vla_bimanual/sim_reveal/__init__.py
114
- 689 code/reveal_vla_bimanual/sim_reveal/base.py
115
- 11788 code/reveal_vla_bimanual/sim_reveal/dataset.py
116
- 1633 code/reveal_vla_bimanual/sim_reveal/generate_dataset.py
117
- 665 code/reveal_vla_bimanual/sim_reveal/isaac_smoke.py
118
- 358 code/reveal_vla_bimanual/sim_reveal/isaac_wrapper.py
119
- 2198 code/reveal_vla_bimanual/sim_reveal/labels.py
120
- 31458 code/reveal_vla_bimanual/sim_reveal/procedural_envs.py
121
- 960 code/reveal_vla_bimanual/sim_reveal/proxy_specs.py
122
- 1899 code/reveal_vla_bimanual/sim_reveal/teachers.py
123
- 335 code/reveal_vla_bimanual/sim_rlbench/__init__.py
124
- 885 code/reveal_vla_bimanual/sim_rlbench/camera_spec.py
125
- 13817 code/reveal_vla_bimanual/sim_rlbench/dataset.py
126
- 4368 code/reveal_vla_bimanual/sim_rlbench/dataset_download.py
127
- 4698 code/reveal_vla_bimanual/sim_rlbench/generate_smoke_dataset.py
128
- 3232 code/reveal_vla_bimanual/sim_rlbench/launch_smoke.py
129
- 3025 code/reveal_vla_bimanual/sim_rlbench/obs_adapter.py
130
- 4525 code/reveal_vla_bimanual/sim_rlbench/peract2_runner.py
131
- 1410 code/reveal_vla_bimanual/sim_rlbench/smoke_test.py
132
- 523 code/reveal_vla_bimanual/sim_rlbench/task_splits.py
133
- 78 code/reveal_vla_bimanual/train/__init__.py
134
- 299 code/reveal_vla_bimanual/train/configs/base.yaml
135
- 213 code/reveal_vla_bimanual/train/configs/data/reveal_proxies.yaml
136
- 193 code/reveal_vla_bimanual/train/configs/data/rlbench_3cam.yaml
137
- 517 code/reveal_vla_bimanual/train/configs/model/backbone_only.yaml
138
- 1014 code/reveal_vla_bimanual/train/configs/model/reveal_state.yaml
139
- 1957 code/reveal_vla_bimanual/train/configs/proxy_backbone_only.yaml
140
- 2069 code/reveal_vla_bimanual/train/configs/proxy_backbone_only_clip.yaml
141
- 2059 code/reveal_vla_bimanual/train/configs/proxy_backbone_only_smoke.yaml
142
- 1990 code/reveal_vla_bimanual/train/configs/proxy_reveal_state.yaml
143
- 2100 code/reveal_vla_bimanual/train/configs/proxy_reveal_state_clip.yaml
144
- 2058 code/reveal_vla_bimanual/train/configs/proxy_reveal_state_smoke.yaml
145
- 1921 code/reveal_vla_bimanual/train/configs/rlbench_subset3_backbone_only_clip.yaml
146
- 1914 code/reveal_vla_bimanual/train/configs/rlbench_subset3_backbone_only_dummy.yaml
147
- 2019 code/reveal_vla_bimanual/train/configs/rlbench_subset3_reveal_state_clip.yaml
148
- 2012 code/reveal_vla_bimanual/train/configs/rlbench_subset3_reveal_state_dummy.yaml
149
- 8452 code/reveal_vla_bimanual/train/losses.py
150
- 8870 code/reveal_vla_bimanual/train/run_experiment.py
151
- 7809 code/reveal_vla_bimanual/train/run_rlbench_experiment.py
152
- 2945 code/reveal_vla_bimanual/train/trainer.py
153
- 13139 code/upstream_local_patches/YARR/yarr/runners/_independent_env_runner.py
154
- 40316 code/upstream_local_patches/peract_bimanual/agents/bimanual_peract/qattention_peract_bc_agent.py
155
- 1501 environment/README.md
156
- 855 environment/glxinfo_B.txt
157
- 5417 environment/hardware_snapshot.txt
158
- 24531 environment/reveal_env_explicit.txt
159
- 13289 environment/reveal_env_export.yaml
160
- 7333 environment/reveal_pip_freeze.txt
161
- 25057 environment/rlbench_env_explicit.txt
162
- 11542 environment/rlbench_env_export.yaml
163
- 6265 environment/rlbench_pip_freeze.txt
164
- 2775 environment/setup_same_machine.sh
165
- 533 environment/upstream_revisions.txt
166
- 27789 export_manifest_data.json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ./.gitattributes
2
+ ./FILE_MANIFEST.txt
3
+ ./MODEL_INDEX.md
4
+ ./README.md
5
+ ./artifacts/data/reveal_proxy/proxy_train_clip224_v4_noleak_counterfactual.pt
6
+ ./artifacts/data/reveal_proxy/proxy_train_smoke_v4.pt
7
+ ./artifacts/data/reveal_proxy/proxy_train_v4_noleak_counterfactual.pt
8
+ ./artifacts/data/reveal_proxy/proxy_val_clip224_v4_noleak_counterfactual.pt
9
+ ./artifacts/data/reveal_proxy/proxy_val_smoke_v4.pt
10
+ ./artifacts/data/reveal_proxy/proxy_val_v4_noleak_counterfactual.pt
11
+ ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/front_rgb/rgb_0000.png
12
+ ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/front_rgb/rgb_0001.png
13
+ ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/low_dim_obs.pkl
14
+ ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/variation_descriptions.pkl
15
+ ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/variation_number.pkl
16
+ ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/wrist_left_rgb/rgb_0000.png
17
+ ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/wrist_left_rgb/rgb_0001.png
18
+ ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/wrist_right_rgb/rgb_0000.png
19
+ ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode0/wrist_right_rgb/rgb_0001.png
20
+ ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode1/front_rgb/rgb_0000.png
21
+ ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode1/front_rgb/rgb_0001.png
22
+ ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode1/low_dim_obs.pkl
23
+ ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode1/variation_descriptions.pkl
24
+ ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode1/variation_number.pkl
25
+ ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode1/wrist_left_rgb/rgb_0000.png
26
+ ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode1/wrist_left_rgb/rgb_0001.png
27
+ ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode1/wrist_right_rgb/rgb_0000.png
28
+ ./artifacts/data/rlbench_smoke_open_drawer/open_drawer/all_variations/episodes/episode1/wrist_right_rgb/rgb_0001.png
29
+ ./artifacts/logs/glxgears.log
30
+ ./artifacts/logs/isaac_smoke.log
31
+ ./artifacts/logs/rlbench2_dataset_train.log
32
+ ./artifacts/logs/rlbench2_smoke_train/bimanual_lift_ball_smoke/BIMANUAL_PERACT/.hydra/config.yaml
33
+ ./artifacts/logs/rlbench2_smoke_train/bimanual_lift_ball_smoke/BIMANUAL_PERACT/.hydra/hydra.yaml
34
+ ./artifacts/logs/rlbench2_smoke_train/bimanual_lift_ball_smoke/BIMANUAL_PERACT/.hydra/overrides.yaml
35
+ ./artifacts/logs/rlbench2_smoke_train/bimanual_lift_ball_smoke/BIMANUAL_PERACT/seed0/config.yaml
36
+ ./artifacts/logs/rlbench2_smoke_train/bimanual_lift_ball_smoke/BIMANUAL_PERACT/seed0/training.log
37
+ ./artifacts/logs/rlbench2_smoke_train/bimanual_lift_ball_smoke/BIMANUAL_PERACT/train.log
38
+ ./artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/.hydra/config.yaml
39
+ ./artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/.hydra/hydra.yaml
40
+ ./artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/.hydra/overrides.yaml
41
+ ./artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/seed0/config.yaml
42
+ ./artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/seed0/events.out.tfevents.1774229659.e52be9725b2e.1397.0
43
+ ./artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/seed0/events.out.tfevents.1774229898.e52be9725b2e.3678.0
44
+ ./artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/seed0/events.out.tfevents.1774229987.e52be9725b2e.3804.0
45
+ ./artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/seed0/events.out.tfevents.1774230133.e52be9725b2e.3957.0
46
+ ./artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/seed0/events.out.tfevents.1774230462.e52be9725b2e.4130.0
47
+ ./artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/seed0/training.log
48
+ ./artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/seed0/weights/0/QAttentionAgent_layer0.pt
49
+ ./artifacts/logs/rlbench2_subset/multi_3cam_subset/BIMANUAL_PERACT/train.log
50
+ ./artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/.hydra/config.yaml
51
+ ./artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/.hydra/hydra.yaml
52
+ ./artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/.hydra/overrides.yaml
53
+ ./artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/seed0/config.yaml
54
+ ./artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/seed0/eval_data.csv
55
+ ./artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/seed0/events.out.tfevents.1774230948.e52be9725b2e.4400.0
56
+ ./artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/seed0/events.out.tfevents.1774231048.e52be9725b2e.4582.0
57
+ ./artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/seed0/events.out.tfevents.1774231173.e52be9725b2e.4703.0
58
+ ./artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/seed0/events.out.tfevents.1774231301.e52be9725b2e.4824.0
59
+ ./artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/seed0/events.out.tfevents.1774231438.e52be9725b2e.4951.0
60
+ ./artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/seed0/training.log
61
+ ./artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/seed0/weights/0/QAttentionAgent_layer0.pt
62
+ ./artifacts/logs/rlbench2_subset3/multi_3cam_subset3/BIMANUAL_PERACT/train.log
63
+ ./artifacts/logs/rlbench_extract/bimanual_lift_ball.log
64
+ ./artifacts/logs/rlbench_extract/bimanual_lift_ball_slice.log
65
+ ./artifacts/logs/system/rlbench_launch_smoke.txt
66
+ ./artifacts/logs/system/x99.conf
67
+ ./artifacts/logs/system/x99.log
68
+ ./artifacts/logs/system/x99.pid
69
+ ./artifacts/logs/x99-custom.log
70
+ ./artifacts/logs/x99.log
71
+ ./artifacts/logs/x99.pid
72
+ ./artifacts/outputs/interaction/proxy_interaction_state/checkpoint_best.pt
73
+ ./artifacts/outputs/interaction/proxy_interaction_state/config_resolved.yaml
74
+ ./artifacts/outputs/interaction/proxy_interaction_state/diagnostics/proxy_diagnostics.json
75
+ ./artifacts/outputs/interaction/proxy_interaction_state/metrics.json
76
+ ./artifacts/outputs/interaction/proxy_interaction_state_smoke/checkpoint_best.pt
77
+ ./artifacts/outputs/interaction/proxy_interaction_state_smoke/config_resolved.yaml
78
+ ./artifacts/outputs/interaction/proxy_interaction_state_smoke/diagnostics/proxy_diagnostics.json
79
+ ./artifacts/outputs/interaction/proxy_interaction_state_smoke/metrics.json
80
+ ./artifacts/outputs/interaction/proxy_reveal_state_full_diagnostics/proxy_diagnostics.json
81
+ ./artifacts/outputs/interaction/proxy_reveal_state_smoke_diagnostics/proxy_diagnostics.json
82
+ ./artifacts/outputs/interaction/reveal_ablation_interaction_ep1/ablations.json
83
+ ./artifacts/outputs/interaction/reveal_ablation_interaction_ep1/ablations.md
84
+ ./artifacts/outputs/interaction/reveal_ablation_interaction_ep1/ablations.partial.json
85
+ ./artifacts/outputs/interaction/reveal_eval_interaction_ep2/reveal_benchmark.json
86
+ ./artifacts/outputs/interaction/reveal_eval_interaction_ep2/reveal_benchmark.md
87
+ ./artifacts/outputs/interaction/reveal_eval_interaction_full_ep2/reveal_benchmark.json
88
+ ./artifacts/outputs/interaction/reveal_eval_interaction_full_ep2/reveal_benchmark.md
89
+ ./artifacts/outputs/interaction/rlbench_env_smoke/import_and_train_command.txt
90
+ ./artifacts/outputs/interaction/rlbench_env_smoke/import_smoke.json
91
+ ./artifacts/outputs/interaction/rlbench_env_smoke/launch_reset_smoke.json
92
+ ./artifacts/outputs/interaction/rlbench_env_smoke/launch_smoke_cli.json
93
+ ./artifacts/outputs/interaction/rlbench_env_smoke/launch_step_smoke.json
94
+ ./artifacts/outputs/interaction/rlbench_open_drawer_interaction_smoke/checkpoint_best.pt
95
+ ./artifacts/outputs/interaction/rlbench_open_drawer_interaction_smoke/config_resolved.yaml
96
+ ./artifacts/outputs/interaction/rlbench_open_drawer_interaction_smoke/metrics.json
97
+ ./artifacts/outputs/interaction/rlbench_open_drawer_interaction_smoke/summary.json
98
+ ./artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.json
99
+ ./artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.md
100
+ ./artifacts/outputs/interaction/smoke_checks/smoke_checks.json
101
+ ./artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/checkpoint_best.pt
102
+ ./artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/config_resolved.yaml
103
+ ./artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/diagnostics/proxy_diagnostics.json
104
+ ./artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/metrics.json
105
+ ./artifacts/outputs/interaction_rolefix/reveal_eval_interaction_smoke/reveal_benchmark.json
106
+ ./artifacts/outputs/interaction_rolefix/reveal_eval_interaction_smoke/reveal_benchmark.md
107
+ ./artifacts/outputs/interaction_rolefix/smoke_checks/smoke_checks.json
108
+ ./artifacts/outputs/interaction_rolefix_full/proxy_interaction_state/checkpoint_best.pt
109
+ ./artifacts/outputs/interaction_rolefix_full/proxy_interaction_state/config_resolved.yaml
110
+ ./artifacts/outputs/interaction_rolefix_full/proxy_interaction_state/diagnostics/proxy_diagnostics.json
111
+ ./artifacts/outputs/interaction_rolefix_full/proxy_interaction_state/metrics.json
112
+ ./artifacts/outputs/interaction_rolefix_full/reveal_eval_interaction/reveal_benchmark.json
113
+ ./artifacts/outputs/interaction_rolefix_full/reveal_eval_interaction/reveal_benchmark.md
114
+ ./artifacts/outputs/reveal_runs/proxy_backbone_only/checkpoint_best.pt
115
+ ./artifacts/outputs/reveal_runs/proxy_backbone_only/config_resolved.yaml
116
+ ./artifacts/outputs/reveal_runs/proxy_backbone_only/metrics.json
117
+ ./artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
118
+ ./artifacts/outputs/reveal_runs/proxy_backbone_only_clip/config_resolved.yaml
119
+ ./artifacts/outputs/reveal_runs/proxy_backbone_only_clip/metrics.json
120
+ ./artifacts/outputs/reveal_runs/proxy_reveal_state/checkpoint_best.pt
121
+ ./artifacts/outputs/reveal_runs/proxy_reveal_state/config_resolved.yaml
122
+ ./artifacts/outputs/reveal_runs/proxy_reveal_state/metrics.json
123
+ ./artifacts/outputs/reveal_runs/proxy_reveal_state_clip/checkpoint_best.pt
124
+ ./artifacts/outputs/reveal_runs/proxy_reveal_state_clip/config_resolved.yaml
125
+ ./artifacts/outputs/reveal_runs/proxy_reveal_state_clip/metrics.json
126
+ ./artifacts/outputs/reveal_runs/reveal_ablation_v4/ablations.json
127
+ ./artifacts/outputs/reveal_runs/reveal_ablation_v4/ablations.md
128
+ ./artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep18/ablations.json
129
+ ./artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep18/ablations.md
130
+ ./artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep18/ablations.partial.json
131
+ ./artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep4/ablations.json
132
+ ./artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep4/ablations.md
133
+ ./artifacts/outputs/reveal_runs/reveal_ablation_v4_det/ablations.json
134
+ ./artifacts/outputs/reveal_runs/reveal_ablation_v4_det/ablations.md
135
+ ./artifacts/outputs/reveal_runs/reveal_eval_v4/reveal_benchmark.json
136
+ ./artifacts/outputs/reveal_runs/reveal_eval_v4/reveal_benchmark.md
137
+ ./artifacts/outputs/reveal_runs/reveal_eval_v4_clip_det/reveal_benchmark.json
138
+ ./artifacts/outputs/reveal_runs/reveal_eval_v4_clip_det/reveal_benchmark.md
139
+ ./artifacts/outputs/reveal_runs/reveal_eval_v4_det/reveal_benchmark.json
140
+ ./artifacts/outputs/reveal_runs/reveal_eval_v4_det/reveal_benchmark.md
141
+ ./artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/checkpoint_best.pt
142
+ ./artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/config_resolved.yaml
143
+ ./artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/metrics.json
144
+ ./artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/summary.json
145
+ ./artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/checkpoint_best.pt
146
+ ./artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/config_resolved.yaml
147
+ ./artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/metrics.json
148
+ ./artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/summary.json
149
+ ./artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/checkpoint_best.pt
150
+ ./artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/config_resolved.yaml
151
+ ./artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/metrics.json
152
+ ./artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/summary.json
153
+ ./artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/checkpoint_best.pt
154
+ ./artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/config_resolved.yaml
155
+ ./artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/metrics.json
156
+ ./artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/summary.json
157
+ ./artifacts/outputs/smoke/proxy_backbone_only_smoke/checkpoint_best.pt
158
+ ./artifacts/outputs/smoke/proxy_backbone_only_smoke/config_resolved.yaml
159
+ ./artifacts/outputs/smoke/proxy_backbone_only_smoke/metrics.json
160
+ ./artifacts/outputs/smoke/proxy_reveal_state_smoke/checkpoint_best.pt
161
+ ./artifacts/outputs/smoke/proxy_reveal_state_smoke/config_resolved.yaml
162
+ ./artifacts/outputs/smoke/proxy_reveal_state_smoke/metrics.json
163
+ ./artifacts/outputs/smoke/reveal_ablation_ep2/ablations.json
164
+ ./artifacts/outputs/smoke/reveal_ablation_ep2/ablations.md
165
+ ./artifacts/outputs/smoke/reveal_eval_ep2/reveal_benchmark.json
166
+ ./artifacts/outputs/smoke/reveal_eval_ep2/reveal_benchmark.md
167
+ ./artifacts/reports/reveal_ablation/ablations.json
168
+ ./artifacts/reports/reveal_ablation/ablations.md
169
+ ./artifacts/reports/reveal_eval/reveal_benchmark.json
170
+ ./artifacts/reports/reveal_eval/reveal_benchmark.md
171
+ ./artifacts/reports/reveal_eval_noplan/reveal_benchmark.json
172
+ ./artifacts/reports/reveal_eval_noplan/reveal_benchmark.md
173
+ ./artifacts/reports/reveal_eval_progress/reveal_benchmark.json
174
+ ./artifacts/reports/reveal_eval_progress/reveal_benchmark.md
175
+ ./artifacts/reports/reveal_eval_v2/reveal_benchmark.json
176
+ ./artifacts/reports/reveal_eval_v2/reveal_benchmark.md
177
+ ./artifacts/reports/rlbench_custom/backbone_only_rollout/rollout_eval.json
178
+ ./artifacts/reports/rlbench_custom/backbone_only_rollout/rollout_eval.md
179
+ ./artifacts/reports/rlbench_custom/reveal_state_rollout_noplan/rollout_eval.json
180
+ ./artifacts/reports/rlbench_custom/reveal_state_rollout_noplan/rollout_eval.md
181
+ ./artifacts/reports/rlbench_custom/reveal_state_rollout_plan/rollout_eval.json
182
+ ./artifacts/reports/rlbench_custom/reveal_state_rollout_plan/rollout_eval.md
183
+ ./artifacts/reports/rlbench_custom/rlbench_subset3_custom_eval.md
184
+ ./artifacts/reports/rlbench_custom_clip/backbone_only_rollout/rollout_eval.json
185
+ ./artifacts/reports/rlbench_custom_clip/backbone_only_rollout/rollout_eval.md
186
+ ./artifacts/reports/rlbench_custom_clip/reveal_state_rollout_noplan/rollout_eval.json
187
+ ./artifacts/reports/rlbench_custom_clip/reveal_state_rollout_noplan/rollout_eval.md
188
+ ./artifacts/reports/rlbench_custom_clip/reveal_state_rollout_plan/rollout_eval.json
189
+ ./artifacts/reports/rlbench_custom_clip/reveal_state_rollout_plan/rollout_eval.md
190
+ ./artifacts/reports/rlbench_custom_clip/rlbench_subset3_clip_eval.md
191
+ ./artifacts/reports/rlbench_subset3_baseline_sanity.md
192
+ ./code/reveal_vla_bimanual/.gitignore
193
+ ./code/reveal_vla_bimanual/README.md
194
+ ./code/reveal_vla_bimanual/docs/upstream_pins.md
195
+ ./code/reveal_vla_bimanual/docs/xorg.rtx6000.conf
196
+ ./code/reveal_vla_bimanual/envs/reveal310.yaml
197
+ ./code/reveal_vla_bimanual/envs/rlbench310.yaml
198
+ ./code/reveal_vla_bimanual/eval/__init__.py
199
+ ./code/reveal_vla_bimanual/eval/ablations.py
200
+ ./code/reveal_vla_bimanual/eval/metrics.py
201
+ ./code/reveal_vla_bimanual/eval/report.py
202
+ ./code/reveal_vla_bimanual/eval/run_ablations.py
203
+ ./code/reveal_vla_bimanual/eval/run_proxy_diagnostics.py
204
+ ./code/reveal_vla_bimanual/eval/run_reveal_benchmark.py
205
+ ./code/reveal_vla_bimanual/eval/run_rlbench_rollout_eval.py
206
+ ./code/reveal_vla_bimanual/models/__init__.py
207
+ ./code/reveal_vla_bimanual/models/action_decoder.py
208
+ ./code/reveal_vla_bimanual/models/backbones.py
209
+ ./code/reveal_vla_bimanual/models/multiview_fusion.py
210
+ ./code/reveal_vla_bimanual/models/observation_memory.py
211
+ ./code/reveal_vla_bimanual/models/planner.py
212
+ ./code/reveal_vla_bimanual/models/policy.py
213
+ ./code/reveal_vla_bimanual/models/reveal_head.py
214
+ ./code/reveal_vla_bimanual/models/world_model.py
215
+ ./code/reveal_vla_bimanual/pyproject.toml
216
+ ./code/reveal_vla_bimanual/pytorch3d/__init__.py
217
+ ./code/reveal_vla_bimanual/pytorch3d/transforms.py
218
+ ./code/reveal_vla_bimanual/reveal_vla_bimanual.egg-info/PKG-INFO
219
+ ./code/reveal_vla_bimanual/reveal_vla_bimanual.egg-info/SOURCES.txt
220
+ ./code/reveal_vla_bimanual/reveal_vla_bimanual.egg-info/dependency_links.txt
221
+ ./code/reveal_vla_bimanual/reveal_vla_bimanual.egg-info/requires.txt
222
+ ./code/reveal_vla_bimanual/reveal_vla_bimanual.egg-info/top_level.txt
223
+ ./code/reveal_vla_bimanual/scripts/download_coppeliasim.sh
224
+ ./code/reveal_vla_bimanual/scripts/install_micromamba.sh
225
+ ./code/reveal_vla_bimanual/scripts/setup_env_a_rlbench.sh
226
+ ./code/reveal_vla_bimanual/scripts/setup_env_b_reveal.sh
227
+ ./code/reveal_vla_bimanual/scripts/setup_rlbench_headless_x.sh
228
+ ./code/reveal_vla_bimanual/scripts/start_rlbench_x.sh
229
+ ./code/reveal_vla_bimanual/sim_reveal/__init__.py
230
+ ./code/reveal_vla_bimanual/sim_reveal/base.py
231
+ ./code/reveal_vla_bimanual/sim_reveal/dataset.py
232
+ ./code/reveal_vla_bimanual/sim_reveal/generate_dataset.py
233
+ ./code/reveal_vla_bimanual/sim_reveal/isaac_smoke.py
234
+ ./code/reveal_vla_bimanual/sim_reveal/isaac_wrapper.py
235
+ ./code/reveal_vla_bimanual/sim_reveal/labels.py
236
+ ./code/reveal_vla_bimanual/sim_reveal/procedural_envs.py
237
+ ./code/reveal_vla_bimanual/sim_reveal/proxy_specs.py
238
+ ./code/reveal_vla_bimanual/sim_reveal/teachers.py
239
+ ./code/reveal_vla_bimanual/sim_rlbench/__init__.py
240
+ ./code/reveal_vla_bimanual/sim_rlbench/camera_spec.py
241
+ ./code/reveal_vla_bimanual/sim_rlbench/dataset.py
242
+ ./code/reveal_vla_bimanual/sim_rlbench/dataset_download.py
243
+ ./code/reveal_vla_bimanual/sim_rlbench/generate_smoke_dataset.py
244
+ ./code/reveal_vla_bimanual/sim_rlbench/launch_smoke.py
245
+ ./code/reveal_vla_bimanual/sim_rlbench/obs_adapter.py
246
+ ./code/reveal_vla_bimanual/sim_rlbench/peract2_runner.py
247
+ ./code/reveal_vla_bimanual/sim_rlbench/smoke_test.py
248
+ ./code/reveal_vla_bimanual/sim_rlbench/task_splits.py
249
+ ./code/reveal_vla_bimanual/train/__init__.py
250
+ ./code/reveal_vla_bimanual/train/configs/base.yaml
251
+ ./code/reveal_vla_bimanual/train/configs/data/reveal_proxies.yaml
252
+ ./code/reveal_vla_bimanual/train/configs/data/rlbench_3cam.yaml
253
+ ./code/reveal_vla_bimanual/train/configs/model/backbone_only.yaml
254
+ ./code/reveal_vla_bimanual/train/configs/model/reveal_state.yaml
255
+ ./code/reveal_vla_bimanual/train/configs/proxy_backbone_only.yaml
256
+ ./code/reveal_vla_bimanual/train/configs/proxy_backbone_only_clip.yaml
257
+ ./code/reveal_vla_bimanual/train/configs/proxy_backbone_only_smoke.yaml
258
+ ./code/reveal_vla_bimanual/train/configs/proxy_interaction_state.yaml
259
+ ./code/reveal_vla_bimanual/train/configs/proxy_interaction_state_smoke.yaml
260
+ ./code/reveal_vla_bimanual/train/configs/proxy_reveal_state.yaml
261
+ ./code/reveal_vla_bimanual/train/configs/proxy_reveal_state_clip.yaml
262
+ ./code/reveal_vla_bimanual/train/configs/proxy_reveal_state_smoke.yaml
263
+ ./code/reveal_vla_bimanual/train/configs/rlbench_open_drawer_interaction_smoke.yaml
264
+ ./code/reveal_vla_bimanual/train/configs/rlbench_peract2_13_interaction_state_dummy.yaml
265
+ ./code/reveal_vla_bimanual/train/configs/rlbench_subset3_backbone_only_clip.yaml
266
+ ./code/reveal_vla_bimanual/train/configs/rlbench_subset3_backbone_only_dummy.yaml
267
+ ./code/reveal_vla_bimanual/train/configs/rlbench_subset3_interaction_state_dummy.yaml
268
+ ./code/reveal_vla_bimanual/train/configs/rlbench_subset3_reveal_state_clip.yaml
269
+ ./code/reveal_vla_bimanual/train/configs/rlbench_subset3_reveal_state_dummy.yaml
270
+ ./code/reveal_vla_bimanual/train/losses.py
271
+ ./code/reveal_vla_bimanual/train/run_experiment.py
272
+ ./code/reveal_vla_bimanual/train/run_rlbench_experiment.py
273
+ ./code/reveal_vla_bimanual/train/smoke_checks.py
274
+ ./code/reveal_vla_bimanual/train/trainer.py
275
+ ./code/upstream_local_patches/YARR/yarr/runners/_independent_env_runner.py
276
+ ./code/upstream_local_patches/peract_bimanual/agents/bimanual_peract/qattention_peract_bc_agent.py
277
+ ./environment/README.md
278
+ ./environment/glxinfo_B.txt
279
+ ./environment/hardware_snapshot.txt
280
+ ./environment/reveal_env_explicit.txt
281
+ ./environment/reveal_env_export.yaml
282
+ ./environment/reveal_pip_freeze.txt
283
+ ./environment/rlbench_env_explicit.txt
284
+ ./environment/rlbench_env_export.yaml
285
+ ./environment/rlbench_pip_freeze.txt
286
+ ./environment/setup_same_machine.sh
287
+ ./environment/system_packages_same_machine.txt
288
+ ./environment/upstream_revisions.txt
289
+ ./environment/validate_same_machine.sh
290
+ ./export_manifest_data.json
MODEL_INDEX.md CHANGED
@@ -58,6 +58,66 @@ This file lists the uploaded checkpoints, datasets, and raw report files referen
58
  - CLIP 18-episode ablation partial checkpoint
59
  - `artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep18/ablations.partial.json`
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  ## Smoke Artifacts
62
 
63
  - proxy backbone-only smoke checkpoint
@@ -79,5 +139,9 @@ This file lists the uploaded checkpoints, datasets, and raw report files referen
79
  - `environment/glxinfo_B.txt`
80
  - third-party pins
81
  - `environment/upstream_revisions.txt`
 
 
82
  - same-machine setup helper
83
  - `environment/setup_same_machine.sh`
 
 
 
58
  - CLIP 18-episode ablation partial checkpoint
59
  - `artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep18/ablations.partial.json`
60
 
61
+ ## Interaction-State Outputs
62
+
63
+ - interaction-state smoke checkpoint
64
+ - `artifacts/outputs/interaction/proxy_interaction_state_smoke/checkpoint_best.pt`
65
+ - interaction-state smoke metrics
66
+ - `artifacts/outputs/interaction/proxy_interaction_state_smoke/metrics.json`
67
+ - interaction-state smoke diagnostics
68
+ - `artifacts/outputs/interaction/proxy_interaction_state_smoke/diagnostics/proxy_diagnostics.json`
69
+ - interaction-state smoke benchmark JSON
70
+ - `artifacts/outputs/interaction/reveal_eval_interaction_ep2/reveal_benchmark.json`
71
+ - interaction-state full checkpoint
72
+ - `artifacts/outputs/interaction/proxy_interaction_state/checkpoint_best.pt`
73
+ - interaction-state full metrics
74
+ - `artifacts/outputs/interaction/proxy_interaction_state/metrics.json`
75
+ - interaction-state full diagnostics
76
+ - `artifacts/outputs/interaction/proxy_interaction_state/diagnostics/proxy_diagnostics.json`
77
+ - interaction-state full benchmark JSON
78
+ - `artifacts/outputs/interaction/reveal_eval_interaction_full_ep2/reveal_benchmark.json`
79
+ - interaction-state ablation JSON
80
+ - `artifacts/outputs/interaction/reveal_ablation_interaction_ep1/ablations.json`
81
+ - interaction-state rolefix smoke checkpoint
82
+ - `artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/checkpoint_best.pt`
83
+ - interaction-state rolefix smoke metrics
84
+ - `artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/metrics.json`
85
+ - interaction-state rolefix smoke diagnostics
86
+ - `artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/diagnostics/proxy_diagnostics.json`
87
+ - interaction-state rolefix smoke benchmark JSON
88
+ - `artifacts/outputs/interaction_rolefix/reveal_eval_interaction_smoke/reveal_benchmark.json`
89
+ - interaction-state rolefix full checkpoint
90
+ - `artifacts/outputs/interaction_rolefix_full/proxy_interaction_state/checkpoint_best.pt`
91
+ - interaction-state rolefix full metrics
92
+ - `artifacts/outputs/interaction_rolefix_full/proxy_interaction_state/metrics.json`
93
+ - interaction-state rolefix full diagnostics
94
+ - `artifacts/outputs/interaction_rolefix_full/proxy_interaction_state/diagnostics/proxy_diagnostics.json`
95
+ - interaction-state rolefix full benchmark JSON
96
+ - `artifacts/outputs/interaction_rolefix_full/reveal_eval_interaction/reveal_benchmark.json`
97
+
98
+ ## RLBench Two-Robot Smoke Outputs
99
+
100
+ - import smoke JSON
101
+ - `artifacts/outputs/interaction/rlbench_env_smoke/import_smoke.json`
102
+ - launch reset smoke JSON
103
+ - `artifacts/outputs/interaction/rlbench_env_smoke/launch_reset_smoke.json`
104
+ - launch step smoke JSON
105
+ - `artifacts/outputs/interaction/rlbench_env_smoke/launch_step_smoke.json`
106
+ - launch smoke CLI JSON
107
+ - `artifacts/outputs/interaction/rlbench_env_smoke/launch_smoke_cli.json`
108
+ - train command export
109
+ - `artifacts/outputs/interaction/rlbench_env_smoke/import_and_train_command.txt`
110
+ - RLBench open_drawer smoke checkpoint
111
+ - `artifacts/outputs/interaction/rlbench_open_drawer_interaction_smoke/checkpoint_best.pt`
112
+ - RLBench open_drawer smoke summary
113
+ - `artifacts/outputs/interaction/rlbench_open_drawer_interaction_smoke/summary.json`
114
+ - RLBench open_drawer rollout eval JSON
115
+ - `artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.json`
116
+ - RLBench open_drawer rollout eval Markdown
117
+ - `artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.md`
118
+ - RLBench smoke dataset root
119
+ - `artifacts/data/rlbench_smoke_open_drawer/`
120
+
121
  ## Smoke Artifacts
122
 
123
  - proxy backbone-only smoke checkpoint
 
139
  - `environment/glxinfo_B.txt`
140
  - third-party pins
141
  - `environment/upstream_revisions.txt`
142
+ - same-machine system packages
143
+ - `environment/system_packages_same_machine.txt`
144
  - same-machine setup helper
145
  - `environment/setup_same_machine.sh`
146
+ - same-machine validation helper
147
+ - `environment/validate_same_machine.sh`
README.md CHANGED
@@ -1,6 +1,6 @@
1
  # VLAarchtests
2
 
3
- Bundle uploaded from the `/workspace` runpod session dated `2026-03-23`.
4
 
5
  ## Contents
6
 
@@ -31,6 +31,19 @@ Raw machine outputs are in:
31
  - `environment/hardware_snapshot.txt`
32
  - `environment/glxinfo_B.txt`
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  ## Raw Training Endpoints
35
 
36
  | Run | Checkpoint | Final train total | Final val total | Metrics |
@@ -74,6 +87,67 @@ Source: `artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep18/ablations.js
74
  | no_wrist_cameras | 0.35185185185185186 | 0.3333333333333333 | 0.3888888888888889 | 0.3333333333333333 | 41.34216132428911 | 0.8971193510073202 | 0.0011574074074074073 | 0.9659118890357264 | 0.6302977896950863 |
75
  | no_global_camera | 0.35185185185185186 | 0.3333333333333333 | 0.3888888888888889 | 0.3333333333333333 | 41.33038121020353 | 0.8943758684175985 | 0.002777777777777778 | 0.9659084288095618 | 0.6303076523321646 |
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  ## RLBench Headless Smoke
78
 
79
  The headless X server files used on this node are:
@@ -92,6 +166,7 @@ The smoke output file is:
92
  - `artifacts/data/reveal_proxy/proxy_val_v4_noleak_counterfactual.pt`
93
  - `artifacts/data/reveal_proxy/proxy_train_clip224_v4_noleak_counterfactual.pt`
94
  - `artifacts/data/reveal_proxy/proxy_val_clip224_v4_noleak_counterfactual.pt`
 
95
 
96
  ## Recreate The Same Software Layout
97
 
@@ -99,6 +174,7 @@ Use:
99
 
100
  ```bash
101
  /workspace/VLAarchtests/environment/setup_same_machine.sh
 
102
  ```
103
 
104
  That helper:
@@ -113,6 +189,7 @@ That helper:
113
  Raw setup metadata is in:
114
 
115
  - `environment/upstream_revisions.txt`
 
116
  - `environment/rlbench_env_export.yaml`
117
  - `environment/rlbench_env_explicit.txt`
118
  - `environment/rlbench_pip_freeze.txt`
 
1
  # VLAarchtests
2
 
3
+ Bundle uploaded from the `/workspace` runpod session dated `2026-03-24`.
4
 
5
  ## Contents
6
 
 
31
  - `environment/hardware_snapshot.txt`
32
  - `environment/glxinfo_B.txt`
33
 
34
+ ## Same-Machine Setup Files
35
+
36
+ - `environment/setup_same_machine.sh`
37
+ - `environment/validate_same_machine.sh`
38
+ - `environment/system_packages_same_machine.txt`
39
+ - `environment/upstream_revisions.txt`
40
+ - `environment/rlbench_env_export.yaml`
41
+ - `environment/rlbench_env_explicit.txt`
42
+ - `environment/rlbench_pip_freeze.txt`
43
+ - `environment/reveal_env_export.yaml`
44
+ - `environment/reveal_env_explicit.txt`
45
+ - `environment/reveal_pip_freeze.txt`
46
+
47
  ## Raw Training Endpoints
48
 
49
  | Run | Checkpoint | Final train total | Final val total | Metrics |
 
87
  | no_wrist_cameras | 0.35185185185185186 | 0.3333333333333333 | 0.3888888888888889 | 0.3333333333333333 | 41.34216132428911 | 0.8971193510073202 | 0.0011574074074074073 | 0.9659118890357264 | 0.6302977896950863 |
88
  | no_global_camera | 0.35185185185185186 | 0.3333333333333333 | 0.3888888888888889 | 0.3333333333333333 | 41.33038121020353 | 0.8943758684175985 | 0.002777777777777778 | 0.9659084288095618 | 0.6303076523321646 |
89
 
90
+ ## Raw Interaction-State Training Endpoints
91
+
92
+ | Run | Checkpoint | Final train total | Final val total | Metrics |
93
+ | --- | --- | ---: | ---: | --- |
94
+ | interaction-state smoke | `artifacts/outputs/interaction/proxy_interaction_state_smoke/checkpoint_best.pt` | 1.2046506234577723 | 1.1171032786369324 | `artifacts/outputs/interaction/proxy_interaction_state_smoke/metrics.json` |
95
+ | interaction-state full | `artifacts/outputs/interaction/proxy_interaction_state/checkpoint_best.pt` | 1.0910143380363782 | 1.1184726804494858 | `artifacts/outputs/interaction/proxy_interaction_state/metrics.json` |
96
+ | interaction-state rolefix smoke | `artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/checkpoint_best.pt` | 1.205997347831726 | 1.130429446697235 | `artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/metrics.json` |
97
+ | interaction-state rolefix full | `artifacts/outputs/interaction_rolefix_full/proxy_interaction_state/checkpoint_best.pt` | 1.1160989701747894 | 1.0928071364760399 | `artifacts/outputs/interaction_rolefix_full/proxy_interaction_state/metrics.json` |
98
+ | RLBench `open_drawer` smoke | `artifacts/outputs/interaction/rlbench_open_drawer_interaction_smoke/checkpoint_best.pt` | 0.1628243774175644 | 0.05217807739973068 | `artifacts/outputs/interaction/rlbench_open_drawer_interaction_smoke/summary.json` |
99
+
100
+ ## Raw Interaction-State Proxy Diagnostics
101
+
102
+ | Run | Planner top-1 accuracy | Planner regret | Risk calibration MSE | Role collapse rate | Samples | JSON |
103
+ | --- | ---: | ---: | ---: | ---: | ---: | --- |
104
+ | reveal-state smoke | 0.25 | 0.2715669870376587 | 0.0521860271692276 | 0.0 | 16 | `artifacts/outputs/interaction/proxy_reveal_state_smoke_diagnostics/proxy_diagnostics.json` |
105
+ | reveal-state full | 0.2698412698412698 | 0.3642531931400299 | 0.011263838969171047 | 0.0 | 126 | `artifacts/outputs/interaction/proxy_reveal_state_full_diagnostics/proxy_diagnostics.json` |
106
+ | interaction-state smoke | 0.375 | 0.4054882526397705 | 0.01106204278767109 | 1.0 | 16 | `artifacts/outputs/interaction/proxy_interaction_state_smoke/diagnostics/proxy_diagnostics.json` |
107
+ | interaction-state full | 0.42857142857142855 | 0.1302730292081833 | 0.010318642482161522 | 1.0 | 126 | `artifacts/outputs/interaction/proxy_interaction_state/diagnostics/proxy_diagnostics.json` |
108
+ | interaction-state rolefix smoke | 0.5 | 0.28291308879852295 | 0.011351427994668484 | 0.0 | 16 | `artifacts/outputs/interaction_rolefix/proxy_interaction_state_smoke/diagnostics/proxy_diagnostics.json` |
109
+ | interaction-state rolefix full | 0.29365079365079366 | 0.08918909728527069 | 0.009334742091596127 | 0.0 | 126 | `artifacts/outputs/interaction_rolefix_full/proxy_interaction_state/diagnostics/proxy_diagnostics.json` |
110
+
111
+ ## Raw Interaction-State Proxy Benchmark Results
112
+
113
+ | Run | Mean success | foliage_proxy | bag_proxy | cloth_proxy | visibility_integral | corridor_availability | reocclusion_rate | persistence_horizon_mae | disturbance_cost | JSON |
114
+ | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | --- |
115
+ | interaction-state smoke | 0.0 | 0.0 | 0.0 | 0.0 | 62.47385279337565 | 0.9876543283462524 | 0.0 | 2.8290874414145946 | 0.9037613173325857 | `artifacts/outputs/interaction/reveal_eval_interaction_ep2/reveal_benchmark.json` |
116
+ | interaction-state full | 0.0 | 0.0 | 0.0 | 0.0 | 62.6574592590332 | 0.9876543283462524 | 0.0 | 2.1672919432322186 | 0.8980477948983511 | `artifacts/outputs/interaction/reveal_eval_interaction_full_ep2/reveal_benchmark.json` |
117
+ | interaction-state rolefix smoke | 0.0 | 0.0 | 0.0 | 0.0 | 62.513198375701904 | 0.9871399253606796 | 0.0 | 2.8390470795333385 | 0.9011474947134653 | `artifacts/outputs/interaction_rolefix/reveal_eval_interaction_smoke/reveal_benchmark.json` |
118
+ | interaction-state rolefix full | 0.0 | 0.0 | 0.0 | 0.0 | 64.04203844070435 | 0.9837105721235275 | 0.0 | 1.7914747282500483 | 0.8335009200705422 | `artifacts/outputs/interaction_rolefix_full/reveal_eval_interaction/reveal_benchmark.json` |
119
+
120
+ ## Raw Interaction-State Ablation Results
121
+
122
+ Source: `artifacts/outputs/interaction/reveal_ablation_interaction_ep1/ablations.json`
123
+
124
+ | Ablation | Mean success | foliage_proxy | bag_proxy | cloth_proxy | visibility_integral | corridor_availability | reocclusion_rate | persistence_horizon_mae | disturbance_cost |
125
+ | --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |
126
+ | full_model | 0.0 | 0.0 | 0.0 | 0.0 | 67.46570078531902 | 0.9835391044616699 | 0.0 | 2.9001280729969343 | 0.7520796457926432 |
127
+ | no_interaction_head | 0.0 | 0.0 | 0.0 | 0.0 | 63.56013615926107 | 0.9876543283462524 | 0.0 | null | 0.8973869283994039 |
128
+ | no_world_model | 0.0 | 0.0 | 0.0 | 0.0 | 63.46492385864258 | 0.9876543283462524 | 0.0 | 2.8303335145115853 | 0.900044322013855 |
129
+ | no_planner | 0.0 | 0.0 | 0.0 | 0.0 | 63.54209772745768 | 0.9876543283462524 | 0.0 | 2.8313964143395425 | 0.8962932030359904 |
130
+ | no_role_tokens | 0.0 | 0.0 | 0.0 | 0.0 | 67.39010111490886 | 0.9835391044616699 | 0.0 | 2.8990648205081624 | 0.7544853488604227 |
131
+ | short_history | 0.0 | 0.0 | 0.0 | 0.0 | 67.46627298990886 | 0.9835391044616699 | 0.0 | 2.900133213897546 | 0.7520630160967509 |
132
+
133
+ ## Raw RLBench Two-Robot Outputs
134
+
135
+ ### Env Smoke
136
+
137
+ | Output | Raw values | File |
138
+ | --- | --- | --- |
139
+ | import smoke | `resolved_upstream_root=/workspace/third_party/peract_bimanual/peract`, `imports.rlbench=ok`, `imports.pyrep=ok`, `imports.yarr=ok` | `artifacts/outputs/interaction/rlbench_env_smoke/import_smoke.json` |
140
+ | launch reset smoke | `description=\"hold the drawer with left hand and open the bottom drawer with right hand\"`, `front_rgb_shape=[128,128,3]`, `wrist_rgb_shape=[128,128,3]`, `wrist2_rgb_shape=[128,128,3]`, `right_pose_shape=[7]`, `left_pose_shape=[7]` | `artifacts/outputs/interaction/rlbench_env_smoke/launch_reset_smoke.json` |
141
+ | launch step smoke | `description=\"hold the drawer with left hand and open the bottom drawer with right hand\"`, `reward=0.0`, `done=false`, `front_rgb_shape_after_step=[128,128,3]` | `artifacts/outputs/interaction/rlbench_env_smoke/launch_step_smoke.json` |
142
+ | train command export | raw command text | `artifacts/outputs/interaction/rlbench_env_smoke/import_and_train_command.txt` |
143
+
144
+ ### Offline Smoke And Rollout Eval
145
+
146
+ | Output | Raw values | File |
147
+ | --- | --- | --- |
148
+ | offline RLBench smoke summary | `train_dataset.num_samples=1`, `val_dataset.num_samples=1`, `plan_enabled_for_eval=true`, `frozen_modules=[interaction_head, world_model, planner]` | `artifacts/outputs/interaction/rlbench_open_drawer_interaction_smoke/summary.json` |
149
+ | rollout eval | `plan_requested=true`, `plan_applied=true`, `tasks.open_drawer.error=\"A path could not be found because the target is outside of workspace.\"`, `mean_success=0.0` | `artifacts/outputs/interaction/rlbench_open_drawer_rollout_eval/rollout_eval.json` |
150
+
151
  ## RLBench Headless Smoke
152
 
153
  The headless X server files used on this node are:
 
166
  - `artifacts/data/reveal_proxy/proxy_val_v4_noleak_counterfactual.pt`
167
  - `artifacts/data/reveal_proxy/proxy_train_clip224_v4_noleak_counterfactual.pt`
168
  - `artifacts/data/reveal_proxy/proxy_val_clip224_v4_noleak_counterfactual.pt`
169
+ - `artifacts/data/rlbench_smoke_open_drawer/`
170
 
171
  ## Recreate The Same Software Layout
172
 
 
174
 
175
  ```bash
176
  /workspace/VLAarchtests/environment/setup_same_machine.sh
177
+ /workspace/VLAarchtests/environment/validate_same_machine.sh
178
  ```
179
 
180
  That helper:
 
189
  Raw setup metadata is in:
190
 
191
  - `environment/upstream_revisions.txt`
192
+ - `environment/system_packages_same_machine.txt`
193
  - `environment/rlbench_env_export.yaml`
194
  - `environment/rlbench_env_explicit.txt`
195
  - `environment/rlbench_pip_freeze.txt`
code/reveal_vla_bimanual/sim_rlbench/launch_smoke.py CHANGED
@@ -5,84 +5,73 @@ import json
5
  import os
6
 
7
  import numpy as np
8
- from helpers.observation_utils import create_obs_config
9
- from rlbench.action_modes.action_mode import BimanualJointPositionActionMode
 
 
10
  from rlbench.backend.utils import task_file_to_task_class
11
- from rlbench.environment import Environment
12
 
13
- from sim_rlbench.obs_adapter import extract_canonical_bimanual_obs
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
 
16
  def main() -> None:
17
  parser = argparse.ArgumentParser()
18
- parser.add_argument("--task", default="bimanual_lift_ball")
19
  parser.add_argument("--resolution", type=int, default=224)
20
  parser.add_argument("--display", default=None)
21
  parser.add_argument("--headless", action="store_true", default=True)
22
  parser.add_argument("--visible", action="store_true")
 
23
  args = parser.parse_args()
24
 
25
  headless = args.headless and not args.visible
26
- cameras = ["front", "wrist_left", "wrist_right"]
27
- obs_config = create_obs_config(
28
- cameras,
29
- [args.resolution, args.resolution],
30
- "BIMANUAL_PERACT",
31
- "bimanual",
32
  )
33
- env = Environment(
34
- action_mode=BimanualJointPositionActionMode(),
35
  obs_config=obs_config,
36
  headless=headless,
37
- robot_setup="dual_panda",
 
38
  )
39
 
40
  try:
41
  env.launch()
42
- task_cls = task_file_to_task_class(args.task, bimanual=True)
43
  task = env.get_task(task_cls)
44
  descriptions, obs = task.reset()
45
- canonical = extract_canonical_bimanual_obs(
46
- obs,
47
- descriptions[0],
48
- include_point_cloud=True,
49
- )
50
-
51
- right_joint_positions = np.asarray(obs.right.joint_positions, dtype=np.float32)
52
- left_joint_positions = np.asarray(obs.left.joint_positions, dtype=np.float32)
53
- right_open = float(obs.right.gripper_open)
54
- left_open = float(obs.left.gripper_open)
55
- action = np.concatenate(
56
- [
57
- right_joint_positions,
58
- np.array([right_open], dtype=np.float32),
59
- left_joint_positions,
60
- np.array([left_open], dtype=np.float32),
61
- ],
62
- axis=0,
63
- )
64
- next_obs, reward, done = task.step(action)
65
-
66
  payload = {
67
  "display": args.display or os.environ.get("DISPLAY"),
68
  "headless": headless,
69
- "task": task.get_name(),
70
- "description": descriptions[0],
71
- "rgb_shapes": {k: list(v.shape) for k, v in canonical.rgb.items()},
72
- "intrinsic_shapes": {
73
- k: list(v.shape) for k, v in canonical.camera_intrinsics.items()
74
- },
75
- "extrinsic_shapes": {
76
- k: list(v.shape) for k, v in canonical.camera_extrinsics.items()
77
- },
78
- "point_cloud_shapes": {
79
- k: list(v.shape) for k, v in canonical.point_cloud.items()
80
- },
81
- "proprio_shape": list(canonical.proprio.shape),
82
- "action_shape": list(action.shape),
83
  "reward": float(reward),
84
  "done": bool(done),
85
- "front_rgb_shape_after_step": list(next_obs.perception_data["front_rgb"].shape),
86
  }
87
  print(json.dumps(payload, indent=2))
88
  finally:
 
5
  import os
6
 
7
  import numpy as np
8
+ from helpers.utils import create_obs_config
9
+ from rlbench.action_modes.action_mode import MoveArmThenGripper2Robots
10
+ from rlbench.action_modes.arm_action_modes import EndEffectorPoseViaPlanning2Robots
11
+ from rlbench.action_modes.gripper_action_modes import Discrete2Robots
12
  from rlbench.backend.utils import task_file_to_task_class
13
+ from rlbench.environments_two_robots import Environment2Robots
14
 
15
+ from sim_rlbench.camera_spec import default_three_camera_spec
16
+
17
+
18
+ def _noop_arm_action(obs: object, arm_name: str) -> np.ndarray:
19
+ if arm_name == "right":
20
+ pose = np.asarray(obs.gripper_right_pose, dtype=np.float32)
21
+ gripper_open = float(obs.gripper_right_open)
22
+ elif arm_name == "left":
23
+ pose = np.asarray(obs.gripper_left_pose, dtype=np.float32)
24
+ gripper_open = float(obs.gripper_left_open)
25
+ else: # pragma: no cover - smoke-only guard
26
+ raise ValueError(f"Unsupported arm: {arm_name}")
27
+ return np.concatenate([pose, np.array([gripper_open, 1.0], dtype=np.float32)], axis=0)
28
 
29
 
30
  def main() -> None:
31
  parser = argparse.ArgumentParser()
32
+ parser.add_argument("--task", default="open_drawer")
33
  parser.add_argument("--resolution", type=int, default=224)
34
  parser.add_argument("--display", default=None)
35
  parser.add_argument("--headless", action="store_true", default=True)
36
  parser.add_argument("--visible", action="store_true")
37
+ parser.add_argument("--arm", choices=("right", "left"), default="right")
38
  args = parser.parse_args()
39
 
40
  headless = args.headless and not args.visible
41
+ spec = default_three_camera_spec(args.resolution)
42
+ task_cls = task_file_to_task_class(args.task)
43
+ obs_config = create_obs_config(list(spec.upstream_cameras), [args.resolution, args.resolution], "PERACT_BC")
44
+ action_mode = MoveArmThenGripper2Robots(
45
+ EndEffectorPoseViaPlanning2Robots(absolute_mode=True, frame="world", collision_checking=False),
46
+ Discrete2Robots(),
47
  )
48
+ env = Environment2Robots(
49
+ action_mode=action_mode,
50
  obs_config=obs_config,
51
  headless=headless,
52
+ robot_setup="panda",
53
+ task_name=task_cls.__name__,
54
  )
55
 
56
  try:
57
  env.launch()
 
58
  task = env.get_task(task_cls)
59
  descriptions, obs = task.reset()
60
+ next_obs, reward, done = task.step(_noop_arm_action(obs, args.arm), args.arm)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  payload = {
62
  "display": args.display or os.environ.get("DISPLAY"),
63
  "headless": headless,
64
+ "task": task_cls.__name__,
65
+ "description": descriptions[0] if descriptions else "",
66
+ "front_rgb_shape": None if obs.front_rgb is None else list(obs.front_rgb.shape),
67
+ "wrist_rgb_shape": None if obs.wrist_rgb is None else list(obs.wrist_rgb.shape),
68
+ "wrist2_rgb_shape": None if obs.wrist2_rgb is None else list(obs.wrist2_rgb.shape),
69
+ "right_pose_shape": None if obs.gripper_right_pose is None else list(obs.gripper_right_pose.shape),
70
+ "left_pose_shape": None if obs.gripper_left_pose is None else list(obs.gripper_left_pose.shape),
71
+ "stepped_arm": args.arm,
 
 
 
 
 
 
72
  "reward": float(reward),
73
  "done": bool(done),
74
+ "front_rgb_shape_after_step": None if next_obs.front_rgb is None else list(next_obs.front_rgb.shape),
75
  }
76
  print(json.dumps(payload, indent=2))
77
  finally:
code/reveal_vla_bimanual/train/configs/proxy_interaction_state.yaml ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_state
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction
3
+ device: cuda
4
+ seed: 13
5
+
6
+ data:
7
+ proxies: [foliage_proxy, bag_proxy, cloth_proxy]
8
+ resolution: 96
9
+ train_episodes_per_proxy: 48
10
+ val_episodes_per_proxy: 16
11
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v4_noleak_counterfactual.pt
12
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v4_noleak_counterfactual.pt
13
+ rebuild_dataset: false
14
+ chunk_horizon: 8
15
+ rollout_horizon: 5
16
+ history_steps: 6
17
+ planner_candidates: 8
18
+ seed: 13
19
+
20
+ optim:
21
+ epochs: 8
22
+ batch_size: 16
23
+ num_workers: 0
24
+ lr: 0.001
25
+ weight_decay: 0.0001
26
+
27
+ trainer:
28
+ policy_type: interaction_state
29
+ use_bf16: true
30
+ grad_clip_norm: 1.0
31
+ freeze_backbone: true
32
+ gradient_checkpointing: false
33
+ plan_during_train: true
34
+ plan_during_eval: true
35
+ support_mode_conditioning: true
36
+ planner_mode: trainable
37
+
38
+ policy:
39
+ backbone:
40
+ model_name: openai/clip-vit-base-patch32
41
+ hidden_dim: 128
42
+ max_text_tokens: 32
43
+ freeze_backbone: true
44
+ gradient_checkpointing: false
45
+ use_dummy_backbone: true
46
+ fusion:
47
+ hidden_dim: 128
48
+ num_cameras: 3
49
+ num_layers: 2
50
+ num_heads: 4
51
+ ff_dim: 256
52
+ dropout: 0.1
53
+ proprio_dim: 32
54
+ proprio_tokens: 1
55
+ memory:
56
+ hidden_dim: 128
57
+ history_steps: 6
58
+ num_layers: 2
59
+ dropout: 0.1
60
+ memory_bank_size: 4
61
+ num_heads: 4
62
+ max_history_steps: 8
63
+ decoder:
64
+ hidden_dim: 128
65
+ num_heads: 4
66
+ num_layers: 2
67
+ ff_dim: 256
68
+ dropout: 0.1
69
+ chunk_size: 8
70
+ action_dim: 14
71
+ arm_action_dim: 7
72
+ num_candidates: 8
73
+ num_phases: 5
74
+ num_arm_roles: 4
75
+ reveal_head:
76
+ hidden_dim: 128
77
+ num_support_modes: 3
78
+ num_approach_templates: 32
79
+ rollout_horizon: 5
80
+ belief_map_size: 32
81
+ field_size: 16
82
+ num_heads: 4
83
+ predict_belief_map: true
84
+ num_phases: 5
85
+ num_arm_roles: 4
86
+ num_interaction_tokens: 8
87
+ world_model:
88
+ hidden_dim: 128
89
+ action_dim: 14
90
+ num_support_modes: 3
91
+ num_approach_templates: 32
92
+ rollout_horizon: 5
93
+ field_size: 16
94
+ num_heads: 4
95
+ num_phases: 5
96
+ num_arm_roles: 4
97
+ num_interaction_tokens: 8
98
+ planner:
99
+ hidden_dim: 128
100
+ num_candidates: 8
101
+ action_dim: 14
102
+ num_support_modes: 3
103
+ utility_margin: 0.1
104
+ num_heads: 4
105
+ num_layers: 2
106
+ num_phases: 5
107
+ num_arm_roles: 4
108
+
109
+ loss_weights:
110
+ action: 1.0
111
+ support_mode: 0.15
112
+ corridor: 0.2
113
+ persistence: 0.1
114
+ disturbance: 0.1
115
+ world_model: 0.25
116
+ belief: 0.05
117
+ planner_success: 0.2
118
+ planner_risk: 0.1
119
+ planner_ranking: 0.1
120
+ proposal_reconstruction: 0.2
121
+ proposal_success: 0.1
122
+ proposal_ranking: 0.1
code/reveal_vla_bimanual/train/configs/proxy_interaction_state_smoke.yaml ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_state_smoke
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction
3
+ device: cuda
4
+ seed: 13
5
+
6
+ data:
7
+ proxies: [foliage_proxy, bag_proxy, cloth_proxy]
8
+ resolution: 64
9
+ train_episodes_per_proxy: 6
10
+ val_episodes_per_proxy: 2
11
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_smoke_v4.pt
12
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_smoke_v4.pt
13
+ rebuild_dataset: false
14
+ chunk_horizon: 4
15
+ rollout_horizon: 3
16
+ history_steps: 6
17
+ planner_candidates: 4
18
+ seed: 13
19
+
20
+ optim:
21
+ epochs: 2
22
+ batch_size: 8
23
+ num_workers: 0
24
+ lr: 0.001
25
+ weight_decay: 0.0001
26
+
27
+ trainer:
28
+ policy_type: interaction_state
29
+ use_bf16: true
30
+ grad_clip_norm: 1.0
31
+ freeze_backbone: true
32
+ gradient_checkpointing: false
33
+ plan_during_train: true
34
+ plan_during_eval: true
35
+ support_mode_conditioning: true
36
+ planner_mode: trainable
37
+
38
+ policy:
39
+ backbone:
40
+ model_name: openai/clip-vit-base-patch32
41
+ hidden_dim: 64
42
+ max_text_tokens: 32
43
+ freeze_backbone: true
44
+ gradient_checkpointing: false
45
+ use_dummy_backbone: true
46
+ fusion:
47
+ hidden_dim: 64
48
+ num_cameras: 3
49
+ num_layers: 2
50
+ num_heads: 4
51
+ ff_dim: 128
52
+ dropout: 0.1
53
+ proprio_dim: 32
54
+ proprio_tokens: 1
55
+ memory:
56
+ hidden_dim: 64
57
+ history_steps: 6
58
+ num_layers: 2
59
+ dropout: 0.1
60
+ memory_bank_size: 4
61
+ num_heads: 4
62
+ max_history_steps: 8
63
+ decoder:
64
+ hidden_dim: 64
65
+ num_heads: 4
66
+ num_layers: 2
67
+ ff_dim: 128
68
+ dropout: 0.1
69
+ chunk_size: 4
70
+ action_dim: 14
71
+ arm_action_dim: 7
72
+ num_candidates: 4
73
+ num_phases: 5
74
+ num_arm_roles: 4
75
+ reveal_head:
76
+ hidden_dim: 64
77
+ num_support_modes: 3
78
+ num_approach_templates: 32
79
+ rollout_horizon: 3
80
+ belief_map_size: 32
81
+ field_size: 16
82
+ num_heads: 4
83
+ predict_belief_map: true
84
+ num_phases: 5
85
+ num_arm_roles: 4
86
+ num_interaction_tokens: 8
87
+ world_model:
88
+ hidden_dim: 64
89
+ action_dim: 14
90
+ num_support_modes: 3
91
+ num_approach_templates: 32
92
+ rollout_horizon: 3
93
+ field_size: 16
94
+ num_heads: 4
95
+ num_phases: 5
96
+ num_arm_roles: 4
97
+ num_interaction_tokens: 8
98
+ planner:
99
+ hidden_dim: 64
100
+ num_candidates: 4
101
+ action_dim: 14
102
+ num_support_modes: 3
103
+ utility_margin: 0.1
104
+ num_heads: 4
105
+ num_layers: 2
106
+ num_phases: 5
107
+ num_arm_roles: 4
108
+
109
+ loss_weights:
110
+ action: 1.0
111
+ support_mode: 0.15
112
+ corridor: 0.2
113
+ persistence: 0.1
114
+ disturbance: 0.1
115
+ world_model: 0.25
116
+ belief: 0.05
117
+ planner_success: 0.2
118
+ planner_risk: 0.1
119
+ planner_ranking: 0.1
120
+ proposal_reconstruction: 0.2
121
+ proposal_success: 0.1
122
+ proposal_ranking: 0.1
code/reveal_vla_bimanual/train/configs/rlbench_open_drawer_interaction_smoke.yaml ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: rlbench_open_drawer_interaction_smoke
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction
3
+ device: cuda
4
+ seed: 13
5
+ init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction/proxy_interaction_state_smoke/checkpoint_best.pt
6
+ init_strict: false
7
+
8
+ data:
9
+ dataset_root: /workspace/VLAarchtests/artifacts/data/rlbench_smoke_open_drawer
10
+ tasks: [open_drawer]
11
+ train_episodes: [0]
12
+ val_episodes: [1]
13
+ resolution: 128
14
+ chunk_horizon: 4
15
+ proprio_dim: 32
16
+ history_steps: 6
17
+ max_train_samples: 8
18
+ max_val_samples: 4
19
+
20
+ optim:
21
+ epochs: 1
22
+ batch_size: 2
23
+ num_workers: 0
24
+ lr: 0.0005
25
+ weight_decay: 0.0001
26
+
27
+ trainer:
28
+ policy_type: interaction_state
29
+ use_bf16: true
30
+ grad_clip_norm: 1.0
31
+ freeze_backbone: true
32
+ gradient_checkpointing: false
33
+ plan_during_train: false
34
+ plan_during_eval: true
35
+ support_mode_conditioning: true
36
+ planner_mode: proxy_pretrained
37
+
38
+ policy:
39
+ backbone:
40
+ model_name: openai/clip-vit-base-patch32
41
+ hidden_dim: 64
42
+ max_text_tokens: 32
43
+ freeze_backbone: true
44
+ gradient_checkpointing: false
45
+ use_dummy_backbone: true
46
+ fusion:
47
+ hidden_dim: 64
48
+ num_cameras: 3
49
+ num_layers: 2
50
+ num_heads: 4
51
+ ff_dim: 128
52
+ dropout: 0.1
53
+ proprio_dim: 32
54
+ proprio_tokens: 1
55
+ memory:
56
+ hidden_dim: 64
57
+ history_steps: 6
58
+ num_layers: 2
59
+ dropout: 0.1
60
+ memory_bank_size: 4
61
+ num_heads: 4
62
+ max_history_steps: 8
63
+ decoder:
64
+ hidden_dim: 64
65
+ num_heads: 4
66
+ num_layers: 2
67
+ ff_dim: 128
68
+ dropout: 0.1
69
+ chunk_size: 4
70
+ action_dim: 14
71
+ arm_action_dim: 7
72
+ num_candidates: 4
73
+ num_phases: 5
74
+ num_arm_roles: 4
75
+ reveal_head:
76
+ hidden_dim: 64
77
+ num_support_modes: 3
78
+ num_approach_templates: 32
79
+ rollout_horizon: 3
80
+ belief_map_size: 32
81
+ field_size: 16
82
+ num_heads: 4
83
+ predict_belief_map: true
84
+ num_phases: 5
85
+ num_arm_roles: 4
86
+ num_interaction_tokens: 8
87
+ world_model:
88
+ hidden_dim: 64
89
+ action_dim: 14
90
+ num_support_modes: 3
91
+ num_approach_templates: 32
92
+ rollout_horizon: 3
93
+ field_size: 16
94
+ num_heads: 4
95
+ num_phases: 5
96
+ num_arm_roles: 4
97
+ num_interaction_tokens: 8
98
+ belief_map_size: 32
99
+ predict_belief_map: true
100
+ planner:
101
+ hidden_dim: 64
102
+ num_candidates: 4
103
+ action_dim: 14
104
+ num_support_modes: 3
105
+ utility_margin: 0.1
106
+ corridor_weight: 1.0
107
+ persistence_weight: 0.5
108
+ proposal_weight: 0.5
109
+ task_progress_weight: 0.75
110
+ disturbance_weight: 0.75
111
+ reocclusion_weight: 0.5
112
+ visibility_weight: 0.25
113
+ num_heads: 4
114
+ num_layers: 2
115
+ num_phases: 5
116
+ num_arm_roles: 4
117
+
118
+ loss_weights:
119
+ action: 1.0
120
+ support_mode: 0.0
121
+ corridor: 0.0
122
+ persistence: 0.0
123
+ disturbance: 0.0
124
+ world_model: 0.0
125
+ belief: 0.0
126
+ planner_success: 0.0
127
+ planner_risk: 0.0
128
+ planner_ranking: 0.0
code/reveal_vla_bimanual/train/configs/rlbench_peract2_13_interaction_state_dummy.yaml ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: rlbench_peract2_13_interaction_state_dummy
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction
3
+ device: cuda
4
+ seed: 13
5
+ init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction/proxy_interaction_state/checkpoint_best.pt
6
+ init_strict: false
7
+
8
+ data:
9
+ dataset_root: /workspace/data/rlbench2
10
+ tasks:
11
+ [
12
+ bimanual_push_box,
13
+ bimanual_lift_ball,
14
+ bimanual_dual_push_buttons,
15
+ bimanual_pick_plate,
16
+ bimanual_put_item_in_drawer,
17
+ bimanual_put_bottle_in_fridge,
18
+ bimanual_handover_item,
19
+ bimanual_pick_laptop,
20
+ bimanual_straighten_rope,
21
+ bimanual_sweep_to_dustpan,
22
+ bimanual_lift_tray,
23
+ bimanual_handover_item_easy,
24
+ bimanual_take_tray_out_of_oven,
25
+ ]
26
+ train_episodes: [0]
27
+ val_episodes: [1]
28
+ resolution: 224
29
+ chunk_horizon: 8
30
+ proprio_dim: 32
31
+ history_steps: 6
32
+
33
+ optim:
34
+ epochs: 2
35
+ batch_size: 4
36
+ num_workers: 0
37
+ lr: 0.0005
38
+ weight_decay: 0.0001
39
+
40
+ trainer:
41
+ policy_type: interaction_state
42
+ use_bf16: true
43
+ grad_clip_norm: 1.0
44
+ freeze_backbone: true
45
+ gradient_checkpointing: false
46
+ plan_during_train: false
47
+ plan_during_eval: true
48
+ support_mode_conditioning: true
49
+ planner_mode: proxy_pretrained
50
+
51
+ policy:
52
+ backbone:
53
+ model_name: openai/clip-vit-base-patch32
54
+ hidden_dim: 128
55
+ max_text_tokens: 32
56
+ freeze_backbone: true
57
+ gradient_checkpointing: false
58
+ use_dummy_backbone: true
59
+ fusion:
60
+ hidden_dim: 128
61
+ num_cameras: 3
62
+ num_layers: 2
63
+ num_heads: 4
64
+ ff_dim: 256
65
+ dropout: 0.1
66
+ proprio_dim: 32
67
+ proprio_tokens: 1
68
+ memory:
69
+ hidden_dim: 128
70
+ history_steps: 6
71
+ num_layers: 2
72
+ dropout: 0.1
73
+ memory_bank_size: 4
74
+ num_heads: 4
75
+ max_history_steps: 8
76
+ decoder:
77
+ hidden_dim: 128
78
+ num_heads: 4
79
+ num_layers: 2
80
+ ff_dim: 256
81
+ dropout: 0.1
82
+ chunk_size: 8
83
+ action_dim: 14
84
+ arm_action_dim: 7
85
+ num_candidates: 8
86
+ num_phases: 5
87
+ num_arm_roles: 4
88
+ reveal_head:
89
+ hidden_dim: 128
90
+ num_support_modes: 3
91
+ num_approach_templates: 32
92
+ rollout_horizon: 5
93
+ belief_map_size: 32
94
+ field_size: 16
95
+ num_heads: 4
96
+ predict_belief_map: true
97
+ num_phases: 5
98
+ num_arm_roles: 4
99
+ num_interaction_tokens: 8
100
+ world_model:
101
+ hidden_dim: 128
102
+ action_dim: 14
103
+ num_support_modes: 3
104
+ num_approach_templates: 32
105
+ rollout_horizon: 5
106
+ field_size: 16
107
+ num_heads: 4
108
+ num_phases: 5
109
+ num_arm_roles: 4
110
+ num_interaction_tokens: 8
111
+ planner:
112
+ hidden_dim: 128
113
+ num_candidates: 8
114
+ action_dim: 14
115
+ num_support_modes: 3
116
+ utility_margin: 0.1
117
+ num_heads: 4
118
+ num_layers: 2
119
+ num_phases: 5
120
+ num_arm_roles: 4
121
+
122
+ loss_weights:
123
+ action: 1.0
124
+ support_mode: 0.0
125
+ corridor: 0.0
126
+ persistence: 0.0
127
+ disturbance: 0.0
128
+ world_model: 0.0
129
+ belief: 0.0
130
+ planner_success: 0.0
131
+ planner_risk: 0.0
132
+ planner_ranking: 0.0
code/reveal_vla_bimanual/train/configs/rlbench_subset3_interaction_state_dummy.yaml ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: rlbench_subset3_interaction_state_dummy
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/interaction
3
+ device: cuda
4
+ seed: 13
5
+ init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/interaction/proxy_interaction_state_smoke/checkpoint_best.pt
6
+ init_strict: false
7
+
8
+ data:
9
+ dataset_root: /workspace/data/rlbench2
10
+ tasks: [bimanual_lift_ball, bimanual_push_box, bimanual_dual_push_buttons]
11
+ train_episodes: [0]
12
+ val_episodes: [1]
13
+ resolution: 224
14
+ chunk_horizon: 8
15
+ proprio_dim: 32
16
+ history_steps: 6
17
+
18
+ optim:
19
+ epochs: 2
20
+ batch_size: 4
21
+ num_workers: 0
22
+ lr: 0.0005
23
+ weight_decay: 0.0001
24
+
25
+ trainer:
26
+ policy_type: interaction_state
27
+ use_bf16: true
28
+ grad_clip_norm: 1.0
29
+ freeze_backbone: true
30
+ gradient_checkpointing: false
31
+ plan_during_train: false
32
+ plan_during_eval: true
33
+ support_mode_conditioning: true
34
+ planner_mode: proxy_pretrained
35
+
36
+ policy:
37
+ backbone:
38
+ model_name: openai/clip-vit-base-patch32
39
+ hidden_dim: 128
40
+ max_text_tokens: 32
41
+ freeze_backbone: true
42
+ gradient_checkpointing: false
43
+ use_dummy_backbone: true
44
+ fusion:
45
+ hidden_dim: 128
46
+ num_cameras: 3
47
+ num_layers: 2
48
+ num_heads: 4
49
+ ff_dim: 256
50
+ dropout: 0.1
51
+ proprio_dim: 32
52
+ proprio_tokens: 1
53
+ memory:
54
+ hidden_dim: 128
55
+ history_steps: 6
56
+ num_layers: 2
57
+ dropout: 0.1
58
+ memory_bank_size: 4
59
+ num_heads: 4
60
+ max_history_steps: 8
61
+ decoder:
62
+ hidden_dim: 128
63
+ num_heads: 4
64
+ num_layers: 2
65
+ ff_dim: 256
66
+ dropout: 0.1
67
+ chunk_size: 8
68
+ action_dim: 14
69
+ arm_action_dim: 7
70
+ num_candidates: 8
71
+ num_phases: 5
72
+ num_arm_roles: 4
73
+ reveal_head:
74
+ hidden_dim: 128
75
+ num_support_modes: 3
76
+ num_approach_templates: 32
77
+ rollout_horizon: 5
78
+ belief_map_size: 32
79
+ field_size: 16
80
+ num_heads: 4
81
+ predict_belief_map: true
82
+ num_phases: 5
83
+ num_arm_roles: 4
84
+ num_interaction_tokens: 8
85
+ world_model:
86
+ hidden_dim: 128
87
+ action_dim: 14
88
+ num_support_modes: 3
89
+ num_approach_templates: 32
90
+ rollout_horizon: 5
91
+ field_size: 16
92
+ num_heads: 4
93
+ num_phases: 5
94
+ num_arm_roles: 4
95
+ num_interaction_tokens: 8
96
+ planner:
97
+ hidden_dim: 128
98
+ num_candidates: 8
99
+ action_dim: 14
100
+ num_support_modes: 3
101
+ utility_margin: 0.1
102
+ num_heads: 4
103
+ num_layers: 2
104
+ num_phases: 5
105
+ num_arm_roles: 4
106
+
107
+ loss_weights:
108
+ action: 1.0
109
+ support_mode: 0.0
110
+ corridor: 0.0
111
+ persistence: 0.0
112
+ disturbance: 0.0
113
+ world_model: 0.0
114
+ belief: 0.0
115
+ planner_success: 0.0
116
+ planner_risk: 0.0
117
+ planner_ranking: 0.0
code/reveal_vla_bimanual/train/losses.py CHANGED
@@ -10,6 +10,8 @@ from torch import Tensor
10
  @dataclass
11
  class LossWeights:
12
  action: float = 1.0
 
 
13
  support_mode: float = 0.1
14
  corridor: float = 0.1
15
  persistence: float = 0.05
@@ -19,6 +21,9 @@ class LossWeights:
19
  planner_success: float = 0.1
20
  planner_risk: float = 0.05
21
  planner_ranking: float = 0.05
 
 
 
22
 
23
 
24
  def chunk_bc_loss(pred_actions: Tensor, target_actions: Tensor, mask: Tensor | None = None) -> Tensor:
@@ -31,6 +36,26 @@ def chunk_bc_loss(pred_actions: Tensor, target_actions: Tensor, mask: Tensor | N
31
 
32
  def reveal_state_loss(pred: dict[str, Tensor], target: dict[str, Tensor], weights: LossWeights) -> dict[str, Tensor]:
33
  losses = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  support_target = target["support_mode"].long()
35
  losses["support_mode"] = F.cross_entropy(pred["support_mode_logits"], support_target)
36
  losses["corridor"] = F.binary_cross_entropy_with_logits(
@@ -107,11 +132,17 @@ def compute_total_loss(
107
  }
108
  total = weights.action * losses["action"]
109
 
110
- if "reveal_state" in model_output and "support_mode" in batch:
111
- reveal_losses = reveal_state_loss(model_output["reveal_state"], batch, weights)
 
 
 
 
112
  losses.update(reveal_losses)
113
  total = (
114
  total
 
 
115
  + weights.support_mode * reveal_losses["support_mode"]
116
  + weights.corridor * reveal_losses["corridor"]
117
  + weights.persistence * reveal_losses["persistence"]
@@ -121,7 +152,7 @@ def compute_total_loss(
121
  + 0.01 * reveal_losses["uncertainty"]
122
  )
123
 
124
- if "planned_rollout" in model_output and (
125
  "candidate_rollout_support_mode" in batch or "rollout_support_mode" in batch
126
  ):
127
  if "candidate_rollout_support_mode" in batch:
@@ -182,5 +213,62 @@ def compute_total_loss(
182
  losses["planner_risk"] = model_output["action_mean"].new_tensor(0.0)
183
  losses["planner_ranking"] = model_output["action_mean"].new_tensor(0.0)
184
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  losses["total"] = total
186
  return losses
 
10
  @dataclass
11
  class LossWeights:
12
  action: float = 1.0
13
+ phase: float = 0.05
14
+ arm_role: float = 0.2
15
  support_mode: float = 0.1
16
  corridor: float = 0.1
17
  persistence: float = 0.05
 
21
  planner_success: float = 0.1
22
  planner_risk: float = 0.05
23
  planner_ranking: float = 0.05
24
+ proposal_reconstruction: float = 0.1
25
+ proposal_success: float = 0.05
26
+ proposal_ranking: float = 0.05
27
 
28
 
29
  def chunk_bc_loss(pred_actions: Tensor, target_actions: Tensor, mask: Tensor | None = None) -> Tensor:
 
36
 
37
  def reveal_state_loss(pred: dict[str, Tensor], target: dict[str, Tensor], weights: LossWeights) -> dict[str, Tensor]:
38
  losses = {}
39
+ if "phase_logits" in pred:
40
+ phase_map = torch.as_tensor([2, 3, 0], device=target["support_mode"].device, dtype=torch.long)
41
+ phase_target = phase_map[target["support_mode"].long()]
42
+ losses["phase"] = F.cross_entropy(pred["phase_logits"], phase_target)
43
+ else:
44
+ losses["phase"] = pred["support_mode_logits"].new_tensor(0.0)
45
+ if "arm_role_logits" in pred:
46
+ batch_size = pred["arm_role_logits"].shape[0]
47
+ role_target = torch.as_tensor([1, 2], device=pred["arm_role_logits"].device, dtype=torch.long)
48
+ role_target = role_target.unsqueeze(0).expand(batch_size, -1)
49
+ role_ce = F.cross_entropy(
50
+ pred["arm_role_logits"].reshape(-1, pred["arm_role_logits"].shape[-1]),
51
+ role_target.reshape(-1),
52
+ )
53
+ role_probs = pred["arm_role_logits"].softmax(dim=-1)
54
+ role_gap = torch.mean(torch.abs(role_probs[:, 0] - role_probs[:, 1]), dim=-1)
55
+ role_separation = torch.relu(0.25 - role_gap).mean()
56
+ losses["arm_role"] = role_ce + 0.5 * role_separation
57
+ else:
58
+ losses["arm_role"] = pred["support_mode_logits"].new_tensor(0.0)
59
  support_target = target["support_mode"].long()
60
  losses["support_mode"] = F.cross_entropy(pred["support_mode_logits"], support_target)
61
  losses["corridor"] = F.binary_cross_entropy_with_logits(
 
132
  }
133
  total = weights.action * losses["action"]
134
 
135
+ state_output = model_output.get("interaction_state")
136
+ if state_output is None:
137
+ state_output = model_output.get("reveal_state")
138
+
139
+ if state_output is not None and "support_mode" in batch:
140
+ reveal_losses = reveal_state_loss(state_output, batch, weights)
141
  losses.update(reveal_losses)
142
  total = (
143
  total
144
+ + weights.phase * reveal_losses["phase"]
145
+ + weights.arm_role * reveal_losses["arm_role"]
146
  + weights.support_mode * reveal_losses["support_mode"]
147
  + weights.corridor * reveal_losses["corridor"]
148
  + weights.persistence * reveal_losses["persistence"]
 
152
  + 0.01 * reveal_losses["uncertainty"]
153
  )
154
 
155
+ if model_output.get("planned_rollout") and (
156
  "candidate_rollout_support_mode" in batch or "rollout_support_mode" in batch
157
  ):
158
  if "candidate_rollout_support_mode" in batch:
 
213
  losses["planner_risk"] = model_output["action_mean"].new_tensor(0.0)
214
  losses["planner_ranking"] = model_output["action_mean"].new_tensor(0.0)
215
 
216
+ if "proposal_candidates" in model_output and "candidate_action_chunks" in batch:
217
+ candidate_count = min(
218
+ model_output["proposal_candidates"].shape[1],
219
+ batch["candidate_action_chunks"].shape[1],
220
+ )
221
+ proposal_reconstruction = chunk_bc_loss(
222
+ model_output["proposal_candidates"][:, :candidate_count],
223
+ batch["candidate_action_chunks"][:, :candidate_count],
224
+ )
225
+ losses["proposal_reconstruction"] = proposal_reconstruction
226
+ total = total + weights.proposal_reconstruction * proposal_reconstruction
227
+ else:
228
+ losses["proposal_reconstruction"] = model_output["action_mean"].new_tensor(0.0)
229
+
230
+ if "proposal_logits" in model_output and "candidate_retrieval_success" in batch:
231
+ candidate_count = min(
232
+ model_output["proposal_logits"].shape[1],
233
+ batch["candidate_retrieval_success"].shape[1],
234
+ )
235
+ proposal_logits = model_output["proposal_logits"][:, :candidate_count]
236
+ proposal_success_target = batch["candidate_retrieval_success"][:, :candidate_count].float()
237
+ proposal_utility = batch.get("candidate_utility")
238
+ if proposal_utility is None:
239
+ proposal_risk = batch.get("candidate_risk")
240
+ if proposal_risk is None:
241
+ proposal_risk = torch.clamp(
242
+ batch["candidate_final_disturbance_cost"].float() + batch["candidate_reocclusion_rate"].float(),
243
+ 0.0,
244
+ 1.0,
245
+ )
246
+ proposal_utility = proposal_success_target - proposal_risk[:, :candidate_count]
247
+ else:
248
+ proposal_utility = proposal_utility[:, :candidate_count]
249
+ proposal_success_loss = F.binary_cross_entropy_with_logits(
250
+ proposal_logits,
251
+ proposal_success_target,
252
+ )
253
+ proposal_pred_diff = proposal_logits.unsqueeze(-1) - proposal_logits.unsqueeze(-2)
254
+ proposal_target_diff = proposal_utility.float().unsqueeze(-1) - proposal_utility.float().unsqueeze(-2)
255
+ proposal_mask = proposal_target_diff.abs() > 1e-4
256
+ if proposal_mask.any():
257
+ proposal_ranking_loss = torch.relu(0.1 - torch.sign(proposal_target_diff) * proposal_pred_diff)[
258
+ proposal_mask
259
+ ].mean()
260
+ else:
261
+ proposal_ranking_loss = model_output["proposal_logits"].new_tensor(0.0)
262
+ losses["proposal_success"] = proposal_success_loss
263
+ losses["proposal_ranking"] = proposal_ranking_loss
264
+ total = (
265
+ total
266
+ + weights.proposal_success * proposal_success_loss
267
+ + weights.proposal_ranking * proposal_ranking_loss
268
+ )
269
+ else:
270
+ losses["proposal_success"] = model_output["action_mean"].new_tensor(0.0)
271
+ losses["proposal_ranking"] = model_output["action_mean"].new_tensor(0.0)
272
+
273
  losses["total"] = total
274
  return losses
code/reveal_vla_bimanual/train/run_experiment.py CHANGED
@@ -21,7 +21,14 @@ from models.reveal_head import RevealHeadConfig
21
  from models.world_model import RevealWMConfig
22
  from sim_reveal.dataset import RevealOfflineDataset, collect_teacher_dataset, load_teacher_dataset, save_teacher_dataset
23
  from train.losses import LossWeights, compute_total_loss
24
- from train.trainer import BimanualTrainer, TrainerConfig, build_policy
 
 
 
 
 
 
 
25
 
26
 
27
  def _move_batch_to_device(batch: dict[str, Any], device: torch.device) -> dict[str, Any]:
@@ -131,6 +138,7 @@ def main() -> None:
131
  trainer_config = _trainer_config_from_omega(cfg.trainer)
132
  loss_weights = _loss_weights_from_omega(cfg.loss_weights)
133
  model = build_policy(policy_config, trainer_config).to(device)
 
134
  trainable_parameters = [parameter for parameter in model.parameters() if parameter.requires_grad]
135
  optimizer = torch.optim.AdamW(trainable_parameters, lr=float(cfg.optim.lr), weight_decay=float(cfg.optim.weight_decay))
136
  trainer = BimanualTrainer(model=model, optimizer=optimizer, config=trainer_config)
@@ -159,8 +167,8 @@ def main() -> None:
159
  "history_images": moved.get("history_images"),
160
  "history_proprio": moved.get("history_proprio"),
161
  }
162
- if trainer_config.policy_type == "reveal_state":
163
- forward_kwargs["plan"] = trainer_config.plan_during_eval
164
  forward_kwargs["support_mode_conditioning"] = trainer_config.support_mode_conditioning
165
  if "candidate_action_chunks" in moved:
166
  forward_kwargs["candidate_chunks_override"] = moved["candidate_action_chunks"]
@@ -202,6 +210,8 @@ def main() -> None:
202
  "final_val_total": history[-1]["val"]["total"],
203
  "num_train_samples": len(train_bundle["samples"]),
204
  "num_val_samples": len(val_bundle["samples"]),
 
 
205
  }
206
  print(json.dumps(summary, indent=2))
207
 
 
21
  from models.world_model import RevealWMConfig
22
  from sim_reveal.dataset import RevealOfflineDataset, collect_teacher_dataset, load_teacher_dataset, save_teacher_dataset
23
  from train.losses import LossWeights, compute_total_loss
24
+ from train.trainer import (
25
+ BimanualTrainer,
26
+ TrainerConfig,
27
+ apply_planner_mode,
28
+ build_policy,
29
+ planner_enabled,
30
+ policy_supports_planning,
31
+ )
32
 
33
 
34
  def _move_batch_to_device(batch: dict[str, Any], device: torch.device) -> dict[str, Any]:
 
138
  trainer_config = _trainer_config_from_omega(cfg.trainer)
139
  loss_weights = _loss_weights_from_omega(cfg.loss_weights)
140
  model = build_policy(policy_config, trainer_config).to(device)
141
+ frozen_modules = apply_planner_mode(model, trainer_config)
142
  trainable_parameters = [parameter for parameter in model.parameters() if parameter.requires_grad]
143
  optimizer = torch.optim.AdamW(trainable_parameters, lr=float(cfg.optim.lr), weight_decay=float(cfg.optim.weight_decay))
144
  trainer = BimanualTrainer(model=model, optimizer=optimizer, config=trainer_config)
 
167
  "history_images": moved.get("history_images"),
168
  "history_proprio": moved.get("history_proprio"),
169
  }
170
+ if policy_supports_planning(trainer_config.policy_type):
171
+ forward_kwargs["plan"] = planner_enabled(trainer_config, during_eval=True)
172
  forward_kwargs["support_mode_conditioning"] = trainer_config.support_mode_conditioning
173
  if "candidate_action_chunks" in moved:
174
  forward_kwargs["candidate_chunks_override"] = moved["candidate_action_chunks"]
 
210
  "final_val_total": history[-1]["val"]["total"],
211
  "num_train_samples": len(train_bundle["samples"]),
212
  "num_val_samples": len(val_bundle["samples"]),
213
+ "planner_mode": trainer_config.planner_mode,
214
+ "frozen_modules": frozen_modules,
215
  }
216
  print(json.dumps(summary, indent=2))
217
 
code/reveal_vla_bimanual/train/run_rlbench_experiment.py CHANGED
@@ -19,7 +19,13 @@ from train.run_experiment import (
19
  _policy_config_from_omega,
20
  _trainer_config_from_omega,
21
  )
22
- from train.trainer import BimanualTrainer, build_policy
 
 
 
 
 
 
23
 
24
 
25
  def _make_loader(dataset: RLBenchOfflineChunkDataset, batch_size: int, shuffle: bool, num_workers: int) -> DataLoader:
@@ -95,16 +101,10 @@ def main() -> None:
95
 
96
  policy_config = _policy_config_from_omega(cfg.policy)
97
  trainer_config = _trainer_config_from_omega(cfg.trainer)
98
- planning_disabled_for_rlbench = False
99
- if trainer_config.policy_type == "reveal_state" and (
100
- trainer_config.plan_during_train or trainer_config.plan_during_eval
101
- ):
102
- trainer_config.plan_during_train = False
103
- trainer_config.plan_during_eval = False
104
- planning_disabled_for_rlbench = True
105
  loss_weights = _loss_weights_from_omega(cfg.loss_weights)
106
  model = build_policy(policy_config, trainer_config).to(device)
107
  init_info = _load_init_checkpoint(model, cfg.get("init_checkpoint"), bool(cfg.get("init_strict", False)))
 
108
 
109
  trainable_parameters = [parameter for parameter in model.parameters() if parameter.requires_grad]
110
  optimizer = torch.optim.AdamW(trainable_parameters, lr=float(cfg.optim.lr), weight_decay=float(cfg.optim.weight_decay))
@@ -134,8 +134,8 @@ def main() -> None:
134
  "history_images": moved.get("history_images"),
135
  "history_proprio": moved.get("history_proprio"),
136
  }
137
- if trainer_config.policy_type == "reveal_state":
138
- forward_kwargs["plan"] = trainer_config.plan_during_eval
139
  forward_kwargs["support_mode_conditioning"] = trainer_config.support_mode_conditioning
140
  output = model(**forward_kwargs)
141
  from train.losses import compute_total_loss
@@ -178,7 +178,10 @@ def main() -> None:
178
  "train_dataset": train_dataset.summary(),
179
  "val_dataset": val_dataset.summary(),
180
  "init_info": init_info,
181
- "planning_disabled_for_rlbench": planning_disabled_for_rlbench,
 
 
 
182
  }
183
  (output_dir / "summary.json").write_text(json.dumps(summary, indent=2), encoding="utf-8")
184
  print(json.dumps(summary, indent=2))
 
19
  _policy_config_from_omega,
20
  _trainer_config_from_omega,
21
  )
22
+ from train.trainer import (
23
+ BimanualTrainer,
24
+ apply_planner_mode,
25
+ build_policy,
26
+ planner_enabled,
27
+ policy_supports_planning,
28
+ )
29
 
30
 
31
  def _make_loader(dataset: RLBenchOfflineChunkDataset, batch_size: int, shuffle: bool, num_workers: int) -> DataLoader:
 
101
 
102
  policy_config = _policy_config_from_omega(cfg.policy)
103
  trainer_config = _trainer_config_from_omega(cfg.trainer)
 
 
 
 
 
 
 
104
  loss_weights = _loss_weights_from_omega(cfg.loss_weights)
105
  model = build_policy(policy_config, trainer_config).to(device)
106
  init_info = _load_init_checkpoint(model, cfg.get("init_checkpoint"), bool(cfg.get("init_strict", False)))
107
+ frozen_modules = apply_planner_mode(model, trainer_config)
108
 
109
  trainable_parameters = [parameter for parameter in model.parameters() if parameter.requires_grad]
110
  optimizer = torch.optim.AdamW(trainable_parameters, lr=float(cfg.optim.lr), weight_decay=float(cfg.optim.weight_decay))
 
134
  "history_images": moved.get("history_images"),
135
  "history_proprio": moved.get("history_proprio"),
136
  }
137
+ if policy_supports_planning(trainer_config.policy_type):
138
+ forward_kwargs["plan"] = planner_enabled(trainer_config, during_eval=True)
139
  forward_kwargs["support_mode_conditioning"] = trainer_config.support_mode_conditioning
140
  output = model(**forward_kwargs)
141
  from train.losses import compute_total_loss
 
178
  "train_dataset": train_dataset.summary(),
179
  "val_dataset": val_dataset.summary(),
180
  "init_info": init_info,
181
+ "planner_mode": trainer_config.planner_mode,
182
+ "plan_enabled_for_train": planner_enabled(trainer_config, during_eval=False),
183
+ "plan_enabled_for_eval": planner_enabled(trainer_config, during_eval=True),
184
+ "frozen_modules": frozen_modules,
185
  }
186
  (output_dir / "summary.json").write_text(json.dumps(summary, indent=2), encoding="utf-8")
187
  print(json.dumps(summary, indent=2))
code/reveal_vla_bimanual/train/smoke_checks.py ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ from dataclasses import asdict
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ import torch
10
+ from torch.utils.data import DataLoader
11
+
12
+ from models.action_decoder import ChunkDecoderConfig
13
+ from models.backbones import FrozenVLBackboneConfig
14
+ from models.multiview_fusion import MultiViewFusionConfig
15
+ from models.observation_memory import ObservationMemoryConfig
16
+ from models.planner import PlannerConfig
17
+ from models.policy import PolicyConfig
18
+ from models.reveal_head import RevealHeadConfig
19
+ from models.world_model import RevealWMConfig
20
+ from sim_reveal.dataset import dataset_from_bundle, load_teacher_dataset
21
+ from train.losses import LossWeights, compute_total_loss
22
+ from train.trainer import TrainerConfig, apply_planner_mode, build_policy, planner_enabled
23
+
24
+
25
+ def _move_batch_to_device(batch: dict[str, Any], device: torch.device) -> dict[str, Any]:
26
+ moved = {}
27
+ for key, value in batch.items():
28
+ if isinstance(value, torch.Tensor):
29
+ moved[key] = value.to(device)
30
+ else:
31
+ moved[key] = value
32
+ return moved
33
+
34
+
35
+ def _small_policy_config() -> PolicyConfig:
36
+ hidden_dim = 64
37
+ return PolicyConfig(
38
+ backbone=FrozenVLBackboneConfig(
39
+ model_name="openai/clip-vit-base-patch32",
40
+ hidden_dim=hidden_dim,
41
+ max_text_tokens=32,
42
+ freeze_backbone=True,
43
+ gradient_checkpointing=False,
44
+ use_dummy_backbone=True,
45
+ ),
46
+ fusion=MultiViewFusionConfig(
47
+ hidden_dim=hidden_dim,
48
+ num_cameras=3,
49
+ num_layers=2,
50
+ num_heads=4,
51
+ ff_dim=128,
52
+ dropout=0.1,
53
+ proprio_dim=32,
54
+ proprio_tokens=1,
55
+ ),
56
+ memory=ObservationMemoryConfig(
57
+ hidden_dim=hidden_dim,
58
+ history_steps=6,
59
+ num_layers=2,
60
+ dropout=0.1,
61
+ memory_bank_size=4,
62
+ num_heads=4,
63
+ max_history_steps=8,
64
+ ),
65
+ decoder=ChunkDecoderConfig(
66
+ hidden_dim=hidden_dim,
67
+ num_heads=4,
68
+ num_layers=2,
69
+ ff_dim=128,
70
+ dropout=0.1,
71
+ chunk_size=4,
72
+ action_dim=14,
73
+ arm_action_dim=7,
74
+ num_candidates=4,
75
+ num_phases=5,
76
+ num_arm_roles=4,
77
+ ),
78
+ reveal_head=RevealHeadConfig(
79
+ hidden_dim=hidden_dim,
80
+ num_support_modes=3,
81
+ num_approach_templates=32,
82
+ rollout_horizon=3,
83
+ belief_map_size=32,
84
+ field_size=16,
85
+ num_heads=4,
86
+ predict_belief_map=True,
87
+ num_phases=5,
88
+ num_arm_roles=4,
89
+ num_interaction_tokens=8,
90
+ ),
91
+ world_model=RevealWMConfig(
92
+ hidden_dim=hidden_dim,
93
+ action_dim=14,
94
+ num_support_modes=3,
95
+ num_approach_templates=32,
96
+ rollout_horizon=3,
97
+ field_size=16,
98
+ num_heads=4,
99
+ num_phases=5,
100
+ num_arm_roles=4,
101
+ num_interaction_tokens=8,
102
+ ),
103
+ planner=PlannerConfig(
104
+ hidden_dim=hidden_dim,
105
+ num_candidates=4,
106
+ action_dim=14,
107
+ num_support_modes=3,
108
+ utility_margin=0.1,
109
+ num_heads=4,
110
+ num_layers=2,
111
+ num_phases=5,
112
+ num_arm_roles=4,
113
+ ),
114
+ )
115
+
116
+
117
+ def _trainer_config(planner_mode: str = "trainable") -> TrainerConfig:
118
+ return TrainerConfig(
119
+ policy_type="interaction_state",
120
+ use_bf16=False,
121
+ grad_clip_norm=1.0,
122
+ freeze_backbone=True,
123
+ gradient_checkpointing=False,
124
+ plan_during_train=True,
125
+ plan_during_eval=True,
126
+ support_mode_conditioning=True,
127
+ planner_mode=planner_mode,
128
+ )
129
+
130
+
131
+ def _synthetic_rlbench_batch(
132
+ batch_size: int,
133
+ chunk_size: int,
134
+ history_steps: int,
135
+ resolution: int,
136
+ device: torch.device,
137
+ ) -> dict[str, Any]:
138
+ images = torch.rand(batch_size, 3, 3, resolution, resolution, device=device)
139
+ history_images = torch.rand(batch_size, history_steps, 3, 3, resolution, resolution, device=device)
140
+ proprio = torch.rand(batch_size, 32, device=device)
141
+ history_proprio = torch.rand(batch_size, history_steps, 32, device=device)
142
+ action_chunk = torch.rand(batch_size, chunk_size, 14, device=device)
143
+ return {
144
+ "images": images,
145
+ "history_images": history_images,
146
+ "history_proprio": history_proprio,
147
+ "proprio": proprio,
148
+ "texts": ["synthetic dual-arm RLBench smoke task"] * batch_size,
149
+ "action_chunk": action_chunk,
150
+ }
151
+
152
+
153
+ def _check_output_shapes(outputs: dict[str, Any], batch_size: int, num_candidates: int, chunk_size: int) -> None:
154
+ required = (
155
+ "action_mean",
156
+ "candidate_chunks",
157
+ "planned_chunk",
158
+ "interaction_state",
159
+ "reveal_state",
160
+ "planned_rollout",
161
+ "planner_success_logits",
162
+ "planner_risk_values",
163
+ "planner_scores",
164
+ "best_candidate_indices",
165
+ )
166
+ missing = [key for key in required if key not in outputs]
167
+ if missing:
168
+ raise AssertionError(f"Missing keys from interaction policy output: {missing}")
169
+ if outputs["candidate_chunks"].shape != (batch_size, num_candidates, chunk_size, 14):
170
+ raise AssertionError(f"Unexpected candidate chunk shape: {tuple(outputs['candidate_chunks'].shape)}")
171
+ if outputs["planned_chunk"].shape != (batch_size, chunk_size, 14):
172
+ raise AssertionError(f"Unexpected planned chunk shape: {tuple(outputs['planned_chunk'].shape)}")
173
+ if outputs["planner_scores"].shape != (batch_size, num_candidates):
174
+ raise AssertionError(f"Unexpected planner score shape: {tuple(outputs['planner_scores'].shape)}")
175
+ rollout = outputs["planned_rollout"]
176
+ if rollout["phase_logits"].shape[:3] != (batch_size, num_candidates, chunk_size):
177
+ raise AssertionError(f"Unexpected rollout phase shape: {tuple(rollout['phase_logits'].shape)}")
178
+
179
+
180
+ def main() -> None:
181
+ parser = argparse.ArgumentParser()
182
+ parser.add_argument("--proxy-dataset", required=True)
183
+ parser.add_argument("--output-dir", required=True)
184
+ parser.add_argument("--batch-size", type=int, default=2)
185
+ args = parser.parse_args()
186
+
187
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
188
+ policy_config = _small_policy_config()
189
+ loss_weights = LossWeights()
190
+
191
+ proxy_bundle = load_teacher_dataset(args.proxy_dataset)
192
+ proxy_dataset = dataset_from_bundle(proxy_bundle, resolution=int(proxy_bundle["resolution"]))
193
+ proxy_loader = DataLoader(proxy_dataset, batch_size=args.batch_size, shuffle=False, num_workers=0)
194
+ proxy_batch = _move_batch_to_device(next(iter(proxy_loader)), device)
195
+
196
+ proxy_trainer_config = _trainer_config(planner_mode="trainable")
197
+ proxy_model = build_policy(policy_config, proxy_trainer_config).to(device)
198
+ proxy_optimizer = torch.optim.AdamW(
199
+ [parameter for parameter in proxy_model.parameters() if parameter.requires_grad],
200
+ lr=1e-3,
201
+ )
202
+ proxy_model.train()
203
+ proxy_optimizer.zero_grad(set_to_none=True)
204
+ proxy_outputs = proxy_model(
205
+ images=proxy_batch["images"],
206
+ proprio=proxy_batch["proprio"],
207
+ texts=proxy_batch["texts"],
208
+ history_images=proxy_batch.get("history_images"),
209
+ history_proprio=proxy_batch.get("history_proprio"),
210
+ plan=True,
211
+ candidate_chunks_override=proxy_batch["candidate_action_chunks"],
212
+ )
213
+ _check_output_shapes(
214
+ proxy_outputs,
215
+ batch_size=proxy_batch["images"].shape[0],
216
+ num_candidates=proxy_batch["candidate_action_chunks"].shape[1],
217
+ chunk_size=proxy_batch["action_chunk"].shape[1],
218
+ )
219
+ proxy_losses = compute_total_loss(proxy_outputs, proxy_batch, weights=loss_weights)
220
+ proxy_losses["total"].backward()
221
+ proxy_grad_norm = float(
222
+ torch.nn.utils.clip_grad_norm_(proxy_model.parameters(), max_norm=10.0).detach().cpu()
223
+ )
224
+ proxy_optimizer.step()
225
+
226
+ rlbench_trainer_config = _trainer_config(planner_mode="proxy_pretrained")
227
+ rlbench_model = build_policy(policy_config, rlbench_trainer_config).to(device)
228
+ frozen_modules = apply_planner_mode(rlbench_model, rlbench_trainer_config)
229
+ rlbench_optimizer = torch.optim.AdamW(
230
+ [parameter for parameter in rlbench_model.parameters() if parameter.requires_grad],
231
+ lr=1e-3,
232
+ )
233
+ rlbench_model.train()
234
+ rlbench_batch = _synthetic_rlbench_batch(
235
+ batch_size=args.batch_size,
236
+ chunk_size=policy_config.decoder.chunk_size,
237
+ history_steps=policy_config.memory.history_steps,
238
+ resolution=64,
239
+ device=device,
240
+ )
241
+ rlbench_optimizer.zero_grad(set_to_none=True)
242
+ rlbench_outputs = rlbench_model(
243
+ images=rlbench_batch["images"],
244
+ proprio=rlbench_batch["proprio"],
245
+ texts=rlbench_batch["texts"],
246
+ history_images=rlbench_batch.get("history_images"),
247
+ history_proprio=rlbench_batch.get("history_proprio"),
248
+ plan=True,
249
+ )
250
+ _check_output_shapes(
251
+ rlbench_outputs,
252
+ batch_size=rlbench_batch["images"].shape[0],
253
+ num_candidates=policy_config.decoder.num_candidates,
254
+ chunk_size=policy_config.decoder.chunk_size,
255
+ )
256
+ rlbench_losses = compute_total_loss(rlbench_outputs, rlbench_batch, weights=loss_weights)
257
+ rlbench_losses["total"].backward()
258
+ rlbench_grad_norm = float(
259
+ torch.nn.utils.clip_grad_norm_(rlbench_model.parameters(), max_norm=10.0).detach().cpu()
260
+ )
261
+ rlbench_optimizer.step()
262
+
263
+ planner_gate = planner_enabled(rlbench_trainer_config, during_eval=True)
264
+ if not planner_gate:
265
+ raise AssertionError("planner_enabled should be true for interaction_state with planner_mode=proxy_pretrained")
266
+
267
+ report = {
268
+ "proxy": {
269
+ "losses": {key: float(value.detach().cpu()) for key, value in proxy_losses.items()},
270
+ "grad_norm": proxy_grad_norm,
271
+ "candidate_shape": list(proxy_outputs["candidate_chunks"].shape),
272
+ "rollout_phase_shape": list(proxy_outputs["planned_rollout"]["phase_logits"].shape),
273
+ },
274
+ "rlbench": {
275
+ "losses": {key: float(value.detach().cpu()) for key, value in rlbench_losses.items()},
276
+ "grad_norm": rlbench_grad_norm,
277
+ "candidate_shape": list(rlbench_outputs["candidate_chunks"].shape),
278
+ "rollout_phase_shape": list(rlbench_outputs["planned_rollout"]["phase_logits"].shape),
279
+ "planner_enabled_for_eval": planner_gate,
280
+ "frozen_modules": frozen_modules,
281
+ },
282
+ "policy_config": asdict(policy_config),
283
+ }
284
+
285
+ output_dir = Path(args.output_dir)
286
+ output_dir.mkdir(parents=True, exist_ok=True)
287
+ (output_dir / "smoke_checks.json").write_text(json.dumps(report, indent=2), encoding="utf-8")
288
+ print(json.dumps(report, indent=2))
289
+
290
+
291
+ if __name__ == "__main__":
292
+ main()
code/reveal_vla_bimanual/train/trainer.py CHANGED
@@ -6,7 +6,7 @@ from typing import Sequence
6
  import torch
7
  from torch import Tensor, nn
8
 
9
- from models.policy import BackboneOnlyPolicy, PolicyConfig, RevealBimanualPolicy
10
  from train.losses import LossWeights, compute_total_loss
11
 
12
 
@@ -20,6 +20,7 @@ class TrainerConfig:
20
  plan_during_train: bool = True
21
  plan_during_eval: bool = True
22
  support_mode_conditioning: bool = True
 
23
 
24
 
25
  def build_policy(config: PolicyConfig, trainer_config: TrainerConfig) -> nn.Module:
@@ -27,9 +28,39 @@ def build_policy(config: PolicyConfig, trainer_config: TrainerConfig) -> nn.Modu
27
  config.backbone.gradient_checkpointing = trainer_config.gradient_checkpointing
28
  if trainer_config.policy_type == "reveal_state":
29
  return RevealBimanualPolicy(config)
 
 
30
  return BackboneOnlyPolicy(config)
31
 
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  class BimanualTrainer:
34
  def __init__(self, model: nn.Module, optimizer: torch.optim.Optimizer, config: TrainerConfig) -> None:
35
  self.model = model
@@ -56,8 +87,8 @@ class BimanualTrainer:
56
  "history_images": batch.get("history_images"),
57
  "history_proprio": batch.get("history_proprio"),
58
  }
59
- if self.config.policy_type == "reveal_state":
60
- forward_kwargs["plan"] = self.config.plan_during_train
61
  forward_kwargs["support_mode_conditioning"] = self.config.support_mode_conditioning
62
  if "candidate_action_chunks" in batch:
63
  forward_kwargs["candidate_chunks_override"] = batch["candidate_action_chunks"]
 
6
  import torch
7
  from torch import Tensor, nn
8
 
9
+ from models.policy import BackboneOnlyPolicy, InteractionBimanualPolicy, PolicyConfig, RevealBimanualPolicy
10
  from train.losses import LossWeights, compute_total_loss
11
 
12
 
 
20
  plan_during_train: bool = True
21
  plan_during_eval: bool = True
22
  support_mode_conditioning: bool = True
23
+ planner_mode: str = "trainable"
24
 
25
 
26
  def build_policy(config: PolicyConfig, trainer_config: TrainerConfig) -> nn.Module:
 
28
  config.backbone.gradient_checkpointing = trainer_config.gradient_checkpointing
29
  if trainer_config.policy_type == "reveal_state":
30
  return RevealBimanualPolicy(config)
31
+ if trainer_config.policy_type == "interaction_state":
32
+ return InteractionBimanualPolicy(config)
33
  return BackboneOnlyPolicy(config)
34
 
35
 
36
def policy_supports_planning(policy_type: str) -> bool:
    """Return True for policy types that carry a planning head."""
    return policy_type in ("reveal_state", "interaction_state")


def planner_enabled(trainer_config: "TrainerConfig", during_eval: bool) -> bool:
    """Resolve whether the planner should run for the current phase.

    Planning is disabled when the configured policy type has no planner or
    when ``planner_mode`` is explicitly "off"; otherwise the phase-specific
    ``plan_during_eval`` / ``plan_during_train`` flag decides.
    """
    supported = policy_supports_planning(trainer_config.policy_type)
    if not supported or trainer_config.planner_mode == "off":
        return False
    return (
        trainer_config.plan_during_eval
        if during_eval
        else trainer_config.plan_during_train
    )
48
+
49
+
50
def apply_planner_mode(model: "nn.Module", trainer_config: "TrainerConfig") -> list[str]:
    """Freeze pretrained planning submodules when ``planner_mode`` asks for it.

    Only acts in "proxy_pretrained" mode; every other mode is a no-op. The
    submodules are looked up by attribute name and skipped when absent, so the
    same helper works for policies with or without a planning stack.

    Returns the names of the modules that were frozen, in scan order.
    """
    if trainer_config.planner_mode != "proxy_pretrained":
        return []
    frozen: list[str] = []
    for name in ("interaction_head", "world_model", "planner"):
        submodule = getattr(model, name, None)
        if submodule is None:
            continue
        frozen.append(name)
        # Detach the pretrained stack from the optimizer by zeroing grads at
        # the source: no grad is ever computed for these parameters.
        for param in submodule.parameters():
            param.requires_grad = False
    return frozen
62
+
63
+
64
  class BimanualTrainer:
65
  def __init__(self, model: nn.Module, optimizer: torch.optim.Optimizer, config: TrainerConfig) -> None:
66
  self.model = model
 
87
  "history_images": batch.get("history_images"),
88
  "history_proprio": batch.get("history_proprio"),
89
  }
90
+ if policy_supports_planning(self.config.policy_type):
91
+ forward_kwargs["plan"] = planner_enabled(self.config, during_eval=False)
92
  forward_kwargs["support_mode_conditioning"] = self.config.support_mode_conditioning
93
  if "candidate_action_chunks" in batch:
94
  forward_kwargs["candidate_chunks_override"] = batch["candidate_action_chunks"]
environment/README.md CHANGED
@@ -10,6 +10,8 @@ This directory contains the machine snapshot and setup helpers for the runpod no
10
  - raw `glxinfo -B` output from the working `:99` X server
11
  - `upstream_revisions.txt`
12
  - third-party repo URLs and pinned commits used on this node
 
 
13
  - `rlbench_env_export.yaml`
14
  - `micromamba env export` for `/workspace/envs/rlbench`
15
  - `rlbench_env_explicit.txt`
@@ -24,6 +26,8 @@ This directory contains the machine snapshot and setup helpers for the runpod no
24
  - `pip freeze` for `/workspace/envs/reveal`
25
  - `setup_same_machine.sh`
26
  - bundle-aware bootstrap script for a matching `/workspace` machine
 
 
27
 
28
  ## Default Layout
29
 
@@ -37,4 +41,5 @@ This directory contains the machine snapshot and setup helpers for the runpod no
37
  ```bash
38
  DISPLAY=:99 glxinfo -B
39
  /workspace/.tools/micromamba/bin/micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench python -m sim_rlbench.launch_smoke --headless
 
40
  ```
 
10
  - raw `glxinfo -B` output from the working `:99` X server
11
  - `upstream_revisions.txt`
12
  - third-party repo URLs and pinned commits used on this node
13
+ - `system_packages_same_machine.txt`
14
+ - apt package names installed for the RLBench/X11 path on this node
15
  - `rlbench_env_export.yaml`
16
  - `micromamba env export` for `/workspace/envs/rlbench`
17
  - `rlbench_env_explicit.txt`
 
26
  - `pip freeze` for `/workspace/envs/reveal`
27
  - `setup_same_machine.sh`
28
  - bundle-aware bootstrap script for a matching `/workspace` machine
29
+ - `validate_same_machine.sh`
30
+ - validation helper that runs `glxinfo`, RLBench import smoke, and `open_drawer` launch smoke
31
 
32
  ## Default Layout
33
 
 
41
  ```bash
42
  DISPLAY=:99 glxinfo -B
43
  /workspace/.tools/micromamba/bin/micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench python -m sim_rlbench.launch_smoke --headless
44
+ /workspace/VLAarchtests/environment/validate_same_machine.sh
45
  ```
environment/setup_same_machine.sh CHANGED
@@ -69,3 +69,4 @@ echo "Recommended validation commands:"
69
  echo " DISPLAY=:99 glxinfo -B"
70
  echo " /workspace/.tools/micromamba/bin/micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench python -m sim_rlbench.launch_smoke --headless"
71
  echo " /workspace/.tools/micromamba/bin/micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench python -m sim_reveal.isaac_smoke"
 
 
69
  echo " DISPLAY=:99 glxinfo -B"
70
  echo " /workspace/.tools/micromamba/bin/micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench python -m sim_rlbench.launch_smoke --headless"
71
  echo " /workspace/.tools/micromamba/bin/micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench python -m sim_reveal.isaac_smoke"
72
+ echo " ${BUNDLE_ROOT}/environment/validate_same_machine.sh"
environment/system_packages_same_machine.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ libxcb-cursor0
2
+ libxcb-icccm4
3
+ libxcb-image0
4
+ libxcb-keysyms1
5
+ libxcb-randr0
6
+ libxcb-render-util0
7
+ libxcb-xinerama0
8
+ libxkbcommon0
9
+ libxkbcommon-x11-0
10
+ libxrender1
11
+ mesa-utils
12
+ nvidia-xconfig
13
+ x11-utils
14
+ x11-xserver-utils
15
+ xauth
16
+ xserver-xorg
17
+ xserver-xorg-core
18
+ xvfb
environment/validate_same_machine.sh ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Validate that this machine matches the captured RLBench/X11 snapshot:
#   1. glxinfo -B against the :99 X server,
#   2. RLBench import smoke inside the micromamba rlbench env,
#   3. an open_drawer headless launch smoke.
# Overridable via env: ROOT_DIR, DISPLAY_NUM, PROJECT_DIR, COPPELIASIM_ROOT.
set -euo pipefail

ROOT_DIR="${ROOT_DIR:-/workspace}"
DISPLAY_NUM="${DISPLAY_NUM:-99}"
DISPLAY=":${DISPLAY_NUM}"
PROJECT_DIR="${PROJECT_DIR:-${ROOT_DIR}/reveal_vla_bimanual}"
MAMBA_BIN="${ROOT_DIR}/.tools/micromamba/bin/micromamba"
MAMBA_ROOT_PREFIX="${ROOT_DIR}/.micromamba"
ENV_PREFIX="${ROOT_DIR}/envs/rlbench"
COPPELIA_DIR="${COPPELIASIM_ROOT:-${ROOT_DIR}/assets/coppeliasim_v4_1_0}"
RUNTIME_DIR="${ROOT_DIR}/runtime"

die() { printf '%s\n' "$*" >&2; exit 1; }

# Preflight: fail fast with a clear diagnostic instead of an opaque mid-run
# error when the machine layout does not match the snapshot.
command -v glxinfo >/dev/null 2>&1 || die "glxinfo not found (install mesa-utils)"
[[ -x "${MAMBA_BIN}" ]] || die "micromamba not found at ${MAMBA_BIN}"
[[ -d "${ENV_PREFIX}" ]] || die "rlbench env prefix missing: ${ENV_PREFIX}"
[[ -d "${COPPELIA_DIR}" ]] || die "CoppeliaSim directory missing: ${COPPELIA_DIR}"

mkdir -p "${RUNTIME_DIR}"
chmod 700 "${RUNTIME_DIR}"   # XDG_RUNTIME_DIR must be private to its owner

# Run a command inside the rlbench micromamba env with the X11 / CoppeliaSim
# environment wired up. When an NVIDIA driver is present and a matching
# system shim directory exists, its libs are prepended to LD_LIBRARY_PATH.
run_in_rlbench_env() {
  local driver_version=""
  local driver_branch=""
  local shim_ld=""
  if command -v nvidia-smi >/dev/null 2>&1; then
    driver_version="$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 || true)"
    driver_version="${driver_version//[[:space:]]/}"   # strip stray CSV padding
    driver_branch="${driver_version%%.*}"              # e.g. 535.129.03 -> 535
    if [[ -n "${driver_branch}" && -d "${ROOT_DIR}/system_shims/nvidia${driver_branch}/usr/lib/x86_64-linux-gnu" ]]; then
      shim_ld="${ROOT_DIR}/system_shims/nvidia${driver_branch}/usr/lib/x86_64-linux-gnu"
    fi
  fi
  env \
    DISPLAY="${DISPLAY}" \
    COPPELIASIM_ROOT="${COPPELIA_DIR}" \
    XDG_RUNTIME_DIR="${RUNTIME_DIR}" \
    LD_LIBRARY_PATH="${COPPELIA_DIR}${shim_ld:+:${shim_ld}}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" \
    QT_QPA_PLATFORM_PLUGIN_PATH="${COPPELIA_DIR}" \
    "${MAMBA_BIN}" run -r "${MAMBA_ROOT_PREFIX}" -p "${ENV_PREFIX}" "$@"
}

echo "Display check"
DISPLAY="${DISPLAY}" glxinfo -B

echo
echo "RLBench import smoke"
run_in_rlbench_env python -m sim_rlbench.smoke_test --print-train-command

echo
echo "RLBench launch smoke"
run_in_rlbench_env python -m sim_rlbench.launch_smoke --task open_drawer --resolution 128 --headless