lsnu commited on
Commit
6fa1956
·
verified ·
1 Parent(s): 35377df

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. artifacts/outputs/reveal_runs/proxy_backbone_only/config_resolved.yaml +85 -0
  2. artifacts/outputs/reveal_runs/proxy_backbone_only/metrics.json +106 -0
  3. artifacts/outputs/reveal_runs/proxy_backbone_only_clip/config_resolved.yaml +88 -0
  4. artifacts/outputs/reveal_runs/proxy_backbone_only_clip/metrics.json +54 -0
  5. artifacts/outputs/reveal_runs/proxy_reveal_state/config_resolved.yaml +85 -0
  6. artifacts/outputs/reveal_runs/proxy_reveal_state/metrics.json +186 -0
  7. artifacts/outputs/reveal_runs/proxy_reveal_state_clip/config_resolved.yaml +88 -0
  8. artifacts/outputs/reveal_runs/proxy_reveal_state_clip/metrics.json +94 -0
  9. artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/config_resolved.yaml +89 -0
  10. artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/metrics.json +28 -0
  11. artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/summary.json +44 -0
  12. artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/config_resolved.yaml +89 -0
  13. artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/metrics.json +28 -0
  14. artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/summary.json +44 -0
  15. artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/config_resolved.yaml +89 -0
  16. artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/metrics.json +28 -0
  17. artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/summary.json +44 -0
  18. artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/config_resolved.yaml +89 -0
  19. artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/metrics.json +28 -0
  20. artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/summary.json +44 -0
  21. artifacts/reports/reveal_eval/reveal_benchmark.json +28 -0
  22. artifacts/reports/reveal_eval/reveal_benchmark.md +25 -0
  23. artifacts/reports/rlbench_custom_clip/backbone_only_rollout/rollout_eval.json +41 -0
  24. artifacts/reports/rlbench_custom_clip/backbone_only_rollout/rollout_eval.md +12 -0
  25. artifacts/reports/rlbench_custom_clip/reveal_state_rollout_noplan/rollout_eval.json +41 -0
  26. code/reveal_vla_bimanual/.gitignore +12 -0
  27. code/reveal_vla_bimanual/README.md +82 -0
  28. code/reveal_vla_bimanual/docs/upstream_pins.md +24 -0
  29. code/reveal_vla_bimanual/docs/xorg.rtx6000.conf +33 -0
  30. code/reveal_vla_bimanual/envs/reveal310.yaml +38 -0
  31. code/reveal_vla_bimanual/envs/rlbench310.yaml +50 -0
  32. code/reveal_vla_bimanual/eval/__init__.py +3 -0
  33. code/reveal_vla_bimanual/eval/__pycache__/__init__.cpython-310.pyc +0 -0
  34. code/reveal_vla_bimanual/eval/__pycache__/__init__.cpython-311.pyc +0 -0
  35. code/reveal_vla_bimanual/eval/__pycache__/ablations.cpython-310.pyc +0 -0
  36. code/reveal_vla_bimanual/eval/__pycache__/ablations.cpython-311.pyc +0 -0
  37. code/reveal_vla_bimanual/eval/__pycache__/metrics.cpython-310.pyc +0 -0
  38. code/reveal_vla_bimanual/eval/__pycache__/metrics.cpython-311.pyc +0 -0
  39. code/reveal_vla_bimanual/eval/__pycache__/report.cpython-310.pyc +0 -0
  40. code/reveal_vla_bimanual/eval/__pycache__/report.cpython-311.pyc +0 -0
  41. code/reveal_vla_bimanual/eval/__pycache__/run_ablations.cpython-310.pyc +0 -0
  42. code/reveal_vla_bimanual/eval/__pycache__/run_ablations.cpython-311.pyc +0 -0
  43. code/reveal_vla_bimanual/eval/__pycache__/run_reveal_benchmark.cpython-310.pyc +0 -0
  44. code/reveal_vla_bimanual/eval/__pycache__/run_reveal_benchmark.cpython-311.pyc +0 -0
  45. code/reveal_vla_bimanual/eval/__pycache__/run_rlbench_rollout_eval.cpython-310.pyc +0 -0
  46. code/reveal_vla_bimanual/eval/ablations.py +8 -0
  47. code/reveal_vla_bimanual/eval/metrics.py +52 -0
  48. code/reveal_vla_bimanual/eval/report.py +50 -0
  49. code/reveal_vla_bimanual/eval/run_ablations.py +68 -0
  50. code/reveal_vla_bimanual/eval/run_reveal_benchmark.py +231 -0
artifacts/outputs/reveal_runs/proxy_backbone_only/config_resolved.yaml ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_backbone_only
2
+ output_dir: /workspace/outputs/reveal_runs
3
+ device: cuda
4
+ seed: 7
5
+ data:
6
+ proxies:
7
+ - foliage_proxy
8
+ - bag_proxy
9
+ - cloth_proxy
10
+ resolution: 96
11
+ train_episodes_per_proxy: 48
12
+ val_episodes_per_proxy: 16
13
+ train_dataset_path: /workspace/data/reveal_proxy/proxy_train_v2.pt
14
+ val_dataset_path: /workspace/data/reveal_proxy/proxy_val_v2.pt
15
+ rebuild_dataset: true
16
+ chunk_horizon: 8
17
+ rollout_horizon: 5
18
+ seed: 7
19
+ optim:
20
+ epochs: 8
21
+ batch_size: 16
22
+ num_workers: 0
23
+ lr: 0.001
24
+ weight_decay: 0.0001
25
+ trainer:
26
+ policy_type: backbone_only
27
+ use_bf16: true
28
+ grad_clip_norm: 1.0
29
+ freeze_backbone: true
30
+ gradient_checkpointing: false
31
+ policy:
32
+ backbone:
33
+ model_name: openai/clip-vit-base-patch32
34
+ hidden_dim: 128
35
+ max_text_tokens: 32
36
+ freeze_backbone: true
37
+ gradient_checkpointing: false
38
+ use_dummy_backbone: true
39
+ fusion:
40
+ hidden_dim: 128
41
+ num_cameras: 3
42
+ num_layers: 2
43
+ num_heads: 4
44
+ ff_dim: 256
45
+ dropout: 0.1
46
+ proprio_dim: 32
47
+ proprio_tokens: 1
48
+ decoder:
49
+ hidden_dim: 128
50
+ num_heads: 4
51
+ num_layers: 2
52
+ ff_dim: 256
53
+ dropout: 0.1
54
+ chunk_size: 8
55
+ action_dim: 14
56
+ num_candidates: 8
57
+ reveal_head:
58
+ hidden_dim: 128
59
+ num_support_modes: 3
60
+ num_approach_templates: 32
61
+ rollout_horizon: 5
62
+ belief_map_size: 32
63
+ predict_belief_map: true
64
+ world_model:
65
+ hidden_dim: 128
66
+ action_dim: 14
67
+ num_support_modes: 3
68
+ num_approach_templates: 32
69
+ rollout_horizon: 5
70
+ planner:
71
+ num_candidates: 8
72
+ corridor_weight: 1.0
73
+ persistence_weight: 0.5
74
+ proposal_weight: 0.5
75
+ disturbance_weight: 0.75
76
+ reocclusion_weight: 0.5
77
+ visibility_weight: 0.25
78
+ loss_weights:
79
+ action: 1.0
80
+ support_mode: 0.1
81
+ corridor: 0.1
82
+ persistence: 0.05
83
+ disturbance: 0.05
84
+ world_model: 0.1
85
+ belief: 0.05
artifacts/outputs/reveal_runs/proxy_backbone_only/metrics.json ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.06700062464612226,
6
+ "total": 0.06700062464612226,
7
+ "world_model": 0.0
8
+ },
9
+ "val": {
10
+ "action": 0.02209080010652542,
11
+ "total": 0.02209080010652542,
12
+ "world_model": 0.0
13
+ }
14
+ },
15
+ {
16
+ "epoch": 1,
17
+ "train": {
18
+ "action": 0.02441179845482111,
19
+ "total": 0.02441179845482111,
20
+ "world_model": 0.0
21
+ },
22
+ "val": {
23
+ "action": 0.01861108955927193,
24
+ "total": 0.01861108955927193,
25
+ "world_model": 0.0
26
+ }
27
+ },
28
+ {
29
+ "epoch": 2,
30
+ "train": {
31
+ "action": 0.020652000947544973,
32
+ "total": 0.020652000947544973,
33
+ "world_model": 0.0
34
+ },
35
+ "val": {
36
+ "action": 0.01581601658836007,
37
+ "total": 0.01581601658836007,
38
+ "world_model": 0.0
39
+ }
40
+ },
41
+ {
42
+ "epoch": 3,
43
+ "train": {
44
+ "action": 0.01735153196689983,
45
+ "total": 0.01735153196689983,
46
+ "world_model": 0.0
47
+ },
48
+ "val": {
49
+ "action": 0.01413003564812243,
50
+ "total": 0.01413003564812243,
51
+ "world_model": 0.0
52
+ }
53
+ },
54
+ {
55
+ "epoch": 4,
56
+ "train": {
57
+ "action": 0.015502698409060637,
58
+ "total": 0.015502698409060637,
59
+ "world_model": 0.0
60
+ },
61
+ "val": {
62
+ "action": 0.012679400155320764,
63
+ "total": 0.012679400155320764,
64
+ "world_model": 0.0
65
+ }
66
+ },
67
+ {
68
+ "epoch": 5,
69
+ "train": {
70
+ "action": 0.015521424783704182,
71
+ "total": 0.015521424783704182,
72
+ "world_model": 0.0
73
+ },
74
+ "val": {
75
+ "action": 0.011973066837526858,
76
+ "total": 0.011973066837526858,
77
+ "world_model": 0.0
78
+ }
79
+ },
80
+ {
81
+ "epoch": 6,
82
+ "train": {
83
+ "action": 0.014476912096142769,
84
+ "total": 0.014476912096142769,
85
+ "world_model": 0.0
86
+ },
87
+ "val": {
88
+ "action": 0.011093099834397435,
89
+ "total": 0.011093099834397435,
90
+ "world_model": 0.0
91
+ }
92
+ },
93
+ {
94
+ "epoch": 7,
95
+ "train": {
96
+ "action": 0.012226066280466815,
97
+ "total": 0.012226066280466815,
98
+ "world_model": 0.0
99
+ },
100
+ "val": {
101
+ "action": 0.012411019764840603,
102
+ "total": 0.012411019764840603,
103
+ "world_model": 0.0
104
+ }
105
+ }
106
+ ]
artifacts/outputs/reveal_runs/proxy_backbone_only_clip/config_resolved.yaml ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_backbone_only_clip
2
+ output_dir: /workspace/outputs/reveal_runs
3
+ device: cuda
4
+ seed: 7
5
+ data:
6
+ proxies:
7
+ - foliage_proxy
8
+ - bag_proxy
9
+ - cloth_proxy
10
+ resolution: 224
11
+ train_episodes_per_proxy: 48
12
+ val_episodes_per_proxy: 16
13
+ train_dataset_path: /workspace/data/reveal_proxy/proxy_train_clip224.pt
14
+ val_dataset_path: /workspace/data/reveal_proxy/proxy_val_clip224.pt
15
+ rebuild_dataset: true
16
+ chunk_horizon: 8
17
+ rollout_horizon: 5
18
+ seed: 7
19
+ optim:
20
+ epochs: 4
21
+ batch_size: 2
22
+ num_workers: 0
23
+ lr: 0.0003
24
+ weight_decay: 0.0001
25
+ trainer:
26
+ policy_type: backbone_only
27
+ use_bf16: true
28
+ grad_clip_norm: 1.0
29
+ freeze_backbone: true
30
+ gradient_checkpointing: false
31
+ plan_during_train: false
32
+ plan_during_eval: false
33
+ support_mode_conditioning: true
34
+ policy:
35
+ backbone:
36
+ model_name: openai/clip-vit-base-patch32
37
+ hidden_dim: 512
38
+ max_text_tokens: 32
39
+ freeze_backbone: true
40
+ gradient_checkpointing: false
41
+ use_dummy_backbone: false
42
+ fusion:
43
+ hidden_dim: 512
44
+ num_cameras: 3
45
+ num_layers: 4
46
+ num_heads: 8
47
+ ff_dim: 2048
48
+ dropout: 0.1
49
+ proprio_dim: 32
50
+ proprio_tokens: 1
51
+ decoder:
52
+ hidden_dim: 512
53
+ num_heads: 8
54
+ num_layers: 4
55
+ ff_dim: 2048
56
+ dropout: 0.1
57
+ chunk_size: 8
58
+ action_dim: 14
59
+ num_candidates: 8
60
+ reveal_head:
61
+ hidden_dim: 512
62
+ num_support_modes: 3
63
+ num_approach_templates: 32
64
+ rollout_horizon: 5
65
+ belief_map_size: 32
66
+ predict_belief_map: true
67
+ world_model:
68
+ hidden_dim: 512
69
+ action_dim: 14
70
+ num_support_modes: 3
71
+ num_approach_templates: 32
72
+ rollout_horizon: 5
73
+ planner:
74
+ num_candidates: 8
75
+ corridor_weight: 1.0
76
+ persistence_weight: 0.5
77
+ proposal_weight: 0.5
78
+ disturbance_weight: 0.75
79
+ reocclusion_weight: 0.5
80
+ visibility_weight: 0.25
81
+ loss_weights:
82
+ action: 1.0
83
+ support_mode: 0.1
84
+ corridor: 0.1
85
+ persistence: 0.05
86
+ disturbance: 0.05
87
+ world_model: 0.1
88
+ belief: 0.05
artifacts/outputs/reveal_runs/proxy_backbone_only_clip/metrics.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.14342915779711063,
6
+ "total": 0.14342915779711063,
7
+ "world_model": 0.0
8
+ },
9
+ "val": {
10
+ "action": 0.026520084648851364,
11
+ "total": 0.026520084648851364,
12
+ "world_model": 0.0
13
+ }
14
+ },
15
+ {
16
+ "epoch": 1,
17
+ "train": {
18
+ "action": 0.01376689436079944,
19
+ "total": 0.01376689436079944,
20
+ "world_model": 0.0
21
+ },
22
+ "val": {
23
+ "action": 0.00792281218390498,
24
+ "total": 0.00792281218390498,
25
+ "world_model": 0.0
26
+ }
27
+ },
28
+ {
29
+ "epoch": 2,
30
+ "train": {
31
+ "action": 0.009396829446095057,
32
+ "total": 0.009396829446095057,
33
+ "world_model": 0.0
34
+ },
35
+ "val": {
36
+ "action": 0.006728713663058385,
37
+ "total": 0.006728713663058385,
38
+ "world_model": 0.0
39
+ }
40
+ },
41
+ {
42
+ "epoch": 3,
43
+ "train": {
44
+ "action": 0.007774835790102784,
45
+ "total": 0.007774835790102784,
46
+ "world_model": 0.0
47
+ },
48
+ "val": {
49
+ "action": 0.005187951255634073,
50
+ "total": 0.005187951255634073,
51
+ "world_model": 0.0
52
+ }
53
+ }
54
+ ]
artifacts/outputs/reveal_runs/proxy_reveal_state/config_resolved.yaml ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_reveal_state
2
+ output_dir: /workspace/outputs/reveal_runs
3
+ device: cuda
4
+ seed: 7
5
+ data:
6
+ proxies:
7
+ - foliage_proxy
8
+ - bag_proxy
9
+ - cloth_proxy
10
+ resolution: 96
11
+ train_episodes_per_proxy: 48
12
+ val_episodes_per_proxy: 16
13
+ train_dataset_path: /workspace/data/reveal_proxy/proxy_train_v2.pt
14
+ val_dataset_path: /workspace/data/reveal_proxy/proxy_val_v2.pt
15
+ rebuild_dataset: false
16
+ chunk_horizon: 8
17
+ rollout_horizon: 5
18
+ seed: 7
19
+ optim:
20
+ epochs: 8
21
+ batch_size: 16
22
+ num_workers: 0
23
+ lr: 0.001
24
+ weight_decay: 0.0001
25
+ trainer:
26
+ policy_type: reveal_state
27
+ use_bf16: true
28
+ grad_clip_norm: 1.0
29
+ freeze_backbone: true
30
+ gradient_checkpointing: false
31
+ policy:
32
+ backbone:
33
+ model_name: openai/clip-vit-base-patch32
34
+ hidden_dim: 128
35
+ max_text_tokens: 32
36
+ freeze_backbone: true
37
+ gradient_checkpointing: false
38
+ use_dummy_backbone: true
39
+ fusion:
40
+ hidden_dim: 128
41
+ num_cameras: 3
42
+ num_layers: 2
43
+ num_heads: 4
44
+ ff_dim: 256
45
+ dropout: 0.1
46
+ proprio_dim: 32
47
+ proprio_tokens: 1
48
+ decoder:
49
+ hidden_dim: 128
50
+ num_heads: 4
51
+ num_layers: 2
52
+ ff_dim: 256
53
+ dropout: 0.1
54
+ chunk_size: 8
55
+ action_dim: 14
56
+ num_candidates: 8
57
+ reveal_head:
58
+ hidden_dim: 128
59
+ num_support_modes: 3
60
+ num_approach_templates: 32
61
+ rollout_horizon: 5
62
+ belief_map_size: 32
63
+ predict_belief_map: true
64
+ world_model:
65
+ hidden_dim: 128
66
+ action_dim: 14
67
+ num_support_modes: 3
68
+ num_approach_templates: 32
69
+ rollout_horizon: 5
70
+ planner:
71
+ num_candidates: 8
72
+ corridor_weight: 1.0
73
+ persistence_weight: 0.65
74
+ proposal_weight: 0.35
75
+ disturbance_weight: 0.8
76
+ reocclusion_weight: 0.6
77
+ visibility_weight: 0.35
78
+ loss_weights:
79
+ action: 1.0
80
+ support_mode: 0.15
81
+ corridor: 0.2
82
+ persistence: 0.1
83
+ disturbance: 0.1
84
+ world_model: 0.2
85
+ belief: 0.05
artifacts/outputs/reveal_runs/proxy_reveal_state/metrics.json ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.2602546961667637,
6
+ "belief": 0.4802860766649246,
7
+ "corridor": 0.6443073948224386,
8
+ "disturbance": 0.006578955658672688,
9
+ "persistence": 4.514919241269429,
10
+ "support_mode": 0.8015391031901041,
11
+ "total": 2.0875226110219955,
12
+ "world_model": 5.510057131449382
13
+ },
14
+ "val": {
15
+ "action": 0.04658499173820019,
16
+ "belief": 0.280171237885952,
17
+ "corridor": 0.5032978095114231,
18
+ "disturbance": 0.003645064221927896,
19
+ "persistence": 3.8178451359272003,
20
+ "support_mode": 0.6714280992746353,
21
+ "total": 1.012940600514412,
22
+ "world_model": 1.8441212028265
23
+ }
24
+ },
25
+ {
26
+ "epoch": 1,
27
+ "train": {
28
+ "action": 0.03881739747400085,
29
+ "belief": 0.18641860752056041,
30
+ "corridor": 0.3944183625280857,
31
+ "disturbance": 0.030439561344489146,
32
+ "persistence": 3.206294293204943,
33
+ "support_mode": 0.5347911287099123,
34
+ "total": 0.9082020496328672,
35
+ "world_model": 1.8864398151636124
36
+ },
37
+ "val": {
38
+ "action": 0.04213718790560961,
39
+ "belief": 0.15712551027536392,
40
+ "corridor": 0.3507457673549652,
41
+ "disturbance": 0.006276358384639025,
42
+ "persistence": 1.8078171163797379,
43
+ "support_mode": 0.10970124043524265,
44
+ "total": 0.6724201738834381,
45
+ "world_model": 1.772064983844757
46
+ }
47
+ },
48
+ {
49
+ "epoch": 2,
50
+ "train": {
51
+ "action": 0.031200370130439598,
52
+ "belief": 0.13828600694735846,
53
+ "corridor": 0.31750819956262905,
54
+ "disturbance": 0.011857866222271696,
55
+ "persistence": 1.7015922193725903,
56
+ "support_mode": 0.02674841312303518,
57
+ "total": 0.6129550884167353,
58
+ "world_model": 1.6799074759085972
59
+ },
60
+ "val": {
61
+ "action": 0.019523032009601593,
62
+ "belief": 0.09429990872740746,
63
+ "corridor": 0.24884792044758797,
64
+ "disturbance": 0.0043011417728848755,
65
+ "persistence": 1.5114311277866364,
66
+ "support_mode": 0.0060500025865621865,
67
+ "total": 0.5359727554023266,
68
+ "world_model": 1.5474220663309097
69
+ }
70
+ },
71
+ {
72
+ "epoch": 3,
73
+ "train": {
74
+ "action": 0.022356805779660743,
75
+ "belief": 0.09125891048461199,
76
+ "corridor": 0.23351835707823435,
77
+ "disturbance": 0.006718798467773013,
78
+ "persistence": 1.6300043910741806,
79
+ "support_mode": 0.004253969304651643,
80
+ "total": 0.5548354809482893,
81
+ "world_model": 1.5845081210136414
82
+ },
83
+ "val": {
84
+ "action": 0.01580847823061049,
85
+ "belief": 0.09042494650930166,
86
+ "corridor": 0.22376472875475883,
87
+ "disturbance": 0.018967560958117247,
88
+ "persistence": 1.4363956600427628,
89
+ "support_mode": 0.03418254409916699,
90
+ "total": 0.5279115326702595,
91
+ "world_model": 1.5608257874846458
92
+ }
93
+ },
94
+ {
95
+ "epoch": 4,
96
+ "train": {
97
+ "action": 0.019881066245337326,
98
+ "belief": 0.08954659259567659,
99
+ "corridor": 0.21636931287745634,
100
+ "disturbance": 0.005539724506282558,
101
+ "persistence": 1.592231921851635,
102
+ "support_mode": 0.008331454223177085,
103
+ "total": 0.5372808227936426,
104
+ "world_model": 1.5431083713968594
105
+ },
106
+ "val": {
107
+ "action": 0.015133287757635117,
108
+ "belief": 0.08718204218894243,
109
+ "corridor": 0.20481965504586697,
110
+ "disturbance": 0.0031357303814729676,
111
+ "persistence": 1.3192060887813568,
112
+ "support_mode": 0.0030863596766721457,
113
+ "total": 0.47997843474149704,
114
+ "world_model": 1.4341248571872711
115
+ }
116
+ },
117
+ {
118
+ "epoch": 5,
119
+ "train": {
120
+ "action": 0.030778280459344387,
121
+ "belief": 0.09159998937199514,
122
+ "corridor": 0.21967005419234434,
123
+ "disturbance": 0.005901901221174437,
124
+ "persistence": 1.651158797244231,
125
+ "support_mode": 0.0024410486221313477,
126
+ "total": 0.5050872204204401,
127
+ "world_model": 1.2986134762565296
128
+ },
129
+ "val": {
130
+ "action": 0.03259791061282158,
131
+ "belief": 0.08867455553263426,
132
+ "corridor": 0.20528649538755417,
133
+ "disturbance": 0.0037689711316488683,
134
+ "persistence": 1.3772646486759186,
135
+ "support_mode": 0.0007588127191411331,
136
+ "total": 0.4101765304803848,
137
+ "world_model": 0.9693519398570061
138
+ }
139
+ },
140
+ {
141
+ "epoch": 6,
142
+ "train": {
143
+ "action": 0.028416083427146077,
144
+ "belief": 0.09289384291817744,
145
+ "corridor": 0.22298985657592615,
146
+ "disturbance": 0.0031898027373244986,
147
+ "persistence": 1.2752377291520436,
148
+ "support_mode": 0.04850278014297752,
149
+ "total": 0.40898223718007404,
150
+ "world_model": 0.9810265600681305
151
+ },
152
+ "val": {
153
+ "action": 0.02159481483977288,
154
+ "belief": 0.08797950763255358,
155
+ "corridor": 0.20524934865534306,
156
+ "disturbance": 0.0015436648827744648,
157
+ "persistence": 1.286000706255436,
158
+ "support_mode": 0.0010480962373549119,
159
+ "total": 0.3605738691985607,
160
+ "world_model": 0.8230927512049675
161
+ }
162
+ },
163
+ {
164
+ "epoch": 7,
165
+ "train": {
166
+ "action": 0.021424691736077268,
167
+ "belief": 0.0899931692207853,
168
+ "corridor": 0.21607277914881706,
169
+ "disturbance": 0.0034827212220989168,
170
+ "persistence": 0.9069182885189851,
171
+ "support_mode": 0.00435957100125961,
172
+ "total": 0.3383450036247571,
173
+ "world_model": 0.8875602881113688
174
+ },
175
+ "val": {
176
+ "action": 0.017686392879113555,
177
+ "belief": 0.09035013243556023,
178
+ "corridor": 0.21036655083298683,
179
+ "disturbance": 0.004888073919573799,
180
+ "persistence": 0.5709216743707657,
181
+ "support_mode": 0.001884725206764415,
182
+ "total": 0.31777225248515606,
183
+ "world_model": 0.978156752884388
184
+ }
185
+ }
186
+ ]
artifacts/outputs/reveal_runs/proxy_reveal_state_clip/config_resolved.yaml ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_reveal_state_clip
2
+ output_dir: /workspace/outputs/reveal_runs
3
+ device: cuda
4
+ seed: 7
5
+ data:
6
+ proxies:
7
+ - foliage_proxy
8
+ - bag_proxy
9
+ - cloth_proxy
10
+ resolution: 224
11
+ train_episodes_per_proxy: 48
12
+ val_episodes_per_proxy: 16
13
+ train_dataset_path: /workspace/data/reveal_proxy/proxy_train_clip224.pt
14
+ val_dataset_path: /workspace/data/reveal_proxy/proxy_val_clip224.pt
15
+ rebuild_dataset: false
16
+ chunk_horizon: 8
17
+ rollout_horizon: 5
18
+ seed: 7
19
+ optim:
20
+ epochs: 4
21
+ batch_size: 2
22
+ num_workers: 0
23
+ lr: 0.0003
24
+ weight_decay: 0.0001
25
+ trainer:
26
+ policy_type: reveal_state
27
+ use_bf16: true
28
+ grad_clip_norm: 1.0
29
+ freeze_backbone: true
30
+ gradient_checkpointing: false
31
+ plan_during_train: true
32
+ plan_during_eval: true
33
+ support_mode_conditioning: true
34
+ policy:
35
+ backbone:
36
+ model_name: openai/clip-vit-base-patch32
37
+ hidden_dim: 512
38
+ max_text_tokens: 32
39
+ freeze_backbone: true
40
+ gradient_checkpointing: false
41
+ use_dummy_backbone: false
42
+ fusion:
43
+ hidden_dim: 512
44
+ num_cameras: 3
45
+ num_layers: 4
46
+ num_heads: 8
47
+ ff_dim: 2048
48
+ dropout: 0.1
49
+ proprio_dim: 32
50
+ proprio_tokens: 1
51
+ decoder:
52
+ hidden_dim: 512
53
+ num_heads: 8
54
+ num_layers: 4
55
+ ff_dim: 2048
56
+ dropout: 0.1
57
+ chunk_size: 8
58
+ action_dim: 14
59
+ num_candidates: 8
60
+ reveal_head:
61
+ hidden_dim: 512
62
+ num_support_modes: 3
63
+ num_approach_templates: 32
64
+ rollout_horizon: 5
65
+ belief_map_size: 32
66
+ predict_belief_map: true
67
+ world_model:
68
+ hidden_dim: 512
69
+ action_dim: 14
70
+ num_support_modes: 3
71
+ num_approach_templates: 32
72
+ rollout_horizon: 5
73
+ planner:
74
+ num_candidates: 8
75
+ corridor_weight: 1.0
76
+ persistence_weight: 0.65
77
+ proposal_weight: 0.35
78
+ disturbance_weight: 0.8
79
+ reocclusion_weight: 0.6
80
+ visibility_weight: 0.35
81
+ loss_weights:
82
+ action: 1.0
83
+ support_mode: 0.15
84
+ corridor: 0.2
85
+ persistence: 0.1
86
+ disturbance: 0.1
87
+ world_model: 0.2
88
+ belief: 0.05
artifacts/outputs/reveal_runs/proxy_reveal_state_clip/metrics.json ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.2168051045758562,
6
+ "belief": 0.16835976690444024,
7
+ "corridor": 0.2947022703851705,
8
+ "disturbance": 0.007973204485554213,
9
+ "persistence": 4.26063614482967,
10
+ "support_mode": 0.7333370827879581,
11
+ "total": 1.1824027625990163,
12
+ "world_model": 1.8068884567440493
13
+ },
14
+ "val": {
15
+ "action": 0.06980070081495103,
16
+ "belief": 0.09293079068736425,
17
+ "corridor": 0.23202623426914215,
18
+ "disturbance": 0.006832122442401236,
19
+ "persistence": 3.871745571257576,
20
+ "support_mode": 0.6699983808729384,
21
+ "total": 0.7863351002572074,
22
+ "world_model": 0.8856253113065448
23
+ }
24
+ },
25
+ {
26
+ "epoch": 1,
27
+ "train": {
28
+ "action": 0.054234529075003104,
29
+ "belief": 0.09439963061382009,
30
+ "corridor": 0.24123663386983396,
31
+ "disturbance": 0.008799185583979581,
32
+ "persistence": 3.9709763473865247,
33
+ "support_mode": 0.674577163776178,
34
+ "total": 0.796180099092853,
35
+ "world_model": 0.9490705705125918
36
+ },
37
+ "val": {
38
+ "action": 0.06558700479448788,
39
+ "belief": 0.1815936780638165,
40
+ "corridor": 0.3361685186151474,
41
+ "disturbance": 0.023940630294086915,
42
+ "persistence": 4.7415515091565865,
43
+ "support_mode": 0.8642671259622725,
44
+ "total": 0.9338183213794042,
45
+ "world_model": 0.9286431225519332
46
+ }
47
+ },
48
+ {
49
+ "epoch": 2,
50
+ "train": {
51
+ "action": 0.03919103866472294,
52
+ "belief": 0.09202757795677759,
53
+ "corridor": 0.21921133667874243,
54
+ "disturbance": 0.04529383548148981,
55
+ "persistence": 1.5436662856260246,
56
+ "support_mode": 0.23989241035820927,
57
+ "total": 0.45590807076212,
58
+ "world_model": 0.8669675243774634
59
+ },
60
+ "val": {
61
+ "action": 0.02496799406787706,
62
+ "belief": 0.08762083173034683,
63
+ "corridor": 0.1930048821996602,
64
+ "disturbance": 0.012308748878745569,
65
+ "persistence": 0.9973389923809066,
66
+ "support_mode": 0.14653402309687363,
67
+ "total": 0.34120540746620726,
68
+ "world_model": 0.7515525425237323
69
+ }
70
+ },
71
+ {
72
+ "epoch": 3,
73
+ "train": {
74
+ "action": 0.034251564747961094,
75
+ "belief": 0.0881565280882788,
76
+ "corridor": 0.19749194407513784,
77
+ "disturbance": 0.019202744416642326,
78
+ "persistence": 1.0902665860137868,
79
+ "support_mode": 0.07417118861413127,
80
+ "total": 0.3623058025905599,
81
+ "world_model": 0.810377035309507
82
+ },
83
+ "val": {
84
+ "action": 0.020182275937663183,
85
+ "belief": 0.08651774370717624,
86
+ "corridor": 0.18512752960022125,
87
+ "disturbance": 0.02845218790591591,
88
+ "persistence": 1.0011120429706006,
89
+ "support_mode": 0.1388084255080367,
90
+ "total": 0.3356363290832156,
91
+ "world_model": 0.7516248249818408
92
+ }
93
+ }
94
+ ]
artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/config_resolved.yaml ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: rlbench_subset3_backbone_only_clip
2
+ output_dir: /workspace/outputs/rlbench_custom
3
+ device: cuda
4
+ seed: 7
5
+ init_checkpoint: /workspace/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
6
+ init_strict: false
7
+ data:
8
+ dataset_root: /workspace/data/rlbench2
9
+ tasks:
10
+ - bimanual_lift_ball
11
+ - bimanual_push_box
12
+ - bimanual_dual_push_buttons
13
+ train_episodes:
14
+ - 0
15
+ val_episodes:
16
+ - 1
17
+ resolution: 224
18
+ chunk_horizon: 8
19
+ proprio_dim: 32
20
+ optim:
21
+ epochs: 2
22
+ batch_size: 2
23
+ num_workers: 0
24
+ lr: 0.0002
25
+ weight_decay: 0.0001
26
+ trainer:
27
+ policy_type: backbone_only
28
+ use_bf16: true
29
+ grad_clip_norm: 1.0
30
+ freeze_backbone: true
31
+ gradient_checkpointing: false
32
+ plan_during_train: false
33
+ plan_during_eval: false
34
+ support_mode_conditioning: true
35
+ policy:
36
+ backbone:
37
+ model_name: openai/clip-vit-base-patch32
38
+ hidden_dim: 512
39
+ max_text_tokens: 32
40
+ freeze_backbone: true
41
+ gradient_checkpointing: false
42
+ use_dummy_backbone: false
43
+ fusion:
44
+ hidden_dim: 512
45
+ num_cameras: 3
46
+ num_layers: 4
47
+ num_heads: 8
48
+ ff_dim: 2048
49
+ dropout: 0.1
50
+ proprio_dim: 32
51
+ proprio_tokens: 1
52
+ decoder:
53
+ hidden_dim: 512
54
+ num_heads: 8
55
+ num_layers: 4
56
+ ff_dim: 2048
57
+ dropout: 0.1
58
+ chunk_size: 8
59
+ action_dim: 14
60
+ num_candidates: 8
61
+ reveal_head:
62
+ hidden_dim: 512
63
+ num_support_modes: 3
64
+ num_approach_templates: 32
65
+ rollout_horizon: 5
66
+ belief_map_size: 32
67
+ predict_belief_map: true
68
+ world_model:
69
+ hidden_dim: 512
70
+ action_dim: 14
71
+ num_support_modes: 3
72
+ num_approach_templates: 32
73
+ rollout_horizon: 5
74
+ planner:
75
+ num_candidates: 8
76
+ corridor_weight: 1.0
77
+ persistence_weight: 0.5
78
+ proposal_weight: 0.5
79
+ disturbance_weight: 0.75
80
+ reocclusion_weight: 0.5
81
+ visibility_weight: 0.25
82
+ loss_weights:
83
+ action: 1.0
84
+ support_mode: 0.1
85
+ corridor: 0.1
86
+ persistence: 0.05
87
+ disturbance: 0.05
88
+ world_model: 0.1
89
+ belief: 0.05
artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/metrics.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.010832569689285108,
6
+ "total": 0.010832569689285108,
7
+ "world_model": 0.0
8
+ },
9
+ "val": {
10
+ "action": 0.00584922067168602,
11
+ "total": 0.00584922067168602,
12
+ "world_model": 0.0
13
+ }
14
+ },
15
+ {
16
+ "epoch": 1,
17
+ "train": {
18
+ "action": 0.007243322389241776,
19
+ "total": 0.007243322389241776,
20
+ "world_model": 0.0
21
+ },
22
+ "val": {
23
+ "action": 0.004669623740794346,
24
+ "total": 0.004669623740794346,
25
+ "world_model": 0.0
26
+ }
27
+ }
28
+ ]
artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/summary.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "experiment_name": "rlbench_subset3_backbone_only_clip",
3
+ "device": "cuda",
4
+ "best_checkpoint": "/workspace/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/checkpoint_best.pt",
5
+ "final_train_total": 0.007243322389241776,
6
+ "final_val_total": 0.004669623740794346,
7
+ "train_dataset": {
8
+ "dataset_root": "/workspace/data/rlbench2",
9
+ "tasks": [
10
+ "bimanual_lift_ball",
11
+ "bimanual_push_box",
12
+ "bimanual_dual_push_buttons"
13
+ ],
14
+ "episode_indices": [
15
+ 0
16
+ ],
17
+ "num_episodes": 3,
18
+ "num_samples": 381,
19
+ "resolution": 224,
20
+ "chunk_size": 8,
21
+ "proprio_dim": 32
22
+ },
23
+ "val_dataset": {
24
+ "dataset_root": "/workspace/data/rlbench2",
25
+ "tasks": [
26
+ "bimanual_lift_ball",
27
+ "bimanual_push_box",
28
+ "bimanual_dual_push_buttons"
29
+ ],
30
+ "episode_indices": [
31
+ 1
32
+ ],
33
+ "num_episodes": 3,
34
+ "num_samples": 374,
35
+ "resolution": 224,
36
+ "chunk_size": 8,
37
+ "proprio_dim": 32
38
+ },
39
+ "init_info": {
40
+ "path": "/workspace/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt",
41
+ "missing_keys": [],
42
+ "unexpected_keys": []
43
+ }
44
+ }
artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/config_resolved.yaml ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: rlbench_subset3_backbone_only_dummy
2
+ output_dir: /workspace/outputs/rlbench_custom
3
+ device: cuda
4
+ seed: 7
5
+ init_checkpoint: /workspace/outputs/reveal_runs/proxy_backbone_only/checkpoint_best.pt
6
+ init_strict: false
7
+ data:
8
+ dataset_root: /workspace/data/rlbench2
9
+ tasks:
10
+ - bimanual_lift_ball
11
+ - bimanual_push_box
12
+ - bimanual_dual_push_buttons
13
+ train_episodes:
14
+ - 0
15
+ val_episodes:
16
+ - 1
17
+ resolution: 224
18
+ chunk_horizon: 8
19
+ proprio_dim: 32
20
+ optim:
21
+ epochs: 2
22
+ batch_size: 4
23
+ num_workers: 0
24
+ lr: 0.0005
25
+ weight_decay: 0.0001
26
+ trainer:
27
+ policy_type: backbone_only
28
+ use_bf16: true
29
+ grad_clip_norm: 1.0
30
+ freeze_backbone: true
31
+ gradient_checkpointing: false
32
+ plan_during_train: false
33
+ plan_during_eval: false
34
+ support_mode_conditioning: true
35
+ policy:
36
+ backbone:
37
+ model_name: openai/clip-vit-base-patch32
38
+ hidden_dim: 128
39
+ max_text_tokens: 32
40
+ freeze_backbone: true
41
+ gradient_checkpointing: false
42
+ use_dummy_backbone: true
43
+ fusion:
44
+ hidden_dim: 128
45
+ num_cameras: 3
46
+ num_layers: 2
47
+ num_heads: 4
48
+ ff_dim: 256
49
+ dropout: 0.1
50
+ proprio_dim: 32
51
+ proprio_tokens: 1
52
+ decoder:
53
+ hidden_dim: 128
54
+ num_heads: 4
55
+ num_layers: 2
56
+ ff_dim: 256
57
+ dropout: 0.1
58
+ chunk_size: 8
59
+ action_dim: 14
60
+ num_candidates: 8
61
+ reveal_head:
62
+ hidden_dim: 128
63
+ num_support_modes: 3
64
+ num_approach_templates: 32
65
+ rollout_horizon: 5
66
+ belief_map_size: 32
67
+ predict_belief_map: true
68
+ world_model:
69
+ hidden_dim: 128
70
+ action_dim: 14
71
+ num_support_modes: 3
72
+ num_approach_templates: 32
73
+ rollout_horizon: 5
74
+ planner:
75
+ num_candidates: 8
76
+ corridor_weight: 1.0
77
+ persistence_weight: 0.5
78
+ proposal_weight: 0.5
79
+ disturbance_weight: 0.75
80
+ reocclusion_weight: 0.5
81
+ visibility_weight: 0.25
82
+ loss_weights:
83
+ action: 1.0
84
+ support_mode: 0.1
85
+ corridor: 0.1
86
+ persistence: 0.05
87
+ disturbance: 0.05
88
+ world_model: 0.1
89
+ belief: 0.05
artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/metrics.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.012133247866586316,
6
+ "total": 0.012133247866586316,
7
+ "world_model": 0.0
8
+ },
9
+ "val": {
10
+ "action": 0.008180527588070191,
11
+ "total": 0.008180527588070191,
12
+ "world_model": 0.0
13
+ }
14
+ },
15
+ {
16
+ "epoch": 1,
17
+ "train": {
18
+ "action": 0.00792471425726641,
19
+ "total": 0.00792471425726641,
20
+ "world_model": 0.0
21
+ },
22
+ "val": {
23
+ "action": 0.005605970580716608,
24
+ "total": 0.005605970580716608,
25
+ "world_model": 0.0
26
+ }
27
+ }
28
+ ]
artifacts/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/summary.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "experiment_name": "rlbench_subset3_backbone_only_dummy",
3
+ "device": "cuda",
4
+ "best_checkpoint": "/workspace/outputs/rlbench_custom/rlbench_subset3_backbone_only_dummy/checkpoint_best.pt",
5
+ "final_train_total": 0.00792471425726641,
6
+ "final_val_total": 0.005605970580716608,
7
+ "train_dataset": {
8
+ "dataset_root": "/workspace/data/rlbench2",
9
+ "tasks": [
10
+ "bimanual_lift_ball",
11
+ "bimanual_push_box",
12
+ "bimanual_dual_push_buttons"
13
+ ],
14
+ "episode_indices": [
15
+ 0
16
+ ],
17
+ "num_episodes": 3,
18
+ "num_samples": 381,
19
+ "resolution": 224,
20
+ "chunk_size": 8,
21
+ "proprio_dim": 32
22
+ },
23
+ "val_dataset": {
24
+ "dataset_root": "/workspace/data/rlbench2",
25
+ "tasks": [
26
+ "bimanual_lift_ball",
27
+ "bimanual_push_box",
28
+ "bimanual_dual_push_buttons"
29
+ ],
30
+ "episode_indices": [
31
+ 1
32
+ ],
33
+ "num_episodes": 3,
34
+ "num_samples": 374,
35
+ "resolution": 224,
36
+ "chunk_size": 8,
37
+ "proprio_dim": 32
38
+ },
39
+ "init_info": {
40
+ "path": "/workspace/outputs/reveal_runs/proxy_backbone_only/checkpoint_best.pt",
41
+ "missing_keys": [],
42
+ "unexpected_keys": []
43
+ }
44
+ }
artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/config_resolved.yaml ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: rlbench_subset3_reveal_state_clip
2
+ output_dir: /workspace/outputs/rlbench_custom
3
+ device: cuda
4
+ seed: 7
5
+ init_checkpoint: /workspace/outputs/reveal_runs/proxy_reveal_state_clip/checkpoint_best.pt
6
+ init_strict: false
7
+ data:
8
+ dataset_root: /workspace/data/rlbench2
9
+ tasks:
10
+ - bimanual_lift_ball
11
+ - bimanual_push_box
12
+ - bimanual_dual_push_buttons
13
+ train_episodes:
14
+ - 0
15
+ val_episodes:
16
+ - 1
17
+ resolution: 224
18
+ chunk_horizon: 8
19
+ proprio_dim: 32
20
+ optim:
21
+ epochs: 2
22
+ batch_size: 2
23
+ num_workers: 0
24
+ lr: 0.0002
25
+ weight_decay: 0.0001
26
+ trainer:
27
+ policy_type: reveal_state
28
+ use_bf16: true
29
+ grad_clip_norm: 1.0
30
+ freeze_backbone: true
31
+ gradient_checkpointing: false
32
+ plan_during_train: false
33
+ plan_during_eval: false
34
+ support_mode_conditioning: true
35
+ policy:
36
+ backbone:
37
+ model_name: openai/clip-vit-base-patch32
38
+ hidden_dim: 512
39
+ max_text_tokens: 32
40
+ freeze_backbone: true
41
+ gradient_checkpointing: false
42
+ use_dummy_backbone: false
43
+ fusion:
44
+ hidden_dim: 512
45
+ num_cameras: 3
46
+ num_layers: 4
47
+ num_heads: 8
48
+ ff_dim: 2048
49
+ dropout: 0.1
50
+ proprio_dim: 32
51
+ proprio_tokens: 1
52
+ decoder:
53
+ hidden_dim: 512
54
+ num_heads: 8
55
+ num_layers: 4
56
+ ff_dim: 2048
57
+ dropout: 0.1
58
+ chunk_size: 8
59
+ action_dim: 14
60
+ num_candidates: 8
61
+ reveal_head:
62
+ hidden_dim: 512
63
+ num_support_modes: 3
64
+ num_approach_templates: 32
65
+ rollout_horizon: 5
66
+ belief_map_size: 32
67
+ predict_belief_map: true
68
+ world_model:
69
+ hidden_dim: 512
70
+ action_dim: 14
71
+ num_support_modes: 3
72
+ num_approach_templates: 32
73
+ rollout_horizon: 5
74
+ planner:
75
+ num_candidates: 8
76
+ corridor_weight: 1.0
77
+ persistence_weight: 0.65
78
+ proposal_weight: 0.35
79
+ disturbance_weight: 0.8
80
+ reocclusion_weight: 0.6
81
+ visibility_weight: 0.35
82
+ loss_weights:
83
+ action: 1.0
84
+ support_mode: 0.15
85
+ corridor: 0.2
86
+ persistence: 0.1
87
+ disturbance: 0.1
88
+ world_model: 0.2
89
+ belief: 0.05
artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/metrics.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.012311161635931172,
6
+ "total": 0.012311161635931172,
7
+ "world_model": 0.0
8
+ },
9
+ "val": {
10
+ "action": 0.00556847607881269,
11
+ "total": 0.00556847607881269,
12
+ "world_model": 0.0
13
+ }
14
+ },
15
+ {
16
+ "epoch": 1,
17
+ "train": {
18
+ "action": 0.0070935887447924045,
19
+ "total": 0.0070935887447924045,
20
+ "world_model": 0.0
21
+ },
22
+ "val": {
23
+ "action": 0.004233352240750238,
24
+ "total": 0.004233352240750238,
25
+ "world_model": 0.0
26
+ }
27
+ }
28
+ ]
artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/summary.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "experiment_name": "rlbench_subset3_reveal_state_clip",
3
+ "device": "cuda",
4
+ "best_checkpoint": "/workspace/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/checkpoint_best.pt",
5
+ "final_train_total": 0.0070935887447924045,
6
+ "final_val_total": 0.004233352240750238,
7
+ "train_dataset": {
8
+ "dataset_root": "/workspace/data/rlbench2",
9
+ "tasks": [
10
+ "bimanual_lift_ball",
11
+ "bimanual_push_box",
12
+ "bimanual_dual_push_buttons"
13
+ ],
14
+ "episode_indices": [
15
+ 0
16
+ ],
17
+ "num_episodes": 3,
18
+ "num_samples": 381,
19
+ "resolution": 224,
20
+ "chunk_size": 8,
21
+ "proprio_dim": 32
22
+ },
23
+ "val_dataset": {
24
+ "dataset_root": "/workspace/data/rlbench2",
25
+ "tasks": [
26
+ "bimanual_lift_ball",
27
+ "bimanual_push_box",
28
+ "bimanual_dual_push_buttons"
29
+ ],
30
+ "episode_indices": [
31
+ 1
32
+ ],
33
+ "num_episodes": 3,
34
+ "num_samples": 374,
35
+ "resolution": 224,
36
+ "chunk_size": 8,
37
+ "proprio_dim": 32
38
+ },
39
+ "init_info": {
40
+ "path": "/workspace/outputs/reveal_runs/proxy_reveal_state_clip/checkpoint_best.pt",
41
+ "missing_keys": [],
42
+ "unexpected_keys": []
43
+ }
44
+ }
artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/config_resolved.yaml ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: rlbench_subset3_reveal_state_dummy
2
+ output_dir: /workspace/outputs/rlbench_custom
3
+ device: cuda
4
+ seed: 7
5
+ init_checkpoint: /workspace/outputs/reveal_runs/proxy_reveal_state/checkpoint_best.pt
6
+ init_strict: false
7
+ data:
8
+ dataset_root: /workspace/data/rlbench2
9
+ tasks:
10
+ - bimanual_lift_ball
11
+ - bimanual_push_box
12
+ - bimanual_dual_push_buttons
13
+ train_episodes:
14
+ - 0
15
+ val_episodes:
16
+ - 1
17
+ resolution: 224
18
+ chunk_horizon: 8
19
+ proprio_dim: 32
20
+ optim:
21
+ epochs: 2
22
+ batch_size: 4
23
+ num_workers: 0
24
+ lr: 0.0005
25
+ weight_decay: 0.0001
26
+ trainer:
27
+ policy_type: reveal_state
28
+ use_bf16: true
29
+ grad_clip_norm: 1.0
30
+ freeze_backbone: true
31
+ gradient_checkpointing: false
32
+ plan_during_train: false
33
+ plan_during_eval: false
34
+ support_mode_conditioning: true
35
+ policy:
36
+ backbone:
37
+ model_name: openai/clip-vit-base-patch32
38
+ hidden_dim: 128
39
+ max_text_tokens: 32
40
+ freeze_backbone: true
41
+ gradient_checkpointing: false
42
+ use_dummy_backbone: true
43
+ fusion:
44
+ hidden_dim: 128
45
+ num_cameras: 3
46
+ num_layers: 2
47
+ num_heads: 4
48
+ ff_dim: 256
49
+ dropout: 0.1
50
+ proprio_dim: 32
51
+ proprio_tokens: 1
52
+ decoder:
53
+ hidden_dim: 128
54
+ num_heads: 4
55
+ num_layers: 2
56
+ ff_dim: 256
57
+ dropout: 0.1
58
+ chunk_size: 8
59
+ action_dim: 14
60
+ num_candidates: 8
61
+ reveal_head:
62
+ hidden_dim: 128
63
+ num_support_modes: 3
64
+ num_approach_templates: 32
65
+ rollout_horizon: 5
66
+ belief_map_size: 32
67
+ predict_belief_map: true
68
+ world_model:
69
+ hidden_dim: 128
70
+ action_dim: 14
71
+ num_support_modes: 3
72
+ num_approach_templates: 32
73
+ rollout_horizon: 5
74
+ planner:
75
+ num_candidates: 8
76
+ corridor_weight: 1.0
77
+ persistence_weight: 0.65
78
+ proposal_weight: 0.35
79
+ disturbance_weight: 0.8
80
+ reocclusion_weight: 0.6
81
+ visibility_weight: 0.35
82
+ loss_weights:
83
+ action: 1.0
84
+ support_mode: 0.15
85
+ corridor: 0.2
86
+ persistence: 0.1
87
+ disturbance: 0.1
88
+ world_model: 0.2
89
+ belief: 0.05
artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/metrics.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.015062082646181807,
6
+ "total": 0.015062082646181807,
7
+ "world_model": 0.0
8
+ },
9
+ "val": {
10
+ "action": 0.008003641142846738,
11
+ "total": 0.008003641142846738,
12
+ "world_model": 0.0
13
+ }
14
+ },
15
+ {
16
+ "epoch": 1,
17
+ "train": {
18
+ "action": 0.007828686845944807,
19
+ "total": 0.007828686845944807,
20
+ "world_model": 0.0
21
+ },
22
+ "val": {
23
+ "action": 0.0091639062995958,
24
+ "total": 0.0091639062995958,
25
+ "world_model": 0.0
26
+ }
27
+ }
28
+ ]
artifacts/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/summary.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "experiment_name": "rlbench_subset3_reveal_state_dummy",
3
+ "device": "cuda",
4
+ "best_checkpoint": "/workspace/outputs/rlbench_custom/rlbench_subset3_reveal_state_dummy/checkpoint_best.pt",
5
+ "final_train_total": 0.007828686845944807,
6
+ "final_val_total": 0.0091639062995958,
7
+ "train_dataset": {
8
+ "dataset_root": "/workspace/data/rlbench2",
9
+ "tasks": [
10
+ "bimanual_lift_ball",
11
+ "bimanual_push_box",
12
+ "bimanual_dual_push_buttons"
13
+ ],
14
+ "episode_indices": [
15
+ 0
16
+ ],
17
+ "num_episodes": 3,
18
+ "num_samples": 381,
19
+ "resolution": 224,
20
+ "chunk_size": 8,
21
+ "proprio_dim": 32
22
+ },
23
+ "val_dataset": {
24
+ "dataset_root": "/workspace/data/rlbench2",
25
+ "tasks": [
26
+ "bimanual_lift_ball",
27
+ "bimanual_push_box",
28
+ "bimanual_dual_push_buttons"
29
+ ],
30
+ "episode_indices": [
31
+ 1
32
+ ],
33
+ "num_episodes": 3,
34
+ "num_samples": 374,
35
+ "resolution": 224,
36
+ "chunk_size": 8,
37
+ "proprio_dim": 32
38
+ },
39
+ "init_info": {
40
+ "path": "/workspace/outputs/reveal_runs/proxy_reveal_state/checkpoint_best.pt",
41
+ "missing_keys": [],
42
+ "unexpected_keys": []
43
+ }
44
+ }
artifacts/reports/reveal_eval/reveal_benchmark.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backbone": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 1.0,
5
+ "bag_proxy": 1.0,
6
+ "cloth_proxy": 1.0
7
+ },
8
+ "mean_success": 1.0,
9
+ "visibility_integral": 1.7894555413060718,
10
+ "corridor_availability": 0.7018518588609166,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 0.0,
13
+ "disturbance_cost": 0.1193024102701909
14
+ },
15
+ "reveal": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.9583333333333334,
18
+ "bag_proxy": 0.9166666666666666,
19
+ "cloth_proxy": 1.0
20
+ },
21
+ "mean_success": 0.9583333333333334,
22
+ "visibility_integral": 6.966822463605139,
23
+ "corridor_availability": 0.7799575842089124,
24
+ "reocclusion_rate": 0.005997474747474748,
25
+ "persistence_horizon_mae": 1.2541997782345518,
26
+ "disturbance_cost": 0.2107134228054848
27
+ }
28
+ }
artifacts/reports/reveal_eval/reveal_benchmark.md ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## backbone
4
+ - checkpoint: /workspace/outputs/reveal_runs/proxy_backbone_only/checkpoint_best.pt
5
+ - mean_success: 1.000
6
+ - visibility_integral: 1.789
7
+ - corridor_availability: 0.702
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 0.000
10
+ - disturbance_cost: 0.119
11
+ - foliage_proxy_success: 1.000
12
+ - bag_proxy_success: 1.000
13
+ - cloth_proxy_success: 1.000
14
+
15
+ ## reveal
16
+ - checkpoint: /workspace/outputs/reveal_runs/proxy_reveal_state/checkpoint_best.pt
17
+ - mean_success: 0.958
18
+ - visibility_integral: 6.967
19
+ - corridor_availability: 0.780
20
+ - reocclusion_rate: 0.006
21
+ - persistence_horizon_mae: 1.254
22
+ - disturbance_cost: 0.211
23
+ - foliage_proxy_success: 0.958
24
+ - bag_proxy_success: 0.917
25
+ - cloth_proxy_success: 1.000
artifacts/reports/rlbench_custom_clip/backbone_only_rollout/rollout_eval.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "checkpoint": "/workspace/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/checkpoint_best.pt",
3
+ "plan": false,
4
+ "support_mode_conditioning": true,
5
+ "episodes_per_task": 1,
6
+ "episode_length": 5,
7
+ "resolution": 224,
8
+ "tasks": {
9
+ "bimanual_lift_ball": {
10
+ "successes": [
11
+ 0.0
12
+ ],
13
+ "returns": [
14
+ 0.0
15
+ ],
16
+ "mean_success": 0.0,
17
+ "mean_return": 0.0
18
+ },
19
+ "bimanual_push_box": {
20
+ "successes": [
21
+ 0.0
22
+ ],
23
+ "returns": [
24
+ 0.0
25
+ ],
26
+ "mean_success": 0.0,
27
+ "mean_return": 0.0
28
+ },
29
+ "bimanual_dual_push_buttons": {
30
+ "successes": [
31
+ 0.0
32
+ ],
33
+ "returns": [
34
+ 0.0
35
+ ],
36
+ "mean_success": 0.0,
37
+ "mean_return": 0.0
38
+ }
39
+ },
40
+ "mean_success": 0.0
41
+ }
artifacts/reports/rlbench_custom_clip/backbone_only_rollout/rollout_eval.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # RLBench Rollout Eval
2
+
3
+ - Checkpoint: `/workspace/outputs/rlbench_custom/rlbench_subset3_backbone_only_clip/checkpoint_best.pt`
4
+ - Plan enabled: `False`
5
+ - Support-mode conditioning: `True`
6
+ - Mean success: `0.000`
7
+
8
+ ## Per-task
9
+
10
+ - `bimanual_lift_ball`: mean_success=0.000, returns=[0.0]
11
+ - `bimanual_push_box`: mean_success=0.000, returns=[0.0]
12
+ - `bimanual_dual_push_buttons`: mean_success=0.000, returns=[0.0]
artifacts/reports/rlbench_custom_clip/reveal_state_rollout_noplan/rollout_eval.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "checkpoint": "/workspace/outputs/rlbench_custom/rlbench_subset3_reveal_state_clip/checkpoint_best.pt",
3
+ "plan": false,
4
+ "support_mode_conditioning": true,
5
+ "episodes_per_task": 1,
6
+ "episode_length": 5,
7
+ "resolution": 224,
8
+ "tasks": {
9
+ "bimanual_lift_ball": {
10
+ "successes": [
11
+ 0.0
12
+ ],
13
+ "returns": [
14
+ 0.0
15
+ ],
16
+ "mean_success": 0.0,
17
+ "mean_return": 0.0
18
+ },
19
+ "bimanual_push_box": {
20
+ "successes": [
21
+ 0.0
22
+ ],
23
+ "returns": [
24
+ 0.0
25
+ ],
26
+ "mean_success": 0.0,
27
+ "mean_return": 0.0
28
+ },
29
+ "bimanual_dual_push_buttons": {
30
+ "successes": [
31
+ 0.0
32
+ ],
33
+ "returns": [
34
+ 0.0
35
+ ],
36
+ "mean_success": 0.0,
37
+ "mean_return": 0.0
38
+ }
39
+ },
40
+ "mean_success": 0.0
41
+ }
code/reveal_vla_bimanual/.gitignore ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ .DS_Store
4
+ .mypy_cache/
5
+ .pytest_cache/
6
+ .ruff_cache/
7
+ .venv/
8
+ artifacts/
9
+ outputs/
10
+ logs/
11
+ wandb/
12
+ reports/
code/reveal_vla_bimanual/README.md ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # reveal_vla_bimanual
2
+
3
+ Simulation-first prototype for bimanual reveal-and-retrieve under elastic occlusion.
4
+
5
+ This repo is structured around five top-level modules:
6
+
7
+ - `sim_rlbench/`: RLBench2 / PerAct2 wrappers, dataset hooks, camera setup, and benchmark evaluation helpers.
8
+ - `sim_reveal/`: reveal-proxy environments, scripted teachers, and privileged label extraction.
9
+ - `models/`: shared backbone wrappers, multi-view fusion, bimanual decoder, reveal-state head, world model, and planner.
10
+ - `train/`: trainers, losses, checkpointing, and Hydra/YAML configs.
11
+ - `eval/`: benchmark scripts, ablations, metrics, plots, and report generation.
12
+
13
+ Current bootstrap priorities:
14
+
15
+ 1. Reproduce the RLBench2 / PerAct2 stack with a fixed 3-camera interface.
16
+ 2. Stand up a backbone-only 3-camera policy in the same training/eval harness.
17
+ 3. Add reveal-state supervision and short-horizon planning for synthetic reveal proxies.
18
+
19
+ Upstream dependencies are kept in `/workspace/third_party` and pinned in `docs/upstream_pins.md`.
20
+
21
+ ## RLBench env A
22
+
23
+ The RLBench / PerAct2 stack is pinned to Python 3.10 and lives in `/workspace/envs/rlbench`.
24
+
25
+ Bring it up with:
26
+
27
+ ```bash
28
+ /workspace/reveal_vla_bimanual/scripts/setup_env_a_rlbench.sh
29
+ /workspace/reveal_vla_bimanual/scripts/setup_rlbench_headless_x.sh
30
+ /workspace/reveal_vla_bimanual/scripts/start_rlbench_x.sh
31
+ ```
32
+
33
+ Verify GPU GL on the headless display:
34
+
35
+ ```bash
36
+ DISPLAY=:99 glxinfo -B
37
+ ```
38
+
39
+ Run the RLBench launch/reset/step smoke test:
40
+
41
+ ```bash
42
+ env \
43
+ DISPLAY=:99 \
44
+ XDG_RUNTIME_DIR=/tmp/runtime-root \
45
+ COPPELIASIM_ROOT=/workspace/assets/coppeliasim_v4_1_0 \
46
+ LD_LIBRARY_PATH=/workspace/system_shims/nvidia$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 | cut -d. -f1)/usr/lib/x86_64-linux-gnu:/workspace/system_shims/nvidia$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 | cut -d. -f1)/usr/lib/x86_64-linux-gnu/nvidia:/workspace/assets/coppeliasim_v4_1_0 \
47
+ QT_QPA_PLATFORM_PLUGIN_PATH=/workspace/assets/coppeliasim_v4_1_0 \
48
+ /workspace/.tools/micromamba/bin/micromamba run \
49
+ -r /workspace/.micromamba \
50
+ -p /workspace/envs/rlbench \
51
+ python -m sim_rlbench.launch_smoke --headless
52
+ ```
53
+
54
+ The working benchmark interface is fixed to three cameras only:
55
+
56
+ - `front`
57
+ - `wrist_left`
58
+ - `wrist_right`
59
+
60
+ The smoke test covers launch, bimanual task reset, canonical observation extraction, and one bimanual action step in `headless=True`, which is the same mode used by the upstream PerAct2-style training stack.
61
+
62
+ Generate the PerAct2-compatible train command for the fixed 3-camera interface with:
63
+
64
+ ```bash
65
+ micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
66
+ python -m sim_rlbench.smoke_test --print-train-command
67
+ ```
68
+
69
+ Download the published PerAct2 demos into `/workspace/data/rlbench2` with checksum verification:
70
+
71
+ ```bash
72
+ micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
73
+ python -m sim_rlbench.dataset_download --resolution 256 --splits train
74
+ ```
75
+
76
+ If you want the archives unpacked directly into the demo root expected by RLBench, add `--extract`:
77
+
78
+ ```bash
79
+ apt-get install -y squashfs-tools
80
+ micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
81
+ python -m sim_rlbench.dataset_download --resolution 256 --splits train --extract
82
+ ```
code/reveal_vla_bimanual/docs/upstream_pins.md ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Upstream Pins
2
+
3
+ Pinned on 2026-03-22 in `/workspace/third_party`.
4
+
5
+ Mandatory benchmark stack:
6
+
7
+ - `peract_bimanual`: `bb0232a6ba3fe116566e9568f0c7af980ed6703d`
8
+ - `RLBench`: `8af748c51287989294e00c9c670e3330a0e35ed5`
9
+ - `PyRep`: `b8bd1d7a3182adcd570d001649c0849047ebf197`
10
+ - `YARR`: `6822ff78602c77878b27d4cfe759ce029c67bffb`
11
+
12
+ Optional published baseline:
13
+
14
+ - `AnyBimanual`: `76024e48b0e9489101459e85bc909c126ec581b4`
15
+
16
+ Reveal-proxy stack candidate:
17
+
18
+ - `IsaacLab`: `v2.3.1` was cloned for inspection, but it targets Python 3.11 and Isaac Sim 5.x.
19
+ - For the frozen project scope of Python 3.10 on Ubuntu 22.04, env B should stay on an Isaac Sim 4.5-compatible Isaac Lab release instead of the latest branch.
20
+
21
+ Notes:
22
+
23
+ - `peract_bimanual` defaults to 6 cameras and older Python/Torch pins. This repo overrides camera selection and environment creation rather than running the upstream install scripts unchanged.
24
+ - RLBench headless execution on this RunPod host will require an X server setup because the base image does not currently ship `X`, `xvfb`, or `nvidia-xconfig`.
code/reveal_vla_bimanual/docs/xorg.rtx6000.conf ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Section "ServerLayout"
2
+ Identifier "Layout0"
3
+ Screen 0 "Screen0"
4
+ EndSection
5
+
6
+ Section "Monitor"
7
+ Identifier "Monitor0"
8
+ VendorName "Unknown"
9
+ ModelName "Unknown"
10
+ Option "DPMS"
11
+ EndSection
12
+
13
+ Section "Device"
14
+ Identifier "Device0"
15
+ Driver "nvidia"
16
+ VendorName "NVIDIA Corporation"
17
+ BusID "PCI:65:0:0"
18
+ Option "AllowEmptyInitialConfiguration" "True"
19
+ Option "UseDisplayDevice" "None"
20
+ Option "ProbeAllGpus" "False"
21
+ EndSection
22
+
23
+ Section "Screen"
24
+ Identifier "Screen0"
25
+ Device "Device0"
26
+ Monitor "Monitor0"
27
+ DefaultDepth 24
28
+ Option "AllowEmptyInitialConfiguration" "True"
29
+ SubSection "Display"
30
+ Depth 24
31
+ Virtual 1280 1024
32
+ EndSubSection
33
+ EndSection
code/reveal_vla_bimanual/envs/reveal310.yaml ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: reveal310
2
+ channels:
3
+ - pytorch
4
+ - nvidia
5
+ - conda-forge
6
+ dependencies:
7
+ - python=3.10
8
+ - pip
9
+ - git
10
+ - cmake
11
+ - ninja
12
+ - make
13
+ - gxx_linux-64
14
+ - pkg-config
15
+ - numpy=1.26.*
16
+ - pandas=2.2.*
17
+ - scipy=1.13.*
18
+ - matplotlib=3.8.*
19
+ - pyyaml=6.*
20
+ - imageio
21
+ - trimesh
22
+ - networkx
23
+ - psutil
24
+ - tqdm
25
+ - pytorch=2.3.1
26
+ - torchvision=0.18.1
27
+ - torchaudio=2.3.1
28
+ - pytorch-cuda=12.1
29
+ - pip:
30
+ - accelerate==0.31.0
31
+ - einops==0.8.0
32
+ - hydra-core==1.3.2
33
+ - omegaconf==2.3.0
34
+ - safetensors==0.4.3
35
+ - tensorboard==2.16.2
36
+ - timm==1.0.7
37
+ - transformers==4.41.2
38
+ - wandb==0.18.0
code/reveal_vla_bimanual/envs/rlbench310.yaml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: rlbench310
2
+ channels:
3
+ - pytorch
4
+ - nvidia
5
+ - conda-forge
6
+ dependencies:
7
+ - python=3.10
8
+ - pip
9
+ - git
10
+ - cmake
11
+ - cffi
12
+ - ninja
13
+ - make
14
+ - gxx_linux-64
15
+ - pkg-config
16
+ - numpy=1.26.*
17
+ - pandas=2.2.*
18
+ - scipy=1.13.*
19
+ - matplotlib=3.8.*
20
+ - pyyaml=6.*
21
+ - h5py
22
+ - imageio
23
+ - pillow
24
+ - psutil
25
+ - tqdm
26
+ - trimesh
27
+ - pytorch=2.3.1
28
+ - torchvision=0.18.1
29
+ - torchaudio=2.3.1
30
+ - pytorch-cuda=12.1
31
+ - pip:
32
+ - accelerate==0.31.0
33
+ - absl-py==2.1.0
34
+ - clip @ git+https://github.com/openai/CLIP.git
35
+ - einops==0.8.0
36
+ - ftfy==6.2.0
37
+ - gym==0.26.2
38
+ - hydra-core==1.3.2
39
+ - natsort==8.4.0
40
+ - omegaconf==2.3.0
41
+ - perceiver-pytorch==0.8.8
42
+ - pyrender==0.1.45
43
+ - pytorch-lamb==1.0.0
44
+ - regex==2024.5.15
45
+ - rich==13.9.4
46
+ - rich-click==1.8.9
47
+ - safetensors==0.4.3
48
+ - tensorboard==2.16.2
49
+ - transformers==4.41.2
50
+ - wandb==0.18.0
code/reveal_vla_bimanual/eval/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from eval.metrics import BenchmarkMetrics
2
+
3
+ __all__ = ["BenchmarkMetrics"]
code/reveal_vla_bimanual/eval/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (215 Bytes). View file
 
code/reveal_vla_bimanual/eval/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (249 Bytes). View file
 
code/reveal_vla_bimanual/eval/__pycache__/ablations.cpython-310.pyc ADDED
Binary file (344 Bytes). View file
 
code/reveal_vla_bimanual/eval/__pycache__/ablations.cpython-311.pyc ADDED
Binary file (408 Bytes). View file
 
code/reveal_vla_bimanual/eval/__pycache__/metrics.cpython-310.pyc ADDED
Binary file (2.03 kB). View file
 
code/reveal_vla_bimanual/eval/__pycache__/metrics.cpython-311.pyc ADDED
Binary file (3.58 kB). View file
 
code/reveal_vla_bimanual/eval/__pycache__/report.cpython-310.pyc ADDED
Binary file (1.71 kB). View file
 
code/reveal_vla_bimanual/eval/__pycache__/report.cpython-311.pyc ADDED
Binary file (3.29 kB). View file
 
code/reveal_vla_bimanual/eval/__pycache__/run_ablations.cpython-310.pyc ADDED
Binary file (2.12 kB). View file
 
code/reveal_vla_bimanual/eval/__pycache__/run_ablations.cpython-311.pyc ADDED
Binary file (3.77 kB). View file
 
code/reveal_vla_bimanual/eval/__pycache__/run_reveal_benchmark.cpython-310.pyc ADDED
Binary file (7.33 kB). View file
 
code/reveal_vla_bimanual/eval/__pycache__/run_reveal_benchmark.cpython-311.pyc ADDED
Binary file (14.2 kB). View file
 
code/reveal_vla_bimanual/eval/__pycache__/run_rlbench_rollout_eval.cpython-310.pyc ADDED
Binary file (5.96 kB). View file
 
code/reveal_vla_bimanual/eval/ablations.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ MANDATORY_ABLATIONS: tuple[str, ...] = (
2
+ "no_reveal_state_head",
3
+ "no_world_model",
4
+ "no_planner_reranking",
5
+ "no_support_mode_conditioning",
6
+ "no_wrist_cameras",
7
+ "no_global_camera",
8
+ )
code/reveal_vla_bimanual/eval/metrics.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ import numpy as np
6
+
7
+
8
+ @dataclass
9
+ class BenchmarkMetrics:
10
+ per_task_success: dict[str, float]
11
+ mean_success: float
12
+ visibility_integral: float | None = None
13
+ corridor_availability: float | None = None
14
+ reocclusion_rate: float | None = None
15
+ persistence_horizon_mae: float | None = None
16
+ disturbance_cost: float | None = None
17
+
18
+
19
+ def mean_success(per_task_success: dict[str, float]) -> float:
20
+ if not per_task_success:
21
+ return 0.0
22
+ return float(np.mean(list(per_task_success.values())))
23
+
24
+
25
+ def visibility_integral(curve: np.ndarray) -> float:
26
+ curve = np.asarray(curve, dtype=np.float32)
27
+ return float(curve.sum())
28
+
29
+
30
+ def corridor_availability(corridor_open: np.ndarray) -> float:
31
+ corridor_open = np.asarray(corridor_open, dtype=np.float32)
32
+ return float(corridor_open.mean())
33
+
34
+
35
+ def reocclusion_rate(corridor_open: np.ndarray) -> float:
36
+ corridor_open = np.asarray(corridor_open, dtype=np.float32)
37
+ if corridor_open.size < 2:
38
+ return 0.0
39
+ return float(np.logical_and(corridor_open[:-1] > 0.5, corridor_open[1:] <= 0.5).mean())
40
+
41
+
42
+ def persistence_horizon_mae(prediction: np.ndarray, target: np.ndarray) -> float:
43
+ prediction = np.asarray(prediction, dtype=np.float32)
44
+ target = np.asarray(target, dtype=np.float32)
45
+ return float(np.abs(prediction - target).mean())
46
+
47
+
48
+ def mean_disturbance_cost(values: np.ndarray) -> float:
49
+ values = np.asarray(values, dtype=np.float32)
50
+ if values.size == 0:
51
+ return 0.0
52
+ return float(values.mean())
code/reveal_vla_bimanual/eval/report.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ from eval.metrics import BenchmarkMetrics
6
+
7
+
8
+ def write_markdown_report(
9
+ output_path: Path,
10
+ title: str,
11
+ metrics: BenchmarkMetrics,
12
+ hardware: str,
13
+ training_settings: dict[str, str],
14
+ published_reference: dict[str, float] | None = None,
15
+ ) -> None:
16
+ lines = [f"# {title}", "", f"- Hardware: {hardware}"]
17
+ for key, value in training_settings.items():
18
+ lines.append(f"- {key}: {value}")
19
+
20
+ lines.extend(["", "## Success"])
21
+ for task, score in metrics.per_task_success.items():
22
+ lines.append(f"- {task}: {score:.3f}")
23
+ lines.append(f"- mean_success: {metrics.mean_success:.3f}")
24
+
25
+ if published_reference:
26
+ lines.extend(["", "## Published Reference"])
27
+ for task, score in published_reference.items():
28
+ lines.append(f"- {task}: {score:.3f}")
29
+
30
+ output_path.parent.mkdir(parents=True, exist_ok=True)
31
+ output_path.write_text("\n".join(lines), encoding="utf-8")
32
+
33
+
34
+ def write_comparison_report(
35
+ output_path: Path,
36
+ title: str,
37
+ sections: dict[str, dict[str, float | str]],
38
+ ) -> None:
39
+ lines = [f"# {title}", ""]
40
+ for section_name, values in sections.items():
41
+ lines.append(f"## {section_name}")
42
+ for key, value in values.items():
43
+ if isinstance(value, float):
44
+ lines.append(f"- {key}: {value:.3f}")
45
+ else:
46
+ lines.append(f"- {key}: {value}")
47
+ lines.append("")
48
+
49
+ output_path.parent.mkdir(parents=True, exist_ok=True)
50
+ output_path.write_text("\n".join(lines).rstrip() + "\n", encoding="utf-8")
code/reveal_vla_bimanual/eval/run_ablations.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ from pathlib import Path
6
+
7
+ from eval.ablations import MANDATORY_ABLATIONS
8
+ from eval.report import write_comparison_report
9
+ from eval.run_reveal_benchmark import evaluate_model, load_model
10
+ from sim_reveal import available_proxy_names
11
+
12
+ import torch
13
+
14
+
15
+ def main() -> None:
16
+ parser = argparse.ArgumentParser()
17
+ parser.add_argument("--checkpoint", required=True)
18
+ parser.add_argument("--episodes", type=int, default=24)
19
+ parser.add_argument("--resolution", type=int, default=None)
20
+ parser.add_argument("--output-root", default="/workspace/reports/reveal_ablation")
21
+ parser.add_argument("--proxies", nargs="*", default=None)
22
+ args = parser.parse_args()
23
+
24
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
25
+ model, checkpoint = load_model(args.checkpoint, device=device)
26
+ resolution = int(args.resolution or checkpoint.get("data_resolution", 96))
27
+ proxies = list(args.proxies or available_proxy_names())
28
+ output_root = Path(args.output_root)
29
+ output_root.mkdir(parents=True, exist_ok=True)
30
+
31
+ sections = {}
32
+ raw = {}
33
+ for ablation in (None, *MANDATORY_ABLATIONS):
34
+ label = "full_model" if ablation is None else ablation
35
+ metrics = evaluate_model(
36
+ model=model,
37
+ device=device,
38
+ proxies=proxies,
39
+ episodes=args.episodes,
40
+ resolution=resolution,
41
+ ablation=ablation,
42
+ )
43
+ raw[label] = {
44
+ "per_task_success": metrics.per_task_success,
45
+ "mean_success": metrics.mean_success,
46
+ "visibility_integral": metrics.visibility_integral,
47
+ "corridor_availability": metrics.corridor_availability,
48
+ "reocclusion_rate": metrics.reocclusion_rate,
49
+ "persistence_horizon_mae": metrics.persistence_horizon_mae,
50
+ "disturbance_cost": metrics.disturbance_cost,
51
+ }
52
+ sections[label] = {
53
+ "mean_success": metrics.mean_success,
54
+ "visibility_integral": metrics.visibility_integral or 0.0,
55
+ "corridor_availability": metrics.corridor_availability or 0.0,
56
+ "reocclusion_rate": metrics.reocclusion_rate or 0.0,
57
+ "persistence_horizon_mae": metrics.persistence_horizon_mae or 0.0,
58
+ "disturbance_cost": metrics.disturbance_cost or 0.0,
59
+ }
60
+
61
+ json_path = output_root / "ablations.json"
62
+ json_path.write_text(json.dumps(raw, indent=2), encoding="utf-8")
63
+ write_comparison_report(output_root / "ablations.md", "Reveal Ablations", sections)
64
+ print(json.dumps({"output_json": str(json_path), "sections": sections}, indent=2))
65
+
66
+
67
+ if __name__ == "__main__":
68
+ main()
code/reveal_vla_bimanual/eval/run_reveal_benchmark.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ from dataclasses import asdict
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ import numpy as np
10
+ import torch
11
+ from torch import Tensor
12
+
13
+ from eval.metrics import (
14
+ BenchmarkMetrics,
15
+ corridor_availability,
16
+ mean_disturbance_cost,
17
+ mean_success,
18
+ persistence_horizon_mae,
19
+ reocclusion_rate,
20
+ visibility_integral,
21
+ )
22
+ from eval.report import write_comparison_report
23
+ from models.action_decoder import ChunkDecoderConfig
24
+ from models.backbones import FrozenVLBackboneConfig
25
+ from models.multiview_fusion import MultiViewFusionConfig
26
+ from models.planner import PlannerConfig
27
+ from models.policy import PolicyConfig
28
+ from models.reveal_head import RevealHeadConfig
29
+ from models.world_model import RevealWMConfig
30
+ from sim_reveal import available_proxy_names, make_proxy_env
31
+ from train.trainer import TrainerConfig, build_policy
32
+
33
+
34
def _policy_config_from_dict(cfg: dict[str, Any]) -> PolicyConfig:
    """Reconstruct a ``PolicyConfig`` from the nested dicts stored in a checkpoint.

    Each sub-config section of *cfg* is expanded into its matching dataclass;
    raises ``KeyError`` if a section is missing.
    """
    section_types = {
        "backbone": FrozenVLBackboneConfig,
        "fusion": MultiViewFusionConfig,
        "decoder": ChunkDecoderConfig,
        "reveal_head": RevealHeadConfig,
        "world_model": RevealWMConfig,
        "planner": PlannerConfig,
    }
    kwargs = {name: ctor(**cfg[name]) for name, ctor in section_types.items()}
    return PolicyConfig(**kwargs)
43
+
44
+
45
def _trainer_config_from_dict(cfg: dict[str, Any]) -> TrainerConfig:
    """Rebuild the ``TrainerConfig`` from its checkpointed dict form."""
    fields = dict(cfg)
    return TrainerConfig(**fields)
47
+
48
+
49
def load_model(checkpoint_path: str | Path, device: torch.device) -> tuple[torch.nn.Module, dict[str, Any]]:
    """Restore a trained policy from *checkpoint_path*.

    Args:
        checkpoint_path: Path to a ``.pt`` file containing ``policy_config``,
            ``trainer_config`` and ``state_dict`` entries.
        device: Device the rebuilt model is moved to.

    Returns:
        Tuple of (model on *device* in eval mode, raw checkpoint dict so
        callers can read metadata such as ``data_resolution``).
    """
    # weights_only=False keeps pre-torch-2.6 behavior: this checkpoint stores
    # plain-Python config dicts alongside the tensors, which the weights-only
    # loader rejects. Only load checkpoints from trusted sources.
    checkpoint = torch.load(Path(checkpoint_path), map_location="cpu", weights_only=False)
    policy_config = _policy_config_from_dict(checkpoint["policy_config"])
    trainer_config = _trainer_config_from_dict(checkpoint["trainer_config"])
    model = build_policy(policy_config, trainer_config).to(device)
    model.load_state_dict(checkpoint["state_dict"])
    model.eval()
    return model, checkpoint
57
+
58
+
59
+ def _prepare_batch(observation: dict[str, Any], device: torch.device) -> dict[str, Any]:
60
+ images = torch.from_numpy(observation["images"]).permute(0, 3, 1, 2).unsqueeze(0).float() / 255.0
61
+ proprio = torch.from_numpy(observation["proprio"]).unsqueeze(0).float()
62
+ return {
63
+ "images": images.to(device),
64
+ "proprio": proprio.to(device),
65
+ "texts": [observation["text"]],
66
+ }
67
+
68
+
69
+ def _apply_camera_ablation(images: Tensor, ablation: str | None) -> Tensor:
70
+ images = images.clone()
71
+ if ablation == "no_wrist_cameras":
72
+ images[:, 1:] = 0.0
73
+ if ablation == "no_global_camera":
74
+ images[:, 0] = 0.0
75
+ return images
76
+
77
+
78
def select_chunk(
    model: torch.nn.Module,
    batch: dict[str, Any],
    ablation: str | None = None,
) -> tuple[Tensor, dict[str, Tensor]]:
    """Run one forward pass and choose the action chunk to execute.

    The *ablation* label steers both the forward call (camera blanking,
    plan on/off, support-mode conditioning) and which output tensor is
    returned. Returns ``(chunk, raw_outputs)``.
    """
    forward_kwargs = {
        "images": _apply_camera_ablation(batch["images"], ablation),
        "proprio": batch["proprio"],
        "texts": batch["texts"],
    }
    if not hasattr(model, "reveal_head"):
        # Plain baseline policy without the reveal machinery.
        outputs = model(**forward_kwargs)
        return outputs["action_mean"], outputs
    if ablation == "no_world_model":
        # Skip the world-model planning pass entirely.
        outputs = model(**forward_kwargs, plan=False)
        return outputs["action_mean"], outputs
    outputs = model(
        **forward_kwargs,
        plan=True,
        support_mode_conditioning=(ablation != "no_support_mode_conditioning"),
    )
    if ablation == "no_planner_reranking":
        # Take the first candidate without letting the planner re-rank.
        return outputs["candidate_chunks"][:, 0], outputs
    if "planned_chunk" in outputs and ablation != "no_reveal_state_head":
        return outputs["planned_chunk"], outputs
    return outputs["action_mean"], outputs
105
+
106
+
107
def evaluate_model(
    model: torch.nn.Module,
    device: torch.device,
    proxies: list[str],
    episodes: int,
    resolution: int,
    ablation: str | None = None,
) -> BenchmarkMetrics:
    """Roll out *model* on every proxy task and aggregate benchmark metrics.

    Args:
        model: Policy queried through ``select_chunk`` each step.
        device: Device observation batches are moved to.
        proxies: Proxy environment names understood by ``make_proxy_env``.
        episodes: Number of episodes per proxy task.
        resolution: Image resolution passed to the proxy env.
        ablation: Optional ablation label forwarded to ``select_chunk``.

    Returns:
        ``BenchmarkMetrics`` with per-task success rates plus mean
        visibility/corridor/re-occlusion/persistence/disturbance scores;
        a field is ``None`` when no samples were collected for it.
    """
    per_task_success: dict[str, float] = {}
    visibility_scores: list[float] = []
    corridor_scores: list[float] = []
    reocclusion_scores: list[float] = []
    persistence_errors: list[float] = []
    disturbance_scores: list[float] = []

    for proxy_offset, proxy_name in enumerate(proxies):
        successes: list[float] = []
        for episode_idx in range(episodes):
            # 10_000-wide seed stride keeps per-task seed ranges disjoint and
            # evaluation deterministic across runs.
            env = make_proxy_env(
                proxy_name=proxy_name,
                resolution=resolution,
                seed=proxy_offset * 10_000 + episode_idx,
            )
            # reset() yields (observation, privileged simulator state); the
            # privileged dict carries ground-truth visibility/corridor info.
            observation, privileged_state = env.reset(seed=proxy_offset * 10_000 + episode_idx)
            episode_visibility = [float(privileged_state["visibility"])]
            # Corridor is "available" if any cell is feasible for the current support mode.
            episode_corridor = [float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any())]
            episode_disturbance = [float(privileged_state["disturbance_cost"])]
            done = False
            while not done:
                batch = _prepare_batch(observation, device=device)
                with torch.no_grad():
                    chunk, outputs = select_chunk(model, batch, ablation=ablation)
                # Receding horizon: execute only the first action of the chunk.
                action = chunk[0, 0].detach().cpu().numpy()
                # step() returns (obs, reward, terminated, truncated, privileged_state).
                observation, _, terminated, truncated, privileged_state = env.step(action)
                episode_visibility.append(float(privileged_state["visibility"]))
                episode_corridor.append(float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any()))
                episode_disturbance.append(float(privileged_state["disturbance_cost"]))
                # Persistence-horizon error is only measurable when the model
                # emits a reveal state and the head is not ablated away.
                if "reveal_state" in outputs and ablation != "no_reveal_state_head":
                    persistence_errors.append(
                        persistence_horizon_mae(
                            outputs["reveal_state"]["persistence_horizon"][0].detach().cpu().numpy(),
                            privileged_state["persistence_horizon"],
                        )
                    )
                done = bool(terminated or truncated)
            successes.append(float(privileged_state["retrieval_success"]))
            visibility_scores.append(visibility_integral(np.asarray(episode_visibility)))
            corridor_scores.append(corridor_availability(np.asarray(episode_corridor)))
            reocclusion_scores.append(reocclusion_rate(np.asarray(episode_corridor)))
            disturbance_scores.append(mean_disturbance_cost(np.asarray(episode_disturbance)))
        per_task_success[proxy_name] = float(np.mean(successes))

    return BenchmarkMetrics(
        per_task_success=per_task_success,
        mean_success=mean_success(per_task_success),
        visibility_integral=float(np.mean(visibility_scores)) if visibility_scores else None,
        corridor_availability=float(np.mean(corridor_scores)) if corridor_scores else None,
        reocclusion_rate=float(np.mean(reocclusion_scores)) if reocclusion_scores else None,
        persistence_horizon_mae=float(np.mean(persistence_errors)) if persistence_errors else None,
        disturbance_cost=float(np.mean(disturbance_scores)) if disturbance_scores else None,
    )
168
+
169
+
170
+ def _metrics_to_dict(metrics: BenchmarkMetrics) -> dict[str, float | dict[str, float]]:
171
+ return {
172
+ "per_task_success": metrics.per_task_success,
173
+ "mean_success": metrics.mean_success,
174
+ "visibility_integral": metrics.visibility_integral or 0.0,
175
+ "corridor_availability": metrics.corridor_availability or 0.0,
176
+ "reocclusion_rate": metrics.reocclusion_rate or 0.0,
177
+ "persistence_horizon_mae": metrics.persistence_horizon_mae or 0.0,
178
+ "disturbance_cost": metrics.disturbance_cost or 0.0,
179
+ }
180
+
181
+
182
def main() -> None:
    """CLI entry point: evaluate checkpoints on the reveal proxy benchmark.

    Each ``--model`` argument is a ``label=/abs/path/checkpoint.pt`` pair; all
    models are evaluated on the same proxy suite and the results are written
    as one JSON payload plus a markdown comparison report under
    ``--output-root``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", action="append", required=True, help="label=/abs/path/checkpoint.pt")
    parser.add_argument("--episodes", type=int, default=24)
    parser.add_argument("--resolution", type=int, default=None)
    parser.add_argument("--ablation", default=None)
    parser.add_argument("--output-root", default="/workspace/reports/reveal_eval")
    parser.add_argument("--proxies", nargs="*", default=None)
    args = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    proxies = list(args.proxies or available_proxy_names())
    output_root = Path(args.output_root)
    output_root.mkdir(parents=True, exist_ok=True)

    sections: dict[str, dict[str, float | str]] = {}
    raw_metrics: dict[str, dict[str, float | dict[str, float]]] = {}
    for item in args.model:
        # Fail fast with a usage message instead of an opaque ValueError when
        # the "label=" separator is missing.
        if "=" not in item:
            parser.error(f"--model expects label=/abs/path/checkpoint.pt, got {item!r}")
        label, checkpoint_path = item.split("=", maxsplit=1)
        model, checkpoint = load_model(checkpoint_path, device=device)
        # Fall back to the resolution the checkpoint was trained at (default 96).
        resolution = int(args.resolution or checkpoint.get("data_resolution", 96))
        metrics = evaluate_model(
            model=model,
            device=device,
            proxies=proxies,
            episodes=args.episodes,
            resolution=resolution,
            ablation=args.ablation,
        )
        raw_metrics[label] = _metrics_to_dict(metrics)
        sections[label] = {
            "checkpoint": checkpoint_path,
            "mean_success": metrics.mean_success,
            "visibility_integral": metrics.visibility_integral or 0.0,
            "corridor_availability": metrics.corridor_availability or 0.0,
            "reocclusion_rate": metrics.reocclusion_rate or 0.0,
            "persistence_horizon_mae": metrics.persistence_horizon_mae or 0.0,
            "disturbance_cost": metrics.disturbance_cost or 0.0,
        }
        for task_name, score in metrics.per_task_success.items():
            sections[label][f"{task_name}_success"] = score

    json_path = output_root / "reveal_benchmark.json"
    json_path.write_text(json.dumps(raw_metrics, indent=2), encoding="utf-8")
    write_comparison_report(output_root / "reveal_benchmark.md", "Reveal Proxy Benchmark", sections)
    print(json.dumps({"output_json": str(json_path), "sections": sections}, indent=2))
228
+
229
+
230
# Script entry point for the reveal proxy benchmark runner.
if __name__ == "__main__":
    main()