lsnu commited on
Commit
c6cdf9b
·
verified ·
1 Parent(s): 10471c5

Add files using upload-large-folder tool

Browse files
Files changed (43) hide show
  1. artifacts/logs/system/rlbench_launch_smoke.txt +84 -0
  2. artifacts/logs/system/x99.conf +33 -0
  3. artifacts/logs/system/x99.log +16 -0
  4. artifacts/logs/system/x99.pid +1 -0
  5. artifacts/outputs/reveal_runs/proxy_backbone_only/config_resolved.yaml +15 -8
  6. artifacts/outputs/reveal_runs/proxy_backbone_only/metrics.json +80 -32
  7. artifacts/outputs/reveal_runs/proxy_backbone_only_clip/config_resolved.yaml +15 -8
  8. artifacts/outputs/reveal_runs/proxy_backbone_only_clip/metrics.json +40 -16
  9. artifacts/outputs/reveal_runs/proxy_reveal_state/config_resolved.yaml +18 -9
  10. artifacts/outputs/reveal_runs/proxy_reveal_state/metrics.json +208 -128
  11. artifacts/outputs/reveal_runs/proxy_reveal_state_clip/config_resolved.yaml +18 -9
  12. artifacts/outputs/reveal_runs/reveal_ablation_v4/ablations.json +93 -0
  13. artifacts/outputs/reveal_runs/reveal_ablation_v4/ablations.md +57 -0
  14. artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep18/ablations.json +93 -0
  15. artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep18/ablations.md +57 -0
  16. artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep18/ablations.partial.json +156 -0
  17. artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep4/ablations.json +93 -0
  18. artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep4/ablations.md +57 -0
  19. artifacts/outputs/reveal_runs/reveal_ablation_v4_det/ablations.json +93 -0
  20. artifacts/outputs/reveal_runs/reveal_ablation_v4_det/ablations.md +57 -0
  21. artifacts/outputs/smoke/proxy_backbone_only_smoke/config_resolved.yaml +98 -0
  22. artifacts/outputs/smoke/proxy_backbone_only_smoke/metrics.json +40 -0
  23. artifacts/outputs/smoke/proxy_reveal_state_smoke/config_resolved.yaml +98 -0
  24. artifacts/outputs/smoke/proxy_reveal_state_smoke/metrics.json +68 -0
  25. artifacts/outputs/smoke/reveal_ablation_ep2/ablations.json +93 -0
  26. artifacts/outputs/smoke/reveal_ablation_ep2/ablations.md +57 -0
  27. artifacts/outputs/smoke/reveal_eval_ep2/reveal_benchmark.json +28 -0
  28. artifacts/outputs/smoke/reveal_eval_ep2/reveal_benchmark.md +25 -0
  29. code/reveal_vla_bimanual/eval/run_ablations.py +31 -2
  30. code/reveal_vla_bimanual/eval/run_reveal_benchmark.py +42 -2
  31. code/reveal_vla_bimanual/eval/run_rlbench_rollout_eval.py +59 -5
  32. code/reveal_vla_bimanual/models/action_decoder.py +110 -19
  33. code/reveal_vla_bimanual/models/observation_memory.py +56 -0
  34. code/reveal_vla_bimanual/models/planner.py +60 -32
  35. code/reveal_vla_bimanual/models/policy.py +89 -10
  36. code/reveal_vla_bimanual/models/reveal_head.py +88 -25
  37. code/reveal_vla_bimanual/models/world_model.py +10 -0
  38. code/reveal_vla_bimanual/reveal_vla_bimanual.egg-info/PKG-INFO +67 -2
  39. code/reveal_vla_bimanual/reveal_vla_bimanual.egg-info/SOURCES.txt +15 -0
  40. code/reveal_vla_bimanual/scripts/start_rlbench_x.sh +42 -1
  41. code/reveal_vla_bimanual/sim_reveal/dataset.py +102 -3
  42. code/reveal_vla_bimanual/sim_reveal/generate_dataset.py +4 -0
  43. code/reveal_vla_bimanual/sim_reveal/procedural_envs.py +189 -59
artifacts/logs/system/rlbench_launch_smoke.txt ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ qt.qpa.xcb: QXcbConnection: XCB error: 148 (Unknown), sequence: 181, resource id: 0, major code: 140 (Unknown), minor code: 20
2
+ WARNING:root:not sure how _robot_shapes are used is used.
3
+ {
4
+ "display": ":99",
5
+ "headless": true,
6
+ "task": "bimanual_lift_ball",
7
+ "description": "Lift the ball",
8
+ "rgb_shapes": {
9
+ "front": [
10
+ 224,
11
+ 224,
12
+ 3
13
+ ],
14
+ "wrist_left": [
15
+ 224,
16
+ 224,
17
+ 3
18
+ ],
19
+ "wrist_right": [
20
+ 224,
21
+ 224,
22
+ 3
23
+ ]
24
+ },
25
+ "intrinsic_shapes": {
26
+ "front": [
27
+ 3,
28
+ 3
29
+ ],
30
+ "wrist_left": [
31
+ 3,
32
+ 3
33
+ ],
34
+ "wrist_right": [
35
+ 3,
36
+ 3
37
+ ]
38
+ },
39
+ "extrinsic_shapes": {
40
+ "front": [
41
+ 4,
42
+ 4
43
+ ],
44
+ "wrist_left": [
45
+ 4,
46
+ 4
47
+ ],
48
+ "wrist_right": [
49
+ 4,
50
+ 4
51
+ ]
52
+ },
53
+ "point_cloud_shapes": {
54
+ "front": [
55
+ 224,
56
+ 224,
57
+ 3
58
+ ],
59
+ "wrist_left": [
60
+ 224,
61
+ 224,
62
+ 3
63
+ ],
64
+ "wrist_right": [
65
+ 224,
66
+ 224,
67
+ 3
68
+ ]
69
+ },
70
+ "proprio_shape": [
71
+ 6
72
+ ],
73
+ "action_shape": [
74
+ 16
75
+ ],
76
+ "reward": 0.0,
77
+ "done": false,
78
+ "front_rgb_shape_after_step": [
79
+ 224,
80
+ 224,
81
+ 3
82
+ ]
83
+ }
84
+ [CoppeliaSim:loadinfo] done.
artifacts/logs/system/x99.conf ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Section "ServerLayout"
2
+ Identifier "Layout0"
3
+ Screen 0 "Screen0"
4
+ EndSection
5
+
6
+ Section "Monitor"
7
+ Identifier "Monitor0"
8
+ VendorName "Unknown"
9
+ ModelName "Unknown"
10
+ Option "DPMS"
11
+ EndSection
12
+
13
+ Section "Device"
14
+ Identifier "Device0"
15
+ Driver "nvidia"
16
+ VendorName "NVIDIA Corporation"
17
+ BusID "PCI:1:0:0"
18
+ Option "AllowEmptyInitialConfiguration" "True"
19
+ Option "UseDisplayDevice" "None"
20
+ Option "ProbeAllGpus" "False"
21
+ EndSection
22
+
23
+ Section "Screen"
24
+ Identifier "Screen0"
25
+ Device "Device0"
26
+ Monitor "Monitor0"
27
+ DefaultDepth 24
28
+ Option "AllowEmptyInitialConfiguration" "True"
29
+ SubSection "Display"
30
+ Depth 24
31
+ Virtual 1280 1024
32
+ EndSubSection
33
+ EndSection
artifacts/logs/system/x99.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ X.Org X Server 1.21.1.4
3
+ X Protocol Version 11, Revision 0
4
+ Current Operating System: Linux c36959bce5da 6.8.0-52-generic #53~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Wed Jan 15 19:18:46 UTC 2 x86_64
5
+ Kernel command line: BOOT_IMAGE=/boot/vmlinuz-6.8.0-52-generic root=UUID=81695786-7953-442a-89df-662fe6d16003 ro systemd.unified_cgroup_hierarchy=false
6
+ xorg-server 2:21.1.4-2ubuntu1.7~22.04.16 (For technical support please see http://www.ubuntu.com/support)
7
+ Current version of pixman: 0.40.0
8
+ Before reporting problems, check http://wiki.x.org
9
+ to make sure that you have the latest version.
10
+ Markers: (--) probed, (**) from config file, (==) default setting,
11
+ (++) from command line, (!!) notice, (II) informational,
12
+ (WW) warning, (EE) error, (NI) not implemented, (??) unknown.
13
+ (==) Log file: "/var/log/Xorg.99.log", Time: Mon Mar 23 15:30:14 2026
14
+ (++) Using config file: "/workspace/logs/x99.conf"
15
+ (==) Using config directory: "/etc/X11/xorg.conf.d"
16
+ (==) Using system config directory "/usr/share/X11/xorg.conf.d"
artifacts/logs/system/x99.pid ADDED
@@ -0,0 +1 @@
 
 
1
+ 9762
artifacts/outputs/reveal_runs/proxy_backbone_only/config_resolved.yaml CHANGED
@@ -10,11 +10,13 @@ data:
10
  resolution: 96
11
  train_episodes_per_proxy: 48
12
  val_episodes_per_proxy: 16
13
- train_dataset_path: /workspace/data/reveal_proxy/proxy_train_v2.pt
14
- val_dataset_path: /workspace/data/reveal_proxy/proxy_val_v2.pt
15
  rebuild_dataset: true
16
  chunk_horizon: 8
17
  rollout_horizon: 5
 
 
18
  seed: 7
19
  optim:
20
  epochs: 8
@@ -45,6 +47,11 @@ policy:
45
  dropout: 0.1
46
  proprio_dim: 32
47
  proprio_tokens: 1
 
 
 
 
 
48
  decoder:
49
  hidden_dim: 128
50
  num_heads: 4
@@ -68,13 +75,10 @@ policy:
68
  num_approach_templates: 32
69
  rollout_horizon: 5
70
  planner:
 
71
  num_candidates: 8
72
- corridor_weight: 1.0
73
- persistence_weight: 0.5
74
- proposal_weight: 0.5
75
- disturbance_weight: 0.75
76
- reocclusion_weight: 0.5
77
- visibility_weight: 0.25
78
  loss_weights:
79
  action: 1.0
80
  support_mode: 0.1
@@ -83,3 +87,6 @@ loss_weights:
83
  disturbance: 0.05
84
  world_model: 0.1
85
  belief: 0.05
 
 
 
 
10
  resolution: 96
11
  train_episodes_per_proxy: 48
12
  val_episodes_per_proxy: 16
13
+ train_dataset_path: /workspace/data/reveal_proxy/proxy_train_v4_noleak_counterfactual.pt
14
+ val_dataset_path: /workspace/data/reveal_proxy/proxy_val_v4_noleak_counterfactual.pt
15
  rebuild_dataset: true
16
  chunk_horizon: 8
17
  rollout_horizon: 5
18
+ history_steps: 2
19
+ planner_candidates: 4
20
  seed: 7
21
  optim:
22
  epochs: 8
 
47
  dropout: 0.1
48
  proprio_dim: 32
49
  proprio_tokens: 1
50
+ memory:
51
+ hidden_dim: 128
52
+ history_steps: 2
53
+ num_layers: 1
54
+ dropout: 0.1
55
  decoder:
56
  hidden_dim: 128
57
  num_heads: 4
 
75
  num_approach_templates: 32
76
  rollout_horizon: 5
77
  planner:
78
+ hidden_dim: 128
79
  num_candidates: 8
80
+ action_dim: 14
81
+ utility_margin: 0.1
 
 
 
 
82
  loss_weights:
83
  action: 1.0
84
  support_mode: 0.1
 
87
  disturbance: 0.05
88
  world_model: 0.1
89
  belief: 0.05
90
+ planner_success: 0.0
91
+ planner_risk: 0.0
92
+ planner_ranking: 0.0
artifacts/outputs/reveal_runs/proxy_backbone_only/metrics.json CHANGED
@@ -2,104 +2,152 @@
2
  {
3
  "epoch": 0,
4
  "train": {
5
- "action": 0.06700062464612226,
6
- "total": 0.06700062464612226,
 
 
 
7
  "world_model": 0.0
8
  },
9
  "val": {
10
- "action": 0.02209080010652542,
11
- "total": 0.02209080010652542,
 
 
 
12
  "world_model": 0.0
13
  }
14
  },
15
  {
16
  "epoch": 1,
17
  "train": {
18
- "action": 0.02441179845482111,
19
- "total": 0.02441179845482111,
 
 
 
20
  "world_model": 0.0
21
  },
22
  "val": {
23
- "action": 0.01861108955927193,
24
- "total": 0.01861108955927193,
 
 
 
25
  "world_model": 0.0
26
  }
27
  },
28
  {
29
  "epoch": 2,
30
  "train": {
31
- "action": 0.020652000947544973,
32
- "total": 0.020652000947544973,
 
 
 
33
  "world_model": 0.0
34
  },
35
  "val": {
36
- "action": 0.01581601658836007,
37
- "total": 0.01581601658836007,
 
 
 
38
  "world_model": 0.0
39
  }
40
  },
41
  {
42
  "epoch": 3,
43
  "train": {
44
- "action": 0.01735153196689983,
45
- "total": 0.01735153196689983,
 
 
 
46
  "world_model": 0.0
47
  },
48
  "val": {
49
- "action": 0.01413003564812243,
50
- "total": 0.01413003564812243,
 
 
 
51
  "world_model": 0.0
52
  }
53
  },
54
  {
55
  "epoch": 4,
56
  "train": {
57
- "action": 0.015502698409060637,
58
- "total": 0.015502698409060637,
 
 
 
59
  "world_model": 0.0
60
  },
61
  "val": {
62
- "action": 0.012679400155320764,
63
- "total": 0.012679400155320764,
 
 
 
64
  "world_model": 0.0
65
  }
66
  },
67
  {
68
  "epoch": 5,
69
  "train": {
70
- "action": 0.015521424783704182,
71
- "total": 0.015521424783704182,
 
 
 
72
  "world_model": 0.0
73
  },
74
  "val": {
75
- "action": 0.011973066837526858,
76
- "total": 0.011973066837526858,
 
 
 
77
  "world_model": 0.0
78
  }
79
  },
80
  {
81
  "epoch": 6,
82
  "train": {
83
- "action": 0.014476912096142769,
84
- "total": 0.014476912096142769,
 
 
 
85
  "world_model": 0.0
86
  },
87
  "val": {
88
- "action": 0.011093099834397435,
89
- "total": 0.011093099834397435,
 
 
 
90
  "world_model": 0.0
91
  }
92
  },
93
  {
94
  "epoch": 7,
95
  "train": {
96
- "action": 0.012226066280466815,
97
- "total": 0.012226066280466815,
 
 
 
98
  "world_model": 0.0
99
  },
100
  "val": {
101
- "action": 0.012411019764840603,
102
- "total": 0.012411019764840603,
 
 
 
103
  "world_model": 0.0
104
  }
105
  }
 
2
  {
3
  "epoch": 0,
4
  "train": {
5
+ "action": 0.07641935829694073,
6
+ "planner_ranking": 0.0,
7
+ "planner_risk": 0.0,
8
+ "planner_success": 0.0,
9
+ "total": 0.07641935829694073,
10
  "world_model": 0.0
11
  },
12
  "val": {
13
+ "action": 0.023501936811953783,
14
+ "planner_ranking": 0.0,
15
+ "planner_risk": 0.0,
16
+ "planner_success": 0.0,
17
+ "total": 0.023501936811953783,
18
  "world_model": 0.0
19
  }
20
  },
21
  {
22
  "epoch": 1,
23
  "train": {
24
+ "action": 0.025264446934064228,
25
+ "planner_ranking": 0.0,
26
+ "planner_risk": 0.0,
27
+ "planner_success": 0.0,
28
+ "total": 0.025264446934064228,
29
  "world_model": 0.0
30
  },
31
  "val": {
32
+ "action": 0.020145865622907877,
33
+ "planner_ranking": 0.0,
34
+ "planner_risk": 0.0,
35
+ "planner_success": 0.0,
36
+ "total": 0.020145865622907877,
37
  "world_model": 0.0
38
  }
39
  },
40
  {
41
  "epoch": 2,
42
  "train": {
43
+ "action": 0.02363461550946037,
44
+ "planner_ranking": 0.0,
45
+ "planner_risk": 0.0,
46
+ "planner_success": 0.0,
47
+ "total": 0.02363461550946037,
48
  "world_model": 0.0
49
  },
50
  "val": {
51
+ "action": 0.019843176240101457,
52
+ "planner_ranking": 0.0,
53
+ "planner_risk": 0.0,
54
+ "planner_success": 0.0,
55
+ "total": 0.019843176240101457,
56
  "world_model": 0.0
57
  }
58
  },
59
  {
60
  "epoch": 3,
61
  "train": {
62
+ "action": 0.022404288329804938,
63
+ "planner_ranking": 0.0,
64
+ "planner_risk": 0.0,
65
+ "planner_success": 0.0,
66
+ "total": 0.022404288329804938,
67
  "world_model": 0.0
68
  },
69
  "val": {
70
+ "action": 0.02007088577374816,
71
+ "planner_ranking": 0.0,
72
+ "planner_risk": 0.0,
73
+ "planner_success": 0.0,
74
+ "total": 0.02007088577374816,
75
  "world_model": 0.0
76
  }
77
  },
78
  {
79
  "epoch": 4,
80
  "train": {
81
+ "action": 0.022064159469058115,
82
+ "planner_ranking": 0.0,
83
+ "planner_risk": 0.0,
84
+ "planner_success": 0.0,
85
+ "total": 0.022064159469058115,
86
  "world_model": 0.0
87
  },
88
  "val": {
89
+ "action": 0.020531073212623596,
90
+ "planner_ranking": 0.0,
91
+ "planner_risk": 0.0,
92
+ "planner_success": 0.0,
93
+ "total": 0.020531073212623596,
94
  "world_model": 0.0
95
  }
96
  },
97
  {
98
  "epoch": 5,
99
  "train": {
100
+ "action": 0.022056781298791368,
101
+ "planner_ranking": 0.0,
102
+ "planner_risk": 0.0,
103
+ "planner_success": 0.0,
104
+ "total": 0.022056781298791368,
105
  "world_model": 0.0
106
  },
107
  "val": {
108
+ "action": 0.02022958523593843,
109
+ "planner_ranking": 0.0,
110
+ "planner_risk": 0.0,
111
+ "planner_success": 0.0,
112
+ "total": 0.02022958523593843,
113
  "world_model": 0.0
114
  }
115
  },
116
  {
117
  "epoch": 6,
118
  "train": {
119
+ "action": 0.02186405410369237,
120
+ "planner_ranking": 0.0,
121
+ "planner_risk": 0.0,
122
+ "planner_success": 0.0,
123
+ "total": 0.02186405410369237,
124
  "world_model": 0.0
125
  },
126
  "val": {
127
+ "action": 0.02032211748883128,
128
+ "planner_ranking": 0.0,
129
+ "planner_risk": 0.0,
130
+ "planner_success": 0.0,
131
+ "total": 0.02032211748883128,
132
  "world_model": 0.0
133
  }
134
  },
135
  {
136
  "epoch": 7,
137
  "train": {
138
+ "action": 0.0213407213644435,
139
+ "planner_ranking": 0.0,
140
+ "planner_risk": 0.0,
141
+ "planner_success": 0.0,
142
+ "total": 0.0213407213644435,
143
  "world_model": 0.0
144
  },
145
  "val": {
146
+ "action": 0.019940752536058426,
147
+ "planner_ranking": 0.0,
148
+ "planner_risk": 0.0,
149
+ "planner_success": 0.0,
150
+ "total": 0.019940752536058426,
151
  "world_model": 0.0
152
  }
153
  }
artifacts/outputs/reveal_runs/proxy_backbone_only_clip/config_resolved.yaml CHANGED
@@ -10,11 +10,13 @@ data:
10
  resolution: 224
11
  train_episodes_per_proxy: 48
12
  val_episodes_per_proxy: 16
13
- train_dataset_path: /workspace/data/reveal_proxy/proxy_train_clip224.pt
14
- val_dataset_path: /workspace/data/reveal_proxy/proxy_val_clip224.pt
15
  rebuild_dataset: true
16
  chunk_horizon: 8
17
  rollout_horizon: 5
 
 
18
  seed: 7
19
  optim:
20
  epochs: 4
@@ -48,6 +50,11 @@ policy:
48
  dropout: 0.1
49
  proprio_dim: 32
50
  proprio_tokens: 1
 
 
 
 
 
51
  decoder:
52
  hidden_dim: 512
53
  num_heads: 8
@@ -71,13 +78,10 @@ policy:
71
  num_approach_templates: 32
72
  rollout_horizon: 5
73
  planner:
 
74
  num_candidates: 8
75
- corridor_weight: 1.0
76
- persistence_weight: 0.5
77
- proposal_weight: 0.5
78
- disturbance_weight: 0.75
79
- reocclusion_weight: 0.5
80
- visibility_weight: 0.25
81
  loss_weights:
82
  action: 1.0
83
  support_mode: 0.1
@@ -86,3 +90,6 @@ loss_weights:
86
  disturbance: 0.05
87
  world_model: 0.1
88
  belief: 0.05
 
 
 
 
10
  resolution: 224
11
  train_episodes_per_proxy: 48
12
  val_episodes_per_proxy: 16
13
+ train_dataset_path: /workspace/data/reveal_proxy/proxy_train_clip224_v4_noleak_counterfactual.pt
14
+ val_dataset_path: /workspace/data/reveal_proxy/proxy_val_clip224_v4_noleak_counterfactual.pt
15
  rebuild_dataset: true
16
  chunk_horizon: 8
17
  rollout_horizon: 5
18
+ history_steps: 2
19
+ planner_candidates: 4
20
  seed: 7
21
  optim:
22
  epochs: 4
 
50
  dropout: 0.1
51
  proprio_dim: 32
52
  proprio_tokens: 1
53
+ memory:
54
+ hidden_dim: 512
55
+ history_steps: 2
56
+ num_layers: 1
57
+ dropout: 0.1
58
  decoder:
59
  hidden_dim: 512
60
  num_heads: 8
 
78
  num_approach_templates: 32
79
  rollout_horizon: 5
80
  planner:
81
+ hidden_dim: 512
82
  num_candidates: 8
83
+ action_dim: 14
84
+ utility_margin: 0.1
 
 
 
 
85
  loss_weights:
86
  action: 1.0
87
  support_mode: 0.1
 
90
  disturbance: 0.05
91
  world_model: 0.1
92
  belief: 0.05
93
+ planner_success: 0.0
94
+ planner_risk: 0.0
95
+ planner_ranking: 0.0
artifacts/outputs/reveal_runs/proxy_backbone_only_clip/metrics.json CHANGED
@@ -2,52 +2,76 @@
2
  {
3
  "epoch": 0,
4
  "train": {
5
- "action": 0.14342915779711063,
6
- "total": 0.14342915779711063,
 
 
 
7
  "world_model": 0.0
8
  },
9
  "val": {
10
- "action": 0.026520084648851364,
11
- "total": 0.026520084648851364,
 
 
 
12
  "world_model": 0.0
13
  }
14
  },
15
  {
16
  "epoch": 1,
17
  "train": {
18
- "action": 0.01376689436079944,
19
- "total": 0.01376689436079944,
 
 
 
20
  "world_model": 0.0
21
  },
22
  "val": {
23
- "action": 0.00792281218390498,
24
- "total": 0.00792281218390498,
 
 
 
25
  "world_model": 0.0
26
  }
27
  },
28
  {
29
  "epoch": 2,
30
  "train": {
31
- "action": 0.009396829446095057,
32
- "total": 0.009396829446095057,
 
 
 
33
  "world_model": 0.0
34
  },
35
  "val": {
36
- "action": 0.006728713663058385,
37
- "total": 0.006728713663058385,
 
 
 
38
  "world_model": 0.0
39
  }
40
  },
41
  {
42
  "epoch": 3,
43
  "train": {
44
- "action": 0.007774835790102784,
45
- "total": 0.007774835790102784,
 
 
 
46
  "world_model": 0.0
47
  },
48
  "val": {
49
- "action": 0.005187951255634073,
50
- "total": 0.005187951255634073,
 
 
 
51
  "world_model": 0.0
52
  }
53
  }
 
2
  {
3
  "epoch": 0,
4
  "train": {
5
+ "action": 0.22041595953453275,
6
+ "planner_ranking": 0.0,
7
+ "planner_risk": 0.0,
8
+ "planner_success": 0.0,
9
+ "total": 0.22041595953453275,
10
  "world_model": 0.0
11
  },
12
  "val": {
13
+ "action": 0.030633409138000202,
14
+ "planner_ranking": 0.0,
15
+ "planner_risk": 0.0,
16
+ "planner_success": 0.0,
17
+ "total": 0.030633409138000202,
18
  "world_model": 0.0
19
  }
20
  },
21
  {
22
  "epoch": 1,
23
  "train": {
24
+ "action": 0.02929408008144944,
25
+ "planner_ranking": 0.0,
26
+ "planner_risk": 0.0,
27
+ "planner_success": 0.0,
28
+ "total": 0.02929408008144944,
29
  "world_model": 0.0
30
  },
31
  "val": {
32
+ "action": 0.022482769120307194,
33
+ "planner_ranking": 0.0,
34
+ "planner_risk": 0.0,
35
+ "planner_success": 0.0,
36
+ "total": 0.022482769120307194,
37
  "world_model": 0.0
38
  }
39
  },
40
  {
41
  "epoch": 2,
42
  "train": {
43
+ "action": 0.023234238926106723,
44
+ "planner_ranking": 0.0,
45
+ "planner_risk": 0.0,
46
+ "planner_success": 0.0,
47
+ "total": 0.023234238926106723,
48
  "world_model": 0.0
49
  },
50
  "val": {
51
+ "action": 0.018214622157670203,
52
+ "planner_ranking": 0.0,
53
+ "planner_risk": 0.0,
54
+ "planner_success": 0.0,
55
+ "total": 0.018214622157670203,
56
  "world_model": 0.0
57
  }
58
  },
59
  {
60
  "epoch": 3,
61
  "train": {
62
+ "action": 0.017409040848602644,
63
+ "planner_ranking": 0.0,
64
+ "planner_risk": 0.0,
65
+ "planner_success": 0.0,
66
+ "total": 0.017409040848602644,
67
  "world_model": 0.0
68
  },
69
  "val": {
70
+ "action": 0.010200991117883296,
71
+ "planner_ranking": 0.0,
72
+ "planner_risk": 0.0,
73
+ "planner_success": 0.0,
74
+ "total": 0.010200991117883296,
75
  "world_model": 0.0
76
  }
77
  }
artifacts/outputs/reveal_runs/proxy_reveal_state/config_resolved.yaml CHANGED
@@ -10,11 +10,13 @@ data:
10
  resolution: 96
11
  train_episodes_per_proxy: 48
12
  val_episodes_per_proxy: 16
13
- train_dataset_path: /workspace/data/reveal_proxy/proxy_train_v2.pt
14
- val_dataset_path: /workspace/data/reveal_proxy/proxy_val_v2.pt
15
- rebuild_dataset: false
16
  chunk_horizon: 8
17
  rollout_horizon: 5
 
 
18
  seed: 7
19
  optim:
20
  epochs: 8
@@ -45,6 +47,11 @@ policy:
45
  dropout: 0.1
46
  proprio_dim: 32
47
  proprio_tokens: 1
 
 
 
 
 
48
  decoder:
49
  hidden_dim: 128
50
  num_heads: 4
@@ -60,6 +67,8 @@ policy:
60
  num_approach_templates: 32
61
  rollout_horizon: 5
62
  belief_map_size: 32
 
 
63
  predict_belief_map: true
64
  world_model:
65
  hidden_dim: 128
@@ -68,13 +77,10 @@ policy:
68
  num_approach_templates: 32
69
  rollout_horizon: 5
70
  planner:
 
71
  num_candidates: 8
72
- corridor_weight: 1.0
73
- persistence_weight: 0.65
74
- proposal_weight: 0.35
75
- disturbance_weight: 0.8
76
- reocclusion_weight: 0.6
77
- visibility_weight: 0.35
78
  loss_weights:
79
  action: 1.0
80
  support_mode: 0.15
@@ -83,3 +89,6 @@ loss_weights:
83
  disturbance: 0.1
84
  world_model: 0.2
85
  belief: 0.05
 
 
 
 
10
  resolution: 96
11
  train_episodes_per_proxy: 48
12
  val_episodes_per_proxy: 16
13
+ train_dataset_path: /workspace/data/reveal_proxy/proxy_train_v4_noleak_counterfactual.pt
14
+ val_dataset_path: /workspace/data/reveal_proxy/proxy_val_v4_noleak_counterfactual.pt
15
+ rebuild_dataset: true
16
  chunk_horizon: 8
17
  rollout_horizon: 5
18
+ history_steps: 2
19
+ planner_candidates: 4
20
  seed: 7
21
  optim:
22
  epochs: 8
 
47
  dropout: 0.1
48
  proprio_dim: 32
49
  proprio_tokens: 1
50
+ memory:
51
+ hidden_dim: 128
52
+ history_steps: 2
53
+ num_layers: 1
54
+ dropout: 0.1
55
  decoder:
56
  hidden_dim: 128
57
  num_heads: 4
 
67
  num_approach_templates: 32
68
  rollout_horizon: 5
69
  belief_map_size: 32
70
+ field_size: 16
71
+ num_heads: 4
72
  predict_belief_map: true
73
  world_model:
74
  hidden_dim: 128
 
77
  num_approach_templates: 32
78
  rollout_horizon: 5
79
  planner:
80
+ hidden_dim: 128
81
  num_candidates: 8
82
+ action_dim: 14
83
+ utility_margin: 0.1
 
 
 
 
84
  loss_weights:
85
  action: 1.0
86
  support_mode: 0.15
 
89
  disturbance: 0.1
90
  world_model: 0.2
91
  belief: 0.05
92
+ planner_success: 0.2
93
+ planner_risk: 0.1
94
+ planner_ranking: 0.1
artifacts/outputs/reveal_runs/proxy_reveal_state/metrics.json CHANGED
@@ -2,185 +2,265 @@
2
  {
3
  "epoch": 0,
4
  "train": {
5
- "action": 0.2602546961667637,
6
- "belief": 0.4802860766649246,
7
- "corridor": 0.6443073948224386,
8
- "disturbance": 0.006578955658672688,
9
- "persistence": 4.514919241269429,
10
- "support_mode": 0.8015391031901041,
11
- "total": 2.0875226110219955,
12
- "world_model": 5.510057131449382
 
 
 
 
 
13
  },
14
  "val": {
15
- "action": 0.04658499173820019,
16
- "belief": 0.280171237885952,
17
- "corridor": 0.5032978095114231,
18
- "disturbance": 0.003645064221927896,
19
- "persistence": 3.8178451359272003,
20
- "support_mode": 0.6714280992746353,
21
- "total": 1.012940600514412,
22
- "world_model": 1.8441212028265
 
 
 
 
 
23
  }
24
  },
25
  {
26
  "epoch": 1,
27
  "train": {
28
- "action": 0.03881739747400085,
29
- "belief": 0.18641860752056041,
30
- "corridor": 0.3944183625280857,
31
- "disturbance": 0.030439561344489146,
32
- "persistence": 3.206294293204943,
33
- "support_mode": 0.5347911287099123,
34
- "total": 0.9082020496328672,
35
- "world_model": 1.8864398151636124
 
 
 
 
 
36
  },
37
  "val": {
38
- "action": 0.04213718790560961,
39
- "belief": 0.15712551027536392,
40
- "corridor": 0.3507457673549652,
41
- "disturbance": 0.006276358384639025,
42
- "persistence": 1.8078171163797379,
43
- "support_mode": 0.10970124043524265,
44
- "total": 0.6724201738834381,
45
- "world_model": 1.772064983844757
 
 
 
 
 
46
  }
47
  },
48
  {
49
  "epoch": 2,
50
  "train": {
51
- "action": 0.031200370130439598,
52
- "belief": 0.13828600694735846,
53
- "corridor": 0.31750819956262905,
54
- "disturbance": 0.011857866222271696,
55
- "persistence": 1.7015922193725903,
56
- "support_mode": 0.02674841312303518,
57
- "total": 0.6129550884167353,
58
- "world_model": 1.6799074759085972
 
 
 
 
 
59
  },
60
  "val": {
61
- "action": 0.019523032009601593,
62
- "belief": 0.09429990872740746,
63
- "corridor": 0.24884792044758797,
64
- "disturbance": 0.0043011417728848755,
65
- "persistence": 1.5114311277866364,
66
- "support_mode": 0.0060500025865621865,
67
- "total": 0.5359727554023266,
68
- "world_model": 1.5474220663309097
 
 
 
 
 
69
  }
70
  },
71
  {
72
  "epoch": 3,
73
  "train": {
74
- "action": 0.022356805779660743,
75
- "belief": 0.09125891048461199,
76
- "corridor": 0.23351835707823435,
77
- "disturbance": 0.006718798467773013,
78
- "persistence": 1.6300043910741806,
79
- "support_mode": 0.004253969304651643,
80
- "total": 0.5548354809482893,
81
- "world_model": 1.5845081210136414
 
 
 
 
 
82
  },
83
  "val": {
84
- "action": 0.01580847823061049,
85
- "belief": 0.09042494650930166,
86
- "corridor": 0.22376472875475883,
87
- "disturbance": 0.018967560958117247,
88
- "persistence": 1.4363956600427628,
89
- "support_mode": 0.03418254409916699,
90
- "total": 0.5279115326702595,
91
- "world_model": 1.5608257874846458
 
 
 
 
 
92
  }
93
  },
94
  {
95
  "epoch": 4,
96
  "train": {
97
- "action": 0.019881066245337326,
98
- "belief": 0.08954659259567659,
99
- "corridor": 0.21636931287745634,
100
- "disturbance": 0.005539724506282558,
101
- "persistence": 1.592231921851635,
102
- "support_mode": 0.008331454223177085,
103
- "total": 0.5372808227936426,
104
- "world_model": 1.5431083713968594
 
 
 
 
 
105
  },
106
  "val": {
107
- "action": 0.015133287757635117,
108
- "belief": 0.08718204218894243,
109
- "corridor": 0.20481965504586697,
110
- "disturbance": 0.0031357303814729676,
111
- "persistence": 1.3192060887813568,
112
- "support_mode": 0.0030863596766721457,
113
- "total": 0.47997843474149704,
114
- "world_model": 1.4341248571872711
 
 
 
 
 
115
  }
116
  },
117
  {
118
  "epoch": 5,
119
  "train": {
120
- "action": 0.030778280459344387,
121
- "belief": 0.09159998937199514,
122
- "corridor": 0.21967005419234434,
123
- "disturbance": 0.005901901221174437,
124
- "persistence": 1.651158797244231,
125
- "support_mode": 0.0024410486221313477,
126
- "total": 0.5050872204204401,
127
- "world_model": 1.2986134762565296
 
 
 
 
 
128
  },
129
  "val": {
130
- "action": 0.03259791061282158,
131
- "belief": 0.08867455553263426,
132
- "corridor": 0.20528649538755417,
133
- "disturbance": 0.0037689711316488683,
134
- "persistence": 1.3772646486759186,
135
- "support_mode": 0.0007588127191411331,
136
- "total": 0.4101765304803848,
137
- "world_model": 0.9693519398570061
 
 
 
 
 
138
  }
139
  },
140
  {
141
  "epoch": 6,
142
  "train": {
143
- "action": 0.028416083427146077,
144
- "belief": 0.09289384291817744,
145
- "corridor": 0.22298985657592615,
146
- "disturbance": 0.0031898027373244986,
147
- "persistence": 1.2752377291520436,
148
- "support_mode": 0.04850278014297752,
149
- "total": 0.40898223718007404,
150
- "world_model": 0.9810265600681305
 
 
 
 
 
151
  },
152
  "val": {
153
- "action": 0.02159481483977288,
154
- "belief": 0.08797950763255358,
155
- "corridor": 0.20524934865534306,
156
- "disturbance": 0.0015436648827744648,
157
- "persistence": 1.286000706255436,
158
- "support_mode": 0.0010480962373549119,
159
- "total": 0.3605738691985607,
160
- "world_model": 0.8230927512049675
 
 
 
 
 
161
  }
162
  },
163
  {
164
  "epoch": 7,
165
  "train": {
166
- "action": 0.021424691736077268,
167
- "belief": 0.0899931692207853,
168
- "corridor": 0.21607277914881706,
169
- "disturbance": 0.0034827212220989168,
170
- "persistence": 0.9069182885189851,
171
- "support_mode": 0.00435957100125961,
172
- "total": 0.3383450036247571,
173
- "world_model": 0.8875602881113688
 
 
 
 
 
174
  },
175
  "val": {
176
- "action": 0.017686392879113555,
177
- "belief": 0.09035013243556023,
178
- "corridor": 0.21036655083298683,
179
- "disturbance": 0.004888073919573799,
180
- "persistence": 0.5709216743707657,
181
- "support_mode": 0.001884725206764415,
182
- "total": 0.31777225248515606,
183
- "world_model": 0.978156752884388
 
 
 
 
 
184
  }
185
  }
186
  ]
 
2
  {
3
  "epoch": 0,
4
  "train": {
5
+ "action": 0.06191213273753723,
6
+ "belief": 0.3969618324190378,
7
+ "corridor": 0.4305709345887105,
8
+ "disturbance": 0.03469782391524253,
9
+ "persistence": 4.175889949003856,
10
+ "planner_ranking": 0.09875048324465752,
11
+ "planner_risk": 0.04163226300928121,
12
+ "planner_success": 0.6267699748277664,
13
+ "reocclusion": 0.6964956695834795,
14
+ "support_mode": 0.7304433186848959,
15
+ "total": 2.203073134024938,
16
+ "uncertainty": 0.3537220476816098,
17
+ "world_model": 6.738570133845012
18
  },
19
  "val": {
20
+ "action": 0.024067950202152133,
21
+ "belief": 0.12672400567680597,
22
+ "corridor": 0.24239582754671574,
23
+ "disturbance": 0.002230136582511477,
24
+ "persistence": 3.987179756164551,
25
+ "planner_ranking": 0.09912661369889975,
26
+ "planner_risk": 0.011295226053334773,
27
+ "planner_success": 0.6335860788822174,
28
+ "reocclusion": 0.6723387092351913,
29
+ "support_mode": 0.6936560198664665,
30
+ "total": 1.2380555346608162,
31
+ "uncertainty": 0.14553490467369556,
32
+ "world_model": 2.517606645822525
33
  }
34
  },
35
  {
36
  "epoch": 1,
37
  "train": {
38
+ "action": 0.02929696316520373,
39
+ "belief": 0.1461242881293098,
40
+ "corridor": 0.25149450699488324,
41
+ "disturbance": 0.004778304447730382,
42
+ "persistence": 4.120666732390721,
43
+ "planner_ranking": 0.09903711639344692,
44
+ "planner_risk": 0.0110635906457901,
45
+ "planner_success": 0.6181311855713526,
46
+ "reocclusion": 0.6676681761940321,
47
+ "support_mode": 0.6968543653686842,
48
+ "total": 1.2383184656500816,
49
+ "uncertainty": 0.07630281266756356,
50
+ "world_model": 2.42795492708683
51
  },
52
  "val": {
53
+ "action": 0.023099895333871245,
54
+ "belief": 0.12967702373862267,
55
+ "corridor": 0.24076062999665737,
56
+ "disturbance": 0.002764956690953113,
57
+ "persistence": 3.8617295920848846,
58
+ "planner_ranking": 0.09899506811052561,
59
+ "planner_risk": 0.011750921490602195,
60
+ "planner_success": 0.6350084543228149,
61
+ "reocclusion": 0.6680542901158333,
62
+ "support_mode": 0.6658758223056793,
63
+ "total": 1.1821558326482773,
64
+ "uncertainty": 0.03126319474540651,
65
+ "world_model": 2.3316954374313354
66
  }
67
  },
68
  {
69
  "epoch": 2,
70
  "train": {
71
+ "action": 0.02398723006869356,
72
+ "belief": 0.1364164650440216,
73
+ "corridor": 0.24753919864694277,
74
+ "disturbance": 0.0023582005330050984,
75
+ "persistence": 4.01392004887263,
76
+ "planner_ranking": 0.09883626519391935,
77
+ "planner_risk": 0.010473574026642988,
78
+ "planner_success": 0.6160491754611334,
79
+ "reocclusion": 0.6690979475776354,
80
+ "support_mode": 0.6750953321655592,
81
+ "total": 1.1870681991179783,
82
+ "uncertainty": 0.030905649531632662,
83
+ "world_model": 2.2801418056090674
84
  },
85
  "val": {
86
+ "action": 0.01999341929331422,
87
+ "belief": 0.12642040569335222,
88
+ "corridor": 0.2386692836880684,
89
+ "disturbance": 0.0017178563502966426,
90
+ "persistence": 3.911038339138031,
91
+ "planner_ranking": 0.09849496744573116,
92
+ "planner_risk": 0.010672552860341966,
93
+ "planner_success": 0.6324039027094841,
94
+ "reocclusion": 0.6649576723575592,
95
+ "support_mode": 0.6655856594443321,
96
+ "total": 1.1648448407649994,
97
+ "uncertainty": 0.02670970605686307,
98
+ "world_model": 2.2435964047908783
99
  }
100
  },
101
  {
102
  "epoch": 3,
103
  "train": {
104
+ "action": 0.023305251883963745,
105
+ "belief": 0.12743763532489538,
106
+ "corridor": 0.2423833180218935,
107
+ "disturbance": 0.002423852672412371,
108
+ "persistence": 4.037976682186127,
109
+ "planner_ranking": 0.09848632694532473,
110
+ "planner_risk": 0.010158603176629791,
111
+ "planner_success": 0.6135045563181242,
112
+ "reocclusion": 0.670435386399428,
113
+ "support_mode": 0.6804824098944664,
114
+ "total": 1.181638777256012,
115
+ "uncertainty": 0.020501127738195162,
116
+ "world_model": 2.250967080394427
117
  },
118
  "val": {
119
+ "action": 0.0216117303352803,
120
+ "belief": 0.11998547799885273,
121
+ "corridor": 0.23061690665781498,
122
+ "disturbance": 0.0017021069324982818,
123
+ "persistence": 3.871658682823181,
124
+ "planner_ranking": 0.0980530520901084,
125
+ "planner_risk": 0.010595057916361839,
126
+ "planner_success": 0.6308894380927086,
127
+ "reocclusion": 0.663639560341835,
128
+ "support_mode": 0.6656133309006691,
129
+ "total": 1.1477141454815865,
130
+ "uncertainty": 0.018870073137804866,
131
+ "world_model": 2.181487277150154
132
  }
133
  },
134
  {
135
  "epoch": 4,
136
  "train": {
137
+ "action": 0.022862333881979186,
138
+ "belief": 0.1157925771549344,
139
+ "corridor": 0.23798241962989172,
140
+ "disturbance": 0.0024595247232355177,
141
+ "persistence": 4.04269211490949,
142
+ "planner_ranking": 0.0980245132620136,
143
+ "planner_risk": 0.010186576827739676,
144
+ "planner_success": 0.6112014849980673,
145
+ "reocclusion": 0.6680525466799736,
146
+ "support_mode": 0.6751382003227869,
147
+ "total": 1.1686089982589085,
148
+ "uncertainty": 0.01871865172870457,
149
+ "world_model": 2.1998249938090644
150
  },
151
  "val": {
152
+ "action": 0.020634466782212257,
153
+ "belief": 0.11527534108608961,
154
+ "corridor": 0.23497656919062138,
155
+ "disturbance": 0.002371684633544646,
156
+ "persistence": 3.8176176249980927,
157
+ "planner_ranking": 0.09711439348757267,
158
+ "planner_risk": 0.010582514689303935,
159
+ "planner_success": 0.6349476724863052,
160
+ "reocclusion": 0.6623468473553658,
161
+ "support_mode": 0.6932553052902222,
162
+ "total": 1.1341337114572525,
163
+ "uncertainty": 0.024459586245939136,
164
+ "world_model": 2.11751089990139
165
  }
166
  },
167
  {
168
  "epoch": 5,
169
  "train": {
170
+ "action": 0.024459178171431024,
171
+ "belief": 0.14042565568039814,
172
+ "corridor": 0.25979805178940296,
173
+ "disturbance": 0.010113566526949095,
174
+ "persistence": 4.210421055555344,
175
+ "planner_ranking": 0.0971421217545867,
176
+ "planner_risk": 0.01027063278403754,
177
+ "planner_success": 0.6094371701280276,
178
+ "reocclusion": 0.6682968338330587,
179
+ "support_mode": 0.6893241529663404,
180
+ "total": 1.1891141335169475,
181
+ "uncertainty": 0.05712907208362594,
182
+ "world_model": 2.1682801693677902
183
  },
184
  "val": {
185
+ "action": 0.022404357325285673,
186
+ "belief": 0.1317315762862563,
187
+ "corridor": 0.248648414388299,
188
+ "disturbance": 0.001959386427188292,
189
+ "persistence": 3.8251605927944183,
190
+ "planner_ranking": 0.09636734332889318,
191
+ "planner_risk": 0.010734643263276666,
192
+ "planner_success": 0.6269454136490822,
193
+ "reocclusion": 0.6585175022482872,
194
+ "support_mode": 0.687481202185154,
195
+ "total": 1.129515826702118,
196
+ "uncertainty": 0.022852399852126837,
197
+ "world_model": 2.0773144513368607
198
  }
199
  },
200
  {
201
  "epoch": 6,
202
  "train": {
203
+ "action": 0.023649626101056736,
204
+ "belief": 0.10499086945007245,
205
+ "corridor": 0.23988350170354047,
206
+ "disturbance": 0.0025371607140793153,
207
+ "persistence": 4.0185394287109375,
208
+ "planner_ranking": 0.09640810824930668,
209
+ "planner_risk": 0.010166237130761147,
210
+ "planner_success": 0.6055587517718474,
211
+ "reocclusion": 0.6686983207861582,
212
+ "support_mode": 0.6687941774725914,
213
+ "total": 1.1488103543718655,
214
+ "uncertainty": 0.011886686998574683,
215
+ "world_model": 2.1212283273537955
216
  },
217
  "val": {
218
+ "action": 0.02155012753792107,
219
+ "belief": 0.10505348909646273,
220
+ "corridor": 0.22914408333599567,
221
+ "disturbance": 0.0021536786225624382,
222
+ "persistence": 3.736493021249771,
223
+ "planner_ranking": 0.09544396214187145,
224
+ "planner_risk": 0.010955312522128224,
225
+ "planner_success": 0.6418561860918999,
226
+ "reocclusion": 0.6577628254890442,
227
+ "support_mode": 0.665456235408783,
228
+ "total": 1.0746963024139404,
229
+ "uncertainty": 0.013590520713478327,
230
+ "world_model": 1.8803961426019669
231
  }
232
  },
233
  {
234
  "epoch": 7,
235
  "train": {
236
+ "action": 0.024166353822996218,
237
+ "belief": 0.1225533156345288,
238
+ "corridor": 0.24451578905185065,
239
+ "disturbance": 0.0034010016097454354,
240
+ "persistence": 3.975986421108246,
241
+ "planner_ranking": 0.09560706652700901,
242
+ "planner_risk": 0.010631242844586572,
243
+ "planner_success": 0.6020257460574309,
244
+ "reocclusion": 0.6639501129587492,
245
+ "support_mode": 0.6688573931654295,
246
+ "total": 1.0815023109316826,
247
+ "uncertainty": 0.01951570008532144,
248
+ "world_model": 1.7976730863253276
249
  },
250
  "val": {
251
+ "action": 0.020741463406011462,
252
+ "belief": 0.09398300107568502,
253
+ "corridor": 0.22893342934548855,
254
+ "disturbance": 0.0030847050511511043,
255
+ "persistence": 4.020223438739777,
256
+ "planner_ranking": 0.09519834443926811,
257
+ "planner_risk": 0.011243910586927086,
258
+ "planner_success": 0.630848728120327,
259
+ "reocclusion": 0.6507743820548058,
260
+ "support_mode": 0.6755735874176025,
261
+ "total": 1.0336408764123917,
262
+ "uncertainty": 0.004630188253941014,
263
+ "world_model": 1.544354408979416
264
  }
265
  }
266
  ]
artifacts/outputs/reveal_runs/proxy_reveal_state_clip/config_resolved.yaml CHANGED
@@ -10,11 +10,13 @@ data:
10
  resolution: 224
11
  train_episodes_per_proxy: 48
12
  val_episodes_per_proxy: 16
13
- train_dataset_path: /workspace/data/reveal_proxy/proxy_train_clip224.pt
14
- val_dataset_path: /workspace/data/reveal_proxy/proxy_val_clip224.pt
15
- rebuild_dataset: false
16
  chunk_horizon: 8
17
  rollout_horizon: 5
 
 
18
  seed: 7
19
  optim:
20
  epochs: 4
@@ -48,6 +50,11 @@ policy:
48
  dropout: 0.1
49
  proprio_dim: 32
50
  proprio_tokens: 1
 
 
 
 
 
51
  decoder:
52
  hidden_dim: 512
53
  num_heads: 8
@@ -63,6 +70,8 @@ policy:
63
  num_approach_templates: 32
64
  rollout_horizon: 5
65
  belief_map_size: 32
 
 
66
  predict_belief_map: true
67
  world_model:
68
  hidden_dim: 512
@@ -71,13 +80,10 @@ policy:
71
  num_approach_templates: 32
72
  rollout_horizon: 5
73
  planner:
 
74
  num_candidates: 8
75
- corridor_weight: 1.0
76
- persistence_weight: 0.65
77
- proposal_weight: 0.35
78
- disturbance_weight: 0.8
79
- reocclusion_weight: 0.6
80
- visibility_weight: 0.35
81
  loss_weights:
82
  action: 1.0
83
  support_mode: 0.15
@@ -86,3 +92,6 @@ loss_weights:
86
  disturbance: 0.1
87
  world_model: 0.2
88
  belief: 0.05
 
 
 
 
10
  resolution: 224
11
  train_episodes_per_proxy: 48
12
  val_episodes_per_proxy: 16
13
+ train_dataset_path: /workspace/data/reveal_proxy/proxy_train_clip224_v4_noleak_counterfactual.pt
14
+ val_dataset_path: /workspace/data/reveal_proxy/proxy_val_clip224_v4_noleak_counterfactual.pt
15
+ rebuild_dataset: true
16
  chunk_horizon: 8
17
  rollout_horizon: 5
18
+ history_steps: 2
19
+ planner_candidates: 4
20
  seed: 7
21
  optim:
22
  epochs: 4
 
50
  dropout: 0.1
51
  proprio_dim: 32
52
  proprio_tokens: 1
53
+ memory:
54
+ hidden_dim: 512
55
+ history_steps: 2
56
+ num_layers: 1
57
+ dropout: 0.1
58
  decoder:
59
  hidden_dim: 512
60
  num_heads: 8
 
70
  num_approach_templates: 32
71
  rollout_horizon: 5
72
  belief_map_size: 32
73
+ field_size: 16
74
+ num_heads: 4
75
  predict_belief_map: true
76
  world_model:
77
  hidden_dim: 512
 
80
  num_approach_templates: 32
81
  rollout_horizon: 5
82
  planner:
83
+ hidden_dim: 512
84
  num_candidates: 8
85
+ action_dim: 14
86
+ utility_margin: 0.1
 
 
 
 
87
  loss_weights:
88
  action: 1.0
89
  support_mode: 0.15
 
92
  disturbance: 0.1
93
  world_model: 0.2
94
  belief: 0.05
95
+ planner_success: 0.2
96
+ planner_risk: 0.1
97
+ planner_ranking: 0.1
artifacts/outputs/reveal_runs/reveal_ablation_v4/ablations.json ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "full_model": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.16666666666666666,
5
+ "bag_proxy": 0.25,
6
+ "cloth_proxy": 0.5
7
+ },
8
+ "mean_success": 0.3055555555555555,
9
+ "visibility_integral": 45.800796369711556,
10
+ "corridor_availability": 0.9180314590533575,
11
+ "reocclusion_rate": 0.027777777777777776,
12
+ "persistence_horizon_mae": 2.3963313409379188,
13
+ "disturbance_cost": 0.7033085679221485
14
+ },
15
+ "no_reveal_state_head": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.0,
18
+ "bag_proxy": 0.0,
19
+ "cloth_proxy": 0.0
20
+ },
21
+ "mean_success": 0.0,
22
+ "visibility_integral": 62.73463360468546,
23
+ "corridor_availability": 0.9864540547132492,
24
+ "reocclusion_rate": 0.0,
25
+ "persistence_horizon_mae": null,
26
+ "disturbance_cost": 0.8931084167626169
27
+ },
28
+ "no_world_model": {
29
+ "per_task_success": {
30
+ "foliage_proxy": 0.0,
31
+ "bag_proxy": 0.0,
32
+ "cloth_proxy": 0.0
33
+ },
34
+ "mean_success": 0.0,
35
+ "visibility_integral": 62.73463360468546,
36
+ "corridor_availability": 0.9864540547132492,
37
+ "reocclusion_rate": 0.0,
38
+ "persistence_horizon_mae": 2.419941000816309,
39
+ "disturbance_cost": 0.8931084167626169
40
+ },
41
+ "no_planner_reranking": {
42
+ "per_task_success": {
43
+ "foliage_proxy": 0.0,
44
+ "bag_proxy": 0.0,
45
+ "cloth_proxy": 0.0
46
+ },
47
+ "mean_success": 0.0,
48
+ "visibility_integral": 62.73463360468546,
49
+ "corridor_availability": 0.9864540547132492,
50
+ "reocclusion_rate": 0.0,
51
+ "persistence_horizon_mae": 2.419941000816309,
52
+ "disturbance_cost": 0.8931084167626169
53
+ },
54
+ "no_support_mode_conditioning": {
55
+ "per_task_success": {
56
+ "foliage_proxy": 0.375,
57
+ "bag_proxy": 0.25,
58
+ "cloth_proxy": 0.5416666666666666
59
+ },
60
+ "mean_success": 0.38888888888888884,
61
+ "visibility_integral": 41.20910889903704,
62
+ "corridor_availability": 0.9009349540703826,
63
+ "reocclusion_rate": 0.027437974833808165,
64
+ "persistence_horizon_mae": 2.406442765584018,
65
+ "disturbance_cost": 0.6425008794499768
66
+ },
67
+ "no_wrist_cameras": {
68
+ "per_task_success": {
69
+ "foliage_proxy": 0.25,
70
+ "bag_proxy": 0.2916666666666667,
71
+ "cloth_proxy": 0.5416666666666666
72
+ },
73
+ "mean_success": 0.36111111111111116,
74
+ "visibility_integral": 42.94617295927472,
75
+ "corridor_availability": 0.911839393277963,
76
+ "reocclusion_rate": 0.026319995590828923,
77
+ "persistence_horizon_mae": 2.5709509358907683,
78
+ "disturbance_cost": 0.6674723047763109
79
+ },
80
+ "no_global_camera": {
81
+ "per_task_success": {
82
+ "foliage_proxy": 0.16666666666666666,
83
+ "bag_proxy": 0.3333333333333333,
84
+ "cloth_proxy": 0.3333333333333333
85
+ },
86
+ "mean_success": 0.27777777777777773,
87
+ "visibility_integral": 47.384350614415276,
88
+ "corridor_availability": 0.9166230356527699,
89
+ "reocclusion_rate": 0.025818452380952383,
90
+ "persistence_horizon_mae": 2.957454740526246,
91
+ "disturbance_cost": 0.7210023639102777
92
+ }
93
+ }
artifacts/outputs/reveal_runs/reveal_ablation_v4/ablations.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Ablations
2
+
3
+ ## full_model
4
+ - mean_success: 0.306
5
+ - visibility_integral: 45.801
6
+ - corridor_availability: 0.918
7
+ - reocclusion_rate: 0.028
8
+ - persistence_horizon_mae: 2.396
9
+ - disturbance_cost: 0.703
10
+
11
+ ## no_reveal_state_head
12
+ - mean_success: 0.000
13
+ - visibility_integral: 62.735
14
+ - corridor_availability: 0.986
15
+ - reocclusion_rate: 0.000
16
+ - persistence_horizon_mae: 0.000
17
+ - disturbance_cost: 0.893
18
+
19
+ ## no_world_model
20
+ - mean_success: 0.000
21
+ - visibility_integral: 62.735
22
+ - corridor_availability: 0.986
23
+ - reocclusion_rate: 0.000
24
+ - persistence_horizon_mae: 2.420
25
+ - disturbance_cost: 0.893
26
+
27
+ ## no_planner_reranking
28
+ - mean_success: 0.000
29
+ - visibility_integral: 62.735
30
+ - corridor_availability: 0.986
31
+ - reocclusion_rate: 0.000
32
+ - persistence_horizon_mae: 2.420
33
+ - disturbance_cost: 0.893
34
+
35
+ ## no_support_mode_conditioning
36
+ - mean_success: 0.389
37
+ - visibility_integral: 41.209
38
+ - corridor_availability: 0.901
39
+ - reocclusion_rate: 0.027
40
+ - persistence_horizon_mae: 2.406
41
+ - disturbance_cost: 0.643
42
+
43
+ ## no_wrist_cameras
44
+ - mean_success: 0.361
45
+ - visibility_integral: 42.946
46
+ - corridor_availability: 0.912
47
+ - reocclusion_rate: 0.026
48
+ - persistence_horizon_mae: 2.571
49
+ - disturbance_cost: 0.667
50
+
51
+ ## no_global_camera
52
+ - mean_success: 0.278
53
+ - visibility_integral: 47.384
54
+ - corridor_availability: 0.917
55
+ - reocclusion_rate: 0.026
56
+ - persistence_horizon_mae: 2.957
57
+ - disturbance_cost: 0.721
artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep18/ablations.json ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "full_model": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.3333333333333333,
5
+ "bag_proxy": 0.3888888888888889,
6
+ "cloth_proxy": 0.3333333333333333
7
+ },
8
+ "mean_success": 0.35185185185185186,
9
+ "visibility_integral": 41.31481176614761,
10
+ "corridor_availability": 0.8930041271227377,
11
+ "reocclusion_rate": 0.002777777777777778,
12
+ "persistence_horizon_mae": 0.9662547600565393,
13
+ "disturbance_cost": 0.6302865350411998
14
+ },
15
+ "no_reveal_state_head": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.3333333333333333,
18
+ "bag_proxy": 0.3888888888888889,
19
+ "cloth_proxy": 0.2777777777777778
20
+ },
21
+ "mean_success": 0.3333333333333333,
22
+ "visibility_integral": 6.436306021831654,
23
+ "corridor_availability": 0.33413351644520406,
24
+ "reocclusion_rate": 0.008333333333333335,
25
+ "persistence_horizon_mae": null,
26
+ "disturbance_cost": 0.5424560326393004
27
+ },
28
+ "no_world_model": {
29
+ "per_task_success": {
30
+ "foliage_proxy": 0.3333333333333333,
31
+ "bag_proxy": 0.3888888888888889,
32
+ "cloth_proxy": 0.2777777777777778
33
+ },
34
+ "mean_success": 0.3333333333333333,
35
+ "visibility_integral": 6.436306021831654,
36
+ "corridor_availability": 0.33413351644520406,
37
+ "reocclusion_rate": 0.008333333333333335,
38
+ "persistence_horizon_mae": 4.395576057914128,
39
+ "disturbance_cost": 0.5424560326393004
40
+ },
41
+ "no_planner_reranking": {
42
+ "per_task_success": {
43
+ "foliage_proxy": 0.3333333333333333,
44
+ "bag_proxy": 0.3888888888888889,
45
+ "cloth_proxy": 0.2777777777777778
46
+ },
47
+ "mean_success": 0.3333333333333333,
48
+ "visibility_integral": 6.436306021831654,
49
+ "corridor_availability": 0.33413351644520406,
50
+ "reocclusion_rate": 0.008333333333333335,
51
+ "persistence_horizon_mae": 4.395576057914128,
52
+ "disturbance_cost": 0.5424560326393004
53
+ },
54
+ "no_support_mode_conditioning": {
55
+ "per_task_success": {
56
+ "foliage_proxy": 0.3333333333333333,
57
+ "bag_proxy": 0.3888888888888889,
58
+ "cloth_proxy": 0.3333333333333333
59
+ },
60
+ "mean_success": 0.35185185185185186,
61
+ "visibility_integral": 41.31481176614761,
62
+ "corridor_availability": 0.8930041271227377,
63
+ "reocclusion_rate": 0.002777777777777778,
64
+ "persistence_horizon_mae": 0.9662547600565393,
65
+ "disturbance_cost": 0.6302865350411998
66
+ },
67
+ "no_wrist_cameras": {
68
+ "per_task_success": {
69
+ "foliage_proxy": 0.3333333333333333,
70
+ "bag_proxy": 0.3888888888888889,
71
+ "cloth_proxy": 0.3333333333333333
72
+ },
73
+ "mean_success": 0.35185185185185186,
74
+ "visibility_integral": 41.34216132428911,
75
+ "corridor_availability": 0.8971193510073202,
76
+ "reocclusion_rate": 0.0011574074074074073,
77
+ "persistence_horizon_mae": 0.9659118890357264,
78
+ "disturbance_cost": 0.6302977896950863
79
+ },
80
+ "no_global_camera": {
81
+ "per_task_success": {
82
+ "foliage_proxy": 0.3333333333333333,
83
+ "bag_proxy": 0.3888888888888889,
84
+ "cloth_proxy": 0.3333333333333333
85
+ },
86
+ "mean_success": 0.35185185185185186,
87
+ "visibility_integral": 41.33038121020353,
88
+ "corridor_availability": 0.8943758684175985,
89
+ "reocclusion_rate": 0.002777777777777778,
90
+ "persistence_horizon_mae": 0.9659084288095618,
91
+ "disturbance_cost": 0.6303076523321646
92
+ }
93
+ }
artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep18/ablations.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Ablations
2
+
3
+ ## full_model
4
+ - mean_success: 0.352
5
+ - visibility_integral: 41.315
6
+ - corridor_availability: 0.893
7
+ - reocclusion_rate: 0.003
8
+ - persistence_horizon_mae: 0.966
9
+ - disturbance_cost: 0.630
10
+
11
+ ## no_reveal_state_head
12
+ - mean_success: 0.333
13
+ - visibility_integral: 6.436
14
+ - corridor_availability: 0.334
15
+ - reocclusion_rate: 0.008
16
+ - persistence_horizon_mae: 0.000
17
+ - disturbance_cost: 0.542
18
+
19
+ ## no_world_model
20
+ - mean_success: 0.333
21
+ - visibility_integral: 6.436
22
+ - corridor_availability: 0.334
23
+ - reocclusion_rate: 0.008
24
+ - persistence_horizon_mae: 4.396
25
+ - disturbance_cost: 0.542
26
+
27
+ ## no_planner_reranking
28
+ - mean_success: 0.333
29
+ - visibility_integral: 6.436
30
+ - corridor_availability: 0.334
31
+ - reocclusion_rate: 0.008
32
+ - persistence_horizon_mae: 4.396
33
+ - disturbance_cost: 0.542
34
+
35
+ ## no_support_mode_conditioning
36
+ - mean_success: 0.352
37
+ - visibility_integral: 41.315
38
+ - corridor_availability: 0.893
39
+ - reocclusion_rate: 0.003
40
+ - persistence_horizon_mae: 0.966
41
+ - disturbance_cost: 0.630
42
+
43
+ ## no_wrist_cameras
44
+ - mean_success: 0.352
45
+ - visibility_integral: 41.342
46
+ - corridor_availability: 0.897
47
+ - reocclusion_rate: 0.001
48
+ - persistence_horizon_mae: 0.966
49
+ - disturbance_cost: 0.630
50
+
51
+ ## no_global_camera
52
+ - mean_success: 0.352
53
+ - visibility_integral: 41.330
54
+ - corridor_availability: 0.894
55
+ - reocclusion_rate: 0.003
56
+ - persistence_horizon_mae: 0.966
57
+ - disturbance_cost: 0.630
artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep18/ablations.partial.json ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "checkpoint": "/workspace/outputs/reveal_runs/proxy_reveal_state_clip/checkpoint_best.pt",
3
+ "episodes": 18,
4
+ "sections": {
5
+ "full_model": {
6
+ "mean_success": 0.35185185185185186,
7
+ "visibility_integral": 41.31481176614761,
8
+ "corridor_availability": 0.8930041271227377,
9
+ "reocclusion_rate": 0.002777777777777778,
10
+ "persistence_horizon_mae": 0.9662547600565393,
11
+ "disturbance_cost": 0.6302865350411998
12
+ },
13
+ "no_reveal_state_head": {
14
+ "mean_success": 0.3333333333333333,
15
+ "visibility_integral": 6.436306021831654,
16
+ "corridor_availability": 0.33413351644520406,
17
+ "reocclusion_rate": 0.008333333333333335,
18
+ "persistence_horizon_mae": 0.0,
19
+ "disturbance_cost": 0.5424560326393004
20
+ },
21
+ "no_world_model": {
22
+ "mean_success": 0.3333333333333333,
23
+ "visibility_integral": 6.436306021831654,
24
+ "corridor_availability": 0.33413351644520406,
25
+ "reocclusion_rate": 0.008333333333333335,
26
+ "persistence_horizon_mae": 4.395576057914128,
27
+ "disturbance_cost": 0.5424560326393004
28
+ },
29
+ "no_planner_reranking": {
30
+ "mean_success": 0.3333333333333333,
31
+ "visibility_integral": 6.436306021831654,
32
+ "corridor_availability": 0.33413351644520406,
33
+ "reocclusion_rate": 0.008333333333333335,
34
+ "persistence_horizon_mae": 4.395576057914128,
35
+ "disturbance_cost": 0.5424560326393004
36
+ },
37
+ "no_support_mode_conditioning": {
38
+ "mean_success": 0.35185185185185186,
39
+ "visibility_integral": 41.31481176614761,
40
+ "corridor_availability": 0.8930041271227377,
41
+ "reocclusion_rate": 0.002777777777777778,
42
+ "persistence_horizon_mae": 0.9662547600565393,
43
+ "disturbance_cost": 0.6302865350411998
44
+ },
45
+ "no_wrist_cameras": {
46
+ "mean_success": 0.35185185185185186,
47
+ "visibility_integral": 41.34216132428911,
48
+ "corridor_availability": 0.8971193510073202,
49
+ "reocclusion_rate": 0.0011574074074074073,
50
+ "persistence_horizon_mae": 0.9659118890357264,
51
+ "disturbance_cost": 0.6302977896950863
52
+ },
53
+ "no_global_camera": {
54
+ "mean_success": 0.35185185185185186,
55
+ "visibility_integral": 41.33038121020353,
56
+ "corridor_availability": 0.8943758684175985,
57
+ "reocclusion_rate": 0.002777777777777778,
58
+ "persistence_horizon_mae": 0.9659084288095618,
59
+ "disturbance_cost": 0.6303076523321646
60
+ }
61
+ },
62
+ "raw": {
63
+ "full_model": {
64
+ "per_task_success": {
65
+ "foliage_proxy": 0.3333333333333333,
66
+ "bag_proxy": 0.3888888888888889,
67
+ "cloth_proxy": 0.3333333333333333
68
+ },
69
+ "mean_success": 0.35185185185185186,
70
+ "visibility_integral": 41.31481176614761,
71
+ "corridor_availability": 0.8930041271227377,
72
+ "reocclusion_rate": 0.002777777777777778,
73
+ "persistence_horizon_mae": 0.9662547600565393,
74
+ "disturbance_cost": 0.6302865350411998
75
+ },
76
+ "no_reveal_state_head": {
77
+ "per_task_success": {
78
+ "foliage_proxy": 0.3333333333333333,
79
+ "bag_proxy": 0.3888888888888889,
80
+ "cloth_proxy": 0.2777777777777778
81
+ },
82
+ "mean_success": 0.3333333333333333,
83
+ "visibility_integral": 6.436306021831654,
84
+ "corridor_availability": 0.33413351644520406,
85
+ "reocclusion_rate": 0.008333333333333335,
86
+ "persistence_horizon_mae": null,
87
+ "disturbance_cost": 0.5424560326393004
88
+ },
89
+ "no_world_model": {
90
+ "per_task_success": {
91
+ "foliage_proxy": 0.3333333333333333,
92
+ "bag_proxy": 0.3888888888888889,
93
+ "cloth_proxy": 0.2777777777777778
94
+ },
95
+ "mean_success": 0.3333333333333333,
96
+ "visibility_integral": 6.436306021831654,
97
+ "corridor_availability": 0.33413351644520406,
98
+ "reocclusion_rate": 0.008333333333333335,
99
+ "persistence_horizon_mae": 4.395576057914128,
100
+ "disturbance_cost": 0.5424560326393004
101
+ },
102
+ "no_planner_reranking": {
103
+ "per_task_success": {
104
+ "foliage_proxy": 0.3333333333333333,
105
+ "bag_proxy": 0.3888888888888889,
106
+ "cloth_proxy": 0.2777777777777778
107
+ },
108
+ "mean_success": 0.3333333333333333,
109
+ "visibility_integral": 6.436306021831654,
110
+ "corridor_availability": 0.33413351644520406,
111
+ "reocclusion_rate": 0.008333333333333335,
112
+ "persistence_horizon_mae": 4.395576057914128,
113
+ "disturbance_cost": 0.5424560326393004
114
+ },
115
+ "no_support_mode_conditioning": {
116
+ "per_task_success": {
117
+ "foliage_proxy": 0.3333333333333333,
118
+ "bag_proxy": 0.3888888888888889,
119
+ "cloth_proxy": 0.3333333333333333
120
+ },
121
+ "mean_success": 0.35185185185185186,
122
+ "visibility_integral": 41.31481176614761,
123
+ "corridor_availability": 0.8930041271227377,
124
+ "reocclusion_rate": 0.002777777777777778,
125
+ "persistence_horizon_mae": 0.9662547600565393,
126
+ "disturbance_cost": 0.6302865350411998
127
+ },
128
+ "no_wrist_cameras": {
129
+ "per_task_success": {
130
+ "foliage_proxy": 0.3333333333333333,
131
+ "bag_proxy": 0.3888888888888889,
132
+ "cloth_proxy": 0.3333333333333333
133
+ },
134
+ "mean_success": 0.35185185185185186,
135
+ "visibility_integral": 41.34216132428911,
136
+ "corridor_availability": 0.8971193510073202,
137
+ "reocclusion_rate": 0.0011574074074074073,
138
+ "persistence_horizon_mae": 0.9659118890357264,
139
+ "disturbance_cost": 0.6302977896950863
140
+ },
141
+ "no_global_camera": {
142
+ "per_task_success": {
143
+ "foliage_proxy": 0.3333333333333333,
144
+ "bag_proxy": 0.3888888888888889,
145
+ "cloth_proxy": 0.3333333333333333
146
+ },
147
+ "mean_success": 0.35185185185185186,
148
+ "visibility_integral": 41.33038121020353,
149
+ "corridor_availability": 0.8943758684175985,
150
+ "reocclusion_rate": 0.002777777777777778,
151
+ "persistence_horizon_mae": 0.9659084288095618,
152
+ "disturbance_cost": 0.6303076523321646
153
+ }
154
+ },
155
+ "elapsed_seconds": 4835.7074475847185
156
+ }
artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep4/ablations.json ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "full_model": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.0,
5
+ "bag_proxy": 0.5,
6
+ "cloth_proxy": 0.5
7
+ },
8
+ "mean_success": 0.3333333333333333,
9
+ "visibility_integral": 42.51341059803963,
10
+ "corridor_availability": 0.9005658576885859,
11
+ "reocclusion_rate": 0.004166666666666667,
12
+ "persistence_horizon_mae": 1.121226767712281,
13
+ "disturbance_cost": 0.6488037866850694
14
+ },
15
+ "no_reveal_state_head": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.0,
18
+ "bag_proxy": 0.5,
19
+ "cloth_proxy": 0.25
20
+ },
21
+ "mean_success": 0.25,
22
+ "visibility_integral": 7.045467118422191,
23
+ "corridor_availability": 0.29356995907922584,
24
+ "reocclusion_rate": 0.009375,
25
+ "persistence_horizon_mae": null,
26
+ "disturbance_cost": 0.6037605715294679
27
+ },
28
+ "no_world_model": {
29
+ "per_task_success": {
30
+ "foliage_proxy": 0.0,
31
+ "bag_proxy": 0.5,
32
+ "cloth_proxy": 0.25
33
+ },
34
+ "mean_success": 0.25,
35
+ "visibility_integral": 7.045467118422191,
36
+ "corridor_availability": 0.29356995907922584,
37
+ "reocclusion_rate": 0.009375,
38
+ "persistence_horizon_mae": 4.413126634701692,
39
+ "disturbance_cost": 0.6037605715294679
40
+ },
41
+ "no_planner_reranking": {
42
+ "per_task_success": {
43
+ "foliage_proxy": 0.0,
44
+ "bag_proxy": 0.5,
45
+ "cloth_proxy": 0.25
46
+ },
47
+ "mean_success": 0.25,
48
+ "visibility_integral": 7.045467118422191,
49
+ "corridor_availability": 0.29356995907922584,
50
+ "reocclusion_rate": 0.009375,
51
+ "persistence_horizon_mae": 4.413126634701692,
52
+ "disturbance_cost": 0.6037605715294679
53
+ },
54
+ "no_support_mode_conditioning": {
55
+ "per_task_success": {
56
+ "foliage_proxy": 0.0,
57
+ "bag_proxy": 0.5,
58
+ "cloth_proxy": 0.5
59
+ },
60
+ "mean_success": 0.3333333333333333,
61
+ "visibility_integral": 42.51341059803963,
62
+ "corridor_availability": 0.9005658576885859,
63
+ "reocclusion_rate": 0.004166666666666667,
64
+ "persistence_horizon_mae": 1.121226767712281,
65
+ "disturbance_cost": 0.6488037866850694
66
+ },
67
+ "no_wrist_cameras": {
68
+ "per_task_success": {
69
+ "foliage_proxy": 0.0,
70
+ "bag_proxy": 0.5,
71
+ "cloth_proxy": 0.5
72
+ },
73
+ "mean_success": 0.3333333333333333,
74
+ "visibility_integral": 42.57222665349642,
75
+ "corridor_availability": 0.9067386935154597,
76
+ "reocclusion_rate": 0.0020833333333333333,
77
+ "persistence_horizon_mae": 1.1191915943883144,
78
+ "disturbance_cost": 0.6488144403944412
79
+ },
80
+ "no_global_camera": {
81
+ "per_task_success": {
82
+ "foliage_proxy": 0.0,
83
+ "bag_proxy": 0.5,
84
+ "cloth_proxy": 0.5
85
+ },
86
+ "mean_success": 0.3333333333333333,
87
+ "visibility_integral": 42.558002611001335,
88
+ "corridor_availability": 0.9036522756020228,
89
+ "reocclusion_rate": 0.004166666666666667,
90
+ "persistence_horizon_mae": 1.1191876937919583,
91
+ "disturbance_cost": 0.6488229470948378
92
+ }
93
+ }
artifacts/outputs/reveal_runs/reveal_ablation_v4_clip_ep4/ablations.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Ablations
2
+
3
+ ## full_model
4
+ - mean_success: 0.333
5
+ - visibility_integral: 42.513
6
+ - corridor_availability: 0.901
7
+ - reocclusion_rate: 0.004
8
+ - persistence_horizon_mae: 1.121
9
+ - disturbance_cost: 0.649
10
+
11
+ ## no_reveal_state_head
12
+ - mean_success: 0.250
13
+ - visibility_integral: 7.045
14
+ - corridor_availability: 0.294
15
+ - reocclusion_rate: 0.009
16
+ - persistence_horizon_mae: 0.000
17
+ - disturbance_cost: 0.604
18
+
19
+ ## no_world_model
20
+ - mean_success: 0.250
21
+ - visibility_integral: 7.045
22
+ - corridor_availability: 0.294
23
+ - reocclusion_rate: 0.009
24
+ - persistence_horizon_mae: 4.413
25
+ - disturbance_cost: 0.604
26
+
27
+ ## no_planner_reranking
28
+ - mean_success: 0.250
29
+ - visibility_integral: 7.045
30
+ - corridor_availability: 0.294
31
+ - reocclusion_rate: 0.009
32
+ - persistence_horizon_mae: 4.413
33
+ - disturbance_cost: 0.604
34
+
35
+ ## no_support_mode_conditioning
36
+ - mean_success: 0.333
37
+ - visibility_integral: 42.513
38
+ - corridor_availability: 0.901
39
+ - reocclusion_rate: 0.004
40
+ - persistence_horizon_mae: 1.121
41
+ - disturbance_cost: 0.649
42
+
43
+ ## no_wrist_cameras
44
+ - mean_success: 0.333
45
+ - visibility_integral: 42.572
46
+ - corridor_availability: 0.907
47
+ - reocclusion_rate: 0.002
48
+ - persistence_horizon_mae: 1.119
49
+ - disturbance_cost: 0.649
50
+
51
+ ## no_global_camera
52
+ - mean_success: 0.333
53
+ - visibility_integral: 42.558
54
+ - corridor_availability: 0.904
55
+ - reocclusion_rate: 0.004
56
+ - persistence_horizon_mae: 1.119
57
+ - disturbance_cost: 0.649
artifacts/outputs/reveal_runs/reveal_ablation_v4_det/ablations.json ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "full_model": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.0,
5
+ "bag_proxy": 0.0,
6
+ "cloth_proxy": 0.0
7
+ },
8
+ "mean_success": 0.0,
9
+ "visibility_integral": 62.73463360468546,
10
+ "corridor_availability": 0.9864540547132492,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 2.419941000816309,
13
+ "disturbance_cost": 0.8931084167626169
14
+ },
15
+ "no_reveal_state_head": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.0,
18
+ "bag_proxy": 0.0,
19
+ "cloth_proxy": 0.0
20
+ },
21
+ "mean_success": 0.0,
22
+ "visibility_integral": 62.73463360468546,
23
+ "corridor_availability": 0.9864540547132492,
24
+ "reocclusion_rate": 0.0,
25
+ "persistence_horizon_mae": null,
26
+ "disturbance_cost": 0.8931084167626169
27
+ },
28
+ "no_world_model": {
29
+ "per_task_success": {
30
+ "foliage_proxy": 0.0,
31
+ "bag_proxy": 0.0,
32
+ "cloth_proxy": 0.0
33
+ },
34
+ "mean_success": 0.0,
35
+ "visibility_integral": 62.73463360468546,
36
+ "corridor_availability": 0.9864540547132492,
37
+ "reocclusion_rate": 0.0,
38
+ "persistence_horizon_mae": 2.419941000816309,
39
+ "disturbance_cost": 0.8931084167626169
40
+ },
41
+ "no_planner_reranking": {
42
+ "per_task_success": {
43
+ "foliage_proxy": 0.0,
44
+ "bag_proxy": 0.0,
45
+ "cloth_proxy": 0.0
46
+ },
47
+ "mean_success": 0.0,
48
+ "visibility_integral": 62.73463360468546,
49
+ "corridor_availability": 0.9864540547132492,
50
+ "reocclusion_rate": 0.0,
51
+ "persistence_horizon_mae": 2.419941000816309,
52
+ "disturbance_cost": 0.8931084167626169
53
+ },
54
+ "no_support_mode_conditioning": {
55
+ "per_task_success": {
56
+ "foliage_proxy": 0.0,
57
+ "bag_proxy": 0.0,
58
+ "cloth_proxy": 0.0
59
+ },
60
+ "mean_success": 0.0,
61
+ "visibility_integral": 62.73463360468546,
62
+ "corridor_availability": 0.9864540547132492,
63
+ "reocclusion_rate": 0.0,
64
+ "persistence_horizon_mae": 2.419941000816309,
65
+ "disturbance_cost": 0.8931084167626169
66
+ },
67
+ "no_wrist_cameras": {
68
+ "per_task_success": {
69
+ "foliage_proxy": 0.0,
70
+ "bag_proxy": 0.0,
71
+ "cloth_proxy": 0.0
72
+ },
73
+ "mean_success": 0.0,
74
+ "visibility_integral": 62.738075998094345,
75
+ "corridor_availability": 0.9864540547132492,
76
+ "reocclusion_rate": 0.0,
77
+ "persistence_horizon_mae": 2.602942177767141,
78
+ "disturbance_cost": 0.8930594937668906
79
+ },
80
+ "no_global_camera": {
81
+ "per_task_success": {
82
+ "foliage_proxy": 0.0,
83
+ "bag_proxy": 0.0,
84
+ "cloth_proxy": 0.0
85
+ },
86
+ "mean_success": 0.0,
87
+ "visibility_integral": 62.777364783816864,
88
+ "corridor_availability": 0.9864540547132492,
89
+ "reocclusion_rate": 0.0,
90
+ "persistence_horizon_mae": 3.000429857770602,
91
+ "disturbance_cost": 0.8916233273016082
92
+ }
93
+ }
artifacts/outputs/reveal_runs/reveal_ablation_v4_det/ablations.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Ablations
2
+
3
+ ## full_model
4
+ - mean_success: 0.000
5
+ - visibility_integral: 62.735
6
+ - corridor_availability: 0.986
7
+ - reocclusion_rate: 0.000
8
+ - persistence_horizon_mae: 2.420
9
+ - disturbance_cost: 0.893
10
+
11
+ ## no_reveal_state_head
12
+ - mean_success: 0.000
13
+ - visibility_integral: 62.735
14
+ - corridor_availability: 0.986
15
+ - reocclusion_rate: 0.000
16
+ - persistence_horizon_mae: 0.000
17
+ - disturbance_cost: 0.893
18
+
19
+ ## no_world_model
20
+ - mean_success: 0.000
21
+ - visibility_integral: 62.735
22
+ - corridor_availability: 0.986
23
+ - reocclusion_rate: 0.000
24
+ - persistence_horizon_mae: 2.420
25
+ - disturbance_cost: 0.893
26
+
27
+ ## no_planner_reranking
28
+ - mean_success: 0.000
29
+ - visibility_integral: 62.735
30
+ - corridor_availability: 0.986
31
+ - reocclusion_rate: 0.000
32
+ - persistence_horizon_mae: 2.420
33
+ - disturbance_cost: 0.893
34
+
35
+ ## no_support_mode_conditioning
36
+ - mean_success: 0.000
37
+ - visibility_integral: 62.735
38
+ - corridor_availability: 0.986
39
+ - reocclusion_rate: 0.000
40
+ - persistence_horizon_mae: 2.420
41
+ - disturbance_cost: 0.893
42
+
43
+ ## no_wrist_cameras
44
+ - mean_success: 0.000
45
+ - visibility_integral: 62.738
46
+ - corridor_availability: 0.986
47
+ - reocclusion_rate: 0.000
48
+ - persistence_horizon_mae: 2.603
49
+ - disturbance_cost: 0.893
50
+
51
+ ## no_global_camera
52
+ - mean_success: 0.000
53
+ - visibility_integral: 62.777
54
+ - corridor_availability: 0.986
55
+ - reocclusion_rate: 0.000
56
+ - persistence_horizon_mae: 3.000
57
+ - disturbance_cost: 0.892
artifacts/outputs/smoke/proxy_backbone_only_smoke/config_resolved.yaml ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_backbone_only_smoke
2
+ output_dir: /workspace/outputs/smoke
3
+ device: cuda
4
+ seed: 11
5
+ data:
6
+ proxies:
7
+ - foliage_proxy
8
+ - bag_proxy
9
+ - cloth_proxy
10
+ resolution: 64
11
+ train_episodes_per_proxy: 6
12
+ val_episodes_per_proxy: 2
13
+ train_dataset_path: /workspace/data/reveal_proxy/proxy_train_smoke_v4.pt
14
+ val_dataset_path: /workspace/data/reveal_proxy/proxy_val_smoke_v4.pt
15
+ rebuild_dataset: true
16
+ chunk_horizon: 4
17
+ rollout_horizon: 3
18
+ history_steps: 2
19
+ planner_candidates: 4
20
+ seed: 11
21
+ optim:
22
+ epochs: 2
23
+ batch_size: 8
24
+ num_workers: 0
25
+ lr: 0.001
26
+ weight_decay: 0.0001
27
+ trainer:
28
+ policy_type: backbone_only
29
+ use_bf16: true
30
+ grad_clip_norm: 1.0
31
+ freeze_backbone: true
32
+ gradient_checkpointing: false
33
+ plan_during_train: false
34
+ plan_during_eval: false
35
+ support_mode_conditioning: true
36
+ policy:
37
+ backbone:
38
+ model_name: openai/clip-vit-base-patch32
39
+ hidden_dim: 64
40
+ max_text_tokens: 32
41
+ freeze_backbone: true
42
+ gradient_checkpointing: false
43
+ use_dummy_backbone: true
44
+ fusion:
45
+ hidden_dim: 64
46
+ num_cameras: 3
47
+ num_layers: 2
48
+ num_heads: 4
49
+ ff_dim: 128
50
+ dropout: 0.1
51
+ proprio_dim: 32
52
+ proprio_tokens: 1
53
+ memory:
54
+ hidden_dim: 64
55
+ history_steps: 2
56
+ num_layers: 1
57
+ dropout: 0.1
58
+ decoder:
59
+ hidden_dim: 64
60
+ num_heads: 4
61
+ num_layers: 2
62
+ ff_dim: 128
63
+ dropout: 0.1
64
+ chunk_size: 4
65
+ action_dim: 14
66
+ arm_action_dim: 7
67
+ num_candidates: 4
68
+ reveal_head:
69
+ hidden_dim: 64
70
+ num_support_modes: 3
71
+ num_approach_templates: 32
72
+ rollout_horizon: 3
73
+ belief_map_size: 32
74
+ field_size: 16
75
+ num_heads: 4
76
+ predict_belief_map: true
77
+ world_model:
78
+ hidden_dim: 64
79
+ action_dim: 14
80
+ num_support_modes: 3
81
+ num_approach_templates: 32
82
+ rollout_horizon: 3
83
+ planner:
84
+ hidden_dim: 64
85
+ num_candidates: 4
86
+ action_dim: 14
87
+ utility_margin: 0.1
88
+ loss_weights:
89
+ action: 1.0
90
+ support_mode: 0.0
91
+ corridor: 0.0
92
+ persistence: 0.0
93
+ disturbance: 0.0
94
+ world_model: 0.0
95
+ belief: 0.0
96
+ planner_success: 0.0
97
+ planner_risk: 0.0
98
+ planner_ranking: 0.0
artifacts/outputs/smoke/proxy_backbone_only_smoke/metrics.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.16355294627802713,
6
+ "planner_ranking": 0.0,
7
+ "planner_risk": 0.0,
8
+ "planner_success": 0.0,
9
+ "total": 0.16355294627802713,
10
+ "world_model": 0.0
11
+ },
12
+ "val": {
13
+ "action": 0.06914255395531654,
14
+ "planner_ranking": 0.0,
15
+ "planner_risk": 0.0,
16
+ "planner_success": 0.0,
17
+ "total": 0.06914255395531654,
18
+ "world_model": 0.0
19
+ }
20
+ },
21
+ {
22
+ "epoch": 1,
23
+ "train": {
24
+ "action": 0.07098196234021868,
25
+ "planner_ranking": 0.0,
26
+ "planner_risk": 0.0,
27
+ "planner_success": 0.0,
28
+ "total": 0.07098196234021868,
29
+ "world_model": 0.0
30
+ },
31
+ "val": {
32
+ "action": 0.05190564692020416,
33
+ "planner_ranking": 0.0,
34
+ "planner_risk": 0.0,
35
+ "planner_success": 0.0,
36
+ "total": 0.05190564692020416,
37
+ "world_model": 0.0
38
+ }
39
+ }
40
+ ]
artifacts/outputs/smoke/proxy_reveal_state_smoke/config_resolved.yaml ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_reveal_state_smoke
2
+ output_dir: /workspace/outputs/smoke
3
+ device: cuda
4
+ seed: 11
5
+ data:
6
+ proxies:
7
+ - foliage_proxy
8
+ - bag_proxy
9
+ - cloth_proxy
10
+ resolution: 64
11
+ train_episodes_per_proxy: 6
12
+ val_episodes_per_proxy: 2
13
+ train_dataset_path: /workspace/data/reveal_proxy/proxy_train_smoke_v4.pt
14
+ val_dataset_path: /workspace/data/reveal_proxy/proxy_val_smoke_v4.pt
15
+ rebuild_dataset: false
16
+ chunk_horizon: 4
17
+ rollout_horizon: 3
18
+ history_steps: 2
19
+ planner_candidates: 4
20
+ seed: 11
21
+ optim:
22
+ epochs: 2
23
+ batch_size: 8
24
+ num_workers: 0
25
+ lr: 0.001
26
+ weight_decay: 0.0001
27
+ trainer:
28
+ policy_type: reveal_state
29
+ use_bf16: true
30
+ grad_clip_norm: 1.0
31
+ freeze_backbone: true
32
+ gradient_checkpointing: false
33
+ plan_during_train: true
34
+ plan_during_eval: true
35
+ support_mode_conditioning: true
36
+ policy:
37
+ backbone:
38
+ model_name: openai/clip-vit-base-patch32
39
+ hidden_dim: 64
40
+ max_text_tokens: 32
41
+ freeze_backbone: true
42
+ gradient_checkpointing: false
43
+ use_dummy_backbone: true
44
+ fusion:
45
+ hidden_dim: 64
46
+ num_cameras: 3
47
+ num_layers: 2
48
+ num_heads: 4
49
+ ff_dim: 128
50
+ dropout: 0.1
51
+ proprio_dim: 32
52
+ proprio_tokens: 1
53
+ memory:
54
+ hidden_dim: 64
55
+ history_steps: 2
56
+ num_layers: 1
57
+ dropout: 0.1
58
+ decoder:
59
+ hidden_dim: 64
60
+ num_heads: 4
61
+ num_layers: 2
62
+ ff_dim: 128
63
+ dropout: 0.1
64
+ chunk_size: 4
65
+ action_dim: 14
66
+ arm_action_dim: 7
67
+ num_candidates: 4
68
+ reveal_head:
69
+ hidden_dim: 64
70
+ num_support_modes: 3
71
+ num_approach_templates: 32
72
+ rollout_horizon: 3
73
+ belief_map_size: 32
74
+ field_size: 16
75
+ num_heads: 4
76
+ predict_belief_map: true
77
+ world_model:
78
+ hidden_dim: 64
79
+ action_dim: 14
80
+ num_support_modes: 3
81
+ num_approach_templates: 32
82
+ rollout_horizon: 3
83
+ planner:
84
+ hidden_dim: 64
85
+ num_candidates: 4
86
+ action_dim: 14
87
+ utility_margin: 0.1
88
+ loss_weights:
89
+ action: 1.0
90
+ support_mode: 0.15
91
+ corridor: 0.2
92
+ persistence: 0.1
93
+ disturbance: 0.1
94
+ world_model: 0.2
95
+ belief: 0.05
96
+ planner_success: 0.2
97
+ planner_risk: 0.1
98
+ planner_ranking: 0.1
artifacts/outputs/smoke/proxy_reveal_state_smoke/metrics.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.1632463155048234,
6
+ "belief": 0.5288754488740649,
7
+ "corridor": 0.5060673143182483,
8
+ "disturbance": 0.08440807514957019,
9
+ "persistence": 1.7013718911579676,
10
+ "planner_ranking": 0.09609956932919365,
11
+ "planner_risk": 0.08803637006453105,
12
+ "planner_success": 0.6524881209645953,
13
+ "reocclusion": 0.7463519998959133,
14
+ "support_mode": 0.8362165178571429,
15
+ "total": 2.236373339380537,
16
+ "uncertainty": 0.572694114276341,
17
+ "world_model": 7.359470299312046
18
+ },
19
+ "val": {
20
+ "action": 0.07105841860175133,
21
+ "belief": 0.3773516118526459,
22
+ "corridor": 0.37264925241470337,
23
+ "disturbance": 0.03807383216917515,
24
+ "persistence": 1.6277075409889221,
25
+ "planner_ranking": 0.09990942850708961,
26
+ "planner_risk": 0.08137237653136253,
27
+ "planner_success": 0.6634204685688019,
28
+ "reocclusion": 0.7349686622619629,
29
+ "support_mode": 0.7270728349685669,
30
+ "total": 1.5740689039230347,
31
+ "uncertainty": 0.5246226787567139,
32
+ "world_model": 4.816080331802368
33
+ }
34
+ },
35
+ {
36
+ "epoch": 1,
37
+ "train": {
38
+ "action": 0.07130091797028269,
39
+ "belief": 0.31657502480915617,
40
+ "corridor": 0.35610165766307283,
41
+ "disturbance": 0.022400280194623128,
42
+ "persistence": 1.5677628857748849,
43
+ "planner_ranking": 0.09598331153392792,
44
+ "planner_risk": 0.07021375798753329,
45
+ "planner_success": 0.6178554126194545,
46
+ "reocclusion": 0.7833022390093122,
47
+ "support_mode": 0.7107979910714286,
48
+ "total": 1.2336589864322118,
49
+ "uncertainty": 0.40418908851487295,
50
+ "world_model": 3.2488711902073453
51
+ },
52
+ "val": {
53
+ "action": 0.0499270036816597,
54
+ "belief": 0.24123625457286835,
55
+ "corridor": 0.2881518602371216,
56
+ "disturbance": 0.009773310273885727,
57
+ "persistence": 1.7411235570907593,
58
+ "planner_ranking": 0.1002776250243187,
59
+ "planner_risk": 0.05218701809644699,
60
+ "planner_success": 0.6680125892162323,
61
+ "reocclusion": 0.7738973498344421,
62
+ "support_mode": 0.6901583671569824,
63
+ "total": 0.9521919786930084,
64
+ "uncertainty": 0.25171157717704773,
65
+ "world_model": 1.9355762600898743
66
+ }
67
+ }
68
+ ]
artifacts/outputs/smoke/reveal_ablation_ep2/ablations.json ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "full_model": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.0,
5
+ "bag_proxy": 0.0,
6
+ "cloth_proxy": 0.0
7
+ },
8
+ "mean_success": 0.0,
9
+ "visibility_integral": 62.62329292297363,
10
+ "corridor_availability": 0.9855967164039612,
11
+ "reocclusion_rate": 0.0020833333333333333,
12
+ "persistence_horizon_mae": 2.8106139592826365,
13
+ "disturbance_cost": 0.8984478116035461
14
+ },
15
+ "no_reveal_state_head": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.0,
18
+ "bag_proxy": 0.0,
19
+ "cloth_proxy": 0.0
20
+ },
21
+ "mean_success": 0.0,
22
+ "visibility_integral": 62.666042963663735,
23
+ "corridor_availability": 0.9876543283462524,
24
+ "reocclusion_rate": 0.0,
25
+ "persistence_horizon_mae": null,
26
+ "disturbance_cost": 0.8984478116035461
27
+ },
28
+ "no_world_model": {
29
+ "per_task_success": {
30
+ "foliage_proxy": 0.0,
31
+ "bag_proxy": 0.0,
32
+ "cloth_proxy": 0.0
33
+ },
34
+ "mean_success": 0.0,
35
+ "visibility_integral": 62.666042963663735,
36
+ "corridor_availability": 0.9876543283462524,
37
+ "reocclusion_rate": 0.0,
38
+ "persistence_horizon_mae": 2.8120017565786837,
39
+ "disturbance_cost": 0.8984478116035461
40
+ },
41
+ "no_planner_reranking": {
42
+ "per_task_success": {
43
+ "foliage_proxy": 0.0,
44
+ "bag_proxy": 0.0,
45
+ "cloth_proxy": 0.0
46
+ },
47
+ "mean_success": 0.0,
48
+ "visibility_integral": 62.666042963663735,
49
+ "corridor_availability": 0.9876543283462524,
50
+ "reocclusion_rate": 0.0,
51
+ "persistence_horizon_mae": 2.8120017565786837,
52
+ "disturbance_cost": 0.8984478116035461
53
+ },
54
+ "no_support_mode_conditioning": {
55
+ "per_task_success": {
56
+ "foliage_proxy": 0.0,
57
+ "bag_proxy": 0.0,
58
+ "cloth_proxy": 0.0
59
+ },
60
+ "mean_success": 0.0,
61
+ "visibility_integral": 62.53779284159342,
62
+ "corridor_availability": 0.9855967164039612,
63
+ "reocclusion_rate": 0.0020833333333333333,
64
+ "persistence_horizon_mae": 2.808507453898589,
65
+ "disturbance_cost": 0.8984478116035461
66
+ },
67
+ "no_wrist_cameras": {
68
+ "per_task_success": {
69
+ "foliage_proxy": 0.0,
70
+ "bag_proxy": 0.0,
71
+ "cloth_proxy": 0.0
72
+ },
73
+ "mean_success": 0.0,
74
+ "visibility_integral": 62.67268816630045,
75
+ "corridor_availability": 0.9876543283462524,
76
+ "reocclusion_rate": 0.0,
77
+ "persistence_horizon_mae": 2.8158031940460204,
78
+ "disturbance_cost": 0.8982548316319784
79
+ },
80
+ "no_global_camera": {
81
+ "per_task_success": {
82
+ "foliage_proxy": 0.0,
83
+ "bag_proxy": 0.0,
84
+ "cloth_proxy": 0.0
85
+ },
86
+ "mean_success": 0.0,
87
+ "visibility_integral": 62.668721516927086,
88
+ "corridor_availability": 0.9876543283462524,
89
+ "reocclusion_rate": 0.0,
90
+ "persistence_horizon_mae": 2.8175474738081294,
91
+ "disturbance_cost": 0.8983920911947886
92
+ }
93
+ }
artifacts/outputs/smoke/reveal_ablation_ep2/ablations.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Ablations
2
+
3
+ ## full_model
4
+ - mean_success: 0.000
5
+ - visibility_integral: 62.623
6
+ - corridor_availability: 0.986
7
+ - reocclusion_rate: 0.002
8
+ - persistence_horizon_mae: 2.811
9
+ - disturbance_cost: 0.898
10
+
11
+ ## no_reveal_state_head
12
+ - mean_success: 0.000
13
+ - visibility_integral: 62.666
14
+ - corridor_availability: 0.988
15
+ - reocclusion_rate: 0.000
16
+ - persistence_horizon_mae: 0.000
17
+ - disturbance_cost: 0.898
18
+
19
+ ## no_world_model
20
+ - mean_success: 0.000
21
+ - visibility_integral: 62.666
22
+ - corridor_availability: 0.988
23
+ - reocclusion_rate: 0.000
24
+ - persistence_horizon_mae: 2.812
25
+ - disturbance_cost: 0.898
26
+
27
+ ## no_planner_reranking
28
+ - mean_success: 0.000
29
+ - visibility_integral: 62.666
30
+ - corridor_availability: 0.988
31
+ - reocclusion_rate: 0.000
32
+ - persistence_horizon_mae: 2.812
33
+ - disturbance_cost: 0.898
34
+
35
+ ## no_support_mode_conditioning
36
+ - mean_success: 0.000
37
+ - visibility_integral: 62.538
38
+ - corridor_availability: 0.986
39
+ - reocclusion_rate: 0.002
40
+ - persistence_horizon_mae: 2.809
41
+ - disturbance_cost: 0.898
42
+
43
+ ## no_wrist_cameras
44
+ - mean_success: 0.000
45
+ - visibility_integral: 62.673
46
+ - corridor_availability: 0.988
47
+ - reocclusion_rate: 0.000
48
+ - persistence_horizon_mae: 2.816
49
+ - disturbance_cost: 0.898
50
+
51
+ ## no_global_camera
52
+ - mean_success: 0.000
53
+ - visibility_integral: 62.669
54
+ - corridor_availability: 0.988
55
+ - reocclusion_rate: 0.000
56
+ - persistence_horizon_mae: 2.818
57
+ - disturbance_cost: 0.898
artifacts/outputs/smoke/reveal_eval_ep2/reveal_benchmark.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backbone": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.0,
5
+ "bag_proxy": 0.0,
6
+ "cloth_proxy": 0.0
7
+ },
8
+ "mean_success": 0.0,
9
+ "visibility_integral": 62.556946436564125,
10
+ "corridor_availability": 0.9876543283462524,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 0.0,
13
+ "disturbance_cost": 0.9013666311899821
14
+ },
15
+ "reveal": {
16
+ "per_task_success": {
17
+ "foliage_proxy": 0.0,
18
+ "bag_proxy": 0.0,
19
+ "cloth_proxy": 0.0
20
+ },
21
+ "mean_success": 0.0,
22
+ "visibility_integral": 62.6706740061442,
23
+ "corridor_availability": 0.9876543283462524,
24
+ "reocclusion_rate": 0.0,
25
+ "persistence_horizon_mae": 2.812001740684112,
26
+ "disturbance_cost": 0.8983492453893026
27
+ }
28
+ }
artifacts/outputs/smoke/reveal_eval_ep2/reveal_benchmark.md ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## backbone
4
+ - checkpoint: /workspace/outputs/smoke/proxy_backbone_only_smoke/checkpoint_best.pt
5
+ - mean_success: 0.000
6
+ - visibility_integral: 62.557
7
+ - corridor_availability: 0.988
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 0.000
10
+ - disturbance_cost: 0.901
11
+ - foliage_proxy_success: 0.000
12
+ - bag_proxy_success: 0.000
13
+ - cloth_proxy_success: 0.000
14
+
15
+ ## reveal
16
+ - checkpoint: /workspace/outputs/smoke/proxy_reveal_state_smoke/checkpoint_best.pt
17
+ - mean_success: 0.000
18
+ - visibility_integral: 62.671
19
+ - corridor_availability: 0.988
20
+ - reocclusion_rate: 0.000
21
+ - persistence_horizon_mae: 2.812
22
+ - disturbance_cost: 0.898
23
+ - foliage_proxy_success: 0.000
24
+ - bag_proxy_success: 0.000
25
+ - cloth_proxy_success: 0.000
code/reveal_vla_bimanual/eval/run_ablations.py CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
3
  import argparse
4
  import json
5
  from pathlib import Path
 
6
 
7
  from eval.ablations import MANDATORY_ABLATIONS
8
  from eval.report import write_comparison_report
@@ -19,6 +20,7 @@ def main() -> None:
19
  parser.add_argument("--resolution", type=int, default=None)
20
  parser.add_argument("--output-root", default="/workspace/reports/reveal_ablation")
21
  parser.add_argument("--proxies", nargs="*", default=None)
 
22
  args = parser.parse_args()
23
 
24
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -28,10 +30,25 @@ def main() -> None:
28
  output_root = Path(args.output_root)
29
  output_root.mkdir(parents=True, exist_ok=True)
30
 
 
 
31
  sections = {}
32
  raw = {}
33
- for ablation in (None, *MANDATORY_ABLATIONS):
 
 
 
 
 
 
 
 
 
 
34
  label = "full_model" if ablation is None else ablation
 
 
 
35
  metrics = evaluate_model(
36
  model=model,
37
  device=device,
@@ -57,8 +74,20 @@ def main() -> None:
57
  "persistence_horizon_mae": metrics.persistence_horizon_mae or 0.0,
58
  "disturbance_cost": metrics.disturbance_cost or 0.0,
59
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
- json_path = output_root / "ablations.json"
62
  json_path.write_text(json.dumps(raw, indent=2), encoding="utf-8")
63
  write_comparison_report(output_root / "ablations.md", "Reveal Ablations", sections)
64
  print(json.dumps({"output_json": str(json_path), "sections": sections}, indent=2))
 
3
  import argparse
4
  import json
5
  from pathlib import Path
6
+ import time
7
 
8
  from eval.ablations import MANDATORY_ABLATIONS
9
  from eval.report import write_comparison_report
 
20
  parser.add_argument("--resolution", type=int, default=None)
21
  parser.add_argument("--output-root", default="/workspace/reports/reveal_ablation")
22
  parser.add_argument("--proxies", nargs="*", default=None)
23
+ parser.add_argument("--resume", action="store_true")
24
  args = parser.parse_args()
25
 
26
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
30
  output_root = Path(args.output_root)
31
  output_root.mkdir(parents=True, exist_ok=True)
32
 
33
+ json_path = output_root / "ablations.json"
34
+ partial_path = output_root / "ablations.partial.json"
35
  sections = {}
36
  raw = {}
37
+ completed_labels: set[str] = set()
38
+ if args.resume and partial_path.exists():
39
+ partial = json.loads(partial_path.read_text(encoding="utf-8"))
40
+ raw = partial.get("raw", {})
41
+ sections = partial.get("sections", {})
42
+ completed_labels = set(raw)
43
+ print(json.dumps({"resume_from": str(partial_path), "completed": sorted(completed_labels)}, indent=2))
44
+
45
+ ablations = (None, *MANDATORY_ABLATIONS)
46
+ start_time = time.monotonic()
47
+ for index, ablation in enumerate(ablations, start=1):
48
  label = "full_model" if ablation is None else ablation
49
+ if label in completed_labels:
50
+ continue
51
+ print(json.dumps({"running": label, "index": index, "total": len(ablations)}, indent=2))
52
  metrics = evaluate_model(
53
  model=model,
54
  device=device,
 
74
  "persistence_horizon_mae": metrics.persistence_horizon_mae or 0.0,
75
  "disturbance_cost": metrics.disturbance_cost or 0.0,
76
  }
77
+ partial_path.write_text(
78
+ json.dumps(
79
+ {
80
+ "checkpoint": args.checkpoint,
81
+ "episodes": args.episodes,
82
+ "sections": sections,
83
+ "raw": raw,
84
+ "elapsed_seconds": time.monotonic() - start_time,
85
+ },
86
+ indent=2,
87
+ ),
88
+ encoding="utf-8",
89
+ )
90
 
 
91
  json_path.write_text(json.dumps(raw, indent=2), encoding="utf-8")
92
  write_comparison_report(output_root / "ablations.md", "Reveal Ablations", sections)
93
  print(json.dumps({"output_json": str(json_path), "sections": sections}, indent=2))
code/reveal_vla_bimanual/eval/run_reveal_benchmark.py CHANGED
@@ -23,6 +23,7 @@ from eval.report import write_comparison_report
23
  from models.action_decoder import ChunkDecoderConfig
24
  from models.backbones import FrozenVLBackboneConfig
25
  from models.multiview_fusion import MultiViewFusionConfig
 
26
  from models.planner import PlannerConfig
27
  from models.policy import PolicyConfig
28
  from models.reveal_head import RevealHeadConfig
@@ -35,6 +36,7 @@ def _policy_config_from_dict(cfg: dict[str, Any]) -> PolicyConfig:
35
  return PolicyConfig(
36
  backbone=FrozenVLBackboneConfig(**cfg["backbone"]),
37
  fusion=MultiViewFusionConfig(**cfg["fusion"]),
 
38
  decoder=ChunkDecoderConfig(**cfg["decoder"]),
39
  reveal_head=RevealHeadConfig(**cfg["reveal_head"]),
40
  world_model=RevealWMConfig(**cfg["world_model"]),
@@ -56,11 +58,33 @@ def load_model(checkpoint_path: str | Path, device: torch.device) -> tuple[torch
56
  return model, checkpoint
57
 
58
 
59
- def _prepare_batch(observation: dict[str, Any], device: torch.device) -> dict[str, Any]:
 
 
 
 
 
60
  images = torch.from_numpy(observation["images"]).permute(0, 3, 1, 2).unsqueeze(0).float() / 255.0
61
  proprio = torch.from_numpy(observation["proprio"]).unsqueeze(0).float()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  return {
63
  "images": images.to(device),
 
 
64
  "proprio": proprio.to(device),
65
  "texts": [observation["text"]],
66
  }
@@ -83,6 +107,8 @@ def select_chunk(
83
  images = _apply_camera_ablation(batch["images"], ablation)
84
  forward_kwargs = {
85
  "images": images,
 
 
86
  "proprio": batch["proprio"],
87
  "texts": batch["texts"],
88
  }
@@ -118,6 +144,7 @@ def evaluate_model(
118
  reocclusion_scores = []
119
  persistence_errors = []
120
  disturbance_scores = []
 
121
 
122
  for proxy_offset, proxy_name in enumerate(proxies):
123
  successes = []
@@ -131,12 +158,25 @@ def evaluate_model(
131
  episode_visibility = [float(privileged_state["visibility"])]
132
  episode_corridor = [float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any())]
133
  episode_disturbance = [float(privileged_state["disturbance_cost"])]
 
 
134
  done = False
135
  while not done:
136
- batch = _prepare_batch(observation, device=device)
 
 
 
 
 
137
  with torch.no_grad():
138
  chunk, outputs = select_chunk(model, batch, ablation=ablation)
139
  action = chunk[0, 0].detach().cpu().numpy()
 
 
 
 
 
 
140
  observation, _, terminated, truncated, privileged_state = env.step(action)
141
  episode_visibility.append(float(privileged_state["visibility"]))
142
  episode_corridor.append(float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any()))
 
23
  from models.action_decoder import ChunkDecoderConfig
24
  from models.backbones import FrozenVLBackboneConfig
25
  from models.multiview_fusion import MultiViewFusionConfig
26
+ from models.observation_memory import ObservationMemoryConfig
27
  from models.planner import PlannerConfig
28
  from models.policy import PolicyConfig
29
  from models.reveal_head import RevealHeadConfig
 
36
  return PolicyConfig(
37
  backbone=FrozenVLBackboneConfig(**cfg["backbone"]),
38
  fusion=MultiViewFusionConfig(**cfg["fusion"]),
39
+ memory=ObservationMemoryConfig(**cfg.get("memory", {})),
40
  decoder=ChunkDecoderConfig(**cfg["decoder"]),
41
  reveal_head=RevealHeadConfig(**cfg["reveal_head"]),
42
  world_model=RevealWMConfig(**cfg["world_model"]),
 
58
  return model, checkpoint
59
 
60
 
61
+ def _prepare_batch(
62
+ observation: dict[str, Any],
63
+ device: torch.device,
64
+ history_images: list[np.ndarray] | None = None,
65
+ history_proprio: list[np.ndarray] | None = None,
66
+ ) -> dict[str, Any]:
67
  images = torch.from_numpy(observation["images"]).permute(0, 3, 1, 2).unsqueeze(0).float() / 255.0
68
  proprio = torch.from_numpy(observation["proprio"]).unsqueeze(0).float()
69
+ history_images = history_images or []
70
+ history_proprio = history_proprio or []
71
+ if history_images:
72
+ history_images_tensor = (
73
+ torch.from_numpy(np.stack(history_images, axis=0)).permute(0, 1, 4, 2, 3).unsqueeze(0).float() / 255.0
74
+ )
75
+ else:
76
+ history_images_tensor = torch.zeros(
77
+ (1, 0, images.shape[1], images.shape[2], images.shape[3], images.shape[4]),
78
+ dtype=torch.float32,
79
+ )
80
+ if history_proprio:
81
+ history_proprio_tensor = torch.from_numpy(np.stack(history_proprio, axis=0)).unsqueeze(0).float()
82
+ else:
83
+ history_proprio_tensor = torch.zeros((1, 0, proprio.shape[-1]), dtype=torch.float32)
84
  return {
85
  "images": images.to(device),
86
+ "history_images": history_images_tensor.to(device),
87
+ "history_proprio": history_proprio_tensor.to(device),
88
  "proprio": proprio.to(device),
89
  "texts": [observation["text"]],
90
  }
 
107
  images = _apply_camera_ablation(batch["images"], ablation)
108
  forward_kwargs = {
109
  "images": images,
110
+ "history_images": batch.get("history_images"),
111
+ "history_proprio": batch.get("history_proprio"),
112
  "proprio": batch["proprio"],
113
  "texts": batch["texts"],
114
  }
 
144
  reocclusion_scores = []
145
  persistence_errors = []
146
  disturbance_scores = []
147
+ history_steps = int(getattr(model.config.memory, "history_steps", 0)) if hasattr(model, "config") else 0
148
 
149
  for proxy_offset, proxy_name in enumerate(proxies):
150
  successes = []
 
158
  episode_visibility = [float(privileged_state["visibility"])]
159
  episode_corridor = [float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any())]
160
  episode_disturbance = [float(privileged_state["disturbance_cost"])]
161
+ history_images: list[np.ndarray] = []
162
+ history_proprio: list[np.ndarray] = []
163
  done = False
164
  while not done:
165
+ batch = _prepare_batch(
166
+ observation,
167
+ device=device,
168
+ history_images=history_images,
169
+ history_proprio=history_proprio,
170
+ )
171
  with torch.no_grad():
172
  chunk, outputs = select_chunk(model, batch, ablation=ablation)
173
  action = chunk[0, 0].detach().cpu().numpy()
174
+ if history_steps > 0:
175
+ if len(history_images) >= history_steps:
176
+ history_images = history_images[-history_steps + 1 :]
177
+ history_proprio = history_proprio[-history_steps + 1 :]
178
+ history_images.append(observation["images"])
179
+ history_proprio.append(observation["proprio"])
180
  observation, _, terminated, truncated, privileged_state = env.step(action)
181
  episode_visibility.append(float(privileged_state["visibility"]))
182
  episode_corridor.append(float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any()))
code/reveal_vla_bimanual/eval/run_rlbench_rollout_eval.py CHANGED
@@ -18,6 +18,7 @@ from rlbench.environment import Environment
18
  from models.action_decoder import ChunkDecoderConfig
19
  from models.backbones import FrozenVLBackboneConfig
20
  from models.multiview_fusion import MultiViewFusionConfig
 
21
  from models.planner import PlannerConfig
22
  from models.policy import PolicyConfig
23
  from models.reveal_head import RevealHeadConfig
@@ -31,6 +32,7 @@ def _policy_config_from_checkpoint(checkpoint: dict[str, Any]) -> PolicyConfig:
31
  return PolicyConfig(
32
  backbone=FrozenVLBackboneConfig(**cfg["backbone"]),
33
  fusion=MultiViewFusionConfig(**cfg["fusion"]),
 
34
  decoder=ChunkDecoderConfig(**cfg["decoder"]),
35
  reveal_head=RevealHeadConfig(**cfg["reveal_head"]),
36
  world_model=RevealWMConfig(**cfg["world_model"]),
@@ -56,6 +58,7 @@ def main() -> None:
56
  parser.add_argument("--resolution", type=int, default=224)
57
  parser.add_argument("--device", default="cuda")
58
  parser.add_argument("--plan", action="store_true")
 
59
  parser.add_argument("--disable-support-mode-conditioning", action="store_true")
60
  parser.add_argument("--headless", action="store_true", default=True)
61
  args = parser.parse_args()
@@ -67,6 +70,18 @@ def main() -> None:
67
  model = build_policy(policy_config, trainer_config).to(device)
68
  model.load_state_dict(checkpoint["state_dict"], strict=True)
69
  model.eval()
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
  obs_config = create_obs_config(
72
  ["front", "wrist_left", "wrist_right"],
@@ -87,13 +102,16 @@ def main() -> None:
87
 
88
  results: dict[str, Any] = {
89
  "checkpoint": str(Path(args.checkpoint).resolve()),
90
- "plan": bool(args.plan),
 
91
  "support_mode_conditioning": not args.disable_support_mode_conditioning,
92
  "episodes_per_task": args.episodes_per_task,
93
  "episode_length": args.episode_length,
94
  "resolution": args.resolution,
95
  "tasks": {},
96
  }
 
 
97
 
98
  try:
99
  env.launch()
@@ -107,6 +125,9 @@ def main() -> None:
107
  language_goal = _episode_language_goal(descriptions)
108
  total_reward = 0.0
109
  success = 0.0
 
 
 
110
  for timestep in range(args.episode_length):
111
  images = stack_live_rgb_obs(obs, resolution=args.resolution).unsqueeze(0).to(device)
112
  proprio = torch.from_numpy(
@@ -117,21 +138,53 @@ def main() -> None:
117
  target_dim=policy_config.fusion.proprio_dim,
118
  )
119
  ).unsqueeze(0).to(device)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  with torch.no_grad():
121
  if trainer_config.policy_type == "reveal_state":
122
  outputs = model(
123
  images=images,
124
  proprio=proprio,
125
  texts=[language_goal],
126
- plan=args.plan,
 
 
127
  support_mode_conditioning=not args.disable_support_mode_conditioning,
128
  )
129
  else:
130
- outputs = model(images=images, proprio=proprio, texts=[language_goal])
 
 
 
 
 
 
131
  chosen_chunk = outputs["action_mean"]
132
- if args.plan and "planned_chunk" in outputs:
133
  chosen_chunk = outputs["planned_chunk"]
134
  step_action = chosen_chunk[0, 0].detach().float().cpu().numpy()
 
 
 
 
 
 
135
  env_action = absolute_action_from_delta(obs, step_action, ignore_collisions=True)
136
  obs, reward, done = task.step(env_action)
137
  total_reward += float(reward)
@@ -160,7 +213,8 @@ def main() -> None:
160
  "# RLBench Rollout Eval",
161
  "",
162
  f"- Checkpoint: `{results['checkpoint']}`",
163
- f"- Plan enabled: `{results['plan']}`",
 
164
  f"- Support-mode conditioning: `{results['support_mode_conditioning']}`",
165
  f"- Mean success: `{results['mean_success']:.3f}`",
166
  "",
 
18
  from models.action_decoder import ChunkDecoderConfig
19
  from models.backbones import FrozenVLBackboneConfig
20
  from models.multiview_fusion import MultiViewFusionConfig
21
+ from models.observation_memory import ObservationMemoryConfig
22
  from models.planner import PlannerConfig
23
  from models.policy import PolicyConfig
24
  from models.reveal_head import RevealHeadConfig
 
32
  return PolicyConfig(
33
  backbone=FrozenVLBackboneConfig(**cfg["backbone"]),
34
  fusion=MultiViewFusionConfig(**cfg["fusion"]),
35
+ memory=ObservationMemoryConfig(**cfg.get("memory", {})),
36
  decoder=ChunkDecoderConfig(**cfg["decoder"]),
37
  reveal_head=RevealHeadConfig(**cfg["reveal_head"]),
38
  world_model=RevealWMConfig(**cfg["world_model"]),
 
58
  parser.add_argument("--resolution", type=int, default=224)
59
  parser.add_argument("--device", default="cuda")
60
  parser.add_argument("--plan", action="store_true")
61
+ parser.add_argument("--allow-unsupervised-planning", action="store_true")
62
  parser.add_argument("--disable-support-mode-conditioning", action="store_true")
63
  parser.add_argument("--headless", action="store_true", default=True)
64
  args = parser.parse_args()
 
70
  model = build_policy(policy_config, trainer_config).to(device)
71
  model.load_state_dict(checkpoint["state_dict"], strict=True)
72
  model.eval()
73
+ plan_requested = bool(args.plan)
74
+ plan_applied = plan_requested and trainer_config.policy_type == "reveal_state"
75
+ planning_note = None
76
+ if plan_requested and trainer_config.policy_type != "reveal_state":
77
+ plan_applied = False
78
+ planning_note = "Planner requested for a backbone-only checkpoint; evaluating the backbone policy only."
79
+ elif plan_requested and trainer_config.policy_type == "reveal_state" and not args.allow_unsupervised_planning:
80
+ plan_applied = False
81
+ planning_note = (
82
+ "RLBench batches do not provide reveal supervision. Unsupervised reveal planning was disabled; "
83
+ "use --allow-unsupervised-planning to override."
84
+ )
85
 
86
  obs_config = create_obs_config(
87
  ["front", "wrist_left", "wrist_right"],
 
102
 
103
  results: dict[str, Any] = {
104
  "checkpoint": str(Path(args.checkpoint).resolve()),
105
+ "plan_requested": plan_requested,
106
+ "plan_applied": plan_applied,
107
  "support_mode_conditioning": not args.disable_support_mode_conditioning,
108
  "episodes_per_task": args.episodes_per_task,
109
  "episode_length": args.episode_length,
110
  "resolution": args.resolution,
111
  "tasks": {},
112
  }
113
+ if planning_note is not None:
114
+ results["planning_note"] = planning_note
115
 
116
  try:
117
  env.launch()
 
125
  language_goal = _episode_language_goal(descriptions)
126
  total_reward = 0.0
127
  success = 0.0
128
+ history_images: list[np.ndarray] = []
129
+ history_proprio: list[np.ndarray] = []
130
+ history_steps = int(getattr(policy_config.memory, "history_steps", 0))
131
  for timestep in range(args.episode_length):
132
  images = stack_live_rgb_obs(obs, resolution=args.resolution).unsqueeze(0).to(device)
133
  proprio = torch.from_numpy(
 
138
  target_dim=policy_config.fusion.proprio_dim,
139
  )
140
  ).unsqueeze(0).to(device)
141
+ if history_images:
142
+ history_images_tensor = (
143
+ torch.from_numpy(np.stack(history_images, axis=0)).unsqueeze(0).to(device)
144
+ )
145
+ history_proprio_tensor = (
146
+ torch.from_numpy(np.stack(history_proprio, axis=0)).unsqueeze(0).to(device)
147
+ )
148
+ else:
149
+ history_images_tensor = torch.zeros(
150
+ (1, 0, images.shape[1], images.shape[2], images.shape[3], images.shape[4]),
151
+ device=device,
152
+ dtype=images.dtype,
153
+ )
154
+ history_proprio_tensor = torch.zeros(
155
+ (1, 0, proprio.shape[-1]),
156
+ device=device,
157
+ dtype=proprio.dtype,
158
+ )
159
  with torch.no_grad():
160
  if trainer_config.policy_type == "reveal_state":
161
  outputs = model(
162
  images=images,
163
  proprio=proprio,
164
  texts=[language_goal],
165
+ history_images=history_images_tensor,
166
+ history_proprio=history_proprio_tensor,
167
+ plan=plan_applied,
168
  support_mode_conditioning=not args.disable_support_mode_conditioning,
169
  )
170
  else:
171
+ outputs = model(
172
+ images=images,
173
+ proprio=proprio,
174
+ texts=[language_goal],
175
+ history_images=history_images_tensor,
176
+ history_proprio=history_proprio_tensor,
177
+ )
178
  chosen_chunk = outputs["action_mean"]
179
+ if plan_applied and "planned_chunk" in outputs:
180
  chosen_chunk = outputs["planned_chunk"]
181
  step_action = chosen_chunk[0, 0].detach().float().cpu().numpy()
182
+ if history_steps > 0:
183
+ if len(history_images) >= history_steps:
184
+ history_images = history_images[-history_steps + 1 :]
185
+ history_proprio = history_proprio[-history_steps + 1 :]
186
+ history_images.append(images[0].detach().cpu().numpy())
187
+ history_proprio.append(proprio[0].detach().cpu().numpy())
188
  env_action = absolute_action_from_delta(obs, step_action, ignore_collisions=True)
189
  obs, reward, done = task.step(env_action)
190
  total_reward += float(reward)
 
213
  "# RLBench Rollout Eval",
214
  "",
215
  f"- Checkpoint: `{results['checkpoint']}`",
216
+ f"- Plan requested: `{results['plan_requested']}`",
217
+ f"- Plan applied: `{results['plan_applied']}`",
218
  f"- Support-mode conditioning: `{results['support_mode_conditioning']}`",
219
  f"- Mean success: `{results['mean_success']:.3f}`",
220
  "",
code/reveal_vla_bimanual/models/action_decoder.py CHANGED
@@ -15,6 +15,7 @@ class ChunkDecoderConfig:
15
  dropout: float = 0.1
16
  chunk_size: int = 8
17
  action_dim: int = 14
 
18
  num_candidates: int = 8
19
 
20
 
@@ -30,24 +31,111 @@ class ACTBimanualChunkDecoder(nn.Module):
30
  batch_first=True,
31
  norm_first=True,
32
  )
33
- self.decoder = nn.TransformerDecoder(decoder_layer, num_layers=config.num_layers)
 
 
 
 
 
 
 
 
 
34
  self.query_embed = nn.Embedding(config.chunk_size, config.hidden_dim)
35
- self.action_mean = nn.Linear(config.hidden_dim, config.action_dim)
36
- self.action_log_std = nn.Linear(config.hidden_dim, config.action_dim)
 
 
 
 
 
 
 
 
 
37
  self.proposal_score = nn.Sequential(
38
- nn.LayerNorm(config.hidden_dim),
39
- nn.Linear(config.hidden_dim, 1),
40
  )
41
 
42
- def forward(self, scene_tokens: Tensor) -> dict[str, Tensor]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  batch_size = scene_tokens.shape[0]
44
  query = self.query_embed.weight.unsqueeze(0).expand(batch_size, -1, -1)
45
- decoded = self.decoder(query, scene_tokens)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  return {
47
- "decoded_tokens": decoded,
48
- "action_mean": self.action_mean(decoded),
49
- "action_log_std": self.action_log_std(decoded).clamp(min=-5.0, max=2.0),
50
- "proposal_score": self.proposal_score(decoded.mean(dim=1)).squeeze(-1),
 
 
 
51
  }
52
 
53
  def sample_candidates(self, action_mean: Tensor, action_log_std: Tensor, num_candidates: int | None = None) -> Tensor:
@@ -55,14 +143,17 @@ class ACTBimanualChunkDecoder(nn.Module):
55
  if num_candidates <= 1:
56
  return action_mean.unsqueeze(1)
57
  std = action_log_std.exp()
58
- noise = torch.randn(
59
- action_mean.size(0),
60
- num_candidates,
61
- action_mean.size(1),
62
- action_mean.size(2),
63
- device=action_mean.device,
64
- dtype=action_mean.dtype,
65
- )
 
 
 
66
  candidates = action_mean.unsqueeze(1) + noise * std.unsqueeze(1)
67
  candidates[:, 0] = action_mean
68
  return candidates
 
15
  dropout: float = 0.1
16
  chunk_size: int = 8
17
  action_dim: int = 14
18
+ arm_action_dim: int = 7
19
  num_candidates: int = 8
20
 
21
 
 
31
  batch_first=True,
32
  norm_first=True,
33
  )
34
+ self.revealer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=config.num_layers)
35
+ actor_layer = nn.TransformerDecoderLayer(
36
+ d_model=config.hidden_dim,
37
+ nhead=config.num_heads,
38
+ dim_feedforward=config.ff_dim,
39
+ dropout=config.dropout,
40
+ batch_first=True,
41
+ norm_first=True,
42
+ )
43
+ self.actor_decoder = nn.TransformerDecoder(actor_layer, num_layers=config.num_layers)
44
  self.query_embed = nn.Embedding(config.chunk_size, config.hidden_dim)
45
+ self.actor_role_bias = nn.Parameter(torch.zeros(1, config.chunk_size, config.hidden_dim))
46
+ self.revealer_mean = nn.Linear(config.hidden_dim, config.arm_action_dim)
47
+ self.revealer_log_std = nn.Linear(config.hidden_dim, config.arm_action_dim)
48
+ self.actor_mean = nn.Linear(config.hidden_dim, config.action_dim - config.arm_action_dim)
49
+ self.actor_log_std = nn.Linear(config.hidden_dim, config.action_dim - config.arm_action_dim)
50
+ self.coordination = nn.Sequential(
51
+ nn.LayerNorm(config.hidden_dim * 3),
52
+ nn.Linear(config.hidden_dim * 3, config.hidden_dim),
53
+ nn.GELU(),
54
+ nn.Linear(config.hidden_dim, config.hidden_dim),
55
+ )
56
  self.proposal_score = nn.Sequential(
57
+ nn.LayerNorm(config.hidden_dim * 3),
58
+ nn.Linear(config.hidden_dim * 3, 1),
59
  )
60
 
61
+ def _deterministic_candidate_noise(
62
+ self,
63
+ action_mean: Tensor,
64
+ num_candidates: int,
65
+ ) -> Tensor:
66
+ batch_size, chunk_size, action_dim = action_mean.shape
67
+ noise = torch.zeros(
68
+ batch_size,
69
+ num_candidates,
70
+ chunk_size,
71
+ action_dim,
72
+ device=action_mean.device,
73
+ dtype=action_mean.dtype,
74
+ )
75
+ if num_candidates <= 1:
76
+ return noise
77
+
78
+ candidate_index = torch.arange(1, num_candidates, device=action_mean.device, dtype=action_mean.dtype).view(
79
+ num_candidates - 1, 1, 1
80
+ )
81
+ step_index = torch.arange(chunk_size, device=action_mean.device, dtype=action_mean.dtype).view(1, chunk_size, 1)
82
+ dim_index = torch.arange(action_dim, device=action_mean.device, dtype=action_mean.dtype).view(1, 1, action_dim)
83
+
84
+ base = torch.sin(candidate_index * 0.73 + step_index * 0.37 + dim_index * 0.19)
85
+ base = base + torch.cos(candidate_index * 1.11 + step_index * 0.17 + dim_index * 0.41)
86
+ base = base / base.square().mean(dim=(1, 2), keepdim=True).sqrt().clamp_min(1e-6)
87
+ noise[:, 1:] = base.unsqueeze(0).expand(batch_size, -1, -1, -1)
88
+ return noise
89
+
90
+ def forward(
91
+ self,
92
+ scene_tokens: Tensor,
93
+ reveal_tokens: Tensor | None = None,
94
+ memory_token: Tensor | None = None,
95
+ ) -> dict[str, Tensor]:
96
  batch_size = scene_tokens.shape[0]
97
  query = self.query_embed.weight.unsqueeze(0).expand(batch_size, -1, -1)
98
+ decoder_memory = scene_tokens
99
+ if reveal_tokens is not None:
100
+ decoder_memory = torch.cat([decoder_memory, reveal_tokens], dim=1)
101
+ if memory_token is not None:
102
+ decoder_memory = torch.cat([decoder_memory, memory_token], dim=1)
103
+
104
+ revealer_tokens = self.revealer_decoder(query, decoder_memory)
105
+ actor_query = query + self.actor_role_bias
106
+ actor_tokens = self.actor_decoder(actor_query, torch.cat([decoder_memory, revealer_tokens], dim=1))
107
+ if reveal_tokens is not None:
108
+ reveal_context = reveal_tokens.mean(dim=1, keepdim=True).expand(-1, self.config.chunk_size, -1)
109
+ else:
110
+ reveal_context = scene_tokens.mean(dim=1, keepdim=True).expand(-1, self.config.chunk_size, -1)
111
+ coordination_input = torch.cat([revealer_tokens, actor_tokens, reveal_context], dim=-1)
112
+ coordination = torch.tanh(self.coordination(coordination_input))
113
+ revealer_tokens = revealer_tokens + coordination
114
+ actor_tokens = actor_tokens + coordination
115
+ action_mean = torch.cat([self.revealer_mean(revealer_tokens), self.actor_mean(actor_tokens)], dim=-1)
116
+ action_log_std = torch.cat(
117
+ [
118
+ self.revealer_log_std(revealer_tokens),
119
+ self.actor_log_std(actor_tokens),
120
+ ],
121
+ dim=-1,
122
+ ).clamp(min=-5.0, max=2.0)
123
+ proposal_features = torch.cat(
124
+ [
125
+ revealer_tokens.mean(dim=1),
126
+ actor_tokens.mean(dim=1),
127
+ coordination.mean(dim=1),
128
+ ],
129
+ dim=-1,
130
+ )
131
  return {
132
+ "decoded_tokens": torch.cat([revealer_tokens, actor_tokens], dim=-1),
133
+ "revealer_tokens": revealer_tokens,
134
+ "actor_tokens": actor_tokens,
135
+ "coordination_tokens": coordination,
136
+ "action_mean": action_mean,
137
+ "action_log_std": action_log_std,
138
+ "proposal_score": self.proposal_score(proposal_features).squeeze(-1),
139
  }
140
 
141
  def sample_candidates(self, action_mean: Tensor, action_log_std: Tensor, num_candidates: int | None = None) -> Tensor:
 
143
  if num_candidates <= 1:
144
  return action_mean.unsqueeze(1)
145
  std = action_log_std.exp()
146
+ if self.training:
147
+ noise = torch.randn(
148
+ action_mean.size(0),
149
+ num_candidates,
150
+ action_mean.size(1),
151
+ action_mean.size(2),
152
+ device=action_mean.device,
153
+ dtype=action_mean.dtype,
154
+ )
155
+ else:
156
+ noise = self._deterministic_candidate_noise(action_mean, num_candidates)
157
  candidates = action_mean.unsqueeze(1) + noise * std.unsqueeze(1)
158
  candidates[:, 0] = action_mean
159
  return candidates
code/reveal_vla_bimanual/models/observation_memory.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ import torch
6
+ from torch import Tensor, nn
7
+
8
+
9
+ @dataclass
10
+ class ObservationMemoryConfig:
11
+ hidden_dim: int = 512
12
+ history_steps: int = 2
13
+ num_layers: int = 1
14
+ dropout: float = 0.1
15
+
16
+
17
+ class ObservationMemory(nn.Module):
18
+ def __init__(self, config: ObservationMemoryConfig) -> None:
19
+ super().__init__()
20
+ self.config = config
21
+ self.gru = nn.GRU(
22
+ input_size=config.hidden_dim,
23
+ hidden_size=config.hidden_dim,
24
+ num_layers=config.num_layers,
25
+ batch_first=True,
26
+ dropout=config.dropout if config.num_layers > 1 else 0.0,
27
+ )
28
+ self.token_proj = nn.Sequential(
29
+ nn.LayerNorm(config.hidden_dim),
30
+ nn.Linear(config.hidden_dim, config.hidden_dim),
31
+ nn.GELU(),
32
+ )
33
+ self.uncertainty_head = nn.Sequential(
34
+ nn.LayerNorm(config.hidden_dim),
35
+ nn.Linear(config.hidden_dim, 1),
36
+ )
37
+
38
+ def forward(
39
+ self,
40
+ scene_tokens: Tensor,
41
+ history_scene_tokens: Tensor | None = None,
42
+ ) -> dict[str, Tensor]:
43
+ pooled_current = scene_tokens.mean(dim=1, keepdim=True)
44
+ if history_scene_tokens is not None and history_scene_tokens.numel() > 0:
45
+ history_pooled = history_scene_tokens.mean(dim=2)
46
+ sequence = torch.cat([history_pooled, pooled_current], dim=1)
47
+ else:
48
+ sequence = pooled_current
49
+ memory_sequence, hidden = self.gru(sequence)
50
+ final_state = hidden[-1]
51
+ return {
52
+ "memory_sequence": memory_sequence,
53
+ "memory_state": final_state,
54
+ "memory_token": self.token_proj(final_state).unsqueeze(1),
55
+ "memory_uncertainty": torch.nn.functional.softplus(self.uncertainty_head(final_state)).squeeze(-1),
56
+ }
code/reveal_vla_bimanual/models/planner.py CHANGED
@@ -3,12 +3,15 @@ from __future__ import annotations
3
  from dataclasses import dataclass
4
 
5
  import torch
6
- from torch import Tensor
7
 
8
 
9
  @dataclass
10
  class PlannerConfig:
 
11
  num_candidates: int = 8
 
 
12
  corridor_weight: float = 1.0
13
  persistence_weight: float = 0.5
14
  proposal_weight: float = 0.5
@@ -18,44 +21,69 @@ class PlannerConfig:
18
  visibility_weight: float = 0.25
19
 
20
 
21
- class RevealPlanner:
22
  def __init__(self, config: PlannerConfig) -> None:
 
23
  self.config = config
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
- def score_rollouts(
26
- self,
27
- rollout_state: dict[str, Tensor],
28
- proposal_scores: Tensor,
29
- candidate_chunks: Tensor | None = None,
30
- belief_gain: Tensor | None = None,
31
- ) -> Tensor:
32
- corridor_prob = rollout_state["corridor_logits"].sigmoid().amax(dim=-1).mean(dim=(-1, -2))
33
- persistence = rollout_state["persistence_horizon"].mean(dim=(-1, -2))
34
- disturbance = rollout_state["disturbance_cost"].mean(dim=-1)
35
- reocclusion_penalty = torch.relu(1.0 - rollout_state["corridor_logits"].sigmoid().amax(dim=-1)).mean(dim=(-1, -2))
36
- task_progress = proposal_scores.new_zeros(proposal_scores.shape)
37
- if candidate_chunks is not None:
38
- actor_reach = torch.tanh(candidate_chunks[..., 8]).mean(dim=-1)
39
- actor_retrieve = torch.tanh(candidate_chunks[..., 13]).amax(dim=-1)
40
- task_progress = 0.5 * (actor_reach + 1.0) * 0.5 + 0.5 * (actor_retrieve + 1.0) * 0.5
41
- score = (
42
- self.config.corridor_weight * corridor_prob
43
- + self.config.persistence_weight * persistence
44
- + self.config.proposal_weight * proposal_scores
45
- + self.config.task_progress_weight * task_progress
46
- - self.config.disturbance_weight * disturbance
47
- - self.config.reocclusion_weight * reocclusion_penalty
48
  )
49
- if belief_gain is not None:
50
- score = score + self.config.visibility_weight * belief_gain
51
- return score
52
 
53
- def select_best(self, candidate_chunks: Tensor, rollout_state: dict[str, Tensor], proposal_scores: Tensor) -> dict[str, Tensor]:
54
- scores = self.score_rollouts(rollout_state, proposal_scores, candidate_chunks=candidate_chunks)
55
- best_idx = scores.argmax(dim=-1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  batch_indices = torch.arange(candidate_chunks.shape[0], device=candidate_chunks.device)
57
  return {
58
- "scores": scores,
59
  "best_indices": best_idx,
60
  "best_chunk": candidate_chunks[batch_indices, best_idx],
61
  }
 
3
  from dataclasses import dataclass
4
 
5
  import torch
6
+ from torch import Tensor, nn
7
 
8
 
9
  @dataclass
10
  class PlannerConfig:
11
+ hidden_dim: int = 512
12
  num_candidates: int = 8
13
+ action_dim: int = 14
14
+ utility_margin: float = 0.1
15
  corridor_weight: float = 1.0
16
  persistence_weight: float = 0.5
17
  proposal_weight: float = 0.5
 
21
  visibility_weight: float = 0.25
22
 
23
 
24
+ class RevealPlanner(nn.Module):
25
  def __init__(self, config: PlannerConfig) -> None:
26
+ super().__init__()
27
  self.config = config
28
+ summary_dim = (
29
+ config.action_dim * 2
30
+ + 3
31
+ + 3
32
+ + 1
33
+ + 3
34
+ + 1
35
+ )
36
+ self.trunk = nn.Sequential(
37
+ nn.LayerNorm(summary_dim),
38
+ nn.Linear(summary_dim, config.hidden_dim),
39
+ nn.GELU(),
40
+ nn.Linear(config.hidden_dim, config.hidden_dim),
41
+ nn.GELU(),
42
+ )
43
+ self.success_head = nn.Linear(config.hidden_dim, 1)
44
+ self.risk_head = nn.Linear(config.hidden_dim, 1)
45
 
46
+ def summarize_candidates(self, candidate_chunks: Tensor, rollout_state: dict[str, Tensor]) -> Tensor:
47
+ candidate_mean = candidate_chunks.mean(dim=2)
48
+ candidate_terminal = candidate_chunks[:, :, -1]
49
+ corridor_prob = rollout_state["corridor_logits"].sigmoid().amax(dim=-1).mean(dim=-2)
50
+ persistence = rollout_state["persistence_horizon"].mean(dim=-2)
51
+ disturbance = rollout_state["disturbance_cost"].mean(dim=-1, keepdim=True)
52
+ reocclusion = rollout_state["reocclusion_logit"].sigmoid().mean(dim=-2)
53
+ uncertainty = rollout_state["uncertainty"].mean(dim=-1, keepdim=True)
54
+ return torch.cat(
55
+ [
56
+ candidate_mean,
57
+ candidate_terminal,
58
+ corridor_prob,
59
+ persistence,
60
+ disturbance,
61
+ reocclusion,
62
+ uncertainty,
63
+ ],
64
+ dim=-1,
 
 
 
 
65
  )
 
 
 
66
 
67
+ def score_rollouts(self, rollout_state: dict[str, Tensor], candidate_chunks: Tensor) -> dict[str, Tensor]:
68
+ features = self.summarize_candidates(candidate_chunks, rollout_state)
69
+ hidden = self.trunk(features)
70
+ success_logits = self.success_head(hidden).squeeze(-1)
71
+ risk_values = torch.sigmoid(self.risk_head(hidden)).squeeze(-1)
72
+ utility_scores = success_logits.sigmoid() - risk_values
73
+ return {
74
+ "planner_features": features,
75
+ "planner_hidden": hidden,
76
+ "success_logits": success_logits,
77
+ "risk_values": risk_values,
78
+ "utility_scores": utility_scores,
79
+ }
80
+
81
+ def select_best(self, candidate_chunks: Tensor, rollout_state: dict[str, Tensor]) -> dict[str, Tensor]:
82
+ outputs = self.score_rollouts(rollout_state=rollout_state, candidate_chunks=candidate_chunks)
83
+ best_idx = outputs["utility_scores"].argmax(dim=-1)
84
  batch_indices = torch.arange(candidate_chunks.shape[0], device=candidate_chunks.device)
85
  return {
86
+ **outputs,
87
  "best_indices": best_idx,
88
  "best_chunk": candidate_chunks[batch_indices, best_idx],
89
  }
code/reveal_vla_bimanual/models/policy.py CHANGED
@@ -9,6 +9,7 @@ from torch import Tensor, nn
9
  from models.action_decoder import ACTBimanualChunkDecoder, ChunkDecoderConfig
10
  from models.backbones import FrozenVLBackbone, FrozenVLBackboneConfig
11
  from models.multiview_fusion import MultiViewFusion, MultiViewFusionConfig
 
12
  from models.planner import PlannerConfig, RevealPlanner
13
  from models.reveal_head import RevealHeadConfig, RevealStateHead
14
  from models.world_model import RevealWM, RevealWMConfig
@@ -18,6 +19,7 @@ from models.world_model import RevealWM, RevealWMConfig
18
  class PolicyConfig:
19
  backbone: FrozenVLBackboneConfig = field(default_factory=FrozenVLBackboneConfig)
20
  fusion: MultiViewFusionConfig = field(default_factory=MultiViewFusionConfig)
 
21
  decoder: ChunkDecoderConfig = field(default_factory=ChunkDecoderConfig)
22
  reveal_head: RevealHeadConfig = field(default_factory=RevealHeadConfig)
23
  world_model: RevealWMConfig = field(default_factory=RevealWMConfig)
@@ -30,6 +32,7 @@ class BackboneOnlyPolicy(nn.Module):
30
  self.config = config
31
  self.backbone = FrozenVLBackbone(config.backbone)
32
  self.fusion = MultiViewFusion(config.fusion)
 
33
  self.decoder = ACTBimanualChunkDecoder(config.decoder)
34
 
35
  def _encode_language(
@@ -58,16 +61,69 @@ class BackboneOnlyPolicy(nn.Module):
58
  text_tokens = self._encode_language(images, texts=texts, language_tokens=language_tokens)
59
  return self.fusion(image_tokens=image_tokens, proprio=proprio, language_tokens=text_tokens)
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  def forward(
62
  self,
63
  images: Tensor,
64
  proprio: Tensor,
65
  texts: Sequence[str] | None = None,
66
  language_tokens: dict[str, Tensor] | None = None,
 
 
67
  ) -> dict[str, Tensor]:
68
  scene_tokens = self.encode_scene(images, proprio, texts=texts, language_tokens=language_tokens)
69
- decoded = self.decoder(scene_tokens)
 
 
 
 
 
 
 
70
  decoded["scene_tokens"] = scene_tokens
 
 
71
  return decoded
72
 
73
 
@@ -84,21 +140,43 @@ class RevealBimanualPolicy(BackboneOnlyPolicy):
84
  proprio: Tensor,
85
  texts: Sequence[str] | None = None,
86
  language_tokens: dict[str, Tensor] | None = None,
 
 
87
  plan: bool = True,
88
  support_mode_conditioning: bool = True,
 
89
  ) -> dict[str, Tensor]:
90
- outputs = super().forward(images, proprio, texts=texts, language_tokens=language_tokens)
91
- reveal_state = self.reveal_head(outputs["scene_tokens"])
 
 
 
 
 
 
 
 
 
 
92
  outputs["reveal_state"] = reveal_state
 
93
 
94
- candidate_chunks = self.decoder.sample_candidates(
95
- outputs["action_mean"],
96
- outputs["action_log_std"],
97
- num_candidates=self.config.decoder.num_candidates,
98
  )
99
- outputs["candidate_chunks"] = candidate_chunks
100
 
101
  if plan:
 
 
 
 
 
 
 
 
102
  batch_size, num_candidates, chunk_size, action_dim = candidate_chunks.shape
103
  flat_chunks = candidate_chunks.view(batch_size * num_candidates, chunk_size, action_dim)
104
  tiled_scene = outputs["scene_tokens"].unsqueeze(1).expand(-1, num_candidates, -1, -1)
@@ -118,10 +196,11 @@ class RevealBimanualPolicy(BackboneOnlyPolicy):
118
  selected = self.planner.select_best(
119
  candidate_chunks=candidate_chunks,
120
  rollout_state=reshaped_rollout,
121
- proposal_scores=outputs["proposal_score"].unsqueeze(-1).expand(-1, num_candidates),
122
  )
123
  outputs["planned_rollout"] = reshaped_rollout
124
  outputs["planned_chunk"] = selected["best_chunk"]
125
- outputs["planner_scores"] = selected["scores"]
 
 
126
  outputs["best_candidate_indices"] = selected["best_indices"]
127
  return outputs
 
9
  from models.action_decoder import ACTBimanualChunkDecoder, ChunkDecoderConfig
10
  from models.backbones import FrozenVLBackbone, FrozenVLBackboneConfig
11
  from models.multiview_fusion import MultiViewFusion, MultiViewFusionConfig
12
+ from models.observation_memory import ObservationMemory, ObservationMemoryConfig
13
  from models.planner import PlannerConfig, RevealPlanner
14
  from models.reveal_head import RevealHeadConfig, RevealStateHead
15
  from models.world_model import RevealWM, RevealWMConfig
 
19
  class PolicyConfig:
20
  backbone: FrozenVLBackboneConfig = field(default_factory=FrozenVLBackboneConfig)
21
  fusion: MultiViewFusionConfig = field(default_factory=MultiViewFusionConfig)
22
+ memory: ObservationMemoryConfig = field(default_factory=ObservationMemoryConfig)
23
  decoder: ChunkDecoderConfig = field(default_factory=ChunkDecoderConfig)
24
  reveal_head: RevealHeadConfig = field(default_factory=RevealHeadConfig)
25
  world_model: RevealWMConfig = field(default_factory=RevealWMConfig)
 
32
  self.config = config
33
  self.backbone = FrozenVLBackbone(config.backbone)
34
  self.fusion = MultiViewFusion(config.fusion)
35
+ self.memory = ObservationMemory(config.memory)
36
  self.decoder = ACTBimanualChunkDecoder(config.decoder)
37
 
38
  def _encode_language(
 
61
  text_tokens = self._encode_language(images, texts=texts, language_tokens=language_tokens)
62
  return self.fusion(image_tokens=image_tokens, proprio=proprio, language_tokens=text_tokens)
63
 
64
+ def _expand_language_tokens_for_history(
65
+ self,
66
+ language_tokens: dict[str, Tensor] | None,
67
+ history_steps: int,
68
+ ) -> dict[str, Tensor] | None:
69
+ if language_tokens is None:
70
+ return None
71
+ return {
72
+ key: value.unsqueeze(1).expand(-1, history_steps, *value.shape[1:]).reshape(
73
+ value.shape[0] * history_steps, *value.shape[1:]
74
+ )
75
+ for key, value in language_tokens.items()
76
+ }
77
+
78
+ def encode_history(
79
+ self,
80
+ history_images: Tensor | None,
81
+ history_proprio: Tensor | None,
82
+ texts: Sequence[str] | None = None,
83
+ language_tokens: dict[str, Tensor] | None = None,
84
+ ) -> Tensor | None:
85
+ if history_images is None or history_proprio is None or history_images.numel() == 0:
86
+ return None
87
+ batch_size, history_steps = history_images.shape[:2]
88
+ flat_images = history_images.reshape(batch_size * history_steps, *history_images.shape[2:])
89
+ flat_proprio = history_proprio.reshape(batch_size * history_steps, history_proprio.shape[-1])
90
+ if language_tokens is None:
91
+ if texts is None:
92
+ raise ValueError("Either texts or language_tokens must be provided.")
93
+ flat_texts = [text for text in texts for _ in range(history_steps)]
94
+ flat_language_tokens = None
95
+ else:
96
+ flat_texts = None
97
+ flat_language_tokens = self._expand_language_tokens_for_history(language_tokens, history_steps)
98
+ history_scene = self.encode_scene(
99
+ flat_images,
100
+ flat_proprio,
101
+ texts=flat_texts,
102
+ language_tokens=flat_language_tokens,
103
+ )
104
+ return history_scene.view(batch_size, history_steps, history_scene.shape[1], history_scene.shape[2])
105
+
106
  def forward(
107
  self,
108
  images: Tensor,
109
  proprio: Tensor,
110
  texts: Sequence[str] | None = None,
111
  language_tokens: dict[str, Tensor] | None = None,
112
+ history_images: Tensor | None = None,
113
+ history_proprio: Tensor | None = None,
114
  ) -> dict[str, Tensor]:
115
  scene_tokens = self.encode_scene(images, proprio, texts=texts, language_tokens=language_tokens)
116
+ history_scene_tokens = self.encode_history(
117
+ history_images,
118
+ history_proprio,
119
+ texts=texts,
120
+ language_tokens=language_tokens,
121
+ )
122
+ memory_output = self.memory(scene_tokens, history_scene_tokens=history_scene_tokens)
123
+ decoded = self.decoder(scene_tokens, memory_token=memory_output["memory_token"])
124
  decoded["scene_tokens"] = scene_tokens
125
+ decoded["history_scene_tokens"] = history_scene_tokens
126
+ decoded["memory_output"] = memory_output
127
  return decoded
128
 
129
 
 
140
  proprio: Tensor,
141
  texts: Sequence[str] | None = None,
142
  language_tokens: dict[str, Tensor] | None = None,
143
+ history_images: Tensor | None = None,
144
+ history_proprio: Tensor | None = None,
145
  plan: bool = True,
146
  support_mode_conditioning: bool = True,
147
+ candidate_chunks_override: Tensor | None = None,
148
  ) -> dict[str, Tensor]:
149
+ outputs = super().forward(
150
+ images,
151
+ proprio,
152
+ texts=texts,
153
+ language_tokens=language_tokens,
154
+ history_images=history_images,
155
+ history_proprio=history_proprio,
156
+ )
157
+ reveal_state = self.reveal_head(
158
+ outputs["scene_tokens"],
159
+ memory_token=outputs["memory_output"]["memory_token"],
160
+ )
161
  outputs["reveal_state"] = reveal_state
162
+ outputs["memory_uncertainty"] = outputs["memory_output"]["memory_uncertainty"]
163
 
164
+ decoded = self.decoder(
165
+ outputs["scene_tokens"],
166
+ reveal_tokens=reveal_state["field_tokens"],
167
+ memory_token=outputs["memory_output"]["memory_token"],
168
  )
169
+ outputs.update(decoded)
170
 
171
  if plan:
172
+ candidate_chunks = candidate_chunks_override
173
+ if candidate_chunks is None:
174
+ candidate_chunks = self.decoder.sample_candidates(
175
+ outputs["action_mean"],
176
+ outputs["action_log_std"],
177
+ num_candidates=self.config.decoder.num_candidates,
178
+ )
179
+ outputs["candidate_chunks"] = candidate_chunks
180
  batch_size, num_candidates, chunk_size, action_dim = candidate_chunks.shape
181
  flat_chunks = candidate_chunks.view(batch_size * num_candidates, chunk_size, action_dim)
182
  tiled_scene = outputs["scene_tokens"].unsqueeze(1).expand(-1, num_candidates, -1, -1)
 
196
  selected = self.planner.select_best(
197
  candidate_chunks=candidate_chunks,
198
  rollout_state=reshaped_rollout,
 
199
  )
200
  outputs["planned_rollout"] = reshaped_rollout
201
  outputs["planned_chunk"] = selected["best_chunk"]
202
+ outputs["planner_success_logits"] = selected["success_logits"]
203
+ outputs["planner_risk_values"] = selected["risk_values"]
204
+ outputs["planner_scores"] = selected["utility_scores"]
205
  outputs["best_candidate_indices"] = selected["best_indices"]
206
  return outputs
code/reveal_vla_bimanual/models/reveal_head.py CHANGED
@@ -2,6 +2,8 @@ from __future__ import annotations
2
 
3
  from dataclasses import dataclass
4
 
 
 
5
  from torch import Tensor, nn
6
 
7
 
@@ -12,6 +14,8 @@ class RevealHeadConfig:
12
  num_approach_templates: int = 32
13
  rollout_horizon: int = 5
14
  belief_map_size: int = 32
 
 
15
  predict_belief_map: bool = False
16
 
17
 
@@ -19,37 +23,96 @@ class RevealStateHead(nn.Module):
19
  def __init__(self, config: RevealHeadConfig) -> None:
20
  super().__init__()
21
  self.config = config
22
- self.trunk = nn.Sequential(
 
 
 
 
 
 
 
 
23
  nn.LayerNorm(config.hidden_dim),
24
  nn.Linear(config.hidden_dim, config.hidden_dim),
25
  nn.GELU(),
 
26
  )
27
- self.support_mode = nn.Linear(config.hidden_dim, config.num_support_modes)
28
- self.corridor = nn.Linear(
29
- config.hidden_dim,
30
- config.num_support_modes * config.num_approach_templates,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  )
32
- self.persistence = nn.Linear(config.hidden_dim, config.num_support_modes)
33
- self.disturbance = nn.Linear(config.hidden_dim, 1)
34
- self.belief_map = None
35
- if config.predict_belief_map:
36
- map_side = config.belief_map_size
37
- self.belief_map = nn.Linear(config.hidden_dim, map_side * map_side)
38
 
39
- def forward(self, scene_tokens: Tensor) -> dict[str, Tensor]:
40
- pooled = scene_tokens.mean(dim=1)
41
- hidden = self.trunk(pooled)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  output = {
43
- "support_mode_logits": self.support_mode(hidden),
44
- "corridor_logits": self.corridor(hidden).view(
45
- hidden.shape[0],
46
- self.config.num_support_modes,
47
- self.config.num_approach_templates,
48
- ),
49
- "persistence_horizon": self.persistence(hidden),
50
- "disturbance_cost": self.disturbance(hidden).squeeze(-1),
 
 
 
 
51
  }
52
- if self.belief_map is not None:
53
- side = self.config.belief_map_size
54
- output["belief_map"] = self.belief_map(hidden).view(hidden.shape[0], 1, side, side)
55
  return output
 
2
 
3
  from dataclasses import dataclass
4
 
5
+ import torch
6
+ import torch.nn.functional as F
7
  from torch import Tensor, nn
8
 
9
 
 
14
  num_approach_templates: int = 32
15
  rollout_horizon: int = 5
16
  belief_map_size: int = 32
17
+ field_size: int = 16
18
+ num_heads: int = 4
19
  predict_belief_map: bool = False
20
 
21
 
 
23
def __init__(self, config: RevealHeadConfig) -> None:
    """Build the attention-based reveal-field head described by *config*."""
    super().__init__()
    self.config = config
    hidden = config.hidden_dim
    num_cells = config.field_size * config.field_size
    # One learned query per spatial cell of the reveal field.
    self.field_queries = nn.Parameter(torch.randn(num_cells, hidden) * 0.02)
    self.field_attention = nn.MultiheadAttention(
        embed_dim=hidden,
        num_heads=config.num_heads,
        batch_first=True,
    )
    self.field_mlp = nn.Sequential(
        nn.LayerNorm(hidden),
        nn.Linear(hidden, hidden),
        nn.GELU(),
        nn.Linear(hidden, hidden),
    )
    # Global support-mode classifier over pooled [scene, field, memory] features.
    self.support_mode = nn.Sequential(
        nn.LayerNorm(hidden * 3),
        nn.Linear(hidden * 3, hidden),
        nn.GELU(),
        nn.Linear(hidden, config.num_support_modes),
    )
    # Per-cell 1x1-conv decoders applied to the field grid.
    self.access_field = nn.Conv2d(hidden, config.num_support_modes, kernel_size=1)
    self.persistence_field = nn.Conv2d(hidden, config.num_support_modes, kernel_size=1)
    self.disturbance_field = nn.Conv2d(hidden, 1, kernel_size=1)
    self.uncertainty_field = nn.Conv2d(hidden, 1, kernel_size=1)
    # Re-occlusion prediction from pooled field + memory features.
    self.reocclusion_head = nn.Sequential(
        nn.LayerNorm(hidden * 2),
        nn.Linear(hidden * 2, hidden),
        nn.GELU(),
        nn.Linear(hidden, config.num_support_modes),
    )
    self.latent_summary = nn.Sequential(
        nn.LayerNorm(hidden * 2),
        nn.Linear(hidden * 2, hidden),
        nn.GELU(),
    )
 
 
 
 
 
 
61
 
62
+ def forward(self, scene_tokens: Tensor, memory_token: Tensor | None = None) -> dict[str, Tensor]:
63
+ source_tokens = scene_tokens if memory_token is None else torch.cat([scene_tokens, memory_token], dim=1)
64
+ batch_size = source_tokens.shape[0]
65
+ field_queries = self.field_queries.unsqueeze(0).expand(batch_size, -1, -1)
66
+ field_tokens, _ = self.field_attention(field_queries, source_tokens, source_tokens)
67
+ field_tokens = field_tokens + self.field_mlp(field_tokens)
68
+ side = self.config.field_size
69
+ grid = field_tokens.transpose(1, 2).reshape(batch_size, self.config.hidden_dim, side, side)
70
+ pooled_scene = scene_tokens.mean(dim=1)
71
+ pooled_field = field_tokens.mean(dim=1)
72
+ if memory_token is not None:
73
+ pooled_memory = memory_token.squeeze(1)
74
+ else:
75
+ pooled_memory = pooled_scene.new_zeros(pooled_scene.shape)
76
+ support_input = torch.cat([pooled_scene, pooled_field, pooled_memory], dim=-1)
77
+ access_field = self.access_field(grid)
78
+ persistence_field = torch.sigmoid(self.persistence_field(grid))
79
+ disturbance_field = torch.sigmoid(self.disturbance_field(grid))
80
+ uncertainty_field = F.softplus(self.uncertainty_field(grid))
81
+ corridor_source = access_field.amax(dim=-2)
82
+ corridor_logits = F.interpolate(
83
+ corridor_source,
84
+ size=self.config.num_approach_templates,
85
+ mode="linear",
86
+ align_corners=False,
87
+ )
88
+ access_prob = torch.sigmoid(access_field)
89
+ weighted_persistence = (persistence_field * access_prob).sum(dim=(-1, -2))
90
+ access_mass = access_prob.sum(dim=(-1, -2)).clamp_min(1e-4)
91
+ persistence_horizon = self.config.rollout_horizon * weighted_persistence / access_mass
92
+ disturbance_cost = disturbance_field.mean(dim=(-1, -2)).squeeze(1)
93
+ belief_map = access_field.max(dim=1, keepdim=True).values
94
+ if belief_map.shape[-1] != self.config.belief_map_size:
95
+ belief_map = F.interpolate(
96
+ belief_map,
97
+ size=(self.config.belief_map_size, self.config.belief_map_size),
98
+ mode="bilinear",
99
+ align_corners=False,
100
+ )
101
+ latent_summary = self.latent_summary(torch.cat([pooled_scene, pooled_field], dim=-1))
102
  output = {
103
+ "support_mode_logits": self.support_mode(support_input),
104
+ "corridor_logits": corridor_logits,
105
+ "persistence_horizon": persistence_horizon,
106
+ "disturbance_cost": disturbance_cost,
107
+ "access_field": access_field,
108
+ "persistence_field": persistence_field,
109
+ "disturbance_field": disturbance_field,
110
+ "uncertainty_field": uncertainty_field,
111
+ "field_tokens": field_tokens,
112
+ "latent_summary": latent_summary,
113
+ "reocclusion_logit": self.reocclusion_head(torch.cat([pooled_field, pooled_memory], dim=-1)),
114
+ "persistence_uncertainty": uncertainty_field.mean(dim=(-1, -2)).squeeze(1),
115
  }
116
+ if self.config.predict_belief_map:
117
+ output["belief_map"] = belief_map
 
118
  return output
code/reveal_vla_bimanual/models/world_model.py CHANGED
@@ -24,6 +24,9 @@ class RevealWM(nn.Module):
24
  + config.num_support_modes * config.num_approach_templates
25
  + config.num_support_modes
26
  + 1
 
 
 
27
  )
28
  self.initial = nn.Sequential(
29
  nn.LayerNorm(config.hidden_dim + reveal_dim),
@@ -39,6 +42,8 @@ class RevealWM(nn.Module):
39
  )
40
  self.persistence = nn.Linear(config.hidden_dim, config.num_support_modes)
41
  self.disturbance = nn.Linear(config.hidden_dim, 1)
 
 
42
 
43
  def _flatten_reveal(self, reveal_state: dict[str, Tensor]) -> Tensor:
44
  return torch.cat(
@@ -47,6 +52,9 @@ class RevealWM(nn.Module):
47
  reveal_state["corridor_logits"].flatten(start_dim=1),
48
  reveal_state["persistence_horizon"],
49
  reveal_state["disturbance_cost"].unsqueeze(-1),
 
 
 
50
  ],
51
  dim=-1,
52
  )
@@ -67,4 +75,6 @@ class RevealWM(nn.Module):
67
  ),
68
  "persistence_horizon": self.persistence(rollout),
69
  "disturbance_cost": self.disturbance(rollout).squeeze(-1),
 
 
70
  }
 
24
  + config.num_support_modes * config.num_approach_templates
25
  + config.num_support_modes
26
  + 1
27
+ + config.num_support_modes
28
+ + 1
29
+ + config.hidden_dim
30
  )
31
  self.initial = nn.Sequential(
32
  nn.LayerNorm(config.hidden_dim + reveal_dim),
 
42
  )
43
  self.persistence = nn.Linear(config.hidden_dim, config.num_support_modes)
44
  self.disturbance = nn.Linear(config.hidden_dim, 1)
45
+ self.reocclusion = nn.Linear(config.hidden_dim, config.num_support_modes)
46
+ self.uncertainty = nn.Linear(config.hidden_dim, 1)
47
 
48
  def _flatten_reveal(self, reveal_state: dict[str, Tensor]) -> Tensor:
49
  return torch.cat(
 
52
  reveal_state["corridor_logits"].flatten(start_dim=1),
53
  reveal_state["persistence_horizon"],
54
  reveal_state["disturbance_cost"].unsqueeze(-1),
55
+ reveal_state["reocclusion_logit"],
56
+ reveal_state["persistence_uncertainty"].unsqueeze(-1),
57
+ reveal_state["latent_summary"],
58
  ],
59
  dim=-1,
60
  )
 
75
  ),
76
  "persistence_horizon": self.persistence(rollout),
77
  "disturbance_cost": self.disturbance(rollout).squeeze(-1),
78
+ "reocclusion_logit": self.reocclusion(rollout),
79
+ "uncertainty": torch.nn.functional.softplus(self.uncertainty(rollout)).squeeze(-1),
80
  }
code/reveal_vla_bimanual/reveal_vla_bimanual.egg-info/PKG-INFO CHANGED
@@ -1,7 +1,7 @@
1
  Metadata-Version: 2.4
2
  Name: reveal-vla-bimanual
3
  Version: 0.1.0
4
- Summary: Simulation-first bimanual reveal-and-retrieve prototype
5
  Requires-Python: <3.11,>=3.10
6
  Description-Content-Type: text/markdown
7
  Requires-Dist: accelerate>=0.31.0
@@ -21,7 +21,9 @@ Requires-Dist: transformers>=4.41.0
21
 
22
  # reveal_vla_bimanual
23
 
24
- Simulation-first prototype for bimanual reveal-and-retrieve under elastic occlusion.
 
 
25
 
26
  This repo is structured around five top-level modules:
27
 
@@ -38,3 +40,66 @@ Current bootstrap priorities:
38
  3. Add reveal-state supervision and short-horizon planning for synthetic reveal proxies.
39
 
40
  Upstream dependencies are kept in `/workspace/third_party` and pinned in `docs/upstream_pins.md`.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  Metadata-Version: 2.4
2
  Name: reveal-vla-bimanual
3
  Version: 0.1.0
4
+ Summary: Language-conditioned bimanual reveal-and-retrieve policy prototype
5
  Requires-Python: <3.11,>=3.10
6
  Description-Content-Type: text/markdown
7
  Requires-Dist: accelerate>=0.31.0
 
21
 
22
  # reveal_vla_bimanual
23
 
24
+ Simulation-first prototype for a language-conditioned bimanual reveal-and-retrieve policy under elastic occlusion.
25
+
26
+ This repo is not a generalist VLA backbone in the RT-2 / OpenVLA / Octo sense. The current contribution is the reveal-state machinery layered on top of a frozen vision-language encoder.
27
 
28
  This repo is structured around five top-level modules:
29
 
 
40
  3. Add reveal-state supervision and short-horizon planning for synthetic reveal proxies.
41
 
42
  Upstream dependencies are kept in `/workspace/third_party` and pinned in `docs/upstream_pins.md`.
43
+
44
+ ## RLBench env A
45
+
46
+ The RLBench / PerAct2 stack is pinned to Python 3.10 and lives in `/workspace/envs/rlbench`.
47
+
48
+ Bring it up with:
49
+
50
+ ```bash
51
+ /workspace/reveal_vla_bimanual/scripts/setup_env_a_rlbench.sh
52
+ /workspace/reveal_vla_bimanual/scripts/setup_rlbench_headless_x.sh
53
+ /workspace/reveal_vla_bimanual/scripts/start_rlbench_x.sh
54
+ ```
55
+
56
+ Verify GPU GL on the headless display:
57
+
58
+ ```bash
59
+ DISPLAY=:99 glxinfo -B
60
+ ```
61
+
62
+ Run the RLBench launch/reset/step smoke test:
63
+
64
+ ```bash
65
+ env \
66
+ DISPLAY=:99 \
67
+ XDG_RUNTIME_DIR=/tmp/runtime-root \
68
+ COPPELIASIM_ROOT=/workspace/assets/coppeliasim_v4_1_0 \
69
+ LD_LIBRARY_PATH=/workspace/system_shims/nvidia$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 | cut -d. -f1)/usr/lib/x86_64-linux-gnu:/workspace/system_shims/nvidia$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 | cut -d. -f1)/usr/lib/x86_64-linux-gnu/nvidia:/workspace/assets/coppeliasim_v4_1_0 \
70
+ QT_QPA_PLATFORM_PLUGIN_PATH=/workspace/assets/coppeliasim_v4_1_0 \
71
+ /workspace/.tools/micromamba/bin/micromamba run \
72
+ -r /workspace/.micromamba \
73
+ -p /workspace/envs/rlbench \
74
+ python -m sim_rlbench.launch_smoke --headless
75
+ ```
76
+
77
+ The working benchmark interface is fixed to three cameras only:
78
+
79
+ - `front`
80
+ - `wrist_left`
81
+ - `wrist_right`
82
+
83
+ The smoke test covers launch, bimanual task reset, canonical observation extraction, and one bimanual action step in `headless=True`, which is the same mode used by the upstream PerAct2-style training stack.
84
+
85
+ Generate the PerAct2-compatible train command for the fixed 3-camera interface with:
86
+
87
+ ```bash
88
+ micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
89
+ python -m sim_rlbench.smoke_test --print-train-command
90
+ ```
91
+
92
+ Download the published PerAct2 demos into `/workspace/data/rlbench2` with checksum verification:
93
+
94
+ ```bash
95
+ micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
96
+ python -m sim_rlbench.dataset_download --resolution 256 --splits train
97
+ ```
98
+
99
+ If you want the archives unpacked directly into the demo root expected by RLBench, add `--extract`:
100
+
101
+ ```bash
102
+ apt-get install -y squashfs-tools
103
+ micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
104
+ python -m sim_rlbench.dataset_download --resolution 256 --splits train --extract
105
+ ```
code/reveal_vla_bimanual/reveal_vla_bimanual.egg-info/SOURCES.txt CHANGED
@@ -4,10 +4,14 @@ eval/__init__.py
4
  eval/ablations.py
5
  eval/metrics.py
6
  eval/report.py
 
 
 
7
  models/__init__.py
8
  models/action_decoder.py
9
  models/backbones.py
10
  models/multiview_fusion.py
 
11
  models/planner.py
12
  models/policy.py
13
  models/reveal_head.py
@@ -21,15 +25,26 @@ reveal_vla_bimanual.egg-info/requires.txt
21
  reveal_vla_bimanual.egg-info/top_level.txt
22
  sim_reveal/__init__.py
23
  sim_reveal/base.py
 
 
 
 
24
  sim_reveal/labels.py
 
25
  sim_reveal/proxy_specs.py
26
  sim_reveal/teachers.py
27
  sim_rlbench/__init__.py
28
  sim_rlbench/camera_spec.py
 
 
 
 
29
  sim_rlbench/obs_adapter.py
30
  sim_rlbench/peract2_runner.py
31
  sim_rlbench/smoke_test.py
32
  sim_rlbench/task_splits.py
33
  train/__init__.py
34
  train/losses.py
 
 
35
  train/trainer.py
 
4
  eval/ablations.py
5
  eval/metrics.py
6
  eval/report.py
7
+ eval/run_ablations.py
8
+ eval/run_reveal_benchmark.py
9
+ eval/run_rlbench_rollout_eval.py
10
  models/__init__.py
11
  models/action_decoder.py
12
  models/backbones.py
13
  models/multiview_fusion.py
14
+ models/observation_memory.py
15
  models/planner.py
16
  models/policy.py
17
  models/reveal_head.py
 
25
  reveal_vla_bimanual.egg-info/top_level.txt
26
  sim_reveal/__init__.py
27
  sim_reveal/base.py
28
+ sim_reveal/dataset.py
29
+ sim_reveal/generate_dataset.py
30
+ sim_reveal/isaac_smoke.py
31
+ sim_reveal/isaac_wrapper.py
32
  sim_reveal/labels.py
33
+ sim_reveal/procedural_envs.py
34
  sim_reveal/proxy_specs.py
35
  sim_reveal/teachers.py
36
  sim_rlbench/__init__.py
37
  sim_rlbench/camera_spec.py
38
+ sim_rlbench/dataset.py
39
+ sim_rlbench/dataset_download.py
40
+ sim_rlbench/generate_smoke_dataset.py
41
+ sim_rlbench/launch_smoke.py
42
  sim_rlbench/obs_adapter.py
43
  sim_rlbench/peract2_runner.py
44
  sim_rlbench/smoke_test.py
45
  sim_rlbench/task_splits.py
46
  train/__init__.py
47
  train/losses.py
48
+ train/run_experiment.py
49
+ train/run_rlbench_experiment.py
50
  train/trainer.py
code/reveal_vla_bimanual/scripts/start_rlbench_x.sh CHANGED
@@ -8,11 +8,12 @@ DISPLAY_ID=":${DISPLAY_NUM}"
8
  LOG_DIR="${ROOT_DIR}/logs"
9
  LOG_FILE="${LOG_DIR}/x${DISPLAY_NUM}.log"
10
  PID_FILE="${LOG_DIR}/x${DISPLAY_NUM}.pid"
 
 
11
 
12
  DRIVER_VERSION="${NVIDIA_DRIVER_VERSION:-$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1)}"
13
  DRIVER_BRANCH="${DRIVER_VERSION%%.*}"
14
  SHIM_ROOT="${ROOT_DIR}/system_shims/nvidia${DRIVER_BRANCH}"
15
- XORG_CONF="${PROJECT_DIR}/docs/xorg.rtx6000.conf"
16
  MODULE_PATH="${SHIM_ROOT}/usr/lib/x86_64-linux-gnu/nvidia/xorg,/usr/lib/xorg/modules"
17
  SHIM_LD_PATH="${SHIM_ROOT}/usr/lib/x86_64-linux-gnu:${SHIM_ROOT}/usr/lib/x86_64-linux-gnu/nvidia"
18
  XORG_BIN="${XORG_BIN:-$(command -v Xorg || true)}"
@@ -22,6 +23,33 @@ fi
22
 
23
  mkdir -p "${LOG_DIR}"
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  find_x_pid() {
26
  ps -eo pid=,args= | awk -v display="${DISPLAY_ID}" -v conf="${XORG_CONF}" '
27
  $0 ~ display && $0 ~ conf && $0 ~ /(^|[[:space:]])([^[:space:]]*\/)?Xorg([[:space:]]|$)/ {
@@ -37,6 +65,19 @@ if [[ ! -f "${SHIM_ROOT}/usr/lib/x86_64-linux-gnu/nvidia/xorg/libglxserver_nvidi
37
  exit 1
38
  fi
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  PID="$(find_x_pid || true)"
41
  if [[ -n "${PID}" ]]; then
42
  echo "X already running on ${DISPLAY_ID}"
 
8
  LOG_DIR="${ROOT_DIR}/logs"
9
  LOG_FILE="${LOG_DIR}/x${DISPLAY_NUM}.log"
10
  PID_FILE="${LOG_DIR}/x${DISPLAY_NUM}.pid"
11
+ XORG_TEMPLATE="${PROJECT_DIR}/docs/xorg.rtx6000.conf"
12
+ XORG_CONF="${LOG_DIR}/x${DISPLAY_NUM}.conf"
13
 
14
  DRIVER_VERSION="${NVIDIA_DRIVER_VERSION:-$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1)}"
15
  DRIVER_BRANCH="${DRIVER_VERSION%%.*}"
16
  SHIM_ROOT="${ROOT_DIR}/system_shims/nvidia${DRIVER_BRANCH}"
 
17
  MODULE_PATH="${SHIM_ROOT}/usr/lib/x86_64-linux-gnu/nvidia/xorg,/usr/lib/xorg/modules"
18
  SHIM_LD_PATH="${SHIM_ROOT}/usr/lib/x86_64-linux-gnu:${SHIM_ROOT}/usr/lib/x86_64-linux-gnu/nvidia"
19
  XORG_BIN="${XORG_BIN:-$(command -v Xorg || true)}"
 
23
 
24
  mkdir -p "${LOG_DIR}"
25
 
26
# Print the Xorg "PCI:bus:device:function" BusID for the primary NVIDIA GPU.
# Resolution order: explicit XORG_BUS_ID override, then XORG_BUS_ID_RAW or
# nvidia-smi output (hex "domain:bus:device.function") converted to decimal.
# Returns non-zero when no bus id can be determined.
derive_bus_id() {
  # Fix: honor the explicit override before invoking nvidia-smi, so an
  # override works even on hosts where nvidia-smi is absent or failing.
  if [[ -n "${XORG_BUS_ID:-}" ]]; then
    printf '%s\n' "${XORG_BUS_ID}"
    return 0
  fi
  local raw_bus_id="${XORG_BUS_ID_RAW:-$(nvidia-smi --query-gpu=pci.bus_id --format=csv,noheader | head -n1 | tr -d ' ')}"
  if [[ -z "${raw_bus_id}" ]]; then
    return 1
  fi
  # Xorg expects decimal fields; nvidia-smi reports hexadecimal.
  python3 - "${raw_bus_id}" <<'PY'
import sys

raw = sys.argv[1]
_, bus, device_func = raw.split(":")
device, function = device_func.split(".")
bus = int(bus, 16)
device = int(device, 16)
function = int(function, 16)
print(f"PCI:{bus}:{device}:{function}")
PY
}
47
+
48
# Render the Xorg template with the given BusID substituted in, writing the
# result to the per-display config path (XORG_CONF).
write_xorg_config() {
  local new_bus_id="$1"
  # Replace whatever BusID the template ships with by the detected one.
  sed -E "s/BusID[[:space:]]+\"[^\"]+\"/BusID \"${new_bus_id}\"/" "${XORG_TEMPLATE}" > "${XORG_CONF}"
}
52
+
53
  find_x_pid() {
54
  ps -eo pid=,args= | awk -v display="${DISPLAY_ID}" -v conf="${XORG_CONF}" '
55
  $0 ~ display && $0 ~ conf && $0 ~ /(^|[[:space:]])([^[:space:]]*\/)?Xorg([[:space:]]|$)/ {
 
65
  exit 1
66
  fi
67
 
68
+ if [[ ! -f "${XORG_TEMPLATE}" ]]; then
69
+ echo "missing Xorg template at ${XORG_TEMPLATE}" >&2
70
+ exit 1
71
+ fi
72
+
73
+ BUS_ID="$(derive_bus_id || true)"
74
+ if [[ -z "${BUS_ID}" ]]; then
75
+ echo "failed to determine NVIDIA BusID from nvidia-smi" >&2
76
+ exit 1
77
+ fi
78
+
79
+ write_xorg_config "${BUS_ID}"
80
+
81
  PID="$(find_x_pid || true)"
82
  if [[ -n "${PID}" ]]; then
83
  echo "X already running on ${DISPLAY_ID}"
code/reveal_vla_bimanual/sim_reveal/dataset.py CHANGED
@@ -7,8 +7,33 @@ import torch
7
  from torch import Tensor
8
  from torch.utils.data import Dataset
9
 
 
 
10
  from sim_reveal.procedural_envs import available_proxy_names, make_proxy_env, render_views_from_state
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  def collect_teacher_dataset(
14
  proxy_names: Sequence[str] | None = None,
@@ -17,6 +42,8 @@ def collect_teacher_dataset(
17
  seed: int = 0,
18
  chunk_horizon: int = 8,
19
  rollout_horizon: int = 5,
 
 
20
  ) -> dict[str, Any]:
21
  proxy_names = tuple(proxy_names or available_proxy_names())
22
  samples: list[dict[str, Any]] = []
@@ -32,19 +59,41 @@ def collect_teacher_dataset(
32
  seed=seed + proxy_offset * 10_000 + episode_idx,
33
  rollout_horizon=rollout_horizon,
34
  )
35
- _, privileged_state = env.reset(seed=seed + proxy_offset * 10_000 + episode_idx)
 
36
  while True:
37
  action_chunk, rollout = env.teacher_chunk_and_rollout(
38
  chunk_horizon=chunk_horizon,
39
  rollout_horizon=rollout_horizon,
40
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  samples.append(
42
  {
 
43
  "proxy_name": proxy_name,
44
  "episode_id": episode_idx,
45
  "render_state": env.render_state(privileged_state),
46
- "proprio": env.get_observation(privileged_state)["proprio"].astype("float32"),
47
- "language_goal": env.get_observation(privileged_state)["text"],
48
  "action_chunk": action_chunk.astype("float32"),
49
  "support_mode": int(privileged_state["support_mode"]),
50
  "corridor_feasible": privileged_state["corridor_feasible"].astype("float32"),
@@ -55,10 +104,22 @@ def collect_teacher_dataset(
55
  "rollout_corridor_feasible": rollout["rollout_corridor_feasible"].astype("float32"),
56
  "rollout_persistence_horizon": rollout["rollout_persistence_horizon"].astype("float32"),
57
  "rollout_disturbance_cost": rollout["rollout_disturbance_cost"].astype("float32"),
 
 
 
 
 
 
58
  }
59
  )
60
  proxy_samples += 1
61
  _, _, terminated, truncated, privileged_state = env.step(env.teacher_action())
 
 
 
 
 
 
62
  if terminated:
63
  proxy_success += 1
64
  if terminated or truncated:
@@ -69,9 +130,12 @@ def collect_teacher_dataset(
69
  "teacher_success": proxy_success / float(max(1, episodes_per_proxy)),
70
  }
71
  return {
 
72
  "resolution": resolution,
73
  "chunk_horizon": chunk_horizon,
74
  "rollout_horizon": rollout_horizon,
 
 
75
  "samples": samples,
76
  "summary": summary,
77
  }
@@ -98,11 +162,29 @@ class RevealOfflineDataset(Dataset[dict[str, Any]]):
98
 
99
  def __getitem__(self, index: int) -> dict[str, Any]:
100
  sample = self.samples[index]
 
101
  images = render_views_from_state(
102
  proxy_name=sample["proxy_name"],
103
  render_state=sample["render_state"],
104
  resolution=self.resolution,
105
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  stacked = torch.from_numpy(
107
  torch.stack(
108
  [
@@ -113,8 +195,14 @@ class RevealOfflineDataset(Dataset[dict[str, Any]]):
113
  dim=0,
114
  ).numpy()
115
  ).permute(0, 3, 1, 2).float() / 255.0
 
 
 
 
116
  return {
117
  "images": stacked,
 
 
118
  "proprio": torch.as_tensor(sample["proprio"], dtype=torch.float32),
119
  "texts": sample["language_goal"],
120
  "action_chunk": torch.as_tensor(sample["action_chunk"], dtype=torch.float32),
@@ -127,6 +215,17 @@ class RevealOfflineDataset(Dataset[dict[str, Any]]):
127
  "rollout_corridor_feasible": torch.as_tensor(sample["rollout_corridor_feasible"], dtype=torch.float32),
128
  "rollout_persistence_horizon": torch.as_tensor(sample["rollout_persistence_horizon"], dtype=torch.float32),
129
  "rollout_disturbance_cost": torch.as_tensor(sample["rollout_disturbance_cost"], dtype=torch.float32),
 
 
 
 
 
 
 
 
 
 
 
130
  "proxy_name": sample["proxy_name"],
131
  "episode_id": sample["episode_id"],
132
  }
 
7
  from torch import Tensor
8
  from torch.utils.data import Dataset
9
 
10
+ import numpy as np
11
+
12
  from sim_reveal.procedural_envs import available_proxy_names, make_proxy_env, render_views_from_state
13
 
14
+ NOLEAK_PROXY_DATASET_VERSION = "reveal_proxy_v4_noleak_counterfactual"
15
+ LEGACY_PRIVILEGED_RENDER_KEYS = frozenset(
16
+ {
17
+ "target_template",
18
+ "support_mode",
19
+ "visibility",
20
+ "actor_template",
21
+ "actor_progress",
22
+ "corridor_current",
23
+ }
24
+ )
25
+
26
+
27
+ def _assert_noleak_sample(sample: dict[str, Any]) -> None:
28
+ render_state = sample.get("render_state", {})
29
+ leaked_keys = sorted(LEGACY_PRIVILEGED_RENDER_KEYS.intersection(render_state))
30
+ if leaked_keys:
31
+ joined = ", ".join(leaked_keys)
32
+ raise ValueError(
33
+ "Legacy leaked proxy sample detected. Rebuild the dataset with the current "
34
+ f"sim_reveal/procedural_envs.py. Privileged render keys found: {joined}"
35
+ )
36
+
37
 
38
  def collect_teacher_dataset(
39
  proxy_names: Sequence[str] | None = None,
 
42
  seed: int = 0,
43
  chunk_horizon: int = 8,
44
  rollout_horizon: int = 5,
45
+ history_steps: int = 2,
46
+ planner_candidates: int = 4,
47
  ) -> dict[str, Any]:
48
  proxy_names = tuple(proxy_names or available_proxy_names())
49
  samples: list[dict[str, Any]] = []
 
59
  seed=seed + proxy_offset * 10_000 + episode_idx,
60
  rollout_horizon=rollout_horizon,
61
  )
62
+ observation, privileged_state = env.reset(seed=seed + proxy_offset * 10_000 + episode_idx)
63
+ history_buffer: list[dict[str, Any]] = []
64
  while True:
65
  action_chunk, rollout = env.teacher_chunk_and_rollout(
66
  chunk_horizon=chunk_horizon,
67
  rollout_horizon=rollout_horizon,
68
  )
69
+ observation = env.get_observation(privileged_state)
70
+ candidate_action_chunks, candidate_outcomes = env.sample_candidate_action_chunks(
71
+ teacher_chunk=action_chunk,
72
+ num_candidates=planner_candidates,
73
+ rollout_horizon=rollout_horizon,
74
+ )
75
+ padded_history_render_states = []
76
+ padded_history_proprio = []
77
+ history_count = min(history_steps, len(history_buffer))
78
+ pad_count = history_steps - history_count
79
+ if history_count > 0:
80
+ recent_history = history_buffer[-history_count:]
81
+ else:
82
+ recent_history = []
83
+ for _ in range(pad_count):
84
+ padded_history_render_states.append(env.render_state(privileged_state))
85
+ padded_history_proprio.append(np.zeros_like(observation["proprio"], dtype=np.float32))
86
+ for item in recent_history:
87
+ padded_history_render_states.append(item["render_state"])
88
+ padded_history_proprio.append(item["proprio"])
89
  samples.append(
90
  {
91
+ "dataset_version": NOLEAK_PROXY_DATASET_VERSION,
92
  "proxy_name": proxy_name,
93
  "episode_id": episode_idx,
94
  "render_state": env.render_state(privileged_state),
95
+ "proprio": observation["proprio"].astype("float32"),
96
+ "language_goal": observation["text"],
97
  "action_chunk": action_chunk.astype("float32"),
98
  "support_mode": int(privileged_state["support_mode"]),
99
  "corridor_feasible": privileged_state["corridor_feasible"].astype("float32"),
 
104
  "rollout_corridor_feasible": rollout["rollout_corridor_feasible"].astype("float32"),
105
  "rollout_persistence_horizon": rollout["rollout_persistence_horizon"].astype("float32"),
106
  "rollout_disturbance_cost": rollout["rollout_disturbance_cost"].astype("float32"),
107
+ "history_render_states": padded_history_render_states,
108
+ "history_proprio": np.stack(padded_history_proprio, axis=0).astype("float32")
109
+ if padded_history_proprio
110
+ else np.zeros((0, observation["proprio"].shape[0]), dtype=np.float32),
111
+ "candidate_action_chunks": candidate_action_chunks.astype("float32"),
112
+ **candidate_outcomes,
113
  }
114
  )
115
  proxy_samples += 1
116
  _, _, terminated, truncated, privileged_state = env.step(env.teacher_action())
117
+ history_buffer.append(
118
+ {
119
+ "render_state": env.render_state(privileged_state),
120
+ "proprio": env.get_observation(privileged_state)["proprio"].astype("float32"),
121
+ }
122
+ )
123
  if terminated:
124
  proxy_success += 1
125
  if terminated or truncated:
 
130
  "teacher_success": proxy_success / float(max(1, episodes_per_proxy)),
131
  }
132
  return {
133
+ "dataset_version": NOLEAK_PROXY_DATASET_VERSION,
134
  "resolution": resolution,
135
  "chunk_horizon": chunk_horizon,
136
  "rollout_horizon": rollout_horizon,
137
+ "history_steps": history_steps,
138
+ "planner_candidates": planner_candidates,
139
  "samples": samples,
140
  "summary": summary,
141
  }
 
162
 
163
  def __getitem__(self, index: int) -> dict[str, Any]:
164
  sample = self.samples[index]
165
+ _assert_noleak_sample(sample)
166
  images = render_views_from_state(
167
  proxy_name=sample["proxy_name"],
168
  render_state=sample["render_state"],
169
  resolution=self.resolution,
170
  )
171
+ history_images = []
172
+ for history_state in sample.get("history_render_states", []):
173
+ rendered = render_views_from_state(
174
+ proxy_name=sample["proxy_name"],
175
+ render_state=history_state,
176
+ resolution=self.resolution,
177
+ )
178
+ history_images.append(
179
+ torch.stack(
180
+ [
181
+ torch.from_numpy(rendered["front"]),
182
+ torch.from_numpy(rendered["wrist_left"]),
183
+ torch.from_numpy(rendered["wrist_right"]),
184
+ ],
185
+ dim=0,
186
+ )
187
+ )
188
  stacked = torch.from_numpy(
189
  torch.stack(
190
  [
 
195
  dim=0,
196
  ).numpy()
197
  ).permute(0, 3, 1, 2).float() / 255.0
198
+ if history_images:
199
+ history_stacked = torch.stack(history_images, dim=0).permute(0, 1, 4, 2, 3).float() / 255.0
200
+ else:
201
+ history_stacked = torch.zeros((0, 3, 3, self.resolution, self.resolution), dtype=torch.float32)
202
  return {
203
  "images": stacked,
204
+ "history_images": history_stacked,
205
+ "history_proprio": torch.as_tensor(sample.get("history_proprio", []), dtype=torch.float32),
206
  "proprio": torch.as_tensor(sample["proprio"], dtype=torch.float32),
207
  "texts": sample["language_goal"],
208
  "action_chunk": torch.as_tensor(sample["action_chunk"], dtype=torch.float32),
 
215
  "rollout_corridor_feasible": torch.as_tensor(sample["rollout_corridor_feasible"], dtype=torch.float32),
216
  "rollout_persistence_horizon": torch.as_tensor(sample["rollout_persistence_horizon"], dtype=torch.float32),
217
  "rollout_disturbance_cost": torch.as_tensor(sample["rollout_disturbance_cost"], dtype=torch.float32),
218
+ "candidate_action_chunks": torch.as_tensor(sample["candidate_action_chunks"], dtype=torch.float32),
219
+ "candidate_rollout_support_mode": torch.as_tensor(sample["candidate_rollout_support_mode"], dtype=torch.long),
220
+ "candidate_rollout_corridor_feasible": torch.as_tensor(sample["candidate_rollout_corridor_feasible"], dtype=torch.float32),
221
+ "candidate_rollout_persistence_horizon": torch.as_tensor(sample["candidate_rollout_persistence_horizon"], dtype=torch.float32),
222
+ "candidate_rollout_disturbance_cost": torch.as_tensor(sample["candidate_rollout_disturbance_cost"], dtype=torch.float32),
223
+ "candidate_retrieval_success": torch.as_tensor(sample["candidate_retrieval_success"], dtype=torch.float32),
224
+ "candidate_final_disturbance_cost": torch.as_tensor(sample["candidate_final_disturbance_cost"], dtype=torch.float32),
225
+ "candidate_reocclusion_rate": torch.as_tensor(sample["candidate_reocclusion_rate"], dtype=torch.float32),
226
+ "candidate_visibility_integral": torch.as_tensor(sample["candidate_visibility_integral"], dtype=torch.float32),
227
+ "candidate_risk": torch.as_tensor(sample["candidate_risk"], dtype=torch.float32),
228
+ "candidate_utility": torch.as_tensor(sample["candidate_utility"], dtype=torch.float32),
229
  "proxy_name": sample["proxy_name"],
230
  "episode_id": sample["episode_id"],
231
  }
code/reveal_vla_bimanual/sim_reveal/generate_dataset.py CHANGED
@@ -15,6 +15,8 @@ def main() -> None:
15
  parser.add_argument("--seed", type=int, default=0)
16
  parser.add_argument("--chunk-horizon", type=int, default=8)
17
  parser.add_argument("--rollout-horizon", type=int, default=5)
 
 
18
  parser.add_argument("--output-path", default="/workspace/data/reveal_proxy/reveal_proxy_teacher.pt")
19
  args = parser.parse_args()
20
 
@@ -25,6 +27,8 @@ def main() -> None:
25
  seed=args.seed,
26
  chunk_horizon=args.chunk_horizon,
27
  rollout_horizon=args.rollout_horizon,
 
 
28
  )
29
  output_path = save_teacher_dataset(Path(args.output_path), dataset_bundle)
30
  payload = {
 
15
  parser.add_argument("--seed", type=int, default=0)
16
  parser.add_argument("--chunk-horizon", type=int, default=8)
17
  parser.add_argument("--rollout-horizon", type=int, default=5)
18
+ parser.add_argument("--history-steps", type=int, default=2)
19
+ parser.add_argument("--planner-candidates", type=int, default=4)
20
  parser.add_argument("--output-path", default="/workspace/data/reveal_proxy/reveal_proxy_teacher.pt")
21
  args = parser.parse_args()
22
 
 
27
  seed=args.seed,
28
  chunk_horizon=args.chunk_horizon,
29
  rollout_horizon=args.rollout_horizon,
30
+ history_steps=args.history_steps,
31
+ planner_candidates=args.planner_candidates,
32
  )
33
  output_path = save_teacher_dataset(Path(args.output_path), dataset_bundle)
34
  payload = {
code/reveal_vla_bimanual/sim_reveal/procedural_envs.py CHANGED
@@ -136,6 +136,12 @@ class ProceduralRevealEnv:
136
  "disturbance": self.disturbance,
137
  "target_template": self.target_template,
138
  "target_depth": self.target_depth,
 
 
 
 
 
 
139
  "holding": self.holding,
140
  "transferred": self.transferred,
141
  "retrieved": self.retrieved,
@@ -151,6 +157,12 @@ class ProceduralRevealEnv:
151
  self.disturbance = float(state["disturbance"])
152
  self.target_template = int(state["target_template"])
153
  self.target_depth = float(state["target_depth"])
 
 
 
 
 
 
154
  self.holding = bool(state["holding"])
155
  self.transferred = bool(state["transferred"])
156
  self.retrieved = bool(state["retrieved"])
@@ -167,6 +179,13 @@ class ProceduralRevealEnv:
167
  self.disturbance = float(self.rng.uniform(0.02, 0.12))
168
  self.target_template = int(self.rng.integers(4, self.num_templates - 4))
169
  self.target_depth = float(self.rng.uniform(0.15, 0.45))
 
 
 
 
 
 
 
170
  self.holding = False
171
  self.transferred = False
172
  self.retrieved = False
@@ -287,34 +306,25 @@ class ProceduralRevealEnv:
287
  }
288
 
289
  def render_state(self, privileged_state: dict[str, Any] | None = None) -> dict[str, Any]:
290
- privileged_state = privileged_state or self.get_privileged_state()
291
- current_mode = int(privileged_state["support_mode"])
292
  return {
293
  "opening": float(self.opening),
294
  "disturbance": float(self.disturbance),
295
- "target_template": int(self.target_template),
296
- "support_mode": current_mode,
297
- "visibility": float(privileged_state["visibility"]),
298
- "actor_template": int(self.last_actor_template),
299
- "actor_progress": float(self.actor_progress),
300
- "corridor_current": privileged_state["corridor_feasible"][current_mode].astype(np.float32),
 
301
  "step_fraction": float(self.step_count / max(1, self.max_steps)),
302
  }
303
 
304
  def _proprio(self, privileged_state: dict[str, Any]) -> np.ndarray:
305
- mode = privileged_state["support_mode"]
306
  features = np.zeros((32,), dtype=np.float32)
307
- features[0] = self.opening
308
- features[1] = self.disturbance
309
- features[2] = privileged_state["visibility"]
310
- features[3 + mode] = 1.0
311
- features[6] = self.target_template / float(self.num_templates - 1)
312
- features[7] = self.last_actor_template / float(self.num_templates - 1)
313
- features[8] = self.step_count / float(max(1, self.max_steps))
314
- features[9:12] = privileged_state["persistence_horizon"] / float(self.rollout_horizon)
315
- features[12] = float(privileged_state["corridor_feasible"][mode].any())
316
- features[13] = float(self.retrieved)
317
- features[14] = self.actor_progress
318
  return features
319
 
320
  def get_observation(self, privileged_state: dict[str, Any] | None = None) -> dict[str, Any]:
@@ -331,7 +341,6 @@ class ProceduralRevealEnv:
331
  "proprio": self._proprio(privileged_state),
332
  "text": PROXY_GOALS[self.proxy_name],
333
  "camera_names": self.camera_names,
334
- "render_state": render_state,
335
  }
336
 
337
  def teacher_action(self) -> np.ndarray:
@@ -402,6 +411,105 @@ class ProceduralRevealEnv:
402
  "rollout_disturbance_cost": np.asarray(rollout_disturbance, dtype=np.float32),
403
  }
404
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
  def step(self, action: np.ndarray) -> tuple[dict[str, Any], float, bool, bool, dict[str, Any]]:
406
  action = np.asarray(action, dtype=np.float32)
407
  mode = self._mode_from_action(action)
@@ -483,12 +591,13 @@ def render_views_from_state(
483
  dynamics = PROXY_DYNAMICS[proxy_name]
484
  opening = float(render_state["opening"])
485
  disturbance = float(render_state["disturbance"])
486
- target_template = int(render_state["target_template"])
487
- support_mode = int(render_state["support_mode"])
488
- visibility = float(render_state["visibility"])
489
- actor_template = int(render_state["actor_template"])
490
- actor_progress = float(render_state["actor_progress"])
491
- corridor_current = np.asarray(render_state["corridor_current"], dtype=np.float32)
 
492
  step_fraction = float(render_state["step_fraction"])
493
 
494
  height = width = resolution
@@ -498,44 +607,65 @@ def render_views_from_state(
498
  x = np.linspace(0.0, 1.0, width, dtype=np.float32)
499
  y = np.linspace(0.0, 1.0, height, dtype=np.float32)
500
  yy, xx = np.meshgrid(y, x, indexing="ij")
501
- center_x = target_template / float(max(1, num_templates - 1))
502
- gap_width = 0.04 + 0.18 * opening
503
- gap_mask = np.abs(xx - center_x) <= gap_width
504
- stripe_mask = (np.sin(xx * np.pi * 18.0) > 0.2).astype(np.float32)
 
 
 
 
 
 
 
 
 
 
 
 
 
505
 
506
  front = base.copy()
507
- front[..., 1] += 0.22 * stripe_mask
508
- front[..., 0] += 0.07 * stripe_mask
509
- front[gap_mask, :] = np.clip(front[gap_mask, :] + np.asarray([0.18, 0.18, 0.18], dtype=np.float32), 0.0, 1.0)
510
- target_mask = ((xx - center_x) ** 2 + (yy - 0.76) ** 2) <= (0.03 + 0.015 * visibility) ** 2
511
- front[target_mask, 0] = np.clip(front[target_mask, 0] + 0.55 * visibility, 0.0, 1.0)
512
- front[target_mask, 1] *= 0.55
513
- front[..., 2] = np.clip(front[..., 2] + 0.18 * disturbance + 0.05 * step_fraction, 0.0, 1.0)
 
 
 
 
514
 
515
  wrist_left = np.full((height, width, 3), 0.12, dtype=np.float32)
516
- open_rows = int(opening * height)
517
- wrist_left[height - open_rows :, : width // 3, 1] = 0.75
518
- wrist_left[height - int(disturbance * height) :, width // 3 : (2 * width) // 3, 0] = 0.85
519
- mode_colors = {
520
- SupportMode.HOLD: np.asarray([0.92, 0.82, 0.16], dtype=np.float32),
521
- SupportMode.TRANSFER: np.asarray([0.16, 0.78, 0.92], dtype=np.float32),
522
- SupportMode.PASSIVE: np.asarray([0.86, 0.86, 0.86], dtype=np.float32),
523
- }
524
- wrist_left[:, (2 * width) // 3 :, :] = mode_colors[SupportMode(support_mode)]
 
 
525
 
526
  wrist_right = np.full((height, width, 3), 0.08, dtype=np.float32)
527
- template_edges = np.linspace(0, width, num_templates + 1, dtype=np.int32)
528
- for template_idx in range(num_templates):
529
- col_start = template_edges[template_idx]
530
- col_end = template_edges[template_idx + 1]
531
- if corridor_current[template_idx] > 0.5:
532
- wrist_right[:, col_start:col_end, 1] = 0.70
533
- if template_idx == target_template:
534
- wrist_right[:, col_start:col_end, 0] = 0.78
535
- if template_idx == actor_template:
536
- wrist_right[:, col_start:col_end, 2] = 0.90
537
- wrist_right[: max(1, int(visibility * height)), :, :] += 0.10
538
- wrist_right[height - max(1, int(actor_progress * height)) :, :, 2] += 0.12
 
 
539
  wrist_right = np.clip(wrist_right, 0.0, 1.0)
540
 
541
  return {
 
136
  "disturbance": self.disturbance,
137
  "target_template": self.target_template,
138
  "target_depth": self.target_depth,
139
+ "target_center": self.target_center,
140
+ "target_radius": self.target_radius,
141
+ "texture_phase": self.texture_phase,
142
+ "texture_scale": self.texture_scale,
143
+ "view_bias": self.view_bias,
144
+ "target_intensity": self.target_intensity,
145
  "holding": self.holding,
146
  "transferred": self.transferred,
147
  "retrieved": self.retrieved,
 
157
  self.disturbance = float(state["disturbance"])
158
  self.target_template = int(state["target_template"])
159
  self.target_depth = float(state["target_depth"])
160
+ self.target_center = float(state["target_center"])
161
+ self.target_radius = float(state["target_radius"])
162
+ self.texture_phase = float(state["texture_phase"])
163
+ self.texture_scale = float(state["texture_scale"])
164
+ self.view_bias = float(state["view_bias"])
165
+ self.target_intensity = float(state["target_intensity"])
166
  self.holding = bool(state["holding"])
167
  self.transferred = bool(state["transferred"])
168
  self.retrieved = bool(state["retrieved"])
 
179
  self.disturbance = float(self.rng.uniform(0.02, 0.12))
180
  self.target_template = int(self.rng.integers(4, self.num_templates - 4))
181
  self.target_depth = float(self.rng.uniform(0.15, 0.45))
182
+ base_center = self.target_template / float(max(1, self.num_templates - 1))
183
+ self.target_center = float(np.clip(base_center + self.rng.uniform(-0.01, 0.01), 0.06, 0.94))
184
+ self.target_radius = float(self.rng.uniform(0.022, 0.036))
185
+ self.texture_phase = float(self.rng.uniform(0.0, 2.0 * np.pi))
186
+ self.texture_scale = float(self.rng.uniform(0.85, 1.25))
187
+ self.view_bias = float(self.rng.uniform(-0.12, 0.12))
188
+ self.target_intensity = float(self.rng.uniform(0.45, 0.8))
189
  self.holding = False
190
  self.transferred = False
191
  self.retrieved = False
 
306
  }
307
 
308
  def render_state(self, privileged_state: dict[str, Any] | None = None) -> dict[str, Any]:
 
 
309
  return {
310
  "opening": float(self.opening),
311
  "disturbance": float(self.disturbance),
312
+ "target_center": float(self.target_center),
313
+ "target_depth": float(self.target_depth),
314
+ "target_radius": float(self.target_radius),
315
+ "texture_phase": float(self.texture_phase),
316
+ "texture_scale": float(self.texture_scale),
317
+ "view_bias": float(self.view_bias),
318
+ "target_intensity": float(self.target_intensity),
319
  "step_fraction": float(self.step_count / max(1, self.max_steps)),
320
  }
321
 
322
  def _proprio(self, privileged_state: dict[str, Any]) -> np.ndarray:
 
323
  features = np.zeros((32,), dtype=np.float32)
324
+ step_fraction = self.step_count / float(max(1, self.max_steps))
325
+ features[0] = step_fraction
326
+ features[1] = np.sin(np.pi * step_fraction)
327
+ features[2] = np.cos(np.pi * step_fraction)
 
 
 
 
 
 
 
328
  return features
329
 
330
  def get_observation(self, privileged_state: dict[str, Any] | None = None) -> dict[str, Any]:
 
341
  "proprio": self._proprio(privileged_state),
342
  "text": PROXY_GOALS[self.proxy_name],
343
  "camera_names": self.camera_names,
 
344
  }
345
 
346
  def teacher_action(self) -> np.ndarray:
 
411
  "rollout_disturbance_cost": np.asarray(rollout_disturbance, dtype=np.float32),
412
  }
413
 
414
+ def evaluate_action_chunk(
415
+ self,
416
+ action_chunk: np.ndarray,
417
+ rollout_horizon: int | None = None,
418
+ ) -> dict[str, np.ndarray | float]:
419
+ rollout_horizon = rollout_horizon or self.rollout_horizon
420
+ snapshot = self.clone_state()
421
+ rollout_support_mode: list[int] = []
422
+ rollout_corridor: list[np.ndarray] = []
423
+ rollout_persistence: list[np.ndarray] = []
424
+ rollout_disturbance: list[float] = []
425
+ corridor_open_trace = [float(self.get_privileged_state()["corridor_feasible"][self._current_support_mode()].any())]
426
+ visibility_trace = [float(self.get_privileged_state()["visibility"])]
427
+ terminated = False
428
+ truncated = False
429
+ privileged_state = self.get_privileged_state()
430
+ for step, action in enumerate(np.asarray(action_chunk, dtype=np.float32)):
431
+ _, _, terminated, truncated, privileged_state = self.step(action)
432
+ if step < rollout_horizon:
433
+ rollout_support_mode.append(int(privileged_state["support_mode"]))
434
+ rollout_corridor.append(privileged_state["corridor_feasible"].astype(np.float32))
435
+ rollout_persistence.append(privileged_state["persistence_horizon"].astype(np.float32))
436
+ rollout_disturbance.append(float(privileged_state["disturbance_cost"]))
437
+ corridor_open_trace.append(float(privileged_state["corridor_feasible"][privileged_state["support_mode"]].any()))
438
+ visibility_trace.append(float(privileged_state["visibility"]))
439
+ if terminated or truncated:
440
+ break
441
+ while len(rollout_support_mode) < rollout_horizon:
442
+ current = self.get_privileged_state()
443
+ rollout_support_mode.append(int(current["support_mode"]))
444
+ rollout_corridor.append(current["corridor_feasible"].astype(np.float32))
445
+ rollout_persistence.append(current["persistence_horizon"].astype(np.float32))
446
+ rollout_disturbance.append(float(current["disturbance_cost"]))
447
+ final_state = self.get_privileged_state()
448
+ reocclusion = float(
449
+ np.logical_and(
450
+ np.asarray(corridor_open_trace[:-1]) > 0.5,
451
+ np.asarray(corridor_open_trace[1:]) <= 0.5,
452
+ ).mean()
453
+ ) if len(corridor_open_trace) > 1 else 0.0
454
+ result: dict[str, np.ndarray | float] = {
455
+ "rollout_support_mode": np.asarray(rollout_support_mode, dtype=np.int64),
456
+ "rollout_corridor_feasible": np.asarray(rollout_corridor, dtype=np.float32),
457
+ "rollout_persistence_horizon": np.asarray(rollout_persistence, dtype=np.float32),
458
+ "rollout_disturbance_cost": np.asarray(rollout_disturbance, dtype=np.float32),
459
+ "retrieval_success": float(final_state["retrieval_success"]),
460
+ "final_disturbance_cost": float(final_state["disturbance_cost"]),
461
+ "reocclusion_rate": reocclusion,
462
+ "visibility_integral": float(np.sum(np.asarray(visibility_trace, dtype=np.float32))),
463
+ }
464
+ self.restore_state(snapshot)
465
+ return result
466
+
467
+ def sample_candidate_action_chunks(
468
+ self,
469
+ teacher_chunk: np.ndarray,
470
+ num_candidates: int = 4,
471
+ rollout_horizon: int | None = None,
472
+ ) -> tuple[np.ndarray, dict[str, np.ndarray]]:
473
+ rollout_horizon = rollout_horizon or self.rollout_horizon
474
+ teacher_chunk = np.asarray(teacher_chunk, dtype=np.float32)
475
+ candidates = [teacher_chunk.astype(np.float32)]
476
+ outcomes = [self.evaluate_action_chunk(teacher_chunk, rollout_horizon=rollout_horizon)]
477
+ for candidate_idx in range(1, num_candidates):
478
+ candidate = teacher_chunk.copy()
479
+ revealer_noise = self.rng.normal(loc=0.0, scale=0.20 + 0.03 * candidate_idx, size=candidate[:, :7].shape)
480
+ actor_noise = self.rng.normal(loc=0.0, scale=0.18 + 0.04 * candidate_idx, size=candidate[:, 7:].shape)
481
+ candidate[:, :7] = np.clip(candidate[:, :7] + revealer_noise.astype(np.float32), -1.0, 1.0)
482
+ candidate[:, 7:] = np.clip(candidate[:, 7:] + actor_noise.astype(np.float32), -1.0, 1.0)
483
+ candidates.append(candidate.astype(np.float32))
484
+ outcomes.append(self.evaluate_action_chunk(candidate, rollout_horizon=rollout_horizon))
485
+ stacked_outcomes = {
486
+ "candidate_rollout_support_mode": np.stack([item["rollout_support_mode"] for item in outcomes], axis=0).astype(np.int64),
487
+ "candidate_rollout_corridor_feasible": np.stack(
488
+ [item["rollout_corridor_feasible"] for item in outcomes], axis=0
489
+ ).astype(np.float32),
490
+ "candidate_rollout_persistence_horizon": np.stack(
491
+ [item["rollout_persistence_horizon"] for item in outcomes], axis=0
492
+ ).astype(np.float32),
493
+ "candidate_rollout_disturbance_cost": np.stack(
494
+ [item["rollout_disturbance_cost"] for item in outcomes], axis=0
495
+ ).astype(np.float32),
496
+ "candidate_retrieval_success": np.asarray([item["retrieval_success"] for item in outcomes], dtype=np.float32),
497
+ "candidate_final_disturbance_cost": np.asarray(
498
+ [item["final_disturbance_cost"] for item in outcomes], dtype=np.float32
499
+ ),
500
+ "candidate_reocclusion_rate": np.asarray([item["reocclusion_rate"] for item in outcomes], dtype=np.float32),
501
+ "candidate_visibility_integral": np.asarray([item["visibility_integral"] for item in outcomes], dtype=np.float32),
502
+ }
503
+ stacked_outcomes["candidate_risk"] = np.clip(
504
+ stacked_outcomes["candidate_final_disturbance_cost"] + stacked_outcomes["candidate_reocclusion_rate"],
505
+ 0.0,
506
+ 1.0,
507
+ ).astype(np.float32)
508
+ stacked_outcomes["candidate_utility"] = (
509
+ stacked_outcomes["candidate_retrieval_success"] - stacked_outcomes["candidate_risk"]
510
+ ).astype(np.float32)
511
+ return np.stack(candidates, axis=0).astype(np.float32), stacked_outcomes
512
+
513
  def step(self, action: np.ndarray) -> tuple[dict[str, Any], float, bool, bool, dict[str, Any]]:
514
  action = np.asarray(action, dtype=np.float32)
515
  mode = self._mode_from_action(action)
 
591
  dynamics = PROXY_DYNAMICS[proxy_name]
592
  opening = float(render_state["opening"])
593
  disturbance = float(render_state["disturbance"])
594
+ target_center = float(render_state["target_center"])
595
+ target_depth = float(render_state["target_depth"])
596
+ target_radius = float(render_state["target_radius"])
597
+ texture_phase = float(render_state["texture_phase"])
598
+ texture_scale = float(render_state["texture_scale"])
599
+ view_bias = float(render_state["view_bias"])
600
+ target_intensity = float(render_state["target_intensity"])
601
  step_fraction = float(render_state["step_fraction"])
602
 
603
  height = width = resolution
 
607
  x = np.linspace(0.0, 1.0, width, dtype=np.float32)
608
  y = np.linspace(0.0, 1.0, height, dtype=np.float32)
609
  yy, xx = np.meshgrid(y, x, indexing="ij")
610
+ visibility = np.clip(
611
+ 1.25 * opening - 0.68 * disturbance - 0.24 * target_depth + dynamics.visibility_bias,
612
+ 0.0,
613
+ 1.0,
614
+ )
615
+ target_y = 0.74 - 0.22 * target_depth
616
+ gap_width = np.clip(0.05 + 0.16 * opening - 0.08 * disturbance, 0.02, 0.24)
617
+ front_center = np.clip(target_center + 0.03 * view_bias, 0.06, 0.94)
618
+ left_center = np.clip(0.34 + 0.12 * (target_center - 0.5) - 0.05 * view_bias, 0.18, 0.52)
619
+ right_center = np.clip(0.66 + 0.18 * (target_center - 0.5) + 0.06 * view_bias, 0.42, 0.88)
620
+ surface_wave = 0.5 + 0.5 * np.sin((xx * (14.0 * texture_scale) + yy * 7.0) * np.pi + texture_phase)
621
+ weave_wave = 0.5 + 0.5 * np.cos((xx * 6.0 - yy * (10.0 + 2.0 * texture_scale)) * np.pi - 0.6 * texture_phase)
622
+ clutter = 0.65 * surface_wave + 0.35 * weave_wave
623
+ disturbance_map = disturbance * (
624
+ 0.55 + 0.45 * np.sin((xx * 9.0 + yy * (12.0 + texture_scale)) * np.pi + 1.3 * texture_phase)
625
+ )
626
+ target_mask = ((xx - front_center) ** 2 + ((yy - target_y) / 1.2) ** 2) <= target_radius**2
627
 
628
  front = base.copy()
629
+ front *= (0.82 + 0.24 * clutter[..., None]).astype(np.float32)
630
+ occluder_profile = np.abs(xx - front_center) / gap_width + 0.55 * np.abs(yy - (0.56 + 0.08 * view_bias))
631
+ gap_mask = occluder_profile <= (1.15 + 0.35 * opening)
632
+ front[gap_mask] = np.clip(front[gap_mask] + np.asarray([0.14, 0.16, 0.14], dtype=np.float32), 0.0, 1.0)
633
+ target_rgb = np.asarray([0.78, 0.74, 0.58], dtype=np.float32) * target_intensity
634
+ front[target_mask] = np.clip(
635
+ front[target_mask] * (1.0 - 0.45 * visibility) + target_rgb * (0.25 + 0.75 * visibility),
636
+ 0.0,
637
+ 1.0,
638
+ )
639
+ front[..., 2] = np.clip(front[..., 2] + 0.12 * disturbance_map + 0.04 * step_fraction, 0.0, 1.0)
640
 
641
  wrist_left = np.full((height, width, 3), 0.12, dtype=np.float32)
642
+ wrist_left *= (0.8 + 0.18 * clutter[..., None]).astype(np.float32)
643
+ left_slit_width = np.clip(0.04 + 0.18 * opening - 0.10 * disturbance, 0.015, 0.22)
644
+ left_profile = ((xx - left_center) / left_slit_width) ** 2 + ((yy - 0.58) / (0.40 + 0.10 * opening)) ** 2
645
+ left_open = left_profile <= 1.0
646
+ wrist_left[left_open] = np.clip(wrist_left[left_open] + np.asarray([0.08, 0.22, 0.12], dtype=np.float32), 0.0, 1.0)
647
+ wrist_left[..., 0] = np.clip(wrist_left[..., 0] + 0.18 * disturbance_map, 0.0, 1.0)
648
+ wrist_left[target_mask] = np.clip(
649
+ wrist_left[target_mask] * (1.0 - 0.35 * visibility) + target_rgb * (0.18 + 0.52 * visibility),
650
+ 0.0,
651
+ 1.0,
652
+ )
653
 
654
  wrist_right = np.full((height, width, 3), 0.08, dtype=np.float32)
655
+ wrist_right *= (0.78 + 0.22 * clutter[..., None]).astype(np.float32)
656
+ right_band = np.exp(-((xx - right_center) ** 2) / max(1e-4, (0.06 + gap_width) ** 2))
657
+ right_clear = np.exp(-((yy - (0.52 - 0.12 * target_depth)) ** 2) / max(1e-4, (0.12 + 0.18 * opening) ** 2))
658
+ wrist_right[..., 1] = np.clip(
659
+ wrist_right[..., 1] + 0.28 * visibility * right_band * right_clear - 0.10 * disturbance_map,
660
+ 0.0,
661
+ 1.0,
662
+ )
663
+ wrist_right[target_mask] = np.clip(
664
+ wrist_right[target_mask] * (1.0 - 0.40 * visibility) + target_rgb * (0.22 + 0.60 * visibility),
665
+ 0.0,
666
+ 1.0,
667
+ )
668
+ wrist_right[..., 2] = np.clip(wrist_right[..., 2] + 0.08 * step_fraction + 0.06 * right_band, 0.0, 1.0)
669
  wrist_right = np.clip(wrist_right, 0.0, 1.0)
670
 
671
  return {