lsnu commited on
Commit
430a41a
·
verified ·
1 Parent(s): 8c435a3

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/benchmark_full/reveal_benchmark.json +15 -0
  2. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/benchmark_full/reveal_benchmark.md +13 -0
  3. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/config_resolved.yaml +149 -0
  4. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/diagnostics_full/proxy_diagnostics.json +16 -0
  5. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/metrics.json +230 -0
  6. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/summary.json +557 -0
  7. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/benchmark_full/reveal_benchmark.json +15 -0
  8. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/benchmark_full/reveal_benchmark.md +13 -0
  9. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/config_resolved.yaml +149 -0
  10. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/diagnostics_full/proxy_diagnostics.json +16 -0
  11. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/metrics.json +230 -0
  12. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/summary.json +557 -0
  13. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/benchmark_full/reveal_benchmark.json +15 -0
  14. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/benchmark_full/reveal_benchmark.md +13 -0
  15. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/config_resolved.yaml +149 -0
  16. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/diagnostics_full/proxy_diagnostics.json +16 -0
  17. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/metrics.json +230 -0
  18. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/summary.json +557 -0
  19. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_full/reveal_benchmark.json +15 -0
  20. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_full/reveal_benchmark.md +13 -0
  21. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_planner/reveal_benchmark.json +15 -0
  22. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_planner/reveal_benchmark.md +13 -0
  23. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_role_symmetry/reveal_benchmark.json +15 -0
  24. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_role_symmetry/reveal_benchmark.md +13 -0
  25. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/config_resolved.yaml +147 -0
  26. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/diagnostics_full/proxy_diagnostics.json +16 -0
  27. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/metrics.json +230 -0
  28. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/summary.json +14 -0
  29. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_full/reveal_benchmark.json +15 -0
  30. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_full/reveal_benchmark.md +13 -0
  31. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_planner/reveal_benchmark.json +15 -0
  32. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_planner/reveal_benchmark.md +13 -0
  33. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_role_symmetry/reveal_benchmark.json +15 -0
  34. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_role_symmetry/reveal_benchmark.md +13 -0
  35. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/config_resolved.yaml +147 -0
  36. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/diagnostics_full/proxy_diagnostics.json +16 -0
  37. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/metrics.json +230 -0
  38. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/summary.json +14 -0
  39. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_full/reveal_benchmark.json +15 -0
  40. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_full/reveal_benchmark.md +13 -0
  41. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_planner/reveal_benchmark.json +15 -0
  42. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_planner/reveal_benchmark.md +13 -0
  43. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_role_symmetry/reveal_benchmark.json +15 -0
  44. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_role_symmetry/reveal_benchmark.md +13 -0
  45. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/config_resolved.yaml +147 -0
  46. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/diagnostics_full/proxy_diagnostics.json +16 -0
  47. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/metrics.json +230 -0
  48. artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/summary.json +14 -0
  49. artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/benchmark_full/reveal_benchmark.json +15 -0
  50. artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/benchmark_full/reveal_benchmark.md +13 -0
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/benchmark_full/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "full": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.375,
5
+ "bag_proxy": 0.4583333333333333,
6
+ "cloth_proxy": 0.5833333333333334
7
+ },
8
+ "mean_success": 0.47222222222222215,
9
+ "visibility_integral": 37.36026926173104,
10
+ "corridor_availability": 0.8730104863643646,
11
+ "reocclusion_rate": 0.04405864197530864,
12
+ "persistence_horizon_mae": 1.033145775666108,
13
+ "disturbance_cost": 0.3228136783000082
14
+ }
15
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/benchmark_full/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## full
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/checkpoint_best.pt
5
+ - mean_success: 0.472
6
+ - visibility_integral: 37.360
7
+ - corridor_availability: 0.873
8
+ - reocclusion_rate: 0.044
9
+ - persistence_horizon_mae: 1.033
10
+ - disturbance_cost: 0.323
11
+ - foliage_proxy_success: 0.375
12
+ - bag_proxy_success: 0.458
13
+ - cloth_proxy_success: 0.583
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/config_resolved.yaml ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_r3d_stage1_clip_seed7
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
3
+ device: cuda
4
+ seed: 7
5
+ init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
6
+ init_strict: false
7
+ data:
8
+ proxies:
9
+ - foliage_proxy
10
+ - bag_proxy
11
+ - cloth_proxy
12
+ resolution: 224
13
+ dataset_version: reveal_proxy_v6_rgbd_elastic_state
14
+ train_episodes_per_proxy: 48
15
+ val_episodes_per_proxy: 16
16
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage1_seed7.pt
17
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage1_seed7.pt
18
+ rebuild_dataset: false
19
+ chunk_horizon: 8
20
+ rollout_horizon: 5
21
+ history_steps: 6
22
+ planner_candidates: 8
23
+ seed: 7
24
+ optim:
25
+ epochs: 4
26
+ batch_size: 2
27
+ num_workers: 4
28
+ lr: 0.0003
29
+ weight_decay: 0.0001
30
+ trainer:
31
+ policy_type: elastic_reveal
32
+ use_bf16: true
33
+ grad_clip_norm: 1.0
34
+ freeze_backbone: true
35
+ gradient_checkpointing: false
36
+ plan_during_train: true
37
+ plan_during_eval: true
38
+ support_mode_conditioning: true
39
+ planner_mode: trainable
40
+ use_depth: false
41
+ use_world_model: true
42
+ use_role_tokens: true
43
+ compute_equivariance_probe: true
44
+ policy:
45
+ backbone:
46
+ model_name: openai/clip-vit-base-patch32
47
+ hidden_dim: 512
48
+ max_text_tokens: 32
49
+ freeze_backbone: true
50
+ gradient_checkpointing: false
51
+ use_dummy_backbone: false
52
+ fusion:
53
+ hidden_dim: 512
54
+ num_cameras: 3
55
+ num_layers: 4
56
+ num_heads: 8
57
+ ff_dim: 2048
58
+ dropout: 0.1
59
+ proprio_dim: 32
60
+ proprio_tokens: 1
61
+ memory:
62
+ hidden_dim: 512
63
+ action_dim: 14
64
+ history_steps: 6
65
+ scene_history_steps: 3
66
+ belief_history_steps: 8
67
+ num_layers: 2
68
+ dropout: 0.1
69
+ memory_bank_size: 4
70
+ scene_bank_size: 2
71
+ belief_bank_size: 2
72
+ num_heads: 8
73
+ max_history_steps: 8
74
+ decoder:
75
+ hidden_dim: 512
76
+ num_heads: 8
77
+ num_layers: 4
78
+ ff_dim: 2048
79
+ dropout: 0.1
80
+ chunk_size: 8
81
+ action_dim: 14
82
+ arm_action_dim: 7
83
+ num_candidates: 8
84
+ num_phases: 5
85
+ num_arm_roles: 4
86
+ num_proposal_modes: 6
87
+ planner_top_k: 4
88
+ reveal_head:
89
+ hidden_dim: 512
90
+ num_support_modes: 3
91
+ num_approach_templates: 32
92
+ rollout_horizon: 5
93
+ belief_map_size: 32
94
+ field_size: 16
95
+ num_heads: 8
96
+ predict_belief_map: true
97
+ num_phases: 5
98
+ num_arm_roles: 4
99
+ num_interaction_tokens: 8
100
+ world_model:
101
+ hidden_dim: 512
102
+ action_dim: 14
103
+ num_support_modes: 3
104
+ num_approach_templates: 32
105
+ rollout_horizon: 5
106
+ field_size: 16
107
+ num_heads: 8
108
+ num_phases: 5
109
+ num_arm_roles: 4
110
+ num_interaction_tokens: 8
111
+ belief_map_size: 32
112
+ predict_belief_map: true
113
+ scene_bank_size: 2
114
+ belief_bank_size: 2
115
+ planner:
116
+ hidden_dim: 512
117
+ num_candidates: 8
118
+ action_dim: 14
119
+ num_support_modes: 3
120
+ utility_margin: 0.1
121
+ num_heads: 8
122
+ num_layers: 2
123
+ num_phases: 5
124
+ num_arm_roles: 4
125
+ top_k: 4
126
+ loss_weights:
127
+ action: 1.0
128
+ phase: 0.1
129
+ arm_role: 0.15
130
+ support_mode: 0.1
131
+ corridor: 0.15
132
+ persistence: 0.05
133
+ disturbance: 0.05
134
+ world_model: 0.2
135
+ belief: 0.05
136
+ visibility: 0.05
137
+ clearance: 0.05
138
+ support_stability: 0.05
139
+ reocclusion: 0.05
140
+ occluder_contact: 0.05
141
+ grasp_affordance: 0.05
142
+ planner_success: 0.25
143
+ planner_risk: 0.1
144
+ planner_ranking: 0.2
145
+ proposal_reconstruction: 0.1
146
+ proposal_success: 0.15
147
+ proposal_ranking: 0.2
148
+ proposal_diversity: 0.05
149
+ role_swap_consistency: 0.05
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/diagnostics_full/proxy_diagnostics.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "planner_top1_accuracy": 0.25396825396825395,
3
+ "planner_regret": 0.024764427915215492,
4
+ "planner_score_utility_spearman": 0.1904761791229248,
5
+ "risk_calibration_mse": 0.010364258661866188,
6
+ "role_collapse_rate": 0.0,
7
+ "proposal_diversity": 0.022177213802933693,
8
+ "left_right_equivariance_error": 0.0002942846322184778,
9
+ "belief_calibration_brier": 0.003581121563911438,
10
+ "reocclusion_calibration_brier": 0.23373088240623474,
11
+ "support_stability_mae": 0.022998232394456863,
12
+ "clearance_auc": 0.8989269585276155,
13
+ "memory_write_rate": 0.0,
14
+ "memory_saturation": 0.41934600472450256,
15
+ "num_samples": 126
16
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/metrics.json ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.025519870977400175,
6
+ "arm_role": 0.03451829462151253,
7
+ "belief": 0.11532339149432656,
8
+ "clearance": 0.09198410963122758,
9
+ "corridor": 0.27232400180664673,
10
+ "disturbance": 0.005858588227789626,
11
+ "grasp_affordance": 0.018751464233153464,
12
+ "occluder_contact": 0.21359099159065967,
13
+ "persistence": 5.231568055785678,
14
+ "phase": 0.7372311896678665,
15
+ "planner_ranking": 0.1646315749647481,
16
+ "planner_risk": 0.014348083711473067,
17
+ "planner_success": 0.6091769787029446,
18
+ "proposal_diversity": 0.0,
19
+ "proposal_ranking": 1.253575401780493,
20
+ "proposal_reconstruction": 0.067724266230904,
21
+ "proposal_success": 0.6851897648491785,
22
+ "reocclusion": 0.7031442959895309,
23
+ "role_swap_consistency": 0.00044027801038677857,
24
+ "support_mode": 0.7282283443430956,
25
+ "support_stability": 0.15459337279551627,
26
+ "total": 1.6319934494832424,
27
+ "uncertainty": 0.013496716971069097,
28
+ "visibility": 0.11563199924314833,
29
+ "world_model": 2.671503098223222
30
+ },
31
+ "val": {
32
+ "action": 0.020692157455616526,
33
+ "arm_role": 9.546122843554865e-05,
34
+ "belief": 0.09874132736807778,
35
+ "clearance": 0.08244451738539196,
36
+ "corridor": 0.2306106292775699,
37
+ "disturbance": 0.006118982125097694,
38
+ "grasp_affordance": 0.009981726739732992,
39
+ "occluder_contact": 0.19720953915800368,
40
+ "persistence": 3.8672617465730696,
41
+ "phase": 0.668701058815396,
42
+ "planner_ranking": 0.03794538755975072,
43
+ "planner_risk": 0.009814016923349026,
44
+ "planner_success": 0.5628143776030767,
45
+ "proposal_diversity": 0.0,
46
+ "proposal_ranking": 1.1249213124078417,
47
+ "proposal_reconstruction": 0.06329423224642164,
48
+ "proposal_success": 0.6747160203873165,
49
+ "reocclusion": 0.692203164100647,
50
+ "role_swap_consistency": 0.0,
51
+ "support_mode": 0.6680677216204386,
52
+ "support_stability": 0.1511912994411966,
53
+ "total": 1.358805573175824,
54
+ "uncertainty": 0.003482046378185115,
55
+ "visibility": 0.10417925601913816,
56
+ "world_model": 2.1376701915074907
57
+ }
58
+ },
59
+ {
60
+ "epoch": 1,
61
+ "train": {
62
+ "action": 0.02150821143575988,
63
+ "arm_role": 1.9482293054071397e-05,
64
+ "belief": 0.09863162136280725,
65
+ "clearance": 0.08064276829400924,
66
+ "corridor": 0.24359133383210416,
67
+ "disturbance": 0.002735878452234476,
68
+ "grasp_affordance": 0.009349104797184779,
69
+ "occluder_contact": 0.1937003313558888,
70
+ "persistence": 4.076787073262699,
71
+ "phase": 0.6966290698625655,
72
+ "planner_ranking": 0.04271617977273956,
73
+ "planner_risk": 0.010049402082938681,
74
+ "planner_success": 0.5399472568359674,
75
+ "proposal_diversity": 0.0,
76
+ "proposal_ranking": 1.1569982820156357,
77
+ "proposal_reconstruction": 0.06389496966962414,
78
+ "proposal_success": 0.6711133328407847,
79
+ "reocclusion": 0.6940537130957498,
80
+ "role_swap_consistency": 0.00022550253765151655,
81
+ "support_mode": 0.6837139029777487,
82
+ "support_stability": 0.14029162690160474,
83
+ "total": 1.3837347957476271,
84
+ "uncertainty": 0.0016494125736687157,
85
+ "visibility": 0.09400421737922424,
86
+ "world_model": 2.175609592991974
87
+ },
88
+ "val": {
89
+ "action": 0.020051477757829523,
90
+ "arm_role": 2.626385377793451e-06,
91
+ "belief": 0.09183884199176516,
92
+ "clearance": 0.07657587877105153,
93
+ "corridor": 0.22728621321065084,
94
+ "disturbance": 0.0016498260886850951,
95
+ "grasp_affordance": 0.009590831518705403,
96
+ "occluder_contact": 0.1917984854607355,
97
+ "persistence": 3.699212070495363,
98
+ "phase": 0.6689459842348856,
99
+ "planner_ranking": 0.03331218510795715,
100
+ "planner_risk": 0.010092773325076061,
101
+ "planner_success": 0.5014436940352122,
102
+ "proposal_diversity": 0.0,
103
+ "proposal_ranking": 1.1606994933552213,
104
+ "proposal_reconstruction": 0.062439400820978104,
105
+ "proposal_success": 0.675733851061927,
106
+ "reocclusion": 0.6921006942552234,
107
+ "role_swap_consistency": 0.0,
108
+ "support_mode": 0.6564426545112853,
109
+ "support_stability": 0.14099458102432508,
110
+ "total": 1.313369631767273,
111
+ "uncertainty": 0.0024020517326240973,
112
+ "visibility": 0.08723713226971172,
113
+ "world_model": 2.0216772158940635
114
+ }
115
+ },
116
+ {
117
+ "epoch": 2,
118
+ "train": {
119
+ "action": 0.018980447901412845,
120
+ "arm_role": 2.3090714559505124e-05,
121
+ "belief": 0.1100015923263827,
122
+ "clearance": 0.0791148773262872,
123
+ "corridor": 0.23030528037001852,
124
+ "disturbance": 0.002447301701405857,
125
+ "grasp_affordance": 0.009001106255400087,
126
+ "occluder_contact": 0.21010415864552504,
127
+ "persistence": 2.0494745795430753,
128
+ "phase": 0.459073231482381,
129
+ "planner_ranking": 0.036845811475892686,
130
+ "planner_risk": 0.011261017404920885,
131
+ "planner_success": 0.5133467099741491,
132
+ "proposal_diversity": 0.0,
133
+ "proposal_ranking": 1.1499755538570944,
134
+ "proposal_reconstruction": 0.062038555780318395,
135
+ "proposal_success": 0.6672172468370168,
136
+ "reocclusion": 0.41151915600825667,
137
+ "role_swap_consistency": 0.0007739521978125561,
138
+ "support_mode": 0.38595684411013936,
139
+ "support_stability": 0.1425538511912665,
140
+ "total": 1.1811942648513154,
141
+ "uncertainty": 0.000767841034371724,
142
+ "visibility": 0.10209987125315591,
143
+ "world_model": 2.070929214904446
144
+ },
145
+ "val": {
146
+ "action": 0.0138629823627453,
147
+ "arm_role": 0.002011558223822855,
148
+ "belief": 0.10340341582657799,
149
+ "clearance": 0.0855481999497565,
150
+ "corridor": 0.2235906974427284,
151
+ "disturbance": 0.0011637268657111797,
152
+ "grasp_affordance": 0.010592727485807642,
153
+ "occluder_contact": 0.20843842601965343,
154
+ "persistence": 1.1762515253254346,
155
+ "phase": 0.3442955078771486,
156
+ "planner_ranking": 0.03461442932137519,
157
+ "planner_risk": 0.01165175854065825,
158
+ "planner_success": 0.45808544967855724,
159
+ "proposal_diversity": 0.0,
160
+ "proposal_ranking": 1.3026971003365895,
161
+ "proposal_reconstruction": 0.05888378312663427,
162
+ "proposal_success": 0.7430036550476438,
163
+ "reocclusion": 0.2871374910076459,
164
+ "role_swap_consistency": 0.0,
165
+ "support_mode": 0.22473623181900215,
166
+ "support_stability": 0.1320991822414928,
167
+ "total": 1.1099917330439129,
168
+ "uncertainty": 0.0005805234163528352,
169
+ "visibility": 0.09557991185122067,
170
+ "world_model": 1.9994045325687952
171
+ }
172
+ },
173
+ {
174
+ "epoch": 3,
175
+ "train": {
176
+ "action": 0.014569098466314883,
177
+ "arm_role": 4.4951576212937916e-05,
178
+ "belief": 0.09620984569582015,
179
+ "clearance": 0.07538617284315106,
180
+ "corridor": 0.21248489566188775,
181
+ "disturbance": 0.0016758848629270635,
182
+ "grasp_affordance": 0.008272631588777167,
183
+ "occluder_contact": 0.19746327033529731,
184
+ "persistence": 1.1089699098374644,
185
+ "phase": 0.3716845961765469,
186
+ "planner_ranking": 0.03254403228879829,
187
+ "planner_risk": 0.010248634800575772,
188
+ "planner_success": 0.47941413580279074,
189
+ "proposal_diversity": 0.0,
190
+ "proposal_ranking": 1.153262345578658,
191
+ "proposal_reconstruction": 0.05860933205064055,
192
+ "proposal_success": 0.6466394141706496,
193
+ "reocclusion": 0.2566672772173989,
194
+ "role_swap_consistency": 0.0010398222479868085,
195
+ "support_mode": 0.21815690070546734,
196
+ "support_stability": 0.13650912478449145,
197
+ "total": 1.0633102330861914,
198
+ "uncertainty": 0.0002461711761398012,
199
+ "visibility": 0.09588275449984361,
200
+ "world_model": 1.9903733518111144
201
+ },
202
+ "val": {
203
+ "action": 0.01619998768474611,
204
+ "arm_role": 3.844006559777174e-06,
205
+ "belief": 0.09427393618084136,
206
+ "clearance": 0.07296533326780985,
207
+ "corridor": 0.2100035525148823,
208
+ "disturbance": 0.0013519242122204862,
209
+ "grasp_affordance": 0.007646961093303703,
210
+ "occluder_contact": 0.1950870676646157,
211
+ "persistence": 1.3894045449024628,
212
+ "phase": 0.6804814789192899,
213
+ "planner_ranking": 0.027768202883649677,
214
+ "planner_risk": 0.010219628483081044,
215
+ "planner_success": 0.4819766197885786,
216
+ "proposal_diversity": 0.0,
217
+ "proposal_ranking": 1.1241777983922807,
218
+ "proposal_reconstruction": 0.060782825840370994,
219
+ "proposal_success": 0.6369421221907177,
220
+ "reocclusion": 0.27461627113913734,
221
+ "role_swap_consistency": 0.0,
222
+ "support_mode": 0.08716485598531093,
223
+ "support_stability": 0.13245442648610425,
224
+ "total": 1.0629130696493483,
225
+ "uncertainty": 8.45672577761145e-05,
226
+ "visibility": 0.1013997554306,
227
+ "world_model": 1.8573077273747278
228
+ }
229
+ }
230
+ ]
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/summary.json ADDED
@@ -0,0 +1,557 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "experiment_name": "proxy_interaction_r3d_stage1_clip_seed7",
3
+ "device": "cuda",
4
+ "best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed7/checkpoint_best.pt",
5
+ "final_train_total": 1.0633102330861914,
6
+ "final_val_total": 1.0629130696493483,
7
+ "train_time_sec": 174.85308933258057,
8
+ "peak_gpu_memory_mb": 1919.8251953125,
9
+ "num_train_samples": 382,
10
+ "num_val_samples": 126,
11
+ "planner_mode": "trainable",
12
+ "frozen_modules": [],
13
+ "init_info": {
14
+ "path": "/workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt",
15
+ "loaded_keys": 461,
16
+ "skipped_shape_mismatch_keys": [
17
+ "memory.gru.weight_ih_l0",
18
+ "memory.gru.weight_hh_l0",
19
+ "memory.gru.bias_ih_l0",
20
+ "memory.gru.bias_hh_l0",
21
+ "memory.token_proj.0.weight",
22
+ "memory.token_proj.0.bias",
23
+ "memory.token_proj.1.weight",
24
+ "memory.token_proj.1.bias",
25
+ "decoder.actor_role_bias",
26
+ "decoder.revealer_decoder.layers.0.self_attn.in_proj_weight",
27
+ "decoder.revealer_decoder.layers.0.self_attn.in_proj_bias",
28
+ "decoder.revealer_decoder.layers.0.self_attn.out_proj.weight",
29
+ "decoder.revealer_decoder.layers.0.self_attn.out_proj.bias",
30
+ "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_weight",
31
+ "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_bias",
32
+ "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.weight",
33
+ "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.bias",
34
+ "decoder.revealer_decoder.layers.0.linear1.weight",
35
+ "decoder.revealer_decoder.layers.0.linear1.bias",
36
+ "decoder.revealer_decoder.layers.0.linear2.weight",
37
+ "decoder.revealer_decoder.layers.0.linear2.bias",
38
+ "decoder.revealer_decoder.layers.0.norm1.weight",
39
+ "decoder.revealer_decoder.layers.0.norm1.bias",
40
+ "decoder.revealer_decoder.layers.0.norm2.weight",
41
+ "decoder.revealer_decoder.layers.0.norm2.bias",
42
+ "decoder.revealer_decoder.layers.0.norm3.weight",
43
+ "decoder.revealer_decoder.layers.0.norm3.bias",
44
+ "decoder.revealer_decoder.layers.1.self_attn.in_proj_weight",
45
+ "decoder.revealer_decoder.layers.1.self_attn.in_proj_bias",
46
+ "decoder.revealer_decoder.layers.1.self_attn.out_proj.weight",
47
+ "decoder.revealer_decoder.layers.1.self_attn.out_proj.bias",
48
+ "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_weight",
49
+ "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_bias",
50
+ "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.weight",
51
+ "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.bias",
52
+ "decoder.revealer_decoder.layers.1.linear1.weight",
53
+ "decoder.revealer_decoder.layers.1.linear1.bias",
54
+ "decoder.revealer_decoder.layers.1.linear2.weight",
55
+ "decoder.revealer_decoder.layers.1.linear2.bias",
56
+ "decoder.revealer_decoder.layers.1.norm1.weight",
57
+ "decoder.revealer_decoder.layers.1.norm1.bias",
58
+ "decoder.revealer_decoder.layers.1.norm2.weight",
59
+ "decoder.revealer_decoder.layers.1.norm2.bias",
60
+ "decoder.revealer_decoder.layers.1.norm3.weight",
61
+ "decoder.revealer_decoder.layers.1.norm3.bias",
62
+ "decoder.revealer_decoder.layers.2.self_attn.in_proj_weight",
63
+ "decoder.revealer_decoder.layers.2.self_attn.in_proj_bias",
64
+ "decoder.revealer_decoder.layers.2.self_attn.out_proj.weight",
65
+ "decoder.revealer_decoder.layers.2.self_attn.out_proj.bias",
66
+ "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_weight",
67
+ "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_bias",
68
+ "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.weight",
69
+ "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.bias",
70
+ "decoder.revealer_decoder.layers.2.linear1.weight",
71
+ "decoder.revealer_decoder.layers.2.linear1.bias",
72
+ "decoder.revealer_decoder.layers.2.linear2.weight",
73
+ "decoder.revealer_decoder.layers.2.linear2.bias",
74
+ "decoder.revealer_decoder.layers.2.norm1.weight",
75
+ "decoder.revealer_decoder.layers.2.norm1.bias",
76
+ "decoder.revealer_decoder.layers.2.norm2.weight",
77
+ "decoder.revealer_decoder.layers.2.norm2.bias",
78
+ "decoder.revealer_decoder.layers.2.norm3.weight",
79
+ "decoder.revealer_decoder.layers.2.norm3.bias",
80
+ "decoder.revealer_decoder.layers.3.self_attn.in_proj_weight",
81
+ "decoder.revealer_decoder.layers.3.self_attn.in_proj_bias",
82
+ "decoder.revealer_decoder.layers.3.self_attn.out_proj.weight",
83
+ "decoder.revealer_decoder.layers.3.self_attn.out_proj.bias",
84
+ "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_weight",
85
+ "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_bias",
86
+ "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.weight",
87
+ "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.bias",
88
+ "decoder.revealer_decoder.layers.3.linear1.weight",
89
+ "decoder.revealer_decoder.layers.3.linear1.bias",
90
+ "decoder.revealer_decoder.layers.3.linear2.weight",
91
+ "decoder.revealer_decoder.layers.3.linear2.bias",
92
+ "decoder.revealer_decoder.layers.3.norm1.weight",
93
+ "decoder.revealer_decoder.layers.3.norm1.bias",
94
+ "decoder.revealer_decoder.layers.3.norm2.weight",
95
+ "decoder.revealer_decoder.layers.3.norm2.bias",
96
+ "decoder.revealer_decoder.layers.3.norm3.weight",
97
+ "decoder.revealer_decoder.layers.3.norm3.bias",
98
+ "decoder.actor_decoder.layers.0.self_attn.in_proj_weight",
99
+ "decoder.actor_decoder.layers.0.self_attn.in_proj_bias",
100
+ "decoder.actor_decoder.layers.0.self_attn.out_proj.weight",
101
+ "decoder.actor_decoder.layers.0.self_attn.out_proj.bias",
102
+ "decoder.actor_decoder.layers.0.multihead_attn.in_proj_weight",
103
+ "decoder.actor_decoder.layers.0.multihead_attn.in_proj_bias",
104
+ "decoder.actor_decoder.layers.0.multihead_attn.out_proj.weight",
105
+ "decoder.actor_decoder.layers.0.multihead_attn.out_proj.bias",
106
+ "decoder.actor_decoder.layers.0.linear1.weight",
107
+ "decoder.actor_decoder.layers.0.linear1.bias",
108
+ "decoder.actor_decoder.layers.0.linear2.weight",
109
+ "decoder.actor_decoder.layers.0.linear2.bias",
110
+ "decoder.actor_decoder.layers.0.norm1.weight",
111
+ "decoder.actor_decoder.layers.0.norm1.bias",
112
+ "decoder.actor_decoder.layers.0.norm2.weight",
113
+ "decoder.actor_decoder.layers.0.norm2.bias",
114
+ "decoder.actor_decoder.layers.0.norm3.weight",
115
+ "decoder.actor_decoder.layers.0.norm3.bias",
116
+ "decoder.actor_decoder.layers.1.self_attn.in_proj_weight",
117
+ "decoder.actor_decoder.layers.1.self_attn.in_proj_bias",
118
+ "decoder.actor_decoder.layers.1.self_attn.out_proj.weight",
119
+ "decoder.actor_decoder.layers.1.self_attn.out_proj.bias",
120
+ "decoder.actor_decoder.layers.1.multihead_attn.in_proj_weight",
121
+ "decoder.actor_decoder.layers.1.multihead_attn.in_proj_bias",
122
+ "decoder.actor_decoder.layers.1.multihead_attn.out_proj.weight",
123
+ "decoder.actor_decoder.layers.1.multihead_attn.out_proj.bias",
124
+ "decoder.actor_decoder.layers.1.linear1.weight",
125
+ "decoder.actor_decoder.layers.1.linear1.bias",
126
+ "decoder.actor_decoder.layers.1.linear2.weight",
127
+ "decoder.actor_decoder.layers.1.linear2.bias",
128
+ "decoder.actor_decoder.layers.1.norm1.weight",
129
+ "decoder.actor_decoder.layers.1.norm1.bias",
130
+ "decoder.actor_decoder.layers.1.norm2.weight",
131
+ "decoder.actor_decoder.layers.1.norm2.bias",
132
+ "decoder.actor_decoder.layers.1.norm3.weight",
133
+ "decoder.actor_decoder.layers.1.norm3.bias",
134
+ "decoder.actor_decoder.layers.2.self_attn.in_proj_weight",
135
+ "decoder.actor_decoder.layers.2.self_attn.in_proj_bias",
136
+ "decoder.actor_decoder.layers.2.self_attn.out_proj.weight",
137
+ "decoder.actor_decoder.layers.2.self_attn.out_proj.bias",
138
+ "decoder.actor_decoder.layers.2.multihead_attn.in_proj_weight",
139
+ "decoder.actor_decoder.layers.2.multihead_attn.in_proj_bias",
140
+ "decoder.actor_decoder.layers.2.multihead_attn.out_proj.weight",
141
+ "decoder.actor_decoder.layers.2.multihead_attn.out_proj.bias",
142
+ "decoder.actor_decoder.layers.2.linear1.weight",
143
+ "decoder.actor_decoder.layers.2.linear1.bias",
144
+ "decoder.actor_decoder.layers.2.linear2.weight",
145
+ "decoder.actor_decoder.layers.2.linear2.bias",
146
+ "decoder.actor_decoder.layers.2.norm1.weight",
147
+ "decoder.actor_decoder.layers.2.norm1.bias",
148
+ "decoder.actor_decoder.layers.2.norm2.weight",
149
+ "decoder.actor_decoder.layers.2.norm2.bias",
150
+ "decoder.actor_decoder.layers.2.norm3.weight",
151
+ "decoder.actor_decoder.layers.2.norm3.bias",
152
+ "decoder.actor_decoder.layers.3.self_attn.in_proj_weight",
153
+ "decoder.actor_decoder.layers.3.self_attn.in_proj_bias",
154
+ "decoder.actor_decoder.layers.3.self_attn.out_proj.weight",
155
+ "decoder.actor_decoder.layers.3.self_attn.out_proj.bias",
156
+ "decoder.actor_decoder.layers.3.multihead_attn.in_proj_weight",
157
+ "decoder.actor_decoder.layers.3.multihead_attn.in_proj_bias",
158
+ "decoder.actor_decoder.layers.3.multihead_attn.out_proj.weight",
159
+ "decoder.actor_decoder.layers.3.multihead_attn.out_proj.bias",
160
+ "decoder.actor_decoder.layers.3.linear1.weight",
161
+ "decoder.actor_decoder.layers.3.linear1.bias",
162
+ "decoder.actor_decoder.layers.3.linear2.weight",
163
+ "decoder.actor_decoder.layers.3.linear2.bias",
164
+ "decoder.actor_decoder.layers.3.norm1.weight",
165
+ "decoder.actor_decoder.layers.3.norm1.bias",
166
+ "decoder.actor_decoder.layers.3.norm2.weight",
167
+ "decoder.actor_decoder.layers.3.norm2.bias",
168
+ "decoder.actor_decoder.layers.3.norm3.weight",
169
+ "decoder.actor_decoder.layers.3.norm3.bias",
170
+ "decoder.revealer_mean.weight",
171
+ "decoder.revealer_mean.bias",
172
+ "decoder.revealer_log_std.weight",
173
+ "decoder.revealer_log_std.bias",
174
+ "decoder.actor_mean.weight",
175
+ "decoder.actor_mean.bias",
176
+ "decoder.actor_log_std.weight",
177
+ "decoder.actor_log_std.bias",
178
+ "decoder.proposal_score.0.weight",
179
+ "decoder.proposal_score.0.bias",
180
+ "decoder.proposal_score.1.weight",
181
+ "decoder.proposal_score.1.bias"
182
+ ],
183
+ "missing_keys": [
184
+ "backbone.depth_adapter.depth_proj.0.weight",
185
+ "backbone.depth_adapter.depth_proj.0.bias",
186
+ "backbone.depth_adapter.depth_proj.1.weight",
187
+ "backbone.depth_adapter.depth_proj.1.bias",
188
+ "backbone.depth_adapter.depth_proj.3.weight",
189
+ "backbone.depth_adapter.depth_proj.3.bias",
190
+ "backbone.depth_adapter.geometry_proj.0.weight",
191
+ "backbone.depth_adapter.geometry_proj.0.bias",
192
+ "backbone.depth_adapter.geometry_proj.1.weight",
193
+ "backbone.depth_adapter.geometry_proj.1.bias",
194
+ "backbone.depth_adapter.camera_proj.0.weight",
195
+ "backbone.depth_adapter.camera_proj.0.bias",
196
+ "backbone.depth_adapter.camera_proj.1.weight",
197
+ "backbone.depth_adapter.camera_proj.1.bias",
198
+ "fusion.geometry_fusion.attn.in_proj_weight",
199
+ "fusion.geometry_fusion.attn.in_proj_bias",
200
+ "fusion.geometry_fusion.attn.out_proj.weight",
201
+ "fusion.geometry_fusion.attn.out_proj.bias",
202
+ "fusion.geometry_fusion.gate.0.weight",
203
+ "fusion.geometry_fusion.gate.0.bias",
204
+ "fusion.geometry_fusion.gate.1.weight",
205
+ "fusion.geometry_fusion.gate.1.bias",
206
+ "fusion.geometry_fusion.gate.3.weight",
207
+ "fusion.geometry_fusion.gate.3.bias",
208
+ "fusion.geometry_fusion.out.0.weight",
209
+ "fusion.geometry_fusion.out.0.bias",
210
+ "fusion.geometry_fusion.out.1.weight",
211
+ "fusion.geometry_fusion.out.1.bias",
212
+ "memory.scene_memory.position_embedding",
213
+ "memory.scene_memory.bank_queries",
214
+ "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
215
+ "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
216
+ "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
217
+ "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
218
+ "memory.scene_memory.sequence_encoder.layers.0.linear1.weight",
219
+ "memory.scene_memory.sequence_encoder.layers.0.linear1.bias",
220
+ "memory.scene_memory.sequence_encoder.layers.0.linear2.weight",
221
+ "memory.scene_memory.sequence_encoder.layers.0.linear2.bias",
222
+ "memory.scene_memory.sequence_encoder.layers.0.norm1.weight",
223
+ "memory.scene_memory.sequence_encoder.layers.0.norm1.bias",
224
+ "memory.scene_memory.sequence_encoder.layers.0.norm2.weight",
225
+ "memory.scene_memory.sequence_encoder.layers.0.norm2.bias",
226
+ "memory.scene_memory.bank_attention.in_proj_weight",
227
+ "memory.scene_memory.bank_attention.in_proj_bias",
228
+ "memory.scene_memory.bank_attention.out_proj.weight",
229
+ "memory.scene_memory.bank_attention.out_proj.bias",
230
+ "memory.scene_memory.action_proj.0.weight",
231
+ "memory.scene_memory.action_proj.0.bias",
232
+ "memory.scene_memory.action_proj.1.weight",
233
+ "memory.scene_memory.action_proj.1.bias",
234
+ "memory.scene_memory.write_gate.0.weight",
235
+ "memory.scene_memory.write_gate.0.bias",
236
+ "memory.scene_memory.write_gate.1.weight",
237
+ "memory.scene_memory.write_gate.1.bias",
238
+ "memory.scene_memory.write_gate.3.weight",
239
+ "memory.scene_memory.write_gate.3.bias",
240
+ "memory.scene_memory.token_proj.0.weight",
241
+ "memory.scene_memory.token_proj.0.bias",
242
+ "memory.scene_memory.token_proj.1.weight",
243
+ "memory.scene_memory.token_proj.1.bias",
244
+ "memory.belief_memory.position_embedding",
245
+ "memory.belief_memory.bank_queries",
246
+ "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
247
+ "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
248
+ "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
249
+ "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
250
+ "memory.belief_memory.sequence_encoder.layers.0.linear1.weight",
251
+ "memory.belief_memory.sequence_encoder.layers.0.linear1.bias",
252
+ "memory.belief_memory.sequence_encoder.layers.0.linear2.weight",
253
+ "memory.belief_memory.sequence_encoder.layers.0.linear2.bias",
254
+ "memory.belief_memory.sequence_encoder.layers.0.norm1.weight",
255
+ "memory.belief_memory.sequence_encoder.layers.0.norm1.bias",
256
+ "memory.belief_memory.sequence_encoder.layers.0.norm2.weight",
257
+ "memory.belief_memory.sequence_encoder.layers.0.norm2.bias",
258
+ "memory.belief_memory.bank_attention.in_proj_weight",
259
+ "memory.belief_memory.bank_attention.in_proj_bias",
260
+ "memory.belief_memory.bank_attention.out_proj.weight",
261
+ "memory.belief_memory.bank_attention.out_proj.bias",
262
+ "memory.belief_memory.action_proj.0.weight",
263
+ "memory.belief_memory.action_proj.0.bias",
264
+ "memory.belief_memory.action_proj.1.weight",
265
+ "memory.belief_memory.action_proj.1.bias",
266
+ "memory.belief_memory.write_gate.0.weight",
267
+ "memory.belief_memory.write_gate.0.bias",
268
+ "memory.belief_memory.write_gate.1.weight",
269
+ "memory.belief_memory.write_gate.1.bias",
270
+ "memory.belief_memory.write_gate.3.weight",
271
+ "memory.belief_memory.write_gate.3.bias",
272
+ "memory.belief_memory.token_proj.0.weight",
273
+ "memory.belief_memory.token_proj.0.bias",
274
+ "memory.belief_memory.token_proj.1.weight",
275
+ "memory.belief_memory.token_proj.1.bias",
276
+ "decoder.arm_decoder.layers.0.self_attn.in_proj_weight",
277
+ "decoder.arm_decoder.layers.0.self_attn.in_proj_bias",
278
+ "decoder.arm_decoder.layers.0.self_attn.out_proj.weight",
279
+ "decoder.arm_decoder.layers.0.self_attn.out_proj.bias",
280
+ "decoder.arm_decoder.layers.0.multihead_attn.in_proj_weight",
281
+ "decoder.arm_decoder.layers.0.multihead_attn.in_proj_bias",
282
+ "decoder.arm_decoder.layers.0.multihead_attn.out_proj.weight",
283
+ "decoder.arm_decoder.layers.0.multihead_attn.out_proj.bias",
284
+ "decoder.arm_decoder.layers.0.linear1.weight",
285
+ "decoder.arm_decoder.layers.0.linear1.bias",
286
+ "decoder.arm_decoder.layers.0.linear2.weight",
287
+ "decoder.arm_decoder.layers.0.linear2.bias",
288
+ "decoder.arm_decoder.layers.0.norm1.weight",
289
+ "decoder.arm_decoder.layers.0.norm1.bias",
290
+ "decoder.arm_decoder.layers.0.norm2.weight",
291
+ "decoder.arm_decoder.layers.0.norm2.bias",
292
+ "decoder.arm_decoder.layers.0.norm3.weight",
293
+ "decoder.arm_decoder.layers.0.norm3.bias",
294
+ "decoder.arm_decoder.layers.1.self_attn.in_proj_weight",
295
+ "decoder.arm_decoder.layers.1.self_attn.in_proj_bias",
296
+ "decoder.arm_decoder.layers.1.self_attn.out_proj.weight",
297
+ "decoder.arm_decoder.layers.1.self_attn.out_proj.bias",
298
+ "decoder.arm_decoder.layers.1.multihead_attn.in_proj_weight",
299
+ "decoder.arm_decoder.layers.1.multihead_attn.in_proj_bias",
300
+ "decoder.arm_decoder.layers.1.multihead_attn.out_proj.weight",
301
+ "decoder.arm_decoder.layers.1.multihead_attn.out_proj.bias",
302
+ "decoder.arm_decoder.layers.1.linear1.weight",
303
+ "decoder.arm_decoder.layers.1.linear1.bias",
304
+ "decoder.arm_decoder.layers.1.linear2.weight",
305
+ "decoder.arm_decoder.layers.1.linear2.bias",
306
+ "decoder.arm_decoder.layers.1.norm1.weight",
307
+ "decoder.arm_decoder.layers.1.norm1.bias",
308
+ "decoder.arm_decoder.layers.1.norm2.weight",
309
+ "decoder.arm_decoder.layers.1.norm2.bias",
310
+ "decoder.arm_decoder.layers.1.norm3.weight",
311
+ "decoder.arm_decoder.layers.1.norm3.bias",
312
+ "decoder.arm_decoder.layers.2.self_attn.in_proj_weight",
313
+ "decoder.arm_decoder.layers.2.self_attn.in_proj_bias",
314
+ "decoder.arm_decoder.layers.2.self_attn.out_proj.weight",
315
+ "decoder.arm_decoder.layers.2.self_attn.out_proj.bias",
316
+ "decoder.arm_decoder.layers.2.multihead_attn.in_proj_weight",
317
+ "decoder.arm_decoder.layers.2.multihead_attn.in_proj_bias",
318
+ "decoder.arm_decoder.layers.2.multihead_attn.out_proj.weight",
319
+ "decoder.arm_decoder.layers.2.multihead_attn.out_proj.bias",
320
+ "decoder.arm_decoder.layers.2.linear1.weight",
321
+ "decoder.arm_decoder.layers.2.linear1.bias",
322
+ "decoder.arm_decoder.layers.2.linear2.weight",
323
+ "decoder.arm_decoder.layers.2.linear2.bias",
324
+ "decoder.arm_decoder.layers.2.norm1.weight",
325
+ "decoder.arm_decoder.layers.2.norm1.bias",
326
+ "decoder.arm_decoder.layers.2.norm2.weight",
327
+ "decoder.arm_decoder.layers.2.norm2.bias",
328
+ "decoder.arm_decoder.layers.2.norm3.weight",
329
+ "decoder.arm_decoder.layers.2.norm3.bias",
330
+ "decoder.arm_decoder.layers.3.self_attn.in_proj_weight",
331
+ "decoder.arm_decoder.layers.3.self_attn.in_proj_bias",
332
+ "decoder.arm_decoder.layers.3.self_attn.out_proj.weight",
333
+ "decoder.arm_decoder.layers.3.self_attn.out_proj.bias",
334
+ "decoder.arm_decoder.layers.3.multihead_attn.in_proj_weight",
335
+ "decoder.arm_decoder.layers.3.multihead_attn.in_proj_bias",
336
+ "decoder.arm_decoder.layers.3.multihead_attn.out_proj.weight",
337
+ "decoder.arm_decoder.layers.3.multihead_attn.out_proj.bias",
338
+ "decoder.arm_decoder.layers.3.linear1.weight",
339
+ "decoder.arm_decoder.layers.3.linear1.bias",
340
+ "decoder.arm_decoder.layers.3.linear2.weight",
341
+ "decoder.arm_decoder.layers.3.linear2.bias",
342
+ "decoder.arm_decoder.layers.3.norm1.weight",
343
+ "decoder.arm_decoder.layers.3.norm1.bias",
344
+ "decoder.arm_decoder.layers.3.norm2.weight",
345
+ "decoder.arm_decoder.layers.3.norm2.bias",
346
+ "decoder.arm_decoder.layers.3.norm3.weight",
347
+ "decoder.arm_decoder.layers.3.norm3.bias",
348
+ "decoder.arm_identity.weight",
349
+ "decoder.phase_adapter.weight",
350
+ "decoder.phase_adapter.bias",
351
+ "decoder.role_adapter.weight",
352
+ "decoder.role_adapter.bias",
353
+ "decoder.context_proj.0.weight",
354
+ "decoder.context_proj.0.bias",
355
+ "decoder.context_proj.1.weight",
356
+ "decoder.context_proj.1.bias",
357
+ "decoder.arm_head.0.weight",
358
+ "decoder.arm_head.0.bias",
359
+ "decoder.arm_head.1.weight",
360
+ "decoder.arm_head.1.bias",
361
+ "decoder.arm_mean.weight",
362
+ "decoder.arm_mean.bias",
363
+ "decoder.arm_log_std.weight",
364
+ "decoder.arm_log_std.bias",
365
+ "decoder.proposal_mode_head.0.weight",
366
+ "decoder.proposal_mode_head.0.bias",
367
+ "decoder.proposal_mode_head.1.weight",
368
+ "decoder.proposal_mode_head.1.bias",
369
+ "decoder.proposal_mode_head.3.weight",
370
+ "decoder.proposal_mode_head.3.bias",
371
+ "decoder.proposal_mode_embeddings.weight",
372
+ "decoder.proposal_slot_embeddings.weight",
373
+ "decoder.mode_residual_heads.0.0.weight",
374
+ "decoder.mode_residual_heads.0.0.bias",
375
+ "decoder.mode_residual_heads.0.1.weight",
376
+ "decoder.mode_residual_heads.0.1.bias",
377
+ "decoder.mode_residual_heads.0.3.weight",
378
+ "decoder.mode_residual_heads.0.3.bias",
379
+ "decoder.mode_residual_heads.1.0.weight",
380
+ "decoder.mode_residual_heads.1.0.bias",
381
+ "decoder.mode_residual_heads.1.1.weight",
382
+ "decoder.mode_residual_heads.1.1.bias",
383
+ "decoder.mode_residual_heads.1.3.weight",
384
+ "decoder.mode_residual_heads.1.3.bias",
385
+ "decoder.mode_residual_heads.2.0.weight",
386
+ "decoder.mode_residual_heads.2.0.bias",
387
+ "decoder.mode_residual_heads.2.1.weight",
388
+ "decoder.mode_residual_heads.2.1.bias",
389
+ "decoder.mode_residual_heads.2.3.weight",
390
+ "decoder.mode_residual_heads.2.3.bias",
391
+ "decoder.mode_residual_heads.3.0.weight",
392
+ "decoder.mode_residual_heads.3.0.bias",
393
+ "decoder.mode_residual_heads.3.1.weight",
394
+ "decoder.mode_residual_heads.3.1.bias",
395
+ "decoder.mode_residual_heads.3.3.weight",
396
+ "decoder.mode_residual_heads.3.3.bias",
397
+ "decoder.mode_residual_heads.4.0.weight",
398
+ "decoder.mode_residual_heads.4.0.bias",
399
+ "decoder.mode_residual_heads.4.1.weight",
400
+ "decoder.mode_residual_heads.4.1.bias",
401
+ "decoder.mode_residual_heads.4.3.weight",
402
+ "decoder.mode_residual_heads.4.3.bias",
403
+ "decoder.mode_residual_heads.5.0.weight",
404
+ "decoder.mode_residual_heads.5.0.bias",
405
+ "decoder.mode_residual_heads.5.1.weight",
406
+ "decoder.mode_residual_heads.5.1.bias",
407
+ "decoder.mode_residual_heads.5.3.weight",
408
+ "decoder.mode_residual_heads.5.3.bias",
409
+ "decoder.slot_delta.0.weight",
410
+ "decoder.slot_delta.0.bias",
411
+ "decoder.slot_delta.1.weight",
412
+ "decoder.slot_delta.1.bias",
413
+ "decoder.slot_delta.3.weight",
414
+ "decoder.slot_delta.3.bias",
415
+ "decoder.proposal_score.0.weight",
416
+ "decoder.proposal_score.0.bias",
417
+ "decoder.proposal_score.1.weight",
418
+ "decoder.proposal_score.1.bias",
419
+ "decoder.proposal_score.3.weight",
420
+ "decoder.proposal_score.3.bias",
421
+ "elastic_state_head.interaction_queries",
422
+ "elastic_state_head.interaction_attention.in_proj_weight",
423
+ "elastic_state_head.interaction_attention.in_proj_bias",
424
+ "elastic_state_head.interaction_attention.out_proj.weight",
425
+ "elastic_state_head.interaction_attention.out_proj.bias",
426
+ "elastic_state_head.interaction_mlp.0.weight",
427
+ "elastic_state_head.interaction_mlp.0.bias",
428
+ "elastic_state_head.interaction_mlp.1.weight",
429
+ "elastic_state_head.interaction_mlp.1.bias",
430
+ "elastic_state_head.interaction_mlp.3.weight",
431
+ "elastic_state_head.interaction_mlp.3.bias",
432
+ "elastic_state_head.decoder.field_queries",
433
+ "elastic_state_head.decoder.field_attention.in_proj_weight",
434
+ "elastic_state_head.decoder.field_attention.in_proj_bias",
435
+ "elastic_state_head.decoder.field_attention.out_proj.weight",
436
+ "elastic_state_head.decoder.field_attention.out_proj.bias",
437
+ "elastic_state_head.decoder.field_mlp.0.weight",
438
+ "elastic_state_head.decoder.field_mlp.0.bias",
439
+ "elastic_state_head.decoder.field_mlp.1.weight",
440
+ "elastic_state_head.decoder.field_mlp.1.bias",
441
+ "elastic_state_head.decoder.field_mlp.3.weight",
442
+ "elastic_state_head.decoder.field_mlp.3.bias",
443
+ "elastic_state_head.decoder.summary_proj.0.weight",
444
+ "elastic_state_head.decoder.summary_proj.0.bias",
445
+ "elastic_state_head.decoder.summary_proj.1.weight",
446
+ "elastic_state_head.decoder.summary_proj.1.bias",
447
+ "elastic_state_head.decoder.phase_head.0.weight",
448
+ "elastic_state_head.decoder.phase_head.0.bias",
449
+ "elastic_state_head.decoder.phase_head.1.weight",
450
+ "elastic_state_head.decoder.phase_head.1.bias",
451
+ "elastic_state_head.decoder.phase_head.3.weight",
452
+ "elastic_state_head.decoder.phase_head.3.bias",
453
+ "elastic_state_head.decoder.arm_role_head.0.weight",
454
+ "elastic_state_head.decoder.arm_role_head.0.bias",
455
+ "elastic_state_head.decoder.arm_role_head.1.weight",
456
+ "elastic_state_head.decoder.arm_role_head.1.bias",
457
+ "elastic_state_head.decoder.arm_role_head.3.weight",
458
+ "elastic_state_head.decoder.arm_role_head.3.bias",
459
+ "elastic_state_head.decoder.arm_identity.weight",
460
+ "elastic_state_head.decoder.support_mode.0.weight",
461
+ "elastic_state_head.decoder.support_mode.0.bias",
462
+ "elastic_state_head.decoder.support_mode.1.weight",
463
+ "elastic_state_head.decoder.support_mode.1.bias",
464
+ "elastic_state_head.decoder.support_mode.3.weight",
465
+ "elastic_state_head.decoder.support_mode.3.bias",
466
+ "elastic_state_head.decoder.access_field.weight",
467
+ "elastic_state_head.decoder.access_field.bias",
468
+ "elastic_state_head.decoder.target_belief_field.weight",
469
+ "elastic_state_head.decoder.target_belief_field.bias",
470
+ "elastic_state_head.decoder.visibility_field.weight",
471
+ "elastic_state_head.decoder.visibility_field.bias",
472
+ "elastic_state_head.decoder.clearance_field.weight",
473
+ "elastic_state_head.decoder.clearance_field.bias",
474
+ "elastic_state_head.decoder.occluder_contact_field.weight",
475
+ "elastic_state_head.decoder.occluder_contact_field.bias",
476
+ "elastic_state_head.decoder.grasp_affordance_field.weight",
477
+ "elastic_state_head.decoder.grasp_affordance_field.bias",
478
+ "elastic_state_head.decoder.support_stability_field.weight",
479
+ "elastic_state_head.decoder.support_stability_field.bias",
480
+ "elastic_state_head.decoder.persistence_field.weight",
481
+ "elastic_state_head.decoder.persistence_field.bias",
482
+ "elastic_state_head.decoder.reocclusion_field.weight",
483
+ "elastic_state_head.decoder.reocclusion_field.bias",
484
+ "elastic_state_head.decoder.disturbance_field.weight",
485
+ "elastic_state_head.decoder.disturbance_field.bias",
486
+ "elastic_state_head.decoder.uncertainty_field.weight",
487
+ "elastic_state_head.decoder.uncertainty_field.bias",
488
+ "elastic_state_head.decoder.reocclusion_head.0.weight",
489
+ "elastic_state_head.decoder.reocclusion_head.0.bias",
490
+ "elastic_state_head.decoder.reocclusion_head.1.weight",
491
+ "elastic_state_head.decoder.reocclusion_head.1.bias",
492
+ "elastic_state_head.decoder.reocclusion_head.3.weight",
493
+ "elastic_state_head.decoder.reocclusion_head.3.bias",
494
+ "world_model.state_encoder.0.weight",
495
+ "world_model.state_encoder.0.bias",
496
+ "world_model.state_encoder.1.weight",
497
+ "world_model.state_encoder.1.bias",
498
+ "world_model.scene_memory_proj.0.weight",
499
+ "world_model.scene_memory_proj.0.bias",
500
+ "world_model.scene_memory_proj.1.weight",
501
+ "world_model.scene_memory_proj.1.bias",
502
+ "world_model.belief_memory_proj.0.weight",
503
+ "world_model.belief_memory_proj.0.bias",
504
+ "world_model.belief_memory_proj.1.weight",
505
+ "world_model.belief_memory_proj.1.bias",
506
+ "world_model.action_encoder.0.weight",
507
+ "world_model.action_encoder.0.bias",
508
+ "world_model.action_encoder.1.weight",
509
+ "world_model.action_encoder.1.bias",
510
+ "world_model.transition.weight_ih",
511
+ "world_model.transition.weight_hh",
512
+ "world_model.transition.bias_ih",
513
+ "world_model.transition.bias_hh",
514
+ "world_model.scene_memory_update.weight",
515
+ "world_model.scene_memory_update.bias",
516
+ "world_model.belief_memory_update.weight",
517
+ "world_model.belief_memory_update.bias",
518
+ "world_model.compact_decoder.weight",
519
+ "world_model.compact_decoder.bias",
520
+ "world_model.target_belief_head.weight",
521
+ "world_model.target_belief_head.bias",
522
+ "world_model.visibility_head.weight",
523
+ "world_model.visibility_head.bias",
524
+ "world_model.clearance_head.weight",
525
+ "world_model.clearance_head.bias",
526
+ "world_model.occluder_contact_head.weight",
527
+ "world_model.occluder_contact_head.bias",
528
+ "world_model.grasp_affordance_head.weight",
529
+ "world_model.grasp_affordance_head.bias",
530
+ "world_model.support_stability_head.weight",
531
+ "world_model.support_stability_head.bias",
532
+ "world_model.persistence_head.weight",
533
+ "world_model.persistence_head.bias",
534
+ "world_model.reocclusion_head.weight",
535
+ "world_model.reocclusion_head.bias",
536
+ "world_model.disturbance_head.weight",
537
+ "world_model.disturbance_head.bias",
538
+ "world_model.uncertainty_head.weight",
539
+ "world_model.uncertainty_head.bias",
540
+ "world_model.access_head.weight",
541
+ "world_model.access_head.bias",
542
+ "planner.residual.trunk.0.weight",
543
+ "planner.residual.trunk.0.bias",
544
+ "planner.residual.trunk.1.weight",
545
+ "planner.residual.trunk.1.bias",
546
+ "planner.residual.trunk.3.weight",
547
+ "planner.residual.trunk.3.bias",
548
+ "planner.residual.success_head.weight",
549
+ "planner.residual.success_head.bias",
550
+ "planner.residual.risk_head.weight",
551
+ "planner.residual.risk_head.bias",
552
+ "planner.residual.residual_head.weight",
553
+ "planner.residual.residual_head.bias"
554
+ ],
555
+ "unexpected_keys": []
556
+ }
557
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/benchmark_full/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "full": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4166666666666667,
5
+ "bag_proxy": 0.5833333333333334,
6
+ "cloth_proxy": 0.6666666666666666
7
+ },
8
+ "mean_success": 0.5555555555555555,
9
+ "visibility_integral": 31.92372977733612,
10
+ "corridor_availability": 0.8500884034567409,
11
+ "reocclusion_rate": 0.029287114566719827,
12
+ "persistence_horizon_mae": 0.894922278028389,
13
+ "disturbance_cost": 0.28616168903600836
14
+ }
15
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/benchmark_full/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## full
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/checkpoint_best.pt
5
+ - mean_success: 0.556
6
+ - visibility_integral: 31.924
7
+ - corridor_availability: 0.850
8
+ - reocclusion_rate: 0.029
9
+ - persistence_horizon_mae: 0.895
10
+ - disturbance_cost: 0.286
11
+ - foliage_proxy_success: 0.417
12
+ - bag_proxy_success: 0.583
13
+ - cloth_proxy_success: 0.667
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/config_resolved.yaml ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_r3d_stage1_clip_seed8
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
3
+ device: cuda
4
+ seed: 8
5
+ init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
6
+ init_strict: false
7
+ data:
8
+ proxies:
9
+ - foliage_proxy
10
+ - bag_proxy
11
+ - cloth_proxy
12
+ resolution: 224
13
+ dataset_version: reveal_proxy_v6_rgbd_elastic_state
14
+ train_episodes_per_proxy: 48
15
+ val_episodes_per_proxy: 16
16
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage1_seed8.pt
17
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage1_seed8.pt
18
+ rebuild_dataset: false
19
+ chunk_horizon: 8
20
+ rollout_horizon: 5
21
+ history_steps: 6
22
+ planner_candidates: 8
23
+ seed: 8
24
+ optim:
25
+ epochs: 4
26
+ batch_size: 2
27
+ num_workers: 4
28
+ lr: 0.0003
29
+ weight_decay: 0.0001
30
+ trainer:
31
+ policy_type: elastic_reveal
32
+ use_bf16: true
33
+ grad_clip_norm: 1.0
34
+ freeze_backbone: true
35
+ gradient_checkpointing: false
36
+ plan_during_train: true
37
+ plan_during_eval: true
38
+ support_mode_conditioning: true
39
+ planner_mode: trainable
40
+ use_depth: false
41
+ use_world_model: true
42
+ use_role_tokens: true
43
+ compute_equivariance_probe: true
44
+ policy:
45
+ backbone:
46
+ model_name: openai/clip-vit-base-patch32
47
+ hidden_dim: 512
48
+ max_text_tokens: 32
49
+ freeze_backbone: true
50
+ gradient_checkpointing: false
51
+ use_dummy_backbone: false
52
+ fusion:
53
+ hidden_dim: 512
54
+ num_cameras: 3
55
+ num_layers: 4
56
+ num_heads: 8
57
+ ff_dim: 2048
58
+ dropout: 0.1
59
+ proprio_dim: 32
60
+ proprio_tokens: 1
61
+ memory:
62
+ hidden_dim: 512
63
+ action_dim: 14
64
+ history_steps: 6
65
+ scene_history_steps: 3
66
+ belief_history_steps: 8
67
+ num_layers: 2
68
+ dropout: 0.1
69
+ memory_bank_size: 4
70
+ scene_bank_size: 2
71
+ belief_bank_size: 2
72
+ num_heads: 8
73
+ max_history_steps: 8
74
+ decoder:
75
+ hidden_dim: 512
76
+ num_heads: 8
77
+ num_layers: 4
78
+ ff_dim: 2048
79
+ dropout: 0.1
80
+ chunk_size: 8
81
+ action_dim: 14
82
+ arm_action_dim: 7
83
+ num_candidates: 8
84
+ num_phases: 5
85
+ num_arm_roles: 4
86
+ num_proposal_modes: 6
87
+ planner_top_k: 4
88
+ reveal_head:
89
+ hidden_dim: 512
90
+ num_support_modes: 3
91
+ num_approach_templates: 32
92
+ rollout_horizon: 5
93
+ belief_map_size: 32
94
+ field_size: 16
95
+ num_heads: 8
96
+ predict_belief_map: true
97
+ num_phases: 5
98
+ num_arm_roles: 4
99
+ num_interaction_tokens: 8
100
+ world_model:
101
+ hidden_dim: 512
102
+ action_dim: 14
103
+ num_support_modes: 3
104
+ num_approach_templates: 32
105
+ rollout_horizon: 5
106
+ field_size: 16
107
+ num_heads: 8
108
+ num_phases: 5
109
+ num_arm_roles: 4
110
+ num_interaction_tokens: 8
111
+ belief_map_size: 32
112
+ predict_belief_map: true
113
+ scene_bank_size: 2
114
+ belief_bank_size: 2
115
+ planner:
116
+ hidden_dim: 512
117
+ num_candidates: 8
118
+ action_dim: 14
119
+ num_support_modes: 3
120
+ utility_margin: 0.1
121
+ num_heads: 8
122
+ num_layers: 2
123
+ num_phases: 5
124
+ num_arm_roles: 4
125
+ top_k: 4
126
+ loss_weights:
127
+ action: 1.0
128
+ phase: 0.1
129
+ arm_role: 0.15
130
+ support_mode: 0.1
131
+ corridor: 0.15
132
+ persistence: 0.05
133
+ disturbance: 0.05
134
+ world_model: 0.2
135
+ belief: 0.05
136
+ visibility: 0.05
137
+ clearance: 0.05
138
+ support_stability: 0.05
139
+ reocclusion: 0.05
140
+ occluder_contact: 0.05
141
+ grasp_affordance: 0.05
142
+ planner_success: 0.25
143
+ planner_risk: 0.1
144
+ planner_ranking: 0.2
145
+ proposal_reconstruction: 0.1
146
+ proposal_success: 0.15
147
+ proposal_ranking: 0.2
148
+ proposal_diversity: 0.05
149
+ role_swap_consistency: 0.05
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/diagnostics_full/proxy_diagnostics.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "planner_top1_accuracy": 0.25984251968503935,
3
+ "planner_regret": 0.024652592837810516,
4
+ "planner_score_utility_spearman": 0.15748029947280884,
5
+ "risk_calibration_mse": 0.010109159164130688,
6
+ "role_collapse_rate": 0.0,
7
+ "proposal_diversity": 0.02039325051009655,
8
+ "left_right_equivariance_error": 8.317838273796951e-05,
9
+ "belief_calibration_brier": 0.0039802417159080505,
10
+ "reocclusion_calibration_brier": 0.2667863667011261,
11
+ "support_stability_mae": 0.023258011788129807,
12
+ "clearance_auc": 0.9407927438472715,
13
+ "memory_write_rate": 0.0,
14
+ "memory_saturation": 0.5879086852073669,
15
+ "num_samples": 127
16
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/metrics.json ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.025799189747862168,
6
+ "arm_role": 0.027215735138398815,
7
+ "belief": 0.11522909954034222,
8
+ "clearance": 0.09597517975181809,
9
+ "corridor": 0.3045216482132673,
10
+ "disturbance": 0.006567074132739083,
11
+ "grasp_affordance": 0.02625927054055074,
12
+ "occluder_contact": 0.2161167692295544,
13
+ "persistence": 7.305491891831004,
14
+ "phase": 0.7473598300474477,
15
+ "planner_ranking": 0.14102927445574143,
16
+ "planner_risk": 0.014660530898254365,
17
+ "planner_success": 0.596433128830026,
18
+ "proposal_diversity": 0.0,
19
+ "proposal_ranking": 1.26868818193206,
20
+ "proposal_reconstruction": 0.06815405646387819,
21
+ "proposal_success": 0.6748700912710259,
22
+ "reocclusion": 0.7006335564308765,
23
+ "role_swap_consistency": 0.0005011227108655176,
24
+ "support_mode": 0.7077700629908377,
25
+ "support_stability": 0.1599257462645798,
26
+ "total": 1.733834327203441,
27
+ "uncertainty": 0.022427979406115357,
28
+ "visibility": 0.11316451830155562,
29
+ "world_model": 2.674901399312843
30
+ },
31
+ "val": {
32
+ "action": 0.02199536032276228,
33
+ "arm_role": 9.8040056428772e-06,
34
+ "belief": 0.0978035525768064,
35
+ "clearance": 0.07755720446584746,
36
+ "corridor": 0.24431297194678336,
37
+ "disturbance": 0.0019795258613157785,
38
+ "grasp_affordance": 0.008650467454572208,
39
+ "occluder_contact": 0.20205649081617594,
40
+ "persistence": 4.437129996716976,
41
+ "phase": 0.6695621414110065,
42
+ "planner_ranking": 0.04436381870164041,
43
+ "planner_risk": 0.010196975797498453,
44
+ "planner_success": 0.5646271030418575,
45
+ "proposal_diversity": 0.0,
46
+ "proposal_ranking": 1.1638631131500006,
47
+ "proposal_reconstruction": 0.06484090705635026,
48
+ "proposal_success": 0.6649224627763033,
49
+ "reocclusion": 0.7438069470226765,
50
+ "role_swap_consistency": 0.0,
51
+ "support_mode": 0.673728191293776,
52
+ "support_stability": 0.13629821891663596,
53
+ "total": 1.4150245506316423,
54
+ "uncertainty": 0.002036258225416532,
55
+ "visibility": 0.09110353700816631,
56
+ "world_model": 2.210838695988059
57
+ }
58
+ },
59
+ {
60
+ "epoch": 1,
61
+ "train": {
62
+ "action": 0.02220674532499769,
63
+ "arm_role": 4.0168849585568094e-05,
64
+ "belief": 0.10375202887969491,
65
+ "clearance": 0.08468958432176663,
66
+ "corridor": 0.24882320250282114,
67
+ "disturbance": 0.002981857188692701,
68
+ "grasp_affordance": 0.00994103324857994,
69
+ "occluder_contact": 0.20824503820604054,
70
+ "persistence": 4.263324179262391,
71
+ "phase": 0.7222360341336714,
72
+ "planner_ranking": 0.044953017053952174,
73
+ "planner_risk": 0.010661984013600143,
74
+ "planner_success": 0.5370719069273684,
75
+ "proposal_diversity": 0.0,
76
+ "proposal_ranking": 1.1506784087076236,
77
+ "proposal_reconstruction": 0.06470025059674422,
78
+ "proposal_success": 0.6748968515720667,
79
+ "reocclusion": 0.7042920837539652,
80
+ "role_swap_consistency": 0.00024932249915769023,
81
+ "support_mode": 0.6881518938154451,
82
+ "support_stability": 0.1487102357972979,
83
+ "total": 1.3995415040959862,
84
+ "uncertainty": 0.0019858729011069556,
85
+ "visibility": 0.09729615078156531,
86
+ "world_model": 2.178037493952906
87
+ },
88
+ "val": {
89
+ "action": 0.029678026388864964,
90
+ "arm_role": 0.0003116108114227245,
91
+ "belief": 0.10797233448829502,
92
+ "clearance": 0.08150003047194332,
93
+ "corridor": 0.2509052273235284,
94
+ "disturbance": 0.002103368451003007,
95
+ "grasp_affordance": 0.008963905274868011,
96
+ "occluder_contact": 0.2007133779115975,
97
+ "persistence": 4.478599248453975,
98
+ "phase": 0.7040554136037827,
99
+ "planner_ranking": 0.03813048706929578,
100
+ "planner_risk": 0.01057393318569666,
101
+ "planner_success": 0.5217722351662815,
102
+ "proposal_diversity": 0.0,
103
+ "proposal_ranking": 1.1685641314834356,
104
+ "proposal_reconstruction": 0.07131227233912796,
105
+ "proposal_success": 0.6757729910314083,
106
+ "reocclusion": 0.6976062525063753,
107
+ "role_swap_consistency": 0.0,
108
+ "support_mode": 0.7273222031071782,
109
+ "support_stability": 0.1463006478443276,
110
+ "total": 1.3876731358468533,
111
+ "uncertainty": 0.0005028243003835087,
112
+ "visibility": 0.10090084094554186,
113
+ "world_model": 2.023001086898148
114
+ }
115
+ },
116
+ {
117
+ "epoch": 2,
118
+ "train": {
119
+ "action": 0.022834130358048446,
120
+ "arm_role": 3.6339485208401505e-05,
121
+ "belief": 0.10015391417978946,
122
+ "clearance": 0.08339313631243418,
123
+ "corridor": 0.24550532728082536,
124
+ "disturbance": 0.002419849791671015,
125
+ "grasp_affordance": 0.011102509094860541,
126
+ "occluder_contact": 0.20242435567041966,
127
+ "persistence": 4.354869382134127,
128
+ "phase": 0.6933721572316754,
129
+ "planner_ranking": 0.04187904763565859,
130
+ "planner_risk": 0.010259467963658331,
131
+ "planner_success": 0.5138571092283538,
132
+ "proposal_diversity": 0.0,
133
+ "proposal_ranking": 1.1488539314394846,
134
+ "proposal_reconstruction": 0.06509613401758733,
135
+ "proposal_success": 0.6776590312962757,
136
+ "reocclusion": 0.70495132540221,
137
+ "role_swap_consistency": 0.0003516697920602868,
138
+ "support_mode": 0.6823001881544503,
139
+ "support_stability": 0.14350243961116718,
140
+ "total": 1.378995967473035,
141
+ "uncertainty": 0.0031733291824921203,
142
+ "visibility": 0.09716511293465555,
143
+ "world_model": 2.104598100584839
144
+ },
145
+ "val": {
146
+ "action": 0.02644303720444441,
147
+ "arm_role": 4.627731826190029e-06,
148
+ "belief": 0.10258024383801967,
149
+ "clearance": 0.07597982959123328,
150
+ "corridor": 0.2423992605181411,
151
+ "disturbance": 0.0015974244740846189,
152
+ "grasp_affordance": 0.007909159859991632,
153
+ "occluder_contact": 0.19435308501124382,
154
+ "persistence": 3.919285401701927,
155
+ "phase": 0.6770087121985853,
156
+ "planner_ranking": 0.030531517459166935,
157
+ "planner_risk": 0.010262692154356046,
158
+ "planner_success": 0.5169326290488243,
159
+ "proposal_diversity": 0.0,
160
+ "proposal_ranking": 1.138186807744205,
161
+ "proposal_reconstruction": 0.06911751109873876,
162
+ "proposal_success": 0.6695848302915692,
163
+ "reocclusion": 0.6975388880819082,
164
+ "role_swap_consistency": 0.0,
165
+ "support_mode": 0.6884247697889805,
166
+ "support_stability": 0.13594868587097153,
167
+ "total": 1.3366163168102503,
168
+ "uncertainty": 0.0006479808544099797,
169
+ "visibility": 0.09649082575924695,
170
+ "world_model": 2.0216304706409574
171
+ }
172
+ },
173
+ {
174
+ "epoch": 3,
175
+ "train": {
176
+ "action": 0.021160060905544235,
177
+ "arm_role": 5.587545364939105e-05,
178
+ "belief": 0.10077974488909956,
179
+ "clearance": 0.08377115065670762,
180
+ "corridor": 0.2723994788211522,
181
+ "disturbance": 0.0028603613238174243,
182
+ "grasp_affordance": 0.011514163958835196,
183
+ "occluder_contact": 0.20602131318983607,
184
+ "persistence": 3.0813600014851317,
185
+ "phase": 0.6817607779777487,
186
+ "planner_ranking": 0.031658034657560674,
187
+ "planner_risk": 0.010394540625284256,
188
+ "planner_success": 0.5069346120532271,
189
+ "proposal_diversity": 0.0,
190
+ "proposal_ranking": 1.132226309851202,
191
+ "proposal_reconstruction": 0.06328810811900967,
192
+ "proposal_success": 0.6744790461050902,
193
+ "reocclusion": 0.6852282721022661,
194
+ "role_swap_consistency": 0.0005754872515272832,
195
+ "support_mode": 0.6633978239528796,
196
+ "support_stability": 0.14488365837977468,
197
+ "total": 1.293662095569191,
198
+ "uncertainty": 0.0023333917296635863,
199
+ "visibility": 0.09853576490392235,
200
+ "world_model": 2.0413369105748482
201
+ },
202
+ "val": {
203
+ "action": 0.017367416352499276,
204
+ "arm_role": 7.692722565622034e-07,
205
+ "belief": 0.1027774921967648,
206
+ "clearance": 0.08752925635781139,
207
+ "corridor": 0.26156787533545867,
208
+ "disturbance": 0.0016430629628985116,
209
+ "grasp_affordance": 0.010058694657345768,
210
+ "occluder_contact": 0.21157401148229837,
211
+ "persistence": 1.0993698399979621,
212
+ "phase": 0.6142133427783847,
213
+ "planner_ranking": 0.03328441088268619,
214
+ "planner_risk": 0.010188427979301196,
215
+ "planner_success": 0.4918641885742545,
216
+ "proposal_diversity": 0.0,
217
+ "proposal_ranking": 1.1239634547382593,
218
+ "proposal_reconstruction": 0.06056849448941648,
219
+ "proposal_success": 0.6778606250882149,
220
+ "reocclusion": 0.5640022717416286,
221
+ "role_swap_consistency": 0.0,
222
+ "support_mode": 0.5024671151768416,
223
+ "support_stability": 0.13648800805094652,
224
+ "total": 1.1350205279886723,
225
+ "uncertainty": 0.0008341338888158134,
226
+ "visibility": 0.0982570193009451,
227
+ "world_model": 1.93993010930717
228
+ }
229
+ }
230
+ ]
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/summary.json ADDED
@@ -0,0 +1,557 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "experiment_name": "proxy_interaction_r3d_stage1_clip_seed8",
3
+ "device": "cuda",
4
+ "best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed8/checkpoint_best.pt",
5
+ "final_train_total": 1.293662095569191,
6
+ "final_val_total": 1.1350205279886723,
7
+ "train_time_sec": 146.87081933021545,
8
+ "peak_gpu_memory_mb": 1891.1337890625,
9
+ "num_train_samples": 381,
10
+ "num_val_samples": 127,
11
+ "planner_mode": "trainable",
12
+ "frozen_modules": [],
13
+ "init_info": {
14
+ "path": "/workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt",
15
+ "loaded_keys": 461,
16
+ "skipped_shape_mismatch_keys": [
17
+ "memory.gru.weight_ih_l0",
18
+ "memory.gru.weight_hh_l0",
19
+ "memory.gru.bias_ih_l0",
20
+ "memory.gru.bias_hh_l0",
21
+ "memory.token_proj.0.weight",
22
+ "memory.token_proj.0.bias",
23
+ "memory.token_proj.1.weight",
24
+ "memory.token_proj.1.bias",
25
+ "decoder.actor_role_bias",
26
+ "decoder.revealer_decoder.layers.0.self_attn.in_proj_weight",
27
+ "decoder.revealer_decoder.layers.0.self_attn.in_proj_bias",
28
+ "decoder.revealer_decoder.layers.0.self_attn.out_proj.weight",
29
+ "decoder.revealer_decoder.layers.0.self_attn.out_proj.bias",
30
+ "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_weight",
31
+ "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_bias",
32
+ "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.weight",
33
+ "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.bias",
34
+ "decoder.revealer_decoder.layers.0.linear1.weight",
35
+ "decoder.revealer_decoder.layers.0.linear1.bias",
36
+ "decoder.revealer_decoder.layers.0.linear2.weight",
37
+ "decoder.revealer_decoder.layers.0.linear2.bias",
38
+ "decoder.revealer_decoder.layers.0.norm1.weight",
39
+ "decoder.revealer_decoder.layers.0.norm1.bias",
40
+ "decoder.revealer_decoder.layers.0.norm2.weight",
41
+ "decoder.revealer_decoder.layers.0.norm2.bias",
42
+ "decoder.revealer_decoder.layers.0.norm3.weight",
43
+ "decoder.revealer_decoder.layers.0.norm3.bias",
44
+ "decoder.revealer_decoder.layers.1.self_attn.in_proj_weight",
45
+ "decoder.revealer_decoder.layers.1.self_attn.in_proj_bias",
46
+ "decoder.revealer_decoder.layers.1.self_attn.out_proj.weight",
47
+ "decoder.revealer_decoder.layers.1.self_attn.out_proj.bias",
48
+ "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_weight",
49
+ "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_bias",
50
+ "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.weight",
51
+ "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.bias",
52
+ "decoder.revealer_decoder.layers.1.linear1.weight",
53
+ "decoder.revealer_decoder.layers.1.linear1.bias",
54
+ "decoder.revealer_decoder.layers.1.linear2.weight",
55
+ "decoder.revealer_decoder.layers.1.linear2.bias",
56
+ "decoder.revealer_decoder.layers.1.norm1.weight",
57
+ "decoder.revealer_decoder.layers.1.norm1.bias",
58
+ "decoder.revealer_decoder.layers.1.norm2.weight",
59
+ "decoder.revealer_decoder.layers.1.norm2.bias",
60
+ "decoder.revealer_decoder.layers.1.norm3.weight",
61
+ "decoder.revealer_decoder.layers.1.norm3.bias",
62
+ "decoder.revealer_decoder.layers.2.self_attn.in_proj_weight",
63
+ "decoder.revealer_decoder.layers.2.self_attn.in_proj_bias",
64
+ "decoder.revealer_decoder.layers.2.self_attn.out_proj.weight",
65
+ "decoder.revealer_decoder.layers.2.self_attn.out_proj.bias",
66
+ "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_weight",
67
+ "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_bias",
68
+ "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.weight",
69
+ "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.bias",
70
+ "decoder.revealer_decoder.layers.2.linear1.weight",
71
+ "decoder.revealer_decoder.layers.2.linear1.bias",
72
+ "decoder.revealer_decoder.layers.2.linear2.weight",
73
+ "decoder.revealer_decoder.layers.2.linear2.bias",
74
+ "decoder.revealer_decoder.layers.2.norm1.weight",
75
+ "decoder.revealer_decoder.layers.2.norm1.bias",
76
+ "decoder.revealer_decoder.layers.2.norm2.weight",
77
+ "decoder.revealer_decoder.layers.2.norm2.bias",
78
+ "decoder.revealer_decoder.layers.2.norm3.weight",
79
+ "decoder.revealer_decoder.layers.2.norm3.bias",
80
+ "decoder.revealer_decoder.layers.3.self_attn.in_proj_weight",
81
+ "decoder.revealer_decoder.layers.3.self_attn.in_proj_bias",
82
+ "decoder.revealer_decoder.layers.3.self_attn.out_proj.weight",
83
+ "decoder.revealer_decoder.layers.3.self_attn.out_proj.bias",
84
+ "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_weight",
85
+ "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_bias",
86
+ "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.weight",
87
+ "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.bias",
88
+ "decoder.revealer_decoder.layers.3.linear1.weight",
89
+ "decoder.revealer_decoder.layers.3.linear1.bias",
90
+ "decoder.revealer_decoder.layers.3.linear2.weight",
91
+ "decoder.revealer_decoder.layers.3.linear2.bias",
92
+ "decoder.revealer_decoder.layers.3.norm1.weight",
93
+ "decoder.revealer_decoder.layers.3.norm1.bias",
94
+ "decoder.revealer_decoder.layers.3.norm2.weight",
95
+ "decoder.revealer_decoder.layers.3.norm2.bias",
96
+ "decoder.revealer_decoder.layers.3.norm3.weight",
97
+ "decoder.revealer_decoder.layers.3.norm3.bias",
98
+ "decoder.actor_decoder.layers.0.self_attn.in_proj_weight",
99
+ "decoder.actor_decoder.layers.0.self_attn.in_proj_bias",
100
+ "decoder.actor_decoder.layers.0.self_attn.out_proj.weight",
101
+ "decoder.actor_decoder.layers.0.self_attn.out_proj.bias",
102
+ "decoder.actor_decoder.layers.0.multihead_attn.in_proj_weight",
103
+ "decoder.actor_decoder.layers.0.multihead_attn.in_proj_bias",
104
+ "decoder.actor_decoder.layers.0.multihead_attn.out_proj.weight",
105
+ "decoder.actor_decoder.layers.0.multihead_attn.out_proj.bias",
106
+ "decoder.actor_decoder.layers.0.linear1.weight",
107
+ "decoder.actor_decoder.layers.0.linear1.bias",
108
+ "decoder.actor_decoder.layers.0.linear2.weight",
109
+ "decoder.actor_decoder.layers.0.linear2.bias",
110
+ "decoder.actor_decoder.layers.0.norm1.weight",
111
+ "decoder.actor_decoder.layers.0.norm1.bias",
112
+ "decoder.actor_decoder.layers.0.norm2.weight",
113
+ "decoder.actor_decoder.layers.0.norm2.bias",
114
+ "decoder.actor_decoder.layers.0.norm3.weight",
115
+ "decoder.actor_decoder.layers.0.norm3.bias",
116
+ "decoder.actor_decoder.layers.1.self_attn.in_proj_weight",
117
+ "decoder.actor_decoder.layers.1.self_attn.in_proj_bias",
118
+ "decoder.actor_decoder.layers.1.self_attn.out_proj.weight",
119
+ "decoder.actor_decoder.layers.1.self_attn.out_proj.bias",
120
+ "decoder.actor_decoder.layers.1.multihead_attn.in_proj_weight",
121
+ "decoder.actor_decoder.layers.1.multihead_attn.in_proj_bias",
122
+ "decoder.actor_decoder.layers.1.multihead_attn.out_proj.weight",
123
+ "decoder.actor_decoder.layers.1.multihead_attn.out_proj.bias",
124
+ "decoder.actor_decoder.layers.1.linear1.weight",
125
+ "decoder.actor_decoder.layers.1.linear1.bias",
126
+ "decoder.actor_decoder.layers.1.linear2.weight",
127
+ "decoder.actor_decoder.layers.1.linear2.bias",
128
+ "decoder.actor_decoder.layers.1.norm1.weight",
129
+ "decoder.actor_decoder.layers.1.norm1.bias",
130
+ "decoder.actor_decoder.layers.1.norm2.weight",
131
+ "decoder.actor_decoder.layers.1.norm2.bias",
132
+ "decoder.actor_decoder.layers.1.norm3.weight",
133
+ "decoder.actor_decoder.layers.1.norm3.bias",
134
+ "decoder.actor_decoder.layers.2.self_attn.in_proj_weight",
135
+ "decoder.actor_decoder.layers.2.self_attn.in_proj_bias",
136
+ "decoder.actor_decoder.layers.2.self_attn.out_proj.weight",
137
+ "decoder.actor_decoder.layers.2.self_attn.out_proj.bias",
138
+ "decoder.actor_decoder.layers.2.multihead_attn.in_proj_weight",
139
+ "decoder.actor_decoder.layers.2.multihead_attn.in_proj_bias",
140
+ "decoder.actor_decoder.layers.2.multihead_attn.out_proj.weight",
141
+ "decoder.actor_decoder.layers.2.multihead_attn.out_proj.bias",
142
+ "decoder.actor_decoder.layers.2.linear1.weight",
143
+ "decoder.actor_decoder.layers.2.linear1.bias",
144
+ "decoder.actor_decoder.layers.2.linear2.weight",
145
+ "decoder.actor_decoder.layers.2.linear2.bias",
146
+ "decoder.actor_decoder.layers.2.norm1.weight",
147
+ "decoder.actor_decoder.layers.2.norm1.bias",
148
+ "decoder.actor_decoder.layers.2.norm2.weight",
149
+ "decoder.actor_decoder.layers.2.norm2.bias",
150
+ "decoder.actor_decoder.layers.2.norm3.weight",
151
+ "decoder.actor_decoder.layers.2.norm3.bias",
152
+ "decoder.actor_decoder.layers.3.self_attn.in_proj_weight",
153
+ "decoder.actor_decoder.layers.3.self_attn.in_proj_bias",
154
+ "decoder.actor_decoder.layers.3.self_attn.out_proj.weight",
155
+ "decoder.actor_decoder.layers.3.self_attn.out_proj.bias",
156
+ "decoder.actor_decoder.layers.3.multihead_attn.in_proj_weight",
157
+ "decoder.actor_decoder.layers.3.multihead_attn.in_proj_bias",
158
+ "decoder.actor_decoder.layers.3.multihead_attn.out_proj.weight",
159
+ "decoder.actor_decoder.layers.3.multihead_attn.out_proj.bias",
160
+ "decoder.actor_decoder.layers.3.linear1.weight",
161
+ "decoder.actor_decoder.layers.3.linear1.bias",
162
+ "decoder.actor_decoder.layers.3.linear2.weight",
163
+ "decoder.actor_decoder.layers.3.linear2.bias",
164
+ "decoder.actor_decoder.layers.3.norm1.weight",
165
+ "decoder.actor_decoder.layers.3.norm1.bias",
166
+ "decoder.actor_decoder.layers.3.norm2.weight",
167
+ "decoder.actor_decoder.layers.3.norm2.bias",
168
+ "decoder.actor_decoder.layers.3.norm3.weight",
169
+ "decoder.actor_decoder.layers.3.norm3.bias",
170
+ "decoder.revealer_mean.weight",
171
+ "decoder.revealer_mean.bias",
172
+ "decoder.revealer_log_std.weight",
173
+ "decoder.revealer_log_std.bias",
174
+ "decoder.actor_mean.weight",
175
+ "decoder.actor_mean.bias",
176
+ "decoder.actor_log_std.weight",
177
+ "decoder.actor_log_std.bias",
178
+ "decoder.proposal_score.0.weight",
179
+ "decoder.proposal_score.0.bias",
180
+ "decoder.proposal_score.1.weight",
181
+ "decoder.proposal_score.1.bias"
182
+ ],
183
+ "missing_keys": [
184
+ "backbone.depth_adapter.depth_proj.0.weight",
185
+ "backbone.depth_adapter.depth_proj.0.bias",
186
+ "backbone.depth_adapter.depth_proj.1.weight",
187
+ "backbone.depth_adapter.depth_proj.1.bias",
188
+ "backbone.depth_adapter.depth_proj.3.weight",
189
+ "backbone.depth_adapter.depth_proj.3.bias",
190
+ "backbone.depth_adapter.geometry_proj.0.weight",
191
+ "backbone.depth_adapter.geometry_proj.0.bias",
192
+ "backbone.depth_adapter.geometry_proj.1.weight",
193
+ "backbone.depth_adapter.geometry_proj.1.bias",
194
+ "backbone.depth_adapter.camera_proj.0.weight",
195
+ "backbone.depth_adapter.camera_proj.0.bias",
196
+ "backbone.depth_adapter.camera_proj.1.weight",
197
+ "backbone.depth_adapter.camera_proj.1.bias",
198
+ "fusion.geometry_fusion.attn.in_proj_weight",
199
+ "fusion.geometry_fusion.attn.in_proj_bias",
200
+ "fusion.geometry_fusion.attn.out_proj.weight",
201
+ "fusion.geometry_fusion.attn.out_proj.bias",
202
+ "fusion.geometry_fusion.gate.0.weight",
203
+ "fusion.geometry_fusion.gate.0.bias",
204
+ "fusion.geometry_fusion.gate.1.weight",
205
+ "fusion.geometry_fusion.gate.1.bias",
206
+ "fusion.geometry_fusion.gate.3.weight",
207
+ "fusion.geometry_fusion.gate.3.bias",
208
+ "fusion.geometry_fusion.out.0.weight",
209
+ "fusion.geometry_fusion.out.0.bias",
210
+ "fusion.geometry_fusion.out.1.weight",
211
+ "fusion.geometry_fusion.out.1.bias",
212
+ "memory.scene_memory.position_embedding",
213
+ "memory.scene_memory.bank_queries",
214
+ "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
215
+ "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
216
+ "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
217
+ "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
218
+ "memory.scene_memory.sequence_encoder.layers.0.linear1.weight",
219
+ "memory.scene_memory.sequence_encoder.layers.0.linear1.bias",
220
+ "memory.scene_memory.sequence_encoder.layers.0.linear2.weight",
221
+ "memory.scene_memory.sequence_encoder.layers.0.linear2.bias",
222
+ "memory.scene_memory.sequence_encoder.layers.0.norm1.weight",
223
+ "memory.scene_memory.sequence_encoder.layers.0.norm1.bias",
224
+ "memory.scene_memory.sequence_encoder.layers.0.norm2.weight",
225
+ "memory.scene_memory.sequence_encoder.layers.0.norm2.bias",
226
+ "memory.scene_memory.bank_attention.in_proj_weight",
227
+ "memory.scene_memory.bank_attention.in_proj_bias",
228
+ "memory.scene_memory.bank_attention.out_proj.weight",
229
+ "memory.scene_memory.bank_attention.out_proj.bias",
230
+ "memory.scene_memory.action_proj.0.weight",
231
+ "memory.scene_memory.action_proj.0.bias",
232
+ "memory.scene_memory.action_proj.1.weight",
233
+ "memory.scene_memory.action_proj.1.bias",
234
+ "memory.scene_memory.write_gate.0.weight",
235
+ "memory.scene_memory.write_gate.0.bias",
236
+ "memory.scene_memory.write_gate.1.weight",
237
+ "memory.scene_memory.write_gate.1.bias",
238
+ "memory.scene_memory.write_gate.3.weight",
239
+ "memory.scene_memory.write_gate.3.bias",
240
+ "memory.scene_memory.token_proj.0.weight",
241
+ "memory.scene_memory.token_proj.0.bias",
242
+ "memory.scene_memory.token_proj.1.weight",
243
+ "memory.scene_memory.token_proj.1.bias",
244
+ "memory.belief_memory.position_embedding",
245
+ "memory.belief_memory.bank_queries",
246
+ "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
247
+ "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
248
+ "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
249
+ "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
250
+ "memory.belief_memory.sequence_encoder.layers.0.linear1.weight",
251
+ "memory.belief_memory.sequence_encoder.layers.0.linear1.bias",
252
+ "memory.belief_memory.sequence_encoder.layers.0.linear2.weight",
253
+ "memory.belief_memory.sequence_encoder.layers.0.linear2.bias",
254
+ "memory.belief_memory.sequence_encoder.layers.0.norm1.weight",
255
+ "memory.belief_memory.sequence_encoder.layers.0.norm1.bias",
256
+ "memory.belief_memory.sequence_encoder.layers.0.norm2.weight",
257
+ "memory.belief_memory.sequence_encoder.layers.0.norm2.bias",
258
+ "memory.belief_memory.bank_attention.in_proj_weight",
259
+ "memory.belief_memory.bank_attention.in_proj_bias",
260
+ "memory.belief_memory.bank_attention.out_proj.weight",
261
+ "memory.belief_memory.bank_attention.out_proj.bias",
262
+ "memory.belief_memory.action_proj.0.weight",
263
+ "memory.belief_memory.action_proj.0.bias",
264
+ "memory.belief_memory.action_proj.1.weight",
265
+ "memory.belief_memory.action_proj.1.bias",
266
+ "memory.belief_memory.write_gate.0.weight",
267
+ "memory.belief_memory.write_gate.0.bias",
268
+ "memory.belief_memory.write_gate.1.weight",
269
+ "memory.belief_memory.write_gate.1.bias",
270
+ "memory.belief_memory.write_gate.3.weight",
271
+ "memory.belief_memory.write_gate.3.bias",
272
+ "memory.belief_memory.token_proj.0.weight",
273
+ "memory.belief_memory.token_proj.0.bias",
274
+ "memory.belief_memory.token_proj.1.weight",
275
+ "memory.belief_memory.token_proj.1.bias",
276
+ "decoder.arm_decoder.layers.0.self_attn.in_proj_weight",
277
+ "decoder.arm_decoder.layers.0.self_attn.in_proj_bias",
278
+ "decoder.arm_decoder.layers.0.self_attn.out_proj.weight",
279
+ "decoder.arm_decoder.layers.0.self_attn.out_proj.bias",
280
+ "decoder.arm_decoder.layers.0.multihead_attn.in_proj_weight",
281
+ "decoder.arm_decoder.layers.0.multihead_attn.in_proj_bias",
282
+ "decoder.arm_decoder.layers.0.multihead_attn.out_proj.weight",
283
+ "decoder.arm_decoder.layers.0.multihead_attn.out_proj.bias",
284
+ "decoder.arm_decoder.layers.0.linear1.weight",
285
+ "decoder.arm_decoder.layers.0.linear1.bias",
286
+ "decoder.arm_decoder.layers.0.linear2.weight",
287
+ "decoder.arm_decoder.layers.0.linear2.bias",
288
+ "decoder.arm_decoder.layers.0.norm1.weight",
289
+ "decoder.arm_decoder.layers.0.norm1.bias",
290
+ "decoder.arm_decoder.layers.0.norm2.weight",
291
+ "decoder.arm_decoder.layers.0.norm2.bias",
292
+ "decoder.arm_decoder.layers.0.norm3.weight",
293
+ "decoder.arm_decoder.layers.0.norm3.bias",
294
+ "decoder.arm_decoder.layers.1.self_attn.in_proj_weight",
295
+ "decoder.arm_decoder.layers.1.self_attn.in_proj_bias",
296
+ "decoder.arm_decoder.layers.1.self_attn.out_proj.weight",
297
+ "decoder.arm_decoder.layers.1.self_attn.out_proj.bias",
298
+ "decoder.arm_decoder.layers.1.multihead_attn.in_proj_weight",
299
+ "decoder.arm_decoder.layers.1.multihead_attn.in_proj_bias",
300
+ "decoder.arm_decoder.layers.1.multihead_attn.out_proj.weight",
301
+ "decoder.arm_decoder.layers.1.multihead_attn.out_proj.bias",
302
+ "decoder.arm_decoder.layers.1.linear1.weight",
303
+ "decoder.arm_decoder.layers.1.linear1.bias",
304
+ "decoder.arm_decoder.layers.1.linear2.weight",
305
+ "decoder.arm_decoder.layers.1.linear2.bias",
306
+ "decoder.arm_decoder.layers.1.norm1.weight",
307
+ "decoder.arm_decoder.layers.1.norm1.bias",
308
+ "decoder.arm_decoder.layers.1.norm2.weight",
309
+ "decoder.arm_decoder.layers.1.norm2.bias",
310
+ "decoder.arm_decoder.layers.1.norm3.weight",
311
+ "decoder.arm_decoder.layers.1.norm3.bias",
312
+ "decoder.arm_decoder.layers.2.self_attn.in_proj_weight",
313
+ "decoder.arm_decoder.layers.2.self_attn.in_proj_bias",
314
+ "decoder.arm_decoder.layers.2.self_attn.out_proj.weight",
315
+ "decoder.arm_decoder.layers.2.self_attn.out_proj.bias",
316
+ "decoder.arm_decoder.layers.2.multihead_attn.in_proj_weight",
317
+ "decoder.arm_decoder.layers.2.multihead_attn.in_proj_bias",
318
+ "decoder.arm_decoder.layers.2.multihead_attn.out_proj.weight",
319
+ "decoder.arm_decoder.layers.2.multihead_attn.out_proj.bias",
320
+ "decoder.arm_decoder.layers.2.linear1.weight",
321
+ "decoder.arm_decoder.layers.2.linear1.bias",
322
+ "decoder.arm_decoder.layers.2.linear2.weight",
323
+ "decoder.arm_decoder.layers.2.linear2.bias",
324
+ "decoder.arm_decoder.layers.2.norm1.weight",
325
+ "decoder.arm_decoder.layers.2.norm1.bias",
326
+ "decoder.arm_decoder.layers.2.norm2.weight",
327
+ "decoder.arm_decoder.layers.2.norm2.bias",
328
+ "decoder.arm_decoder.layers.2.norm3.weight",
329
+ "decoder.arm_decoder.layers.2.norm3.bias",
330
+ "decoder.arm_decoder.layers.3.self_attn.in_proj_weight",
331
+ "decoder.arm_decoder.layers.3.self_attn.in_proj_bias",
332
+ "decoder.arm_decoder.layers.3.self_attn.out_proj.weight",
333
+ "decoder.arm_decoder.layers.3.self_attn.out_proj.bias",
334
+ "decoder.arm_decoder.layers.3.multihead_attn.in_proj_weight",
335
+ "decoder.arm_decoder.layers.3.multihead_attn.in_proj_bias",
336
+ "decoder.arm_decoder.layers.3.multihead_attn.out_proj.weight",
337
+ "decoder.arm_decoder.layers.3.multihead_attn.out_proj.bias",
338
+ "decoder.arm_decoder.layers.3.linear1.weight",
339
+ "decoder.arm_decoder.layers.3.linear1.bias",
340
+ "decoder.arm_decoder.layers.3.linear2.weight",
341
+ "decoder.arm_decoder.layers.3.linear2.bias",
342
+ "decoder.arm_decoder.layers.3.norm1.weight",
343
+ "decoder.arm_decoder.layers.3.norm1.bias",
344
+ "decoder.arm_decoder.layers.3.norm2.weight",
345
+ "decoder.arm_decoder.layers.3.norm2.bias",
346
+ "decoder.arm_decoder.layers.3.norm3.weight",
347
+ "decoder.arm_decoder.layers.3.norm3.bias",
348
+ "decoder.arm_identity.weight",
349
+ "decoder.phase_adapter.weight",
350
+ "decoder.phase_adapter.bias",
351
+ "decoder.role_adapter.weight",
352
+ "decoder.role_adapter.bias",
353
+ "decoder.context_proj.0.weight",
354
+ "decoder.context_proj.0.bias",
355
+ "decoder.context_proj.1.weight",
356
+ "decoder.context_proj.1.bias",
357
+ "decoder.arm_head.0.weight",
358
+ "decoder.arm_head.0.bias",
359
+ "decoder.arm_head.1.weight",
360
+ "decoder.arm_head.1.bias",
361
+ "decoder.arm_mean.weight",
362
+ "decoder.arm_mean.bias",
363
+ "decoder.arm_log_std.weight",
364
+ "decoder.arm_log_std.bias",
365
+ "decoder.proposal_mode_head.0.weight",
366
+ "decoder.proposal_mode_head.0.bias",
367
+ "decoder.proposal_mode_head.1.weight",
368
+ "decoder.proposal_mode_head.1.bias",
369
+ "decoder.proposal_mode_head.3.weight",
370
+ "decoder.proposal_mode_head.3.bias",
371
+ "decoder.proposal_mode_embeddings.weight",
372
+ "decoder.proposal_slot_embeddings.weight",
373
+ "decoder.mode_residual_heads.0.0.weight",
374
+ "decoder.mode_residual_heads.0.0.bias",
375
+ "decoder.mode_residual_heads.0.1.weight",
376
+ "decoder.mode_residual_heads.0.1.bias",
377
+ "decoder.mode_residual_heads.0.3.weight",
378
+ "decoder.mode_residual_heads.0.3.bias",
379
+ "decoder.mode_residual_heads.1.0.weight",
380
+ "decoder.mode_residual_heads.1.0.bias",
381
+ "decoder.mode_residual_heads.1.1.weight",
382
+ "decoder.mode_residual_heads.1.1.bias",
383
+ "decoder.mode_residual_heads.1.3.weight",
384
+ "decoder.mode_residual_heads.1.3.bias",
385
+ "decoder.mode_residual_heads.2.0.weight",
386
+ "decoder.mode_residual_heads.2.0.bias",
387
+ "decoder.mode_residual_heads.2.1.weight",
388
+ "decoder.mode_residual_heads.2.1.bias",
389
+ "decoder.mode_residual_heads.2.3.weight",
390
+ "decoder.mode_residual_heads.2.3.bias",
391
+ "decoder.mode_residual_heads.3.0.weight",
392
+ "decoder.mode_residual_heads.3.0.bias",
393
+ "decoder.mode_residual_heads.3.1.weight",
394
+ "decoder.mode_residual_heads.3.1.bias",
395
+ "decoder.mode_residual_heads.3.3.weight",
396
+ "decoder.mode_residual_heads.3.3.bias",
397
+ "decoder.mode_residual_heads.4.0.weight",
398
+ "decoder.mode_residual_heads.4.0.bias",
399
+ "decoder.mode_residual_heads.4.1.weight",
400
+ "decoder.mode_residual_heads.4.1.bias",
401
+ "decoder.mode_residual_heads.4.3.weight",
402
+ "decoder.mode_residual_heads.4.3.bias",
403
+ "decoder.mode_residual_heads.5.0.weight",
404
+ "decoder.mode_residual_heads.5.0.bias",
405
+ "decoder.mode_residual_heads.5.1.weight",
406
+ "decoder.mode_residual_heads.5.1.bias",
407
+ "decoder.mode_residual_heads.5.3.weight",
408
+ "decoder.mode_residual_heads.5.3.bias",
409
+ "decoder.slot_delta.0.weight",
410
+ "decoder.slot_delta.0.bias",
411
+ "decoder.slot_delta.1.weight",
412
+ "decoder.slot_delta.1.bias",
413
+ "decoder.slot_delta.3.weight",
414
+ "decoder.slot_delta.3.bias",
415
+ "decoder.proposal_score.0.weight",
416
+ "decoder.proposal_score.0.bias",
417
+ "decoder.proposal_score.1.weight",
418
+ "decoder.proposal_score.1.bias",
419
+ "decoder.proposal_score.3.weight",
420
+ "decoder.proposal_score.3.bias",
421
+ "elastic_state_head.interaction_queries",
422
+ "elastic_state_head.interaction_attention.in_proj_weight",
423
+ "elastic_state_head.interaction_attention.in_proj_bias",
424
+ "elastic_state_head.interaction_attention.out_proj.weight",
425
+ "elastic_state_head.interaction_attention.out_proj.bias",
426
+ "elastic_state_head.interaction_mlp.0.weight",
427
+ "elastic_state_head.interaction_mlp.0.bias",
428
+ "elastic_state_head.interaction_mlp.1.weight",
429
+ "elastic_state_head.interaction_mlp.1.bias",
430
+ "elastic_state_head.interaction_mlp.3.weight",
431
+ "elastic_state_head.interaction_mlp.3.bias",
432
+ "elastic_state_head.decoder.field_queries",
433
+ "elastic_state_head.decoder.field_attention.in_proj_weight",
434
+ "elastic_state_head.decoder.field_attention.in_proj_bias",
435
+ "elastic_state_head.decoder.field_attention.out_proj.weight",
436
+ "elastic_state_head.decoder.field_attention.out_proj.bias",
437
+ "elastic_state_head.decoder.field_mlp.0.weight",
438
+ "elastic_state_head.decoder.field_mlp.0.bias",
439
+ "elastic_state_head.decoder.field_mlp.1.weight",
440
+ "elastic_state_head.decoder.field_mlp.1.bias",
441
+ "elastic_state_head.decoder.field_mlp.3.weight",
442
+ "elastic_state_head.decoder.field_mlp.3.bias",
443
+ "elastic_state_head.decoder.summary_proj.0.weight",
444
+ "elastic_state_head.decoder.summary_proj.0.bias",
445
+ "elastic_state_head.decoder.summary_proj.1.weight",
446
+ "elastic_state_head.decoder.summary_proj.1.bias",
447
+ "elastic_state_head.decoder.phase_head.0.weight",
448
+ "elastic_state_head.decoder.phase_head.0.bias",
449
+ "elastic_state_head.decoder.phase_head.1.weight",
450
+ "elastic_state_head.decoder.phase_head.1.bias",
451
+ "elastic_state_head.decoder.phase_head.3.weight",
452
+ "elastic_state_head.decoder.phase_head.3.bias",
453
+ "elastic_state_head.decoder.arm_role_head.0.weight",
454
+ "elastic_state_head.decoder.arm_role_head.0.bias",
455
+ "elastic_state_head.decoder.arm_role_head.1.weight",
456
+ "elastic_state_head.decoder.arm_role_head.1.bias",
457
+ "elastic_state_head.decoder.arm_role_head.3.weight",
458
+ "elastic_state_head.decoder.arm_role_head.3.bias",
459
+ "elastic_state_head.decoder.arm_identity.weight",
460
+ "elastic_state_head.decoder.support_mode.0.weight",
461
+ "elastic_state_head.decoder.support_mode.0.bias",
462
+ "elastic_state_head.decoder.support_mode.1.weight",
463
+ "elastic_state_head.decoder.support_mode.1.bias",
464
+ "elastic_state_head.decoder.support_mode.3.weight",
465
+ "elastic_state_head.decoder.support_mode.3.bias",
466
+ "elastic_state_head.decoder.access_field.weight",
467
+ "elastic_state_head.decoder.access_field.bias",
468
+ "elastic_state_head.decoder.target_belief_field.weight",
469
+ "elastic_state_head.decoder.target_belief_field.bias",
470
+ "elastic_state_head.decoder.visibility_field.weight",
471
+ "elastic_state_head.decoder.visibility_field.bias",
472
+ "elastic_state_head.decoder.clearance_field.weight",
473
+ "elastic_state_head.decoder.clearance_field.bias",
474
+ "elastic_state_head.decoder.occluder_contact_field.weight",
475
+ "elastic_state_head.decoder.occluder_contact_field.bias",
476
+ "elastic_state_head.decoder.grasp_affordance_field.weight",
477
+ "elastic_state_head.decoder.grasp_affordance_field.bias",
478
+ "elastic_state_head.decoder.support_stability_field.weight",
479
+ "elastic_state_head.decoder.support_stability_field.bias",
480
+ "elastic_state_head.decoder.persistence_field.weight",
481
+ "elastic_state_head.decoder.persistence_field.bias",
482
+ "elastic_state_head.decoder.reocclusion_field.weight",
483
+ "elastic_state_head.decoder.reocclusion_field.bias",
484
+ "elastic_state_head.decoder.disturbance_field.weight",
485
+ "elastic_state_head.decoder.disturbance_field.bias",
486
+ "elastic_state_head.decoder.uncertainty_field.weight",
487
+ "elastic_state_head.decoder.uncertainty_field.bias",
488
+ "elastic_state_head.decoder.reocclusion_head.0.weight",
489
+ "elastic_state_head.decoder.reocclusion_head.0.bias",
490
+ "elastic_state_head.decoder.reocclusion_head.1.weight",
491
+ "elastic_state_head.decoder.reocclusion_head.1.bias",
492
+ "elastic_state_head.decoder.reocclusion_head.3.weight",
493
+ "elastic_state_head.decoder.reocclusion_head.3.bias",
494
+ "world_model.state_encoder.0.weight",
495
+ "world_model.state_encoder.0.bias",
496
+ "world_model.state_encoder.1.weight",
497
+ "world_model.state_encoder.1.bias",
498
+ "world_model.scene_memory_proj.0.weight",
499
+ "world_model.scene_memory_proj.0.bias",
500
+ "world_model.scene_memory_proj.1.weight",
501
+ "world_model.scene_memory_proj.1.bias",
502
+ "world_model.belief_memory_proj.0.weight",
503
+ "world_model.belief_memory_proj.0.bias",
504
+ "world_model.belief_memory_proj.1.weight",
505
+ "world_model.belief_memory_proj.1.bias",
506
+ "world_model.action_encoder.0.weight",
507
+ "world_model.action_encoder.0.bias",
508
+ "world_model.action_encoder.1.weight",
509
+ "world_model.action_encoder.1.bias",
510
+ "world_model.transition.weight_ih",
511
+ "world_model.transition.weight_hh",
512
+ "world_model.transition.bias_ih",
513
+ "world_model.transition.bias_hh",
514
+ "world_model.scene_memory_update.weight",
515
+ "world_model.scene_memory_update.bias",
516
+ "world_model.belief_memory_update.weight",
517
+ "world_model.belief_memory_update.bias",
518
+ "world_model.compact_decoder.weight",
519
+ "world_model.compact_decoder.bias",
520
+ "world_model.target_belief_head.weight",
521
+ "world_model.target_belief_head.bias",
522
+ "world_model.visibility_head.weight",
523
+ "world_model.visibility_head.bias",
524
+ "world_model.clearance_head.weight",
525
+ "world_model.clearance_head.bias",
526
+ "world_model.occluder_contact_head.weight",
527
+ "world_model.occluder_contact_head.bias",
528
+ "world_model.grasp_affordance_head.weight",
529
+ "world_model.grasp_affordance_head.bias",
530
+ "world_model.support_stability_head.weight",
531
+ "world_model.support_stability_head.bias",
532
+ "world_model.persistence_head.weight",
533
+ "world_model.persistence_head.bias",
534
+ "world_model.reocclusion_head.weight",
535
+ "world_model.reocclusion_head.bias",
536
+ "world_model.disturbance_head.weight",
537
+ "world_model.disturbance_head.bias",
538
+ "world_model.uncertainty_head.weight",
539
+ "world_model.uncertainty_head.bias",
540
+ "world_model.access_head.weight",
541
+ "world_model.access_head.bias",
542
+ "planner.residual.trunk.0.weight",
543
+ "planner.residual.trunk.0.bias",
544
+ "planner.residual.trunk.1.weight",
545
+ "planner.residual.trunk.1.bias",
546
+ "planner.residual.trunk.3.weight",
547
+ "planner.residual.trunk.3.bias",
548
+ "planner.residual.success_head.weight",
549
+ "planner.residual.success_head.bias",
550
+ "planner.residual.risk_head.weight",
551
+ "planner.residual.risk_head.bias",
552
+ "planner.residual.residual_head.weight",
553
+ "planner.residual.residual_head.bias"
554
+ ],
555
+ "unexpected_keys": []
556
+ }
557
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/benchmark_full/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "full": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.5,
5
+ "bag_proxy": 0.5416666666666666,
6
+ "cloth_proxy": 0.6666666666666666
7
+ },
8
+ "mean_success": 0.5694444444444443,
9
+ "visibility_integral": 32.623872251146366,
10
+ "corridor_availability": 0.889709601799647,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 2.1627785900766536,
13
+ "disturbance_cost": 0.2332938505957524
14
+ }
15
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/benchmark_full/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## full
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/checkpoint_best.pt
5
+ - mean_success: 0.569
6
+ - visibility_integral: 32.624
7
+ - corridor_availability: 0.890
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 2.163
10
+ - disturbance_cost: 0.233
11
+ - foliage_proxy_success: 0.500
12
+ - bag_proxy_success: 0.542
13
+ - cloth_proxy_success: 0.667
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/config_resolved.yaml ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_r3d_stage1_clip_seed9
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
3
+ device: cuda
4
+ seed: 9
5
+ init_checkpoint: /workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt
6
+ init_strict: false
7
+ data:
8
+ proxies:
9
+ - foliage_proxy
10
+ - bag_proxy
11
+ - cloth_proxy
12
+ resolution: 224
13
+ dataset_version: reveal_proxy_v6_rgbd_elastic_state
14
+ train_episodes_per_proxy: 48
15
+ val_episodes_per_proxy: 16
16
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage1_seed9.pt
17
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage1_seed9.pt
18
+ rebuild_dataset: false
19
+ chunk_horizon: 8
20
+ rollout_horizon: 5
21
+ history_steps: 6
22
+ planner_candidates: 8
23
+ seed: 9
24
+ optim:
25
+ epochs: 4
26
+ batch_size: 2
27
+ num_workers: 4
28
+ lr: 0.0003
29
+ weight_decay: 0.0001
30
+ trainer:
31
+ policy_type: elastic_reveal
32
+ use_bf16: true
33
+ grad_clip_norm: 1.0
34
+ freeze_backbone: true
35
+ gradient_checkpointing: false
36
+ plan_during_train: true
37
+ plan_during_eval: true
38
+ support_mode_conditioning: true
39
+ planner_mode: trainable
40
+ use_depth: false
41
+ use_world_model: true
42
+ use_role_tokens: true
43
+ compute_equivariance_probe: true
44
+ policy:
45
+ backbone:
46
+ model_name: openai/clip-vit-base-patch32
47
+ hidden_dim: 512
48
+ max_text_tokens: 32
49
+ freeze_backbone: true
50
+ gradient_checkpointing: false
51
+ use_dummy_backbone: false
52
+ fusion:
53
+ hidden_dim: 512
54
+ num_cameras: 3
55
+ num_layers: 4
56
+ num_heads: 8
57
+ ff_dim: 2048
58
+ dropout: 0.1
59
+ proprio_dim: 32
60
+ proprio_tokens: 1
61
+ memory:
62
+ hidden_dim: 512
63
+ action_dim: 14
64
+ history_steps: 6
65
+ scene_history_steps: 3
66
+ belief_history_steps: 8
67
+ num_layers: 2
68
+ dropout: 0.1
69
+ memory_bank_size: 4
70
+ scene_bank_size: 2
71
+ belief_bank_size: 2
72
+ num_heads: 8
73
+ max_history_steps: 8
74
+ decoder:
75
+ hidden_dim: 512
76
+ num_heads: 8
77
+ num_layers: 4
78
+ ff_dim: 2048
79
+ dropout: 0.1
80
+ chunk_size: 8
81
+ action_dim: 14
82
+ arm_action_dim: 7
83
+ num_candidates: 8
84
+ num_phases: 5
85
+ num_arm_roles: 4
86
+ num_proposal_modes: 6
87
+ planner_top_k: 4
88
+ reveal_head:
89
+ hidden_dim: 512
90
+ num_support_modes: 3
91
+ num_approach_templates: 32
92
+ rollout_horizon: 5
93
+ belief_map_size: 32
94
+ field_size: 16
95
+ num_heads: 8
96
+ predict_belief_map: true
97
+ num_phases: 5
98
+ num_arm_roles: 4
99
+ num_interaction_tokens: 8
100
+ world_model:
101
+ hidden_dim: 512
102
+ action_dim: 14
103
+ num_support_modes: 3
104
+ num_approach_templates: 32
105
+ rollout_horizon: 5
106
+ field_size: 16
107
+ num_heads: 8
108
+ num_phases: 5
109
+ num_arm_roles: 4
110
+ num_interaction_tokens: 8
111
+ belief_map_size: 32
112
+ predict_belief_map: true
113
+ scene_bank_size: 2
114
+ belief_bank_size: 2
115
+ planner:
116
+ hidden_dim: 512
117
+ num_candidates: 8
118
+ action_dim: 14
119
+ num_support_modes: 3
120
+ utility_margin: 0.1
121
+ num_heads: 8
122
+ num_layers: 2
123
+ num_phases: 5
124
+ num_arm_roles: 4
125
+ top_k: 4
126
+ loss_weights:
127
+ action: 1.0
128
+ phase: 0.1
129
+ arm_role: 0.15
130
+ support_mode: 0.1
131
+ corridor: 0.15
132
+ persistence: 0.05
133
+ disturbance: 0.05
134
+ world_model: 0.2
135
+ belief: 0.05
136
+ visibility: 0.05
137
+ clearance: 0.05
138
+ support_stability: 0.05
139
+ reocclusion: 0.05
140
+ occluder_contact: 0.05
141
+ grasp_affordance: 0.05
142
+ planner_success: 0.25
143
+ planner_risk: 0.1
144
+ planner_ranking: 0.2
145
+ proposal_reconstruction: 0.1
146
+ proposal_success: 0.15
147
+ proposal_ranking: 0.2
148
+ proposal_diversity: 0.05
149
+ role_swap_consistency: 0.05
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/diagnostics_full/proxy_diagnostics.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "planner_top1_accuracy": 0.2890625,
3
+ "planner_regret": 0.02300698682665825,
4
+ "planner_score_utility_spearman": 0.22968751192092896,
5
+ "risk_calibration_mse": 0.010304542258381844,
6
+ "role_collapse_rate": 0.0,
7
+ "proposal_diversity": 0.022611485794186592,
8
+ "left_right_equivariance_error": 8.689248215887346e-05,
9
+ "belief_calibration_brier": 0.0043337177485227585,
10
+ "reocclusion_calibration_brier": 0.22800305485725403,
11
+ "support_stability_mae": 0.02859283983707428,
12
+ "clearance_auc": 0.6329041426155311,
13
+ "memory_write_rate": 0.0,
14
+ "memory_saturation": 0.2469944953918457,
15
+ "num_samples": 128
16
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/metrics.json ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.027812569460978633,
6
+ "arm_role": 0.030137697646492406,
7
+ "belief": 0.12157152328444154,
8
+ "clearance": 0.09282162053216444,
9
+ "corridor": 0.2851656379864404,
10
+ "disturbance": 0.004553798715077344,
11
+ "grasp_affordance": 0.018851539715634365,
12
+ "occluder_contact": 0.2132460696916831,
13
+ "persistence": 5.642576662878807,
14
+ "phase": 0.7761939600894325,
15
+ "planner_ranking": 0.17902961440620282,
16
+ "planner_risk": 0.013923984336035668,
17
+ "planner_success": 0.6199151214800382,
18
+ "proposal_diversity": 0.0,
19
+ "proposal_ranking": 1.2823116054660395,
20
+ "proposal_reconstruction": 0.06912861580127164,
21
+ "proposal_success": 0.6811760576147782,
22
+ "reocclusion": 0.7353295496419856,
23
+ "role_swap_consistency": 0.0005873552748725113,
24
+ "support_mode": 0.7828435195119757,
25
+ "support_stability": 0.16347284512594343,
26
+ "total": 1.6866143584251403,
27
+ "uncertainty": 0.019001170223897423,
28
+ "visibility": 0.11754893544865282,
29
+ "world_model": 2.710779071795313
30
+ },
31
+ "val": {
32
+ "action": 0.02170204828144051,
33
+ "arm_role": 6.762321064002208e-06,
34
+ "belief": 0.10080993873998523,
35
+ "clearance": 0.08166962582617998,
36
+ "corridor": 0.23909102065954357,
37
+ "disturbance": 0.001983066906802833,
38
+ "grasp_affordance": 0.008535019573173486,
39
+ "occluder_contact": 0.2112727805506438,
40
+ "persistence": 3.857563339173794,
41
+ "phase": 0.6654304726980627,
42
+ "planner_ranking": 0.04032187890697969,
43
+ "planner_risk": 0.011350331830726645,
44
+ "planner_success": 0.5934910103678703,
45
+ "proposal_diversity": 0.0,
46
+ "proposal_ranking": 1.1493350621312857,
47
+ "proposal_reconstruction": 0.06338102876907215,
48
+ "proposal_success": 0.6806164355948567,
49
+ "reocclusion": 0.6909330077469349,
50
+ "role_swap_consistency": 0.0,
51
+ "support_mode": 0.6831411011517048,
52
+ "support_stability": 0.13910080850473605,
53
+ "total": 1.458911145105958,
54
+ "uncertainty": 0.0033405375688744243,
55
+ "visibility": 0.09547075629234314,
56
+ "world_model": 2.5560860373079777
57
+ }
58
+ },
59
+ {
60
+ "epoch": 1,
61
+ "train": {
62
+ "action": 0.023493385471795733,
63
+ "arm_role": 0.0002928718140250758,
64
+ "belief": 0.10523007610126546,
65
+ "clearance": 0.08677955961933262,
66
+ "corridor": 0.25750191186211613,
67
+ "disturbance": 0.0031594517295421777,
68
+ "grasp_affordance": 0.01005841078187682,
69
+ "occluder_contact": 0.20920588836858148,
70
+ "persistence": 4.331643560058192,
71
+ "phase": 0.7189607319078948,
72
+ "planner_ranking": 0.05423959079287933,
73
+ "planner_risk": 0.010427037446980217,
74
+ "planner_success": 0.5849820621703801,
75
+ "proposal_diversity": 0.0,
76
+ "proposal_ranking": 1.1505002517449228,
77
+ "proposal_reconstruction": 0.06525950771021216,
78
+ "proposal_success": 0.6752778025049913,
79
+ "reocclusion": 0.7005268357302014,
80
+ "role_swap_consistency": 0.0007142310405278726,
81
+ "support_mode": 0.70107421875,
82
+ "support_stability": 0.14081861141480898,
83
+ "total": 1.432289683818817,
84
+ "uncertainty": 0.002551493341237993,
85
+ "visibility": 0.10134971671198544,
86
+ "world_model": 2.237849539204648
87
+ },
88
+ "val": {
89
+ "action": 0.021186921891057864,
90
+ "arm_role": 3.6694105953749556e-07,
91
+ "belief": 0.09995241661090404,
92
+ "clearance": 0.08146111795213073,
93
+ "corridor": 0.24082361184991896,
94
+ "disturbance": 0.001976304362585779,
95
+ "grasp_affordance": 0.00922958003502572,
96
+ "occluder_contact": 0.21127386414445937,
97
+ "persistence": 3.7571401111781597,
98
+ "phase": 0.6817005267366767,
99
+ "planner_ranking": 0.03515352255374182,
100
+ "planner_risk": 0.01038273600534012,
101
+ "planner_success": 0.5073812543414533,
102
+ "proposal_diversity": 0.0,
103
+ "proposal_ranking": 1.1285581476986408,
104
+ "proposal_reconstruction": 0.0629420520272106,
105
+ "proposal_success": 0.6745674163103104,
106
+ "reocclusion": 0.6919681001454592,
107
+ "role_swap_consistency": 0.0,
108
+ "support_mode": 0.6647901809774339,
109
+ "support_stability": 0.14570825529517606,
110
+ "total": 1.3415670674294233,
111
+ "uncertainty": 0.0013466343752952525,
112
+ "visibility": 0.09475092665525153,
113
+ "world_model": 2.1340785464271903
114
+ }
115
+ },
116
+ {
117
+ "epoch": 2,
118
+ "train": {
119
+ "action": 0.021538028542540576,
120
+ "arm_role": 2.1901883577045642e-05,
121
+ "belief": 0.10526431232298675,
122
+ "clearance": 0.08594944182979433,
123
+ "corridor": 0.24735975777240177,
124
+ "disturbance": 0.0026733651749964336,
125
+ "grasp_affordance": 0.010091915089440974,
126
+ "occluder_contact": 0.20871730721310566,
127
+ "persistence": 4.281911664887478,
128
+ "phase": 0.6870194284539474,
129
+ "planner_ranking": 0.04152601579832519,
130
+ "planner_risk": 0.01045033406331449,
131
+ "planner_success": 0.5353652712545897,
132
+ "proposal_diversity": 0.0,
133
+ "proposal_ranking": 1.1453557397189893,
134
+ "proposal_reconstruction": 0.06370952629337186,
135
+ "proposal_success": 0.6778088651205364,
136
+ "reocclusion": 0.6986164701612372,
137
+ "role_swap_consistency": 0.0004750598012929243,
138
+ "support_mode": 0.6878212376644737,
139
+ "support_stability": 0.1362508504700504,
140
+ "total": 1.384049719885776,
141
+ "uncertainty": 0.001396400365047157,
142
+ "visibility": 0.09892214826847377,
143
+ "world_model": 2.1307888821551675
144
+ },
145
+ "val": {
146
+ "action": 0.021681111145881005,
147
+ "arm_role": 0.0003864255304506514,
148
+ "belief": 0.10844068287406117,
149
+ "clearance": 0.08775011514080688,
150
+ "corridor": 0.23830276518128812,
151
+ "disturbance": 0.0019835491895037194,
152
+ "grasp_affordance": 0.011450761739979498,
153
+ "occluder_contact": 0.21598492935299873,
154
+ "persistence": 3.682887438684702,
155
+ "phase": 0.6754010105505586,
156
+ "planner_ranking": 0.03584061572041719,
157
+ "planner_risk": 0.010325502114255869,
158
+ "planner_success": 0.49944606237113476,
159
+ "proposal_diversity": 0.0,
160
+ "proposal_ranking": 1.1196386851370335,
161
+ "proposal_reconstruction": 0.0637086319620721,
162
+ "proposal_success": 0.6784614324569702,
163
+ "reocclusion": 0.6908501861616969,
164
+ "role_swap_consistency": 0.0,
165
+ "support_mode": 0.6635435968637466,
166
+ "support_stability": 0.14290154923219234,
167
+ "total": 1.3013203730806708,
168
+ "uncertainty": 0.002612559406315995,
169
+ "visibility": 0.10054636449785903,
170
+ "world_model": 1.9632274899631739
171
+ }
172
+ },
173
+ {
174
+ "epoch": 3,
175
+ "train": {
176
+ "action": 0.02116909674123714,
177
+ "arm_role": 0.00017300687338176526,
178
+ "belief": 0.10208533270970771,
179
+ "clearance": 0.08287150121637081,
180
+ "corridor": 0.24314571875882776,
181
+ "disturbance": 0.002553280315360577,
182
+ "grasp_affordance": 0.010202447837218642,
183
+ "occluder_contact": 0.20370756677891078,
184
+ "persistence": 3.4343402633541507,
185
+ "phase": 0.6811472039473684,
186
+ "planner_ranking": 0.03300265433170257,
187
+ "planner_risk": 0.010154466018828221,
188
+ "planner_success": 0.5132313249338615,
189
+ "proposal_diversity": 0.0,
190
+ "proposal_ranking": 1.1288216785380716,
191
+ "proposal_reconstruction": 0.06323393973472871,
192
+ "proposal_success": 0.6770071575516149,
193
+ "reocclusion": 0.7064933630980943,
194
+ "role_swap_consistency": 0.0003766025873023625,
195
+ "support_mode": 0.7007555509868421,
196
+ "support_stability": 0.1340178519732466,
197
+ "total": 1.314924956309168,
198
+ "uncertainty": 0.0012071453580622467,
199
+ "visibility": 0.09558045302370662,
200
+ "world_model": 2.054408212398228
201
+ },
202
+ "val": {
203
+ "action": 0.021696553943911567,
204
+ "arm_role": 6.053594985289124e-07,
205
+ "belief": 0.0983218071050942,
206
+ "clearance": 0.07689482159912586,
207
+ "corridor": 0.29242096332018264,
208
+ "disturbance": 0.0041615761442699295,
209
+ "grasp_affordance": 0.0100187708158046,
210
+ "occluder_contact": 0.19618010916747153,
211
+ "persistence": 4.662721422035247,
212
+ "phase": 0.6692422716878355,
213
+ "planner_ranking": 0.030305169929533804,
214
+ "planner_risk": 0.010842124038390466,
215
+ "planner_success": 0.5005343491211534,
216
+ "proposal_diversity": 0.0,
217
+ "proposal_ranking": 1.1591037698090076,
218
+ "proposal_reconstruction": 0.06389545585261658,
219
+ "proposal_success": 0.6826766086742282,
220
+ "reocclusion": 0.7785650952719152,
221
+ "role_swap_consistency": 0.0,
222
+ "support_mode": 0.6616131067276001,
223
+ "support_stability": 0.1388778503460344,
224
+ "total": 1.3739404007792473,
225
+ "uncertainty": 2.288464340693963e-05,
226
+ "visibility": 0.09415236074710265,
227
+ "world_model": 1.9970475500449538
228
+ }
229
+ }
230
+ ]
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/summary.json ADDED
@@ -0,0 +1,557 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "experiment_name": "proxy_interaction_r3d_stage1_clip_seed9",
3
+ "device": "cuda",
4
+ "best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_clip_seed9/checkpoint_best.pt",
5
+ "final_train_total": 1.314924956309168,
6
+ "final_val_total": 1.3739404007792473,
7
+ "train_time_sec": 146.7574381828308,
8
+ "peak_gpu_memory_mb": 1915.8154296875,
9
+ "num_train_samples": 380,
10
+ "num_val_samples": 128,
11
+ "planner_mode": "trainable",
12
+ "frozen_modules": [],
13
+ "init_info": {
14
+ "path": "/workspace/VLAarchtests/artifacts/outputs/reveal_runs/proxy_backbone_only_clip/checkpoint_best.pt",
15
+ "loaded_keys": 461,
16
+ "skipped_shape_mismatch_keys": [
17
+ "memory.gru.weight_ih_l0",
18
+ "memory.gru.weight_hh_l0",
19
+ "memory.gru.bias_ih_l0",
20
+ "memory.gru.bias_hh_l0",
21
+ "memory.token_proj.0.weight",
22
+ "memory.token_proj.0.bias",
23
+ "memory.token_proj.1.weight",
24
+ "memory.token_proj.1.bias",
25
+ "decoder.actor_role_bias",
26
+ "decoder.revealer_decoder.layers.0.self_attn.in_proj_weight",
27
+ "decoder.revealer_decoder.layers.0.self_attn.in_proj_bias",
28
+ "decoder.revealer_decoder.layers.0.self_attn.out_proj.weight",
29
+ "decoder.revealer_decoder.layers.0.self_attn.out_proj.bias",
30
+ "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_weight",
31
+ "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_bias",
32
+ "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.weight",
33
+ "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.bias",
34
+ "decoder.revealer_decoder.layers.0.linear1.weight",
35
+ "decoder.revealer_decoder.layers.0.linear1.bias",
36
+ "decoder.revealer_decoder.layers.0.linear2.weight",
37
+ "decoder.revealer_decoder.layers.0.linear2.bias",
38
+ "decoder.revealer_decoder.layers.0.norm1.weight",
39
+ "decoder.revealer_decoder.layers.0.norm1.bias",
40
+ "decoder.revealer_decoder.layers.0.norm2.weight",
41
+ "decoder.revealer_decoder.layers.0.norm2.bias",
42
+ "decoder.revealer_decoder.layers.0.norm3.weight",
43
+ "decoder.revealer_decoder.layers.0.norm3.bias",
44
+ "decoder.revealer_decoder.layers.1.self_attn.in_proj_weight",
45
+ "decoder.revealer_decoder.layers.1.self_attn.in_proj_bias",
46
+ "decoder.revealer_decoder.layers.1.self_attn.out_proj.weight",
47
+ "decoder.revealer_decoder.layers.1.self_attn.out_proj.bias",
48
+ "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_weight",
49
+ "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_bias",
50
+ "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.weight",
51
+ "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.bias",
52
+ "decoder.revealer_decoder.layers.1.linear1.weight",
53
+ "decoder.revealer_decoder.layers.1.linear1.bias",
54
+ "decoder.revealer_decoder.layers.1.linear2.weight",
55
+ "decoder.revealer_decoder.layers.1.linear2.bias",
56
+ "decoder.revealer_decoder.layers.1.norm1.weight",
57
+ "decoder.revealer_decoder.layers.1.norm1.bias",
58
+ "decoder.revealer_decoder.layers.1.norm2.weight",
59
+ "decoder.revealer_decoder.layers.1.norm2.bias",
60
+ "decoder.revealer_decoder.layers.1.norm3.weight",
61
+ "decoder.revealer_decoder.layers.1.norm3.bias",
62
+ "decoder.revealer_decoder.layers.2.self_attn.in_proj_weight",
63
+ "decoder.revealer_decoder.layers.2.self_attn.in_proj_bias",
64
+ "decoder.revealer_decoder.layers.2.self_attn.out_proj.weight",
65
+ "decoder.revealer_decoder.layers.2.self_attn.out_proj.bias",
66
+ "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_weight",
67
+ "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_bias",
68
+ "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.weight",
69
+ "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.bias",
70
+ "decoder.revealer_decoder.layers.2.linear1.weight",
71
+ "decoder.revealer_decoder.layers.2.linear1.bias",
72
+ "decoder.revealer_decoder.layers.2.linear2.weight",
73
+ "decoder.revealer_decoder.layers.2.linear2.bias",
74
+ "decoder.revealer_decoder.layers.2.norm1.weight",
75
+ "decoder.revealer_decoder.layers.2.norm1.bias",
76
+ "decoder.revealer_decoder.layers.2.norm2.weight",
77
+ "decoder.revealer_decoder.layers.2.norm2.bias",
78
+ "decoder.revealer_decoder.layers.2.norm3.weight",
79
+ "decoder.revealer_decoder.layers.2.norm3.bias",
80
+ "decoder.revealer_decoder.layers.3.self_attn.in_proj_weight",
81
+ "decoder.revealer_decoder.layers.3.self_attn.in_proj_bias",
82
+ "decoder.revealer_decoder.layers.3.self_attn.out_proj.weight",
83
+ "decoder.revealer_decoder.layers.3.self_attn.out_proj.bias",
84
+ "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_weight",
85
+ "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_bias",
86
+ "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.weight",
87
+ "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.bias",
88
+ "decoder.revealer_decoder.layers.3.linear1.weight",
89
+ "decoder.revealer_decoder.layers.3.linear1.bias",
90
+ "decoder.revealer_decoder.layers.3.linear2.weight",
91
+ "decoder.revealer_decoder.layers.3.linear2.bias",
92
+ "decoder.revealer_decoder.layers.3.norm1.weight",
93
+ "decoder.revealer_decoder.layers.3.norm1.bias",
94
+ "decoder.revealer_decoder.layers.3.norm2.weight",
95
+ "decoder.revealer_decoder.layers.3.norm2.bias",
96
+ "decoder.revealer_decoder.layers.3.norm3.weight",
97
+ "decoder.revealer_decoder.layers.3.norm3.bias",
98
+ "decoder.actor_decoder.layers.0.self_attn.in_proj_weight",
99
+ "decoder.actor_decoder.layers.0.self_attn.in_proj_bias",
100
+ "decoder.actor_decoder.layers.0.self_attn.out_proj.weight",
101
+ "decoder.actor_decoder.layers.0.self_attn.out_proj.bias",
102
+ "decoder.actor_decoder.layers.0.multihead_attn.in_proj_weight",
103
+ "decoder.actor_decoder.layers.0.multihead_attn.in_proj_bias",
104
+ "decoder.actor_decoder.layers.0.multihead_attn.out_proj.weight",
105
+ "decoder.actor_decoder.layers.0.multihead_attn.out_proj.bias",
106
+ "decoder.actor_decoder.layers.0.linear1.weight",
107
+ "decoder.actor_decoder.layers.0.linear1.bias",
108
+ "decoder.actor_decoder.layers.0.linear2.weight",
109
+ "decoder.actor_decoder.layers.0.linear2.bias",
110
+ "decoder.actor_decoder.layers.0.norm1.weight",
111
+ "decoder.actor_decoder.layers.0.norm1.bias",
112
+ "decoder.actor_decoder.layers.0.norm2.weight",
113
+ "decoder.actor_decoder.layers.0.norm2.bias",
114
+ "decoder.actor_decoder.layers.0.norm3.weight",
115
+ "decoder.actor_decoder.layers.0.norm3.bias",
116
+ "decoder.actor_decoder.layers.1.self_attn.in_proj_weight",
117
+ "decoder.actor_decoder.layers.1.self_attn.in_proj_bias",
118
+ "decoder.actor_decoder.layers.1.self_attn.out_proj.weight",
119
+ "decoder.actor_decoder.layers.1.self_attn.out_proj.bias",
120
+ "decoder.actor_decoder.layers.1.multihead_attn.in_proj_weight",
121
+ "decoder.actor_decoder.layers.1.multihead_attn.in_proj_bias",
122
+ "decoder.actor_decoder.layers.1.multihead_attn.out_proj.weight",
123
+ "decoder.actor_decoder.layers.1.multihead_attn.out_proj.bias",
124
+ "decoder.actor_decoder.layers.1.linear1.weight",
125
+ "decoder.actor_decoder.layers.1.linear1.bias",
126
+ "decoder.actor_decoder.layers.1.linear2.weight",
127
+ "decoder.actor_decoder.layers.1.linear2.bias",
128
+ "decoder.actor_decoder.layers.1.norm1.weight",
129
+ "decoder.actor_decoder.layers.1.norm1.bias",
130
+ "decoder.actor_decoder.layers.1.norm2.weight",
131
+ "decoder.actor_decoder.layers.1.norm2.bias",
132
+ "decoder.actor_decoder.layers.1.norm3.weight",
133
+ "decoder.actor_decoder.layers.1.norm3.bias",
134
+ "decoder.actor_decoder.layers.2.self_attn.in_proj_weight",
135
+ "decoder.actor_decoder.layers.2.self_attn.in_proj_bias",
136
+ "decoder.actor_decoder.layers.2.self_attn.out_proj.weight",
137
+ "decoder.actor_decoder.layers.2.self_attn.out_proj.bias",
138
+ "decoder.actor_decoder.layers.2.multihead_attn.in_proj_weight",
139
+ "decoder.actor_decoder.layers.2.multihead_attn.in_proj_bias",
140
+ "decoder.actor_decoder.layers.2.multihead_attn.out_proj.weight",
141
+ "decoder.actor_decoder.layers.2.multihead_attn.out_proj.bias",
142
+ "decoder.actor_decoder.layers.2.linear1.weight",
143
+ "decoder.actor_decoder.layers.2.linear1.bias",
144
+ "decoder.actor_decoder.layers.2.linear2.weight",
145
+ "decoder.actor_decoder.layers.2.linear2.bias",
146
+ "decoder.actor_decoder.layers.2.norm1.weight",
147
+ "decoder.actor_decoder.layers.2.norm1.bias",
148
+ "decoder.actor_decoder.layers.2.norm2.weight",
149
+ "decoder.actor_decoder.layers.2.norm2.bias",
150
+ "decoder.actor_decoder.layers.2.norm3.weight",
151
+ "decoder.actor_decoder.layers.2.norm3.bias",
152
+ "decoder.actor_decoder.layers.3.self_attn.in_proj_weight",
153
+ "decoder.actor_decoder.layers.3.self_attn.in_proj_bias",
154
+ "decoder.actor_decoder.layers.3.self_attn.out_proj.weight",
155
+ "decoder.actor_decoder.layers.3.self_attn.out_proj.bias",
156
+ "decoder.actor_decoder.layers.3.multihead_attn.in_proj_weight",
157
+ "decoder.actor_decoder.layers.3.multihead_attn.in_proj_bias",
158
+ "decoder.actor_decoder.layers.3.multihead_attn.out_proj.weight",
159
+ "decoder.actor_decoder.layers.3.multihead_attn.out_proj.bias",
160
+ "decoder.actor_decoder.layers.3.linear1.weight",
161
+ "decoder.actor_decoder.layers.3.linear1.bias",
162
+ "decoder.actor_decoder.layers.3.linear2.weight",
163
+ "decoder.actor_decoder.layers.3.linear2.bias",
164
+ "decoder.actor_decoder.layers.3.norm1.weight",
165
+ "decoder.actor_decoder.layers.3.norm1.bias",
166
+ "decoder.actor_decoder.layers.3.norm2.weight",
167
+ "decoder.actor_decoder.layers.3.norm2.bias",
168
+ "decoder.actor_decoder.layers.3.norm3.weight",
169
+ "decoder.actor_decoder.layers.3.norm3.bias",
170
+ "decoder.revealer_mean.weight",
171
+ "decoder.revealer_mean.bias",
172
+ "decoder.revealer_log_std.weight",
173
+ "decoder.revealer_log_std.bias",
174
+ "decoder.actor_mean.weight",
175
+ "decoder.actor_mean.bias",
176
+ "decoder.actor_log_std.weight",
177
+ "decoder.actor_log_std.bias",
178
+ "decoder.proposal_score.0.weight",
179
+ "decoder.proposal_score.0.bias",
180
+ "decoder.proposal_score.1.weight",
181
+ "decoder.proposal_score.1.bias"
182
+ ],
183
+ "missing_keys": [
184
+ "backbone.depth_adapter.depth_proj.0.weight",
185
+ "backbone.depth_adapter.depth_proj.0.bias",
186
+ "backbone.depth_adapter.depth_proj.1.weight",
187
+ "backbone.depth_adapter.depth_proj.1.bias",
188
+ "backbone.depth_adapter.depth_proj.3.weight",
189
+ "backbone.depth_adapter.depth_proj.3.bias",
190
+ "backbone.depth_adapter.geometry_proj.0.weight",
191
+ "backbone.depth_adapter.geometry_proj.0.bias",
192
+ "backbone.depth_adapter.geometry_proj.1.weight",
193
+ "backbone.depth_adapter.geometry_proj.1.bias",
194
+ "backbone.depth_adapter.camera_proj.0.weight",
195
+ "backbone.depth_adapter.camera_proj.0.bias",
196
+ "backbone.depth_adapter.camera_proj.1.weight",
197
+ "backbone.depth_adapter.camera_proj.1.bias",
198
+ "fusion.geometry_fusion.attn.in_proj_weight",
199
+ "fusion.geometry_fusion.attn.in_proj_bias",
200
+ "fusion.geometry_fusion.attn.out_proj.weight",
201
+ "fusion.geometry_fusion.attn.out_proj.bias",
202
+ "fusion.geometry_fusion.gate.0.weight",
203
+ "fusion.geometry_fusion.gate.0.bias",
204
+ "fusion.geometry_fusion.gate.1.weight",
205
+ "fusion.geometry_fusion.gate.1.bias",
206
+ "fusion.geometry_fusion.gate.3.weight",
207
+ "fusion.geometry_fusion.gate.3.bias",
208
+ "fusion.geometry_fusion.out.0.weight",
209
+ "fusion.geometry_fusion.out.0.bias",
210
+ "fusion.geometry_fusion.out.1.weight",
211
+ "fusion.geometry_fusion.out.1.bias",
212
+ "memory.scene_memory.position_embedding",
213
+ "memory.scene_memory.bank_queries",
214
+ "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
215
+ "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
216
+ "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
217
+ "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
218
+ "memory.scene_memory.sequence_encoder.layers.0.linear1.weight",
219
+ "memory.scene_memory.sequence_encoder.layers.0.linear1.bias",
220
+ "memory.scene_memory.sequence_encoder.layers.0.linear2.weight",
221
+ "memory.scene_memory.sequence_encoder.layers.0.linear2.bias",
222
+ "memory.scene_memory.sequence_encoder.layers.0.norm1.weight",
223
+ "memory.scene_memory.sequence_encoder.layers.0.norm1.bias",
224
+ "memory.scene_memory.sequence_encoder.layers.0.norm2.weight",
225
+ "memory.scene_memory.sequence_encoder.layers.0.norm2.bias",
226
+ "memory.scene_memory.bank_attention.in_proj_weight",
227
+ "memory.scene_memory.bank_attention.in_proj_bias",
228
+ "memory.scene_memory.bank_attention.out_proj.weight",
229
+ "memory.scene_memory.bank_attention.out_proj.bias",
230
+ "memory.scene_memory.action_proj.0.weight",
231
+ "memory.scene_memory.action_proj.0.bias",
232
+ "memory.scene_memory.action_proj.1.weight",
233
+ "memory.scene_memory.action_proj.1.bias",
234
+ "memory.scene_memory.write_gate.0.weight",
235
+ "memory.scene_memory.write_gate.0.bias",
236
+ "memory.scene_memory.write_gate.1.weight",
237
+ "memory.scene_memory.write_gate.1.bias",
238
+ "memory.scene_memory.write_gate.3.weight",
239
+ "memory.scene_memory.write_gate.3.bias",
240
+ "memory.scene_memory.token_proj.0.weight",
241
+ "memory.scene_memory.token_proj.0.bias",
242
+ "memory.scene_memory.token_proj.1.weight",
243
+ "memory.scene_memory.token_proj.1.bias",
244
+ "memory.belief_memory.position_embedding",
245
+ "memory.belief_memory.bank_queries",
246
+ "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
247
+ "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
248
+ "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
249
+ "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
250
+ "memory.belief_memory.sequence_encoder.layers.0.linear1.weight",
251
+ "memory.belief_memory.sequence_encoder.layers.0.linear1.bias",
252
+ "memory.belief_memory.sequence_encoder.layers.0.linear2.weight",
253
+ "memory.belief_memory.sequence_encoder.layers.0.linear2.bias",
254
+ "memory.belief_memory.sequence_encoder.layers.0.norm1.weight",
255
+ "memory.belief_memory.sequence_encoder.layers.0.norm1.bias",
256
+ "memory.belief_memory.sequence_encoder.layers.0.norm2.weight",
257
+ "memory.belief_memory.sequence_encoder.layers.0.norm2.bias",
258
+ "memory.belief_memory.bank_attention.in_proj_weight",
259
+ "memory.belief_memory.bank_attention.in_proj_bias",
260
+ "memory.belief_memory.bank_attention.out_proj.weight",
261
+ "memory.belief_memory.bank_attention.out_proj.bias",
262
+ "memory.belief_memory.action_proj.0.weight",
263
+ "memory.belief_memory.action_proj.0.bias",
264
+ "memory.belief_memory.action_proj.1.weight",
265
+ "memory.belief_memory.action_proj.1.bias",
266
+ "memory.belief_memory.write_gate.0.weight",
267
+ "memory.belief_memory.write_gate.0.bias",
268
+ "memory.belief_memory.write_gate.1.weight",
269
+ "memory.belief_memory.write_gate.1.bias",
270
+ "memory.belief_memory.write_gate.3.weight",
271
+ "memory.belief_memory.write_gate.3.bias",
272
+ "memory.belief_memory.token_proj.0.weight",
273
+ "memory.belief_memory.token_proj.0.bias",
274
+ "memory.belief_memory.token_proj.1.weight",
275
+ "memory.belief_memory.token_proj.1.bias",
276
+ "decoder.arm_decoder.layers.0.self_attn.in_proj_weight",
277
+ "decoder.arm_decoder.layers.0.self_attn.in_proj_bias",
278
+ "decoder.arm_decoder.layers.0.self_attn.out_proj.weight",
279
+ "decoder.arm_decoder.layers.0.self_attn.out_proj.bias",
280
+ "decoder.arm_decoder.layers.0.multihead_attn.in_proj_weight",
281
+ "decoder.arm_decoder.layers.0.multihead_attn.in_proj_bias",
282
+ "decoder.arm_decoder.layers.0.multihead_attn.out_proj.weight",
283
+ "decoder.arm_decoder.layers.0.multihead_attn.out_proj.bias",
284
+ "decoder.arm_decoder.layers.0.linear1.weight",
285
+ "decoder.arm_decoder.layers.0.linear1.bias",
286
+ "decoder.arm_decoder.layers.0.linear2.weight",
287
+ "decoder.arm_decoder.layers.0.linear2.bias",
288
+ "decoder.arm_decoder.layers.0.norm1.weight",
289
+ "decoder.arm_decoder.layers.0.norm1.bias",
290
+ "decoder.arm_decoder.layers.0.norm2.weight",
291
+ "decoder.arm_decoder.layers.0.norm2.bias",
292
+ "decoder.arm_decoder.layers.0.norm3.weight",
293
+ "decoder.arm_decoder.layers.0.norm3.bias",
294
+ "decoder.arm_decoder.layers.1.self_attn.in_proj_weight",
295
+ "decoder.arm_decoder.layers.1.self_attn.in_proj_bias",
296
+ "decoder.arm_decoder.layers.1.self_attn.out_proj.weight",
297
+ "decoder.arm_decoder.layers.1.self_attn.out_proj.bias",
298
+ "decoder.arm_decoder.layers.1.multihead_attn.in_proj_weight",
299
+ "decoder.arm_decoder.layers.1.multihead_attn.in_proj_bias",
300
+ "decoder.arm_decoder.layers.1.multihead_attn.out_proj.weight",
301
+ "decoder.arm_decoder.layers.1.multihead_attn.out_proj.bias",
302
+ "decoder.arm_decoder.layers.1.linear1.weight",
303
+ "decoder.arm_decoder.layers.1.linear1.bias",
304
+ "decoder.arm_decoder.layers.1.linear2.weight",
305
+ "decoder.arm_decoder.layers.1.linear2.bias",
306
+ "decoder.arm_decoder.layers.1.norm1.weight",
307
+ "decoder.arm_decoder.layers.1.norm1.bias",
308
+ "decoder.arm_decoder.layers.1.norm2.weight",
309
+ "decoder.arm_decoder.layers.1.norm2.bias",
310
+ "decoder.arm_decoder.layers.1.norm3.weight",
311
+ "decoder.arm_decoder.layers.1.norm3.bias",
312
+ "decoder.arm_decoder.layers.2.self_attn.in_proj_weight",
313
+ "decoder.arm_decoder.layers.2.self_attn.in_proj_bias",
314
+ "decoder.arm_decoder.layers.2.self_attn.out_proj.weight",
315
+ "decoder.arm_decoder.layers.2.self_attn.out_proj.bias",
316
+ "decoder.arm_decoder.layers.2.multihead_attn.in_proj_weight",
317
+ "decoder.arm_decoder.layers.2.multihead_attn.in_proj_bias",
318
+ "decoder.arm_decoder.layers.2.multihead_attn.out_proj.weight",
319
+ "decoder.arm_decoder.layers.2.multihead_attn.out_proj.bias",
320
+ "decoder.arm_decoder.layers.2.linear1.weight",
321
+ "decoder.arm_decoder.layers.2.linear1.bias",
322
+ "decoder.arm_decoder.layers.2.linear2.weight",
323
+ "decoder.arm_decoder.layers.2.linear2.bias",
324
+ "decoder.arm_decoder.layers.2.norm1.weight",
325
+ "decoder.arm_decoder.layers.2.norm1.bias",
326
+ "decoder.arm_decoder.layers.2.norm2.weight",
327
+ "decoder.arm_decoder.layers.2.norm2.bias",
328
+ "decoder.arm_decoder.layers.2.norm3.weight",
329
+ "decoder.arm_decoder.layers.2.norm3.bias",
330
+ "decoder.arm_decoder.layers.3.self_attn.in_proj_weight",
331
+ "decoder.arm_decoder.layers.3.self_attn.in_proj_bias",
332
+ "decoder.arm_decoder.layers.3.self_attn.out_proj.weight",
333
+ "decoder.arm_decoder.layers.3.self_attn.out_proj.bias",
334
+ "decoder.arm_decoder.layers.3.multihead_attn.in_proj_weight",
335
+ "decoder.arm_decoder.layers.3.multihead_attn.in_proj_bias",
336
+ "decoder.arm_decoder.layers.3.multihead_attn.out_proj.weight",
337
+ "decoder.arm_decoder.layers.3.multihead_attn.out_proj.bias",
338
+ "decoder.arm_decoder.layers.3.linear1.weight",
339
+ "decoder.arm_decoder.layers.3.linear1.bias",
340
+ "decoder.arm_decoder.layers.3.linear2.weight",
341
+ "decoder.arm_decoder.layers.3.linear2.bias",
342
+ "decoder.arm_decoder.layers.3.norm1.weight",
343
+ "decoder.arm_decoder.layers.3.norm1.bias",
344
+ "decoder.arm_decoder.layers.3.norm2.weight",
345
+ "decoder.arm_decoder.layers.3.norm2.bias",
346
+ "decoder.arm_decoder.layers.3.norm3.weight",
347
+ "decoder.arm_decoder.layers.3.norm3.bias",
348
+ "decoder.arm_identity.weight",
349
+ "decoder.phase_adapter.weight",
350
+ "decoder.phase_adapter.bias",
351
+ "decoder.role_adapter.weight",
352
+ "decoder.role_adapter.bias",
353
+ "decoder.context_proj.0.weight",
354
+ "decoder.context_proj.0.bias",
355
+ "decoder.context_proj.1.weight",
356
+ "decoder.context_proj.1.bias",
357
+ "decoder.arm_head.0.weight",
358
+ "decoder.arm_head.0.bias",
359
+ "decoder.arm_head.1.weight",
360
+ "decoder.arm_head.1.bias",
361
+ "decoder.arm_mean.weight",
362
+ "decoder.arm_mean.bias",
363
+ "decoder.arm_log_std.weight",
364
+ "decoder.arm_log_std.bias",
365
+ "decoder.proposal_mode_head.0.weight",
366
+ "decoder.proposal_mode_head.0.bias",
367
+ "decoder.proposal_mode_head.1.weight",
368
+ "decoder.proposal_mode_head.1.bias",
369
+ "decoder.proposal_mode_head.3.weight",
370
+ "decoder.proposal_mode_head.3.bias",
371
+ "decoder.proposal_mode_embeddings.weight",
372
+ "decoder.proposal_slot_embeddings.weight",
373
+ "decoder.mode_residual_heads.0.0.weight",
374
+ "decoder.mode_residual_heads.0.0.bias",
375
+ "decoder.mode_residual_heads.0.1.weight",
376
+ "decoder.mode_residual_heads.0.1.bias",
377
+ "decoder.mode_residual_heads.0.3.weight",
378
+ "decoder.mode_residual_heads.0.3.bias",
379
+ "decoder.mode_residual_heads.1.0.weight",
380
+ "decoder.mode_residual_heads.1.0.bias",
381
+ "decoder.mode_residual_heads.1.1.weight",
382
+ "decoder.mode_residual_heads.1.1.bias",
383
+ "decoder.mode_residual_heads.1.3.weight",
384
+ "decoder.mode_residual_heads.1.3.bias",
385
+ "decoder.mode_residual_heads.2.0.weight",
386
+ "decoder.mode_residual_heads.2.0.bias",
387
+ "decoder.mode_residual_heads.2.1.weight",
388
+ "decoder.mode_residual_heads.2.1.bias",
389
+ "decoder.mode_residual_heads.2.3.weight",
390
+ "decoder.mode_residual_heads.2.3.bias",
391
+ "decoder.mode_residual_heads.3.0.weight",
392
+ "decoder.mode_residual_heads.3.0.bias",
393
+ "decoder.mode_residual_heads.3.1.weight",
394
+ "decoder.mode_residual_heads.3.1.bias",
395
+ "decoder.mode_residual_heads.3.3.weight",
396
+ "decoder.mode_residual_heads.3.3.bias",
397
+ "decoder.mode_residual_heads.4.0.weight",
398
+ "decoder.mode_residual_heads.4.0.bias",
399
+ "decoder.mode_residual_heads.4.1.weight",
400
+ "decoder.mode_residual_heads.4.1.bias",
401
+ "decoder.mode_residual_heads.4.3.weight",
402
+ "decoder.mode_residual_heads.4.3.bias",
403
+ "decoder.mode_residual_heads.5.0.weight",
404
+ "decoder.mode_residual_heads.5.0.bias",
405
+ "decoder.mode_residual_heads.5.1.weight",
406
+ "decoder.mode_residual_heads.5.1.bias",
407
+ "decoder.mode_residual_heads.5.3.weight",
408
+ "decoder.mode_residual_heads.5.3.bias",
409
+ "decoder.slot_delta.0.weight",
410
+ "decoder.slot_delta.0.bias",
411
+ "decoder.slot_delta.1.weight",
412
+ "decoder.slot_delta.1.bias",
413
+ "decoder.slot_delta.3.weight",
414
+ "decoder.slot_delta.3.bias",
415
+ "decoder.proposal_score.0.weight",
416
+ "decoder.proposal_score.0.bias",
417
+ "decoder.proposal_score.1.weight",
418
+ "decoder.proposal_score.1.bias",
419
+ "decoder.proposal_score.3.weight",
420
+ "decoder.proposal_score.3.bias",
421
+ "elastic_state_head.interaction_queries",
422
+ "elastic_state_head.interaction_attention.in_proj_weight",
423
+ "elastic_state_head.interaction_attention.in_proj_bias",
424
+ "elastic_state_head.interaction_attention.out_proj.weight",
425
+ "elastic_state_head.interaction_attention.out_proj.bias",
426
+ "elastic_state_head.interaction_mlp.0.weight",
427
+ "elastic_state_head.interaction_mlp.0.bias",
428
+ "elastic_state_head.interaction_mlp.1.weight",
429
+ "elastic_state_head.interaction_mlp.1.bias",
430
+ "elastic_state_head.interaction_mlp.3.weight",
431
+ "elastic_state_head.interaction_mlp.3.bias",
432
+ "elastic_state_head.decoder.field_queries",
433
+ "elastic_state_head.decoder.field_attention.in_proj_weight",
434
+ "elastic_state_head.decoder.field_attention.in_proj_bias",
435
+ "elastic_state_head.decoder.field_attention.out_proj.weight",
436
+ "elastic_state_head.decoder.field_attention.out_proj.bias",
437
+ "elastic_state_head.decoder.field_mlp.0.weight",
438
+ "elastic_state_head.decoder.field_mlp.0.bias",
439
+ "elastic_state_head.decoder.field_mlp.1.weight",
440
+ "elastic_state_head.decoder.field_mlp.1.bias",
441
+ "elastic_state_head.decoder.field_mlp.3.weight",
442
+ "elastic_state_head.decoder.field_mlp.3.bias",
443
+ "elastic_state_head.decoder.summary_proj.0.weight",
444
+ "elastic_state_head.decoder.summary_proj.0.bias",
445
+ "elastic_state_head.decoder.summary_proj.1.weight",
446
+ "elastic_state_head.decoder.summary_proj.1.bias",
447
+ "elastic_state_head.decoder.phase_head.0.weight",
448
+ "elastic_state_head.decoder.phase_head.0.bias",
449
+ "elastic_state_head.decoder.phase_head.1.weight",
450
+ "elastic_state_head.decoder.phase_head.1.bias",
451
+ "elastic_state_head.decoder.phase_head.3.weight",
452
+ "elastic_state_head.decoder.phase_head.3.bias",
453
+ "elastic_state_head.decoder.arm_role_head.0.weight",
454
+ "elastic_state_head.decoder.arm_role_head.0.bias",
455
+ "elastic_state_head.decoder.arm_role_head.1.weight",
456
+ "elastic_state_head.decoder.arm_role_head.1.bias",
457
+ "elastic_state_head.decoder.arm_role_head.3.weight",
458
+ "elastic_state_head.decoder.arm_role_head.3.bias",
459
+ "elastic_state_head.decoder.arm_identity.weight",
460
+ "elastic_state_head.decoder.support_mode.0.weight",
461
+ "elastic_state_head.decoder.support_mode.0.bias",
462
+ "elastic_state_head.decoder.support_mode.1.weight",
463
+ "elastic_state_head.decoder.support_mode.1.bias",
464
+ "elastic_state_head.decoder.support_mode.3.weight",
465
+ "elastic_state_head.decoder.support_mode.3.bias",
466
+ "elastic_state_head.decoder.access_field.weight",
467
+ "elastic_state_head.decoder.access_field.bias",
468
+ "elastic_state_head.decoder.target_belief_field.weight",
469
+ "elastic_state_head.decoder.target_belief_field.bias",
470
+ "elastic_state_head.decoder.visibility_field.weight",
471
+ "elastic_state_head.decoder.visibility_field.bias",
472
+ "elastic_state_head.decoder.clearance_field.weight",
473
+ "elastic_state_head.decoder.clearance_field.bias",
474
+ "elastic_state_head.decoder.occluder_contact_field.weight",
475
+ "elastic_state_head.decoder.occluder_contact_field.bias",
476
+ "elastic_state_head.decoder.grasp_affordance_field.weight",
477
+ "elastic_state_head.decoder.grasp_affordance_field.bias",
478
+ "elastic_state_head.decoder.support_stability_field.weight",
479
+ "elastic_state_head.decoder.support_stability_field.bias",
480
+ "elastic_state_head.decoder.persistence_field.weight",
481
+ "elastic_state_head.decoder.persistence_field.bias",
482
+ "elastic_state_head.decoder.reocclusion_field.weight",
483
+ "elastic_state_head.decoder.reocclusion_field.bias",
484
+ "elastic_state_head.decoder.disturbance_field.weight",
485
+ "elastic_state_head.decoder.disturbance_field.bias",
486
+ "elastic_state_head.decoder.uncertainty_field.weight",
487
+ "elastic_state_head.decoder.uncertainty_field.bias",
488
+ "elastic_state_head.decoder.reocclusion_head.0.weight",
489
+ "elastic_state_head.decoder.reocclusion_head.0.bias",
490
+ "elastic_state_head.decoder.reocclusion_head.1.weight",
491
+ "elastic_state_head.decoder.reocclusion_head.1.bias",
492
+ "elastic_state_head.decoder.reocclusion_head.3.weight",
493
+ "elastic_state_head.decoder.reocclusion_head.3.bias",
494
+ "world_model.state_encoder.0.weight",
495
+ "world_model.state_encoder.0.bias",
496
+ "world_model.state_encoder.1.weight",
497
+ "world_model.state_encoder.1.bias",
498
+ "world_model.scene_memory_proj.0.weight",
499
+ "world_model.scene_memory_proj.0.bias",
500
+ "world_model.scene_memory_proj.1.weight",
501
+ "world_model.scene_memory_proj.1.bias",
502
+ "world_model.belief_memory_proj.0.weight",
503
+ "world_model.belief_memory_proj.0.bias",
504
+ "world_model.belief_memory_proj.1.weight",
505
+ "world_model.belief_memory_proj.1.bias",
506
+ "world_model.action_encoder.0.weight",
507
+ "world_model.action_encoder.0.bias",
508
+ "world_model.action_encoder.1.weight",
509
+ "world_model.action_encoder.1.bias",
510
+ "world_model.transition.weight_ih",
511
+ "world_model.transition.weight_hh",
512
+ "world_model.transition.bias_ih",
513
+ "world_model.transition.bias_hh",
514
+ "world_model.scene_memory_update.weight",
515
+ "world_model.scene_memory_update.bias",
516
+ "world_model.belief_memory_update.weight",
517
+ "world_model.belief_memory_update.bias",
518
+ "world_model.compact_decoder.weight",
519
+ "world_model.compact_decoder.bias",
520
+ "world_model.target_belief_head.weight",
521
+ "world_model.target_belief_head.bias",
522
+ "world_model.visibility_head.weight",
523
+ "world_model.visibility_head.bias",
524
+ "world_model.clearance_head.weight",
525
+ "world_model.clearance_head.bias",
526
+ "world_model.occluder_contact_head.weight",
527
+ "world_model.occluder_contact_head.bias",
528
+ "world_model.grasp_affordance_head.weight",
529
+ "world_model.grasp_affordance_head.bias",
530
+ "world_model.support_stability_head.weight",
531
+ "world_model.support_stability_head.bias",
532
+ "world_model.persistence_head.weight",
533
+ "world_model.persistence_head.bias",
534
+ "world_model.reocclusion_head.weight",
535
+ "world_model.reocclusion_head.bias",
536
+ "world_model.disturbance_head.weight",
537
+ "world_model.disturbance_head.bias",
538
+ "world_model.uncertainty_head.weight",
539
+ "world_model.uncertainty_head.bias",
540
+ "world_model.access_head.weight",
541
+ "world_model.access_head.bias",
542
+ "planner.residual.trunk.0.weight",
543
+ "planner.residual.trunk.0.bias",
544
+ "planner.residual.trunk.1.weight",
545
+ "planner.residual.trunk.1.bias",
546
+ "planner.residual.trunk.3.weight",
547
+ "planner.residual.trunk.3.bias",
548
+ "planner.residual.success_head.weight",
549
+ "planner.residual.success_head.bias",
550
+ "planner.residual.risk_head.weight",
551
+ "planner.residual.risk_head.bias",
552
+ "planner.residual.residual_head.weight",
553
+ "planner.residual.residual_head.bias"
554
+ ],
555
+ "unexpected_keys": []
556
+ }
557
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_full/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "full": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4583333333333333,
5
+ "bag_proxy": 0.5833333333333334,
6
+ "cloth_proxy": 0.6666666666666666
7
+ },
8
+ "mean_success": 0.5694444444444445,
9
+ "visibility_integral": 32.2005988392565,
10
+ "corridor_availability": 0.8664570152759552,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 2.1903364318709135,
13
+ "disturbance_cost": 0.35011103795841336
14
+ }
15
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_full/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## full
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/checkpoint_best.pt
5
+ - mean_success: 0.569
6
+ - visibility_integral: 32.201
7
+ - corridor_availability: 0.866
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 2.190
10
+ - disturbance_cost: 0.350
11
+ - foliage_proxy_success: 0.458
12
+ - bag_proxy_success: 0.583
13
+ - cloth_proxy_success: 0.667
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_planner/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "full": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4166666666666667,
5
+ "bag_proxy": 0.5833333333333334,
6
+ "cloth_proxy": 0.6666666666666666
7
+ },
8
+ "mean_success": 0.5555555555555555,
9
+ "visibility_integral": 33.31703626612822,
10
+ "corridor_availability": 0.886079938047462,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 2.1836884579143008,
13
+ "disturbance_cost": 0.3696938648612963
14
+ }
15
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_planner/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## full
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/checkpoint_best.pt
5
+ - mean_success: 0.556
6
+ - visibility_integral: 33.317
7
+ - corridor_availability: 0.886
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 2.184
10
+ - disturbance_cost: 0.370
11
+ - foliage_proxy_success: 0.417
12
+ - bag_proxy_success: 0.583
13
+ - cloth_proxy_success: 0.667
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_role_symmetry/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "full": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4583333333333333,
5
+ "bag_proxy": 0.5833333333333334,
6
+ "cloth_proxy": 0.6666666666666666
7
+ },
8
+ "mean_success": 0.5694444444444445,
9
+ "visibility_integral": 32.571378606888985,
10
+ "corridor_availability": 0.8744470203916231,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 2.249059588784357,
13
+ "disturbance_cost": 0.34120469799058306
14
+ }
15
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/benchmark_no_role_symmetry/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## full
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/checkpoint_best.pt
5
+ - mean_success: 0.569
6
+ - visibility_integral: 32.571
7
+ - corridor_availability: 0.874
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 2.249
10
+ - disturbance_cost: 0.341
11
+ - foliage_proxy_success: 0.458
12
+ - bag_proxy_success: 0.583
13
+ - cloth_proxy_success: 0.667
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/config_resolved.yaml ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_r3d_stage1_dummy_seed13
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
3
+ device: cuda
4
+ seed: 13
5
+ data:
6
+ proxies:
7
+ - foliage_proxy
8
+ - bag_proxy
9
+ - cloth_proxy
10
+ resolution: 96
11
+ dataset_version: reveal_proxy_v6_rgbd_elastic_state
12
+ train_episodes_per_proxy: 48
13
+ val_episodes_per_proxy: 16
14
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v6_rgbd_stage1_dummy_seed13.pt
15
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v6_rgbd_stage1_dummy_seed13.pt
16
+ rebuild_dataset: false
17
+ chunk_horizon: 8
18
+ rollout_horizon: 5
19
+ history_steps: 6
20
+ planner_candidates: 8
21
+ seed: 13
22
+ optim:
23
+ epochs: 4
24
+ batch_size: 16
25
+ num_workers: 4
26
+ lr: 0.001
27
+ weight_decay: 0.0001
28
+ trainer:
29
+ policy_type: elastic_reveal
30
+ use_bf16: false
31
+ grad_clip_norm: 1.0
32
+ freeze_backbone: true
33
+ gradient_checkpointing: false
34
+ plan_during_train: true
35
+ plan_during_eval: true
36
+ support_mode_conditioning: true
37
+ planner_mode: trainable
38
+ use_depth: false
39
+ use_world_model: true
40
+ use_role_tokens: true
41
+ compute_equivariance_probe: true
42
+ policy:
43
+ backbone:
44
+ model_name: openai/clip-vit-base-patch32
45
+ hidden_dim: 192
46
+ max_text_tokens: 32
47
+ freeze_backbone: true
48
+ gradient_checkpointing: false
49
+ use_dummy_backbone: true
50
+ fusion:
51
+ hidden_dim: 192
52
+ num_cameras: 3
53
+ num_layers: 2
54
+ num_heads: 4
55
+ ff_dim: 384
56
+ dropout: 0.1
57
+ proprio_dim: 32
58
+ proprio_tokens: 1
59
+ memory:
60
+ hidden_dim: 192
61
+ action_dim: 14
62
+ history_steps: 6
63
+ scene_history_steps: 3
64
+ belief_history_steps: 8
65
+ num_layers: 2
66
+ dropout: 0.1
67
+ memory_bank_size: 4
68
+ scene_bank_size: 2
69
+ belief_bank_size: 2
70
+ num_heads: 4
71
+ max_history_steps: 8
72
+ decoder:
73
+ hidden_dim: 192
74
+ num_heads: 4
75
+ num_layers: 2
76
+ ff_dim: 384
77
+ dropout: 0.1
78
+ chunk_size: 8
79
+ action_dim: 14
80
+ arm_action_dim: 7
81
+ num_candidates: 8
82
+ num_phases: 5
83
+ num_arm_roles: 4
84
+ num_proposal_modes: 6
85
+ planner_top_k: 4
86
+ reveal_head:
87
+ hidden_dim: 192
88
+ num_support_modes: 3
89
+ num_approach_templates: 32
90
+ rollout_horizon: 5
91
+ belief_map_size: 32
92
+ field_size: 16
93
+ num_heads: 4
94
+ predict_belief_map: true
95
+ num_phases: 5
96
+ num_arm_roles: 4
97
+ num_interaction_tokens: 8
98
+ world_model:
99
+ hidden_dim: 192
100
+ action_dim: 14
101
+ num_support_modes: 3
102
+ num_approach_templates: 32
103
+ rollout_horizon: 5
104
+ field_size: 16
105
+ num_heads: 4
106
+ num_phases: 5
107
+ num_arm_roles: 4
108
+ num_interaction_tokens: 8
109
+ belief_map_size: 32
110
+ predict_belief_map: true
111
+ scene_bank_size: 2
112
+ belief_bank_size: 2
113
+ planner:
114
+ hidden_dim: 192
115
+ num_candidates: 8
116
+ action_dim: 14
117
+ num_support_modes: 3
118
+ utility_margin: 0.1
119
+ num_heads: 4
120
+ num_layers: 2
121
+ num_phases: 5
122
+ num_arm_roles: 4
123
+ top_k: 4
124
+ loss_weights:
125
+ action: 1.0
126
+ phase: 0.15
127
+ arm_role: 0.2
128
+ support_mode: 0.15
129
+ corridor: 0.2
130
+ persistence: 0.1
131
+ disturbance: 0.1
132
+ world_model: 0.25
133
+ belief: 0.05
134
+ visibility: 0.05
135
+ clearance: 0.05
136
+ support_stability: 0.05
137
+ reocclusion: 0.05
138
+ occluder_contact: 0.05
139
+ grasp_affordance: 0.05
140
+ planner_success: 0.2
141
+ planner_risk: 0.1
142
+ planner_ranking: 0.1
143
+ proposal_reconstruction: 0.2
144
+ proposal_success: 0.1
145
+ proposal_ranking: 0.1
146
+ proposal_diversity: 0.05
147
+ role_swap_consistency: 0.05
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/diagnostics_full/proxy_diagnostics.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "planner_top1_accuracy": 0.2595419847328244,
3
+ "planner_regret": 0.015185066498816013,
4
+ "planner_score_utility_spearman": 0.25190839171409607,
5
+ "risk_calibration_mse": 0.011332111433148384,
6
+ "role_collapse_rate": 0.0,
7
+ "proposal_diversity": 0.02456846833229065,
8
+ "left_right_equivariance_error": 0.007538194466820534,
9
+ "belief_calibration_brier": 0.0055354926735162735,
10
+ "reocclusion_calibration_brier": 0.2274838089942932,
11
+ "support_stability_mae": 0.030257930979132652,
12
+ "clearance_auc": 0.7414014153848468,
13
+ "memory_write_rate": 0.0,
14
+ "memory_saturation": 0.7680174112319946,
15
+ "num_samples": 131
16
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/metrics.json ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.029530804604291916,
6
+ "arm_role": 0.19113596672893132,
7
+ "belief": 0.19201900158077478,
8
+ "clearance": 0.1937584774568677,
9
+ "corridor": 0.30155759242673713,
10
+ "disturbance": 0.018230090441647917,
11
+ "grasp_affordance": 0.1115249302238226,
12
+ "occluder_contact": 0.29577948339283466,
13
+ "persistence": 5.046393771966298,
14
+ "phase": 0.835017109910647,
15
+ "planner_ranking": 0.6733469751973947,
16
+ "planner_risk": 0.04033496890527507,
17
+ "planner_success": 0.6355331862966219,
18
+ "proposal_diversity": 0.0,
19
+ "proposal_ranking": 1.276770144701004,
20
+ "proposal_reconstruction": 0.07184042579804857,
21
+ "proposal_success": 0.6676094954212507,
22
+ "reocclusion": 0.6988904004295667,
23
+ "role_swap_consistency": 0.0006935761872834215,
24
+ "support_mode": 0.7387049297491709,
25
+ "support_stability": 0.22416748199611902,
26
+ "total": 2.4212693075339,
27
+ "uncertainty": 0.32931591259936493,
28
+ "visibility": 0.23356754829486212,
29
+ "world_model": 4.170340110858281
30
+ },
31
+ "val": {
32
+ "action": 0.023605089427696332,
33
+ "arm_role": 8.891185360779572e-05,
34
+ "belief": 0.112466166416804,
35
+ "clearance": 0.08774211009343465,
36
+ "corridor": 0.2502693798806932,
37
+ "disturbance": 0.0037313879001885653,
38
+ "grasp_affordance": 0.013532540657454066,
39
+ "occluder_contact": 0.2236137886842092,
40
+ "persistence": 4.796973652309841,
41
+ "phase": 0.6506193346447415,
42
+ "planner_ranking": 0.45240074396133423,
43
+ "planner_risk": 0.012336155710120996,
44
+ "planner_success": 0.6348234679963853,
45
+ "proposal_diversity": 0.0,
46
+ "proposal_ranking": 1.1647081640031602,
47
+ "proposal_reconstruction": 0.06623147221075164,
48
+ "proposal_success": 0.6723773082097372,
49
+ "reocclusion": 0.6799028648270501,
50
+ "role_swap_consistency": 0.0,
51
+ "support_mode": 0.6129622724321153,
52
+ "support_stability": 0.14574629151158863,
53
+ "total": 1.9533665710025363,
54
+ "uncertainty": 0.057104989886283875,
55
+ "visibility": 0.09962501211298837,
56
+ "world_model": 3.08394538031684
57
+ }
58
+ },
59
+ {
60
+ "epoch": 1,
61
+ "train": {
62
+ "action": 0.02052135338696341,
63
+ "arm_role": 0.00010673219821910607,
64
+ "belief": 0.11743779480457306,
65
+ "clearance": 0.09043452050536871,
66
+ "corridor": 0.24632801488041878,
67
+ "disturbance": 0.003475519949764324,
68
+ "grasp_affordance": 0.01625332736875862,
69
+ "occluder_contact": 0.2240921917061011,
70
+ "persistence": 4.695922573407491,
71
+ "phase": 0.49508154888947803,
72
+ "planner_ranking": 0.14279444872712097,
73
+ "planner_risk": 0.0141817982463787,
74
+ "planner_success": 0.593176061908404,
75
+ "proposal_diversity": 0.0,
76
+ "proposal_ranking": 1.165678009390831,
77
+ "proposal_reconstruction": 0.06292749894782901,
78
+ "proposal_success": 0.674570898214976,
79
+ "reocclusion": 0.3844434078782797,
80
+ "role_swap_consistency": 0.00039524554207067314,
81
+ "support_mode": 0.17358588459561966,
82
+ "support_stability": 0.1374168156956633,
83
+ "total": 1.6440163105726242,
84
+ "uncertainty": 0.047071967429171004,
85
+ "visibility": 0.11256152174125116,
86
+ "world_model": 2.4736096411943436
87
+ },
88
+ "val": {
89
+ "action": 0.020492848422792222,
90
+ "arm_role": 0.0002776960156754487,
91
+ "belief": 0.1081986419028706,
92
+ "clearance": 0.08335375868611866,
93
+ "corridor": 0.24787565734651354,
94
+ "disturbance": 0.0022675159141524797,
95
+ "grasp_affordance": 0.012290253303945065,
96
+ "occluder_contact": 0.21959979832172394,
97
+ "persistence": 4.647055625915527,
98
+ "phase": 0.4316861795054542,
99
+ "planner_ranking": 0.06341143821676572,
100
+ "planner_risk": 0.015357115098999606,
101
+ "planner_success": 0.5689369605647193,
102
+ "proposal_diversity": 0.0,
103
+ "proposal_ranking": 1.1283477942148845,
104
+ "proposal_reconstruction": 0.06308732968237665,
105
+ "proposal_success": 0.6809348861376444,
106
+ "reocclusion": 0.2748950504594379,
107
+ "role_swap_consistency": 0.0,
108
+ "support_mode": 0.0006280758987284369,
109
+ "support_stability": 0.14622381826241812,
110
+ "total": 1.6025353935029771,
111
+ "uncertainty": 0.02438033703300688,
112
+ "visibility": 0.10466726124286652,
113
+ "world_model": 2.558868553903368
114
+ }
115
+ },
116
+ {
117
+ "epoch": 2,
118
+ "train": {
119
+ "action": 0.01646478761297961,
120
+ "arm_role": 9.377782756322024e-05,
121
+ "belief": 0.10991635639220476,
122
+ "clearance": 0.0843405183404684,
123
+ "corridor": 0.2701566057900588,
124
+ "disturbance": 0.0031300995663817353,
125
+ "grasp_affordance": 0.012393822447241595,
126
+ "occluder_contact": 0.21479063170651594,
127
+ "persistence": 2.6339182580510774,
128
+ "phase": 0.431367311005791,
129
+ "planner_ranking": 0.06486702508603533,
130
+ "planner_risk": 0.013548698586722216,
131
+ "planner_success": 0.5643768397470316,
132
+ "proposal_diversity": 0.0,
133
+ "proposal_ranking": 1.1353335281213124,
134
+ "proposal_reconstruction": 0.05951391921068231,
135
+ "proposal_success": 0.6731756230195364,
136
+ "reocclusion": 0.2623978331685066,
137
+ "role_swap_consistency": 0.00040521422973445925,
138
+ "support_mode": 0.000605581031171217,
139
+ "support_stability": 0.1400139912342032,
140
+ "total": 1.2923575937747955,
141
+ "uncertainty": 0.02004621450517637,
142
+ "visibility": 0.10328224146117766,
143
+ "world_model": 2.1331751296917596
144
+ },
145
+ "val": {
146
+ "action": 0.018090524814195104,
147
+ "arm_role": 4.204427063490989e-05,
148
+ "belief": 0.11348766502406862,
149
+ "clearance": 0.0778748012251324,
150
+ "corridor": 0.24816315703921848,
151
+ "disturbance": 0.0018734507805978258,
152
+ "grasp_affordance": 0.008446878753602505,
153
+ "occluder_contact": 0.2068953894906574,
154
+ "persistence": 1.9170836640728846,
155
+ "phase": 0.4777056227127711,
156
+ "planner_ranking": 0.07497243583202362,
157
+ "planner_risk": 0.012007931971715556,
158
+ "planner_success": 0.5846167008082072,
159
+ "proposal_diversity": 0.0,
160
+ "proposal_ranking": 1.1227490504582722,
161
+ "proposal_reconstruction": 0.06178469873136944,
162
+ "proposal_success": 0.6768591006596884,
163
+ "reocclusion": 0.2698347626460923,
164
+ "role_swap_consistency": 0.0,
165
+ "support_mode": 0.0005942495643264718,
166
+ "support_stability": 0.14820611890819338,
167
+ "total": 1.2714158693949382,
168
+ "uncertainty": 0.004030831908393238,
169
+ "visibility": 0.09794799155659145,
170
+ "world_model": 2.303717931111654
171
+ }
172
+ },
173
+ {
174
+ "epoch": 3,
175
+ "train": {
176
+ "action": 0.015296258614398539,
177
+ "arm_role": 9.897743439069018e-05,
178
+ "belief": 0.10741911331812541,
179
+ "clearance": 0.07931565772742033,
180
+ "corridor": 0.23081608302891254,
181
+ "disturbance": 0.00287542298125724,
182
+ "grasp_affordance": 0.008955261165586611,
183
+ "occluder_contact": 0.21085621416568756,
184
+ "persistence": 1.6830786913633347,
185
+ "phase": 0.4407324629525344,
186
+ "planner_ranking": 0.053573422211532794,
187
+ "planner_risk": 0.011835894741428396,
188
+ "planner_success": 0.5389373525977135,
189
+ "proposal_diversity": 0.0,
190
+ "proposal_ranking": 1.1375357458988826,
191
+ "proposal_reconstruction": 0.05875217309221625,
192
+ "proposal_success": 0.669308491051197,
193
+ "reocclusion": 0.26737124752253294,
194
+ "role_swap_consistency": 0.00044258072254403186,
195
+ "support_mode": 0.0058784369854644565,
196
+ "support_stability": 0.13682511821389198,
197
+ "total": 1.1672432621320088,
198
+ "uncertainty": 0.007140855586233859,
199
+ "visibility": 0.094703309237957,
200
+ "world_model": 2.072191367546717
201
+ },
202
+ "val": {
203
+ "action": 0.016218292733861342,
204
+ "arm_role": 0.00022501617463098632,
205
+ "belief": 0.10660513407654232,
206
+ "clearance": 0.07916852169566685,
207
+ "corridor": 0.23598399923907387,
208
+ "disturbance": 0.0013176489026389187,
209
+ "grasp_affordance": 0.009249631315469742,
210
+ "occluder_contact": 0.2084801279836231,
211
+ "persistence": 1.9978744321399264,
212
+ "phase": 0.46462951434983146,
213
+ "planner_ranking": 0.04140180618398719,
214
+ "planner_risk": 0.011076963868820004,
215
+ "planner_success": 0.5154120292928484,
216
+ "proposal_diversity": 0.0,
217
+ "proposal_ranking": 1.1469912661446466,
218
+ "proposal_reconstruction": 0.05962582967347569,
219
+ "proposal_success": 0.6495795779758029,
220
+ "reocclusion": 0.2503652158710692,
221
+ "role_swap_consistency": 0.0,
222
+ "support_mode": 0.0004595977985041423,
223
+ "support_stability": 0.14600716531276703,
224
+ "total": 1.2128634585274591,
225
+ "uncertainty": 0.007759603775209851,
226
+ "visibility": 0.09225249456034766,
227
+ "world_model": 2.1404969029956393
228
+ }
229
+ }
230
+ ]
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/summary.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "experiment_name": "proxy_interaction_r3d_stage1_dummy_seed13",
3
+ "device": "cuda",
4
+ "best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed13/checkpoint_best.pt",
5
+ "final_train_total": 1.1672432621320088,
6
+ "final_val_total": 1.2128634585274591,
7
+ "train_time_sec": 18.091050624847412,
8
+ "peak_gpu_memory_mb": 631.1953125,
9
+ "num_train_samples": 380,
10
+ "num_val_samples": 131,
11
+ "planner_mode": "trainable",
12
+ "frozen_modules": [],
13
+ "init_info": null
14
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_full/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "full": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4166666666666667,
5
+ "bag_proxy": 0.625,
6
+ "cloth_proxy": 0.6666666666666666
7
+ },
8
+ "mean_success": 0.5694444444444445,
9
+ "visibility_integral": 32.801942747500206,
10
+ "corridor_availability": 0.8877548724412918,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 1.4711664057066363,
13
+ "disturbance_cost": 0.37882790300581193
14
+ }
15
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_full/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## full
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/checkpoint_best.pt
5
+ - mean_success: 0.569
6
+ - visibility_integral: 32.802
7
+ - corridor_availability: 0.888
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 1.471
10
+ - disturbance_cost: 0.379
11
+ - foliage_proxy_success: 0.417
12
+ - bag_proxy_success: 0.625
13
+ - cloth_proxy_success: 0.667
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_planner/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "full": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4166666666666667,
5
+ "bag_proxy": 0.5833333333333334,
6
+ "cloth_proxy": 0.625
7
+ },
8
+ "mean_success": 0.5416666666666666,
9
+ "visibility_integral": 34.428366212381256,
10
+ "corridor_availability": 0.8909231291876899,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 1.4917179537341767,
13
+ "disturbance_cost": 0.39409097459995085
14
+ }
15
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_planner/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## full
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/checkpoint_best.pt
5
+ - mean_success: 0.542
6
+ - visibility_integral: 34.428
7
+ - corridor_availability: 0.891
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 1.492
10
+ - disturbance_cost: 0.394
11
+ - foliage_proxy_success: 0.417
12
+ - bag_proxy_success: 0.583
13
+ - cloth_proxy_success: 0.625
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_role_symmetry/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "full": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4166666666666667,
5
+ "bag_proxy": 0.625,
6
+ "cloth_proxy": 0.6666666666666666
7
+ },
8
+ "mean_success": 0.5694444444444445,
9
+ "visibility_integral": 33.27109728753567,
10
+ "corridor_availability": 0.8943836614489555,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 1.488106187582016,
13
+ "disturbance_cost": 0.3667886131960485
14
+ }
15
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/benchmark_no_role_symmetry/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## full
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/checkpoint_best.pt
5
+ - mean_success: 0.569
6
+ - visibility_integral: 33.271
7
+ - corridor_availability: 0.894
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 1.488
10
+ - disturbance_cost: 0.367
11
+ - foliage_proxy_success: 0.417
12
+ - bag_proxy_success: 0.625
13
+ - cloth_proxy_success: 0.667
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/config_resolved.yaml ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_r3d_stage1_dummy_seed14
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
3
+ device: cuda
4
+ seed: 14
5
+ data:
6
+ proxies:
7
+ - foliage_proxy
8
+ - bag_proxy
9
+ - cloth_proxy
10
+ resolution: 96
11
+ dataset_version: reveal_proxy_v6_rgbd_elastic_state
12
+ train_episodes_per_proxy: 48
13
+ val_episodes_per_proxy: 16
14
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v6_rgbd_stage1_dummy_seed14.pt
15
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v6_rgbd_stage1_dummy_seed14.pt
16
+ rebuild_dataset: false
17
+ chunk_horizon: 8
18
+ rollout_horizon: 5
19
+ history_steps: 6
20
+ planner_candidates: 8
21
+ seed: 14
22
+ optim:
23
+ epochs: 4
24
+ batch_size: 16
25
+ num_workers: 4
26
+ lr: 0.001
27
+ weight_decay: 0.0001
28
+ trainer:
29
+ policy_type: elastic_reveal
30
+ use_bf16: false
31
+ grad_clip_norm: 1.0
32
+ freeze_backbone: true
33
+ gradient_checkpointing: false
34
+ plan_during_train: true
35
+ plan_during_eval: true
36
+ support_mode_conditioning: true
37
+ planner_mode: trainable
38
+ use_depth: false
39
+ use_world_model: true
40
+ use_role_tokens: true
41
+ compute_equivariance_probe: true
42
+ policy:
43
+ backbone:
44
+ model_name: openai/clip-vit-base-patch32
45
+ hidden_dim: 192
46
+ max_text_tokens: 32
47
+ freeze_backbone: true
48
+ gradient_checkpointing: false
49
+ use_dummy_backbone: true
50
+ fusion:
51
+ hidden_dim: 192
52
+ num_cameras: 3
53
+ num_layers: 2
54
+ num_heads: 4
55
+ ff_dim: 384
56
+ dropout: 0.1
57
+ proprio_dim: 32
58
+ proprio_tokens: 1
59
+ memory:
60
+ hidden_dim: 192
61
+ action_dim: 14
62
+ history_steps: 6
63
+ scene_history_steps: 3
64
+ belief_history_steps: 8
65
+ num_layers: 2
66
+ dropout: 0.1
67
+ memory_bank_size: 4
68
+ scene_bank_size: 2
69
+ belief_bank_size: 2
70
+ num_heads: 4
71
+ max_history_steps: 8
72
+ decoder:
73
+ hidden_dim: 192
74
+ num_heads: 4
75
+ num_layers: 2
76
+ ff_dim: 384
77
+ dropout: 0.1
78
+ chunk_size: 8
79
+ action_dim: 14
80
+ arm_action_dim: 7
81
+ num_candidates: 8
82
+ num_phases: 5
83
+ num_arm_roles: 4
84
+ num_proposal_modes: 6
85
+ planner_top_k: 4
86
+ reveal_head:
87
+ hidden_dim: 192
88
+ num_support_modes: 3
89
+ num_approach_templates: 32
90
+ rollout_horizon: 5
91
+ belief_map_size: 32
92
+ field_size: 16
93
+ num_heads: 4
94
+ predict_belief_map: true
95
+ num_phases: 5
96
+ num_arm_roles: 4
97
+ num_interaction_tokens: 8
98
+ world_model:
99
+ hidden_dim: 192
100
+ action_dim: 14
101
+ num_support_modes: 3
102
+ num_approach_templates: 32
103
+ rollout_horizon: 5
104
+ field_size: 16
105
+ num_heads: 4
106
+ num_phases: 5
107
+ num_arm_roles: 4
108
+ num_interaction_tokens: 8
109
+ belief_map_size: 32
110
+ predict_belief_map: true
111
+ scene_bank_size: 2
112
+ belief_bank_size: 2
113
+ planner:
114
+ hidden_dim: 192
115
+ num_candidates: 8
116
+ action_dim: 14
117
+ num_support_modes: 3
118
+ utility_margin: 0.1
119
+ num_heads: 4
120
+ num_layers: 2
121
+ num_phases: 5
122
+ num_arm_roles: 4
123
+ top_k: 4
124
+ loss_weights:
125
+ action: 1.0
126
+ phase: 0.15
127
+ arm_role: 0.2
128
+ support_mode: 0.15
129
+ corridor: 0.2
130
+ persistence: 0.1
131
+ disturbance: 0.1
132
+ world_model: 0.25
133
+ belief: 0.05
134
+ visibility: 0.05
135
+ clearance: 0.05
136
+ support_stability: 0.05
137
+ reocclusion: 0.05
138
+ occluder_contact: 0.05
139
+ grasp_affordance: 0.05
140
+ planner_success: 0.2
141
+ planner_risk: 0.1
142
+ planner_ranking: 0.1
143
+ proposal_reconstruction: 0.2
144
+ proposal_success: 0.1
145
+ proposal_ranking: 0.1
146
+ proposal_diversity: 0.05
147
+ role_swap_consistency: 0.05
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/diagnostics_full/proxy_diagnostics.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "planner_top1_accuracy": 0.2846153846153846,
3
+ "planner_regret": 0.014314642176032066,
4
+ "planner_score_utility_spearman": 0.2153846174478531,
5
+ "risk_calibration_mse": 0.010775926522910595,
6
+ "role_collapse_rate": 0.0,
7
+ "proposal_diversity": 0.02589959278702736,
8
+ "left_right_equivariance_error": 0.008901518605211201,
9
+ "belief_calibration_brier": 0.005614265333861113,
10
+ "reocclusion_calibration_brier": 0.28406235575675964,
11
+ "support_stability_mae": 0.025872904807329178,
12
+ "clearance_auc": 0.5220335124994485,
13
+ "memory_write_rate": 0.0,
14
+ "memory_saturation": 0.7309081554412842,
15
+ "num_samples": 130
16
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/metrics.json ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.033738364155093827,
6
+ "arm_role": 0.2658534389071671,
7
+ "belief": 0.1663714082290729,
8
+ "clearance": 0.1995344152674079,
9
+ "corridor": 0.2937144724031289,
10
+ "disturbance": 0.01641949706633265,
11
+ "grasp_affordance": 0.07253360034277041,
12
+ "occluder_contact": 0.262634892637531,
13
+ "persistence": 5.348720759153366,
14
+ "phase": 0.9128680676221848,
15
+ "planner_ranking": 0.7161665211121241,
16
+ "planner_risk": 0.03542382351588458,
17
+ "planner_success": 0.6313644871115685,
18
+ "proposal_diversity": 0.0,
19
+ "proposal_ranking": 1.328845535715421,
20
+ "proposal_reconstruction": 0.07508338304857413,
21
+ "proposal_success": 0.6797524491945902,
22
+ "reocclusion": 0.7106639867027601,
23
+ "role_swap_consistency": 0.0008167610091428893,
24
+ "support_mode": 0.7801499888300896,
25
+ "support_stability": 0.21256058973570666,
26
+ "total": 2.46435983479023,
27
+ "uncertainty": 0.17734388983808458,
28
+ "visibility": 0.16707653552293777,
29
+ "world_model": 4.078198651472728
30
+ },
31
+ "val": {
32
+ "action": 0.023770140690935984,
33
+ "arm_role": 0.0004891494075612476,
34
+ "belief": 0.11787863655222787,
35
+ "clearance": 0.08211326102415721,
36
+ "corridor": 0.2646504044532776,
37
+ "disturbance": 0.0077974022262626225,
38
+ "grasp_affordance": 0.010528300681875812,
39
+ "occluder_contact": 0.23685429162449306,
40
+ "persistence": 4.643319712744819,
41
+ "phase": 0.6877350012461344,
42
+ "planner_ranking": 0.5576971173286438,
43
+ "planner_risk": 0.012001174760775434,
44
+ "planner_success": 0.6474077436659071,
45
+ "proposal_diversity": 0.0,
46
+ "proposal_ranking": 1.2473273674647014,
47
+ "proposal_reconstruction": 0.06659724977281359,
48
+ "proposal_success": 0.6868854032622443,
49
+ "reocclusion": 0.6894112494256761,
50
+ "role_swap_consistency": 0.0,
51
+ "support_mode": 0.7945182191001045,
52
+ "support_stability": 0.13977908922566307,
53
+ "total": 1.9791885084576077,
54
+ "uncertainty": 0.016744557561145887,
55
+ "visibility": 0.09745695524745518,
56
+ "world_model": 3.0115205181969538
57
+ }
58
+ },
59
+ {
60
+ "epoch": 1,
61
+ "train": {
62
+ "action": 0.02093995890269677,
63
+ "arm_role": 0.00021873527142209545,
64
+ "belief": 0.1156839697311322,
65
+ "clearance": 0.09139195084571838,
66
+ "corridor": 0.2529828678816557,
67
+ "disturbance": 0.003422619032789953,
68
+ "grasp_affordance": 0.017661277670413256,
69
+ "occluder_contact": 0.22792026090125242,
70
+ "persistence": 4.702208956082662,
71
+ "phase": 0.5312556164960066,
72
+ "planner_ranking": 0.20636002533137798,
73
+ "planner_risk": 0.015822513572250802,
74
+ "planner_success": 0.5910777151584625,
75
+ "proposal_diversity": 0.0,
76
+ "proposal_ranking": 1.1696062982082367,
77
+ "proposal_reconstruction": 0.06334876082837582,
78
+ "proposal_success": 0.6708702544371287,
79
+ "reocclusion": 0.5039266211291155,
80
+ "role_swap_consistency": 0.0005020403975019386,
81
+ "support_mode": 0.3201311229883383,
82
+ "support_stability": 0.13968352818240723,
83
+ "total": 1.6841449290513992,
84
+ "uncertainty": 0.026018289965577424,
85
+ "visibility": 0.11011519034703572,
86
+ "world_model": 2.466151461005211
87
+ },
88
+ "val": {
89
+ "action": 0.020535202903880015,
90
+ "arm_role": 0.00012925987215971368,
91
+ "belief": 0.10588792545927896,
92
+ "clearance": 0.08000239895449744,
93
+ "corridor": 0.23227471278773415,
94
+ "disturbance": 0.0022439691221936503,
95
+ "grasp_affordance": 0.011653332453635003,
96
+ "occluder_contact": 0.21834516359700096,
97
+ "persistence": 4.46406364440918,
98
+ "phase": 0.4118766354189979,
99
+ "planner_ranking": 0.0892416491276688,
100
+ "planner_risk": 0.0152344209038549,
101
+ "planner_success": 0.6057713859611087,
102
+ "proposal_diversity": 0.0,
103
+ "proposal_ranking": 1.133669826719496,
104
+ "proposal_reconstruction": 0.06398758581942982,
105
+ "proposal_success": 0.6783458656734891,
106
+ "reocclusion": 0.2840655545393626,
107
+ "role_swap_consistency": 0.0,
108
+ "support_mode": 0.0015922162112676436,
109
+ "support_stability": 0.13890525698661804,
110
+ "total": 1.584020005332099,
111
+ "uncertainty": 0.014379701991048124,
112
+ "visibility": 0.09630187600851059,
113
+ "world_model": 2.5434003671010337
114
+ }
115
+ },
116
+ {
117
+ "epoch": 2,
118
+ "train": {
119
+ "action": 0.017165315182258684,
120
+ "arm_role": 0.00014243966719125942,
121
+ "belief": 0.1267746559654673,
122
+ "clearance": 0.09291451362272103,
123
+ "corridor": 0.2539026445398728,
124
+ "disturbance": 0.0040997157484525815,
125
+ "grasp_affordance": 0.016216314087311428,
126
+ "occluder_contact": 0.2287510900447766,
127
+ "persistence": 2.7297142073512077,
128
+ "phase": 0.4553527260820071,
129
+ "planner_ranking": 0.0675589762783299,
130
+ "planner_risk": 0.012244323831206808,
131
+ "planner_success": 0.5227356925606728,
132
+ "proposal_diversity": 0.0,
133
+ "proposal_ranking": 1.1367994795242946,
134
+ "proposal_reconstruction": 0.06006583757698536,
135
+ "proposal_success": 0.6718559389313062,
136
+ "reocclusion": 0.28394716791808605,
137
+ "role_swap_consistency": 0.000532965175807476,
138
+ "support_mode": 0.0007756326898136953,
139
+ "support_stability": 0.14084124999741712,
140
+ "total": 1.2956190605958302,
141
+ "uncertainty": 0.011363255020114593,
142
+ "visibility": 0.11323032714426517,
143
+ "world_model": 2.120655362804731
144
+ },
145
+ "val": {
146
+ "action": 0.016470486712124612,
147
+ "arm_role": 0.00015339441274085807,
148
+ "belief": 0.15912896229161155,
149
+ "clearance": 0.07826702462302314,
150
+ "corridor": 0.21473425957891676,
151
+ "disturbance": 0.0018082650106710692,
152
+ "grasp_affordance": 0.008080463701238235,
153
+ "occluder_contact": 0.22728429403569964,
154
+ "persistence": 1.846471561325921,
155
+ "phase": 0.4164143088791106,
156
+ "planner_ranking": 0.05541756912134588,
157
+ "planner_risk": 0.011288604181673791,
158
+ "planner_success": 0.5237696303261651,
159
+ "proposal_diversity": 0.0,
160
+ "proposal_ranking": 1.1311746835708618,
161
+ "proposal_reconstruction": 0.06064582823051347,
162
+ "proposal_success": 0.6669412983788384,
163
+ "reocclusion": 0.27248211950063705,
164
+ "role_swap_consistency": 0.0,
165
+ "support_mode": 0.00040661103816496,
166
+ "support_stability": 0.13817799753612942,
167
+ "total": 1.241025275654263,
168
+ "uncertainty": 0.003020187374204397,
169
+ "visibility": 0.11647009683979882,
170
+ "world_model": 2.323344442579481
171
+ }
172
+ },
173
+ {
174
+ "epoch": 3,
175
+ "train": {
176
+ "action": 0.015070427674800158,
177
+ "arm_role": 0.0002641689807205694,
178
+ "belief": 0.141230215318501,
179
+ "clearance": 0.07984113336230318,
180
+ "corridor": 0.225482989102602,
181
+ "disturbance": 0.0017908170169296984,
182
+ "grasp_affordance": 0.008550037746317685,
183
+ "occluder_contact": 0.21477928136785826,
184
+ "persistence": 1.6129546587665875,
185
+ "phase": 0.42590194568037987,
186
+ "planner_ranking": 0.04456973075866699,
187
+ "planner_risk": 0.010397601523436606,
188
+ "planner_success": 0.49412518242994946,
189
+ "proposal_diversity": 0.0,
190
+ "proposal_ranking": 1.1504750202099483,
191
+ "proposal_reconstruction": 0.058567725432415806,
192
+ "proposal_success": 0.6462936575214068,
193
+ "reocclusion": 0.2506879176944494,
194
+ "role_swap_consistency": 0.000550856914439161,
195
+ "support_mode": 0.0003065853112881693,
196
+ "support_stability": 0.1366732595488429,
197
+ "total": 1.134415107468764,
198
+ "uncertainty": 0.0035936666245106608,
199
+ "visibility": 0.10351777387162049,
200
+ "world_model": 2.024999057253202
201
+ },
202
+ "val": {
203
+ "action": 0.016186242405739095,
204
+ "arm_role": 0.0002410423346898622,
205
+ "belief": 0.12203978498776753,
206
+ "clearance": 0.07702170064051946,
207
+ "corridor": 0.21113747523890602,
208
+ "disturbance": 0.0014993647216922706,
209
+ "grasp_affordance": 0.008119617278377214,
210
+ "occluder_contact": 0.21474246515168083,
211
+ "persistence": 1.9725701610247295,
212
+ "phase": 0.4842751953336928,
213
+ "planner_ranking": 0.04342265882425838,
214
+ "planner_risk": 0.01107009764139851,
215
+ "planner_success": 0.5070097777578566,
216
+ "proposal_diversity": 0.0,
217
+ "proposal_ranking": 1.1282474862204657,
218
+ "proposal_reconstruction": 0.05997827731900745,
219
+ "proposal_success": 0.6469291316138374,
220
+ "reocclusion": 0.2716698878341251,
221
+ "role_swap_consistency": 0.0,
222
+ "support_mode": 0.00020467836778455725,
223
+ "support_stability": 0.13836157073577246,
224
+ "total": 1.2091523673799303,
225
+ "uncertainty": 0.0025335378272251952,
226
+ "visibility": 0.09879730641841888,
227
+ "world_model": 2.1507359743118286
228
+ }
229
+ }
230
+ ]
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/summary.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "experiment_name": "proxy_interaction_r3d_stage1_dummy_seed14",
3
+ "device": "cuda",
4
+ "best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed14/checkpoint_best.pt",
5
+ "final_train_total": 1.134415107468764,
6
+ "final_val_total": 1.2091523673799303,
7
+ "train_time_sec": 23.220722675323486,
8
+ "peak_gpu_memory_mb": 626.4716796875,
9
+ "num_train_samples": 381,
10
+ "num_val_samples": 130,
11
+ "planner_mode": "trainable",
12
+ "frozen_modules": [],
13
+ "init_info": null
14
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_full/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "full": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4583333333333333,
5
+ "bag_proxy": 0.625,
6
+ "cloth_proxy": 0.7083333333333334
7
+ },
8
+ "mean_success": 0.5972222222222222,
9
+ "visibility_integral": 29.697570121950573,
10
+ "corridor_availability": 0.8675610861844487,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 2.20430763148842,
13
+ "disturbance_cost": 0.36563710583787823
14
+ }
15
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_full/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## full
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/checkpoint_best.pt
5
+ - mean_success: 0.597
6
+ - visibility_integral: 29.698
7
+ - corridor_availability: 0.868
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 2.204
10
+ - disturbance_cost: 0.366
11
+ - foliage_proxy_success: 0.458
12
+ - bag_proxy_success: 0.625
13
+ - cloth_proxy_success: 0.708
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_planner/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "full": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4583333333333333,
5
+ "bag_proxy": 0.625,
6
+ "cloth_proxy": 0.7083333333333334
7
+ },
8
+ "mean_success": 0.5972222222222222,
9
+ "visibility_integral": 29.697570121950573,
10
+ "corridor_availability": 0.8675610861844487,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 2.20430763148842,
13
+ "disturbance_cost": 0.36563710583787823
14
+ }
15
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_planner/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## full
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/checkpoint_best.pt
5
+ - mean_success: 0.597
6
+ - visibility_integral: 29.698
7
+ - corridor_availability: 0.868
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 2.204
10
+ - disturbance_cost: 0.366
11
+ - foliage_proxy_success: 0.458
12
+ - bag_proxy_success: 0.625
13
+ - cloth_proxy_success: 0.708
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_role_symmetry/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "full": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.5,
5
+ "bag_proxy": 0.625,
6
+ "cloth_proxy": 0.7083333333333334
7
+ },
8
+ "mean_success": 0.6111111111111112,
9
+ "visibility_integral": 28.954636810554398,
10
+ "corridor_availability": 0.8660841253068712,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 2.10539705814184,
13
+ "disturbance_cost": 0.35598844579524463
14
+ }
15
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/benchmark_no_role_symmetry/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## full
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/checkpoint_best.pt
5
+ - mean_success: 0.611
6
+ - visibility_integral: 28.955
7
+ - corridor_availability: 0.866
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 2.105
10
+ - disturbance_cost: 0.356
11
+ - foliage_proxy_success: 0.500
12
+ - bag_proxy_success: 0.625
13
+ - cloth_proxy_success: 0.708
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/config_resolved.yaml ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_interaction_r3d_stage1_dummy_seed15
2
+ output_dir: /workspace/VLAarchtests/artifacts/outputs/r3d
3
+ device: cuda
4
+ seed: 15
5
+ data:
6
+ proxies:
7
+ - foliage_proxy
8
+ - bag_proxy
9
+ - cloth_proxy
10
+ resolution: 96
11
+ dataset_version: reveal_proxy_v6_rgbd_elastic_state
12
+ train_episodes_per_proxy: 48
13
+ val_episodes_per_proxy: 16
14
+ train_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_train_v6_rgbd_stage1_dummy_seed15.pt
15
+ val_dataset_path: /workspace/VLAarchtests/artifacts/data/reveal_proxy/proxy_val_v6_rgbd_stage1_dummy_seed15.pt
16
+ rebuild_dataset: false
17
+ chunk_horizon: 8
18
+ rollout_horizon: 5
19
+ history_steps: 6
20
+ planner_candidates: 8
21
+ seed: 15
22
+ optim:
23
+ epochs: 4
24
+ batch_size: 16
25
+ num_workers: 4
26
+ lr: 0.001
27
+ weight_decay: 0.0001
28
+ trainer:
29
+ policy_type: elastic_reveal
30
+ use_bf16: false
31
+ grad_clip_norm: 1.0
32
+ freeze_backbone: true
33
+ gradient_checkpointing: false
34
+ plan_during_train: true
35
+ plan_during_eval: true
36
+ support_mode_conditioning: true
37
+ planner_mode: trainable
38
+ use_depth: false
39
+ use_world_model: true
40
+ use_role_tokens: true
41
+ compute_equivariance_probe: true
42
+ policy:
43
+ backbone:
44
+ model_name: openai/clip-vit-base-patch32
45
+ hidden_dim: 192
46
+ max_text_tokens: 32
47
+ freeze_backbone: true
48
+ gradient_checkpointing: false
49
+ use_dummy_backbone: true
50
+ fusion:
51
+ hidden_dim: 192
52
+ num_cameras: 3
53
+ num_layers: 2
54
+ num_heads: 4
55
+ ff_dim: 384
56
+ dropout: 0.1
57
+ proprio_dim: 32
58
+ proprio_tokens: 1
59
+ memory:
60
+ hidden_dim: 192
61
+ action_dim: 14
62
+ history_steps: 6
63
+ scene_history_steps: 3
64
+ belief_history_steps: 8
65
+ num_layers: 2
66
+ dropout: 0.1
67
+ memory_bank_size: 4
68
+ scene_bank_size: 2
69
+ belief_bank_size: 2
70
+ num_heads: 4
71
+ max_history_steps: 8
72
+ decoder:
73
+ hidden_dim: 192
74
+ num_heads: 4
75
+ num_layers: 2
76
+ ff_dim: 384
77
+ dropout: 0.1
78
+ chunk_size: 8
79
+ action_dim: 14
80
+ arm_action_dim: 7
81
+ num_candidates: 8
82
+ num_phases: 5
83
+ num_arm_roles: 4
84
+ num_proposal_modes: 6
85
+ planner_top_k: 4
86
+ reveal_head:
87
+ hidden_dim: 192
88
+ num_support_modes: 3
89
+ num_approach_templates: 32
90
+ rollout_horizon: 5
91
+ belief_map_size: 32
92
+ field_size: 16
93
+ num_heads: 4
94
+ predict_belief_map: true
95
+ num_phases: 5
96
+ num_arm_roles: 4
97
+ num_interaction_tokens: 8
98
+ world_model:
99
+ hidden_dim: 192
100
+ action_dim: 14
101
+ num_support_modes: 3
102
+ num_approach_templates: 32
103
+ rollout_horizon: 5
104
+ field_size: 16
105
+ num_heads: 4
106
+ num_phases: 5
107
+ num_arm_roles: 4
108
+ num_interaction_tokens: 8
109
+ belief_map_size: 32
110
+ predict_belief_map: true
111
+ scene_bank_size: 2
112
+ belief_bank_size: 2
113
+ planner:
114
+ hidden_dim: 192
115
+ num_candidates: 8
116
+ action_dim: 14
117
+ num_support_modes: 3
118
+ utility_margin: 0.1
119
+ num_heads: 4
120
+ num_layers: 2
121
+ num_phases: 5
122
+ num_arm_roles: 4
123
+ top_k: 4
124
+ loss_weights:
125
+ action: 1.0
126
+ phase: 0.15
127
+ arm_role: 0.2
128
+ support_mode: 0.15
129
+ corridor: 0.2
130
+ persistence: 0.1
131
+ disturbance: 0.1
132
+ world_model: 0.25
133
+ belief: 0.05
134
+ visibility: 0.05
135
+ clearance: 0.05
136
+ support_stability: 0.05
137
+ reocclusion: 0.05
138
+ occluder_contact: 0.05
139
+ grasp_affordance: 0.05
140
+ planner_success: 0.2
141
+ planner_risk: 0.1
142
+ planner_ranking: 0.1
143
+ proposal_reconstruction: 0.2
144
+ proposal_success: 0.1
145
+ proposal_ranking: 0.1
146
+ proposal_diversity: 0.05
147
+ role_swap_consistency: 0.05
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/diagnostics_full/proxy_diagnostics.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "planner_top1_accuracy": 0.3053435114503817,
3
+ "planner_regret": 0.013406210578978062,
4
+ "planner_score_utility_spearman": 0.2839694619178772,
5
+ "risk_calibration_mse": 0.010891024023294449,
6
+ "role_collapse_rate": 0.0,
7
+ "proposal_diversity": 0.02313310280442238,
8
+ "left_right_equivariance_error": 0.006598936667775407,
9
+ "belief_calibration_brier": 0.00368268764577806,
10
+ "reocclusion_calibration_brier": 0.2288682460784912,
11
+ "support_stability_mae": 0.025202222168445587,
12
+ "clearance_auc": 0.9189163634555108,
13
+ "memory_write_rate": 0.0,
14
+ "memory_saturation": 0.8174758553504944,
15
+ "num_samples": 131
16
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/metrics.json ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 0.028008008919035394,
6
+ "arm_role": 0.2316993211661611,
7
+ "belief": 0.21131388066957393,
8
+ "clearance": 0.19917472638189793,
9
+ "corridor": 0.3046618662774563,
10
+ "disturbance": 0.020259966540227953,
11
+ "grasp_affordance": 0.15939014249791703,
12
+ "occluder_contact": 0.3023037730405728,
13
+ "persistence": 5.1030773023764295,
14
+ "phase": 0.7391876379648844,
15
+ "planner_ranking": 0.6672491803765297,
16
+ "planner_risk": 0.035407664448333286,
17
+ "planner_success": 0.6247484882672628,
18
+ "proposal_diversity": 0.0,
19
+ "proposal_ranking": 1.2685468345880508,
20
+ "proposal_reconstruction": 0.07012522220611572,
21
+ "proposal_success": 0.6749546950062116,
22
+ "reocclusion": 0.6581779879828294,
23
+ "role_swap_consistency": 0.0007787000698347887,
24
+ "support_mode": 0.6318444466839234,
25
+ "support_stability": 0.21354713415106139,
26
+ "total": 2.377249076962471,
27
+ "uncertainty": 0.2297215286331872,
28
+ "visibility": 0.20075704219440618,
29
+ "world_model": 4.083281387885411
30
+ },
31
+ "val": {
32
+ "action": 0.023762268117732473,
33
+ "arm_role": 0.00020197388787184737,
34
+ "belief": 0.1366901993751526,
35
+ "clearance": 0.10309203879700767,
36
+ "corridor": 0.26862603922684986,
37
+ "disturbance": 0.0037259276594138807,
38
+ "grasp_affordance": 0.044725324544641704,
39
+ "occluder_contact": 0.2536553243796031,
40
+ "persistence": 4.777863184611003,
41
+ "phase": 0.5066013468636407,
42
+ "planner_ranking": 0.44456031918525696,
43
+ "planner_risk": 0.01433694911085897,
44
+ "planner_success": 0.6283807026015388,
45
+ "proposal_diversity": 0.0,
46
+ "proposal_ranking": 1.1667029857635498,
47
+ "proposal_reconstruction": 0.0664608735177252,
48
+ "proposal_success": 0.6838224861356947,
49
+ "reocclusion": 0.3364369339413113,
50
+ "role_swap_consistency": 0.0,
51
+ "support_mode": 0.06715444227059682,
52
+ "support_stability": 0.14777708219157326,
53
+ "total": 1.8394301467471652,
54
+ "uncertainty": 0.07208604945076837,
55
+ "visibility": 0.12188677820894453,
56
+ "world_model": 3.079341014226278
57
+ }
58
+ },
59
+ {
60
+ "epoch": 1,
61
+ "train": {
62
+ "action": 0.018888041842728853,
63
+ "arm_role": 0.00043030476990679745,
64
+ "belief": 0.11719414374480645,
65
+ "clearance": 0.08535642797748248,
66
+ "corridor": 0.24796467771132788,
67
+ "disturbance": 0.0024048478032151857,
68
+ "grasp_affordance": 0.022171703943361838,
69
+ "occluder_contact": 0.22088239962855974,
70
+ "persistence": 4.555501798788707,
71
+ "phase": 0.43327916599810123,
72
+ "planner_ranking": 0.15463371171305576,
73
+ "planner_risk": 0.01981719226265947,
74
+ "planner_success": 0.5631782834728559,
75
+ "proposal_diversity": 0.0,
76
+ "proposal_ranking": 1.1632012923558552,
77
+ "proposal_reconstruction": 0.0615519261918962,
78
+ "proposal_success": 0.6722564473748207,
79
+ "reocclusion": 0.287830734004577,
80
+ "role_swap_consistency": 0.00048373279059887864,
81
+ "support_mode": 0.008119381836574272,
82
+ "support_stability": 0.13662359025329351,
83
+ "total": 1.567106415828069,
84
+ "uncertainty": 0.03243653344300886,
85
+ "visibility": 0.11203592922538519,
86
+ "world_model": 2.404594744245211
87
+ },
88
+ "val": {
89
+ "action": 0.019907095055613253,
90
+ "arm_role": 0.00038116834993060265,
91
+ "belief": 0.1014507081773546,
92
+ "clearance": 0.07728531956672668,
93
+ "corridor": 0.22947043677171072,
94
+ "disturbance": 0.0014698771928023133,
95
+ "grasp_affordance": 0.02056772096289529,
96
+ "occluder_contact": 0.20453951425022548,
97
+ "persistence": 3.6124378045399985,
98
+ "phase": 0.47070127063327366,
99
+ "planner_ranking": 0.08099263947870997,
100
+ "planner_risk": 0.017360565563042957,
101
+ "planner_success": 0.5593770245711008,
102
+ "proposal_diversity": 0.0,
103
+ "proposal_ranking": 1.11685311794281,
104
+ "proposal_reconstruction": 0.0633203275501728,
105
+ "proposal_success": 0.683642049630483,
106
+ "reocclusion": 0.42518342865837944,
107
+ "role_swap_consistency": 0.0,
108
+ "support_mode": 8.963614042537908e-05,
109
+ "support_stability": 0.1495772964424557,
110
+ "total": 1.5412384668986003,
111
+ "uncertainty": 0.024036270876725514,
112
+ "visibility": 0.10443270951509476,
113
+ "world_model": 2.6981404887305365
114
+ }
115
+ },
116
+ {
117
+ "epoch": 2,
118
+ "train": {
119
+ "action": 0.01506453799083829,
120
+ "arm_role": 0.0002299571582019174,
121
+ "belief": 0.10169448765615623,
122
+ "clearance": 0.08062320730338494,
123
+ "corridor": 0.23694788571447134,
124
+ "disturbance": 0.002010827219540564,
125
+ "grasp_affordance": 0.012944541425288966,
126
+ "occluder_contact": 0.20663638102511564,
127
+ "persistence": 2.024513818323612,
128
+ "phase": 0.4406547602266073,
129
+ "planner_ranking": 0.052334820929293834,
130
+ "planner_risk": 0.012688904457415143,
131
+ "planner_success": 0.4998842130104701,
132
+ "proposal_diversity": 0.0,
133
+ "proposal_ranking": 1.1411344707012177,
134
+ "proposal_reconstruction": 0.058503514621406794,
135
+ "proposal_success": 0.663138655324777,
136
+ "reocclusion": 0.28770653810352087,
137
+ "role_swap_consistency": 0.0005917157322983257,
138
+ "support_mode": 0.00027886544603461516,
139
+ "support_stability": 0.14369840795795122,
140
+ "total": 1.2098931844035785,
141
+ "uncertainty": 0.009047253523021936,
142
+ "visibility": 0.09652530650297801,
143
+ "world_model": 2.1335272987683616
144
+ },
145
+ "val": {
146
+ "action": 0.0173407852028807,
147
+ "arm_role": 0.00028451886545452807,
148
+ "belief": 0.09623022625843684,
149
+ "clearance": 0.07612819969654083,
150
+ "corridor": 0.22281885809368557,
151
+ "disturbance": 0.001401680282368842,
152
+ "grasp_affordance": 0.00781761777276794,
153
+ "occluder_contact": 0.20622349116537306,
154
+ "persistence": 2.1598196625709534,
155
+ "phase": 0.47410638795958626,
156
+ "planner_ranking": 0.0378283916765617,
157
+ "planner_risk": 0.013348096515983343,
158
+ "planner_success": 0.4943488637606303,
159
+ "proposal_diversity": 0.0,
160
+ "proposal_ranking": 1.1125682062572904,
161
+ "proposal_reconstruction": 0.06057575262255139,
162
+ "proposal_success": 0.6509590811199613,
163
+ "reocclusion": 0.2778696550263299,
164
+ "role_swap_consistency": 0.0,
165
+ "support_mode": 7.348006571798275e-05,
166
+ "support_stability": 0.14099042697085273,
167
+ "total": 1.2928278247515361,
168
+ "uncertainty": 0.0023198039270937443,
169
+ "visibility": 0.08993011878596412,
170
+ "world_model": 2.425517029232449
171
+ }
172
+ },
173
+ {
174
+ "epoch": 3,
175
+ "train": {
176
+ "action": 0.015032132350218793,
177
+ "arm_role": 0.00015960596041016592,
178
+ "belief": 0.10330141056329012,
179
+ "clearance": 0.0756644958940645,
180
+ "corridor": 0.22099452962478003,
181
+ "disturbance": 0.0017974149668589234,
182
+ "grasp_affordance": 0.008848114540645232,
183
+ "occluder_contact": 0.20204609570403895,
184
+ "persistence": 1.6058371538917224,
185
+ "phase": 0.42861080542206764,
186
+ "planner_ranking": 0.040083787171170115,
187
+ "planner_risk": 0.010861996522483727,
188
+ "planner_success": 0.48133989547689754,
189
+ "proposal_diversity": 0.0,
190
+ "proposal_ranking": 1.1467161824305852,
191
+ "proposal_reconstruction": 0.058588774874806404,
192
+ "proposal_success": 0.6429290076096853,
193
+ "reocclusion": 0.24268781704207262,
194
+ "role_swap_consistency": 0.00047596763154918637,
195
+ "support_mode": 2.783346417345456e-05,
196
+ "support_stability": 0.1325785775358478,
197
+ "total": 1.1217727214097977,
198
+ "uncertainty": 0.003058687725570053,
199
+ "visibility": 0.09524129331111908,
200
+ "world_model": 2.0093316386143365
201
+ },
202
+ "val": {
203
+ "action": 0.016727436126934156,
204
+ "arm_role": 0.0002483524456490866,
205
+ "belief": 0.09281252986854976,
206
+ "clearance": 0.0730266264743275,
207
+ "corridor": 0.22520612014664543,
208
+ "disturbance": 0.0031746443160550874,
209
+ "grasp_affordance": 0.00780139294349485,
210
+ "occluder_contact": 0.20420674648549822,
211
+ "persistence": 1.9897065493795607,
212
+ "phase": 0.42935120397143894,
213
+ "planner_ranking": 0.03520135974718465,
214
+ "planner_risk": 0.012488630910714468,
215
+ "planner_success": 0.5116605394416385,
216
+ "proposal_diversity": 0.0,
217
+ "proposal_ranking": 1.1216257943047419,
218
+ "proposal_reconstruction": 0.05996803608205584,
219
+ "proposal_success": 0.6389667987823486,
220
+ "reocclusion": 0.26481906490193474,
221
+ "role_swap_consistency": 0.0,
222
+ "support_mode": 4.154515813247094e-05,
223
+ "support_stability": 0.13968953986962637,
224
+ "total": 1.1943119830555387,
225
+ "uncertainty": 0.0017189466937755544,
226
+ "visibility": 0.09683923174937566,
227
+ "world_model": 2.1186628209220038
228
+ }
229
+ }
230
+ ]
artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/summary.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "experiment_name": "proxy_interaction_r3d_stage1_dummy_seed15",
3
+ "device": "cuda",
4
+ "best_checkpoint": "/workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage1_dummy_seed15/checkpoint_best.pt",
5
+ "final_train_total": 1.1217727214097977,
6
+ "final_val_total": 1.1943119830555387,
7
+ "train_time_sec": 20.030457735061646,
8
+ "peak_gpu_memory_mb": 631.1953125,
9
+ "num_train_samples": 380,
10
+ "num_val_samples": 131,
11
+ "planner_mode": "trainable",
12
+ "frozen_modules": [],
13
+ "init_info": null
14
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/benchmark_full/reveal_benchmark.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "full": {
3
+ "per_task_success": {
4
+ "foliage_proxy": 0.4166666666666667,
5
+ "bag_proxy": 0.5833333333333334,
6
+ "cloth_proxy": 0.625
7
+ },
8
+ "mean_success": 0.5416666666666666,
9
+ "visibility_integral": 34.34427807728449,
10
+ "corridor_availability": 0.893132723040051,
11
+ "reocclusion_rate": 0.0,
12
+ "persistence_horizon_mae": 2.3119179729333856,
13
+ "disturbance_cost": 0.39262517919350004
14
+ }
15
+ }
artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/benchmark_full/reveal_benchmark.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Reveal Proxy Benchmark
2
+
3
+ ## full
4
+ - checkpoint: /workspace/VLAarchtests/artifacts/outputs/r3d/proxy_interaction_r3d_stage2_clip_seed11/checkpoint_best.pt
5
+ - mean_success: 0.542
6
+ - visibility_integral: 34.344
7
+ - corridor_availability: 0.893
8
+ - reocclusion_rate: 0.000
9
+ - persistence_horizon_mae: 2.312
10
+ - disturbance_cost: 0.393
11
+ - foliage_proxy_success: 0.417
12
+ - bag_proxy_success: 0.583
13
+ - cloth_proxy_success: 0.625