lsnu commited on
Commit
5e25737
·
verified ·
1 Parent(s): 3231b34

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +3 -0
  2. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/run_maniskill_bridge_retrieval_smoke.cpython-311.pyc +3 -0
  3. code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/run_maniskill_pickclutter_smoke.cpython-311.pyc +3 -0
  4. data/maniskill_bridge_retrieval/bag_bridge_smoke_v1/episode_splits.json +98 -0
  5. data/maniskill_bridge_retrieval/bag_bridge_smoke_v1/train.pt +3 -0
  6. data/maniskill_bridge_retrieval/bag_bridge_smoke_v1/val.pt +3 -0
  7. data/maniskill_bridge_retrieval/cloth_bridge_smoke_v1/episode_splits.json +98 -0
  8. data/maniskill_bridge_retrieval/cloth_bridge_smoke_v1/train.pt +3 -0
  9. data/maniskill_bridge_retrieval/cloth_bridge_smoke_v1/val.pt +3 -0
  10. data/maniskill_pickclutter/_debug_one.pt +3 -0
  11. data/maniskill_pickclutter/smoke_v1/episode_splits.json +98 -0
  12. data/maniskill_pickclutter/smoke_v1/train.pt +3 -0
  13. data/maniskill_pickclutter/smoke_v1/val.pt +3 -0
  14. data/maniskill_pickclutter/smoke_v2/episode_splits.json +98 -0
  15. data/maniskill_pickclutter/smoke_v2/train.pt +3 -0
  16. data/maniskill_pickclutter/smoke_v2/val.pt +3 -0
  17. data/maniskill_pickclutter/smoke_v3/episode_splits.json +98 -0
  18. data/maniskill_pickclutter/smoke_v3/train.pt +3 -0
  19. data/maniskill_pickclutter/smoke_v3/val.pt +3 -0
  20. data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage3_phase_fast.pt +3 -0
  21. data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage3_phase_fast_transition.pt +3 -0
  22. data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage3_phase_rebuild128_seed17.pt +3 -0
  23. data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3_phase_fast.pt +3 -0
  24. data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3_phase_fast_transition.pt +3 -0
  25. data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3_phase_rebuild128_seed17.pt +3 -0
  26. outputs/adapter_proxy/proxy_adapter_wrapped_clip_base_fast_seed17/checkpoint_best.pt +3 -0
  27. outputs/adapter_proxy/proxy_adapter_wrapped_clip_base_fast_seed17/config_resolved.yaml +173 -0
  28. outputs/adapter_proxy/proxy_adapter_wrapped_clip_base_fast_seed17/metrics.json +140 -0
  29. outputs/adapter_proxy/proxy_adapter_wrapped_clip_base_reuse128_seed17/checkpoint_best.pt +3 -0
  30. outputs/adapter_proxy/proxy_adapter_wrapped_clip_base_reuse128_seed17/metrics.json +278 -0
  31. outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_fast_seed17/checkpoint_best.pt +3 -0
  32. outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_fast_seed17/summary.json +0 -0
  33. outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_rebuild128_seed17/checkpoint_best.pt +3 -0
  34. outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_rebuild128_seed17/config_resolved.yaml +170 -0
  35. outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_rebuild128_seed17/summary.json +0 -0
  36. outputs/adapter_proxy/proxy_adapter_wrapped_clip_transition_fast_seed17/checkpoint_best.pt +3 -0
  37. outputs/anchor_adapter_wrapped_dual_push_seed17/checkpoint_best.pt +3 -0
  38. outputs/maniskill_bag_bridge_smoke_v1/adapter_active_ft_seed17/checkpoint_best.pt +3 -0
  39. outputs/maniskill_bag_bridge_smoke_v1/adapter_active_ft_seed17/summary.json +0 -0
  40. outputs/maniskill_bag_bridge_smoke_v1/adapter_active_ft_seed23/checkpoint_best.pt +3 -0
  41. outputs/maniskill_bag_bridge_smoke_v1/adapter_active_ft_seed23/summary.json +0 -0
  42. outputs/maniskill_bag_bridge_smoke_v1/trunk_only_ft_seed17/checkpoint_best.pt +3 -0
  43. outputs/maniskill_bag_bridge_smoke_v1/trunk_only_ft_seed17/summary.json +1104 -0
  44. outputs/maniskill_bag_bridge_smoke_v1/trunk_only_ft_seed23/checkpoint_best.pt +3 -0
  45. outputs/maniskill_bag_bridge_smoke_v1/trunk_only_ft_seed23/summary.json +1030 -0
  46. outputs/maniskill_cloth_bridge_smoke_v1/adapter_active_ft_seed17/checkpoint_best.pt +3 -0
  47. outputs/maniskill_cloth_bridge_smoke_v1/adapter_active_ft_seed17/summary.json +0 -0
  48. outputs/maniskill_cloth_bridge_smoke_v1/adapter_active_ft_seed23/checkpoint_best.pt +3 -0
  49. outputs/maniskill_cloth_bridge_smoke_v1/adapter_active_ft_seed23/summary.json +0 -0
  50. outputs/maniskill_cloth_bridge_smoke_v1/adapter_active_ft_seed29/checkpoint_best.pt +3 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/run_maniskill_pickclutter_smoke.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
37
+ code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/run_maniskill_bridge_retrieval_smoke.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
38
+ reports/proxy_rank_only_live/active/reveal_benchmark.json filter=lfs diff=lfs merge=lfs -text
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/run_maniskill_bridge_retrieval_smoke.cpython-311.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a85cd23beb92a6be6a17a67a984917ed0a03bb54761fd9a853554bfd4aa0860
3
+ size 130350
code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/__pycache__/run_maniskill_pickclutter_smoke.cpython-311.pyc ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b4da5c78421ec938985ec16690dce1c0ae0a8f65da6097478e09059c1556f58
3
+ size 131928
data/maniskill_bridge_retrieval/bag_bridge_smoke_v1/episode_splits.json ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval": [
3
+ 172000,
4
+ 172001,
5
+ 172002,
6
+ 172003,
7
+ 172004,
8
+ 172005,
9
+ 172006,
10
+ 172007,
11
+ 172008,
12
+ 172009,
13
+ 172010,
14
+ 172011,
15
+ 172012,
16
+ 172013,
17
+ 172014,
18
+ 172015,
19
+ 172016,
20
+ 172017,
21
+ 172018,
22
+ 172019,
23
+ 172020,
24
+ 172021,
25
+ 172022,
26
+ 172023,
27
+ 172024,
28
+ 172025,
29
+ 172026,
30
+ 172027,
31
+ 172028,
32
+ 172029,
33
+ 172030,
34
+ 172031,
35
+ 172032,
36
+ 172033,
37
+ 172034,
38
+ 172035,
39
+ 172036,
40
+ 172037,
41
+ 172038,
42
+ 172039,
43
+ 172040,
44
+ 172041,
45
+ 172042,
46
+ 172043,
47
+ 172044,
48
+ 172045,
49
+ 172046,
50
+ 172047,
51
+ 172048,
52
+ 172049
53
+ ],
54
+ "train": [
55
+ 170000,
56
+ 170001,
57
+ 170002,
58
+ 170003,
59
+ 170004,
60
+ 170005,
61
+ 170006,
62
+ 170007,
63
+ 170008,
64
+ 170009,
65
+ 170010,
66
+ 170011,
67
+ 170012,
68
+ 170013,
69
+ 170014,
70
+ 170015,
71
+ 170016,
72
+ 170017,
73
+ 170018,
74
+ 170019,
75
+ 170020,
76
+ 170021,
77
+ 170022,
78
+ 170023,
79
+ 170024,
80
+ 170025,
81
+ 170026,
82
+ 170027,
83
+ 170028,
84
+ 170029,
85
+ 170030,
86
+ 170031
87
+ ],
88
+ "val": [
89
+ 171000,
90
+ 171001,
91
+ 171002,
92
+ 171003,
93
+ 171004,
94
+ 171005,
95
+ 171006,
96
+ 171007
97
+ ]
98
+ }
data/maniskill_bridge_retrieval/bag_bridge_smoke_v1/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64c854519a672f3190914069d9100442facc32789aa6ecca7147315b216a1aba
3
+ size 907932312
data/maniskill_bridge_retrieval/bag_bridge_smoke_v1/val.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d97a77b8b583d1d13907ffa67f052ccaa9460b4d79026fc688a3ed24f3112e3
3
+ size 329139344
data/maniskill_bridge_retrieval/cloth_bridge_smoke_v1/episode_splits.json ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval": [
3
+ 170104,
4
+ 170105,
5
+ 170106,
6
+ 170107,
7
+ 170109,
8
+ 170113,
9
+ 170117,
10
+ 170121,
11
+ 170124,
12
+ 170125,
13
+ 170129,
14
+ 170131,
15
+ 170137,
16
+ 170138,
17
+ 170139,
18
+ 170140,
19
+ 170145,
20
+ 170150,
21
+ 170151,
22
+ 170153,
23
+ 170154,
24
+ 170155,
25
+ 170166,
26
+ 170171,
27
+ 170173,
28
+ 170175,
29
+ 170177,
30
+ 170180,
31
+ 170181,
32
+ 170184,
33
+ 170185,
34
+ 170186,
35
+ 170198,
36
+ 170200,
37
+ 170205,
38
+ 170207,
39
+ 170208,
40
+ 170212,
41
+ 170214,
42
+ 170217,
43
+ 170223,
44
+ 170226,
45
+ 170228,
46
+ 170229,
47
+ 170234,
48
+ 170237,
49
+ 170239,
50
+ 170241,
51
+ 170242,
52
+ 170243
53
+ ],
54
+ "train": [
55
+ 170000,
56
+ 170005,
57
+ 170006,
58
+ 170008,
59
+ 170012,
60
+ 170016,
61
+ 170019,
62
+ 170020,
63
+ 170028,
64
+ 170031,
65
+ 170032,
66
+ 170035,
67
+ 170036,
68
+ 170037,
69
+ 170038,
70
+ 170044,
71
+ 170045,
72
+ 170049,
73
+ 170050,
74
+ 170053,
75
+ 170054,
76
+ 170055,
77
+ 170057,
78
+ 170059,
79
+ 170061,
80
+ 170066,
81
+ 170067,
82
+ 170069,
83
+ 170073,
84
+ 170077,
85
+ 170079,
86
+ 170081
87
+ ],
88
+ "val": [
89
+ 170089,
90
+ 170091,
91
+ 170092,
92
+ 170095,
93
+ 170099,
94
+ 170100,
95
+ 170102,
96
+ 170103
97
+ ]
98
+ }
data/maniskill_bridge_retrieval/cloth_bridge_smoke_v1/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:926f4def9d473a355a5f9dc669055a4a5656071ee0fe9d6647fb6b9c160f3b95
3
+ size 1389837080
data/maniskill_bridge_retrieval/cloth_bridge_smoke_v1/val.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a69eb63dbf6f1ca19f69437634c4931b73e4923a1d644617a2179a28813822e8
3
+ size 347516816
data/maniskill_pickclutter/_debug_one.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb55e2a1cfaf72250af4d8436ac7323a66aa4b9ebc5890414a9d7bbca3180fc7
3
+ size 37494636
data/maniskill_pickclutter/smoke_v1/episode_splits.json ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval": [
3
+ 172000,
4
+ 172001,
5
+ 172002,
6
+ 172003,
7
+ 172004,
8
+ 172005,
9
+ 172006,
10
+ 172007,
11
+ 172008,
12
+ 172009,
13
+ 172010,
14
+ 172011,
15
+ 172012,
16
+ 172013,
17
+ 172014,
18
+ 172015,
19
+ 172016,
20
+ 172017,
21
+ 172018,
22
+ 172019,
23
+ 172020,
24
+ 172021,
25
+ 172022,
26
+ 172023,
27
+ 172024,
28
+ 172025,
29
+ 172026,
30
+ 172027,
31
+ 172028,
32
+ 172029,
33
+ 172030,
34
+ 172031,
35
+ 172032,
36
+ 172033,
37
+ 172034,
38
+ 172035,
39
+ 172036,
40
+ 172037,
41
+ 172038,
42
+ 172039,
43
+ 172040,
44
+ 172041,
45
+ 172042,
46
+ 172043,
47
+ 172044,
48
+ 172045,
49
+ 172046,
50
+ 172047,
51
+ 172048,
52
+ 172049
53
+ ],
54
+ "train": [
55
+ 170000,
56
+ 170001,
57
+ 170002,
58
+ 170003,
59
+ 170004,
60
+ 170005,
61
+ 170006,
62
+ 170007,
63
+ 170008,
64
+ 170009,
65
+ 170010,
66
+ 170011,
67
+ 170012,
68
+ 170013,
69
+ 170014,
70
+ 170015,
71
+ 170016,
72
+ 170017,
73
+ 170018,
74
+ 170019,
75
+ 170020,
76
+ 170021,
77
+ 170022,
78
+ 170023,
79
+ 170024,
80
+ 170025,
81
+ 170026,
82
+ 170027,
83
+ 170028,
84
+ 170029,
85
+ 170030,
86
+ 170031
87
+ ],
88
+ "val": [
89
+ 171000,
90
+ 171001,
91
+ 171002,
92
+ 171003,
93
+ 171004,
94
+ 171005,
95
+ 171006,
96
+ 171007
97
+ ]
98
+ }
data/maniskill_pickclutter/smoke_v1/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eecd3d3acfc05c6bb1aa54fd0b2a6c9771103aa6cf8453face688b99a56c2f3c
3
+ size 1197054040
data/maniskill_pickclutter/smoke_v1/val.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba290e8ba892a41518ff56852db3804b4d85de6aaf5962ef857428e906b993b1
3
+ size 341454800
data/maniskill_pickclutter/smoke_v2/episode_splits.json ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval": [
3
+ 172000,
4
+ 172001,
5
+ 172002,
6
+ 172003,
7
+ 172004,
8
+ 172005,
9
+ 172006,
10
+ 172007,
11
+ 172008,
12
+ 172009,
13
+ 172010,
14
+ 172011,
15
+ 172012,
16
+ 172013,
17
+ 172014,
18
+ 172015,
19
+ 172016,
20
+ 172017,
21
+ 172018,
22
+ 172019,
23
+ 172020,
24
+ 172021,
25
+ 172022,
26
+ 172023,
27
+ 172024,
28
+ 172025,
29
+ 172026,
30
+ 172027,
31
+ 172028,
32
+ 172029,
33
+ 172030,
34
+ 172031,
35
+ 172032,
36
+ 172033,
37
+ 172034,
38
+ 172035,
39
+ 172036,
40
+ 172037,
41
+ 172038,
42
+ 172039,
43
+ 172040,
44
+ 172041,
45
+ 172042,
46
+ 172043,
47
+ 172044,
48
+ 172045,
49
+ 172046,
50
+ 172047,
51
+ 172048,
52
+ 172049
53
+ ],
54
+ "train": [
55
+ 170000,
56
+ 170001,
57
+ 170002,
58
+ 170003,
59
+ 170004,
60
+ 170005,
61
+ 170006,
62
+ 170007,
63
+ 170008,
64
+ 170009,
65
+ 170010,
66
+ 170011,
67
+ 170012,
68
+ 170013,
69
+ 170014,
70
+ 170015,
71
+ 170016,
72
+ 170017,
73
+ 170018,
74
+ 170019,
75
+ 170020,
76
+ 170021,
77
+ 170022,
78
+ 170023,
79
+ 170024,
80
+ 170025,
81
+ 170026,
82
+ 170027,
83
+ 170028,
84
+ 170029,
85
+ 170030,
86
+ 170031
87
+ ],
88
+ "val": [
89
+ 171000,
90
+ 171001,
91
+ 171002,
92
+ 171003,
93
+ 171004,
94
+ 171005,
95
+ 171006,
96
+ 171007
97
+ ]
98
+ }
data/maniskill_pickclutter/smoke_v2/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a484fda7cb224d1a1bada8b32cc7c0971cb9f8339e89fa3942c01d2f180d2b0
3
+ size 1197206872
data/maniskill_pickclutter/smoke_v2/val.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6973f7ea8a1aafe7b01dc933226ab8d82f28bee2e2dec2d167f402a2c17347a
3
+ size 341498704
data/maniskill_pickclutter/smoke_v3/episode_splits.json ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval": [
3
+ 172000,
4
+ 172001,
5
+ 172002,
6
+ 172003,
7
+ 172004,
8
+ 172005,
9
+ 172006,
10
+ 172007,
11
+ 172008,
12
+ 172009,
13
+ 172010,
14
+ 172011,
15
+ 172012,
16
+ 172013,
17
+ 172014,
18
+ 172015,
19
+ 172016,
20
+ 172017,
21
+ 172018,
22
+ 172019,
23
+ 172020,
24
+ 172021,
25
+ 172022,
26
+ 172023,
27
+ 172024,
28
+ 172025,
29
+ 172026,
30
+ 172027,
31
+ 172028,
32
+ 172029,
33
+ 172030,
34
+ 172031,
35
+ 172032,
36
+ 172033,
37
+ 172034,
38
+ 172035,
39
+ 172036,
40
+ 172037,
41
+ 172038,
42
+ 172039,
43
+ 172040,
44
+ 172041,
45
+ 172042,
46
+ 172043,
47
+ 172044,
48
+ 172045,
49
+ 172046,
50
+ 172047,
51
+ 172048,
52
+ 172049
53
+ ],
54
+ "train": [
55
+ 170000,
56
+ 170001,
57
+ 170002,
58
+ 170003,
59
+ 170004,
60
+ 170005,
61
+ 170006,
62
+ 170007,
63
+ 170008,
64
+ 170009,
65
+ 170010,
66
+ 170011,
67
+ 170012,
68
+ 170013,
69
+ 170014,
70
+ 170015,
71
+ 170016,
72
+ 170017,
73
+ 170018,
74
+ 170019,
75
+ 170020,
76
+ 170021,
77
+ 170022,
78
+ 170023,
79
+ 170024,
80
+ 170025,
81
+ 170026,
82
+ 170027,
83
+ 170028,
84
+ 170029,
85
+ 170030,
86
+ 170031
87
+ ],
88
+ "val": [
89
+ 171000,
90
+ 171001,
91
+ 171002,
92
+ 171003,
93
+ 171004,
94
+ 171005,
95
+ 171006,
96
+ 171007
97
+ ]
98
+ }
data/maniskill_pickclutter/smoke_v3/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ecff2fb107f703fdaf0662bc5913b9c4e8326cd8cd92ee24f9a73a946247188
3
+ size 1201790040
data/maniskill_pickclutter/smoke_v3/val.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bf0859f51001b2f4a152a9f00509d8ecb7bd486ec97619704b1844cfab2db6d
3
+ size 342798032
data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage3_phase_fast.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d195345f2dc6dfcdf666a4dca99ad717670cd4cb24b816c54b892137c5dd6906
3
+ size 140638392
data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage3_phase_fast_transition.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5d10aca6b616e13c713dddfa70e1a378e340caf13e2a825dc2a5e23b394b38e
3
+ size 240587364
data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage3_phase_rebuild128_seed17.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a14d47b0e5c2766739b9236258f10ffee24638fec98389ce1290be788840132f
3
+ size 2682625452
data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3_phase_fast.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe8e7c4d10e4afe8f3cf211288159b07f4120b6c88fbfd6ef74f29ca8ba9eb76
3
+ size 46282928
data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3_phase_fast_transition.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d90e7247fa635a47164b76df5a496c93307aa39b04183d294b90b2ed7e870c2
3
+ size 79014044
data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3_phase_rebuild128_seed17.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:752e0111dd5427257a97ad8f7264b389fd8414d6d77cbf513a87b0cf7acd22e9
3
+ size 675590116
outputs/adapter_proxy/proxy_adapter_wrapped_clip_base_fast_seed17/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:377c2ccd2d15193fcaf5203661e7353fc69bd48084e3564f245e496579a0c841
3
+ size 878963364
outputs/adapter_proxy/proxy_adapter_wrapped_clip_base_fast_seed17/config_resolved.yaml ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_adapter_wrapped_clip_base_fast_seed17
2
+ output_dir: /workspace/workspace/outputs/adapter_proxy
3
+ device: cuda
4
+ seed: 17
5
+ init_checkpoint: /workspace/workspace/VLAarchtests2/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt
6
+ init_strict: false
7
+ data:
8
+ proxies:
9
+ - foliage_proxy
10
+ - bag_proxy
11
+ - cloth_proxy
12
+ resolution: 224
13
+ dataset_version: reveal_proxy_v6_rgbd_elastic_state_phase_fast
14
+ train_episodes_per_proxy: 12
15
+ val_episodes_per_proxy: 4
16
+ train_dataset_path: /workspace/workspace/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage3_phase_fast.pt
17
+ val_dataset_path: /workspace/workspace/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3_phase_fast.pt
18
+ rebuild_dataset: false
19
+ chunk_horizon: 8
20
+ rollout_horizon: 5
21
+ history_steps: 6
22
+ planner_candidates: 8
23
+ seed: 17
24
+ optim:
25
+ epochs: 2
26
+ batch_size: 4
27
+ num_workers: 8
28
+ lr: 0.0001
29
+ weight_decay: 0.0001
30
+ trainer:
31
+ policy_type: adapter_wrapped
32
+ training_regime: adapter_train_frozen_trunk
33
+ eval_mode: adapter_active
34
+ adapter_mode: adapter_active
35
+ adapter_use_transition_model: false
36
+ adapter_use_task_conditioning: true
37
+ use_bf16: true
38
+ grad_clip_norm: 1.0
39
+ freeze_backbone: true
40
+ gradient_checkpointing: false
41
+ plan_during_train: false
42
+ plan_during_eval: false
43
+ support_mode_conditioning: true
44
+ planner_mode: false
45
+ use_depth: true
46
+ use_world_model: false
47
+ use_role_tokens: true
48
+ compute_equivariance_probe: false
49
+ trainable_parameter_prefixes:
50
+ - adapter.state_head
51
+ - adapter.proposal_prior
52
+ - adapter.planner
53
+ policy:
54
+ backbone:
55
+ model_name: openai/clip-vit-base-patch32
56
+ hidden_dim: 512
57
+ max_text_tokens: 32
58
+ freeze_backbone: true
59
+ gradient_checkpointing: false
60
+ use_dummy_backbone: false
61
+ fusion:
62
+ hidden_dim: 512
63
+ num_cameras: 3
64
+ num_layers: 4
65
+ num_heads: 8
66
+ ff_dim: 2048
67
+ dropout: 0.1
68
+ proprio_dim: 32
69
+ proprio_tokens: 1
70
+ memory:
71
+ hidden_dim: 512
72
+ action_dim: 14
73
+ history_steps: 6
74
+ scene_history_steps: 3
75
+ belief_history_steps: 8
76
+ num_layers: 2
77
+ dropout: 0.1
78
+ memory_bank_size: 4
79
+ scene_bank_size: 2
80
+ belief_bank_size: 2
81
+ num_heads: 8
82
+ max_history_steps: 8
83
+ reveal_cache_steps: 4
84
+ reveal_cache_decay: 0.7
85
+ decoder:
86
+ hidden_dim: 512
87
+ num_heads: 8
88
+ num_layers: 4
89
+ ff_dim: 2048
90
+ dropout: 0.1
91
+ chunk_size: 8
92
+ action_dim: 14
93
+ arm_action_dim: 7
94
+ num_candidates: 8
95
+ num_phases: 5
96
+ num_arm_roles: 4
97
+ num_proposal_modes: 7
98
+ planner_top_k: 4
99
+ proposal_delta_scale: 0.2
100
+ proposal_slot_scale: 0.05
101
+ reveal_head:
102
+ hidden_dim: 512
103
+ num_support_modes: 3
104
+ num_approach_templates: 32
105
+ rollout_horizon: 5
106
+ belief_map_size: 32
107
+ field_size: 16
108
+ num_heads: 8
109
+ predict_belief_map: true
110
+ num_phases: 5
111
+ num_arm_roles: 4
112
+ num_interaction_tokens: 8
113
+ num_tasks: 4
114
+ world_model:
115
+ hidden_dim: 512
116
+ action_dim: 14
117
+ num_support_modes: 3
118
+ num_approach_templates: 32
119
+ rollout_horizon: 5
120
+ field_size: 16
121
+ num_heads: 8
122
+ num_phases: 5
123
+ num_arm_roles: 4
124
+ num_interaction_tokens: 8
125
+ belief_map_size: 32
126
+ predict_belief_map: true
127
+ scene_bank_size: 2
128
+ belief_bank_size: 2
129
+ rollout_mode: compact_rollout
130
+ num_tasks: 4
131
+ lightweight_field_size: 4
132
+ planner:
133
+ hidden_dim: 512
134
+ num_candidates: 8
135
+ action_dim: 14
136
+ num_support_modes: 3
137
+ utility_margin: 0.1
138
+ num_heads: 8
139
+ num_layers: 2
140
+ num_phases: 5
141
+ num_arm_roles: 4
142
+ top_k: 4
143
+ adapter_confidence_threshold: 0.45
144
+ loss_weights:
145
+ action: 1.0
146
+ phase: 0.08
147
+ arm_role: 0.08
148
+ support_mode: 0.08
149
+ corridor: 0.12
150
+ persistence: 0.06
151
+ disturbance: 0.06
152
+ world_model: 0.0
153
+ transition: 0.0
154
+ belief: 0.05
155
+ visibility: 0.05
156
+ clearance: 0.06
157
+ support_stability: 0.06
158
+ reocclusion: 0.06
159
+ occluder_contact: 0.05
160
+ grasp_affordance: 0.05
161
+ planner_success: 0.15
162
+ planner_risk: 0.08
163
+ planner_ranking: 0.15
164
+ proposal_reconstruction: 0.08
165
+ proposal_success: 0.1
166
+ proposal_ranking: 0.12
167
+ proposal_mode: 0.08
168
+ proposal_diversity: 0.05
169
+ role_swap_consistency: 0.0
170
+ task_metrics: 0.06
171
+ gate: 0.05
172
+ distillation: 0.05
173
+ calibration: 0.02
outputs/adapter_proxy/proxy_adapter_wrapped_clip_base_fast_seed17/metrics.json ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 1.1780137238295183,
6
+ "arm_role": 0.000544056080402895,
7
+ "belief": 0.10274084074341733,
8
+ "calibration": 0.0,
9
+ "clearance": 0.08112246429790622,
10
+ "corridor": 0.21243907782532598,
11
+ "distillation": 0.0036539296447501883,
12
+ "disturbance": 0.0010930091615908009,
13
+ "gate": 0.0,
14
+ "grasp_affordance": 0.011060374242294094,
15
+ "occluder_contact": 0.19354943348013837,
16
+ "persistence": 0.29602919886415097,
17
+ "phase": 0.1456924275211666,
18
+ "planner_ranking": 1.1046701566032742,
19
+ "planner_risk": 0.03252584584381269,
20
+ "planner_success": 0.5002943964108176,
21
+ "proposal_diversity": 0.0,
22
+ "proposal_mode": 0.9053098727827487,
23
+ "proposal_ranking": 0.7633599224297897,
24
+ "proposal_reconstruction": 1.1813416908616605,
25
+ "proposal_success": 0.5018493273983831,
26
+ "reocclusion": 0.1370238650428212,
27
+ "role_swap_consistency": 0.0,
28
+ "support_mode": 0.0010332910049170175,
29
+ "support_stability": 0.13264792088581168,
30
+ "task_metrics": 0.07693366929078879,
31
+ "total": 1.8312026676924333,
32
+ "transition": 0.0,
33
+ "uncertainty": 1.4312560102039045e-05,
34
+ "visibility": 0.096126823645571,
35
+ "world_model": 0.0
36
+ },
37
+ "val": {
38
+ "action": 1.146972581744194,
39
+ "arm_role": 2.7849786739864157e-05,
40
+ "belief": 0.09928969945758581,
41
+ "calibration": 0.0,
42
+ "clearance": 0.07546275667846203,
43
+ "corridor": 0.18693614657968283,
44
+ "distillation": 0.005982774979202077,
45
+ "disturbance": 0.0012652746545427362,
46
+ "gate": 0.0,
47
+ "grasp_affordance": 0.009092151012737304,
48
+ "occluder_contact": 0.19199086539447308,
49
+ "persistence": 0.4173499735770747,
50
+ "phase": 0.20510842488147318,
51
+ "planner_ranking": 1.0746948570013046,
52
+ "planner_risk": 0.03205434698611498,
53
+ "planner_success": 0.3765582703053951,
54
+ "proposal_diversity": 0.0,
55
+ "proposal_mode": 0.5553285405039787,
56
+ "proposal_ranking": 0.6613346468657255,
57
+ "proposal_reconstruction": 1.1140409670770168,
58
+ "proposal_success": 0.32496484369039536,
59
+ "reocclusion": 0.2021030569449067,
60
+ "role_swap_consistency": 0.0,
61
+ "support_mode": 0.00011286496555840131,
62
+ "support_stability": 0.13265474420040846,
63
+ "task_metrics": 0.06524855340830982,
64
+ "total": 1.7250810116529465,
65
+ "transition": 0.0,
66
+ "uncertainty": 8.913456255754681e-06,
67
+ "visibility": 0.09269411116838455,
68
+ "world_model": 0.0
69
+ }
70
+ },
71
+ {
72
+ "epoch": 1,
73
+ "train": {
74
+ "action": 1.1840074995289678,
75
+ "arm_role": 1.7842088946844857e-05,
76
+ "belief": 0.10108890773161598,
77
+ "calibration": 0.0,
78
+ "clearance": 0.08066983359015506,
79
+ "corridor": 0.20431885726587928,
80
+ "distillation": 0.005328163808292668,
81
+ "disturbance": 0.000988402207440231,
82
+ "gate": 0.0,
83
+ "grasp_affordance": 0.010460576832132496,
84
+ "occluder_contact": 0.19120351322319196,
85
+ "persistence": 0.20984708754669712,
86
+ "phase": 0.1270662468412648,
87
+ "planner_ranking": 1.051699793857077,
88
+ "planner_risk": 0.03183994928131933,
89
+ "planner_success": 0.37528212303700653,
90
+ "proposal_diversity": 0.0,
91
+ "proposal_mode": 0.541168266016504,
92
+ "proposal_ranking": 0.7413897125617318,
93
+ "proposal_reconstruction": 1.1529877976230953,
94
+ "proposal_success": 0.273181245378826,
95
+ "reocclusion": 0.11955958685797194,
96
+ "role_swap_consistency": 0.0,
97
+ "support_mode": 0.00014792317929475203,
98
+ "support_stability": 0.1314481108084969,
99
+ "task_metrics": 0.07543641668946846,
100
+ "total": 1.744326695151951,
101
+ "transition": 0.0,
102
+ "uncertainty": 7.94198708297739e-06,
103
+ "visibility": 0.09458825672450273,
104
+ "world_model": 0.0
105
+ },
106
+ "val": {
107
+ "action": 1.1787440478801727,
108
+ "arm_role": 1.3783465302452669e-05,
109
+ "belief": 0.0974554605782032,
110
+ "calibration": 0.0,
111
+ "clearance": 0.0746708307415247,
112
+ "corridor": 0.18591812625527382,
113
+ "distillation": 0.0038922334788367152,
114
+ "disturbance": 0.0005819438138132682,
115
+ "gate": 0.0,
116
+ "grasp_affordance": 0.008575586834922433,
117
+ "occluder_contact": 0.19005733728408813,
118
+ "persistence": 0.4048172008187976,
119
+ "phase": 0.24421580568014178,
120
+ "planner_ranking": 1.0271672308444977,
121
+ "planner_risk": 0.03108011605218053,
122
+ "planner_success": 0.3713325075805187,
123
+ "proposal_diversity": 0.0,
124
+ "proposal_mode": 0.46797188371419907,
125
+ "proposal_ranking": 0.6800601556897163,
126
+ "proposal_reconstruction": 1.0902876928448677,
127
+ "proposal_success": 0.25984624214470387,
128
+ "reocclusion": 0.19258547481149435,
129
+ "role_swap_consistency": 0.0,
130
+ "support_mode": 0.00014510085156871355,
131
+ "support_stability": 0.13228781055659056,
132
+ "task_metrics": 0.06339579145424068,
133
+ "total": 1.7367750853300095,
134
+ "transition": 0.0,
135
+ "uncertainty": 6.649694360483238e-06,
136
+ "visibility": 0.09114759508520365,
137
+ "world_model": 0.0
138
+ }
139
+ }
140
+ ]
outputs/adapter_proxy/proxy_adapter_wrapped_clip_base_reuse128_seed17/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2b7436f5685a38203440d3c84fc7f86908ddba389b844866554b4d0c0fd0768
3
+ size 878965604
outputs/adapter_proxy/proxy_adapter_wrapped_clip_base_reuse128_seed17/metrics.json ADDED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "epoch": 0,
4
+ "train": {
5
+ "action": 1.1828932802216345,
6
+ "arm_role": 0.00244398444339226,
7
+ "belief": 0.10072019552232839,
8
+ "calibration": 0.0,
9
+ "clearance": 0.07946077994063121,
10
+ "corridor": 0.21543118382702356,
11
+ "distillation": 0.00042247207064432005,
12
+ "disturbance": 0.0009066167868626844,
13
+ "gate": 0.0,
14
+ "grasp_affordance": 0.011442071496031615,
15
+ "occluder_contact": 0.19184747789086415,
16
+ "persistence": 0.5456274578801724,
17
+ "phase": 0.1889389944928033,
18
+ "planner_ranking": 0.8968874569199666,
19
+ "planner_risk": 0.03290799349358603,
20
+ "planner_success": 0.35506935793311656,
21
+ "proposal_diversity": 0.0,
22
+ "proposal_mode": 0.7599493966383093,
23
+ "proposal_ranking": 1.4915186276956767,
24
+ "proposal_reconstruction": 1.0803285907296574,
25
+ "proposal_success": 0.3194384900461726,
26
+ "reocclusion": 0.1872198152817598,
27
+ "role_swap_consistency": 0.0,
28
+ "support_mode": 0.4244060135689102,
29
+ "support_stability": 0.13155287654459977,
30
+ "task_metrics": 0.07493724777292804,
31
+ "total": 2.751452175509028,
32
+ "transition": 4.318220460114359,
33
+ "uncertainty": 1.531094441807496e-05,
34
+ "visibility": 0.09642757938689545,
35
+ "world_model": 0.0
36
+ },
37
+ "val": {
38
+ "action": 1.1680383563041687,
39
+ "arm_role": 0.0025612511759391054,
40
+ "belief": 0.09879593178629875,
41
+ "calibration": 0.0,
42
+ "clearance": 0.07741740134855112,
43
+ "corridor": 0.20817755659421286,
44
+ "distillation": 0.0,
45
+ "disturbance": 0.0007382428300237128,
46
+ "gate": 0.0,
47
+ "grasp_affordance": 0.010511041525751353,
48
+ "occluder_contact": 0.19018630186716715,
49
+ "persistence": 0.4509886346757412,
50
+ "phase": 0.1597365932694326,
51
+ "planner_ranking": 0.22907628491520882,
52
+ "planner_risk": 0.02909238338470459,
53
+ "planner_success": 0.18200772007306418,
54
+ "proposal_diversity": 0.0,
55
+ "proposal_mode": 0.71118057568868,
56
+ "proposal_ranking": 1.4729209462801616,
57
+ "proposal_reconstruction": 1.015290528535843,
58
+ "proposal_success": 0.2791739940643311,
59
+ "reocclusion": 0.16477556849519412,
60
+ "role_swap_consistency": 0.0,
61
+ "support_mode": 0.5340653051932652,
62
+ "support_stability": 0.12872510105371476,
63
+ "task_metrics": 0.06174707182993491,
64
+ "total": 2.407643111546834,
65
+ "transition": 3.39704422156016,
66
+ "uncertainty": 7.099100287177862e-06,
67
+ "visibility": 0.09383414511879286,
68
+ "world_model": 0.0
69
+ }
70
+ },
71
+ {
72
+ "epoch": 1,
73
+ "train": {
74
+ "action": 1.187044749740793,
75
+ "arm_role": 0.001233981896833587,
76
+ "belief": 0.09885497215916128,
77
+ "calibration": 0.0,
78
+ "clearance": 0.07787450506281451,
79
+ "corridor": 0.21069503738349224,
80
+ "distillation": 0.0,
81
+ "disturbance": 0.0007993320816102586,
82
+ "gate": 0.0,
83
+ "grasp_affordance": 0.0100274878874922,
84
+ "occluder_contact": 0.19033558541486242,
85
+ "persistence": 0.508021433908148,
86
+ "phase": 0.19023076729739413,
87
+ "planner_ranking": 0.058458461105322636,
88
+ "planner_risk": 0.03440776518976488,
89
+ "planner_success": 0.1257152666627359,
90
+ "proposal_diversity": 0.0,
91
+ "proposal_mode": 0.7171601638072679,
92
+ "proposal_ranking": 1.499033512187605,
93
+ "proposal_reconstruction": 1.066634831809196,
94
+ "proposal_success": 0.3018947724534684,
95
+ "reocclusion": 0.16926059677821248,
96
+ "role_swap_consistency": 0.0,
97
+ "support_mode": 0.4455214215426886,
98
+ "support_stability": 0.13059799138362668,
99
+ "task_metrics": 0.07159904390573502,
100
+ "total": 2.4211200485710336,
101
+ "transition": 3.487839874099283,
102
+ "uncertainty": 3.770016950513401e-06,
103
+ "visibility": 0.09318254963189614,
104
+ "world_model": 0.0
105
+ },
106
+ "val": {
107
+ "action": 1.1680383563041687,
108
+ "arm_role": 0.001657356577925384,
109
+ "belief": 0.09766801769534747,
110
+ "calibration": 0.0,
111
+ "clearance": 0.07670599135259787,
112
+ "corridor": 0.20785387406746547,
113
+ "distillation": 0.0,
114
+ "disturbance": 0.0007254338066559285,
115
+ "gate": 0.0,
116
+ "grasp_affordance": 0.009808245363334816,
117
+ "occluder_contact": 0.18903621584177016,
118
+ "persistence": 0.43403610289096833,
119
+ "phase": 0.17749264603480697,
120
+ "planner_ranking": 0.00962653555907309,
121
+ "planner_risk": 0.02840747827043136,
122
+ "planner_success": 0.0469651294251283,
123
+ "proposal_diversity": 0.0,
124
+ "proposal_mode": 0.5958098510901133,
125
+ "proposal_ranking": 1.567319353421529,
126
+ "proposal_reconstruction": 1.0027365585168202,
127
+ "proposal_success": 0.3119396299123764,
128
+ "reocclusion": 0.14939573630690575,
129
+ "role_swap_consistency": 0.0,
130
+ "support_mode": 0.38477273682753244,
131
+ "support_stability": 0.12813995343943438,
132
+ "task_metrics": 0.05784295691798131,
133
+ "total": 2.3466440041859946,
134
+ "transition": 3.402106682459513,
135
+ "uncertainty": 3.2218885041383296e-06,
136
+ "visibility": 0.09148541142543157,
137
+ "world_model": 0.0
138
+ }
139
+ },
140
+ {
141
+ "epoch": 2,
142
+ "train": {
143
+ "action": 1.187824563819821,
144
+ "arm_role": 0.0017524876263963075,
145
+ "belief": 0.09850409833573494,
146
+ "calibration": 0.0,
147
+ "clearance": 0.07750590865602013,
148
+ "corridor": 0.21022135673576042,
149
+ "distillation": 0.0,
150
+ "disturbance": 0.0008020720826393432,
151
+ "gate": 0.0,
152
+ "grasp_affordance": 0.009951516582841883,
153
+ "occluder_contact": 0.190022504630209,
154
+ "persistence": 0.5073582559448331,
155
+ "phase": 0.17974354623339506,
156
+ "planner_ranking": 0.009596662447169549,
157
+ "planner_risk": 0.03246875642603185,
158
+ "planner_success": 0.06673186843698266,
159
+ "proposal_diversity": 0.0,
160
+ "proposal_mode": 0.7036348676481167,
161
+ "proposal_ranking": 1.4990194234527459,
162
+ "proposal_reconstruction": 1.0593123075340976,
163
+ "proposal_success": 0.30170050113141034,
164
+ "reocclusion": 0.1706294410807245,
165
+ "role_swap_consistency": 0.0,
166
+ "support_mode": 0.4435207678490326,
167
+ "support_stability": 0.12954452590030782,
168
+ "task_metrics": 0.07019141574679803,
169
+ "total": 2.3952997061384824,
170
+ "transition": 3.4510987426052573,
171
+ "uncertainty": 2.649417712834203e-06,
172
+ "visibility": 0.09213429119657068,
173
+ "world_model": 0.0
174
+ },
175
+ "val": {
176
+ "action": 1.1680383563041687,
177
+ "arm_role": 0.0005777989087315897,
178
+ "belief": 0.09620878870288531,
179
+ "calibration": 0.0,
180
+ "clearance": 0.07562205567955971,
181
+ "corridor": 0.2099471464753151,
182
+ "distillation": 0.0,
183
+ "disturbance": 0.0008037402614718304,
184
+ "gate": 0.0,
185
+ "grasp_affordance": 0.009381201630458236,
186
+ "occluder_contact": 0.18789172718922298,
187
+ "persistence": 0.44771519377827645,
188
+ "phase": 0.15351878677805264,
189
+ "planner_ranking": 0.005908836016897112,
190
+ "planner_risk": 0.029111843556165695,
191
+ "planner_success": 0.030371779979517063,
192
+ "proposal_diversity": 0.0,
193
+ "proposal_mode": 0.6608088513215383,
194
+ "proposal_ranking": 1.519856317838033,
195
+ "proposal_reconstruction": 0.9984971513350804,
196
+ "proposal_success": 0.2899133563041687,
197
+ "reocclusion": 0.15338999405503273,
198
+ "role_swap_consistency": 0.0,
199
+ "support_mode": 0.4591325432062149,
200
+ "support_stability": 0.12738436510165532,
201
+ "task_metrics": 0.05577167191853126,
202
+ "total": 2.3411471287409467,
203
+ "transition": 3.3808055957158407,
204
+ "uncertainty": 1.560352771671584e-06,
205
+ "visibility": 0.08981477295358976,
206
+ "world_model": 0.0
207
+ }
208
+ },
209
+ {
210
+ "epoch": 3,
211
+ "train": {
212
+ "action": 1.1873075451169695,
213
+ "arm_role": 0.0010167556069400005,
214
+ "belief": 0.09699463875604276,
215
+ "calibration": 0.0,
216
+ "clearance": 0.0765939431280649,
217
+ "corridor": 0.21000426350271,
218
+ "distillation": 0.0,
219
+ "disturbance": 0.0008205439020564561,
220
+ "gate": 0.0,
221
+ "grasp_affordance": 0.009616962144886996,
222
+ "occluder_contact": 0.1890684860844572,
223
+ "persistence": 0.5268036977802756,
224
+ "phase": 0.18212753434141143,
225
+ "planner_ranking": 0.007861482998857102,
226
+ "planner_risk": 0.0305439497837249,
227
+ "planner_success": 0.0545816100632944,
228
+ "proposal_diversity": 0.0,
229
+ "proposal_mode": 0.7096028443144149,
230
+ "proposal_ranking": 1.49962230790563,
231
+ "proposal_reconstruction": 1.0570516235688154,
232
+ "proposal_success": 0.3012468101096754,
233
+ "reocclusion": 0.16893144916085637,
234
+ "role_swap_consistency": 0.0,
235
+ "support_mode": 0.43846767214166016,
236
+ "support_stability": 0.12901192851865492,
237
+ "task_metrics": 0.0706772211500827,
238
+ "total": 2.383075835324135,
239
+ "transition": 3.399705786664947,
240
+ "uncertainty": 1.833678168140796e-06,
241
+ "visibility": 0.09043271063255663,
242
+ "world_model": 0.0
243
+ },
244
+ "val": {
245
+ "action": 1.1680383563041687,
246
+ "arm_role": 0.0008160963848543664,
247
+ "belief": 0.09533951580524444,
248
+ "calibration": 0.0,
249
+ "clearance": 0.07521944617231686,
250
+ "corridor": 0.2074363355835279,
251
+ "distillation": 0.0,
252
+ "disturbance": 0.0007471947777958121,
253
+ "gate": 0.0,
254
+ "grasp_affordance": 0.009425108910848697,
255
+ "occluder_contact": 0.187281297147274,
256
+ "persistence": 0.42866156020512186,
257
+ "phase": 0.13389708844115375,
258
+ "planner_ranking": 0.007386005097456897,
259
+ "planner_risk": 0.03013829297075669,
260
+ "planner_success": 0.027494619445254404,
261
+ "proposal_diversity": 0.0,
262
+ "proposal_mode": 0.7145659645398458,
263
+ "proposal_ranking": 1.4651208639144897,
264
+ "proposal_reconstruction": 0.99560972849528,
265
+ "proposal_success": 0.29622272253036497,
266
+ "reocclusion": 0.15021706620852152,
267
+ "role_swap_consistency": 0.0,
268
+ "support_mode": 0.3665752013524373,
269
+ "support_stability": 0.12691180408000946,
270
+ "task_metrics": 0.056707360843817396,
271
+ "total": 2.3298022985458373,
272
+ "transition": 3.3876041332880655,
273
+ "uncertainty": 1.581879031557302e-06,
274
+ "visibility": 0.08887151132027309,
275
+ "world_model": 0.0
276
+ }
277
+ }
278
+ ]
outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_fast_seed17/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f52bffd6b1228221303a52fa31678c02c0df4cf0493aae456340f2c0fd19e66d
3
+ size 878963300
outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_fast_seed17/summary.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_rebuild128_seed17/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bfd34e337b63d17b4a09d4c5b47a2b02484ae20ebd88d0c43c15e90f2a53eea
3
+ size 878965540
outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_rebuild128_seed17/config_resolved.yaml ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: proxy_adapter_wrapped_clip_rank_only_rebuild128_seed17
2
+ output_dir: /workspace/workspace/outputs/adapter_proxy
3
+ device: cuda
4
+ seed: 17
5
+ init_checkpoint: /workspace/workspace/VLAarchtests2/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt
6
+ init_strict: false
7
+ data:
8
+ proxies:
9
+ - foliage_proxy
10
+ - bag_proxy
11
+ - cloth_proxy
12
+ resolution: 224
13
+ dataset_version: reveal_proxy_v6_rgbd_elastic_state_phase
14
+ train_episodes_per_proxy: 128
15
+ val_episodes_per_proxy: 32
16
+ train_dataset_path: /workspace/workspace/data/reveal_proxy/proxy_train_clip224_v6_rgbd_stage3_phase_rebuild128_seed17.pt
17
+ val_dataset_path: /workspace/workspace/data/reveal_proxy/proxy_val_clip224_v6_rgbd_stage3_phase_rebuild128_seed17.pt
18
+ rebuild_dataset: true
19
+ chunk_horizon: 8
20
+ rollout_horizon: 5
21
+ history_steps: 6
22
+ planner_candidates: 8
23
+ seed: 17
24
+ optim:
25
+ epochs: 4
26
+ batch_size: 8
27
+ num_workers: 32
28
+ lr: 5.0e-05
29
+ weight_decay: 0.0001
30
+ trainer:
31
+ policy_type: adapter_wrapped
32
+ training_regime: proxy_rank_only
33
+ eval_mode: adapter_active
34
+ adapter_mode: adapter_active
35
+ adapter_use_transition_model: false
36
+ adapter_use_task_conditioning: true
37
+ use_bf16: true
38
+ grad_clip_norm: 1.0
39
+ freeze_backbone: true
40
+ gradient_checkpointing: false
41
+ plan_during_train: false
42
+ plan_during_eval: false
43
+ support_mode_conditioning: true
44
+ planner_mode: false
45
+ use_depth: true
46
+ use_world_model: false
47
+ use_role_tokens: true
48
+ compute_equivariance_probe: false
49
+ trainable_parameter_prefixes:
50
+ - adapter.proposal_prior
51
+ - adapter.planner
52
+ policy:
53
+ backbone:
54
+ model_name: openai/clip-vit-base-patch32
55
+ hidden_dim: 512
56
+ max_text_tokens: 32
57
+ freeze_backbone: true
58
+ gradient_checkpointing: false
59
+ use_dummy_backbone: false
60
+ fusion:
61
+ hidden_dim: 512
62
+ num_cameras: 3
63
+ num_layers: 4
64
+ num_heads: 8
65
+ ff_dim: 2048
66
+ dropout: 0.1
67
+ proprio_dim: 32
68
+ proprio_tokens: 1
69
+ memory:
70
+ hidden_dim: 512
71
+ action_dim: 14
72
+ history_steps: 6
73
+ scene_history_steps: 3
74
+ belief_history_steps: 8
75
+ num_layers: 2
76
+ dropout: 0.1
77
+ memory_bank_size: 4
78
+ scene_bank_size: 2
79
+ belief_bank_size: 2
80
+ num_heads: 8
81
+ max_history_steps: 8
82
+ decoder:
83
+ hidden_dim: 512
84
+ num_heads: 8
85
+ num_layers: 4
86
+ ff_dim: 2048
87
+ dropout: 0.1
88
+ chunk_size: 8
89
+ action_dim: 14
90
+ arm_action_dim: 7
91
+ num_candidates: 8
92
+ num_phases: 5
93
+ num_arm_roles: 4
94
+ num_proposal_modes: 7
95
+ planner_top_k: 4
96
+ proposal_delta_scale: 0.2
97
+ proposal_slot_scale: 0.05
98
+ reveal_head:
99
+ hidden_dim: 512
100
+ num_support_modes: 3
101
+ num_approach_templates: 32
102
+ rollout_horizon: 5
103
+ belief_map_size: 32
104
+ field_size: 16
105
+ num_heads: 8
106
+ predict_belief_map: true
107
+ num_phases: 5
108
+ num_arm_roles: 4
109
+ num_interaction_tokens: 8
110
+ num_tasks: 4
111
+ world_model:
112
+ hidden_dim: 512
113
+ action_dim: 14
114
+ num_support_modes: 3
115
+ num_approach_templates: 32
116
+ rollout_horizon: 5
117
+ field_size: 16
118
+ num_heads: 8
119
+ num_phases: 5
120
+ num_arm_roles: 4
121
+ num_interaction_tokens: 8
122
+ belief_map_size: 32
123
+ predict_belief_map: true
124
+ scene_bank_size: 2
125
+ belief_bank_size: 2
126
+ rollout_mode: compact_rollout
127
+ num_tasks: 4
128
+ lightweight_field_size: 4
129
+ planner:
130
+ hidden_dim: 512
131
+ num_candidates: 8
132
+ action_dim: 14
133
+ num_support_modes: 3
134
+ utility_margin: 0.1
135
+ num_heads: 8
136
+ num_layers: 2
137
+ num_phases: 5
138
+ num_arm_roles: 4
139
+ top_k: 4
140
+ adapter_confidence_threshold: 0.55
141
+ loss_weights:
142
+ action: 0.5
143
+ phase: 0.0
144
+ arm_role: 0.0
145
+ support_mode: 0.0
146
+ corridor: 0.0
147
+ persistence: 0.0
148
+ disturbance: 0.0
149
+ world_model: 0.0
150
+ transition: 0.0
151
+ belief: 0.0
152
+ visibility: 0.0
153
+ clearance: 0.0
154
+ support_stability: 0.0
155
+ reocclusion: 0.0
156
+ occluder_contact: 0.0
157
+ grasp_affordance: 0.0
158
+ planner_success: 0.0
159
+ planner_risk: 0.0
160
+ planner_ranking: 0.2
161
+ proposal_reconstruction: 0.0
162
+ proposal_success: 0.1
163
+ proposal_ranking: 0.2
164
+ proposal_mode: 0.1
165
+ proposal_diversity: 0.02
166
+ role_swap_consistency: 0.0
167
+ task_metrics: 0.0
168
+ gate: 0.0
169
+ distillation: 0.05
170
+ calibration: 0.0
outputs/adapter_proxy/proxy_adapter_wrapped_clip_rank_only_rebuild128_seed17/summary.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/adapter_proxy/proxy_adapter_wrapped_clip_transition_fast_seed17/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c5fd7c44df9c0406fe34b2d94e1d21594273cb51d21e6a15fc5c82486c95200
3
+ size 878964132
outputs/anchor_adapter_wrapped_dual_push_seed17/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31ba52c98865e44202c6a6d41e290c7ba0914da8aea1090c7d2e2a2f9b987a77
3
+ size 811648748
outputs/maniskill_bag_bridge_smoke_v1/adapter_active_ft_seed17/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62e3a1fd0956525ecf82a3c43386645545dc39770062b82ea94a4e7e23d9a296
3
+ size 878857572
outputs/maniskill_bag_bridge_smoke_v1/adapter_active_ft_seed17/summary.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/maniskill_bag_bridge_smoke_v1/adapter_active_ft_seed23/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2cc79d0a38e6bdf98609a7bf9dd2f42be5f07ce442911879ce00c1815f31815
3
+ size 878856868
outputs/maniskill_bag_bridge_smoke_v1/adapter_active_ft_seed23/summary.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/maniskill_bag_bridge_smoke_v1/trunk_only_ft_seed17/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc20a31e9014f71b1e5475f959c3561cc2cbb8557e6a2cd21bc7a1502192b889
3
+ size 817967468
outputs/maniskill_bag_bridge_smoke_v1/trunk_only_ft_seed17/summary.json ADDED
@@ -0,0 +1,1104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": "bag",
3
+ "variant": "trunk_only_ft",
4
+ "checkpoint_path": "/workspace/workspace/outputs/maniskill_bag_bridge_smoke_v1/trunk_only_ft_seed17/checkpoint_best.pt",
5
+ "init_info": {
6
+ "path": "/workspace/workspace/VLAarchtests2/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt",
7
+ "loaded_keys": 489,
8
+ "skipped_shape_mismatch_keys": [
9
+ "memory.scene_memory.position_embedding",
10
+ "memory.scene_memory.bank_queries",
11
+ "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
12
+ "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
13
+ "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
14
+ "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
15
+ "memory.scene_memory.sequence_encoder.layers.0.linear1.weight",
16
+ "memory.scene_memory.sequence_encoder.layers.0.linear1.bias",
17
+ "memory.scene_memory.sequence_encoder.layers.0.linear2.weight",
18
+ "memory.scene_memory.sequence_encoder.layers.0.linear2.bias",
19
+ "memory.scene_memory.sequence_encoder.layers.0.norm1.weight",
20
+ "memory.scene_memory.sequence_encoder.layers.0.norm1.bias",
21
+ "memory.scene_memory.sequence_encoder.layers.0.norm2.weight",
22
+ "memory.scene_memory.sequence_encoder.layers.0.norm2.bias",
23
+ "memory.scene_memory.bank_attention.in_proj_weight",
24
+ "memory.scene_memory.bank_attention.in_proj_bias",
25
+ "memory.scene_memory.bank_attention.out_proj.weight",
26
+ "memory.scene_memory.bank_attention.out_proj.bias",
27
+ "memory.scene_memory.action_proj.0.weight",
28
+ "memory.scene_memory.action_proj.0.bias",
29
+ "memory.scene_memory.action_proj.1.weight",
30
+ "memory.scene_memory.action_proj.1.bias",
31
+ "memory.scene_memory.write_gate.0.weight",
32
+ "memory.scene_memory.write_gate.0.bias",
33
+ "memory.scene_memory.write_gate.1.weight",
34
+ "memory.scene_memory.write_gate.1.bias",
35
+ "memory.scene_memory.write_gate.3.weight",
36
+ "memory.scene_memory.write_gate.3.bias",
37
+ "memory.scene_memory.token_proj.0.weight",
38
+ "memory.scene_memory.token_proj.0.bias",
39
+ "memory.scene_memory.token_proj.1.weight",
40
+ "memory.scene_memory.token_proj.1.bias",
41
+ "memory.belief_memory.position_embedding",
42
+ "memory.belief_memory.bank_queries",
43
+ "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
44
+ "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
45
+ "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
46
+ "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
47
+ "memory.belief_memory.sequence_encoder.layers.0.linear1.weight",
48
+ "memory.belief_memory.sequence_encoder.layers.0.linear1.bias",
49
+ "memory.belief_memory.sequence_encoder.layers.0.linear2.weight",
50
+ "memory.belief_memory.sequence_encoder.layers.0.linear2.bias",
51
+ "memory.belief_memory.sequence_encoder.layers.0.norm1.weight",
52
+ "memory.belief_memory.sequence_encoder.layers.0.norm1.bias",
53
+ "memory.belief_memory.sequence_encoder.layers.0.norm2.weight",
54
+ "memory.belief_memory.sequence_encoder.layers.0.norm2.bias",
55
+ "memory.belief_memory.bank_attention.in_proj_weight",
56
+ "memory.belief_memory.bank_attention.in_proj_bias",
57
+ "memory.belief_memory.bank_attention.out_proj.weight",
58
+ "memory.belief_memory.bank_attention.out_proj.bias",
59
+ "memory.belief_memory.action_proj.0.weight",
60
+ "memory.belief_memory.action_proj.0.bias",
61
+ "memory.belief_memory.action_proj.1.weight",
62
+ "memory.belief_memory.action_proj.1.bias",
63
+ "memory.belief_memory.write_gate.0.weight",
64
+ "memory.belief_memory.write_gate.0.bias",
65
+ "memory.belief_memory.write_gate.1.weight",
66
+ "memory.belief_memory.write_gate.1.bias",
67
+ "memory.belief_memory.write_gate.3.weight",
68
+ "memory.belief_memory.write_gate.3.bias",
69
+ "memory.belief_memory.token_proj.0.weight",
70
+ "memory.belief_memory.token_proj.0.bias",
71
+ "memory.belief_memory.token_proj.1.weight",
72
+ "memory.belief_memory.token_proj.1.bias",
73
+ "decoder.arm_decoder.layers.0.self_attn.in_proj_weight",
74
+ "decoder.arm_decoder.layers.0.self_attn.in_proj_bias",
75
+ "decoder.arm_decoder.layers.0.self_attn.out_proj.weight",
76
+ "decoder.arm_decoder.layers.0.self_attn.out_proj.bias",
77
+ "decoder.arm_decoder.layers.0.multihead_attn.in_proj_weight",
78
+ "decoder.arm_decoder.layers.0.multihead_attn.in_proj_bias",
79
+ "decoder.arm_decoder.layers.0.multihead_attn.out_proj.weight",
80
+ "decoder.arm_decoder.layers.0.multihead_attn.out_proj.bias",
81
+ "decoder.arm_decoder.layers.0.linear1.weight",
82
+ "decoder.arm_decoder.layers.0.linear1.bias",
83
+ "decoder.arm_decoder.layers.0.linear2.weight",
84
+ "decoder.arm_decoder.layers.0.linear2.bias",
85
+ "decoder.arm_decoder.layers.0.norm1.weight",
86
+ "decoder.arm_decoder.layers.0.norm1.bias",
87
+ "decoder.arm_decoder.layers.0.norm2.weight",
88
+ "decoder.arm_decoder.layers.0.norm2.bias",
89
+ "decoder.arm_decoder.layers.0.norm3.weight",
90
+ "decoder.arm_decoder.layers.0.norm3.bias",
91
+ "decoder.arm_decoder.layers.1.self_attn.in_proj_weight",
92
+ "decoder.arm_decoder.layers.1.self_attn.in_proj_bias",
93
+ "decoder.arm_decoder.layers.1.self_attn.out_proj.weight",
94
+ "decoder.arm_decoder.layers.1.self_attn.out_proj.bias",
95
+ "decoder.arm_decoder.layers.1.multihead_attn.in_proj_weight",
96
+ "decoder.arm_decoder.layers.1.multihead_attn.in_proj_bias",
97
+ "decoder.arm_decoder.layers.1.multihead_attn.out_proj.weight",
98
+ "decoder.arm_decoder.layers.1.multihead_attn.out_proj.bias",
99
+ "decoder.arm_decoder.layers.1.linear1.weight",
100
+ "decoder.arm_decoder.layers.1.linear1.bias",
101
+ "decoder.arm_decoder.layers.1.linear2.weight",
102
+ "decoder.arm_decoder.layers.1.linear2.bias",
103
+ "decoder.arm_decoder.layers.1.norm1.weight",
104
+ "decoder.arm_decoder.layers.1.norm1.bias",
105
+ "decoder.arm_decoder.layers.1.norm2.weight",
106
+ "decoder.arm_decoder.layers.1.norm2.bias",
107
+ "decoder.arm_decoder.layers.1.norm3.weight",
108
+ "decoder.arm_decoder.layers.1.norm3.bias",
109
+ "decoder.arm_decoder.layers.2.self_attn.in_proj_weight",
110
+ "decoder.arm_decoder.layers.2.self_attn.in_proj_bias",
111
+ "decoder.arm_decoder.layers.2.self_attn.out_proj.weight",
112
+ "decoder.arm_decoder.layers.2.self_attn.out_proj.bias",
113
+ "decoder.arm_decoder.layers.2.multihead_attn.in_proj_weight",
114
+ "decoder.arm_decoder.layers.2.multihead_attn.in_proj_bias",
115
+ "decoder.arm_decoder.layers.2.multihead_attn.out_proj.weight",
116
+ "decoder.arm_decoder.layers.2.multihead_attn.out_proj.bias",
117
+ "decoder.arm_decoder.layers.2.linear1.weight",
118
+ "decoder.arm_decoder.layers.2.linear1.bias",
119
+ "decoder.arm_decoder.layers.2.linear2.weight",
120
+ "decoder.arm_decoder.layers.2.linear2.bias",
121
+ "decoder.arm_decoder.layers.2.norm1.weight",
122
+ "decoder.arm_decoder.layers.2.norm1.bias",
123
+ "decoder.arm_decoder.layers.2.norm2.weight",
124
+ "decoder.arm_decoder.layers.2.norm2.bias",
125
+ "decoder.arm_decoder.layers.2.norm3.weight",
126
+ "decoder.arm_decoder.layers.2.norm3.bias",
127
+ "decoder.arm_decoder.layers.3.self_attn.in_proj_weight",
128
+ "decoder.arm_decoder.layers.3.self_attn.in_proj_bias",
129
+ "decoder.arm_decoder.layers.3.self_attn.out_proj.weight",
130
+ "decoder.arm_decoder.layers.3.self_attn.out_proj.bias",
131
+ "decoder.arm_decoder.layers.3.multihead_attn.in_proj_weight",
132
+ "decoder.arm_decoder.layers.3.multihead_attn.in_proj_bias",
133
+ "decoder.arm_decoder.layers.3.multihead_attn.out_proj.weight",
134
+ "decoder.arm_decoder.layers.3.multihead_attn.out_proj.bias",
135
+ "decoder.arm_decoder.layers.3.linear1.weight",
136
+ "decoder.arm_decoder.layers.3.linear1.bias",
137
+ "decoder.arm_decoder.layers.3.linear2.weight",
138
+ "decoder.arm_decoder.layers.3.linear2.bias",
139
+ "decoder.arm_decoder.layers.3.norm1.weight",
140
+ "decoder.arm_decoder.layers.3.norm1.bias",
141
+ "decoder.arm_decoder.layers.3.norm2.weight",
142
+ "decoder.arm_decoder.layers.3.norm2.bias",
143
+ "decoder.arm_decoder.layers.3.norm3.weight",
144
+ "decoder.arm_decoder.layers.3.norm3.bias",
145
+ "decoder.arm_identity.weight",
146
+ "decoder.task_embedding.weight",
147
+ "decoder.phase_adapter.weight",
148
+ "decoder.phase_adapter.bias",
149
+ "decoder.role_adapter.weight",
150
+ "decoder.role_adapter.bias",
151
+ "decoder.context_proj.0.weight",
152
+ "decoder.context_proj.0.bias",
153
+ "decoder.context_proj.1.weight",
154
+ "decoder.context_proj.1.bias",
155
+ "decoder.arm_head.0.weight",
156
+ "decoder.arm_head.0.bias",
157
+ "decoder.arm_head.1.weight",
158
+ "decoder.arm_head.1.bias",
159
+ "decoder.arm_mean.weight",
160
+ "decoder.arm_mean.bias",
161
+ "decoder.arm_log_std.weight",
162
+ "decoder.arm_log_std.bias",
163
+ "decoder.proposal_mode_head.0.weight",
164
+ "decoder.proposal_mode_head.0.bias",
165
+ "decoder.proposal_mode_head.1.weight",
166
+ "decoder.proposal_mode_head.1.bias",
167
+ "decoder.proposal_mode_head.3.weight",
168
+ "decoder.proposal_mode_head.3.bias",
169
+ "decoder.proposal_mode_embeddings.weight",
170
+ "decoder.proposal_slot_embeddings.weight",
171
+ "decoder.mode_residual_heads.0.0.weight",
172
+ "decoder.mode_residual_heads.0.0.bias",
173
+ "decoder.mode_residual_heads.0.1.weight",
174
+ "decoder.mode_residual_heads.0.1.bias",
175
+ "decoder.mode_residual_heads.0.3.weight",
176
+ "decoder.mode_residual_heads.0.3.bias",
177
+ "decoder.mode_residual_heads.1.0.weight",
178
+ "decoder.mode_residual_heads.1.0.bias",
179
+ "decoder.mode_residual_heads.1.1.weight",
180
+ "decoder.mode_residual_heads.1.1.bias",
181
+ "decoder.mode_residual_heads.1.3.weight",
182
+ "decoder.mode_residual_heads.1.3.bias",
183
+ "decoder.mode_residual_heads.2.0.weight",
184
+ "decoder.mode_residual_heads.2.0.bias",
185
+ "decoder.mode_residual_heads.2.1.weight",
186
+ "decoder.mode_residual_heads.2.1.bias",
187
+ "decoder.mode_residual_heads.2.3.weight",
188
+ "decoder.mode_residual_heads.2.3.bias",
189
+ "decoder.mode_residual_heads.3.0.weight",
190
+ "decoder.mode_residual_heads.3.0.bias",
191
+ "decoder.mode_residual_heads.3.1.weight",
192
+ "decoder.mode_residual_heads.3.1.bias",
193
+ "decoder.mode_residual_heads.3.3.weight",
194
+ "decoder.mode_residual_heads.3.3.bias",
195
+ "decoder.mode_residual_heads.4.0.weight",
196
+ "decoder.mode_residual_heads.4.0.bias",
197
+ "decoder.mode_residual_heads.4.1.weight",
198
+ "decoder.mode_residual_heads.4.1.bias",
199
+ "decoder.mode_residual_heads.4.3.weight",
200
+ "decoder.mode_residual_heads.4.3.bias",
201
+ "decoder.mode_residual_heads.5.0.weight",
202
+ "decoder.mode_residual_heads.5.0.bias",
203
+ "decoder.mode_residual_heads.5.1.weight",
204
+ "decoder.mode_residual_heads.5.1.bias",
205
+ "decoder.mode_residual_heads.5.3.weight",
206
+ "decoder.mode_residual_heads.5.3.bias",
207
+ "decoder.mode_residual_heads.6.0.weight",
208
+ "decoder.mode_residual_heads.6.0.bias",
209
+ "decoder.mode_residual_heads.6.1.weight",
210
+ "decoder.mode_residual_heads.6.1.bias",
211
+ "decoder.mode_residual_heads.6.3.weight",
212
+ "decoder.mode_residual_heads.6.3.bias",
213
+ "decoder.slot_delta.0.weight",
214
+ "decoder.slot_delta.0.bias",
215
+ "decoder.slot_delta.1.weight",
216
+ "decoder.slot_delta.1.bias",
217
+ "decoder.slot_delta.3.weight",
218
+ "decoder.slot_delta.3.bias",
219
+ "decoder.proposal_score.0.weight",
220
+ "decoder.proposal_score.0.bias",
221
+ "decoder.proposal_score.1.weight",
222
+ "decoder.proposal_score.1.bias",
223
+ "decoder.proposal_score.3.weight",
224
+ "decoder.proposal_score.3.bias",
225
+ "elastic_state_head.interaction_queries",
226
+ "elastic_state_head.interaction_attention.in_proj_weight",
227
+ "elastic_state_head.interaction_attention.in_proj_bias",
228
+ "elastic_state_head.interaction_attention.out_proj.weight",
229
+ "elastic_state_head.interaction_attention.out_proj.bias",
230
+ "elastic_state_head.interaction_mlp.0.weight",
231
+ "elastic_state_head.interaction_mlp.0.bias",
232
+ "elastic_state_head.interaction_mlp.1.weight",
233
+ "elastic_state_head.interaction_mlp.1.bias",
234
+ "elastic_state_head.interaction_mlp.3.weight",
235
+ "elastic_state_head.interaction_mlp.3.bias",
236
+ "elastic_state_head.decoder.field_queries",
237
+ "elastic_state_head.decoder.field_attention.in_proj_weight",
238
+ "elastic_state_head.decoder.field_attention.in_proj_bias",
239
+ "elastic_state_head.decoder.field_attention.out_proj.weight",
240
+ "elastic_state_head.decoder.field_attention.out_proj.bias",
241
+ "elastic_state_head.decoder.field_mlp.0.weight",
242
+ "elastic_state_head.decoder.field_mlp.0.bias",
243
+ "elastic_state_head.decoder.field_mlp.1.weight",
244
+ "elastic_state_head.decoder.field_mlp.1.bias",
245
+ "elastic_state_head.decoder.field_mlp.3.weight",
246
+ "elastic_state_head.decoder.field_mlp.3.bias",
247
+ "elastic_state_head.decoder.summary_proj.0.weight",
248
+ "elastic_state_head.decoder.summary_proj.0.bias",
249
+ "elastic_state_head.decoder.summary_proj.1.weight",
250
+ "elastic_state_head.decoder.summary_proj.1.bias",
251
+ "elastic_state_head.decoder.phase_head.0.weight",
252
+ "elastic_state_head.decoder.phase_head.0.bias",
253
+ "elastic_state_head.decoder.phase_head.1.weight",
254
+ "elastic_state_head.decoder.phase_head.1.bias",
255
+ "elastic_state_head.decoder.phase_head.3.weight",
256
+ "elastic_state_head.decoder.phase_head.3.bias",
257
+ "elastic_state_head.decoder.arm_role_head.0.weight",
258
+ "elastic_state_head.decoder.arm_role_head.0.bias",
259
+ "elastic_state_head.decoder.arm_role_head.1.weight",
260
+ "elastic_state_head.decoder.arm_role_head.1.bias",
261
+ "elastic_state_head.decoder.arm_role_head.3.weight",
262
+ "elastic_state_head.decoder.arm_role_head.3.bias",
263
+ "elastic_state_head.decoder.arm_identity.weight",
264
+ "elastic_state_head.decoder.support_mode.0.weight",
265
+ "elastic_state_head.decoder.support_mode.0.bias",
266
+ "elastic_state_head.decoder.support_mode.1.weight",
267
+ "elastic_state_head.decoder.support_mode.1.bias",
268
+ "elastic_state_head.decoder.support_mode.3.weight",
269
+ "elastic_state_head.decoder.support_mode.3.bias",
270
+ "elastic_state_head.decoder.access_field.weight",
271
+ "elastic_state_head.decoder.access_field.bias",
272
+ "elastic_state_head.decoder.target_belief_field.weight",
273
+ "elastic_state_head.decoder.target_belief_field.bias",
274
+ "elastic_state_head.decoder.visibility_field.weight",
275
+ "elastic_state_head.decoder.visibility_field.bias",
276
+ "elastic_state_head.decoder.clearance_field.weight",
277
+ "elastic_state_head.decoder.clearance_field.bias",
278
+ "elastic_state_head.decoder.occluder_contact_field.weight",
279
+ "elastic_state_head.decoder.occluder_contact_field.bias",
280
+ "elastic_state_head.decoder.grasp_affordance_field.weight",
281
+ "elastic_state_head.decoder.grasp_affordance_field.bias",
282
+ "elastic_state_head.decoder.support_stability_field.weight",
283
+ "elastic_state_head.decoder.support_stability_field.bias",
284
+ "elastic_state_head.decoder.persistence_field.weight",
285
+ "elastic_state_head.decoder.persistence_field.bias",
286
+ "elastic_state_head.decoder.reocclusion_field.weight",
287
+ "elastic_state_head.decoder.reocclusion_field.bias",
288
+ "elastic_state_head.decoder.disturbance_field.weight",
289
+ "elastic_state_head.decoder.disturbance_field.bias",
290
+ "elastic_state_head.decoder.uncertainty_field.weight",
291
+ "elastic_state_head.decoder.uncertainty_field.bias",
292
+ "elastic_state_head.decoder.reocclusion_head.0.weight",
293
+ "elastic_state_head.decoder.reocclusion_head.0.bias",
294
+ "elastic_state_head.decoder.reocclusion_head.1.weight",
295
+ "elastic_state_head.decoder.reocclusion_head.1.bias",
296
+ "elastic_state_head.decoder.reocclusion_head.3.weight",
297
+ "elastic_state_head.decoder.reocclusion_head.3.bias",
298
+ "elastic_state_head.decoder.task_embedding.weight",
299
+ "elastic_state_head.decoder.task_field_affine.weight",
300
+ "elastic_state_head.decoder.task_field_affine.bias",
301
+ "elastic_state_head.decoder.task_summary_adapter.0.weight",
302
+ "elastic_state_head.decoder.task_summary_adapter.0.bias",
303
+ "elastic_state_head.decoder.task_summary_adapter.1.weight",
304
+ "elastic_state_head.decoder.task_summary_adapter.1.bias",
305
+ "elastic_state_head.decoder.task_phase_head.weight",
306
+ "elastic_state_head.decoder.task_phase_head.bias",
307
+ "elastic_state_head.decoder.task_support_head.weight",
308
+ "elastic_state_head.decoder.task_support_head.bias",
309
+ "elastic_state_head.decoder.task_reocclusion_head.weight",
310
+ "elastic_state_head.decoder.task_reocclusion_head.bias",
311
+ "elastic_state_head.decoder.task_metric_head.0.weight",
312
+ "elastic_state_head.decoder.task_metric_head.0.bias",
313
+ "elastic_state_head.decoder.task_metric_head.1.weight",
314
+ "elastic_state_head.decoder.task_metric_head.1.bias",
315
+ "elastic_state_head.decoder.task_metric_head.3.weight",
316
+ "elastic_state_head.decoder.task_metric_head.3.bias",
317
+ "world_model.state_encoder.0.weight",
318
+ "world_model.state_encoder.0.bias",
319
+ "world_model.state_encoder.1.weight",
320
+ "world_model.state_encoder.1.bias",
321
+ "world_model.scene_memory_proj.0.weight",
322
+ "world_model.scene_memory_proj.0.bias",
323
+ "world_model.scene_memory_proj.1.weight",
324
+ "world_model.scene_memory_proj.1.bias",
325
+ "world_model.belief_memory_proj.0.weight",
326
+ "world_model.belief_memory_proj.0.bias",
327
+ "world_model.belief_memory_proj.1.weight",
328
+ "world_model.belief_memory_proj.1.bias",
329
+ "world_model.action_encoder.0.weight",
330
+ "world_model.action_encoder.0.bias",
331
+ "world_model.action_encoder.1.weight",
332
+ "world_model.action_encoder.1.bias",
333
+ "world_model.transition.weight_ih",
334
+ "world_model.transition.weight_hh",
335
+ "world_model.transition.bias_ih",
336
+ "world_model.transition.bias_hh",
337
+ "world_model.scene_memory_update.weight",
338
+ "world_model.scene_memory_update.bias",
339
+ "world_model.belief_memory_update.weight",
340
+ "world_model.belief_memory_update.bias",
341
+ "world_model.compact_decoder.weight",
342
+ "world_model.compact_decoder.bias",
343
+ "world_model.target_belief_head.weight",
344
+ "world_model.target_belief_head.bias",
345
+ "world_model.visibility_head.weight",
346
+ "world_model.visibility_head.bias",
347
+ "world_model.clearance_head.weight",
348
+ "world_model.clearance_head.bias",
349
+ "world_model.occluder_contact_head.weight",
350
+ "world_model.occluder_contact_head.bias",
351
+ "world_model.grasp_affordance_head.weight",
352
+ "world_model.grasp_affordance_head.bias",
353
+ "world_model.support_stability_head.weight",
354
+ "world_model.support_stability_head.bias",
355
+ "world_model.persistence_head.weight",
356
+ "world_model.persistence_head.bias",
357
+ "world_model.reocclusion_head.weight",
358
+ "world_model.reocclusion_head.bias",
359
+ "world_model.disturbance_head.weight",
360
+ "world_model.disturbance_head.bias",
361
+ "world_model.uncertainty_head.weight",
362
+ "world_model.uncertainty_head.bias",
363
+ "world_model.access_head.weight",
364
+ "world_model.access_head.bias",
365
+ "world_model.task_embedding.weight",
366
+ "world_model.spatial_field_encoder.0.weight",
367
+ "world_model.spatial_field_encoder.0.bias",
368
+ "world_model.spatial_field_encoder.2.weight",
369
+ "world_model.spatial_field_encoder.2.bias",
370
+ "world_model.spatial_context_proj.0.weight",
371
+ "world_model.spatial_context_proj.0.bias",
372
+ "world_model.spatial_context_proj.1.weight",
373
+ "world_model.spatial_context_proj.1.bias",
374
+ "world_model.spatial_gate_z.weight",
375
+ "world_model.spatial_gate_z.bias",
376
+ "world_model.spatial_gate_r.weight",
377
+ "world_model.spatial_gate_r.bias",
378
+ "world_model.spatial_candidate.weight",
379
+ "world_model.spatial_candidate.bias",
380
+ "world_model.spatial_summary_proj.0.weight",
381
+ "world_model.spatial_summary_proj.0.bias",
382
+ "world_model.spatial_summary_proj.1.weight",
383
+ "world_model.spatial_summary_proj.1.bias",
384
+ "world_model.spatial_phase_head.weight",
385
+ "world_model.spatial_phase_head.bias",
386
+ "world_model.spatial_support_mode_head.weight",
387
+ "world_model.spatial_support_mode_head.bias",
388
+ "world_model.spatial_arm_role_head.weight",
389
+ "world_model.spatial_arm_role_head.bias",
390
+ "world_model.spatial_reocclusion_head.weight",
391
+ "world_model.spatial_reocclusion_head.bias",
392
+ "world_model.spatial_target_belief_head.weight",
393
+ "world_model.spatial_target_belief_head.bias",
394
+ "world_model.spatial_visibility_head.weight",
395
+ "world_model.spatial_visibility_head.bias",
396
+ "world_model.spatial_clearance_head.weight",
397
+ "world_model.spatial_clearance_head.bias",
398
+ "world_model.spatial_occluder_contact_head.weight",
399
+ "world_model.spatial_occluder_contact_head.bias",
400
+ "world_model.spatial_grasp_affordance_head.weight",
401
+ "world_model.spatial_grasp_affordance_head.bias",
402
+ "world_model.spatial_support_stability_head.weight",
403
+ "world_model.spatial_support_stability_head.bias",
404
+ "world_model.spatial_persistence_head.weight",
405
+ "world_model.spatial_persistence_head.bias",
406
+ "world_model.spatial_reocclusion_field_head.weight",
407
+ "world_model.spatial_reocclusion_field_head.bias",
408
+ "world_model.spatial_disturbance_head.weight",
409
+ "world_model.spatial_disturbance_head.bias",
410
+ "world_model.spatial_uncertainty_head.weight",
411
+ "world_model.spatial_uncertainty_head.bias",
412
+ "world_model.spatial_access_head.weight",
413
+ "world_model.spatial_access_head.bias",
414
+ "planner.residual.trunk.0.weight",
415
+ "planner.residual.trunk.0.bias",
416
+ "planner.residual.trunk.1.weight",
417
+ "planner.residual.trunk.1.bias",
418
+ "planner.residual.trunk.3.weight",
419
+ "planner.residual.trunk.3.bias",
420
+ "planner.residual.success_head.weight",
421
+ "planner.residual.success_head.bias",
422
+ "planner.residual.risk_head.weight",
423
+ "planner.residual.risk_head.bias",
424
+ "planner.residual.residual_head.weight",
425
+ "planner.residual.residual_head.bias"
426
+ ],
427
+ "remapped_keys": {},
428
+ "missing_keys": [
429
+ "memory.gru.weight_ih_l0",
430
+ "memory.gru.weight_hh_l0",
431
+ "memory.gru.bias_ih_l0",
432
+ "memory.gru.bias_hh_l0",
433
+ "memory.gru.weight_ih_l1",
434
+ "memory.gru.weight_hh_l1",
435
+ "memory.gru.bias_ih_l1",
436
+ "memory.gru.bias_hh_l1",
437
+ "memory.token_proj.0.weight",
438
+ "memory.token_proj.0.bias",
439
+ "memory.token_proj.1.weight",
440
+ "memory.token_proj.1.bias",
441
+ "memory.action_proj.0.weight",
442
+ "memory.action_proj.0.bias",
443
+ "memory.action_proj.1.weight",
444
+ "memory.action_proj.1.bias",
445
+ "decoder.actor_role_bias",
446
+ "decoder.revealer_decoder.layers.0.self_attn.in_proj_weight",
447
+ "decoder.revealer_decoder.layers.0.self_attn.in_proj_bias",
448
+ "decoder.revealer_decoder.layers.0.self_attn.out_proj.weight",
449
+ "decoder.revealer_decoder.layers.0.self_attn.out_proj.bias",
450
+ "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_weight",
451
+ "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_bias",
452
+ "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.weight",
453
+ "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.bias",
454
+ "decoder.revealer_decoder.layers.0.linear1.weight",
455
+ "decoder.revealer_decoder.layers.0.linear1.bias",
456
+ "decoder.revealer_decoder.layers.0.linear2.weight",
457
+ "decoder.revealer_decoder.layers.0.linear2.bias",
458
+ "decoder.revealer_decoder.layers.0.norm1.weight",
459
+ "decoder.revealer_decoder.layers.0.norm1.bias",
460
+ "decoder.revealer_decoder.layers.0.norm2.weight",
461
+ "decoder.revealer_decoder.layers.0.norm2.bias",
462
+ "decoder.revealer_decoder.layers.0.norm3.weight",
463
+ "decoder.revealer_decoder.layers.0.norm3.bias",
464
+ "decoder.revealer_decoder.layers.1.self_attn.in_proj_weight",
465
+ "decoder.revealer_decoder.layers.1.self_attn.in_proj_bias",
466
+ "decoder.revealer_decoder.layers.1.self_attn.out_proj.weight",
467
+ "decoder.revealer_decoder.layers.1.self_attn.out_proj.bias",
468
+ "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_weight",
469
+ "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_bias",
470
+ "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.weight",
471
+ "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.bias",
472
+ "decoder.revealer_decoder.layers.1.linear1.weight",
473
+ "decoder.revealer_decoder.layers.1.linear1.bias",
474
+ "decoder.revealer_decoder.layers.1.linear2.weight",
475
+ "decoder.revealer_decoder.layers.1.linear2.bias",
476
+ "decoder.revealer_decoder.layers.1.norm1.weight",
477
+ "decoder.revealer_decoder.layers.1.norm1.bias",
478
+ "decoder.revealer_decoder.layers.1.norm2.weight",
479
+ "decoder.revealer_decoder.layers.1.norm2.bias",
480
+ "decoder.revealer_decoder.layers.1.norm3.weight",
481
+ "decoder.revealer_decoder.layers.1.norm3.bias",
482
+ "decoder.revealer_decoder.layers.2.self_attn.in_proj_weight",
483
+ "decoder.revealer_decoder.layers.2.self_attn.in_proj_bias",
484
+ "decoder.revealer_decoder.layers.2.self_attn.out_proj.weight",
485
+ "decoder.revealer_decoder.layers.2.self_attn.out_proj.bias",
486
+ "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_weight",
487
+ "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_bias",
488
+ "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.weight",
489
+ "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.bias",
490
+ "decoder.revealer_decoder.layers.2.linear1.weight",
491
+ "decoder.revealer_decoder.layers.2.linear1.bias",
492
+ "decoder.revealer_decoder.layers.2.linear2.weight",
493
+ "decoder.revealer_decoder.layers.2.linear2.bias",
494
+ "decoder.revealer_decoder.layers.2.norm1.weight",
495
+ "decoder.revealer_decoder.layers.2.norm1.bias",
496
+ "decoder.revealer_decoder.layers.2.norm2.weight",
497
+ "decoder.revealer_decoder.layers.2.norm2.bias",
498
+ "decoder.revealer_decoder.layers.2.norm3.weight",
499
+ "decoder.revealer_decoder.layers.2.norm3.bias",
500
+ "decoder.revealer_decoder.layers.3.self_attn.in_proj_weight",
501
+ "decoder.revealer_decoder.layers.3.self_attn.in_proj_bias",
502
+ "decoder.revealer_decoder.layers.3.self_attn.out_proj.weight",
503
+ "decoder.revealer_decoder.layers.3.self_attn.out_proj.bias",
504
+ "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_weight",
505
+ "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_bias",
506
+ "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.weight",
507
+ "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.bias",
508
+ "decoder.revealer_decoder.layers.3.linear1.weight",
509
+ "decoder.revealer_decoder.layers.3.linear1.bias",
510
+ "decoder.revealer_decoder.layers.3.linear2.weight",
511
+ "decoder.revealer_decoder.layers.3.linear2.bias",
512
+ "decoder.revealer_decoder.layers.3.norm1.weight",
513
+ "decoder.revealer_decoder.layers.3.norm1.bias",
514
+ "decoder.revealer_decoder.layers.3.norm2.weight",
515
+ "decoder.revealer_decoder.layers.3.norm2.bias",
516
+ "decoder.revealer_decoder.layers.3.norm3.weight",
517
+ "decoder.revealer_decoder.layers.3.norm3.bias",
518
+ "decoder.actor_decoder.layers.0.self_attn.in_proj_weight",
519
+ "decoder.actor_decoder.layers.0.self_attn.in_proj_bias",
520
+ "decoder.actor_decoder.layers.0.self_attn.out_proj.weight",
521
+ "decoder.actor_decoder.layers.0.self_attn.out_proj.bias",
522
+ "decoder.actor_decoder.layers.0.multihead_attn.in_proj_weight",
523
+ "decoder.actor_decoder.layers.0.multihead_attn.in_proj_bias",
524
+ "decoder.actor_decoder.layers.0.multihead_attn.out_proj.weight",
525
+ "decoder.actor_decoder.layers.0.multihead_attn.out_proj.bias",
526
+ "decoder.actor_decoder.layers.0.linear1.weight",
527
+ "decoder.actor_decoder.layers.0.linear1.bias",
528
+ "decoder.actor_decoder.layers.0.linear2.weight",
529
+ "decoder.actor_decoder.layers.0.linear2.bias",
530
+ "decoder.actor_decoder.layers.0.norm1.weight",
531
+ "decoder.actor_decoder.layers.0.norm1.bias",
532
+ "decoder.actor_decoder.layers.0.norm2.weight",
533
+ "decoder.actor_decoder.layers.0.norm2.bias",
534
+ "decoder.actor_decoder.layers.0.norm3.weight",
535
+ "decoder.actor_decoder.layers.0.norm3.bias",
536
+ "decoder.actor_decoder.layers.1.self_attn.in_proj_weight",
537
+ "decoder.actor_decoder.layers.1.self_attn.in_proj_bias",
538
+ "decoder.actor_decoder.layers.1.self_attn.out_proj.weight",
539
+ "decoder.actor_decoder.layers.1.self_attn.out_proj.bias",
540
+ "decoder.actor_decoder.layers.1.multihead_attn.in_proj_weight",
541
+ "decoder.actor_decoder.layers.1.multihead_attn.in_proj_bias",
542
+ "decoder.actor_decoder.layers.1.multihead_attn.out_proj.weight",
543
+ "decoder.actor_decoder.layers.1.multihead_attn.out_proj.bias",
544
+ "decoder.actor_decoder.layers.1.linear1.weight",
545
+ "decoder.actor_decoder.layers.1.linear1.bias",
546
+ "decoder.actor_decoder.layers.1.linear2.weight",
547
+ "decoder.actor_decoder.layers.1.linear2.bias",
548
+ "decoder.actor_decoder.layers.1.norm1.weight",
549
+ "decoder.actor_decoder.layers.1.norm1.bias",
550
+ "decoder.actor_decoder.layers.1.norm2.weight",
551
+ "decoder.actor_decoder.layers.1.norm2.bias",
552
+ "decoder.actor_decoder.layers.1.norm3.weight",
553
+ "decoder.actor_decoder.layers.1.norm3.bias",
554
+ "decoder.actor_decoder.layers.2.self_attn.in_proj_weight",
555
+ "decoder.actor_decoder.layers.2.self_attn.in_proj_bias",
556
+ "decoder.actor_decoder.layers.2.self_attn.out_proj.weight",
557
+ "decoder.actor_decoder.layers.2.self_attn.out_proj.bias",
558
+ "decoder.actor_decoder.layers.2.multihead_attn.in_proj_weight",
559
+ "decoder.actor_decoder.layers.2.multihead_attn.in_proj_bias",
560
+ "decoder.actor_decoder.layers.2.multihead_attn.out_proj.weight",
561
+ "decoder.actor_decoder.layers.2.multihead_attn.out_proj.bias",
562
+ "decoder.actor_decoder.layers.2.linear1.weight",
563
+ "decoder.actor_decoder.layers.2.linear1.bias",
564
+ "decoder.actor_decoder.layers.2.linear2.weight",
565
+ "decoder.actor_decoder.layers.2.linear2.bias",
566
+ "decoder.actor_decoder.layers.2.norm1.weight",
567
+ "decoder.actor_decoder.layers.2.norm1.bias",
568
+ "decoder.actor_decoder.layers.2.norm2.weight",
569
+ "decoder.actor_decoder.layers.2.norm2.bias",
570
+ "decoder.actor_decoder.layers.2.norm3.weight",
571
+ "decoder.actor_decoder.layers.2.norm3.bias",
572
+ "decoder.actor_decoder.layers.3.self_attn.in_proj_weight",
573
+ "decoder.actor_decoder.layers.3.self_attn.in_proj_bias",
574
+ "decoder.actor_decoder.layers.3.self_attn.out_proj.weight",
575
+ "decoder.actor_decoder.layers.3.self_attn.out_proj.bias",
576
+ "decoder.actor_decoder.layers.3.multihead_attn.in_proj_weight",
577
+ "decoder.actor_decoder.layers.3.multihead_attn.in_proj_bias",
578
+ "decoder.actor_decoder.layers.3.multihead_attn.out_proj.weight",
579
+ "decoder.actor_decoder.layers.3.multihead_attn.out_proj.bias",
580
+ "decoder.actor_decoder.layers.3.linear1.weight",
581
+ "decoder.actor_decoder.layers.3.linear1.bias",
582
+ "decoder.actor_decoder.layers.3.linear2.weight",
583
+ "decoder.actor_decoder.layers.3.linear2.bias",
584
+ "decoder.actor_decoder.layers.3.norm1.weight",
585
+ "decoder.actor_decoder.layers.3.norm1.bias",
586
+ "decoder.actor_decoder.layers.3.norm2.weight",
587
+ "decoder.actor_decoder.layers.3.norm2.bias",
588
+ "decoder.actor_decoder.layers.3.norm3.weight",
589
+ "decoder.actor_decoder.layers.3.norm3.bias",
590
+ "decoder.revealer_mean.weight",
591
+ "decoder.revealer_mean.bias",
592
+ "decoder.revealer_log_std.weight",
593
+ "decoder.revealer_log_std.bias",
594
+ "decoder.actor_mean.weight",
595
+ "decoder.actor_mean.bias",
596
+ "decoder.actor_log_std.weight",
597
+ "decoder.actor_log_std.bias",
598
+ "decoder.proposal_score.0.weight",
599
+ "decoder.proposal_score.0.bias",
600
+ "decoder.proposal_score.1.weight",
601
+ "decoder.proposal_score.1.bias"
602
+ ],
603
+ "unexpected_keys": []
604
+ },
605
+ "trainable_parameter_names": [
606
+ "fusion.camera_embedding.weight",
607
+ "fusion.cross_view_transformer.layers.0.self_attn.in_proj_weight",
608
+ "fusion.cross_view_transformer.layers.0.self_attn.in_proj_bias",
609
+ "fusion.cross_view_transformer.layers.0.self_attn.out_proj.weight",
610
+ "fusion.cross_view_transformer.layers.0.self_attn.out_proj.bias",
611
+ "fusion.cross_view_transformer.layers.0.linear1.weight",
612
+ "fusion.cross_view_transformer.layers.0.linear1.bias",
613
+ "fusion.cross_view_transformer.layers.0.linear2.weight",
614
+ "fusion.cross_view_transformer.layers.0.linear2.bias",
615
+ "fusion.cross_view_transformer.layers.0.norm1.weight",
616
+ "fusion.cross_view_transformer.layers.0.norm1.bias",
617
+ "fusion.cross_view_transformer.layers.0.norm2.weight",
618
+ "fusion.cross_view_transformer.layers.0.norm2.bias",
619
+ "fusion.cross_view_transformer.layers.1.self_attn.in_proj_weight",
620
+ "fusion.cross_view_transformer.layers.1.self_attn.in_proj_bias",
621
+ "fusion.cross_view_transformer.layers.1.self_attn.out_proj.weight",
622
+ "fusion.cross_view_transformer.layers.1.self_attn.out_proj.bias",
623
+ "fusion.cross_view_transformer.layers.1.linear1.weight",
624
+ "fusion.cross_view_transformer.layers.1.linear1.bias",
625
+ "fusion.cross_view_transformer.layers.1.linear2.weight",
626
+ "fusion.cross_view_transformer.layers.1.linear2.bias",
627
+ "fusion.cross_view_transformer.layers.1.norm1.weight",
628
+ "fusion.cross_view_transformer.layers.1.norm1.bias",
629
+ "fusion.cross_view_transformer.layers.1.norm2.weight",
630
+ "fusion.cross_view_transformer.layers.1.norm2.bias",
631
+ "fusion.cross_view_transformer.layers.2.self_attn.in_proj_weight",
632
+ "fusion.cross_view_transformer.layers.2.self_attn.in_proj_bias",
633
+ "fusion.cross_view_transformer.layers.2.self_attn.out_proj.weight",
634
+ "fusion.cross_view_transformer.layers.2.self_attn.out_proj.bias",
635
+ "fusion.cross_view_transformer.layers.2.linear1.weight",
636
+ "fusion.cross_view_transformer.layers.2.linear1.bias",
637
+ "fusion.cross_view_transformer.layers.2.linear2.weight",
638
+ "fusion.cross_view_transformer.layers.2.linear2.bias",
639
+ "fusion.cross_view_transformer.layers.2.norm1.weight",
640
+ "fusion.cross_view_transformer.layers.2.norm1.bias",
641
+ "fusion.cross_view_transformer.layers.2.norm2.weight",
642
+ "fusion.cross_view_transformer.layers.2.norm2.bias",
643
+ "fusion.cross_view_transformer.layers.3.self_attn.in_proj_weight",
644
+ "fusion.cross_view_transformer.layers.3.self_attn.in_proj_bias",
645
+ "fusion.cross_view_transformer.layers.3.self_attn.out_proj.weight",
646
+ "fusion.cross_view_transformer.layers.3.self_attn.out_proj.bias",
647
+ "fusion.cross_view_transformer.layers.3.linear1.weight",
648
+ "fusion.cross_view_transformer.layers.3.linear1.bias",
649
+ "fusion.cross_view_transformer.layers.3.linear2.weight",
650
+ "fusion.cross_view_transformer.layers.3.linear2.bias",
651
+ "fusion.cross_view_transformer.layers.3.norm1.weight",
652
+ "fusion.cross_view_transformer.layers.3.norm1.bias",
653
+ "fusion.cross_view_transformer.layers.3.norm2.weight",
654
+ "fusion.cross_view_transformer.layers.3.norm2.bias",
655
+ "fusion.geometry_fusion.attn.in_proj_weight",
656
+ "fusion.geometry_fusion.attn.in_proj_bias",
657
+ "fusion.geometry_fusion.attn.out_proj.weight",
658
+ "fusion.geometry_fusion.attn.out_proj.bias",
659
+ "fusion.geometry_fusion.gate.0.weight",
660
+ "fusion.geometry_fusion.gate.0.bias",
661
+ "fusion.geometry_fusion.gate.1.weight",
662
+ "fusion.geometry_fusion.gate.1.bias",
663
+ "fusion.geometry_fusion.gate.3.weight",
664
+ "fusion.geometry_fusion.gate.3.bias",
665
+ "fusion.geometry_fusion.out.0.weight",
666
+ "fusion.geometry_fusion.out.0.bias",
667
+ "fusion.geometry_fusion.out.1.weight",
668
+ "fusion.geometry_fusion.out.1.bias",
669
+ "fusion.proprio_adapter.0.weight",
670
+ "fusion.proprio_adapter.0.bias",
671
+ "fusion.proprio_adapter.1.weight",
672
+ "fusion.proprio_adapter.1.bias",
673
+ "memory.gru.weight_ih_l0",
674
+ "memory.gru.weight_hh_l0",
675
+ "memory.gru.bias_ih_l0",
676
+ "memory.gru.bias_hh_l0",
677
+ "memory.gru.weight_ih_l1",
678
+ "memory.gru.weight_hh_l1",
679
+ "memory.gru.bias_ih_l1",
680
+ "memory.gru.bias_hh_l1",
681
+ "memory.token_proj.0.weight",
682
+ "memory.token_proj.0.bias",
683
+ "memory.token_proj.1.weight",
684
+ "memory.token_proj.1.bias",
685
+ "memory.action_proj.0.weight",
686
+ "memory.action_proj.0.bias",
687
+ "memory.action_proj.1.weight",
688
+ "memory.action_proj.1.bias",
689
+ "memory.uncertainty_head.0.weight",
690
+ "memory.uncertainty_head.0.bias",
691
+ "memory.uncertainty_head.1.weight",
692
+ "memory.uncertainty_head.1.bias",
693
+ "decoder.actor_role_bias",
694
+ "decoder.revealer_decoder.layers.0.self_attn.in_proj_weight",
695
+ "decoder.revealer_decoder.layers.0.self_attn.in_proj_bias",
696
+ "decoder.revealer_decoder.layers.0.self_attn.out_proj.weight",
697
+ "decoder.revealer_decoder.layers.0.self_attn.out_proj.bias",
698
+ "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_weight",
699
+ "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_bias",
700
+ "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.weight",
701
+ "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.bias",
702
+ "decoder.revealer_decoder.layers.0.linear1.weight",
703
+ "decoder.revealer_decoder.layers.0.linear1.bias",
704
+ "decoder.revealer_decoder.layers.0.linear2.weight",
705
+ "decoder.revealer_decoder.layers.0.linear2.bias",
706
+ "decoder.revealer_decoder.layers.0.norm1.weight",
707
+ "decoder.revealer_decoder.layers.0.norm1.bias",
708
+ "decoder.revealer_decoder.layers.0.norm2.weight",
709
+ "decoder.revealer_decoder.layers.0.norm2.bias",
710
+ "decoder.revealer_decoder.layers.0.norm3.weight",
711
+ "decoder.revealer_decoder.layers.0.norm3.bias",
712
+ "decoder.revealer_decoder.layers.1.self_attn.in_proj_weight",
713
+ "decoder.revealer_decoder.layers.1.self_attn.in_proj_bias",
714
+ "decoder.revealer_decoder.layers.1.self_attn.out_proj.weight",
715
+ "decoder.revealer_decoder.layers.1.self_attn.out_proj.bias",
716
+ "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_weight",
717
+ "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_bias",
718
+ "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.weight",
719
+ "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.bias",
720
+ "decoder.revealer_decoder.layers.1.linear1.weight",
721
+ "decoder.revealer_decoder.layers.1.linear1.bias",
722
+ "decoder.revealer_decoder.layers.1.linear2.weight",
723
+ "decoder.revealer_decoder.layers.1.linear2.bias",
724
+ "decoder.revealer_decoder.layers.1.norm1.weight",
725
+ "decoder.revealer_decoder.layers.1.norm1.bias",
726
+ "decoder.revealer_decoder.layers.1.norm2.weight",
727
+ "decoder.revealer_decoder.layers.1.norm2.bias",
728
+ "decoder.revealer_decoder.layers.1.norm3.weight",
729
+ "decoder.revealer_decoder.layers.1.norm3.bias",
730
+ "decoder.revealer_decoder.layers.2.self_attn.in_proj_weight",
731
+ "decoder.revealer_decoder.layers.2.self_attn.in_proj_bias",
732
+ "decoder.revealer_decoder.layers.2.self_attn.out_proj.weight",
733
+ "decoder.revealer_decoder.layers.2.self_attn.out_proj.bias",
734
+ "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_weight",
735
+ "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_bias",
736
+ "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.weight",
737
+ "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.bias",
738
+ "decoder.revealer_decoder.layers.2.linear1.weight",
739
+ "decoder.revealer_decoder.layers.2.linear1.bias",
740
+ "decoder.revealer_decoder.layers.2.linear2.weight",
741
+ "decoder.revealer_decoder.layers.2.linear2.bias",
742
+ "decoder.revealer_decoder.layers.2.norm1.weight",
743
+ "decoder.revealer_decoder.layers.2.norm1.bias",
744
+ "decoder.revealer_decoder.layers.2.norm2.weight",
745
+ "decoder.revealer_decoder.layers.2.norm2.bias",
746
+ "decoder.revealer_decoder.layers.2.norm3.weight",
747
+ "decoder.revealer_decoder.layers.2.norm3.bias",
748
+ "decoder.revealer_decoder.layers.3.self_attn.in_proj_weight",
749
+ "decoder.revealer_decoder.layers.3.self_attn.in_proj_bias",
750
+ "decoder.revealer_decoder.layers.3.self_attn.out_proj.weight",
751
+ "decoder.revealer_decoder.layers.3.self_attn.out_proj.bias",
752
+ "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_weight",
753
+ "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_bias",
754
+ "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.weight",
755
+ "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.bias",
756
+ "decoder.revealer_decoder.layers.3.linear1.weight",
757
+ "decoder.revealer_decoder.layers.3.linear1.bias",
758
+ "decoder.revealer_decoder.layers.3.linear2.weight",
759
+ "decoder.revealer_decoder.layers.3.linear2.bias",
760
+ "decoder.revealer_decoder.layers.3.norm1.weight",
761
+ "decoder.revealer_decoder.layers.3.norm1.bias",
762
+ "decoder.revealer_decoder.layers.3.norm2.weight",
763
+ "decoder.revealer_decoder.layers.3.norm2.bias",
764
+ "decoder.revealer_decoder.layers.3.norm3.weight",
765
+ "decoder.revealer_decoder.layers.3.norm3.bias",
766
+ "decoder.actor_decoder.layers.0.self_attn.in_proj_weight",
767
+ "decoder.actor_decoder.layers.0.self_attn.in_proj_bias",
768
+ "decoder.actor_decoder.layers.0.self_attn.out_proj.weight",
769
+ "decoder.actor_decoder.layers.0.self_attn.out_proj.bias",
770
+ "decoder.actor_decoder.layers.0.multihead_attn.in_proj_weight",
771
+ "decoder.actor_decoder.layers.0.multihead_attn.in_proj_bias",
772
+ "decoder.actor_decoder.layers.0.multihead_attn.out_proj.weight",
773
+ "decoder.actor_decoder.layers.0.multihead_attn.out_proj.bias",
774
+ "decoder.actor_decoder.layers.0.linear1.weight",
775
+ "decoder.actor_decoder.layers.0.linear1.bias",
776
+ "decoder.actor_decoder.layers.0.linear2.weight",
777
+ "decoder.actor_decoder.layers.0.linear2.bias",
778
+ "decoder.actor_decoder.layers.0.norm1.weight",
779
+ "decoder.actor_decoder.layers.0.norm1.bias",
780
+ "decoder.actor_decoder.layers.0.norm2.weight",
781
+ "decoder.actor_decoder.layers.0.norm2.bias",
782
+ "decoder.actor_decoder.layers.0.norm3.weight",
783
+ "decoder.actor_decoder.layers.0.norm3.bias",
784
+ "decoder.actor_decoder.layers.1.self_attn.in_proj_weight",
785
+ "decoder.actor_decoder.layers.1.self_attn.in_proj_bias",
786
+ "decoder.actor_decoder.layers.1.self_attn.out_proj.weight",
787
+ "decoder.actor_decoder.layers.1.self_attn.out_proj.bias",
788
+ "decoder.actor_decoder.layers.1.multihead_attn.in_proj_weight",
789
+ "decoder.actor_decoder.layers.1.multihead_attn.in_proj_bias",
790
+ "decoder.actor_decoder.layers.1.multihead_attn.out_proj.weight",
791
+ "decoder.actor_decoder.layers.1.multihead_attn.out_proj.bias",
792
+ "decoder.actor_decoder.layers.1.linear1.weight",
793
+ "decoder.actor_decoder.layers.1.linear1.bias",
794
+ "decoder.actor_decoder.layers.1.linear2.weight",
795
+ "decoder.actor_decoder.layers.1.linear2.bias",
796
+ "decoder.actor_decoder.layers.1.norm1.weight",
797
+ "decoder.actor_decoder.layers.1.norm1.bias",
798
+ "decoder.actor_decoder.layers.1.norm2.weight",
799
+ "decoder.actor_decoder.layers.1.norm2.bias",
800
+ "decoder.actor_decoder.layers.1.norm3.weight",
801
+ "decoder.actor_decoder.layers.1.norm3.bias",
802
+ "decoder.actor_decoder.layers.2.self_attn.in_proj_weight",
803
+ "decoder.actor_decoder.layers.2.self_attn.in_proj_bias",
804
+ "decoder.actor_decoder.layers.2.self_attn.out_proj.weight",
805
+ "decoder.actor_decoder.layers.2.self_attn.out_proj.bias",
806
+ "decoder.actor_decoder.layers.2.multihead_attn.in_proj_weight",
807
+ "decoder.actor_decoder.layers.2.multihead_attn.in_proj_bias",
808
+ "decoder.actor_decoder.layers.2.multihead_attn.out_proj.weight",
809
+ "decoder.actor_decoder.layers.2.multihead_attn.out_proj.bias",
810
+ "decoder.actor_decoder.layers.2.linear1.weight",
811
+ "decoder.actor_decoder.layers.2.linear1.bias",
812
+ "decoder.actor_decoder.layers.2.linear2.weight",
813
+ "decoder.actor_decoder.layers.2.linear2.bias",
814
+ "decoder.actor_decoder.layers.2.norm1.weight",
815
+ "decoder.actor_decoder.layers.2.norm1.bias",
816
+ "decoder.actor_decoder.layers.2.norm2.weight",
817
+ "decoder.actor_decoder.layers.2.norm2.bias",
818
+ "decoder.actor_decoder.layers.2.norm3.weight",
819
+ "decoder.actor_decoder.layers.2.norm3.bias",
820
+ "decoder.actor_decoder.layers.3.self_attn.in_proj_weight",
821
+ "decoder.actor_decoder.layers.3.self_attn.in_proj_bias",
822
+ "decoder.actor_decoder.layers.3.self_attn.out_proj.weight",
823
+ "decoder.actor_decoder.layers.3.self_attn.out_proj.bias",
824
+ "decoder.actor_decoder.layers.3.multihead_attn.in_proj_weight",
825
+ "decoder.actor_decoder.layers.3.multihead_attn.in_proj_bias",
826
+ "decoder.actor_decoder.layers.3.multihead_attn.out_proj.weight",
827
+ "decoder.actor_decoder.layers.3.multihead_attn.out_proj.bias",
828
+ "decoder.actor_decoder.layers.3.linear1.weight",
829
+ "decoder.actor_decoder.layers.3.linear1.bias",
830
+ "decoder.actor_decoder.layers.3.linear2.weight",
831
+ "decoder.actor_decoder.layers.3.linear2.bias",
832
+ "decoder.actor_decoder.layers.3.norm1.weight",
833
+ "decoder.actor_decoder.layers.3.norm1.bias",
834
+ "decoder.actor_decoder.layers.3.norm2.weight",
835
+ "decoder.actor_decoder.layers.3.norm2.bias",
836
+ "decoder.actor_decoder.layers.3.norm3.weight",
837
+ "decoder.actor_decoder.layers.3.norm3.bias",
838
+ "decoder.query_embed.weight",
839
+ "decoder.revealer_mean.weight",
840
+ "decoder.revealer_mean.bias",
841
+ "decoder.revealer_log_std.weight",
842
+ "decoder.revealer_log_std.bias",
843
+ "decoder.actor_mean.weight",
844
+ "decoder.actor_mean.bias",
845
+ "decoder.actor_log_std.weight",
846
+ "decoder.actor_log_std.bias",
847
+ "decoder.coordination.0.weight",
848
+ "decoder.coordination.0.bias",
849
+ "decoder.coordination.1.weight",
850
+ "decoder.coordination.1.bias",
851
+ "decoder.coordination.3.weight",
852
+ "decoder.coordination.3.bias",
853
+ "decoder.proposal_score.0.weight",
854
+ "decoder.proposal_score.0.bias",
855
+ "decoder.proposal_score.1.weight",
856
+ "decoder.proposal_score.1.bias"
857
+ ],
858
+ "best_val_total": 0.37670365827424185,
859
+ "history": [
860
+ {
861
+ "epoch": 0,
862
+ "train": {
863
+ "action": 0.6513718327409342,
864
+ "distillation": 0.0,
865
+ "gate": 0.0,
866
+ "planner_ranking": 0.0,
867
+ "planner_risk": 0.0,
868
+ "planner_success": 0.0,
869
+ "proposal_diversity": 0.0,
870
+ "proposal_mode": 0.0,
871
+ "proposal_ranking": 1.2277212582136456,
872
+ "proposal_reconstruction": 0.0,
873
+ "proposal_success": 0.7679335788676613,
874
+ "role_swap_consistency": 0.0,
875
+ "total": 0.9276820499646036,
876
+ "transition": 0.0,
877
+ "world_model": 0.0
878
+ },
879
+ "val": {
880
+ "action": 0.16291735001972743,
881
+ "distillation": 0.0,
882
+ "gate": 0.0,
883
+ "planner_ranking": 0.0,
884
+ "planner_risk": 0.0,
885
+ "planner_success": 0.0,
886
+ "proposal_diversity": 0.0,
887
+ "proposal_mode": 0.0,
888
+ "proposal_ranking": 1.1196258578981673,
889
+ "proposal_reconstruction": 0.0,
890
+ "proposal_success": 0.6713550175939288,
891
+ "role_swap_consistency": 0.0,
892
+ "total": 0.41142383217811584,
893
+ "transition": 0.0,
894
+ "world_model": 0.0
895
+ }
896
+ },
897
+ {
898
+ "epoch": 1,
899
+ "train": {
900
+ "action": 0.21071406963624453,
901
+ "distillation": 0.0,
902
+ "gate": 0.0,
903
+ "planner_ranking": 0.0,
904
+ "planner_risk": 0.0,
905
+ "planner_success": 0.0,
906
+ "proposal_diversity": 0.0,
907
+ "proposal_mode": 0.0,
908
+ "proposal_ranking": 1.155882295809294,
909
+ "proposal_reconstruction": 0.0,
910
+ "proposal_success": 0.6945144220402366,
911
+ "role_swap_consistency": 0.0,
912
+ "total": 0.467438153530422,
913
+ "transition": 0.0,
914
+ "world_model": 0.0
915
+ },
916
+ "val": {
917
+ "action": 0.23247837594577245,
918
+ "distillation": 0.0,
919
+ "gate": 0.0,
920
+ "planner_ranking": 0.0,
921
+ "planner_risk": 0.0,
922
+ "planner_success": 0.0,
923
+ "proposal_diversity": 0.0,
924
+ "proposal_mode": 0.0,
925
+ "proposal_ranking": 1.1327585322516305,
926
+ "proposal_reconstruction": 0.0,
927
+ "proposal_success": 0.6880117058753967,
928
+ "role_swap_consistency": 0.0,
929
+ "total": 0.48495357377188547,
930
+ "transition": 0.0,
931
+ "world_model": 0.0
932
+ }
933
+ },
934
+ {
935
+ "epoch": 2,
936
+ "train": {
937
+ "action": 0.1608393647168812,
938
+ "distillation": 0.0,
939
+ "gate": 0.0,
940
+ "planner_ranking": 0.0,
941
+ "planner_risk": 0.0,
942
+ "planner_success": 0.0,
943
+ "proposal_diversity": 0.0,
944
+ "proposal_mode": 0.0,
945
+ "proposal_ranking": 1.2125610175885653,
946
+ "proposal_reconstruction": 0.0,
947
+ "proposal_success": 0.7512044781132748,
948
+ "role_swap_consistency": 0.0,
949
+ "total": 0.4328680618813163,
950
+ "transition": 0.0,
951
+ "world_model": 0.0
952
+ },
953
+ "val": {
954
+ "action": 0.09432156596864973,
955
+ "distillation": 0.0,
956
+ "gate": 0.0,
957
+ "planner_ranking": 0.0,
958
+ "planner_risk": 0.0,
959
+ "planner_success": 0.0,
960
+ "proposal_diversity": 0.0,
961
+ "proposal_mode": 0.0,
962
+ "proposal_ranking": 1.305886251585824,
963
+ "proposal_reconstruction": 0.0,
964
+ "proposal_success": 0.798845146383558,
965
+ "role_swap_consistency": 0.0,
966
+ "total": 0.3860659216131483,
967
+ "transition": 0.0,
968
+ "world_model": 0.0
969
+ }
970
+ },
971
+ {
972
+ "epoch": 3,
973
+ "train": {
974
+ "action": 0.1447360997921542,
975
+ "distillation": 0.0,
976
+ "gate": 0.0,
977
+ "planner_ranking": 0.0,
978
+ "planner_risk": 0.0,
979
+ "planner_success": 0.0,
980
+ "proposal_diversity": 0.0,
981
+ "proposal_mode": 0.0,
982
+ "proposal_ranking": 1.175407836311742,
983
+ "proposal_reconstruction": 0.0,
984
+ "proposal_success": 0.7467456679595145,
985
+ "role_swap_consistency": 0.0,
986
+ "total": 0.4106567674561551,
987
+ "transition": 0.0,
988
+ "world_model": 0.0
989
+ },
990
+ "val": {
991
+ "action": 0.13610392383166722,
992
+ "distillation": 0.0,
993
+ "gate": 0.0,
994
+ "planner_ranking": 0.0,
995
+ "planner_risk": 0.0,
996
+ "planner_success": 0.0,
997
+ "proposal_diversity": 0.0,
998
+ "proposal_mode": 0.0,
999
+ "proposal_ranking": 1.1333176749093192,
1000
+ "proposal_reconstruction": 0.0,
1001
+ "proposal_success": 0.6300536692142487,
1002
+ "role_swap_consistency": 0.0,
1003
+ "total": 0.38170802167483736,
1004
+ "transition": 0.0,
1005
+ "world_model": 0.0
1006
+ }
1007
+ },
1008
+ {
1009
+ "epoch": 4,
1010
+ "train": {
1011
+ "action": 0.13893984062106987,
1012
+ "distillation": 0.0,
1013
+ "gate": 0.0,
1014
+ "planner_ranking": 0.0,
1015
+ "planner_risk": 0.0,
1016
+ "planner_success": 0.0,
1017
+ "proposal_diversity": 0.0,
1018
+ "proposal_mode": 0.0,
1019
+ "proposal_ranking": 1.2317605771516498,
1020
+ "proposal_reconstruction": 0.0,
1021
+ "proposal_success": 0.7522393684638174,
1022
+ "role_swap_consistency": 0.0,
1023
+ "total": 0.4139726554092608,
1024
+ "transition": 0.0,
1025
+ "world_model": 0.0
1026
+ },
1027
+ "val": {
1028
+ "action": 0.08872867269175393,
1029
+ "distillation": 0.0,
1030
+ "gate": 0.0,
1031
+ "planner_ranking": 0.0,
1032
+ "planner_risk": 0.0,
1033
+ "planner_success": 0.0,
1034
+ "proposal_diversity": 0.0,
1035
+ "proposal_mode": 0.0,
1036
+ "proposal_ranking": 1.3585667099271501,
1037
+ "proposal_reconstruction": 0.0,
1038
+ "proposal_success": 0.8373270290238517,
1039
+ "role_swap_consistency": 0.0,
1040
+ "total": 0.39299292649541584,
1041
+ "transition": 0.0,
1042
+ "world_model": 0.0
1043
+ }
1044
+ },
1045
+ {
1046
+ "epoch": 5,
1047
+ "train": {
1048
+ "action": 0.14546634727402738,
1049
+ "distillation": 0.0,
1050
+ "gate": 0.0,
1051
+ "planner_ranking": 0.0,
1052
+ "planner_risk": 0.0,
1053
+ "planner_success": 0.0,
1054
+ "proposal_diversity": 0.0,
1055
+ "proposal_mode": 0.0,
1056
+ "proposal_ranking": 1.1457386205070896,
1057
+ "proposal_reconstruction": 0.0,
1058
+ "proposal_success": 0.6874889543181971,
1059
+ "role_swap_consistency": 0.0,
1060
+ "total": 0.3998258223659114,
1061
+ "transition": 0.0,
1062
+ "world_model": 0.0
1063
+ },
1064
+ "val": {
1065
+ "action": 0.13263668226344244,
1066
+ "distillation": 0.0,
1067
+ "gate": 0.0,
1068
+ "planner_ranking": 0.0,
1069
+ "planner_risk": 0.0,
1070
+ "planner_success": 0.0,
1071
+ "proposal_diversity": 0.0,
1072
+ "proposal_mode": 0.0,
1073
+ "proposal_ranking": 1.099511010306222,
1074
+ "proposal_reconstruction": 0.0,
1075
+ "proposal_success": 0.6595027276447841,
1076
+ "role_swap_consistency": 0.0,
1077
+ "total": 0.37670365827424185,
1078
+ "transition": 0.0,
1079
+ "world_model": 0.0
1080
+ }
1081
+ }
1082
+ ],
1083
+ "train_spec": {
1084
+ "track_id": "bag_track",
1085
+ "suite": "maniskill3",
1086
+ "benchmark_task": "PutEggplantInBasketRetrievalProxy-v1",
1087
+ "model_variant": "trunk_only_ft",
1088
+ "seed": 17,
1089
+ "train_demos": 32,
1090
+ "val_demos": 8,
1091
+ "init_checkpoint_group": "/workspace/workspace/VLAarchtests2/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt",
1092
+ "optimizer": "adamw",
1093
+ "learning_rate": 0.0001,
1094
+ "lr_schedule": "constant",
1095
+ "batch_size": 4,
1096
+ "augmentations": "none",
1097
+ "early_stopping_metric": "val_total",
1098
+ "max_gradient_steps": 114,
1099
+ "unfreeze_scope": "fusion_memory_decoder",
1100
+ "dataset_split_id": "bag_bridge_smoke_v1_seed17",
1101
+ "same_data_policy": true,
1102
+ "same_init_policy": true
1103
+ }
1104
+ }
outputs/maniskill_bag_bridge_smoke_v1/trunk_only_ft_seed23/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9245c09caccaea670d910bf86d60a35d7db1d345cdc71d55a4e854d903ce0b3
3
+ size 817966700
outputs/maniskill_bag_bridge_smoke_v1/trunk_only_ft_seed23/summary.json ADDED
@@ -0,0 +1,1030 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "task": "bag",
3
+ "variant": "trunk_only_ft",
4
+ "checkpoint_path": "/workspace/workspace/outputs/maniskill_bag_bridge_smoke_v1/trunk_only_ft_seed23/checkpoint_best.pt",
5
+ "init_info": {
6
+ "path": "/workspace/workspace/VLAarchtests2/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt",
7
+ "loaded_keys": 489,
8
+ "skipped_shape_mismatch_keys": [
9
+ "memory.scene_memory.position_embedding",
10
+ "memory.scene_memory.bank_queries",
11
+ "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
12
+ "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
13
+ "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
14
+ "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
15
+ "memory.scene_memory.sequence_encoder.layers.0.linear1.weight",
16
+ "memory.scene_memory.sequence_encoder.layers.0.linear1.bias",
17
+ "memory.scene_memory.sequence_encoder.layers.0.linear2.weight",
18
+ "memory.scene_memory.sequence_encoder.layers.0.linear2.bias",
19
+ "memory.scene_memory.sequence_encoder.layers.0.norm1.weight",
20
+ "memory.scene_memory.sequence_encoder.layers.0.norm1.bias",
21
+ "memory.scene_memory.sequence_encoder.layers.0.norm2.weight",
22
+ "memory.scene_memory.sequence_encoder.layers.0.norm2.bias",
23
+ "memory.scene_memory.bank_attention.in_proj_weight",
24
+ "memory.scene_memory.bank_attention.in_proj_bias",
25
+ "memory.scene_memory.bank_attention.out_proj.weight",
26
+ "memory.scene_memory.bank_attention.out_proj.bias",
27
+ "memory.scene_memory.action_proj.0.weight",
28
+ "memory.scene_memory.action_proj.0.bias",
29
+ "memory.scene_memory.action_proj.1.weight",
30
+ "memory.scene_memory.action_proj.1.bias",
31
+ "memory.scene_memory.write_gate.0.weight",
32
+ "memory.scene_memory.write_gate.0.bias",
33
+ "memory.scene_memory.write_gate.1.weight",
34
+ "memory.scene_memory.write_gate.1.bias",
35
+ "memory.scene_memory.write_gate.3.weight",
36
+ "memory.scene_memory.write_gate.3.bias",
37
+ "memory.scene_memory.token_proj.0.weight",
38
+ "memory.scene_memory.token_proj.0.bias",
39
+ "memory.scene_memory.token_proj.1.weight",
40
+ "memory.scene_memory.token_proj.1.bias",
41
+ "memory.belief_memory.position_embedding",
42
+ "memory.belief_memory.bank_queries",
43
+ "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_weight",
44
+ "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_bias",
45
+ "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.weight",
46
+ "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.bias",
47
+ "memory.belief_memory.sequence_encoder.layers.0.linear1.weight",
48
+ "memory.belief_memory.sequence_encoder.layers.0.linear1.bias",
49
+ "memory.belief_memory.sequence_encoder.layers.0.linear2.weight",
50
+ "memory.belief_memory.sequence_encoder.layers.0.linear2.bias",
51
+ "memory.belief_memory.sequence_encoder.layers.0.norm1.weight",
52
+ "memory.belief_memory.sequence_encoder.layers.0.norm1.bias",
53
+ "memory.belief_memory.sequence_encoder.layers.0.norm2.weight",
54
+ "memory.belief_memory.sequence_encoder.layers.0.norm2.bias",
55
+ "memory.belief_memory.bank_attention.in_proj_weight",
56
+ "memory.belief_memory.bank_attention.in_proj_bias",
57
+ "memory.belief_memory.bank_attention.out_proj.weight",
58
+ "memory.belief_memory.bank_attention.out_proj.bias",
59
+ "memory.belief_memory.action_proj.0.weight",
60
+ "memory.belief_memory.action_proj.0.bias",
61
+ "memory.belief_memory.action_proj.1.weight",
62
+ "memory.belief_memory.action_proj.1.bias",
63
+ "memory.belief_memory.write_gate.0.weight",
64
+ "memory.belief_memory.write_gate.0.bias",
65
+ "memory.belief_memory.write_gate.1.weight",
66
+ "memory.belief_memory.write_gate.1.bias",
67
+ "memory.belief_memory.write_gate.3.weight",
68
+ "memory.belief_memory.write_gate.3.bias",
69
+ "memory.belief_memory.token_proj.0.weight",
70
+ "memory.belief_memory.token_proj.0.bias",
71
+ "memory.belief_memory.token_proj.1.weight",
72
+ "memory.belief_memory.token_proj.1.bias",
73
+ "decoder.arm_decoder.layers.0.self_attn.in_proj_weight",
74
+ "decoder.arm_decoder.layers.0.self_attn.in_proj_bias",
75
+ "decoder.arm_decoder.layers.0.self_attn.out_proj.weight",
76
+ "decoder.arm_decoder.layers.0.self_attn.out_proj.bias",
77
+ "decoder.arm_decoder.layers.0.multihead_attn.in_proj_weight",
78
+ "decoder.arm_decoder.layers.0.multihead_attn.in_proj_bias",
79
+ "decoder.arm_decoder.layers.0.multihead_attn.out_proj.weight",
80
+ "decoder.arm_decoder.layers.0.multihead_attn.out_proj.bias",
81
+ "decoder.arm_decoder.layers.0.linear1.weight",
82
+ "decoder.arm_decoder.layers.0.linear1.bias",
83
+ "decoder.arm_decoder.layers.0.linear2.weight",
84
+ "decoder.arm_decoder.layers.0.linear2.bias",
85
+ "decoder.arm_decoder.layers.0.norm1.weight",
86
+ "decoder.arm_decoder.layers.0.norm1.bias",
87
+ "decoder.arm_decoder.layers.0.norm2.weight",
88
+ "decoder.arm_decoder.layers.0.norm2.bias",
89
+ "decoder.arm_decoder.layers.0.norm3.weight",
90
+ "decoder.arm_decoder.layers.0.norm3.bias",
91
+ "decoder.arm_decoder.layers.1.self_attn.in_proj_weight",
92
+ "decoder.arm_decoder.layers.1.self_attn.in_proj_bias",
93
+ "decoder.arm_decoder.layers.1.self_attn.out_proj.weight",
94
+ "decoder.arm_decoder.layers.1.self_attn.out_proj.bias",
95
+ "decoder.arm_decoder.layers.1.multihead_attn.in_proj_weight",
96
+ "decoder.arm_decoder.layers.1.multihead_attn.in_proj_bias",
97
+ "decoder.arm_decoder.layers.1.multihead_attn.out_proj.weight",
98
+ "decoder.arm_decoder.layers.1.multihead_attn.out_proj.bias",
99
+ "decoder.arm_decoder.layers.1.linear1.weight",
100
+ "decoder.arm_decoder.layers.1.linear1.bias",
101
+ "decoder.arm_decoder.layers.1.linear2.weight",
102
+ "decoder.arm_decoder.layers.1.linear2.bias",
103
+ "decoder.arm_decoder.layers.1.norm1.weight",
104
+ "decoder.arm_decoder.layers.1.norm1.bias",
105
+ "decoder.arm_decoder.layers.1.norm2.weight",
106
+ "decoder.arm_decoder.layers.1.norm2.bias",
107
+ "decoder.arm_decoder.layers.1.norm3.weight",
108
+ "decoder.arm_decoder.layers.1.norm3.bias",
109
+ "decoder.arm_decoder.layers.2.self_attn.in_proj_weight",
110
+ "decoder.arm_decoder.layers.2.self_attn.in_proj_bias",
111
+ "decoder.arm_decoder.layers.2.self_attn.out_proj.weight",
112
+ "decoder.arm_decoder.layers.2.self_attn.out_proj.bias",
113
+ "decoder.arm_decoder.layers.2.multihead_attn.in_proj_weight",
114
+ "decoder.arm_decoder.layers.2.multihead_attn.in_proj_bias",
115
+ "decoder.arm_decoder.layers.2.multihead_attn.out_proj.weight",
116
+ "decoder.arm_decoder.layers.2.multihead_attn.out_proj.bias",
117
+ "decoder.arm_decoder.layers.2.linear1.weight",
118
+ "decoder.arm_decoder.layers.2.linear1.bias",
119
+ "decoder.arm_decoder.layers.2.linear2.weight",
120
+ "decoder.arm_decoder.layers.2.linear2.bias",
121
+ "decoder.arm_decoder.layers.2.norm1.weight",
122
+ "decoder.arm_decoder.layers.2.norm1.bias",
123
+ "decoder.arm_decoder.layers.2.norm2.weight",
124
+ "decoder.arm_decoder.layers.2.norm2.bias",
125
+ "decoder.arm_decoder.layers.2.norm3.weight",
126
+ "decoder.arm_decoder.layers.2.norm3.bias",
127
+ "decoder.arm_decoder.layers.3.self_attn.in_proj_weight",
128
+ "decoder.arm_decoder.layers.3.self_attn.in_proj_bias",
129
+ "decoder.arm_decoder.layers.3.self_attn.out_proj.weight",
130
+ "decoder.arm_decoder.layers.3.self_attn.out_proj.bias",
131
+ "decoder.arm_decoder.layers.3.multihead_attn.in_proj_weight",
132
+ "decoder.arm_decoder.layers.3.multihead_attn.in_proj_bias",
133
+ "decoder.arm_decoder.layers.3.multihead_attn.out_proj.weight",
134
+ "decoder.arm_decoder.layers.3.multihead_attn.out_proj.bias",
135
+ "decoder.arm_decoder.layers.3.linear1.weight",
136
+ "decoder.arm_decoder.layers.3.linear1.bias",
137
+ "decoder.arm_decoder.layers.3.linear2.weight",
138
+ "decoder.arm_decoder.layers.3.linear2.bias",
139
+ "decoder.arm_decoder.layers.3.norm1.weight",
140
+ "decoder.arm_decoder.layers.3.norm1.bias",
141
+ "decoder.arm_decoder.layers.3.norm2.weight",
142
+ "decoder.arm_decoder.layers.3.norm2.bias",
143
+ "decoder.arm_decoder.layers.3.norm3.weight",
144
+ "decoder.arm_decoder.layers.3.norm3.bias",
145
+ "decoder.arm_identity.weight",
146
+ "decoder.task_embedding.weight",
147
+ "decoder.phase_adapter.weight",
148
+ "decoder.phase_adapter.bias",
149
+ "decoder.role_adapter.weight",
150
+ "decoder.role_adapter.bias",
151
+ "decoder.context_proj.0.weight",
152
+ "decoder.context_proj.0.bias",
153
+ "decoder.context_proj.1.weight",
154
+ "decoder.context_proj.1.bias",
155
+ "decoder.arm_head.0.weight",
156
+ "decoder.arm_head.0.bias",
157
+ "decoder.arm_head.1.weight",
158
+ "decoder.arm_head.1.bias",
159
+ "decoder.arm_mean.weight",
160
+ "decoder.arm_mean.bias",
161
+ "decoder.arm_log_std.weight",
162
+ "decoder.arm_log_std.bias",
163
+ "decoder.proposal_mode_head.0.weight",
164
+ "decoder.proposal_mode_head.0.bias",
165
+ "decoder.proposal_mode_head.1.weight",
166
+ "decoder.proposal_mode_head.1.bias",
167
+ "decoder.proposal_mode_head.3.weight",
168
+ "decoder.proposal_mode_head.3.bias",
169
+ "decoder.proposal_mode_embeddings.weight",
170
+ "decoder.proposal_slot_embeddings.weight",
171
+ "decoder.mode_residual_heads.0.0.weight",
172
+ "decoder.mode_residual_heads.0.0.bias",
173
+ "decoder.mode_residual_heads.0.1.weight",
174
+ "decoder.mode_residual_heads.0.1.bias",
175
+ "decoder.mode_residual_heads.0.3.weight",
176
+ "decoder.mode_residual_heads.0.3.bias",
177
+ "decoder.mode_residual_heads.1.0.weight",
178
+ "decoder.mode_residual_heads.1.0.bias",
179
+ "decoder.mode_residual_heads.1.1.weight",
180
+ "decoder.mode_residual_heads.1.1.bias",
181
+ "decoder.mode_residual_heads.1.3.weight",
182
+ "decoder.mode_residual_heads.1.3.bias",
183
+ "decoder.mode_residual_heads.2.0.weight",
184
+ "decoder.mode_residual_heads.2.0.bias",
185
+ "decoder.mode_residual_heads.2.1.weight",
186
+ "decoder.mode_residual_heads.2.1.bias",
187
+ "decoder.mode_residual_heads.2.3.weight",
188
+ "decoder.mode_residual_heads.2.3.bias",
189
+ "decoder.mode_residual_heads.3.0.weight",
190
+ "decoder.mode_residual_heads.3.0.bias",
191
+ "decoder.mode_residual_heads.3.1.weight",
192
+ "decoder.mode_residual_heads.3.1.bias",
193
+ "decoder.mode_residual_heads.3.3.weight",
194
+ "decoder.mode_residual_heads.3.3.bias",
195
+ "decoder.mode_residual_heads.4.0.weight",
196
+ "decoder.mode_residual_heads.4.0.bias",
197
+ "decoder.mode_residual_heads.4.1.weight",
198
+ "decoder.mode_residual_heads.4.1.bias",
199
+ "decoder.mode_residual_heads.4.3.weight",
200
+ "decoder.mode_residual_heads.4.3.bias",
201
+ "decoder.mode_residual_heads.5.0.weight",
202
+ "decoder.mode_residual_heads.5.0.bias",
203
+ "decoder.mode_residual_heads.5.1.weight",
204
+ "decoder.mode_residual_heads.5.1.bias",
205
+ "decoder.mode_residual_heads.5.3.weight",
206
+ "decoder.mode_residual_heads.5.3.bias",
207
+ "decoder.mode_residual_heads.6.0.weight",
208
+ "decoder.mode_residual_heads.6.0.bias",
209
+ "decoder.mode_residual_heads.6.1.weight",
210
+ "decoder.mode_residual_heads.6.1.bias",
211
+ "decoder.mode_residual_heads.6.3.weight",
212
+ "decoder.mode_residual_heads.6.3.bias",
213
+ "decoder.slot_delta.0.weight",
214
+ "decoder.slot_delta.0.bias",
215
+ "decoder.slot_delta.1.weight",
216
+ "decoder.slot_delta.1.bias",
217
+ "decoder.slot_delta.3.weight",
218
+ "decoder.slot_delta.3.bias",
219
+ "decoder.proposal_score.0.weight",
220
+ "decoder.proposal_score.0.bias",
221
+ "decoder.proposal_score.1.weight",
222
+ "decoder.proposal_score.1.bias",
223
+ "decoder.proposal_score.3.weight",
224
+ "decoder.proposal_score.3.bias",
225
+ "elastic_state_head.interaction_queries",
226
+ "elastic_state_head.interaction_attention.in_proj_weight",
227
+ "elastic_state_head.interaction_attention.in_proj_bias",
228
+ "elastic_state_head.interaction_attention.out_proj.weight",
229
+ "elastic_state_head.interaction_attention.out_proj.bias",
230
+ "elastic_state_head.interaction_mlp.0.weight",
231
+ "elastic_state_head.interaction_mlp.0.bias",
232
+ "elastic_state_head.interaction_mlp.1.weight",
233
+ "elastic_state_head.interaction_mlp.1.bias",
234
+ "elastic_state_head.interaction_mlp.3.weight",
235
+ "elastic_state_head.interaction_mlp.3.bias",
236
+ "elastic_state_head.decoder.field_queries",
237
+ "elastic_state_head.decoder.field_attention.in_proj_weight",
238
+ "elastic_state_head.decoder.field_attention.in_proj_bias",
239
+ "elastic_state_head.decoder.field_attention.out_proj.weight",
240
+ "elastic_state_head.decoder.field_attention.out_proj.bias",
241
+ "elastic_state_head.decoder.field_mlp.0.weight",
242
+ "elastic_state_head.decoder.field_mlp.0.bias",
243
+ "elastic_state_head.decoder.field_mlp.1.weight",
244
+ "elastic_state_head.decoder.field_mlp.1.bias",
245
+ "elastic_state_head.decoder.field_mlp.3.weight",
246
+ "elastic_state_head.decoder.field_mlp.3.bias",
247
+ "elastic_state_head.decoder.summary_proj.0.weight",
248
+ "elastic_state_head.decoder.summary_proj.0.bias",
249
+ "elastic_state_head.decoder.summary_proj.1.weight",
250
+ "elastic_state_head.decoder.summary_proj.1.bias",
251
+ "elastic_state_head.decoder.phase_head.0.weight",
252
+ "elastic_state_head.decoder.phase_head.0.bias",
253
+ "elastic_state_head.decoder.phase_head.1.weight",
254
+ "elastic_state_head.decoder.phase_head.1.bias",
255
+ "elastic_state_head.decoder.phase_head.3.weight",
256
+ "elastic_state_head.decoder.phase_head.3.bias",
257
+ "elastic_state_head.decoder.arm_role_head.0.weight",
258
+ "elastic_state_head.decoder.arm_role_head.0.bias",
259
+ "elastic_state_head.decoder.arm_role_head.1.weight",
260
+ "elastic_state_head.decoder.arm_role_head.1.bias",
261
+ "elastic_state_head.decoder.arm_role_head.3.weight",
262
+ "elastic_state_head.decoder.arm_role_head.3.bias",
263
+ "elastic_state_head.decoder.arm_identity.weight",
264
+ "elastic_state_head.decoder.support_mode.0.weight",
265
+ "elastic_state_head.decoder.support_mode.0.bias",
266
+ "elastic_state_head.decoder.support_mode.1.weight",
267
+ "elastic_state_head.decoder.support_mode.1.bias",
268
+ "elastic_state_head.decoder.support_mode.3.weight",
269
+ "elastic_state_head.decoder.support_mode.3.bias",
270
+ "elastic_state_head.decoder.access_field.weight",
271
+ "elastic_state_head.decoder.access_field.bias",
272
+ "elastic_state_head.decoder.target_belief_field.weight",
273
+ "elastic_state_head.decoder.target_belief_field.bias",
274
+ "elastic_state_head.decoder.visibility_field.weight",
275
+ "elastic_state_head.decoder.visibility_field.bias",
276
+ "elastic_state_head.decoder.clearance_field.weight",
277
+ "elastic_state_head.decoder.clearance_field.bias",
278
+ "elastic_state_head.decoder.occluder_contact_field.weight",
279
+ "elastic_state_head.decoder.occluder_contact_field.bias",
280
+ "elastic_state_head.decoder.grasp_affordance_field.weight",
281
+ "elastic_state_head.decoder.grasp_affordance_field.bias",
282
+ "elastic_state_head.decoder.support_stability_field.weight",
283
+ "elastic_state_head.decoder.support_stability_field.bias",
284
+ "elastic_state_head.decoder.persistence_field.weight",
285
+ "elastic_state_head.decoder.persistence_field.bias",
286
+ "elastic_state_head.decoder.reocclusion_field.weight",
287
+ "elastic_state_head.decoder.reocclusion_field.bias",
288
+ "elastic_state_head.decoder.disturbance_field.weight",
289
+ "elastic_state_head.decoder.disturbance_field.bias",
290
+ "elastic_state_head.decoder.uncertainty_field.weight",
291
+ "elastic_state_head.decoder.uncertainty_field.bias",
292
+ "elastic_state_head.decoder.reocclusion_head.0.weight",
293
+ "elastic_state_head.decoder.reocclusion_head.0.bias",
294
+ "elastic_state_head.decoder.reocclusion_head.1.weight",
295
+ "elastic_state_head.decoder.reocclusion_head.1.bias",
296
+ "elastic_state_head.decoder.reocclusion_head.3.weight",
297
+ "elastic_state_head.decoder.reocclusion_head.3.bias",
298
+ "elastic_state_head.decoder.task_embedding.weight",
299
+ "elastic_state_head.decoder.task_field_affine.weight",
300
+ "elastic_state_head.decoder.task_field_affine.bias",
301
+ "elastic_state_head.decoder.task_summary_adapter.0.weight",
302
+ "elastic_state_head.decoder.task_summary_adapter.0.bias",
303
+ "elastic_state_head.decoder.task_summary_adapter.1.weight",
304
+ "elastic_state_head.decoder.task_summary_adapter.1.bias",
305
+ "elastic_state_head.decoder.task_phase_head.weight",
306
+ "elastic_state_head.decoder.task_phase_head.bias",
307
+ "elastic_state_head.decoder.task_support_head.weight",
308
+ "elastic_state_head.decoder.task_support_head.bias",
309
+ "elastic_state_head.decoder.task_reocclusion_head.weight",
310
+ "elastic_state_head.decoder.task_reocclusion_head.bias",
311
+ "elastic_state_head.decoder.task_metric_head.0.weight",
312
+ "elastic_state_head.decoder.task_metric_head.0.bias",
313
+ "elastic_state_head.decoder.task_metric_head.1.weight",
314
+ "elastic_state_head.decoder.task_metric_head.1.bias",
315
+ "elastic_state_head.decoder.task_metric_head.3.weight",
316
+ "elastic_state_head.decoder.task_metric_head.3.bias",
317
+ "world_model.state_encoder.0.weight",
318
+ "world_model.state_encoder.0.bias",
319
+ "world_model.state_encoder.1.weight",
320
+ "world_model.state_encoder.1.bias",
321
+ "world_model.scene_memory_proj.0.weight",
322
+ "world_model.scene_memory_proj.0.bias",
323
+ "world_model.scene_memory_proj.1.weight",
324
+ "world_model.scene_memory_proj.1.bias",
325
+ "world_model.belief_memory_proj.0.weight",
326
+ "world_model.belief_memory_proj.0.bias",
327
+ "world_model.belief_memory_proj.1.weight",
328
+ "world_model.belief_memory_proj.1.bias",
329
+ "world_model.action_encoder.0.weight",
330
+ "world_model.action_encoder.0.bias",
331
+ "world_model.action_encoder.1.weight",
332
+ "world_model.action_encoder.1.bias",
333
+ "world_model.transition.weight_ih",
334
+ "world_model.transition.weight_hh",
335
+ "world_model.transition.bias_ih",
336
+ "world_model.transition.bias_hh",
337
+ "world_model.scene_memory_update.weight",
338
+ "world_model.scene_memory_update.bias",
339
+ "world_model.belief_memory_update.weight",
340
+ "world_model.belief_memory_update.bias",
341
+ "world_model.compact_decoder.weight",
342
+ "world_model.compact_decoder.bias",
343
+ "world_model.target_belief_head.weight",
344
+ "world_model.target_belief_head.bias",
345
+ "world_model.visibility_head.weight",
346
+ "world_model.visibility_head.bias",
347
+ "world_model.clearance_head.weight",
348
+ "world_model.clearance_head.bias",
349
+ "world_model.occluder_contact_head.weight",
350
+ "world_model.occluder_contact_head.bias",
351
+ "world_model.grasp_affordance_head.weight",
352
+ "world_model.grasp_affordance_head.bias",
353
+ "world_model.support_stability_head.weight",
354
+ "world_model.support_stability_head.bias",
355
+ "world_model.persistence_head.weight",
356
+ "world_model.persistence_head.bias",
357
+ "world_model.reocclusion_head.weight",
358
+ "world_model.reocclusion_head.bias",
359
+ "world_model.disturbance_head.weight",
360
+ "world_model.disturbance_head.bias",
361
+ "world_model.uncertainty_head.weight",
362
+ "world_model.uncertainty_head.bias",
363
+ "world_model.access_head.weight",
364
+ "world_model.access_head.bias",
365
+ "world_model.task_embedding.weight",
366
+ "world_model.spatial_field_encoder.0.weight",
367
+ "world_model.spatial_field_encoder.0.bias",
368
+ "world_model.spatial_field_encoder.2.weight",
369
+ "world_model.spatial_field_encoder.2.bias",
370
+ "world_model.spatial_context_proj.0.weight",
371
+ "world_model.spatial_context_proj.0.bias",
372
+ "world_model.spatial_context_proj.1.weight",
373
+ "world_model.spatial_context_proj.1.bias",
374
+ "world_model.spatial_gate_z.weight",
375
+ "world_model.spatial_gate_z.bias",
376
+ "world_model.spatial_gate_r.weight",
377
+ "world_model.spatial_gate_r.bias",
378
+ "world_model.spatial_candidate.weight",
379
+ "world_model.spatial_candidate.bias",
380
+ "world_model.spatial_summary_proj.0.weight",
381
+ "world_model.spatial_summary_proj.0.bias",
382
+ "world_model.spatial_summary_proj.1.weight",
383
+ "world_model.spatial_summary_proj.1.bias",
384
+ "world_model.spatial_phase_head.weight",
385
+ "world_model.spatial_phase_head.bias",
386
+ "world_model.spatial_support_mode_head.weight",
387
+ "world_model.spatial_support_mode_head.bias",
388
+ "world_model.spatial_arm_role_head.weight",
389
+ "world_model.spatial_arm_role_head.bias",
390
+ "world_model.spatial_reocclusion_head.weight",
391
+ "world_model.spatial_reocclusion_head.bias",
392
+ "world_model.spatial_target_belief_head.weight",
393
+ "world_model.spatial_target_belief_head.bias",
394
+ "world_model.spatial_visibility_head.weight",
395
+ "world_model.spatial_visibility_head.bias",
396
+ "world_model.spatial_clearance_head.weight",
397
+ "world_model.spatial_clearance_head.bias",
398
+ "world_model.spatial_occluder_contact_head.weight",
399
+ "world_model.spatial_occluder_contact_head.bias",
400
+ "world_model.spatial_grasp_affordance_head.weight",
401
+ "world_model.spatial_grasp_affordance_head.bias",
402
+ "world_model.spatial_support_stability_head.weight",
403
+ "world_model.spatial_support_stability_head.bias",
404
+ "world_model.spatial_persistence_head.weight",
405
+ "world_model.spatial_persistence_head.bias",
406
+ "world_model.spatial_reocclusion_field_head.weight",
407
+ "world_model.spatial_reocclusion_field_head.bias",
408
+ "world_model.spatial_disturbance_head.weight",
409
+ "world_model.spatial_disturbance_head.bias",
410
+ "world_model.spatial_uncertainty_head.weight",
411
+ "world_model.spatial_uncertainty_head.bias",
412
+ "world_model.spatial_access_head.weight",
413
+ "world_model.spatial_access_head.bias",
414
+ "planner.residual.trunk.0.weight",
415
+ "planner.residual.trunk.0.bias",
416
+ "planner.residual.trunk.1.weight",
417
+ "planner.residual.trunk.1.bias",
418
+ "planner.residual.trunk.3.weight",
419
+ "planner.residual.trunk.3.bias",
420
+ "planner.residual.success_head.weight",
421
+ "planner.residual.success_head.bias",
422
+ "planner.residual.risk_head.weight",
423
+ "planner.residual.risk_head.bias",
424
+ "planner.residual.residual_head.weight",
425
+ "planner.residual.residual_head.bias"
426
+ ],
427
+ "remapped_keys": {},
428
+ "missing_keys": [
429
+ "memory.gru.weight_ih_l0",
430
+ "memory.gru.weight_hh_l0",
431
+ "memory.gru.bias_ih_l0",
432
+ "memory.gru.bias_hh_l0",
433
+ "memory.gru.weight_ih_l1",
434
+ "memory.gru.weight_hh_l1",
435
+ "memory.gru.bias_ih_l1",
436
+ "memory.gru.bias_hh_l1",
437
+ "memory.token_proj.0.weight",
438
+ "memory.token_proj.0.bias",
439
+ "memory.token_proj.1.weight",
440
+ "memory.token_proj.1.bias",
441
+ "memory.action_proj.0.weight",
442
+ "memory.action_proj.0.bias",
443
+ "memory.action_proj.1.weight",
444
+ "memory.action_proj.1.bias",
445
+ "decoder.actor_role_bias",
446
+ "decoder.revealer_decoder.layers.0.self_attn.in_proj_weight",
447
+ "decoder.revealer_decoder.layers.0.self_attn.in_proj_bias",
448
+ "decoder.revealer_decoder.layers.0.self_attn.out_proj.weight",
449
+ "decoder.revealer_decoder.layers.0.self_attn.out_proj.bias",
450
+ "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_weight",
451
+ "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_bias",
452
+ "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.weight",
453
+ "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.bias",
454
+ "decoder.revealer_decoder.layers.0.linear1.weight",
455
+ "decoder.revealer_decoder.layers.0.linear1.bias",
456
+ "decoder.revealer_decoder.layers.0.linear2.weight",
457
+ "decoder.revealer_decoder.layers.0.linear2.bias",
458
+ "decoder.revealer_decoder.layers.0.norm1.weight",
459
+ "decoder.revealer_decoder.layers.0.norm1.bias",
460
+ "decoder.revealer_decoder.layers.0.norm2.weight",
461
+ "decoder.revealer_decoder.layers.0.norm2.bias",
462
+ "decoder.revealer_decoder.layers.0.norm3.weight",
463
+ "decoder.revealer_decoder.layers.0.norm3.bias",
464
+ "decoder.revealer_decoder.layers.1.self_attn.in_proj_weight",
465
+ "decoder.revealer_decoder.layers.1.self_attn.in_proj_bias",
466
+ "decoder.revealer_decoder.layers.1.self_attn.out_proj.weight",
467
+ "decoder.revealer_decoder.layers.1.self_attn.out_proj.bias",
468
+ "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_weight",
469
+ "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_bias",
470
+ "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.weight",
471
+ "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.bias",
472
+ "decoder.revealer_decoder.layers.1.linear1.weight",
473
+ "decoder.revealer_decoder.layers.1.linear1.bias",
474
+ "decoder.revealer_decoder.layers.1.linear2.weight",
475
+ "decoder.revealer_decoder.layers.1.linear2.bias",
476
+ "decoder.revealer_decoder.layers.1.norm1.weight",
477
+ "decoder.revealer_decoder.layers.1.norm1.bias",
478
+ "decoder.revealer_decoder.layers.1.norm2.weight",
479
+ "decoder.revealer_decoder.layers.1.norm2.bias",
480
+ "decoder.revealer_decoder.layers.1.norm3.weight",
481
+ "decoder.revealer_decoder.layers.1.norm3.bias",
482
+ "decoder.revealer_decoder.layers.2.self_attn.in_proj_weight",
483
+ "decoder.revealer_decoder.layers.2.self_attn.in_proj_bias",
484
+ "decoder.revealer_decoder.layers.2.self_attn.out_proj.weight",
485
+ "decoder.revealer_decoder.layers.2.self_attn.out_proj.bias",
486
+ "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_weight",
487
+ "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_bias",
488
+ "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.weight",
489
+ "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.bias",
490
+ "decoder.revealer_decoder.layers.2.linear1.weight",
491
+ "decoder.revealer_decoder.layers.2.linear1.bias",
492
+ "decoder.revealer_decoder.layers.2.linear2.weight",
493
+ "decoder.revealer_decoder.layers.2.linear2.bias",
494
+ "decoder.revealer_decoder.layers.2.norm1.weight",
495
+ "decoder.revealer_decoder.layers.2.norm1.bias",
496
+ "decoder.revealer_decoder.layers.2.norm2.weight",
497
+ "decoder.revealer_decoder.layers.2.norm2.bias",
498
+ "decoder.revealer_decoder.layers.2.norm3.weight",
499
+ "decoder.revealer_decoder.layers.2.norm3.bias",
500
+ "decoder.revealer_decoder.layers.3.self_attn.in_proj_weight",
501
+ "decoder.revealer_decoder.layers.3.self_attn.in_proj_bias",
502
+ "decoder.revealer_decoder.layers.3.self_attn.out_proj.weight",
503
+ "decoder.revealer_decoder.layers.3.self_attn.out_proj.bias",
504
+ "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_weight",
505
+ "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_bias",
506
+ "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.weight",
507
+ "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.bias",
508
+ "decoder.revealer_decoder.layers.3.linear1.weight",
509
+ "decoder.revealer_decoder.layers.3.linear1.bias",
510
+ "decoder.revealer_decoder.layers.3.linear2.weight",
511
+ "decoder.revealer_decoder.layers.3.linear2.bias",
512
+ "decoder.revealer_decoder.layers.3.norm1.weight",
513
+ "decoder.revealer_decoder.layers.3.norm1.bias",
514
+ "decoder.revealer_decoder.layers.3.norm2.weight",
515
+ "decoder.revealer_decoder.layers.3.norm2.bias",
516
+ "decoder.revealer_decoder.layers.3.norm3.weight",
517
+ "decoder.revealer_decoder.layers.3.norm3.bias",
518
+ "decoder.actor_decoder.layers.0.self_attn.in_proj_weight",
519
+ "decoder.actor_decoder.layers.0.self_attn.in_proj_bias",
520
+ "decoder.actor_decoder.layers.0.self_attn.out_proj.weight",
521
+ "decoder.actor_decoder.layers.0.self_attn.out_proj.bias",
522
+ "decoder.actor_decoder.layers.0.multihead_attn.in_proj_weight",
523
+ "decoder.actor_decoder.layers.0.multihead_attn.in_proj_bias",
524
+ "decoder.actor_decoder.layers.0.multihead_attn.out_proj.weight",
525
+ "decoder.actor_decoder.layers.0.multihead_attn.out_proj.bias",
526
+ "decoder.actor_decoder.layers.0.linear1.weight",
527
+ "decoder.actor_decoder.layers.0.linear1.bias",
528
+ "decoder.actor_decoder.layers.0.linear2.weight",
529
+ "decoder.actor_decoder.layers.0.linear2.bias",
530
+ "decoder.actor_decoder.layers.0.norm1.weight",
531
+ "decoder.actor_decoder.layers.0.norm1.bias",
532
+ "decoder.actor_decoder.layers.0.norm2.weight",
533
+ "decoder.actor_decoder.layers.0.norm2.bias",
534
+ "decoder.actor_decoder.layers.0.norm3.weight",
535
+ "decoder.actor_decoder.layers.0.norm3.bias",
536
+ "decoder.actor_decoder.layers.1.self_attn.in_proj_weight",
537
+ "decoder.actor_decoder.layers.1.self_attn.in_proj_bias",
538
+ "decoder.actor_decoder.layers.1.self_attn.out_proj.weight",
539
+ "decoder.actor_decoder.layers.1.self_attn.out_proj.bias",
540
+ "decoder.actor_decoder.layers.1.multihead_attn.in_proj_weight",
541
+ "decoder.actor_decoder.layers.1.multihead_attn.in_proj_bias",
542
+ "decoder.actor_decoder.layers.1.multihead_attn.out_proj.weight",
543
+ "decoder.actor_decoder.layers.1.multihead_attn.out_proj.bias",
544
+ "decoder.actor_decoder.layers.1.linear1.weight",
545
+ "decoder.actor_decoder.layers.1.linear1.bias",
546
+ "decoder.actor_decoder.layers.1.linear2.weight",
547
+ "decoder.actor_decoder.layers.1.linear2.bias",
548
+ "decoder.actor_decoder.layers.1.norm1.weight",
549
+ "decoder.actor_decoder.layers.1.norm1.bias",
550
+ "decoder.actor_decoder.layers.1.norm2.weight",
551
+ "decoder.actor_decoder.layers.1.norm2.bias",
552
+ "decoder.actor_decoder.layers.1.norm3.weight",
553
+ "decoder.actor_decoder.layers.1.norm3.bias",
554
+ "decoder.actor_decoder.layers.2.self_attn.in_proj_weight",
555
+ "decoder.actor_decoder.layers.2.self_attn.in_proj_bias",
556
+ "decoder.actor_decoder.layers.2.self_attn.out_proj.weight",
557
+ "decoder.actor_decoder.layers.2.self_attn.out_proj.bias",
558
+ "decoder.actor_decoder.layers.2.multihead_attn.in_proj_weight",
559
+ "decoder.actor_decoder.layers.2.multihead_attn.in_proj_bias",
560
+ "decoder.actor_decoder.layers.2.multihead_attn.out_proj.weight",
561
+ "decoder.actor_decoder.layers.2.multihead_attn.out_proj.bias",
562
+ "decoder.actor_decoder.layers.2.linear1.weight",
563
+ "decoder.actor_decoder.layers.2.linear1.bias",
564
+ "decoder.actor_decoder.layers.2.linear2.weight",
565
+ "decoder.actor_decoder.layers.2.linear2.bias",
566
+ "decoder.actor_decoder.layers.2.norm1.weight",
567
+ "decoder.actor_decoder.layers.2.norm1.bias",
568
+ "decoder.actor_decoder.layers.2.norm2.weight",
569
+ "decoder.actor_decoder.layers.2.norm2.bias",
570
+ "decoder.actor_decoder.layers.2.norm3.weight",
571
+ "decoder.actor_decoder.layers.2.norm3.bias",
572
+ "decoder.actor_decoder.layers.3.self_attn.in_proj_weight",
573
+ "decoder.actor_decoder.layers.3.self_attn.in_proj_bias",
574
+ "decoder.actor_decoder.layers.3.self_attn.out_proj.weight",
575
+ "decoder.actor_decoder.layers.3.self_attn.out_proj.bias",
576
+ "decoder.actor_decoder.layers.3.multihead_attn.in_proj_weight",
577
+ "decoder.actor_decoder.layers.3.multihead_attn.in_proj_bias",
578
+ "decoder.actor_decoder.layers.3.multihead_attn.out_proj.weight",
579
+ "decoder.actor_decoder.layers.3.multihead_attn.out_proj.bias",
580
+ "decoder.actor_decoder.layers.3.linear1.weight",
581
+ "decoder.actor_decoder.layers.3.linear1.bias",
582
+ "decoder.actor_decoder.layers.3.linear2.weight",
583
+ "decoder.actor_decoder.layers.3.linear2.bias",
584
+ "decoder.actor_decoder.layers.3.norm1.weight",
585
+ "decoder.actor_decoder.layers.3.norm1.bias",
586
+ "decoder.actor_decoder.layers.3.norm2.weight",
587
+ "decoder.actor_decoder.layers.3.norm2.bias",
588
+ "decoder.actor_decoder.layers.3.norm3.weight",
589
+ "decoder.actor_decoder.layers.3.norm3.bias",
590
+ "decoder.revealer_mean.weight",
591
+ "decoder.revealer_mean.bias",
592
+ "decoder.revealer_log_std.weight",
593
+ "decoder.revealer_log_std.bias",
594
+ "decoder.actor_mean.weight",
595
+ "decoder.actor_mean.bias",
596
+ "decoder.actor_log_std.weight",
597
+ "decoder.actor_log_std.bias",
598
+ "decoder.proposal_score.0.weight",
599
+ "decoder.proposal_score.0.bias",
600
+ "decoder.proposal_score.1.weight",
601
+ "decoder.proposal_score.1.bias"
602
+ ],
603
+ "unexpected_keys": []
604
+ },
605
+ "trainable_parameter_names": [
606
+ "fusion.camera_embedding.weight",
607
+ "fusion.cross_view_transformer.layers.0.self_attn.in_proj_weight",
608
+ "fusion.cross_view_transformer.layers.0.self_attn.in_proj_bias",
609
+ "fusion.cross_view_transformer.layers.0.self_attn.out_proj.weight",
610
+ "fusion.cross_view_transformer.layers.0.self_attn.out_proj.bias",
611
+ "fusion.cross_view_transformer.layers.0.linear1.weight",
612
+ "fusion.cross_view_transformer.layers.0.linear1.bias",
613
+ "fusion.cross_view_transformer.layers.0.linear2.weight",
614
+ "fusion.cross_view_transformer.layers.0.linear2.bias",
615
+ "fusion.cross_view_transformer.layers.0.norm1.weight",
616
+ "fusion.cross_view_transformer.layers.0.norm1.bias",
617
+ "fusion.cross_view_transformer.layers.0.norm2.weight",
618
+ "fusion.cross_view_transformer.layers.0.norm2.bias",
619
+ "fusion.cross_view_transformer.layers.1.self_attn.in_proj_weight",
620
+ "fusion.cross_view_transformer.layers.1.self_attn.in_proj_bias",
621
+ "fusion.cross_view_transformer.layers.1.self_attn.out_proj.weight",
622
+ "fusion.cross_view_transformer.layers.1.self_attn.out_proj.bias",
623
+ "fusion.cross_view_transformer.layers.1.linear1.weight",
624
+ "fusion.cross_view_transformer.layers.1.linear1.bias",
625
+ "fusion.cross_view_transformer.layers.1.linear2.weight",
626
+ "fusion.cross_view_transformer.layers.1.linear2.bias",
627
+ "fusion.cross_view_transformer.layers.1.norm1.weight",
628
+ "fusion.cross_view_transformer.layers.1.norm1.bias",
629
+ "fusion.cross_view_transformer.layers.1.norm2.weight",
630
+ "fusion.cross_view_transformer.layers.1.norm2.bias",
631
+ "fusion.cross_view_transformer.layers.2.self_attn.in_proj_weight",
632
+ "fusion.cross_view_transformer.layers.2.self_attn.in_proj_bias",
633
+ "fusion.cross_view_transformer.layers.2.self_attn.out_proj.weight",
634
+ "fusion.cross_view_transformer.layers.2.self_attn.out_proj.bias",
635
+ "fusion.cross_view_transformer.layers.2.linear1.weight",
636
+ "fusion.cross_view_transformer.layers.2.linear1.bias",
637
+ "fusion.cross_view_transformer.layers.2.linear2.weight",
638
+ "fusion.cross_view_transformer.layers.2.linear2.bias",
639
+ "fusion.cross_view_transformer.layers.2.norm1.weight",
640
+ "fusion.cross_view_transformer.layers.2.norm1.bias",
641
+ "fusion.cross_view_transformer.layers.2.norm2.weight",
642
+ "fusion.cross_view_transformer.layers.2.norm2.bias",
643
+ "fusion.cross_view_transformer.layers.3.self_attn.in_proj_weight",
644
+ "fusion.cross_view_transformer.layers.3.self_attn.in_proj_bias",
645
+ "fusion.cross_view_transformer.layers.3.self_attn.out_proj.weight",
646
+ "fusion.cross_view_transformer.layers.3.self_attn.out_proj.bias",
647
+ "fusion.cross_view_transformer.layers.3.linear1.weight",
648
+ "fusion.cross_view_transformer.layers.3.linear1.bias",
649
+ "fusion.cross_view_transformer.layers.3.linear2.weight",
650
+ "fusion.cross_view_transformer.layers.3.linear2.bias",
651
+ "fusion.cross_view_transformer.layers.3.norm1.weight",
652
+ "fusion.cross_view_transformer.layers.3.norm1.bias",
653
+ "fusion.cross_view_transformer.layers.3.norm2.weight",
654
+ "fusion.cross_view_transformer.layers.3.norm2.bias",
655
+ "fusion.geometry_fusion.attn.in_proj_weight",
656
+ "fusion.geometry_fusion.attn.in_proj_bias",
657
+ "fusion.geometry_fusion.attn.out_proj.weight",
658
+ "fusion.geometry_fusion.attn.out_proj.bias",
659
+ "fusion.geometry_fusion.gate.0.weight",
660
+ "fusion.geometry_fusion.gate.0.bias",
661
+ "fusion.geometry_fusion.gate.1.weight",
662
+ "fusion.geometry_fusion.gate.1.bias",
663
+ "fusion.geometry_fusion.gate.3.weight",
664
+ "fusion.geometry_fusion.gate.3.bias",
665
+ "fusion.geometry_fusion.out.0.weight",
666
+ "fusion.geometry_fusion.out.0.bias",
667
+ "fusion.geometry_fusion.out.1.weight",
668
+ "fusion.geometry_fusion.out.1.bias",
669
+ "fusion.proprio_adapter.0.weight",
670
+ "fusion.proprio_adapter.0.bias",
671
+ "fusion.proprio_adapter.1.weight",
672
+ "fusion.proprio_adapter.1.bias",
673
+ "memory.gru.weight_ih_l0",
674
+ "memory.gru.weight_hh_l0",
675
+ "memory.gru.bias_ih_l0",
676
+ "memory.gru.bias_hh_l0",
677
+ "memory.gru.weight_ih_l1",
678
+ "memory.gru.weight_hh_l1",
679
+ "memory.gru.bias_ih_l1",
680
+ "memory.gru.bias_hh_l1",
681
+ "memory.token_proj.0.weight",
682
+ "memory.token_proj.0.bias",
683
+ "memory.token_proj.1.weight",
684
+ "memory.token_proj.1.bias",
685
+ "memory.action_proj.0.weight",
686
+ "memory.action_proj.0.bias",
687
+ "memory.action_proj.1.weight",
688
+ "memory.action_proj.1.bias",
689
+ "memory.uncertainty_head.0.weight",
690
+ "memory.uncertainty_head.0.bias",
691
+ "memory.uncertainty_head.1.weight",
692
+ "memory.uncertainty_head.1.bias",
693
+ "decoder.actor_role_bias",
694
+ "decoder.revealer_decoder.layers.0.self_attn.in_proj_weight",
695
+ "decoder.revealer_decoder.layers.0.self_attn.in_proj_bias",
696
+ "decoder.revealer_decoder.layers.0.self_attn.out_proj.weight",
697
+ "decoder.revealer_decoder.layers.0.self_attn.out_proj.bias",
698
+ "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_weight",
699
+ "decoder.revealer_decoder.layers.0.multihead_attn.in_proj_bias",
700
+ "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.weight",
701
+ "decoder.revealer_decoder.layers.0.multihead_attn.out_proj.bias",
702
+ "decoder.revealer_decoder.layers.0.linear1.weight",
703
+ "decoder.revealer_decoder.layers.0.linear1.bias",
704
+ "decoder.revealer_decoder.layers.0.linear2.weight",
705
+ "decoder.revealer_decoder.layers.0.linear2.bias",
706
+ "decoder.revealer_decoder.layers.0.norm1.weight",
707
+ "decoder.revealer_decoder.layers.0.norm1.bias",
708
+ "decoder.revealer_decoder.layers.0.norm2.weight",
709
+ "decoder.revealer_decoder.layers.0.norm2.bias",
710
+ "decoder.revealer_decoder.layers.0.norm3.weight",
711
+ "decoder.revealer_decoder.layers.0.norm3.bias",
712
+ "decoder.revealer_decoder.layers.1.self_attn.in_proj_weight",
713
+ "decoder.revealer_decoder.layers.1.self_attn.in_proj_bias",
714
+ "decoder.revealer_decoder.layers.1.self_attn.out_proj.weight",
715
+ "decoder.revealer_decoder.layers.1.self_attn.out_proj.bias",
716
+ "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_weight",
717
+ "decoder.revealer_decoder.layers.1.multihead_attn.in_proj_bias",
718
+ "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.weight",
719
+ "decoder.revealer_decoder.layers.1.multihead_attn.out_proj.bias",
720
+ "decoder.revealer_decoder.layers.1.linear1.weight",
721
+ "decoder.revealer_decoder.layers.1.linear1.bias",
722
+ "decoder.revealer_decoder.layers.1.linear2.weight",
723
+ "decoder.revealer_decoder.layers.1.linear2.bias",
724
+ "decoder.revealer_decoder.layers.1.norm1.weight",
725
+ "decoder.revealer_decoder.layers.1.norm1.bias",
726
+ "decoder.revealer_decoder.layers.1.norm2.weight",
727
+ "decoder.revealer_decoder.layers.1.norm2.bias",
728
+ "decoder.revealer_decoder.layers.1.norm3.weight",
729
+ "decoder.revealer_decoder.layers.1.norm3.bias",
730
+ "decoder.revealer_decoder.layers.2.self_attn.in_proj_weight",
731
+ "decoder.revealer_decoder.layers.2.self_attn.in_proj_bias",
732
+ "decoder.revealer_decoder.layers.2.self_attn.out_proj.weight",
733
+ "decoder.revealer_decoder.layers.2.self_attn.out_proj.bias",
734
+ "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_weight",
735
+ "decoder.revealer_decoder.layers.2.multihead_attn.in_proj_bias",
736
+ "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.weight",
737
+ "decoder.revealer_decoder.layers.2.multihead_attn.out_proj.bias",
738
+ "decoder.revealer_decoder.layers.2.linear1.weight",
739
+ "decoder.revealer_decoder.layers.2.linear1.bias",
740
+ "decoder.revealer_decoder.layers.2.linear2.weight",
741
+ "decoder.revealer_decoder.layers.2.linear2.bias",
742
+ "decoder.revealer_decoder.layers.2.norm1.weight",
743
+ "decoder.revealer_decoder.layers.2.norm1.bias",
744
+ "decoder.revealer_decoder.layers.2.norm2.weight",
745
+ "decoder.revealer_decoder.layers.2.norm2.bias",
746
+ "decoder.revealer_decoder.layers.2.norm3.weight",
747
+ "decoder.revealer_decoder.layers.2.norm3.bias",
748
+ "decoder.revealer_decoder.layers.3.self_attn.in_proj_weight",
749
+ "decoder.revealer_decoder.layers.3.self_attn.in_proj_bias",
750
+ "decoder.revealer_decoder.layers.3.self_attn.out_proj.weight",
751
+ "decoder.revealer_decoder.layers.3.self_attn.out_proj.bias",
752
+ "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_weight",
753
+ "decoder.revealer_decoder.layers.3.multihead_attn.in_proj_bias",
754
+ "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.weight",
755
+ "decoder.revealer_decoder.layers.3.multihead_attn.out_proj.bias",
756
+ "decoder.revealer_decoder.layers.3.linear1.weight",
757
+ "decoder.revealer_decoder.layers.3.linear1.bias",
758
+ "decoder.revealer_decoder.layers.3.linear2.weight",
759
+ "decoder.revealer_decoder.layers.3.linear2.bias",
760
+ "decoder.revealer_decoder.layers.3.norm1.weight",
761
+ "decoder.revealer_decoder.layers.3.norm1.bias",
762
+ "decoder.revealer_decoder.layers.3.norm2.weight",
763
+ "decoder.revealer_decoder.layers.3.norm2.bias",
764
+ "decoder.revealer_decoder.layers.3.norm3.weight",
765
+ "decoder.revealer_decoder.layers.3.norm3.bias",
766
+ "decoder.actor_decoder.layers.0.self_attn.in_proj_weight",
767
+ "decoder.actor_decoder.layers.0.self_attn.in_proj_bias",
768
+ "decoder.actor_decoder.layers.0.self_attn.out_proj.weight",
769
+ "decoder.actor_decoder.layers.0.self_attn.out_proj.bias",
770
+ "decoder.actor_decoder.layers.0.multihead_attn.in_proj_weight",
771
+ "decoder.actor_decoder.layers.0.multihead_attn.in_proj_bias",
772
+ "decoder.actor_decoder.layers.0.multihead_attn.out_proj.weight",
773
+ "decoder.actor_decoder.layers.0.multihead_attn.out_proj.bias",
774
+ "decoder.actor_decoder.layers.0.linear1.weight",
775
+ "decoder.actor_decoder.layers.0.linear1.bias",
776
+ "decoder.actor_decoder.layers.0.linear2.weight",
777
+ "decoder.actor_decoder.layers.0.linear2.bias",
778
+ "decoder.actor_decoder.layers.0.norm1.weight",
779
+ "decoder.actor_decoder.layers.0.norm1.bias",
780
+ "decoder.actor_decoder.layers.0.norm2.weight",
781
+ "decoder.actor_decoder.layers.0.norm2.bias",
782
+ "decoder.actor_decoder.layers.0.norm3.weight",
783
+ "decoder.actor_decoder.layers.0.norm3.bias",
784
+ "decoder.actor_decoder.layers.1.self_attn.in_proj_weight",
785
+ "decoder.actor_decoder.layers.1.self_attn.in_proj_bias",
786
+ "decoder.actor_decoder.layers.1.self_attn.out_proj.weight",
787
+ "decoder.actor_decoder.layers.1.self_attn.out_proj.bias",
788
+ "decoder.actor_decoder.layers.1.multihead_attn.in_proj_weight",
789
+ "decoder.actor_decoder.layers.1.multihead_attn.in_proj_bias",
790
+ "decoder.actor_decoder.layers.1.multihead_attn.out_proj.weight",
791
+ "decoder.actor_decoder.layers.1.multihead_attn.out_proj.bias",
792
+ "decoder.actor_decoder.layers.1.linear1.weight",
793
+ "decoder.actor_decoder.layers.1.linear1.bias",
794
+ "decoder.actor_decoder.layers.1.linear2.weight",
795
+ "decoder.actor_decoder.layers.1.linear2.bias",
796
+ "decoder.actor_decoder.layers.1.norm1.weight",
797
+ "decoder.actor_decoder.layers.1.norm1.bias",
798
+ "decoder.actor_decoder.layers.1.norm2.weight",
799
+ "decoder.actor_decoder.layers.1.norm2.bias",
800
+ "decoder.actor_decoder.layers.1.norm3.weight",
801
+ "decoder.actor_decoder.layers.1.norm3.bias",
802
+ "decoder.actor_decoder.layers.2.self_attn.in_proj_weight",
803
+ "decoder.actor_decoder.layers.2.self_attn.in_proj_bias",
804
+ "decoder.actor_decoder.layers.2.self_attn.out_proj.weight",
805
+ "decoder.actor_decoder.layers.2.self_attn.out_proj.bias",
806
+ "decoder.actor_decoder.layers.2.multihead_attn.in_proj_weight",
807
+ "decoder.actor_decoder.layers.2.multihead_attn.in_proj_bias",
808
+ "decoder.actor_decoder.layers.2.multihead_attn.out_proj.weight",
809
+ "decoder.actor_decoder.layers.2.multihead_attn.out_proj.bias",
810
+ "decoder.actor_decoder.layers.2.linear1.weight",
811
+ "decoder.actor_decoder.layers.2.linear1.bias",
812
+ "decoder.actor_decoder.layers.2.linear2.weight",
813
+ "decoder.actor_decoder.layers.2.linear2.bias",
814
+ "decoder.actor_decoder.layers.2.norm1.weight",
815
+ "decoder.actor_decoder.layers.2.norm1.bias",
816
+ "decoder.actor_decoder.layers.2.norm2.weight",
817
+ "decoder.actor_decoder.layers.2.norm2.bias",
818
+ "decoder.actor_decoder.layers.2.norm3.weight",
819
+ "decoder.actor_decoder.layers.2.norm3.bias",
820
+ "decoder.actor_decoder.layers.3.self_attn.in_proj_weight",
821
+ "decoder.actor_decoder.layers.3.self_attn.in_proj_bias",
822
+ "decoder.actor_decoder.layers.3.self_attn.out_proj.weight",
823
+ "decoder.actor_decoder.layers.3.self_attn.out_proj.bias",
824
+ "decoder.actor_decoder.layers.3.multihead_attn.in_proj_weight",
825
+ "decoder.actor_decoder.layers.3.multihead_attn.in_proj_bias",
826
+ "decoder.actor_decoder.layers.3.multihead_attn.out_proj.weight",
827
+ "decoder.actor_decoder.layers.3.multihead_attn.out_proj.bias",
828
+ "decoder.actor_decoder.layers.3.linear1.weight",
829
+ "decoder.actor_decoder.layers.3.linear1.bias",
830
+ "decoder.actor_decoder.layers.3.linear2.weight",
831
+ "decoder.actor_decoder.layers.3.linear2.bias",
832
+ "decoder.actor_decoder.layers.3.norm1.weight",
833
+ "decoder.actor_decoder.layers.3.norm1.bias",
834
+ "decoder.actor_decoder.layers.3.norm2.weight",
835
+ "decoder.actor_decoder.layers.3.norm2.bias",
836
+ "decoder.actor_decoder.layers.3.norm3.weight",
837
+ "decoder.actor_decoder.layers.3.norm3.bias",
838
+ "decoder.query_embed.weight",
839
+ "decoder.revealer_mean.weight",
840
+ "decoder.revealer_mean.bias",
841
+ "decoder.revealer_log_std.weight",
842
+ "decoder.revealer_log_std.bias",
843
+ "decoder.actor_mean.weight",
844
+ "decoder.actor_mean.bias",
845
+ "decoder.actor_log_std.weight",
846
+ "decoder.actor_log_std.bias",
847
+ "decoder.coordination.0.weight",
848
+ "decoder.coordination.0.bias",
849
+ "decoder.coordination.1.weight",
850
+ "decoder.coordination.1.bias",
851
+ "decoder.coordination.3.weight",
852
+ "decoder.coordination.3.bias",
853
+ "decoder.proposal_score.0.weight",
854
+ "decoder.proposal_score.0.bias",
855
+ "decoder.proposal_score.1.weight",
856
+ "decoder.proposal_score.1.bias"
857
+ ],
858
+ "best_val_total": 0.33879721803324564,
859
+ "history": [
860
+ {
861
+ "epoch": 0,
862
+ "train": {
863
+ "action": 0.6207457804366162,
864
+ "distillation": 0.0,
865
+ "gate": 0.0,
866
+ "planner_ranking": 0.0,
867
+ "planner_risk": 0.0,
868
+ "planner_success": 0.0,
869
+ "proposal_diversity": 0.0,
870
+ "proposal_mode": 0.0,
871
+ "proposal_ranking": 1.210943855737385,
872
+ "proposal_reconstruction": 0.0,
873
+ "proposal_success": 0.7382262051105499,
874
+ "role_swap_consistency": 0.0,
875
+ "total": 0.8909744981088137,
876
+ "transition": 0.0,
877
+ "world_model": 0.0
878
+ },
879
+ "val": {
880
+ "action": 0.19714589416980743,
881
+ "distillation": 0.0,
882
+ "gate": 0.0,
883
+ "planner_ranking": 0.0,
884
+ "planner_risk": 0.0,
885
+ "planner_success": 0.0,
886
+ "proposal_diversity": 0.0,
887
+ "proposal_mode": 0.0,
888
+ "proposal_ranking": 1.1202252422060286,
889
+ "proposal_reconstruction": 0.0,
890
+ "proposal_success": 0.6674523949623108,
891
+ "role_swap_consistency": 0.0,
892
+ "total": 0.44527397411210196,
893
+ "transition": 0.0,
894
+ "world_model": 0.0
895
+ }
896
+ },
897
+ {
898
+ "epoch": 1,
899
+ "train": {
900
+ "action": 0.17574271442074524,
901
+ "distillation": 0.0,
902
+ "gate": 0.0,
903
+ "planner_ranking": 0.0,
904
+ "planner_risk": 0.0,
905
+ "planner_success": 0.0,
906
+ "proposal_diversity": 0.0,
907
+ "proposal_mode": 0.0,
908
+ "proposal_ranking": 1.2226158066799766,
909
+ "proposal_reconstruction": 0.0,
910
+ "proposal_success": 0.7345987056431017,
911
+ "role_swap_consistency": 0.0,
912
+ "total": 0.44728693836613703,
913
+ "transition": 0.0,
914
+ "world_model": 0.0
915
+ },
916
+ "val": {
917
+ "action": 0.1045121390904699,
918
+ "distillation": 0.0,
919
+ "gate": 0.0,
920
+ "planner_ranking": 0.0,
921
+ "planner_risk": 0.0,
922
+ "planner_success": 0.0,
923
+ "proposal_diversity": 0.0,
924
+ "proposal_mode": 0.0,
925
+ "proposal_ranking": 1.2785813467843192,
926
+ "proposal_reconstruction": 0.0,
927
+ "proposal_success": 0.7802676217896598,
928
+ "role_swap_consistency": 0.0,
929
+ "total": 0.3899314616407667,
930
+ "transition": 0.0,
931
+ "world_model": 0.0
932
+ }
933
+ },
934
+ {
935
+ "epoch": 2,
936
+ "train": {
937
+ "action": 0.18294762859219,
938
+ "distillation": 0.0,
939
+ "gate": 0.0,
940
+ "planner_ranking": 0.0,
941
+ "planner_risk": 0.0,
942
+ "planner_success": 0.0,
943
+ "proposal_diversity": 0.0,
944
+ "proposal_mode": 0.0,
945
+ "proposal_ranking": 1.2462087493193776,
946
+ "proposal_reconstruction": 0.0,
947
+ "proposal_success": 0.7633898587603318,
948
+ "role_swap_consistency": 0.0,
949
+ "total": 0.46148573254284103,
950
+ "transition": 0.0,
951
+ "world_model": 0.0
952
+ },
953
+ "val": {
954
+ "action": 0.10954179934092931,
955
+ "distillation": 0.0,
956
+ "gate": 0.0,
957
+ "planner_ranking": 0.0,
958
+ "planner_risk": 0.0,
959
+ "planner_success": 0.0,
960
+ "proposal_diversity": 0.0,
961
+ "proposal_mode": 0.0,
962
+ "proposal_ranking": 1.1277755498886108,
963
+ "proposal_reconstruction": 0.0,
964
+ "proposal_success": 0.6412685768944877,
965
+ "role_swap_consistency": 0.0,
966
+ "total": 0.355660366160529,
967
+ "transition": 0.0,
968
+ "world_model": 0.0
969
+ }
970
+ },
971
+ {
972
+ "epoch": 3,
973
+ "train": {
974
+ "action": 0.175935955031922,
975
+ "distillation": 0.0,
976
+ "gate": 0.0,
977
+ "planner_ranking": 0.0,
978
+ "planner_risk": 0.0,
979
+ "planner_success": 0.0,
980
+ "proposal_diversity": 0.0,
981
+ "proposal_mode": 0.0,
982
+ "proposal_ranking": 1.230568854432357,
983
+ "proposal_reconstruction": 0.0,
984
+ "proposal_success": 0.7617053389549255,
985
+ "role_swap_consistency": 0.0,
986
+ "total": 0.4519259302239669,
987
+ "transition": 0.0,
988
+ "world_model": 0.0
989
+ },
990
+ "val": {
991
+ "action": 0.0885860047170094,
992
+ "distillation": 0.0,
993
+ "gate": 0.0,
994
+ "planner_ranking": 0.0,
995
+ "planner_risk": 0.0,
996
+ "planner_success": 0.0,
997
+ "proposal_diversity": 0.0,
998
+ "proposal_mode": 0.0,
999
+ "proposal_ranking": 1.1679236718586512,
1000
+ "proposal_reconstruction": 0.0,
1001
+ "proposal_success": 0.6251888232571738,
1002
+ "role_swap_consistency": 0.0,
1003
+ "total": 0.33879721803324564,
1004
+ "transition": 0.0,
1005
+ "world_model": 0.0
1006
+ }
1007
+ }
1008
+ ],
1009
+ "train_spec": {
1010
+ "track_id": "bag_track",
1011
+ "suite": "maniskill3",
1012
+ "benchmark_task": "PutEggplantInBasketRetrievalProxy-v1",
1013
+ "model_variant": "trunk_only_ft",
1014
+ "seed": 23,
1015
+ "train_demos": 32,
1016
+ "val_demos": 8,
1017
+ "init_checkpoint_group": "/workspace/workspace/VLAarchtests2/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt",
1018
+ "optimizer": "adamw",
1019
+ "learning_rate": 0.0001,
1020
+ "lr_schedule": "constant",
1021
+ "batch_size": 4,
1022
+ "augmentations": "none",
1023
+ "early_stopping_metric": "val_total",
1024
+ "max_gradient_steps": 114,
1025
+ "unfreeze_scope": "fusion_memory_decoder",
1026
+ "dataset_split_id": "bag_bridge_smoke_v1_dataset_seed17",
1027
+ "same_data_policy": true,
1028
+ "same_init_policy": true
1029
+ }
1030
+ }
outputs/maniskill_cloth_bridge_smoke_v1/adapter_active_ft_seed17/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9927c95e5aea8dff34444bf04db80491212579a22df8a56e568851cdbcba243
3
+ size 878859044
outputs/maniskill_cloth_bridge_smoke_v1/adapter_active_ft_seed17/summary.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/maniskill_cloth_bridge_smoke_v1/adapter_active_ft_seed23/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee11fa2e4f3d467201da345a1da4752a563c87337c7c6183ede39c3246319302
3
+ size 878859044
outputs/maniskill_cloth_bridge_smoke_v1/adapter_active_ft_seed23/summary.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/maniskill_cloth_bridge_smoke_v1/adapter_active_ft_seed29/checkpoint_best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccd061ab8d90059a771b4b8079f0ccb9884b1b30a95a895b7276331c35a0f016
3
+ size 878859044