Tr0612 committed on
Commit
0346a87
·
1 Parent(s): f1cb62b

Add files using upload-large-folder tool

Browse files
Files changed (30) hide show
  1. checkpoints_stage2_moe_full_peg_only/architecture.txt +10 -0
  2. checkpoints_stage2_moe_full_peg_only/moe_router_entropy.csv +17 -0
  3. checkpoints_stage2_moe_full_peg_only/moe_router_weights.csv +65 -0
  4. checkpoints_stage2_moe_full_peg_only/tensorboard/events.out.tfevents.1776852769.praise-5080.2844183.0 +3 -0
  5. checkpoints_stage2_moe_full_peg_only/train_config.json +59 -0
  6. checkpoints_stage2_moe_full_unfreeze/action_mean.npy +3 -0
  7. checkpoints_stage2_moe_full_unfreeze/action_std.npy +3 -0
  8. checkpoints_stage2_moe_full_unfreeze/architecture.txt +10 -0
  9. checkpoints_stage2_moe_full_unfreeze/best.pt +3 -0
  10. checkpoints_stage2_moe_full_unfreeze/latest.pt +3 -0
  11. checkpoints_stage2_moe_full_unfreeze/moe_router_entropy.csv +26 -0
  12. checkpoints_stage2_moe_full_unfreeze/moe_router_weights.csv +101 -0
  13. checkpoints_stage2_moe_full_unfreeze/tensorboard/events.out.tfevents.1776640393.praise-5080.2198461.0 +3 -0
  14. checkpoints_stage2_moe_full_unfreeze/train_config.json +59 -0
  15. checkpoints_stage2_moe_text_unfreeze/action_mean.npy +3 -0
  16. checkpoints_stage2_moe_text_unfreeze/action_std.npy +3 -0
  17. checkpoints_stage2_moe_text_unfreeze/architecture.txt +10 -0
  18. checkpoints_stage2_moe_text_unfreeze/best.pt +3 -0
  19. checkpoints_stage2_moe_text_unfreeze/latest.pt +3 -0
  20. checkpoints_stage2_moe_text_unfreeze/moe_router_entropy.csv +21 -0
  21. checkpoints_stage2_moe_text_unfreeze/moe_router_weights.csv +81 -0
  22. checkpoints_stage2_moe_text_unfreeze/tensorboard/events.out.tfevents.1776638724.praise-5080.2184587.0 +3 -0
  23. checkpoints_stage2_moe_text_unfreeze/train_config.json +59 -0
  24. checkpoints_stage2_no_moe_unfreeze/action_mean.npy +3 -0
  25. checkpoints_stage2_no_moe_unfreeze/action_std.npy +3 -0
  26. checkpoints_stage2_no_moe_unfreeze/architecture.txt +10 -0
  27. checkpoints_stage2_no_moe_unfreeze/best.pt +3 -0
  28. checkpoints_stage2_no_moe_unfreeze/latest.pt +3 -0
  29. checkpoints_stage2_no_moe_unfreeze/tensorboard/events.out.tfevents.1776637048.praise-5080.2170321.0 +3 -0
  30. checkpoints_stage2_no_moe_unfreeze/train_config.json +59 -0
checkpoints_stage2_moe_full_peg_only/architecture.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ model_tag: moe_full
2
+ task: peg-insert-side-v3
3
+ config: experiments/moe_full_unfreeze.yaml
4
+ action_head_type: moe
5
+ router_condition: action_input
6
+ freeze_vision: true
7
+ freeze_text: true
8
+ unfreeze_vision_last_n_layers: 2
9
+ unfreeze_text_last_n_layers: 2
10
+ act_chunk_size: n/a
checkpoints_stage2_moe_full_peg_only/moe_router_entropy.csv ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,task_name,mean_router_entropy,sample_count
2
+ 1,short_metaworld,1.0972516559274759,4250
3
+ 2,short_metaworld,0.868278188122546,4250
4
+ 3,short_metaworld,0.8644592178498998,4250
5
+ 4,short_metaworld,0.8801690644621849,4250
6
+ 5,short_metaworld,0.9024338802160585,4250
7
+ 6,short_metaworld,0.9231361929274657,4250
8
+ 7,short_metaworld,0.9505329890706959,4250
9
+ 8,short_metaworld,0.9680941077444484,4250
10
+ 9,short_metaworld,0.9897653412012493,4250
11
+ 10,short_metaworld,1.0097954931697424,4250
12
+ 11,short_metaworld,1.019337093510172,4250
13
+ 12,short_metaworld,1.0326422810493148,4250
14
+ 13,short_metaworld,1.0416921249838436,4250
15
+ 14,short_metaworld,1.0466027086096652,4250
16
+ 15,short_metaworld,1.0440011160680476,4250
17
+ 16,short_metaworld,1.0458287356697462,4250
checkpoints_stage2_moe_full_peg_only/moe_router_weights.csv ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,task_name,expert_idx,mean_router_weight,sample_count
2
+ 1,short_metaworld,0,0.2524724304676056,4250
3
+ 1,short_metaworld,1,0.2697482109069824,4250
4
+ 1,short_metaworld,2,0.26585105061531067,4250
5
+ 1,short_metaworld,3,0.21192866563796997,4250
6
+ 2,short_metaworld,0,0.3143608272075653,4250
7
+ 2,short_metaworld,1,0.3268682062625885,4250
8
+ 2,short_metaworld,2,0.21709245443344116,4250
9
+ 2,short_metaworld,3,0.14167837798595428,4250
10
+ 3,short_metaworld,0,0.3089982569217682,4250
11
+ 3,short_metaworld,1,0.3164787292480469,4250
12
+ 3,short_metaworld,2,0.2356213927268982,4250
13
+ 3,short_metaworld,3,0.13890130817890167,4250
14
+ 4,short_metaworld,0,0.29757270216941833,4250
15
+ 4,short_metaworld,1,0.30781999230384827,4250
16
+ 4,short_metaworld,2,0.2540871202945709,4250
17
+ 4,short_metaworld,3,0.1405191272497177,4250
18
+ 5,short_metaworld,0,0.28526824712753296,4250
19
+ 5,short_metaworld,1,0.29758328199386597,4250
20
+ 5,short_metaworld,2,0.27030232548713684,4250
21
+ 5,short_metaworld,3,0.14684562385082245,4250
22
+ 6,short_metaworld,0,0.27950519323349,4250
23
+ 6,short_metaworld,1,0.289709210395813,4250
24
+ 6,short_metaworld,2,0.27237799763679504,4250
25
+ 6,short_metaworld,3,0.15840782225131989,4250
26
+ 7,short_metaworld,0,0.26899853348731995,4250
27
+ 7,short_metaworld,1,0.27762043476104736,4250
28
+ 7,short_metaworld,2,0.2824629247188568,4250
29
+ 7,short_metaworld,3,0.17091910541057587,4250
30
+ 8,short_metaworld,0,0.26036784052848816,4250
31
+ 8,short_metaworld,1,0.26750805974006653,4250
32
+ 8,short_metaworld,2,0.2912062704563141,4250
33
+ 8,short_metaworld,3,0.18091799318790436,4250
34
+ 9,short_metaworld,0,0.2523835301399231,4250
35
+ 9,short_metaworld,1,0.2574305534362793,4250
36
+ 9,short_metaworld,2,0.2924705445766449,4250
37
+ 9,short_metaworld,3,0.19771520793437958,4250
38
+ 10,short_metaworld,0,0.23678657412528992,4250
39
+ 10,short_metaworld,1,0.2441885620355606,4250
40
+ 10,short_metaworld,2,0.29841580986976624,4250
41
+ 10,short_metaworld,3,0.22060911357402802,4250
42
+ 11,short_metaworld,0,0.22633861005306244,4250
43
+ 11,short_metaworld,1,0.23437286913394928,4250
44
+ 11,short_metaworld,2,0.3036373257637024,4250
45
+ 11,short_metaworld,3,0.23565179109573364,4250
46
+ 12,short_metaworld,0,0.220066636800766,4250
47
+ 12,short_metaworld,1,0.22533780336380005,4250
48
+ 12,short_metaworld,2,0.2991962134838104,4250
49
+ 12,short_metaworld,3,0.25539910793304443,4250
50
+ 13,short_metaworld,0,0.21208599209785461,4250
51
+ 13,short_metaworld,1,0.21645447611808777,4250
52
+ 13,short_metaworld,2,0.2984868884086609,4250
53
+ 13,short_metaworld,3,0.2729724049568176,4250
54
+ 14,short_metaworld,0,0.21153134107589722,4250
55
+ 14,short_metaworld,1,0.2144383043050766,4250
56
+ 14,short_metaworld,2,0.2981346845626831,4250
57
+ 14,short_metaworld,3,0.275896281003952,4250
58
+ 15,short_metaworld,0,0.20906934142112732,4250
59
+ 15,short_metaworld,1,0.21298854053020477,4250
60
+ 15,short_metaworld,2,0.29622671008110046,4250
61
+ 15,short_metaworld,3,0.2817155122756958,4250
62
+ 16,short_metaworld,0,0.2072872817516327,4250
63
+ 16,short_metaworld,1,0.21117538213729858,4250
64
+ 16,short_metaworld,2,0.2976193130016327,4250
65
+ 16,short_metaworld,3,0.2839184105396271,4250
checkpoints_stage2_moe_full_peg_only/tensorboard/events.out.tfevents.1776852769.praise-5080.2844183.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9c96de0901951335e66869ff79d7ccf87555bae5029154ea4971f1e68d10ad4
3
+ size 399563
checkpoints_stage2_moe_full_peg_only/train_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_type": "short_metaworld",
3
+ "data_root": "data/short-metaworld-vla",
4
+ "train_jsonl": "",
5
+ "val_jsonl": "",
6
+ "val_ratio": 0.15,
7
+ "vision_model_name": "google/siglip2-base-patch16-224",
8
+ "text_model_name": "google/siglip2-base-patch16-224",
9
+ "separate_backbones": false,
10
+ "image_size": 224,
11
+ "freeze_vision": true,
12
+ "freeze_text": true,
13
+ "unfreeze_vision_last_n_layers": 2,
14
+ "unfreeze_text_last_n_layers": 2,
15
+ "fusion_type": "cross_attn",
16
+ "proj_dim": 512,
17
+ "fusion_hidden_dim": 1024,
18
+ "fusion_out_dim": 512,
19
+ "fusion_num_layers": 3,
20
+ "fusion_num_heads": 8,
21
+ "fusion_dropout": 0.1,
22
+ "normalize_embeddings": true,
23
+ "action_head_type": "moe",
24
+ "action_mlp_hidden_dim": 256,
25
+ "action_mlp_layers": 2,
26
+ "action_mlp_dropout": 0.1,
27
+ "moe_num_experts": 4,
28
+ "moe_hidden_dim": 512,
29
+ "moe_load_balance_weight": 0.001,
30
+ "router_condition": "action_input",
31
+ "act_chunk_size": 8,
32
+ "act_hidden_dim": 512,
33
+ "act_num_layers": 2,
34
+ "act_dropout": 0.1,
35
+ "use_geometry_features": true,
36
+ "geometry_dim": 6,
37
+ "temporal_context": 4,
38
+ "action_dim": 4,
39
+ "num_workers": 8,
40
+ "normalize_action_targets": true,
41
+ "action_norm_eps": 1e-06,
42
+ "learnable_action_scale": true,
43
+ "action_scale_init": 1.0,
44
+ "loss_type": "huber",
45
+ "huber_delta": 0.5,
46
+ "epochs": 80,
47
+ "batch_size": 32,
48
+ "grad_accum_steps": 2,
49
+ "learning_rate": 5e-05,
50
+ "weight_decay": 0.01,
51
+ "grad_clip_norm": 1.0,
52
+ "use_fp16": true,
53
+ "early_stopping_patience": 8,
54
+ "early_stopping_min_delta": 0.0005,
55
+ "seed": 42,
56
+ "device": "cuda",
57
+ "out_dir": "checkpoints_stage2_moe_full_peg_only",
58
+ "save_best_by_val": true
59
+ }
checkpoints_stage2_moe_full_unfreeze/action_mean.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:881babfc525f93ae7ac93ab91516b17b292d67775bfd40feedd7755616d28bf2
3
+ size 144
checkpoints_stage2_moe_full_unfreeze/action_std.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dab14b2f1929494f67e68d843c0f4efec058594e511d68358d738016c0b1fe7
3
+ size 144
checkpoints_stage2_moe_full_unfreeze/architecture.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ model_tag: moe_full
2
+ setting: core
3
+ config: experiments/moe_full_unfreeze.yaml
4
+ action_head_type: moe
5
+ router_condition: action_input
6
+ freeze_vision: true
7
+ freeze_text: true
8
+ unfreeze_vision_last_n_layers: 2
9
+ unfreeze_text_last_n_layers: 2
10
+ act_chunk_size: n/a
checkpoints_stage2_moe_full_unfreeze/best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:440fc24b6261688744fe46c03c581eb6d42ae58198911440bcac36e239fa1885
3
+ size 1826296723
checkpoints_stage2_moe_full_unfreeze/latest.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebab02b7968b978d0e514cf7097b97d805385101eb50fe0df9b9c89be1ca6800
3
+ size 1826337283
checkpoints_stage2_moe_full_unfreeze/moe_router_entropy.csv ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,task_name,mean_router_entropy,sample_count
2
+ 1,short_metaworld,0.8266469451868448,59539
3
+ 2,short_metaworld,0.8533366397670633,59539
4
+ 3,short_metaworld,0.8693474535340717,59539
5
+ 4,short_metaworld,0.8904163963175487,59539
6
+ 5,short_metaworld,0.9250720586699692,59539
7
+ 6,short_metaworld,0.9457602825621302,59539
8
+ 7,short_metaworld,0.9612330587745677,59539
9
+ 8,short_metaworld,0.9723969739556451,59539
10
+ 9,short_metaworld,0.9794298765542427,59539
11
+ 10,short_metaworld,0.9789973156880027,59539
12
+ 11,short_metaworld,1.0188740455718426,59539
13
+ 12,short_metaworld,1.0265689132868232,59539
14
+ 13,short_metaworld,1.040901602255933,59539
15
+ 14,short_metaworld,1.045208848729876,59539
16
+ 15,short_metaworld,1.0550597640867616,59539
17
+ 16,short_metaworld,1.0639260735712261,59539
18
+ 17,short_metaworld,1.078483802683777,59539
19
+ 18,short_metaworld,1.0873469053151474,59539
20
+ 19,short_metaworld,1.08479473295326,59539
21
+ 20,short_metaworld,1.0818844081643484,59539
22
+ 21,short_metaworld,1.088739881516334,59539
23
+ 22,short_metaworld,1.0956593954755363,59539
24
+ 23,short_metaworld,1.1019762048816382,59539
25
+ 24,short_metaworld,1.115751132529953,59539
26
+ 25,short_metaworld,1.1183759485058393,59539
checkpoints_stage2_moe_full_unfreeze/moe_router_weights.csv ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,task_name,expert_idx,mean_router_weight,sample_count
2
+ 1,short_metaworld,0,0.1725972592830658,59539
3
+ 1,short_metaworld,1,0.3237016201019287,59539
4
+ 1,short_metaworld,2,0.26370570063591003,59539
5
+ 1,short_metaworld,3,0.23999355733394623,59539
6
+ 2,short_metaworld,0,0.20173285901546478,59539
7
+ 2,short_metaworld,1,0.29363250732421875,59539
8
+ 2,short_metaworld,2,0.26702892780303955,59539
9
+ 2,short_metaworld,3,0.23760712146759033,59539
10
+ 3,short_metaworld,0,0.234486922621727,59539
11
+ 3,short_metaworld,1,0.2745989263057709,59539
12
+ 3,short_metaworld,2,0.2596431076526642,59539
13
+ 3,short_metaworld,3,0.23127387464046478,59539
14
+ 4,short_metaworld,0,0.2463609129190445,59539
15
+ 4,short_metaworld,1,0.26643916964530945,59539
16
+ 4,short_metaworld,2,0.26865747570991516,59539
17
+ 4,short_metaworld,3,0.21854126453399658,59539
18
+ 5,short_metaworld,0,0.2569107711315155,59539
19
+ 5,short_metaworld,1,0.2579309940338135,59539
20
+ 5,short_metaworld,2,0.26801663637161255,59539
21
+ 5,short_metaworld,3,0.21714268624782562,59539
22
+ 6,short_metaworld,0,0.2629309892654419,59539
23
+ 6,short_metaworld,1,0.2564207911491394,59539
24
+ 6,short_metaworld,2,0.26288262009620667,59539
25
+ 6,short_metaworld,3,0.2177649587392807,59539
26
+ 7,short_metaworld,0,0.2639448642730713,59539
27
+ 7,short_metaworld,1,0.2512056529521942,59539
28
+ 7,short_metaworld,2,0.26194506883621216,59539
29
+ 7,short_metaworld,3,0.22290275990962982,59539
30
+ 8,short_metaworld,0,0.2619611322879791,59539
31
+ 8,short_metaworld,1,0.2512024939060211,59539
32
+ 8,short_metaworld,2,0.26084572076797485,59539
33
+ 8,short_metaworld,3,0.22599029541015625,59539
34
+ 9,short_metaworld,0,0.26731523871421814,59539
35
+ 9,short_metaworld,1,0.25023913383483887,59539
36
+ 9,short_metaworld,2,0.25189438462257385,59539
37
+ 9,short_metaworld,3,0.23055225610733032,59539
38
+ 10,short_metaworld,0,0.27526751160621643,59539
39
+ 10,short_metaworld,1,0.24768540263175964,59539
40
+ 10,short_metaworld,2,0.24636392295360565,59539
41
+ 10,short_metaworld,3,0.23067975044250488,59539
42
+ 11,short_metaworld,0,0.2856765687465668,59539
43
+ 11,short_metaworld,1,0.2459157258272171,59539
44
+ 11,short_metaworld,2,0.2368682324886322,59539
45
+ 11,short_metaworld,3,0.23154431581497192,59539
46
+ 12,short_metaworld,0,0.28470495343208313,59539
47
+ 12,short_metaworld,1,0.24562636017799377,59539
48
+ 12,short_metaworld,2,0.2367038130760193,59539
49
+ 12,short_metaworld,3,0.2329660952091217,59539
50
+ 13,short_metaworld,0,0.28792625665664673,59539
51
+ 13,short_metaworld,1,0.244477316737175,59539
52
+ 13,short_metaworld,2,0.23464582860469818,59539
53
+ 13,short_metaworld,3,0.2329520285129547,59539
54
+ 14,short_metaworld,0,0.28744399547576904,59539
55
+ 14,short_metaworld,1,0.24262361228466034,59539
56
+ 14,short_metaworld,2,0.23451165854930878,59539
57
+ 14,short_metaworld,3,0.2354205995798111,59539
58
+ 15,short_metaworld,0,0.28776249289512634,59539
59
+ 15,short_metaworld,1,0.24331533908843994,59539
60
+ 15,short_metaworld,2,0.23419678211212158,59539
61
+ 15,short_metaworld,3,0.23472383618354797,59539
62
+ 16,short_metaworld,0,0.2902672588825226,59539
63
+ 16,short_metaworld,1,0.24256440997123718,59539
64
+ 16,short_metaworld,2,0.2317458987236023,59539
65
+ 16,short_metaworld,3,0.23542441427707672,59539
66
+ 17,short_metaworld,0,0.2897332012653351,59539
67
+ 17,short_metaworld,1,0.24269115924835205,59539
68
+ 17,short_metaworld,2,0.23254677653312683,59539
69
+ 17,short_metaworld,3,0.23502705991268158,59539
70
+ 18,short_metaworld,0,0.2905566394329071,59539
71
+ 18,short_metaworld,1,0.24254554510116577,59539
72
+ 18,short_metaworld,2,0.23163749277591705,59539
73
+ 18,short_metaworld,3,0.23526298999786377,59539
74
+ 19,short_metaworld,0,0.2922336459159851,59539
75
+ 19,short_metaworld,1,0.2397744357585907,59539
76
+ 19,short_metaworld,2,0.23199936747550964,59539
77
+ 19,short_metaworld,3,0.23599739372730255,59539
78
+ 20,short_metaworld,0,0.2911907434463501,59539
79
+ 20,short_metaworld,1,0.2390134483575821,59539
80
+ 20,short_metaworld,2,0.23173364996910095,59539
81
+ 20,short_metaworld,3,0.2380628138780594,59539
82
+ 21,short_metaworld,0,0.2931298315525055,59539
83
+ 21,short_metaworld,1,0.24040372669696808,59539
84
+ 21,short_metaworld,2,0.2293849140405655,59539
85
+ 21,short_metaworld,3,0.23708373308181763,59539
86
+ 22,short_metaworld,0,0.29230207204818726,59539
87
+ 22,short_metaworld,1,0.2397640198469162,59539
88
+ 22,short_metaworld,2,0.22986406087875366,59539
89
+ 22,short_metaworld,3,0.23806846141815186,59539
90
+ 23,short_metaworld,0,0.29536736011505127,59539
91
+ 23,short_metaworld,1,0.2345297634601593,59539
92
+ 23,short_metaworld,2,0.23083822429180145,59539
93
+ 23,short_metaworld,3,0.23926454782485962,59539
94
+ 24,short_metaworld,0,0.29640254378318787,59539
95
+ 24,short_metaworld,1,0.23482196033000946,59539
96
+ 24,short_metaworld,2,0.23173286020755768,59539
97
+ 24,short_metaworld,3,0.2370423674583435,59539
98
+ 25,short_metaworld,0,0.2962692975997925,59539
99
+ 25,short_metaworld,1,0.2354266494512558,59539
100
+ 25,short_metaworld,2,0.23047353327274323,59539
101
+ 25,short_metaworld,3,0.2378319650888443,59539
checkpoints_stage2_moe_full_unfreeze/tensorboard/events.out.tfevents.1776640393.praise-5080.2198461.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e04707a3f3456a3781740e2b9af2a9b5dad7b127a376cc22e4cf1487b9a89876
3
+ size 8663930
checkpoints_stage2_moe_full_unfreeze/train_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_type": "short_metaworld",
3
+ "data_root": "data/short-metaworld-vla",
4
+ "train_jsonl": "",
5
+ "val_jsonl": "",
6
+ "val_ratio": 0.15,
7
+ "vision_model_name": "google/siglip2-base-patch16-224",
8
+ "text_model_name": "google/siglip2-base-patch16-224",
9
+ "separate_backbones": false,
10
+ "image_size": 224,
11
+ "freeze_vision": true,
12
+ "freeze_text": true,
13
+ "unfreeze_vision_last_n_layers": 2,
14
+ "unfreeze_text_last_n_layers": 2,
15
+ "fusion_type": "cross_attn",
16
+ "proj_dim": 512,
17
+ "fusion_hidden_dim": 1024,
18
+ "fusion_out_dim": 512,
19
+ "fusion_num_layers": 3,
20
+ "fusion_num_heads": 8,
21
+ "fusion_dropout": 0.1,
22
+ "normalize_embeddings": true,
23
+ "action_head_type": "moe",
24
+ "action_mlp_hidden_dim": 256,
25
+ "action_mlp_layers": 2,
26
+ "action_mlp_dropout": 0.1,
27
+ "moe_num_experts": 4,
28
+ "moe_hidden_dim": 512,
29
+ "moe_load_balance_weight": 0.001,
30
+ "router_condition": "action_input",
31
+ "act_chunk_size": 8,
32
+ "act_hidden_dim": 512,
33
+ "act_num_layers": 2,
34
+ "act_dropout": 0.1,
35
+ "use_geometry_features": true,
36
+ "geometry_dim": 6,
37
+ "temporal_context": 4,
38
+ "action_dim": 4,
39
+ "num_workers": 8,
40
+ "normalize_action_targets": true,
41
+ "action_norm_eps": 1e-06,
42
+ "learnable_action_scale": true,
43
+ "action_scale_init": 1.0,
44
+ "loss_type": "huber",
45
+ "huber_delta": 0.5,
46
+ "epochs": 80,
47
+ "batch_size": 32,
48
+ "grad_accum_steps": 2,
49
+ "learning_rate": 5e-05,
50
+ "weight_decay": 0.01,
51
+ "grad_clip_norm": 1.0,
52
+ "use_fp16": true,
53
+ "early_stopping_patience": 8,
54
+ "early_stopping_min_delta": 0.0005,
55
+ "seed": 42,
56
+ "device": "cuda",
57
+ "out_dir": "checkpoints_stage2_moe_full_unfreeze",
58
+ "save_best_by_val": true
59
+ }
checkpoints_stage2_moe_text_unfreeze/action_mean.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:881babfc525f93ae7ac93ab91516b17b292d67775bfd40feedd7755616d28bf2
3
+ size 144
checkpoints_stage2_moe_text_unfreeze/action_std.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dab14b2f1929494f67e68d843c0f4efec058594e511d68358d738016c0b1fe7
3
+ size 144
checkpoints_stage2_moe_text_unfreeze/architecture.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ model_tag: moe_text
2
+ setting: core
3
+ config: experiments/moe_text_unfreeze.yaml
4
+ action_head_type: moe
5
+ router_condition: text
6
+ freeze_vision: true
7
+ freeze_text: true
8
+ unfreeze_vision_last_n_layers: 2
9
+ unfreeze_text_last_n_layers: 2
10
+ act_chunk_size: n/a
checkpoints_stage2_moe_text_unfreeze/best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13bbbccc888cdac06329671dc49b830e9fc7adf5191f4b0221fedcb78a62bc7f
3
+ size 1826295571
checkpoints_stage2_moe_text_unfreeze/latest.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5300c7da06e3009faf4939557e237899b785724d076501d1fe0b80178b3c85a
3
+ size 1826336131
checkpoints_stage2_moe_text_unfreeze/moe_router_entropy.csv ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,task_name,mean_router_entropy,sample_count
2
+ 1,short_metaworld,1.3860215749456126,59539
3
+ 2,short_metaworld,1.3859960296536484,59539
4
+ 3,short_metaworld,1.3860550356931578,59539
5
+ 4,short_metaworld,1.3860976992821288,59539
6
+ 5,short_metaworld,1.3861125832824228,59539
7
+ 6,short_metaworld,1.3861275549612804,59539
8
+ 7,short_metaworld,1.386156571432181,59539
9
+ 8,short_metaworld,1.386179281892041,59539
10
+ 9,short_metaworld,1.3861871356216409,59539
11
+ 10,short_metaworld,1.3861988380119112,59539
12
+ 11,short_metaworld,1.3862183840787095,59539
13
+ 12,short_metaworld,1.386233197621406,59539
14
+ 13,short_metaworld,1.3862402749299045,59539
15
+ 14,short_metaworld,1.3862468894707403,59539
16
+ 15,short_metaworld,1.3862460440656648,59539
17
+ 16,short_metaworld,1.386254353549203,59539
18
+ 17,short_metaworld,1.3862646711443478,59539
19
+ 18,short_metaworld,1.3862623862779409,59539
20
+ 19,short_metaworld,1.3862614024464113,59539
21
+ 20,short_metaworld,1.3862660815737053,59539
checkpoints_stage2_moe_text_unfreeze/moe_router_weights.csv ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,task_name,expert_idx,mean_router_weight,sample_count
2
+ 1,short_metaworld,0,0.2503420412540436,59539
3
+ 1,short_metaworld,1,0.2457144558429718,59539
4
+ 1,short_metaworld,2,0.25188928842544556,59539
5
+ 1,short_metaworld,3,0.2520378828048706,59539
6
+ 2,short_metaworld,0,0.251309335231781,59539
7
+ 2,short_metaworld,1,0.2445855289697647,59539
8
+ 2,short_metaworld,2,0.2530737817287445,59539
9
+ 2,short_metaworld,3,0.25103235244750977,59539
10
+ 3,short_metaworld,0,0.2510743737220764,59539
11
+ 3,short_metaworld,1,0.24634063243865967,59539
12
+ 3,short_metaworld,2,0.25233784317970276,59539
13
+ 3,short_metaworld,3,0.25024473667144775,59539
14
+ 4,short_metaworld,0,0.25092294812202454,59539
15
+ 4,short_metaworld,1,0.24752888083457947,59539
16
+ 4,short_metaworld,2,0.2518005967140198,59539
17
+ 4,short_metaworld,3,0.24973003566265106,59539
18
+ 5,short_metaworld,0,0.2507861852645874,59539
19
+ 5,short_metaworld,1,0.24724964797496796,59539
20
+ 5,short_metaworld,2,0.25160107016563416,59539
21
+ 5,short_metaworld,3,0.25036120414733887,59539
22
+ 6,short_metaworld,0,0.2508155405521393,59539
23
+ 6,short_metaworld,1,0.24734751880168915,59539
24
+ 6,short_metaworld,2,0.2516288757324219,59539
25
+ 6,short_metaworld,3,0.25018537044525146,59539
26
+ 7,short_metaworld,0,0.2510412335395813,59539
27
+ 7,short_metaworld,1,0.2474389672279358,59539
28
+ 7,short_metaworld,2,0.2514648735523224,59539
29
+ 7,short_metaworld,3,0.2500589191913605,59539
30
+ 8,short_metaworld,0,0.25110000371932983,59539
31
+ 8,short_metaworld,1,0.2477336823940277,59539
32
+ 8,short_metaworld,2,0.2511843144893646,59539
33
+ 8,short_metaworld,3,0.2499760538339615,59539
34
+ 9,short_metaworld,0,0.2511608898639679,59539
35
+ 9,short_metaworld,1,0.24793510138988495,59539
36
+ 9,short_metaworld,2,0.25116100907325745,59539
37
+ 9,short_metaworld,3,0.24973830580711365,59539
38
+ 10,short_metaworld,0,0.25113919377326965,59539
39
+ 10,short_metaworld,1,0.2480745017528534,59539
40
+ 10,short_metaworld,2,0.25087249279022217,59539
41
+ 10,short_metaworld,3,0.24991968274116516,59539
42
+ 11,short_metaworld,0,0.2509872317314148,59539
43
+ 11,short_metaworld,1,0.24823589622974396,59539
44
+ 11,short_metaworld,2,0.25075864791870117,59539
45
+ 11,short_metaworld,3,0.2500267028808594,59539
46
+ 12,short_metaworld,0,0.2509134113788605,59539
47
+ 12,short_metaworld,1,0.24851222336292267,59539
48
+ 12,short_metaworld,2,0.25063180923461914,59539
49
+ 12,short_metaworld,3,0.2499595731496811,59539
50
+ 13,short_metaworld,0,0.2509301006793976,59539
51
+ 13,short_metaworld,1,0.24855764210224152,59539
52
+ 13,short_metaworld,2,0.25048622488975525,59539
53
+ 13,short_metaworld,3,0.2500481605529785,59539
54
+ 14,short_metaworld,0,0.2508739233016968,59539
55
+ 14,short_metaworld,1,0.24877679347991943,59539
56
+ 14,short_metaworld,2,0.25037506222724915,59539
57
+ 14,short_metaworld,3,0.24997255206108093,59539
58
+ 15,short_metaworld,0,0.25082045793533325,59539
59
+ 15,short_metaworld,1,0.24877150356769562,59539
60
+ 15,short_metaworld,2,0.25048789381980896,59539
61
+ 15,short_metaworld,3,0.24988879263401031,59539
62
+ 16,short_metaworld,0,0.2507392466068268,59539
63
+ 16,short_metaworld,1,0.24902907013893127,59539
64
+ 16,short_metaworld,2,0.25044384598731995,59539
65
+ 16,short_metaworld,3,0.24978768825531006,59539
66
+ 17,short_metaworld,0,0.2505834698677063,59539
67
+ 17,short_metaworld,1,0.2492835372686386,59539
68
+ 17,short_metaworld,2,0.2501995861530304,59539
69
+ 17,short_metaworld,3,0.249890998005867,59539
70
+ 18,short_metaworld,0,0.25059205293655396,59539
71
+ 18,short_metaworld,1,0.24905765056610107,59539
72
+ 18,short_metaworld,2,0.25026026368141174,59539
73
+ 18,short_metaworld,3,0.250081866979599,59539
74
+ 19,short_metaworld,0,0.2506750226020813,59539
75
+ 19,short_metaworld,1,0.24909931421279907,59539
76
+ 19,short_metaworld,2,0.2502804398536682,59539
77
+ 19,short_metaworld,3,0.24993231892585754,59539
78
+ 20,short_metaworld,0,0.2507267892360687,59539
79
+ 20,short_metaworld,1,0.2493761032819748,59539
80
+ 20,short_metaworld,2,0.25002411007881165,59539
81
+ 20,short_metaworld,3,0.24984972178936005,59539
checkpoints_stage2_moe_text_unfreeze/tensorboard/events.out.tfevents.1776638724.praise-5080.2184587.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00bebc1285aea38f375f7568c0da9c632b33c19b4a5afe39908a7b2af7c54684
3
+ size 6921243
checkpoints_stage2_moe_text_unfreeze/train_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_type": "short_metaworld",
3
+ "data_root": "data/short-metaworld-vla",
4
+ "train_jsonl": "",
5
+ "val_jsonl": "",
6
+ "val_ratio": 0.15,
7
+ "vision_model_name": "google/siglip2-base-patch16-224",
8
+ "text_model_name": "google/siglip2-base-patch16-224",
9
+ "separate_backbones": false,
10
+ "image_size": 224,
11
+ "freeze_vision": true,
12
+ "freeze_text": true,
13
+ "unfreeze_vision_last_n_layers": 2,
14
+ "unfreeze_text_last_n_layers": 2,
15
+ "fusion_type": "cross_attn",
16
+ "proj_dim": 512,
17
+ "fusion_hidden_dim": 1024,
18
+ "fusion_out_dim": 512,
19
+ "fusion_num_layers": 3,
20
+ "fusion_num_heads": 8,
21
+ "fusion_dropout": 0.1,
22
+ "normalize_embeddings": true,
23
+ "action_head_type": "moe",
24
+ "action_mlp_hidden_dim": 256,
25
+ "action_mlp_layers": 2,
26
+ "action_mlp_dropout": 0.1,
27
+ "moe_num_experts": 4,
28
+ "moe_hidden_dim": 512,
29
+ "moe_load_balance_weight": 0.001,
30
+ "router_condition": "text",
31
+ "act_chunk_size": 8,
32
+ "act_hidden_dim": 512,
33
+ "act_num_layers": 2,
34
+ "act_dropout": 0.1,
35
+ "use_geometry_features": true,
36
+ "geometry_dim": 6,
37
+ "temporal_context": 4,
38
+ "action_dim": 4,
39
+ "num_workers": 8,
40
+ "normalize_action_targets": true,
41
+ "action_norm_eps": 1e-06,
42
+ "learnable_action_scale": true,
43
+ "action_scale_init": 1.0,
44
+ "loss_type": "huber",
45
+ "huber_delta": 0.5,
46
+ "epochs": 80,
47
+ "batch_size": 32,
48
+ "grad_accum_steps": 2,
49
+ "learning_rate": 5e-05,
50
+ "weight_decay": 0.01,
51
+ "grad_clip_norm": 1.0,
52
+ "use_fp16": true,
53
+ "early_stopping_patience": 8,
54
+ "early_stopping_min_delta": 0.0005,
55
+ "seed": 42,
56
+ "device": "cuda",
57
+ "out_dir": "checkpoints_stage2_moe_text_unfreeze",
58
+ "save_best_by_val": true
59
+ }
checkpoints_stage2_no_moe_unfreeze/action_mean.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:881babfc525f93ae7ac93ab91516b17b292d67775bfd40feedd7755616d28bf2
3
+ size 144
checkpoints_stage2_no_moe_unfreeze/action_std.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dab14b2f1929494f67e68d843c0f4efec058594e511d68358d738016c0b1fe7
3
+ size 144
checkpoints_stage2_no_moe_unfreeze/architecture.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ model_tag: no_moe
2
+ setting: core
3
+ config: experiments/no_moe_unfreeze.yaml
4
+ action_head_type: mlp
5
+ router_condition: n/a
6
+ freeze_vision: true
7
+ freeze_text: true
8
+ unfreeze_vision_last_n_layers: 2
9
+ unfreeze_text_last_n_layers: 2
10
+ act_chunk_size: n/a
checkpoints_stage2_no_moe_unfreeze/best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95a6021db530477bcdbdf46e33ade442e56f98bfd01ac4b82bba9ab58e7f7249
3
+ size 1816284011
checkpoints_stage2_no_moe_unfreeze/latest.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15c2056e01fe4c2fefcd0310002e10d680adbd6a267bf8bbf69f5de66a1633b0
3
+ size 1816322539
checkpoints_stage2_no_moe_unfreeze/tensorboard/events.out.tfevents.1776637048.praise-5080.2170321.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a190ac69c19947e3ccc743a8d8eb042974b21581bcce58b22b1947579df5d17f
3
+ size 4464798
checkpoints_stage2_no_moe_unfreeze/train_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_type": "short_metaworld",
3
+ "data_root": "data/short-metaworld-vla",
4
+ "train_jsonl": "",
5
+ "val_jsonl": "",
6
+ "val_ratio": 0.15,
7
+ "vision_model_name": "google/siglip2-base-patch16-224",
8
+ "text_model_name": "google/siglip2-base-patch16-224",
9
+ "separate_backbones": false,
10
+ "image_size": 224,
11
+ "freeze_vision": true,
12
+ "freeze_text": true,
13
+ "unfreeze_vision_last_n_layers": 2,
14
+ "unfreeze_text_last_n_layers": 2,
15
+ "fusion_type": "cross_attn",
16
+ "proj_dim": 512,
17
+ "fusion_hidden_dim": 1024,
18
+ "fusion_out_dim": 512,
19
+ "fusion_num_layers": 3,
20
+ "fusion_num_heads": 8,
21
+ "fusion_dropout": 0.1,
22
+ "normalize_embeddings": true,
23
+ "action_head_type": "mlp",
24
+ "action_mlp_hidden_dim": 512,
25
+ "action_mlp_layers": 2,
26
+ "action_mlp_dropout": 0.1,
27
+ "moe_num_experts": 4,
28
+ "moe_hidden_dim": 512,
29
+ "moe_load_balance_weight": 0.01,
30
+ "router_condition": "text",
31
+ "act_chunk_size": 8,
32
+ "act_hidden_dim": 512,
33
+ "act_num_layers": 2,
34
+ "act_dropout": 0.1,
35
+ "use_geometry_features": true,
36
+ "geometry_dim": 6,
37
+ "temporal_context": 4,
38
+ "action_dim": 4,
39
+ "num_workers": 8,
40
+ "normalize_action_targets": true,
41
+ "action_norm_eps": 1e-06,
42
+ "learnable_action_scale": true,
43
+ "action_scale_init": 1.0,
44
+ "loss_type": "huber",
45
+ "huber_delta": 0.5,
46
+ "epochs": 80,
47
+ "batch_size": 32,
48
+ "grad_accum_steps": 2,
49
+ "learning_rate": 5e-05,
50
+ "weight_decay": 0.01,
51
+ "grad_clip_norm": 1.0,
52
+ "use_fp16": true,
53
+ "early_stopping_patience": 8,
54
+ "early_stopping_min_delta": 0.0005,
55
+ "seed": 42,
56
+ "device": "cuda",
57
+ "out_dir": "checkpoints_stage2_no_moe_unfreeze",
58
+ "save_best_by_val": true
59
+ }