Tr0612 committed on
Commit
f1cb62b
·
1 Parent(s): 875dff8

Add files using upload-large-folder tool

Browse files
architecture.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ model_tag: moe_full
2
+ task: peg-insert-side-v3
3
+ config: experiments/moe_full_unfreeze.yaml
4
+ action_head_type: moe
5
+ router_condition: action_input
6
+ freeze_vision: true
7
+ freeze_text: true
8
+ unfreeze_vision_last_n_layers: 2
9
+ unfreeze_text_last_n_layers: 2
10
+ act_chunk_size: n/a
moe_router_entropy.csv ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,task_name,mean_router_entropy,sample_count
2
+ 1,short_metaworld,1.0972516559274759,4250
3
+ 2,short_metaworld,0.868278188122546,4250
4
+ 3,short_metaworld,0.8644592178498998,4250
5
+ 4,short_metaworld,0.8801690644621849,4250
6
+ 5,short_metaworld,0.9024338802160585,4250
7
+ 6,short_metaworld,0.9231361929274657,4250
8
+ 7,short_metaworld,0.9505329890706959,4250
9
+ 8,short_metaworld,0.9680941077444484,4250
10
+ 9,short_metaworld,0.9897653412012493,4250
11
+ 10,short_metaworld,1.0097954931697424,4250
12
+ 11,short_metaworld,1.019337093510172,4250
13
+ 12,short_metaworld,1.0326422810493148,4250
14
+ 13,short_metaworld,1.0416921249838436,4250
15
+ 14,short_metaworld,1.0466027086096652,4250
16
+ 15,short_metaworld,1.0440011160680476,4250
17
+ 16,short_metaworld,1.0458287356697462,4250
moe_router_weights.csv ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epoch,task_name,expert_idx,mean_router_weight,sample_count
2
+ 1,short_metaworld,0,0.2524724304676056,4250
3
+ 1,short_metaworld,1,0.2697482109069824,4250
4
+ 1,short_metaworld,2,0.26585105061531067,4250
5
+ 1,short_metaworld,3,0.21192866563796997,4250
6
+ 2,short_metaworld,0,0.3143608272075653,4250
7
+ 2,short_metaworld,1,0.3268682062625885,4250
8
+ 2,short_metaworld,2,0.21709245443344116,4250
9
+ 2,short_metaworld,3,0.14167837798595428,4250
10
+ 3,short_metaworld,0,0.3089982569217682,4250
11
+ 3,short_metaworld,1,0.3164787292480469,4250
12
+ 3,short_metaworld,2,0.2356213927268982,4250
13
+ 3,short_metaworld,3,0.13890130817890167,4250
14
+ 4,short_metaworld,0,0.29757270216941833,4250
15
+ 4,short_metaworld,1,0.30781999230384827,4250
16
+ 4,short_metaworld,2,0.2540871202945709,4250
17
+ 4,short_metaworld,3,0.1405191272497177,4250
18
+ 5,short_metaworld,0,0.28526824712753296,4250
19
+ 5,short_metaworld,1,0.29758328199386597,4250
20
+ 5,short_metaworld,2,0.27030232548713684,4250
21
+ 5,short_metaworld,3,0.14684562385082245,4250
22
+ 6,short_metaworld,0,0.27950519323349,4250
23
+ 6,short_metaworld,1,0.289709210395813,4250
24
+ 6,short_metaworld,2,0.27237799763679504,4250
25
+ 6,short_metaworld,3,0.15840782225131989,4250
26
+ 7,short_metaworld,0,0.26899853348731995,4250
27
+ 7,short_metaworld,1,0.27762043476104736,4250
28
+ 7,short_metaworld,2,0.2824629247188568,4250
29
+ 7,short_metaworld,3,0.17091910541057587,4250
30
+ 8,short_metaworld,0,0.26036784052848816,4250
31
+ 8,short_metaworld,1,0.26750805974006653,4250
32
+ 8,short_metaworld,2,0.2912062704563141,4250
33
+ 8,short_metaworld,3,0.18091799318790436,4250
34
+ 9,short_metaworld,0,0.2523835301399231,4250
35
+ 9,short_metaworld,1,0.2574305534362793,4250
36
+ 9,short_metaworld,2,0.2924705445766449,4250
37
+ 9,short_metaworld,3,0.19771520793437958,4250
38
+ 10,short_metaworld,0,0.23678657412528992,4250
39
+ 10,short_metaworld,1,0.2441885620355606,4250
40
+ 10,short_metaworld,2,0.29841580986976624,4250
41
+ 10,short_metaworld,3,0.22060911357402802,4250
42
+ 11,short_metaworld,0,0.22633861005306244,4250
43
+ 11,short_metaworld,1,0.23437286913394928,4250
44
+ 11,short_metaworld,2,0.3036373257637024,4250
45
+ 11,short_metaworld,3,0.23565179109573364,4250
46
+ 12,short_metaworld,0,0.220066636800766,4250
47
+ 12,short_metaworld,1,0.22533780336380005,4250
48
+ 12,short_metaworld,2,0.2991962134838104,4250
49
+ 12,short_metaworld,3,0.25539910793304443,4250
50
+ 13,short_metaworld,0,0.21208599209785461,4250
51
+ 13,short_metaworld,1,0.21645447611808777,4250
52
+ 13,short_metaworld,2,0.2984868884086609,4250
53
+ 13,short_metaworld,3,0.2729724049568176,4250
54
+ 14,short_metaworld,0,0.21153134107589722,4250
55
+ 14,short_metaworld,1,0.2144383043050766,4250
56
+ 14,short_metaworld,2,0.2981346845626831,4250
57
+ 14,short_metaworld,3,0.275896281003952,4250
58
+ 15,short_metaworld,0,0.20906934142112732,4250
59
+ 15,short_metaworld,1,0.21298854053020477,4250
60
+ 15,short_metaworld,2,0.29622671008110046,4250
61
+ 15,short_metaworld,3,0.2817155122756958,4250
62
+ 16,short_metaworld,0,0.2072872817516327,4250
63
+ 16,short_metaworld,1,0.21117538213729858,4250
64
+ 16,short_metaworld,2,0.2976193130016327,4250
65
+ 16,short_metaworld,3,0.2839184105396271,4250
tensorboard/events.out.tfevents.1776852769.praise-5080.2844183.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9c96de0901951335e66869ff79d7ccf87555bae5029154ea4971f1e68d10ad4
3
+ size 399563
train_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_type": "short_metaworld",
3
+ "data_root": "data/short-metaworld-vla",
4
+ "train_jsonl": "",
5
+ "val_jsonl": "",
6
+ "val_ratio": 0.15,
7
+ "vision_model_name": "google/siglip2-base-patch16-224",
8
+ "text_model_name": "google/siglip2-base-patch16-224",
9
+ "separate_backbones": false,
10
+ "image_size": 224,
11
+ "freeze_vision": true,
12
+ "freeze_text": true,
13
+ "unfreeze_vision_last_n_layers": 2,
14
+ "unfreeze_text_last_n_layers": 2,
15
+ "fusion_type": "cross_attn",
16
+ "proj_dim": 512,
17
+ "fusion_hidden_dim": 1024,
18
+ "fusion_out_dim": 512,
19
+ "fusion_num_layers": 3,
20
+ "fusion_num_heads": 8,
21
+ "fusion_dropout": 0.1,
22
+ "normalize_embeddings": true,
23
+ "action_head_type": "moe",
24
+ "action_mlp_hidden_dim": 256,
25
+ "action_mlp_layers": 2,
26
+ "action_mlp_dropout": 0.1,
27
+ "moe_num_experts": 4,
28
+ "moe_hidden_dim": 512,
29
+ "moe_load_balance_weight": 0.001,
30
+ "router_condition": "action_input",
31
+ "act_chunk_size": 8,
32
+ "act_hidden_dim": 512,
33
+ "act_num_layers": 2,
34
+ "act_dropout": 0.1,
35
+ "use_geometry_features": true,
36
+ "geometry_dim": 6,
37
+ "temporal_context": 4,
38
+ "action_dim": 4,
39
+ "num_workers": 8,
40
+ "normalize_action_targets": true,
41
+ "action_norm_eps": 1e-06,
42
+ "learnable_action_scale": true,
43
+ "action_scale_init": 1.0,
44
+ "loss_type": "huber",
45
+ "huber_delta": 0.5,
46
+ "epochs": 80,
47
+ "batch_size": 32,
48
+ "grad_accum_steps": 2,
49
+ "learning_rate": 5e-05,
50
+ "weight_decay": 0.01,
51
+ "grad_clip_norm": 1.0,
52
+ "use_fp16": true,
53
+ "early_stopping_patience": 8,
54
+ "early_stopping_min_delta": 0.0005,
55
+ "seed": 42,
56
+ "device": "cuda",
57
+ "out_dir": "checkpoints_stage2_moe_full_peg_only",
58
+ "save_best_by_val": true
59
+ }