Add files using upload-large-folder tool
Browse files- architecture.txt +10 -0
- moe_router_entropy.csv +17 -0
- moe_router_weights.csv +65 -0
- tensorboard/events.out.tfevents.1776852769.praise-5080.2844183.0 +3 -0
- train_config.json +59 -0
architecture.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_tag: moe_full
|
| 2 |
+
task: peg-insert-side-v3
|
| 3 |
+
config: experiments/moe_full_unfreeze.yaml
|
| 4 |
+
action_head_type: moe
|
| 5 |
+
router_condition: action_input
|
| 6 |
+
freeze_vision: true
|
| 7 |
+
freeze_text: true
|
| 8 |
+
unfreeze_vision_last_n_layers: 2
|
| 9 |
+
unfreeze_text_last_n_layers: 2
|
| 10 |
+
act_chunk_size: n/a
|
moe_router_entropy.csv
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
epoch,task_name,mean_router_entropy,sample_count
|
| 2 |
+
1,short_metaworld,1.0972516559274759,4250
|
| 3 |
+
2,short_metaworld,0.868278188122546,4250
|
| 4 |
+
3,short_metaworld,0.8644592178498998,4250
|
| 5 |
+
4,short_metaworld,0.8801690644621849,4250
|
| 6 |
+
5,short_metaworld,0.9024338802160585,4250
|
| 7 |
+
6,short_metaworld,0.9231361929274657,4250
|
| 8 |
+
7,short_metaworld,0.9505329890706959,4250
|
| 9 |
+
8,short_metaworld,0.9680941077444484,4250
|
| 10 |
+
9,short_metaworld,0.9897653412012493,4250
|
| 11 |
+
10,short_metaworld,1.0097954931697424,4250
|
| 12 |
+
11,short_metaworld,1.019337093510172,4250
|
| 13 |
+
12,short_metaworld,1.0326422810493148,4250
|
| 14 |
+
13,short_metaworld,1.0416921249838436,4250
|
| 15 |
+
14,short_metaworld,1.0466027086096652,4250
|
| 16 |
+
15,short_metaworld,1.0440011160680476,4250
|
| 17 |
+
16,short_metaworld,1.0458287356697462,4250
|
moe_router_weights.csv
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
epoch,task_name,expert_idx,mean_router_weight,sample_count
|
| 2 |
+
1,short_metaworld,0,0.2524724304676056,4250
|
| 3 |
+
1,short_metaworld,1,0.2697482109069824,4250
|
| 4 |
+
1,short_metaworld,2,0.26585105061531067,4250
|
| 5 |
+
1,short_metaworld,3,0.21192866563796997,4250
|
| 6 |
+
2,short_metaworld,0,0.3143608272075653,4250
|
| 7 |
+
2,short_metaworld,1,0.3268682062625885,4250
|
| 8 |
+
2,short_metaworld,2,0.21709245443344116,4250
|
| 9 |
+
2,short_metaworld,3,0.14167837798595428,4250
|
| 10 |
+
3,short_metaworld,0,0.3089982569217682,4250
|
| 11 |
+
3,short_metaworld,1,0.3164787292480469,4250
|
| 12 |
+
3,short_metaworld,2,0.2356213927268982,4250
|
| 13 |
+
3,short_metaworld,3,0.13890130817890167,4250
|
| 14 |
+
4,short_metaworld,0,0.29757270216941833,4250
|
| 15 |
+
4,short_metaworld,1,0.30781999230384827,4250
|
| 16 |
+
4,short_metaworld,2,0.2540871202945709,4250
|
| 17 |
+
4,short_metaworld,3,0.1405191272497177,4250
|
| 18 |
+
5,short_metaworld,0,0.28526824712753296,4250
|
| 19 |
+
5,short_metaworld,1,0.29758328199386597,4250
|
| 20 |
+
5,short_metaworld,2,0.27030232548713684,4250
|
| 21 |
+
5,short_metaworld,3,0.14684562385082245,4250
|
| 22 |
+
6,short_metaworld,0,0.27950519323349,4250
|
| 23 |
+
6,short_metaworld,1,0.289709210395813,4250
|
| 24 |
+
6,short_metaworld,2,0.27237799763679504,4250
|
| 25 |
+
6,short_metaworld,3,0.15840782225131989,4250
|
| 26 |
+
7,short_metaworld,0,0.26899853348731995,4250
|
| 27 |
+
7,short_metaworld,1,0.27762043476104736,4250
|
| 28 |
+
7,short_metaworld,2,0.2824629247188568,4250
|
| 29 |
+
7,short_metaworld,3,0.17091910541057587,4250
|
| 30 |
+
8,short_metaworld,0,0.26036784052848816,4250
|
| 31 |
+
8,short_metaworld,1,0.26750805974006653,4250
|
| 32 |
+
8,short_metaworld,2,0.2912062704563141,4250
|
| 33 |
+
8,short_metaworld,3,0.18091799318790436,4250
|
| 34 |
+
9,short_metaworld,0,0.2523835301399231,4250
|
| 35 |
+
9,short_metaworld,1,0.2574305534362793,4250
|
| 36 |
+
9,short_metaworld,2,0.2924705445766449,4250
|
| 37 |
+
9,short_metaworld,3,0.19771520793437958,4250
|
| 38 |
+
10,short_metaworld,0,0.23678657412528992,4250
|
| 39 |
+
10,short_metaworld,1,0.2441885620355606,4250
|
| 40 |
+
10,short_metaworld,2,0.29841580986976624,4250
|
| 41 |
+
10,short_metaworld,3,0.22060911357402802,4250
|
| 42 |
+
11,short_metaworld,0,0.22633861005306244,4250
|
| 43 |
+
11,short_metaworld,1,0.23437286913394928,4250
|
| 44 |
+
11,short_metaworld,2,0.3036373257637024,4250
|
| 45 |
+
11,short_metaworld,3,0.23565179109573364,4250
|
| 46 |
+
12,short_metaworld,0,0.220066636800766,4250
|
| 47 |
+
12,short_metaworld,1,0.22533780336380005,4250
|
| 48 |
+
12,short_metaworld,2,0.2991962134838104,4250
|
| 49 |
+
12,short_metaworld,3,0.25539910793304443,4250
|
| 50 |
+
13,short_metaworld,0,0.21208599209785461,4250
|
| 51 |
+
13,short_metaworld,1,0.21645447611808777,4250
|
| 52 |
+
13,short_metaworld,2,0.2984868884086609,4250
|
| 53 |
+
13,short_metaworld,3,0.2729724049568176,4250
|
| 54 |
+
14,short_metaworld,0,0.21153134107589722,4250
|
| 55 |
+
14,short_metaworld,1,0.2144383043050766,4250
|
| 56 |
+
14,short_metaworld,2,0.2981346845626831,4250
|
| 57 |
+
14,short_metaworld,3,0.275896281003952,4250
|
| 58 |
+
15,short_metaworld,0,0.20906934142112732,4250
|
| 59 |
+
15,short_metaworld,1,0.21298854053020477,4250
|
| 60 |
+
15,short_metaworld,2,0.29622671008110046,4250
|
| 61 |
+
15,short_metaworld,3,0.2817155122756958,4250
|
| 62 |
+
16,short_metaworld,0,0.2072872817516327,4250
|
| 63 |
+
16,short_metaworld,1,0.21117538213729858,4250
|
| 64 |
+
16,short_metaworld,2,0.2976193130016327,4250
|
| 65 |
+
16,short_metaworld,3,0.2839184105396271,4250
|
tensorboard/events.out.tfevents.1776852769.praise-5080.2844183.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9c96de0901951335e66869ff79d7ccf87555bae5029154ea4971f1e68d10ad4
|
| 3 |
+
size 399563
|
train_config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_type": "short_metaworld",
|
| 3 |
+
"data_root": "data/short-metaworld-vla",
|
| 4 |
+
"train_jsonl": "",
|
| 5 |
+
"val_jsonl": "",
|
| 6 |
+
"val_ratio": 0.15,
|
| 7 |
+
"vision_model_name": "google/siglip2-base-patch16-224",
|
| 8 |
+
"text_model_name": "google/siglip2-base-patch16-224",
|
| 9 |
+
"separate_backbones": false,
|
| 10 |
+
"image_size": 224,
|
| 11 |
+
"freeze_vision": true,
|
| 12 |
+
"freeze_text": true,
|
| 13 |
+
"unfreeze_vision_last_n_layers": 2,
|
| 14 |
+
"unfreeze_text_last_n_layers": 2,
|
| 15 |
+
"fusion_type": "cross_attn",
|
| 16 |
+
"proj_dim": 512,
|
| 17 |
+
"fusion_hidden_dim": 1024,
|
| 18 |
+
"fusion_out_dim": 512,
|
| 19 |
+
"fusion_num_layers": 3,
|
| 20 |
+
"fusion_num_heads": 8,
|
| 21 |
+
"fusion_dropout": 0.1,
|
| 22 |
+
"normalize_embeddings": true,
|
| 23 |
+
"action_head_type": "moe",
|
| 24 |
+
"action_mlp_hidden_dim": 256,
|
| 25 |
+
"action_mlp_layers": 2,
|
| 26 |
+
"action_mlp_dropout": 0.1,
|
| 27 |
+
"moe_num_experts": 4,
|
| 28 |
+
"moe_hidden_dim": 512,
|
| 29 |
+
"moe_load_balance_weight": 0.001,
|
| 30 |
+
"router_condition": "action_input",
|
| 31 |
+
"act_chunk_size": 8,
|
| 32 |
+
"act_hidden_dim": 512,
|
| 33 |
+
"act_num_layers": 2,
|
| 34 |
+
"act_dropout": 0.1,
|
| 35 |
+
"use_geometry_features": true,
|
| 36 |
+
"geometry_dim": 6,
|
| 37 |
+
"temporal_context": 4,
|
| 38 |
+
"action_dim": 4,
|
| 39 |
+
"num_workers": 8,
|
| 40 |
+
"normalize_action_targets": true,
|
| 41 |
+
"action_norm_eps": 1e-06,
|
| 42 |
+
"learnable_action_scale": true,
|
| 43 |
+
"action_scale_init": 1.0,
|
| 44 |
+
"loss_type": "huber",
|
| 45 |
+
"huber_delta": 0.5,
|
| 46 |
+
"epochs": 80,
|
| 47 |
+
"batch_size": 32,
|
| 48 |
+
"grad_accum_steps": 2,
|
| 49 |
+
"learning_rate": 5e-05,
|
| 50 |
+
"weight_decay": 0.01,
|
| 51 |
+
"grad_clip_norm": 1.0,
|
| 52 |
+
"use_fp16": true,
|
| 53 |
+
"early_stopping_patience": 8,
|
| 54 |
+
"early_stopping_min_delta": 0.0005,
|
| 55 |
+
"seed": 42,
|
| 56 |
+
"device": "cuda",
|
| 57 |
+
"out_dir": "checkpoints_stage2_moe_full_peg_only",
|
| 58 |
+
"save_best_by_val": true
|
| 59 |
+
}
|