Add files using upload-large-folder tool
Browse files- checkpoints_stage2_moe_full_peg_only/architecture.txt +10 -0
- checkpoints_stage2_moe_full_peg_only/moe_router_entropy.csv +17 -0
- checkpoints_stage2_moe_full_peg_only/moe_router_weights.csv +65 -0
- checkpoints_stage2_moe_full_peg_only/tensorboard/events.out.tfevents.1776852769.praise-5080.2844183.0 +3 -0
- checkpoints_stage2_moe_full_peg_only/train_config.json +59 -0
- checkpoints_stage2_moe_full_unfreeze/action_mean.npy +3 -0
- checkpoints_stage2_moe_full_unfreeze/action_std.npy +3 -0
- checkpoints_stage2_moe_full_unfreeze/architecture.txt +10 -0
- checkpoints_stage2_moe_full_unfreeze/best.pt +3 -0
- checkpoints_stage2_moe_full_unfreeze/latest.pt +3 -0
- checkpoints_stage2_moe_full_unfreeze/moe_router_entropy.csv +26 -0
- checkpoints_stage2_moe_full_unfreeze/moe_router_weights.csv +101 -0
- checkpoints_stage2_moe_full_unfreeze/tensorboard/events.out.tfevents.1776640393.praise-5080.2198461.0 +3 -0
- checkpoints_stage2_moe_full_unfreeze/train_config.json +59 -0
- checkpoints_stage2_moe_text_unfreeze/action_mean.npy +3 -0
- checkpoints_stage2_moe_text_unfreeze/action_std.npy +3 -0
- checkpoints_stage2_moe_text_unfreeze/architecture.txt +10 -0
- checkpoints_stage2_moe_text_unfreeze/best.pt +3 -0
- checkpoints_stage2_moe_text_unfreeze/latest.pt +3 -0
- checkpoints_stage2_moe_text_unfreeze/moe_router_entropy.csv +21 -0
- checkpoints_stage2_moe_text_unfreeze/moe_router_weights.csv +81 -0
- checkpoints_stage2_moe_text_unfreeze/tensorboard/events.out.tfevents.1776638724.praise-5080.2184587.0 +3 -0
- checkpoints_stage2_moe_text_unfreeze/train_config.json +59 -0
- checkpoints_stage2_no_moe_unfreeze/action_mean.npy +3 -0
- checkpoints_stage2_no_moe_unfreeze/action_std.npy +3 -0
- checkpoints_stage2_no_moe_unfreeze/architecture.txt +10 -0
- checkpoints_stage2_no_moe_unfreeze/best.pt +3 -0
- checkpoints_stage2_no_moe_unfreeze/latest.pt +3 -0
- checkpoints_stage2_no_moe_unfreeze/tensorboard/events.out.tfevents.1776637048.praise-5080.2170321.0 +3 -0
- checkpoints_stage2_no_moe_unfreeze/train_config.json +59 -0
checkpoints_stage2_moe_full_peg_only/architecture.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_tag: moe_full
|
| 2 |
+
task: peg-insert-side-v3
|
| 3 |
+
config: experiments/moe_full_unfreeze.yaml
|
| 4 |
+
action_head_type: moe
|
| 5 |
+
router_condition: action_input
|
| 6 |
+
freeze_vision: true
|
| 7 |
+
freeze_text: true
|
| 8 |
+
unfreeze_vision_last_n_layers: 2
|
| 9 |
+
unfreeze_text_last_n_layers: 2
|
| 10 |
+
act_chunk_size: n/a
|
checkpoints_stage2_moe_full_peg_only/moe_router_entropy.csv
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
epoch,task_name,mean_router_entropy,sample_count
|
| 2 |
+
1,short_metaworld,1.0972516559274759,4250
|
| 3 |
+
2,short_metaworld,0.868278188122546,4250
|
| 4 |
+
3,short_metaworld,0.8644592178498998,4250
|
| 5 |
+
4,short_metaworld,0.8801690644621849,4250
|
| 6 |
+
5,short_metaworld,0.9024338802160585,4250
|
| 7 |
+
6,short_metaworld,0.9231361929274657,4250
|
| 8 |
+
7,short_metaworld,0.9505329890706959,4250
|
| 9 |
+
8,short_metaworld,0.9680941077444484,4250
|
| 10 |
+
9,short_metaworld,0.9897653412012493,4250
|
| 11 |
+
10,short_metaworld,1.0097954931697424,4250
|
| 12 |
+
11,short_metaworld,1.019337093510172,4250
|
| 13 |
+
12,short_metaworld,1.0326422810493148,4250
|
| 14 |
+
13,short_metaworld,1.0416921249838436,4250
|
| 15 |
+
14,short_metaworld,1.0466027086096652,4250
|
| 16 |
+
15,short_metaworld,1.0440011160680476,4250
|
| 17 |
+
16,short_metaworld,1.0458287356697462,4250
|
checkpoints_stage2_moe_full_peg_only/moe_router_weights.csv
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
epoch,task_name,expert_idx,mean_router_weight,sample_count
|
| 2 |
+
1,short_metaworld,0,0.2524724304676056,4250
|
| 3 |
+
1,short_metaworld,1,0.2697482109069824,4250
|
| 4 |
+
1,short_metaworld,2,0.26585105061531067,4250
|
| 5 |
+
1,short_metaworld,3,0.21192866563796997,4250
|
| 6 |
+
2,short_metaworld,0,0.3143608272075653,4250
|
| 7 |
+
2,short_metaworld,1,0.3268682062625885,4250
|
| 8 |
+
2,short_metaworld,2,0.21709245443344116,4250
|
| 9 |
+
2,short_metaworld,3,0.14167837798595428,4250
|
| 10 |
+
3,short_metaworld,0,0.3089982569217682,4250
|
| 11 |
+
3,short_metaworld,1,0.3164787292480469,4250
|
| 12 |
+
3,short_metaworld,2,0.2356213927268982,4250
|
| 13 |
+
3,short_metaworld,3,0.13890130817890167,4250
|
| 14 |
+
4,short_metaworld,0,0.29757270216941833,4250
|
| 15 |
+
4,short_metaworld,1,0.30781999230384827,4250
|
| 16 |
+
4,short_metaworld,2,0.2540871202945709,4250
|
| 17 |
+
4,short_metaworld,3,0.1405191272497177,4250
|
| 18 |
+
5,short_metaworld,0,0.28526824712753296,4250
|
| 19 |
+
5,short_metaworld,1,0.29758328199386597,4250
|
| 20 |
+
5,short_metaworld,2,0.27030232548713684,4250
|
| 21 |
+
5,short_metaworld,3,0.14684562385082245,4250
|
| 22 |
+
6,short_metaworld,0,0.27950519323349,4250
|
| 23 |
+
6,short_metaworld,1,0.289709210395813,4250
|
| 24 |
+
6,short_metaworld,2,0.27237799763679504,4250
|
| 25 |
+
6,short_metaworld,3,0.15840782225131989,4250
|
| 26 |
+
7,short_metaworld,0,0.26899853348731995,4250
|
| 27 |
+
7,short_metaworld,1,0.27762043476104736,4250
|
| 28 |
+
7,short_metaworld,2,0.2824629247188568,4250
|
| 29 |
+
7,short_metaworld,3,0.17091910541057587,4250
|
| 30 |
+
8,short_metaworld,0,0.26036784052848816,4250
|
| 31 |
+
8,short_metaworld,1,0.26750805974006653,4250
|
| 32 |
+
8,short_metaworld,2,0.2912062704563141,4250
|
| 33 |
+
8,short_metaworld,3,0.18091799318790436,4250
|
| 34 |
+
9,short_metaworld,0,0.2523835301399231,4250
|
| 35 |
+
9,short_metaworld,1,0.2574305534362793,4250
|
| 36 |
+
9,short_metaworld,2,0.2924705445766449,4250
|
| 37 |
+
9,short_metaworld,3,0.19771520793437958,4250
|
| 38 |
+
10,short_metaworld,0,0.23678657412528992,4250
|
| 39 |
+
10,short_metaworld,1,0.2441885620355606,4250
|
| 40 |
+
10,short_metaworld,2,0.29841580986976624,4250
|
| 41 |
+
10,short_metaworld,3,0.22060911357402802,4250
|
| 42 |
+
11,short_metaworld,0,0.22633861005306244,4250
|
| 43 |
+
11,short_metaworld,1,0.23437286913394928,4250
|
| 44 |
+
11,short_metaworld,2,0.3036373257637024,4250
|
| 45 |
+
11,short_metaworld,3,0.23565179109573364,4250
|
| 46 |
+
12,short_metaworld,0,0.220066636800766,4250
|
| 47 |
+
12,short_metaworld,1,0.22533780336380005,4250
|
| 48 |
+
12,short_metaworld,2,0.2991962134838104,4250
|
| 49 |
+
12,short_metaworld,3,0.25539910793304443,4250
|
| 50 |
+
13,short_metaworld,0,0.21208599209785461,4250
|
| 51 |
+
13,short_metaworld,1,0.21645447611808777,4250
|
| 52 |
+
13,short_metaworld,2,0.2984868884086609,4250
|
| 53 |
+
13,short_metaworld,3,0.2729724049568176,4250
|
| 54 |
+
14,short_metaworld,0,0.21153134107589722,4250
|
| 55 |
+
14,short_metaworld,1,0.2144383043050766,4250
|
| 56 |
+
14,short_metaworld,2,0.2981346845626831,4250
|
| 57 |
+
14,short_metaworld,3,0.275896281003952,4250
|
| 58 |
+
15,short_metaworld,0,0.20906934142112732,4250
|
| 59 |
+
15,short_metaworld,1,0.21298854053020477,4250
|
| 60 |
+
15,short_metaworld,2,0.29622671008110046,4250
|
| 61 |
+
15,short_metaworld,3,0.2817155122756958,4250
|
| 62 |
+
16,short_metaworld,0,0.2072872817516327,4250
|
| 63 |
+
16,short_metaworld,1,0.21117538213729858,4250
|
| 64 |
+
16,short_metaworld,2,0.2976193130016327,4250
|
| 65 |
+
16,short_metaworld,3,0.2839184105396271,4250
|
checkpoints_stage2_moe_full_peg_only/tensorboard/events.out.tfevents.1776852769.praise-5080.2844183.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9c96de0901951335e66869ff79d7ccf87555bae5029154ea4971f1e68d10ad4
|
| 3 |
+
size 399563
|
checkpoints_stage2_moe_full_peg_only/train_config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_type": "short_metaworld",
|
| 3 |
+
"data_root": "data/short-metaworld-vla",
|
| 4 |
+
"train_jsonl": "",
|
| 5 |
+
"val_jsonl": "",
|
| 6 |
+
"val_ratio": 0.15,
|
| 7 |
+
"vision_model_name": "google/siglip2-base-patch16-224",
|
| 8 |
+
"text_model_name": "google/siglip2-base-patch16-224",
|
| 9 |
+
"separate_backbones": false,
|
| 10 |
+
"image_size": 224,
|
| 11 |
+
"freeze_vision": true,
|
| 12 |
+
"freeze_text": true,
|
| 13 |
+
"unfreeze_vision_last_n_layers": 2,
|
| 14 |
+
"unfreeze_text_last_n_layers": 2,
|
| 15 |
+
"fusion_type": "cross_attn",
|
| 16 |
+
"proj_dim": 512,
|
| 17 |
+
"fusion_hidden_dim": 1024,
|
| 18 |
+
"fusion_out_dim": 512,
|
| 19 |
+
"fusion_num_layers": 3,
|
| 20 |
+
"fusion_num_heads": 8,
|
| 21 |
+
"fusion_dropout": 0.1,
|
| 22 |
+
"normalize_embeddings": true,
|
| 23 |
+
"action_head_type": "moe",
|
| 24 |
+
"action_mlp_hidden_dim": 256,
|
| 25 |
+
"action_mlp_layers": 2,
|
| 26 |
+
"action_mlp_dropout": 0.1,
|
| 27 |
+
"moe_num_experts": 4,
|
| 28 |
+
"moe_hidden_dim": 512,
|
| 29 |
+
"moe_load_balance_weight": 0.001,
|
| 30 |
+
"router_condition": "action_input",
|
| 31 |
+
"act_chunk_size": 8,
|
| 32 |
+
"act_hidden_dim": 512,
|
| 33 |
+
"act_num_layers": 2,
|
| 34 |
+
"act_dropout": 0.1,
|
| 35 |
+
"use_geometry_features": true,
|
| 36 |
+
"geometry_dim": 6,
|
| 37 |
+
"temporal_context": 4,
|
| 38 |
+
"action_dim": 4,
|
| 39 |
+
"num_workers": 8,
|
| 40 |
+
"normalize_action_targets": true,
|
| 41 |
+
"action_norm_eps": 1e-06,
|
| 42 |
+
"learnable_action_scale": true,
|
| 43 |
+
"action_scale_init": 1.0,
|
| 44 |
+
"loss_type": "huber",
|
| 45 |
+
"huber_delta": 0.5,
|
| 46 |
+
"epochs": 80,
|
| 47 |
+
"batch_size": 32,
|
| 48 |
+
"grad_accum_steps": 2,
|
| 49 |
+
"learning_rate": 5e-05,
|
| 50 |
+
"weight_decay": 0.01,
|
| 51 |
+
"grad_clip_norm": 1.0,
|
| 52 |
+
"use_fp16": true,
|
| 53 |
+
"early_stopping_patience": 8,
|
| 54 |
+
"early_stopping_min_delta": 0.0005,
|
| 55 |
+
"seed": 42,
|
| 56 |
+
"device": "cuda",
|
| 57 |
+
"out_dir": "checkpoints_stage2_moe_full_peg_only",
|
| 58 |
+
"save_best_by_val": true
|
| 59 |
+
}
|
checkpoints_stage2_moe_full_unfreeze/action_mean.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:881babfc525f93ae7ac93ab91516b17b292d67775bfd40feedd7755616d28bf2
|
| 3 |
+
size 144
|
checkpoints_stage2_moe_full_unfreeze/action_std.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2dab14b2f1929494f67e68d843c0f4efec058594e511d68358d738016c0b1fe7
|
| 3 |
+
size 144
|
checkpoints_stage2_moe_full_unfreeze/architecture.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_tag: moe_full
|
| 2 |
+
setting: core
|
| 3 |
+
config: experiments/moe_full_unfreeze.yaml
|
| 4 |
+
action_head_type: moe
|
| 5 |
+
router_condition: action_input
|
| 6 |
+
freeze_vision: true
|
| 7 |
+
freeze_text: true
|
| 8 |
+
unfreeze_vision_last_n_layers: 2
|
| 9 |
+
unfreeze_text_last_n_layers: 2
|
| 10 |
+
act_chunk_size: n/a
|
checkpoints_stage2_moe_full_unfreeze/best.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:440fc24b6261688744fe46c03c581eb6d42ae58198911440bcac36e239fa1885
|
| 3 |
+
size 1826296723
|
checkpoints_stage2_moe_full_unfreeze/latest.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ebab02b7968b978d0e514cf7097b97d805385101eb50fe0df9b9c89be1ca6800
|
| 3 |
+
size 1826337283
|
checkpoints_stage2_moe_full_unfreeze/moe_router_entropy.csv
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
epoch,task_name,mean_router_entropy,sample_count
|
| 2 |
+
1,short_metaworld,0.8266469451868448,59539
|
| 3 |
+
2,short_metaworld,0.8533366397670633,59539
|
| 4 |
+
3,short_metaworld,0.8693474535340717,59539
|
| 5 |
+
4,short_metaworld,0.8904163963175487,59539
|
| 6 |
+
5,short_metaworld,0.9250720586699692,59539
|
| 7 |
+
6,short_metaworld,0.9457602825621302,59539
|
| 8 |
+
7,short_metaworld,0.9612330587745677,59539
|
| 9 |
+
8,short_metaworld,0.9723969739556451,59539
|
| 10 |
+
9,short_metaworld,0.9794298765542427,59539
|
| 11 |
+
10,short_metaworld,0.9789973156880027,59539
|
| 12 |
+
11,short_metaworld,1.0188740455718426,59539
|
| 13 |
+
12,short_metaworld,1.0265689132868232,59539
|
| 14 |
+
13,short_metaworld,1.040901602255933,59539
|
| 15 |
+
14,short_metaworld,1.045208848729876,59539
|
| 16 |
+
15,short_metaworld,1.0550597640867616,59539
|
| 17 |
+
16,short_metaworld,1.0639260735712261,59539
|
| 18 |
+
17,short_metaworld,1.078483802683777,59539
|
| 19 |
+
18,short_metaworld,1.0873469053151474,59539
|
| 20 |
+
19,short_metaworld,1.08479473295326,59539
|
| 21 |
+
20,short_metaworld,1.0818844081643484,59539
|
| 22 |
+
21,short_metaworld,1.088739881516334,59539
|
| 23 |
+
22,short_metaworld,1.0956593954755363,59539
|
| 24 |
+
23,short_metaworld,1.1019762048816382,59539
|
| 25 |
+
24,short_metaworld,1.115751132529953,59539
|
| 26 |
+
25,short_metaworld,1.1183759485058393,59539
|
checkpoints_stage2_moe_full_unfreeze/moe_router_weights.csv
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
epoch,task_name,expert_idx,mean_router_weight,sample_count
|
| 2 |
+
1,short_metaworld,0,0.1725972592830658,59539
|
| 3 |
+
1,short_metaworld,1,0.3237016201019287,59539
|
| 4 |
+
1,short_metaworld,2,0.26370570063591003,59539
|
| 5 |
+
1,short_metaworld,3,0.23999355733394623,59539
|
| 6 |
+
2,short_metaworld,0,0.20173285901546478,59539
|
| 7 |
+
2,short_metaworld,1,0.29363250732421875,59539
|
| 8 |
+
2,short_metaworld,2,0.26702892780303955,59539
|
| 9 |
+
2,short_metaworld,3,0.23760712146759033,59539
|
| 10 |
+
3,short_metaworld,0,0.234486922621727,59539
|
| 11 |
+
3,short_metaworld,1,0.2745989263057709,59539
|
| 12 |
+
3,short_metaworld,2,0.2596431076526642,59539
|
| 13 |
+
3,short_metaworld,3,0.23127387464046478,59539
|
| 14 |
+
4,short_metaworld,0,0.2463609129190445,59539
|
| 15 |
+
4,short_metaworld,1,0.26643916964530945,59539
|
| 16 |
+
4,short_metaworld,2,0.26865747570991516,59539
|
| 17 |
+
4,short_metaworld,3,0.21854126453399658,59539
|
| 18 |
+
5,short_metaworld,0,0.2569107711315155,59539
|
| 19 |
+
5,short_metaworld,1,0.2579309940338135,59539
|
| 20 |
+
5,short_metaworld,2,0.26801663637161255,59539
|
| 21 |
+
5,short_metaworld,3,0.21714268624782562,59539
|
| 22 |
+
6,short_metaworld,0,0.2629309892654419,59539
|
| 23 |
+
6,short_metaworld,1,0.2564207911491394,59539
|
| 24 |
+
6,short_metaworld,2,0.26288262009620667,59539
|
| 25 |
+
6,short_metaworld,3,0.2177649587392807,59539
|
| 26 |
+
7,short_metaworld,0,0.2639448642730713,59539
|
| 27 |
+
7,short_metaworld,1,0.2512056529521942,59539
|
| 28 |
+
7,short_metaworld,2,0.26194506883621216,59539
|
| 29 |
+
7,short_metaworld,3,0.22290275990962982,59539
|
| 30 |
+
8,short_metaworld,0,0.2619611322879791,59539
|
| 31 |
+
8,short_metaworld,1,0.2512024939060211,59539
|
| 32 |
+
8,short_metaworld,2,0.26084572076797485,59539
|
| 33 |
+
8,short_metaworld,3,0.22599029541015625,59539
|
| 34 |
+
9,short_metaworld,0,0.26731523871421814,59539
|
| 35 |
+
9,short_metaworld,1,0.25023913383483887,59539
|
| 36 |
+
9,short_metaworld,2,0.25189438462257385,59539
|
| 37 |
+
9,short_metaworld,3,0.23055225610733032,59539
|
| 38 |
+
10,short_metaworld,0,0.27526751160621643,59539
|
| 39 |
+
10,short_metaworld,1,0.24768540263175964,59539
|
| 40 |
+
10,short_metaworld,2,0.24636392295360565,59539
|
| 41 |
+
10,short_metaworld,3,0.23067975044250488,59539
|
| 42 |
+
11,short_metaworld,0,0.2856765687465668,59539
|
| 43 |
+
11,short_metaworld,1,0.2459157258272171,59539
|
| 44 |
+
11,short_metaworld,2,0.2368682324886322,59539
|
| 45 |
+
11,short_metaworld,3,0.23154431581497192,59539
|
| 46 |
+
12,short_metaworld,0,0.28470495343208313,59539
|
| 47 |
+
12,short_metaworld,1,0.24562636017799377,59539
|
| 48 |
+
12,short_metaworld,2,0.2367038130760193,59539
|
| 49 |
+
12,short_metaworld,3,0.2329660952091217,59539
|
| 50 |
+
13,short_metaworld,0,0.28792625665664673,59539
|
| 51 |
+
13,short_metaworld,1,0.244477316737175,59539
|
| 52 |
+
13,short_metaworld,2,0.23464582860469818,59539
|
| 53 |
+
13,short_metaworld,3,0.2329520285129547,59539
|
| 54 |
+
14,short_metaworld,0,0.28744399547576904,59539
|
| 55 |
+
14,short_metaworld,1,0.24262361228466034,59539
|
| 56 |
+
14,short_metaworld,2,0.23451165854930878,59539
|
| 57 |
+
14,short_metaworld,3,0.2354205995798111,59539
|
| 58 |
+
15,short_metaworld,0,0.28776249289512634,59539
|
| 59 |
+
15,short_metaworld,1,0.24331533908843994,59539
|
| 60 |
+
15,short_metaworld,2,0.23419678211212158,59539
|
| 61 |
+
15,short_metaworld,3,0.23472383618354797,59539
|
| 62 |
+
16,short_metaworld,0,0.2902672588825226,59539
|
| 63 |
+
16,short_metaworld,1,0.24256440997123718,59539
|
| 64 |
+
16,short_metaworld,2,0.2317458987236023,59539
|
| 65 |
+
16,short_metaworld,3,0.23542441427707672,59539
|
| 66 |
+
17,short_metaworld,0,0.2897332012653351,59539
|
| 67 |
+
17,short_metaworld,1,0.24269115924835205,59539
|
| 68 |
+
17,short_metaworld,2,0.23254677653312683,59539
|
| 69 |
+
17,short_metaworld,3,0.23502705991268158,59539
|
| 70 |
+
18,short_metaworld,0,0.2905566394329071,59539
|
| 71 |
+
18,short_metaworld,1,0.24254554510116577,59539
|
| 72 |
+
18,short_metaworld,2,0.23163749277591705,59539
|
| 73 |
+
18,short_metaworld,3,0.23526298999786377,59539
|
| 74 |
+
19,short_metaworld,0,0.2922336459159851,59539
|
| 75 |
+
19,short_metaworld,1,0.2397744357585907,59539
|
| 76 |
+
19,short_metaworld,2,0.23199936747550964,59539
|
| 77 |
+
19,short_metaworld,3,0.23599739372730255,59539
|
| 78 |
+
20,short_metaworld,0,0.2911907434463501,59539
|
| 79 |
+
20,short_metaworld,1,0.2390134483575821,59539
|
| 80 |
+
20,short_metaworld,2,0.23173364996910095,59539
|
| 81 |
+
20,short_metaworld,3,0.2380628138780594,59539
|
| 82 |
+
21,short_metaworld,0,0.2931298315525055,59539
|
| 83 |
+
21,short_metaworld,1,0.24040372669696808,59539
|
| 84 |
+
21,short_metaworld,2,0.2293849140405655,59539
|
| 85 |
+
21,short_metaworld,3,0.23708373308181763,59539
|
| 86 |
+
22,short_metaworld,0,0.29230207204818726,59539
|
| 87 |
+
22,short_metaworld,1,0.2397640198469162,59539
|
| 88 |
+
22,short_metaworld,2,0.22986406087875366,59539
|
| 89 |
+
22,short_metaworld,3,0.23806846141815186,59539
|
| 90 |
+
23,short_metaworld,0,0.29536736011505127,59539
|
| 91 |
+
23,short_metaworld,1,0.2345297634601593,59539
|
| 92 |
+
23,short_metaworld,2,0.23083822429180145,59539
|
| 93 |
+
23,short_metaworld,3,0.23926454782485962,59539
|
| 94 |
+
24,short_metaworld,0,0.29640254378318787,59539
|
| 95 |
+
24,short_metaworld,1,0.23482196033000946,59539
|
| 96 |
+
24,short_metaworld,2,0.23173286020755768,59539
|
| 97 |
+
24,short_metaworld,3,0.2370423674583435,59539
|
| 98 |
+
25,short_metaworld,0,0.2962692975997925,59539
|
| 99 |
+
25,short_metaworld,1,0.2354266494512558,59539
|
| 100 |
+
25,short_metaworld,2,0.23047353327274323,59539
|
| 101 |
+
25,short_metaworld,3,0.2378319650888443,59539
|
checkpoints_stage2_moe_full_unfreeze/tensorboard/events.out.tfevents.1776640393.praise-5080.2198461.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e04707a3f3456a3781740e2b9af2a9b5dad7b127a376cc22e4cf1487b9a89876
|
| 3 |
+
size 8663930
|
checkpoints_stage2_moe_full_unfreeze/train_config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_type": "short_metaworld",
|
| 3 |
+
"data_root": "data/short-metaworld-vla",
|
| 4 |
+
"train_jsonl": "",
|
| 5 |
+
"val_jsonl": "",
|
| 6 |
+
"val_ratio": 0.15,
|
| 7 |
+
"vision_model_name": "google/siglip2-base-patch16-224",
|
| 8 |
+
"text_model_name": "google/siglip2-base-patch16-224",
|
| 9 |
+
"separate_backbones": false,
|
| 10 |
+
"image_size": 224,
|
| 11 |
+
"freeze_vision": true,
|
| 12 |
+
"freeze_text": true,
|
| 13 |
+
"unfreeze_vision_last_n_layers": 2,
|
| 14 |
+
"unfreeze_text_last_n_layers": 2,
|
| 15 |
+
"fusion_type": "cross_attn",
|
| 16 |
+
"proj_dim": 512,
|
| 17 |
+
"fusion_hidden_dim": 1024,
|
| 18 |
+
"fusion_out_dim": 512,
|
| 19 |
+
"fusion_num_layers": 3,
|
| 20 |
+
"fusion_num_heads": 8,
|
| 21 |
+
"fusion_dropout": 0.1,
|
| 22 |
+
"normalize_embeddings": true,
|
| 23 |
+
"action_head_type": "moe",
|
| 24 |
+
"action_mlp_hidden_dim": 256,
|
| 25 |
+
"action_mlp_layers": 2,
|
| 26 |
+
"action_mlp_dropout": 0.1,
|
| 27 |
+
"moe_num_experts": 4,
|
| 28 |
+
"moe_hidden_dim": 512,
|
| 29 |
+
"moe_load_balance_weight": 0.001,
|
| 30 |
+
"router_condition": "action_input",
|
| 31 |
+
"act_chunk_size": 8,
|
| 32 |
+
"act_hidden_dim": 512,
|
| 33 |
+
"act_num_layers": 2,
|
| 34 |
+
"act_dropout": 0.1,
|
| 35 |
+
"use_geometry_features": true,
|
| 36 |
+
"geometry_dim": 6,
|
| 37 |
+
"temporal_context": 4,
|
| 38 |
+
"action_dim": 4,
|
| 39 |
+
"num_workers": 8,
|
| 40 |
+
"normalize_action_targets": true,
|
| 41 |
+
"action_norm_eps": 1e-06,
|
| 42 |
+
"learnable_action_scale": true,
|
| 43 |
+
"action_scale_init": 1.0,
|
| 44 |
+
"loss_type": "huber",
|
| 45 |
+
"huber_delta": 0.5,
|
| 46 |
+
"epochs": 80,
|
| 47 |
+
"batch_size": 32,
|
| 48 |
+
"grad_accum_steps": 2,
|
| 49 |
+
"learning_rate": 5e-05,
|
| 50 |
+
"weight_decay": 0.01,
|
| 51 |
+
"grad_clip_norm": 1.0,
|
| 52 |
+
"use_fp16": true,
|
| 53 |
+
"early_stopping_patience": 8,
|
| 54 |
+
"early_stopping_min_delta": 0.0005,
|
| 55 |
+
"seed": 42,
|
| 56 |
+
"device": "cuda",
|
| 57 |
+
"out_dir": "checkpoints_stage2_moe_full_unfreeze",
|
| 58 |
+
"save_best_by_val": true
|
| 59 |
+
}
|
checkpoints_stage2_moe_text_unfreeze/action_mean.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:881babfc525f93ae7ac93ab91516b17b292d67775bfd40feedd7755616d28bf2
|
| 3 |
+
size 144
|
checkpoints_stage2_moe_text_unfreeze/action_std.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2dab14b2f1929494f67e68d843c0f4efec058594e511d68358d738016c0b1fe7
|
| 3 |
+
size 144
|
checkpoints_stage2_moe_text_unfreeze/architecture.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_tag: moe_text
|
| 2 |
+
setting: core
|
| 3 |
+
config: experiments/moe_text_unfreeze.yaml
|
| 4 |
+
action_head_type: moe
|
| 5 |
+
router_condition: text
|
| 6 |
+
freeze_vision: true
|
| 7 |
+
freeze_text: true
|
| 8 |
+
unfreeze_vision_last_n_layers: 2
|
| 9 |
+
unfreeze_text_last_n_layers: 2
|
| 10 |
+
act_chunk_size: n/a
|
checkpoints_stage2_moe_text_unfreeze/best.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:13bbbccc888cdac06329671dc49b830e9fc7adf5191f4b0221fedcb78a62bc7f
|
| 3 |
+
size 1826295571
|
checkpoints_stage2_moe_text_unfreeze/latest.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5300c7da06e3009faf4939557e237899b785724d076501d1fe0b80178b3c85a
|
| 3 |
+
size 1826336131
|
checkpoints_stage2_moe_text_unfreeze/moe_router_entropy.csv
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
epoch,task_name,mean_router_entropy,sample_count
|
| 2 |
+
1,short_metaworld,1.3860215749456126,59539
|
| 3 |
+
2,short_metaworld,1.3859960296536484,59539
|
| 4 |
+
3,short_metaworld,1.3860550356931578,59539
|
| 5 |
+
4,short_metaworld,1.3860976992821288,59539
|
| 6 |
+
5,short_metaworld,1.3861125832824228,59539
|
| 7 |
+
6,short_metaworld,1.3861275549612804,59539
|
| 8 |
+
7,short_metaworld,1.386156571432181,59539
|
| 9 |
+
8,short_metaworld,1.386179281892041,59539
|
| 10 |
+
9,short_metaworld,1.3861871356216409,59539
|
| 11 |
+
10,short_metaworld,1.3861988380119112,59539
|
| 12 |
+
11,short_metaworld,1.3862183840787095,59539
|
| 13 |
+
12,short_metaworld,1.386233197621406,59539
|
| 14 |
+
13,short_metaworld,1.3862402749299045,59539
|
| 15 |
+
14,short_metaworld,1.3862468894707403,59539
|
| 16 |
+
15,short_metaworld,1.3862460440656648,59539
|
| 17 |
+
16,short_metaworld,1.386254353549203,59539
|
| 18 |
+
17,short_metaworld,1.3862646711443478,59539
|
| 19 |
+
18,short_metaworld,1.3862623862779409,59539
|
| 20 |
+
19,short_metaworld,1.3862614024464113,59539
|
| 21 |
+
20,short_metaworld,1.3862660815737053,59539
|
checkpoints_stage2_moe_text_unfreeze/moe_router_weights.csv
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
epoch,task_name,expert_idx,mean_router_weight,sample_count
|
| 2 |
+
1,short_metaworld,0,0.2503420412540436,59539
|
| 3 |
+
1,short_metaworld,1,0.2457144558429718,59539
|
| 4 |
+
1,short_metaworld,2,0.25188928842544556,59539
|
| 5 |
+
1,short_metaworld,3,0.2520378828048706,59539
|
| 6 |
+
2,short_metaworld,0,0.251309335231781,59539
|
| 7 |
+
2,short_metaworld,1,0.2445855289697647,59539
|
| 8 |
+
2,short_metaworld,2,0.2530737817287445,59539
|
| 9 |
+
2,short_metaworld,3,0.25103235244750977,59539
|
| 10 |
+
3,short_metaworld,0,0.2510743737220764,59539
|
| 11 |
+
3,short_metaworld,1,0.24634063243865967,59539
|
| 12 |
+
3,short_metaworld,2,0.25233784317970276,59539
|
| 13 |
+
3,short_metaworld,3,0.25024473667144775,59539
|
| 14 |
+
4,short_metaworld,0,0.25092294812202454,59539
|
| 15 |
+
4,short_metaworld,1,0.24752888083457947,59539
|
| 16 |
+
4,short_metaworld,2,0.2518005967140198,59539
|
| 17 |
+
4,short_metaworld,3,0.24973003566265106,59539
|
| 18 |
+
5,short_metaworld,0,0.2507861852645874,59539
|
| 19 |
+
5,short_metaworld,1,0.24724964797496796,59539
|
| 20 |
+
5,short_metaworld,2,0.25160107016563416,59539
|
| 21 |
+
5,short_metaworld,3,0.25036120414733887,59539
|
| 22 |
+
6,short_metaworld,0,0.2508155405521393,59539
|
| 23 |
+
6,short_metaworld,1,0.24734751880168915,59539
|
| 24 |
+
6,short_metaworld,2,0.2516288757324219,59539
|
| 25 |
+
6,short_metaworld,3,0.25018537044525146,59539
|
| 26 |
+
7,short_metaworld,0,0.2510412335395813,59539
|
| 27 |
+
7,short_metaworld,1,0.2474389672279358,59539
|
| 28 |
+
7,short_metaworld,2,0.2514648735523224,59539
|
| 29 |
+
7,short_metaworld,3,0.2500589191913605,59539
|
| 30 |
+
8,short_metaworld,0,0.25110000371932983,59539
|
| 31 |
+
8,short_metaworld,1,0.2477336823940277,59539
|
| 32 |
+
8,short_metaworld,2,0.2511843144893646,59539
|
| 33 |
+
8,short_metaworld,3,0.2499760538339615,59539
|
| 34 |
+
9,short_metaworld,0,0.2511608898639679,59539
|
| 35 |
+
9,short_metaworld,1,0.24793510138988495,59539
|
| 36 |
+
9,short_metaworld,2,0.25116100907325745,59539
|
| 37 |
+
9,short_metaworld,3,0.24973830580711365,59539
|
| 38 |
+
10,short_metaworld,0,0.25113919377326965,59539
|
| 39 |
+
10,short_metaworld,1,0.2480745017528534,59539
|
| 40 |
+
10,short_metaworld,2,0.25087249279022217,59539
|
| 41 |
+
10,short_metaworld,3,0.24991968274116516,59539
|
| 42 |
+
11,short_metaworld,0,0.2509872317314148,59539
|
| 43 |
+
11,short_metaworld,1,0.24823589622974396,59539
|
| 44 |
+
11,short_metaworld,2,0.25075864791870117,59539
|
| 45 |
+
11,short_metaworld,3,0.2500267028808594,59539
|
| 46 |
+
12,short_metaworld,0,0.2509134113788605,59539
|
| 47 |
+
12,short_metaworld,1,0.24851222336292267,59539
|
| 48 |
+
12,short_metaworld,2,0.25063180923461914,59539
|
| 49 |
+
12,short_metaworld,3,0.2499595731496811,59539
|
| 50 |
+
13,short_metaworld,0,0.2509301006793976,59539
|
| 51 |
+
13,short_metaworld,1,0.24855764210224152,59539
|
| 52 |
+
13,short_metaworld,2,0.25048622488975525,59539
|
| 53 |
+
13,short_metaworld,3,0.2500481605529785,59539
|
| 54 |
+
14,short_metaworld,0,0.2508739233016968,59539
|
| 55 |
+
14,short_metaworld,1,0.24877679347991943,59539
|
| 56 |
+
14,short_metaworld,2,0.25037506222724915,59539
|
| 57 |
+
14,short_metaworld,3,0.24997255206108093,59539
|
| 58 |
+
15,short_metaworld,0,0.25082045793533325,59539
|
| 59 |
+
15,short_metaworld,1,0.24877150356769562,59539
|
| 60 |
+
15,short_metaworld,2,0.25048789381980896,59539
|
| 61 |
+
15,short_metaworld,3,0.24988879263401031,59539
|
| 62 |
+
16,short_metaworld,0,0.2507392466068268,59539
|
| 63 |
+
16,short_metaworld,1,0.24902907013893127,59539
|
| 64 |
+
16,short_metaworld,2,0.25044384598731995,59539
|
| 65 |
+
16,short_metaworld,3,0.24978768825531006,59539
|
| 66 |
+
17,short_metaworld,0,0.2505834698677063,59539
|
| 67 |
+
17,short_metaworld,1,0.2492835372686386,59539
|
| 68 |
+
17,short_metaworld,2,0.2501995861530304,59539
|
| 69 |
+
17,short_metaworld,3,0.249890998005867,59539
|
| 70 |
+
18,short_metaworld,0,0.25059205293655396,59539
|
| 71 |
+
18,short_metaworld,1,0.24905765056610107,59539
|
| 72 |
+
18,short_metaworld,2,0.25026026368141174,59539
|
| 73 |
+
18,short_metaworld,3,0.250081866979599,59539
|
| 74 |
+
19,short_metaworld,0,0.2506750226020813,59539
|
| 75 |
+
19,short_metaworld,1,0.24909931421279907,59539
|
| 76 |
+
19,short_metaworld,2,0.2502804398536682,59539
|
| 77 |
+
19,short_metaworld,3,0.24993231892585754,59539
|
| 78 |
+
20,short_metaworld,0,0.2507267892360687,59539
|
| 79 |
+
20,short_metaworld,1,0.2493761032819748,59539
|
| 80 |
+
20,short_metaworld,2,0.25002411007881165,59539
|
| 81 |
+
20,short_metaworld,3,0.24984972178936005,59539
|
checkpoints_stage2_moe_text_unfreeze/tensorboard/events.out.tfevents.1776638724.praise-5080.2184587.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00bebc1285aea38f375f7568c0da9c632b33c19b4a5afe39908a7b2af7c54684
|
| 3 |
+
size 6921243
|
checkpoints_stage2_moe_text_unfreeze/train_config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_type": "short_metaworld",
|
| 3 |
+
"data_root": "data/short-metaworld-vla",
|
| 4 |
+
"train_jsonl": "",
|
| 5 |
+
"val_jsonl": "",
|
| 6 |
+
"val_ratio": 0.15,
|
| 7 |
+
"vision_model_name": "google/siglip2-base-patch16-224",
|
| 8 |
+
"text_model_name": "google/siglip2-base-patch16-224",
|
| 9 |
+
"separate_backbones": false,
|
| 10 |
+
"image_size": 224,
|
| 11 |
+
"freeze_vision": true,
|
| 12 |
+
"freeze_text": true,
|
| 13 |
+
"unfreeze_vision_last_n_layers": 2,
|
| 14 |
+
"unfreeze_text_last_n_layers": 2,
|
| 15 |
+
"fusion_type": "cross_attn",
|
| 16 |
+
"proj_dim": 512,
|
| 17 |
+
"fusion_hidden_dim": 1024,
|
| 18 |
+
"fusion_out_dim": 512,
|
| 19 |
+
"fusion_num_layers": 3,
|
| 20 |
+
"fusion_num_heads": 8,
|
| 21 |
+
"fusion_dropout": 0.1,
|
| 22 |
+
"normalize_embeddings": true,
|
| 23 |
+
"action_head_type": "moe",
|
| 24 |
+
"action_mlp_hidden_dim": 256,
|
| 25 |
+
"action_mlp_layers": 2,
|
| 26 |
+
"action_mlp_dropout": 0.1,
|
| 27 |
+
"moe_num_experts": 4,
|
| 28 |
+
"moe_hidden_dim": 512,
|
| 29 |
+
"moe_load_balance_weight": 0.001,
|
| 30 |
+
"router_condition": "text",
|
| 31 |
+
"act_chunk_size": 8,
|
| 32 |
+
"act_hidden_dim": 512,
|
| 33 |
+
"act_num_layers": 2,
|
| 34 |
+
"act_dropout": 0.1,
|
| 35 |
+
"use_geometry_features": true,
|
| 36 |
+
"geometry_dim": 6,
|
| 37 |
+
"temporal_context": 4,
|
| 38 |
+
"action_dim": 4,
|
| 39 |
+
"num_workers": 8,
|
| 40 |
+
"normalize_action_targets": true,
|
| 41 |
+
"action_norm_eps": 1e-06,
|
| 42 |
+
"learnable_action_scale": true,
|
| 43 |
+
"action_scale_init": 1.0,
|
| 44 |
+
"loss_type": "huber",
|
| 45 |
+
"huber_delta": 0.5,
|
| 46 |
+
"epochs": 80,
|
| 47 |
+
"batch_size": 32,
|
| 48 |
+
"grad_accum_steps": 2,
|
| 49 |
+
"learning_rate": 5e-05,
|
| 50 |
+
"weight_decay": 0.01,
|
| 51 |
+
"grad_clip_norm": 1.0,
|
| 52 |
+
"use_fp16": true,
|
| 53 |
+
"early_stopping_patience": 8,
|
| 54 |
+
"early_stopping_min_delta": 0.0005,
|
| 55 |
+
"seed": 42,
|
| 56 |
+
"device": "cuda",
|
| 57 |
+
"out_dir": "checkpoints_stage2_moe_text_unfreeze",
|
| 58 |
+
"save_best_by_val": true
|
| 59 |
+
}
|
checkpoints_stage2_no_moe_unfreeze/action_mean.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:881babfc525f93ae7ac93ab91516b17b292d67775bfd40feedd7755616d28bf2
|
| 3 |
+
size 144
|
checkpoints_stage2_no_moe_unfreeze/action_std.npy
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2dab14b2f1929494f67e68d843c0f4efec058594e511d68358d738016c0b1fe7
|
| 3 |
+
size 144
|
checkpoints_stage2_no_moe_unfreeze/architecture.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_tag: no_moe
|
| 2 |
+
setting: core
|
| 3 |
+
config: experiments/no_moe_unfreeze.yaml
|
| 4 |
+
action_head_type: mlp
|
| 5 |
+
router_condition: n/a
|
| 6 |
+
freeze_vision: true
|
| 7 |
+
freeze_text: true
|
| 8 |
+
unfreeze_vision_last_n_layers: 2
|
| 9 |
+
unfreeze_text_last_n_layers: 2
|
| 10 |
+
act_chunk_size: n/a
|
checkpoints_stage2_no_moe_unfreeze/best.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95a6021db530477bcdbdf46e33ade442e56f98bfd01ac4b82bba9ab58e7f7249
|
| 3 |
+
size 1816284011
|
checkpoints_stage2_no_moe_unfreeze/latest.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15c2056e01fe4c2fefcd0310002e10d680adbd6a267bf8bbf69f5de66a1633b0
|
| 3 |
+
size 1816322539
|
checkpoints_stage2_no_moe_unfreeze/tensorboard/events.out.tfevents.1776637048.praise-5080.2170321.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a190ac69c19947e3ccc743a8d8eb042974b21581bcce58b22b1947579df5d17f
|
| 3 |
+
size 4464798
|
checkpoints_stage2_no_moe_unfreeze/train_config.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_type": "short_metaworld",
|
| 3 |
+
"data_root": "data/short-metaworld-vla",
|
| 4 |
+
"train_jsonl": "",
|
| 5 |
+
"val_jsonl": "",
|
| 6 |
+
"val_ratio": 0.15,
|
| 7 |
+
"vision_model_name": "google/siglip2-base-patch16-224",
|
| 8 |
+
"text_model_name": "google/siglip2-base-patch16-224",
|
| 9 |
+
"separate_backbones": false,
|
| 10 |
+
"image_size": 224,
|
| 11 |
+
"freeze_vision": true,
|
| 12 |
+
"freeze_text": true,
|
| 13 |
+
"unfreeze_vision_last_n_layers": 2,
|
| 14 |
+
"unfreeze_text_last_n_layers": 2,
|
| 15 |
+
"fusion_type": "cross_attn",
|
| 16 |
+
"proj_dim": 512,
|
| 17 |
+
"fusion_hidden_dim": 1024,
|
| 18 |
+
"fusion_out_dim": 512,
|
| 19 |
+
"fusion_num_layers": 3,
|
| 20 |
+
"fusion_num_heads": 8,
|
| 21 |
+
"fusion_dropout": 0.1,
|
| 22 |
+
"normalize_embeddings": true,
|
| 23 |
+
"action_head_type": "mlp",
|
| 24 |
+
"action_mlp_hidden_dim": 512,
|
| 25 |
+
"action_mlp_layers": 2,
|
| 26 |
+
"action_mlp_dropout": 0.1,
|
| 27 |
+
"moe_num_experts": 4,
|
| 28 |
+
"moe_hidden_dim": 512,
|
| 29 |
+
"moe_load_balance_weight": 0.01,
|
| 30 |
+
"router_condition": "text",
|
| 31 |
+
"act_chunk_size": 8,
|
| 32 |
+
"act_hidden_dim": 512,
|
| 33 |
+
"act_num_layers": 2,
|
| 34 |
+
"act_dropout": 0.1,
|
| 35 |
+
"use_geometry_features": true,
|
| 36 |
+
"geometry_dim": 6,
|
| 37 |
+
"temporal_context": 4,
|
| 38 |
+
"action_dim": 4,
|
| 39 |
+
"num_workers": 8,
|
| 40 |
+
"normalize_action_targets": true,
|
| 41 |
+
"action_norm_eps": 1e-06,
|
| 42 |
+
"learnable_action_scale": true,
|
| 43 |
+
"action_scale_init": 1.0,
|
| 44 |
+
"loss_type": "huber",
|
| 45 |
+
"huber_delta": 0.5,
|
| 46 |
+
"epochs": 80,
|
| 47 |
+
"batch_size": 32,
|
| 48 |
+
"grad_accum_steps": 2,
|
| 49 |
+
"learning_rate": 5e-05,
|
| 50 |
+
"weight_decay": 0.01,
|
| 51 |
+
"grad_clip_norm": 1.0,
|
| 52 |
+
"use_fp16": true,
|
| 53 |
+
"early_stopping_patience": 8,
|
| 54 |
+
"early_stopping_min_delta": 0.0005,
|
| 55 |
+
"seed": 42,
|
| 56 |
+
"device": "cuda",
|
| 57 |
+
"out_dir": "checkpoints_stage2_no_moe_unfreeze",
|
| 58 |
+
"save_best_by_val": true
|
| 59 |
+
}
|