YHLLEO committed on
Commit
ad05546
·
verified ·
1 Parent(s): 0345cab

Upload with explicit token

Browse files
checkpoints/0700000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16b2618f708c61d6552f9f067aab93b8226aa6e0e5814bf68756129a5ef5ba01
3
+ size 24814462194
config_2025-11-19T10-09-48.yaml ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ basic:
2
+ exp_name: 000_DSMoE_3B_E16_Flow
3
+ results_dir: exps/
4
+ data_path: /mmu_nlp_hdd/liuyahui06/datasets/imagenet/train
5
+ global_seed: 1234
6
+ epochs: 1000
7
+ log_every: 100
8
+ ckpt_every: 50000
9
+ rf: true
10
+ rf_ori: false
11
+ accum_iter: 1
12
+ clip_grad_norm: null
13
+ image_size: 256
14
+ global_batch_size: 512
15
+ num_workers: 16
16
+ timestep_start: 0
17
+ timestep_end: 1000
18
+ vae_path: stabilityai/sd-vae-ft-mse
19
+ model:
20
+ ckpt: null
21
+ target: models.models_DSMoE.DiT
22
+ params:
23
+ input_size: 32
24
+ num_classes: 1000
25
+ patch_size: 2
26
+ depth: 30
27
+ hidden_size: 1152
28
+ num_heads: 16
29
+ mlp_ratio: 4
30
+ use_swiglu: false
31
+ rope_type: 2d
32
+ use_sinks: false
33
+ sliding_window: 0
34
+ enable_gqa: false
35
+ norm_type: layernorm
36
+ MoE_config:
37
+ num_experts: 16
38
+ hidden_size: 1152
39
+ moe_intermediate_size: 2880
40
+ n_group: 2
41
+ topk_group: 2
42
+ num_experts_per_tok: 2
43
+ routed_scaling_factor: 2.0
44
+ capacity: 1
45
+ init_MoeMLP: false
46
+ interleave: true
47
+ skip_first2: false
48
+ skip_last2: false
49
+ use_shared_expert: true
50
+ CapacityPred_loss_weight: 1
51
+ optim:
52
+ base_learning_rate: 0.0001
53
+ weight_decay: 0
54
+ betas:
55
+ - 0.9
56
+ - 0.999
57
+ lr_sheduler:
58
+ warmup: null
59
+ train_epoch: null