YHLLEO committed on
Commit
73db77c
·
verified ·
1 Parent(s): 6636aab

Upload with explicit token

Browse files
checkpoints/0700000.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f7165a8ff9abe165f7667dd395c023a54991a82e8d476c49b8979ebc78e33c5
3
+ size 6181722961
config_2025-11-07T16-06-55.yaml ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ basic:
2
+ exp_name: 000_DSMoE_B_E16_Flow
3
+ results_dir: exps/
4
+ data_path: /mmu_nlp_hdd/liuyahui06/datasets/imagenet/train
5
+ global_seed: 1234
6
+ epochs: 1000
7
+ log_every: 100
8
+ ckpt_every: 50000
9
+ rf: true
10
+ rf_ori: false
11
+ accum_iter: 1
12
+ clip_grad_norm: null
13
+ image_size: 256
14
+ global_batch_size: 512
15
+ num_workers: 16
16
+ timestep_start: 0
17
+ timestep_end: 1000
18
+ vae_path: stabilityai/sd-vae-ft-mse
19
+ model:
20
+ ckpt: null
21
+ target: models.models_DSMoE.DiT
22
+ params:
23
+ input_size: 32
24
+ num_classes: 1000
25
+ patch_size: 2
26
+ depth: 10
27
+ hidden_size: 768
28
+ num_heads: 12
29
+ mlp_ratio: 4
30
+ use_swiglu: false
31
+ rope_type: 2d
32
+ use_sinks: false
33
+ sliding_window: 0
34
+ enable_gqa: false
35
+ norm_type: layernorm
36
+ MoE_config:
37
+ num_experts: 16
38
+ hidden_size: 768
39
+ moe_intermediate_size: 1536
40
+ n_group: 2
41
+ topk_group: 2
42
+ num_experts_per_tok: 2
43
+ routed_scaling_factor: 2.0
44
+ capacity: 1
45
+ init_MoeMLP: false
46
+ interleave: true
47
+ skip_first2: false
48
+ skip_last2: false
49
+ use_shared_expert: true
50
+ CapacityPred_loss_weight: 1
51
+ optim:
52
+ base_learning_rate: 0.0001
53
+ weight_decay: 0
54
+ betas:
55
+ - 0.9
56
+ - 0.999
57
+ lr_sheduler:
58
+ warmup: null
59
+ train_epoch: null