lpkphd committed on
Commit
c1cb582
·
verified ·
1 Parent(s): 018a848

Upload config/config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. config/config.yaml +48 -0
config/config.yaml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ d_model: 768
2
+ n_layers: 12
3
+ vocab_size: 50257
4
+ attention:
5
+ type: mla
6
+ n_heads: 12
7
+ n_kv_heads: null
8
+ kv_compression_dim: 256
9
+ q_compression_dim: 384
10
+ rope_dim: 64
11
+ dropout: 0.0
12
+ ffn:
13
+ type: moe_swiglu
14
+ hidden_mult: 2.6667
15
+ n_experts: 4
16
+ top_k: 2
17
+ shared_experts: 1
18
+ load_balance_weight: 0.01
19
+ dropout: 0.0
20
+ position:
21
+ type: decoupled_rope
22
+ max_seq_len: 2048
23
+ rope_base: 10000.0
24
+ rope_dim: null
25
+ norm:
26
+ type: rmsnorm
27
+ eps: 1.0e-06
28
+ output:
29
+ type: tied
30
+ training:
31
+ batch_size: 8
32
+ seq_len: 1024
33
+ lr: 0.0003
34
+ min_lr: 3.0e-05
35
+ warmup_steps: 1000
36
+ max_steps: 50000
37
+ weight_decay: 0.1
38
+ grad_clip: 1.0
39
+ beta1: 0.9
40
+ beta2: 0.95
41
+ dataset: HuggingFaceFW/fineweb-edu
42
+ tokenizer: gpt2
43
+ log_interval: 10
44
+ eval_interval: 500
45
+ save_interval: 2500
46
+ eval_steps: 100
47
+ embed_dropout: 0.0
48
+ residual_dropout: 0.0