Lazaurus committed on
Commit
004a88b
·
verified ·
1 Parent(s): 8789bc4

Upload config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.yaml +47 -0
config.yaml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ inherit: configs/base.yaml
2
+ experiment_name: chinchilla_best
3
+
4
+ model:
5
+ dim: 2048
6
+ n_layers: 24
7
+
8
+ attention:
9
+ type: gated_gqa
10
+ num_heads: 16
11
+ num_kv_heads: 4
12
+ head_dim: 128
13
+ qk_norm: true
14
+
15
+ ffn:
16
+ type: relu2
17
+ intermediate_mult: 2.667
18
+
19
+ residual:
20
+ type: mhc
21
+ n_streams: 4
22
+
23
+ optim:
24
+ type: muon
25
+ lr: 3.0e-4 # Adam LR for 1D params/embeddings
26
+ muon_lr: 0.007
27
+ normuon: true
28
+ normuon_beta2: 0.95
29
+ scheduler: trapezoidal
30
+ cooldown_fraction: 0.45
31
+ warmup_steps: 500
32
+
33
+ training:
34
+ tokens: 20_000_000_000
35
+ batch_size: 8 # per-GPU; 131GB peak on H200 at seq_len=4096
36
+ seq_len: 4096
37
+ grad_accum_steps: 4 # effective batch = 8 (per-GPU batch) * 8 (GPUs, assumed) * 4 (grad accum) = 256 seqs = 1.05M tok/step
38
+ liger: true
39
+ compile: true
40
+ checkpoint_every_tokens: 2_000_000_000
41
+ hf_repo: GoedelMachines/chinchilla-1b-best
42
+
43
+ data:
44
+ shard_dir: data/fineweb_edu
45
+
46
+ logging:
47
+ wandb_enabled: true