Lazaurus committed on
Commit
f43c26d
·
verified ·
1 Parent(s): 5a88107

Upload config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.yaml +42 -0
config.yaml ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ inherit: configs/base.yaml
2
+ experiment_name: chinchilla_baseline
3
+
4
+ model:
5
+ dim: 2048
6
+ n_layers: 24
7
+
8
+ attention:
9
+ type: gqa
10
+ num_heads: 16
11
+ num_kv_heads: 4
12
+ head_dim: 128
13
+ qk_norm: true
14
+
15
+ ffn:
16
+ type: swiglu
17
+ intermediate_mult: 2.667
18
+
19
+ residual:
20
+ type: prenorm
21
+
22
+ optim:
23
+ type: adamw
24
+ lr: 3.0e-4
25
+ scheduler: cosine
26
+ warmup_steps: 500
27
+
28
+ training:
29
+ tokens: 20_000_000_000
30
+ batch_size: 8 # per-GPU; 111GB peak at seq_len=4096
31
+ seq_len: 4096
32
+ grad_accum_steps: 6 # effective batch = 8 (per-GPU batch) * 8 (GPUs, presumably) * 6 (accum steps) = 384 seqs = 1.57M tok/step
33
+ liger: true
34
+ compile: true
35
+ checkpoint_every_tokens: 2_000_000_000
36
+ hf_repo: GoedelMachines/chinchilla-1b-baseline
37
+
38
+ data:
39
+ shard_dir: data/fineweb_edu
40
+
41
+ logging:
42
+ wandb_enabled: true