markhenry committed on
Commit
6436067
·
verified ·
1 Parent(s): 330cdcc

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +56 -0
config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_dir": "../data/fineweb-edu-10B",
3
+ "batch_size": 80,
4
+ "seq_len": 1024,
5
+ "max_iters": 16000,
6
+ "gradient_accumulation_steps": 4,
7
+ "optimizer": "muon",
8
+ "learning_rate": 0.0006,
9
+ "min_lr": 6e-05,
10
+ "weight_decay": 0.1,
11
+ "beta1": 0.9,
12
+ "beta2": 0.95,
13
+ "grad_clip": 1.0,
14
+ "warmup_iters": 0,
15
+ "lr_decay_iters": 16000,
16
+ "muon_lr": 0.006,
17
+ "muon_min_lr": 0.0,
18
+ "adamw_lr": 0.006,
19
+ "adamw_min_lr": 0.0,
20
+ "muon_momentum": 0.95,
21
+ "muon_nesterov": true,
22
+ "muon_ns_steps": 5,
23
+ "muon_weight_decay": 0.0,
24
+ "lr_schedule": "linear_warmdown",
25
+ "warmdown_frac": 0.4,
26
+ "device": "cuda",
27
+ "dtype": "bfloat16",
28
+ "compile": true,
29
+ "n_layer": 12,
30
+ "n_head": 8,
31
+ "n_embd": 1024,
32
+ "block_size": 1024,
33
+ "vocab_size": 50304,
34
+ "sparsity_mode": "none",
35
+ "rmsnorm_affine": true,
36
+ "cayley_levels": [],
37
+ "cayley_per_parent_budget": false,
38
+ "cayley_seed": 42,
39
+ "cayley_backend": "auto",
40
+ "cayley_locations": [
41
+ "resid_mid"
42
+ ],
43
+ "cayley_score_standardize": false,
44
+ "cayley_richardson_refine": false,
45
+ "cayley_matching_pursuit": false,
46
+ "tied_block_init": false,
47
+ "dead_threshold_c": 0.1,
48
+ "resume": false,
49
+ "stop_val_loss": 3.173,
50
+ "out_dir": "../out/vanilla-v5-parity",
51
+ "eval_interval": 250,
52
+ "eval_iters": 100,
53
+ "log_interval": 10,
54
+ "wandb_project": "sparse-nanogpt",
55
+ "wandb_run_name": "vanilla-v5-parity"
56
+ }