vorushin committed on
Commit
18e4548
·
verified ·
1 Parent(s): 186dd03

Upload checkpoint (depth=12, 1000 steps)

Browse files
checkpoint_d12/config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "num_shards": 50,
3
+ "hf_repo_id": "vorushin/tpuchat",
4
+ "depth": 12,
5
+ "aspect_ratio": 64,
6
+ "head_dim": 128,
7
+ "vocab_size": 32768,
8
+ "seq_len": 2048,
9
+ "window_pattern": "SSSL",
10
+ "softcap": 15.0,
11
+ "num_iterations": 1000,
12
+ "target_param_data_ratio": 10.5,
13
+ "device_batch_size": 2,
14
+ "grad_accum_steps": 8,
15
+ "total_batch_size": -1,
16
+ "max_chars_per_doc": 10000,
17
+ "learning_rate": 0.0003,
18
+ "weight_decay": 0.1,
19
+ "beta1": 0.9,
20
+ "beta2": 0.95,
21
+ "eps": 1e-08,
22
+ "warmup_ratio": 0.02,
23
+ "warmdown_ratio": 0.5,
24
+ "final_lr_frac": 0.0,
25
+ "eval_every": 100,
26
+ "eval_steps": 10,
27
+ "log_every": 10,
28
+ "save_every": -1,
29
+ "sample_every": 250,
30
+ "param_seed": 42
31
+ }
checkpoint_d12/params.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:537134a36b83ae6fd9e80932e69de632e5d391070eb372a98346a209fad4444d
3
+ size 1145578631