NeelNanda committed
Commit 6e03152 · Parent: 7d2de7e

Auto Commit

Files changed (2)
  1. config.json +55 -0
  2. model_final.pth +3 -0
config.json ADDED
@@ -0,0 +1,55 @@
+ {
+     "n_layers": 4,
+     "d_model": 512,
+     "d_mlp": 2048,
+     "d_head": 64,
+     "n_heads": 8,
+     "lr_hidden": 0.002,
+     "lr_vector": 0.001,
+     "batch_size_per_device": 32,
+     "batches_per_step": 1,
+     "seed": 9153,
+     "save_checkpoints": true,
+     "debug": false,
+     "debug_batch": false,
+     "normalization": "LN",
+     "max_tokens": 22000000000,
+     "version": 170,
+     "use_bfloat16_matmul": true,
+     "n_ctx": 1024,
+     "d_vocab": 48262,
+     "tokenizer_name": "NeelNanda/gpt-neox-tokenizer-digits",
+     "betas": [
+         0.9,
+         0.99
+     ],
+     "weight_decay": 0.05,
+     "dataset_name": "c4+code",
+     "grad_norm_clip": 1.0,
+     "n_devices": 8,
+     "act_fn": "solu_ln",
+     "shortformer_pos": false,
+     "attn_only": false,
+     "ln_eps": 1e-05,
+     "lr_schedule": "cosine_warmup",
+     "warmup_tokens": 300000000,
+     "train_loss_ewma_beta": 0.99,
+     "truncate_tokens": 1000000000000,
+     "log_interval": 50,
+     "initializer_scale_global": 1.0,
+     "initializer_scale_hidden": 0.02,
+     "initializer_scale_embed": 0.1,
+     "initializer_scale_unembed": 0.02,
+     "neuron_scale": 1.0,
+     "neuron_temp": 1.0,
+     "use_acc": false,
+     "weight_init_scheme": "gpt2",
+     "fixed_init": "",
+     "store_init": true,
+     "control": 1.0,
+     "tokens_per_step": 262144,
+     "batch_size": 256,
+     "max_steps": 83923,
+     "warmup_steps": 1144,
+     "n_params": 12582912
+ }
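
The stored hyperparameters are internally consistent: n_heads * d_head = 8 * 64 = 512 = d_model, batch_size = 32 per device * 8 devices = 256, tokens_per_step = 256 * 1024 = 262144, and max_steps = floor(22e9 / 262144) = 83923. n_params also matches the usual non-embedding count 12 * n_layers * d_model^2 (which assumes d_mlp = 4 * d_model, as here). A minimal sketch that checks these relationships, assuming config.json has been downloaded to the working directory:

    import json

    # Load the config shipped in this commit (adjust the path as needed).
    with open("config.json") as f:
        cfg = json.load(f)

    # Attention head dims tile the residual stream: 8 * 64 == 512.
    assert cfg["n_heads"] * cfg["d_head"] == cfg["d_model"]

    # Global batch is the per-device batch across all 8 devices: 32 * 8 == 256.
    assert cfg["batch_size"] == cfg["batch_size_per_device"] * cfg["n_devices"]

    # Each step consumes batch_size full-length sequences: 256 * 1024 == 262144 tokens.
    assert cfg["tokens_per_step"] == cfg["batch_size"] * cfg["n_ctx"]

    # Step counts follow from the token budgets: ~22B train, 300M warmup.
    assert cfg["max_steps"] == cfg["max_tokens"] // cfg["tokens_per_step"]
    assert cfg["warmup_steps"] == cfg["warmup_tokens"] // cfg["tokens_per_step"]

    # Non-embedding parameter count: 12 * n_layers * d_model^2 (assumes d_mlp = 4 * d_model).
    assert cfg["n_params"] == 12 * cfg["n_layers"] * cfg["d_model"] ** 2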
model_final.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:83f3252f6eb13d5d1c23de3aeaf6ca5dacfaee1839a58e4c2bef02d552ac4dcb
+ size 254510118
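
model_final.pth is checked in as a Git LFS pointer: the three lines above record only the spec version, the SHA-256 of the real payload, and its size (254,510,118 bytes, roughly 243 MiB); the weights themselves live in LFS storage and are fetched by `git lfs pull` or the hub's resolve endpoint. A minimal sketch that verifies a locally fetched copy against this pointer:

    import hashlib

    # Values copied from the LFS pointer above.
    EXPECTED_OID = "83f3252f6eb13d5d1c23de3aeaf6ca5dacfaee1839a58e4c2bef02d552ac4dcb"
    EXPECTED_SIZE = 254510118

    # Stream the file in 1 MiB chunks so the ~243 MiB blob never sits in memory.
    h = hashlib.sha256()
    size = 0
    with open("model_final.pth", "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
            size += len(chunk)

    assert size == EXPECTED_SIZE, f"size mismatch: got {size}"
    assert h.hexdigest() == EXPECTED_OID, "sha256 mismatch"
    print("model_final.pth matches the LFS pointer")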