{
  "comment": "Note: this is not the full params file used for training (see our github repo), but sufficient for model loading",
  "data": {
    "add_bos": true,
    "add_eos": true,
    "batch_size": 3,
    "load_async": true,
    "n_views": 2,
    "prefetch_size": 1024,
    "root_dir": "<zeyuan_placeholder>",
    "seed": 42,
    "seq_len": 4096,
    "sources": {
      "original_shuffled4": 1.0
    },
    "tokenizer": {
      "name": "tiktoken",
      "path": "<zeyuan_placeholder>"
    }
  },
  "model": {
    "attn_impl": "sdpa",
    "canon_activation": false,
    "canon_bias": false,
    "canon_kernel": 4,
    "canon_residual": true,
    "canon_set": "ABCD",
    "dim": 4096,
    "ffn_dim_multiplier": 1.0,
    "head_dim": null,
    "hidden_dim": 14336,
    "init_base_std": null,
    "init_std_factor": "disabled",
    "max_seqlen": 4096,
    "multiple_of": 256,
    "n_heads": 32,
    "n_kv_heads": 8,
    "n_layers": 32,
    "norm_eps": 1e-05,
    "qk_norm": false,
    "rope_dim": 32,
    "rope_theta": 100000.0,
    "seed": 42,
    "sliding_window": null,
    "vocab_size": 128256,
    "weight_tying": false,
    "z_loss": false
  }
}