PLM4 / PhysicsLM4.2-8B /default /params.json
quockhangdev's picture
PhysicsLM4.2-8B
f5c1628 verified
{
"comment": "Note: this is not the full params file used for training (see our GitHub repo), but it is sufficient for model loading",
"data": {
"add_bos": true,
"add_eos": true,
"batch_size": 3,
"load_async": true,
"n_views": 2,
"prefetch_size": 1024,
"root_dir": "<zeyuan_placeholder>",
"seed": 42,
"seq_len": 4096,
"sources": {
"original_shuffled4": 1.0
},
"tokenizer": {
"name": "tiktoken",
"path": "<zeyuan_placeholder>"
}
},
"model": {
"attn_impl": "sdpa",
"canon_activation": false,
"canon_bias": false,
"canon_kernel": 4,
"canon_residual": true,
"canon_set": "ABCD",
"dim": 4096,
"ffn_dim_multiplier": 1.0,
"head_dim": null,
"hidden_dim": 14336,
"init_base_std": null,
"init_std_factor": "disabled",
"max_seqlen": 4096,
"multiple_of": 256,
"n_heads": 32,
"n_kv_heads": 8,
"n_layers": 32,
"norm_eps": 1e-05,
"qk_norm": false,
"rope_dim": 32,
"rope_theta": 100000.0,
"seed": 42,
"sliding_window": null,
"vocab_size": 128256,
"weight_tying": false,
"z_loss": false
}
}