quockhangdev
/

PLM4

Model card | Files and versions

PLM4 / PhysicsLM4.2-8B /default /params.json

quockhangdev's picture

PhysicsLM4.2-8B

f5c1628 verified 5 days ago

history blame contribute delete

1.34 kB

	{
	"comment": "Note: this is not the full params file used for training (see our github repo), but sufficient for model loading",
	"data": {
	"add_bos": true,
	"add_eos": true,
	"batch_size": 3,
	"load_async": true,
	"n_views": 2,
	"prefetch_size": 1024,
	"root_dir": "<zeyuan_placeholder>",
	"seed": 42,
	"seq_len": 4096,
	"sources": {
	"original_shuffled4": 1.0
	},
	"tokenizer": {
	"name": "tiktoken",
	"path": "<zeyuan_placeholder>"
	}
	},
	"model": {
	"attn_impl": "sdpa",
	"canon_activation": false,
	"canon_bias": false,
	"canon_kernel": 4,
	"canon_residual": true,
	"canon_set": "ABCD",
	"dim": 4096,
	"ffn_dim_multiplier": 1.0,
	"head_dim": null,
	"hidden_dim": 14336,
	"init_base_std": null,
	"init_std_factor": "disabled",
	"max_seqlen": 4096,
	"multiple_of": 256,
	"n_heads": 32,
	"n_kv_heads": 8,
	"n_layers": 32,
	"norm_eps": 1e-05,
	"qk_norm": false,
	"rope_dim": 32,
	"rope_theta": 100000.0,
	"seed": 42,
	"sliding_window": null,
	"vocab_size": 128256,
	"weight_tying": false,
	"z_loss": false
	}
	}