microgpt / model /model.json
gpahal's picture
Upload folder using huggingface_hub
baa2ef6 verified
raw
history blame contribute delete
327 Bytes
{
"max_seq_len": 1024,
"d_model": 768,
"n_layers": 12,
"n_heads": 12,
"use_padded_vocab_size": true,
"use_rope": true,
"rope_theta": 10000.0,
"is_rope_full_precision": true,
"embd_dropout_p": 0.0,
"attn_dropout_p": 0.0,
"residual_dropout_p": 0.0,
"init_std": 0.02,
"init_residual_scaled_factor": 2.0
}