{
  "vocab_size": 16384,
  "block_size": 256,
  "n_embd": 384,
  "n_head": 6,
  "n_layer": 8,
  "use_rope": true,
  "n_kv_head": 2,
  "use_swiglu": false,
  "use_rmsnorm": true,
  "use_mtp": true,
  "mtp_heads": 2,
  "mtp_weight": 0.1,
  "tie_mtp_lm_head": true,
  "use_relu2": true,
  "use_qk_norm": true,
  "logit_cap": 15.0,
  "use_zero_init": true
}