{
  "d_model": 1536,
  "n_heads": 16,
  "n_kv_heads": 2,
  "d_head": 96,
  "vocab_size": 130569,
  "max_seq_len": 4096,
  "dropout": 0.1,
  "rope_base": 500000.0,
  "d_ff": 4608,
  "n_trunk_layers": 12,
  "n_recur_layers": 8,
  "n_coda_layers": 4,
  "t_max": 6,
  "lora_rank": 32,
  "mtp_depth": 2
}