{
  "vocab_size": 32064,
  "d_model": 1280,
  "n_layers": 30,
  "n_heads": 10,
  "q_lora_rank": 640,
  "kv_lora_rank": 320,
  "qk_nope_head_dim": 64,
  "qk_rope_head_dim": 64,
  "v_head_dim": 128,
  "ff_hidden_mult": 3.5,
  "qk_norm": true,
  "max_seq_len": 2048,
  "attn_dropout": 0.05,
  "resid_dropout": 0.05,
  "emb_dropout": 0.05,
  "label_smoothing": 0.05
}