{
  "vocab_size": 64000,
  "d_model": 3072,
  "n_layers": 26,
  "n_heads": 24,
  "n_kv_heads": 8,
  "d_ffn": 9216,
  "max_seq_len": 4096,
  "rope_theta": 500000.0,
  "dropout": 0.0,
  "bias": false,
  "use_flash_attn": true,
  "use_fp8": false,
  "use_hybrid": true,
  "hybrid_pattern": "M M M M M M M M M M M M A M M M M M M M M M M M A M",
  "mamba_d_state": 128,
  "mamba_head_dim": 64,
  "mamba_expand": 2,
  "mamba_conv_kernel": 4,
  "mamba_n_groups": 8,
  "mamba_d_ffn": 4608,
  "mamba_chunk_size": 256,
  "model_type": "evafrill-mo",
  "architectures": [
    "EvafrillMoForCausalLM"
  ],
  "torch_dtype": "bfloat16"
}