{ "vocab_size": 64000, "d_model": 3072, "n_layers": 26, "n_heads": 24, "n_kv_heads": 8, "d_ffn": 9216, "max_seq_len": 4096, "rope_theta": 500000.0, "dropout": 0.0, "bias": false, "use_flash_attn": true, "use_fp8": false, "use_hybrid": true, "hybrid_pattern": "M M M M M M M M M M M M A M M M M M M M M M M M A M", "mamba_d_state": 128, "mamba_head_dim": 64, "mamba_expand": 2, "mamba_conv_kernel": 4, "mamba_n_groups": 8, "mamba_d_ffn": 4608, "mamba_chunk_size": 256, "model_type": "evafrill-mo", "architectures": [ "EvafrillMoForCausalLM" ], "torch_dtype": "bfloat16" }