{
"architectures": [
"MetisMambaLMHeadModel"
],
"model_type": "metis_mamba2_hybrid",
"name": "Metis-1.3",
"architecture": "mamba2_hybrid_decoder",
"vocab_size": 8192,
"block_size": 4096,
"d_model": 1152,
"n_layer": 28,
"n_heads": 18,
"n_kv_heads": 6,
"head_dim": 64,
"attn_layer_idx": [
3,
7,
11,
15,
19,
23,
27
],
"attn_d_conv": 4,
"attn_rotary_emb_dim": 0,
"ssm_layer": "Mamba2",
"ssm_d_state": 64,
"ssm_d_conv": 4,
"ssm_expand": 2,
"ssm_cfg": {
"layer": "Mamba2",
"d_state": 64,
"d_conv": 4,
"expand": 2
},
"attn_cfg": {
"causal": true,
"d_conv": 4,
"head_dim": 64,
"num_heads": 18,
"num_heads_kv": 6,
"qkv_proj_bias": false,
"out_proj_bias": false,
"rotary_emb_dim": 0
},
"bos_token_id": 1,
"eos_token_id": 2,
"pad_token_id": 0,
"unk_token_id": 3,
"rms_norm": true,
"residual_in_fp32": false,
"fused_add_norm": false,
"pad_vocab_size_multiple": 16,
"tie_embeddings": true,
"torch_dtype": "bfloat16",
"estimated_params": 201490560
}