MiMo-V2-Flash-4bit / config.json
{
  "add_full_attention_sink_bias": false,
  "add_swa_attention_sink_bias": true,
  "architectures": [
    "MiMoV2FlashForCausalLM"
  ],
  "attention_bias": false,
  "attention_chunk_size": 128,
  "attention_dropout": 0.0,
  "attention_value_scale": 0.707,
  "auto_map": {
    "AutoConfig": "configuration_mimo_v2_flash.MiMoV2FlashConfig",
    "AutoModel": "modeling_mimo_v2_flash.MiMoV2FlashModel",
    "AutoModelForCausalLM": "modeling_mimo_v2_flash.MiMoV2FlashForCausalLM"
  },
  "head_dim": 192,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "hybrid_layer_pattern": [
    0, 1, 1, 1, 1,
    0, 1, 1, 1, 1, 1,
    0, 1, 1, 1, 1, 1,
    0, 1, 1, 1, 1, 1,
    0, 1, 1, 1, 1, 1,
    0, 1, 1, 1, 1, 1,
    0, 1, 1, 1, 1, 1,
    0, 1, 1, 1, 1, 1,
    0
  ],
  "initializer_range": 0.02,
  "intermediate_size": 16384,
  "layernorm_epsilon": 1e-05,
  "max_position_embeddings": 262144,
  "model_type": "mimo_v2_flash",
  "moe_intermediate_size": 2048,
  "moe_layer_freq": [
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
  ],
  "n_group": 1,
  "n_routed_experts": 256,
  "n_shared_experts": null,
  "norm_topk_prob": true,
  "num_attention_heads": 64,
  "num_experts_per_tok": 8,
  "num_hidden_layers": 48,
  "num_key_value_heads": 4,
  "partial_rotary_factor": 0.334,
  "quantization": {
    "group_size": 64,
    "bits": 4,
    "mode": "affine"
  },
  "quantization_config": {
    "group_size": 64,
    "bits": 4,
    "mode": "affine"
  },
  "rope_theta": 5000000,
  "routed_scaling_factor": null,
  "scoring_func": "sigmoid",
  "sliding_window": 128,
  "sliding_window_size": 128,
  "swa_head_dim": 192,
  "swa_num_attention_heads": 64,
  "swa_num_key_value_heads": 8,
  "swa_rope_theta": 10000,
  "swa_v_head_dim": 128,
  "tie_word_embeddings": false,
  "topk_group": 1,
  "topk_method": "noaux_tc",
  "torch_dtype": "bfloat16",
  "transformers_version": "4.40.1",
  "use_cache": true,
  "v_head_dim": 128,
  "vocab_size": 152576
}
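
The "hybrid_layer_pattern" array has one entry per hidden layer (48 entries, matching "num_hidden_layers": 48) and interleaves two attention types; the separate "swa_*" fields configure the sliding-window layers. A minimal reading sketch, under the assumption that 1 marks a sliding-window-attention (SWA) layer and 0 a full-attention layer; that mapping is not stated in the config itself.

# Sketch: decode hybrid_layer_pattern, ASSUMING 1 = sliding-window
# attention (window 128) and 0 = full attention. The 0/1 meaning is an
# assumption, not confirmed by the config.
import json

with open("config.json") as f:
    cfg = json.load(f)

pattern = cfg["hybrid_layer_pattern"]
assert len(pattern) == cfg["num_hidden_layers"]  # 48

full_layers = [i for i, kind in enumerate(pattern) if kind == 0]
swa_layers = [i for i, kind in enumerate(pattern) if kind == 1]

print("full-attention layers:", full_layers)  # [0, 5, 11, 17, 23, 29, 35, 41, 47]
print("SWA layers:", len(swa_layers))         # 39, each with sliding_window 128

So a full-attention layer appears at layer 0 and then roughly every sixth layer, with sliding-window layers in between.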
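The MoE fields describe sigmoid-scored top-k routing: 256 routed experts ("n_routed_experts"), 8 active per token ("num_experts_per_tok"), with the selected scores renormalized ("norm_topk_prob": true) since sigmoid scores do not sum to 1. A rough numpy sketch of that selection step; the real "noaux_tc" method additionally uses a learned per-expert bias for load balancing, which is omitted here as a simplification.

# Sketch of sigmoid top-k expert routing for one token, per scoring_func,
# num_experts_per_tok, and norm_topk_prob. The noaux_tc load-balancing
# bias is intentionally left out (assumption: it only affects selection).
import numpy as np

def route(logits: np.ndarray, k: int = 8):
    """logits: (n_routed_experts,) router outputs for one token."""
    scores = 1.0 / (1.0 + np.exp(-logits))  # scoring_func: "sigmoid"
    topk = np.argsort(scores)[-k:]          # num_experts_per_tok: 8
    weights = scores[topk]
    weights = weights / weights.sum()       # norm_topk_prob: true
    # routed_scaling_factor is null, so no extra scaling is applied.
    return topk, weights

rng = np.random.default_rng(0)
experts, weights = route(rng.normal(size=256))  # n_routed_experts: 256
print(experts, weights.sum())                   # 8 expert ids; weights sum to 1.0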
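The "quantization" block ("bits": 4, "group_size": 64, "mode": "affine") describes group-wise affine quantization in the MLX style: each run of 64 consecutive weights shares a float scale and bias, and each weight is stored as a 4-bit code. A rough numpy sketch of the round trip for a single group; MLX's actual packed storage layout differs.

# Sketch of group-wise affine quantization: 4 bits per weight, groups of
# 64 weights, each group carrying its own scale and bias (mode: "affine").
import numpy as np

def quantize_group(w: np.ndarray, bits: int = 4):
    levels = 2**bits - 1                             # 15 codes above zero for 4-bit
    lo, hi = w.min(), w.max()
    scale = (hi - lo) / levels
    q = np.round((w - lo) / scale).astype(np.uint8)  # codes in [0, 15]
    return q, scale, lo                              # lo serves as the affine bias

def dequantize_group(q: np.ndarray, scale: float, bias: float) -> np.ndarray:
    return q * scale + bias

w = np.random.default_rng(0).normal(size=64).astype(np.float32)  # group_size: 64
q, scale, bias = quantize_group(w)
w_hat = dequantize_group(q, scale, bias)
print(np.abs(w - w_hat).max() <= scale / 2 + 1e-6)  # error bounded by half a step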
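Given the MLX-style "quantization" block and the "-4bit" suffix, this appears to be an MLX conversion. Assuming the MiMoV2Flash architecture is supported by a recent mlx_lm, loading would look roughly like this; the repo id below is a guess based on the file header, not a verified path.

# Rough usage sketch, ASSUMING mlx_lm supports this architecture and that
# the repo id is correct; both are unverified assumptions.
from mlx_lm import load, generate

model, tokenizer = load("mlx-community/MiMo-V2-Flash-4bit")
text = generate(model, tokenizer, prompt="Hello", max_tokens=64, verbose=True)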