moe/metadata.json
{
"model_name": "Qwen3-5M-MoE-2exp-active",
"model_type": "Qwen3MoeForCausalLM",
"tokenizer": "gpt2",
"dtype": "bfloat16",
"vocab_size": 50257,
"hidden_size": 128,
"num_layers": 8,
"num_attention_heads": 4,
"num_key_value_heads": 2,
"head_dim": 32,
"moe_intermediate_size": 1024,
"num_experts": 64,
"num_experts_per_tok": 2,
"sliding_window": 512,
"max_position_embeddings": 8192,
"rope_theta": 500000,
"layer_types": [
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention",
"full_attention",
"sliding_attention",
"sliding_attention"
],
"max_window_layers": 6,
"parameters_total": 208220928,
"parameters_active": 13186816,
"active_ratio": 0.03125,
"positional_encoding": "rope",
"normalization": "rmsnorm",
"activation": "swiglu",
"tie_word_embeddings": true
}
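
A minimal sketch of how the stated parameter counts can be derived from the fields above, assuming a Qwen3-MoE-style weight layout (SwiGLU experts with gate/up/down projections, grouped-query attention with per-head-dim q/k RMSNorm, no attention biases, tied input/output embeddings) and that the file is saved as metadata.json. The exact accounting of small terms such as norm weights may differ slightly from however the stated figures were produced, so the derived numbers are an estimate, not a ground truth.

```python
# Rough parameter accounting for this config (assumptions noted above).
import json

with open("metadata.json") as f:
    cfg = json.load(f)

h = cfg["hidden_size"]              # 128
d = cfg["head_dim"]                 # 32
n_q = cfg["num_attention_heads"]    # 4
n_kv = cfg["num_key_value_heads"]   # 2
layers = cfg["num_layers"]          # 8
ff = cfg["moe_intermediate_size"]   # 1024
n_exp = cfg["num_experts"]          # 64
top_k = cfg["num_experts_per_tok"]  # 2
vocab = cfg["vocab_size"]           # 50257

# Attention block: q/k/v/o projections plus q_norm and k_norm (head_dim each).
attn = h * n_q * d + 2 * (h * n_kv * d) + n_q * d * h + 2 * d

# One SwiGLU expert: gate_proj + up_proj + down_proj.
expert = 3 * h * ff

# Per layer: the always-active part (attention, router, two RMSNorms)
# plus either all experts (total) or only the routed top-k experts (active).
dense_per_layer = attn + h * n_exp + 2 * h
layer_total = dense_per_layer + n_exp * expert
layer_active = dense_per_layer + top_k * expert

emb = vocab * h                     # lm_head shares this (tie_word_embeddings)
total = emb + layers * layer_total + h    # + final RMSNorm
active = emb + layers * layer_active + h

print(f"total  ~ {total:,} (stated {cfg['parameters_total']:,})")
print(f"active ~ {active:,} (stated {cfg['parameters_active']:,})")
print(f"expert ratio = {top_k / n_exp}  (= active_ratio field)")
print(f"active-parameter fraction ~ {active / total:.4f}")
```

Note that the `active_ratio` field (0.03125) is the expert routing ratio, num_experts_per_tok / num_experts = 2/64; the fraction of parameters active per token is larger (roughly 13.2M / 208.2M ≈ 0.063) because the embeddings, attention, routers, and norms are always active.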