AlexWortega's picture
Upload config.json with huggingface_hub
a14207b verified
{
"vocab_size": 512,
"d_model": 640,
"n_layers": 14,
"n_q_heads": 10,
"n_kv_heads": 2,
"head_dim": 64,
"rope_partial": 32,
"rope_theta": 10000.0,
"d_ff": 1024,
"n_routed_experts": 8,
"n_shared_experts": 1,
"top_k": 2,
"moe_first_layer": 1,
"router_z_coef": 0.001,
"router_noise_std": 0.0,
"router_aux_coef": 0.001,
"bias_update_rate": 0.001,
"max_seq_len": 1024,
"tie_embeddings": true,
"rms_eps": 1e-06,
"init_std": 0.02,
"mup_base_d": 512,
"attn_backend": "sdpa",
"moe_backend": "grouped",
"moe_capacity_factor": 1.5,
"smear_gate": true,
"use_chunked_ce": true,
"ce_chunk_tokens": 512,
"ce_checkpoint_chunks": true,
"use_liger_ce": true,
"_model_class": "MoEModel",
"architectures": [
"MoEModel"
]
}