{ "vocab_size": 512, "d_model": 640, "n_layers": 14, "n_q_heads": 10, "n_kv_heads": 2, "head_dim": 64, "rope_partial": 32, "rope_theta": 10000.0, "d_ff": 1024, "n_routed_experts": 8, "n_shared_experts": 1, "top_k": 2, "moe_first_layer": 1, "router_z_coef": 0.001, "router_noise_std": 0.0, "router_aux_coef": 0.001, "bias_update_rate": 0.001, "max_seq_len": 1024, "tie_embeddings": true, "rms_eps": 1e-06, "init_std": 0.02, "mup_base_d": 512, "attn_backend": "sdpa", "moe_backend": "grouped", "moe_capacity_factor": 1.5, "smear_gate": true, "use_chunked_ce": true, "ce_chunk_tokens": 512, "ce_checkpoint_chunks": true, "use_liger_ce": true, "_model_class": "MoEModel", "architectures": [ "MoEModel" ] }