{ "vocab_size": 50304, "d_model": 768, "n_layers": 16, "n_dense_layers": 1, "n_q_heads": 12, "n_kv_heads": 3, "head_dim": 128, "qk_norm": true, "rope_theta": 10000.0, "attn_softcap": 0.0, "dense_ffn": 2304, "expert_ffn": 320, "n_experts": 36, "top_k": 6, "n_shared": 1, "gating": "sigmoid", "norm_topk_prob": false, "balancing": "aux_free", "aux_loss_coef": 0.001, "z_loss_coef": 0.001, "bias_update_rate": 0.001, "router_init_std": 0.02, "tie_embeddings": true, "scale_embeddings": false, "final_z_loss_coef": 0.0001, "logit_softcap": 0.0, "n_mtp": 0, "mtp_weight": 0.1, "init_std": 0.02, "expert_backend": "grouped", "fused_ce": true, "ce_chunk": 4096, "fp8_head": false, "fp8_x_scale": 1.0, "fp8_w_scale": 1.0, "fp8_grad_scale": 1.0, "preset": "500M" }