{ "att_dropout": 0.0, "att_experts": null, "att_groups": 8, "att_heads": 16, "att_query_experts": null, "att_query_groups": 8, "att_type": "sqa", "debug_interval": 10, "debug_mode": false, "embed_dim": 512, "interlayer_att_dropout": 0.0, "interlayer_att_experts": null, "interlayer_att_groups": 8, "interlayer_att_query_experts": null, "interlayer_att_query_groups": 8, "interlayer_att_type": "sqa", "norm_decay": 0.9, "norm_init_gate": -2.0, "norm_per_dim_scale": false, "norm_type": "classic-rms", "num_groups": 3, "num_layers": 21, "residual_gate_init": 3.0, "residual_gate_slot_status_type": "mean", "residual_gate_type": "elementwise", "residual_per_slot_gate": true, "rope_base": 100000, "seq_len": 8192, "stm_size": 4096, "use_flash_attention": true, "use_gated_residual": true, "use_tanh_residual_gate": false }