{
  "d_model": 768,
  "d_intermediate": 0,
  "n_layer": 10,
  "vocab_size": 50280,
  "ssm_cfg": {},
  "attn_layer_idx": [1, 3, 5, 6, 8],
  "attn_cfg": {
    "num_heads": 8
  },
  "rms_norm": true,
  "residual_in_fp32": true,
  "fused_add_norm": true,
  "pad_vocab_size_multiple": 8,
  "tie_embeddings": true
}