{ "decoder_config": { "att_groups": 4, "att_heads": 16, "att_query_groups": 8, "cross_att_type": "sqa", "dense_layer_dim": 1536, "embed_dim": 512, "ff_activation": "silu", "ff_dim": 192, "ff_dropout": 0.0, "final_stateless_layers_config": [ "moe", "moe" ], "head_norm_type": "rms_norm", "moe_bias_mode": "global", "moe_grouped_gemm": true, "moe_shared_experts_bias_mode": "global", "moe_top_k": 10, "moe_use_cutlass_grouped_gemm": true, "moe_use_weighted_shared_experts": false, "num_experts": 384, "num_layers": 21, "num_shared_experts": 2, "rope_base": 100000, "router_amp": true, "router_dtype": "bfloat16", "self_att_type": "sqa", "seq_len": 8192, "shared_expert_dim": 384, "stateless_layers_config": [ "dense", "moe" ], "stm_size": 4096, "use_attention_output_bias": false, "use_flash_attention": true, "use_gated": true, "use_gated_attention": true, "use_gated_cross_attention": false, "use_head_norm": true, "use_moe": true, "use_vectorized_moe": true, "vocab_size": 65536 }, "encoder_config": { "att_groups": 8, "att_heads": 16, "att_query_groups": 8, "cross_att_type": "sqa", "embed_dim": 512, "ff_activation": "silu", "ff_dim": 1536, "ff_dropout": 0.0, "num_layers": 21, "rope_base": 100000, "self_att_type": "sqa", "seq_len": 8192, "skip_memory_cross_attention": true, "stm_size": 4096, "use_attention_output_bias": false, "use_flash_attention": true, "use_gated": true, "use_gated_attention": true, "vocab_size": 65536 }, "memory_attention_config": { "att_groups": 8, "att_heads": 16, "att_query_groups": 8, "att_type": "sqa", "embed_dim": 512, "interlayer_att_groups": 8, "interlayer_att_query_groups": 8, "interlayer_att_type": "sqa", "norm_type": "classic-rms", "num_groups": 3, "num_layers": 21, "residual_gate_type": "elementwise", "residual_per_slot_gate": true, "rope_base": 100000, "seq_len": 8192, "stm_size": 4096, "use_flash_attention": false, "use_gated_residual": true, "use_tanh_residual_gate": false }, "memory_attention_variant": "grouped-self-interlayer", "system_prompt_title": "SYSTEM INSTRUCTIONS", "tokenizer": null, "tokenizer_config": { "answer_token_id": 6, "bos_token_id": 2, "eos_token_id": 3, "internal_token_id": 8, "pad_token_id": 0, "query_token_id": 5, "think_token_id": 7, "tool_call_token_id": 9, "tool_use_token_id": 10 } }