{
    "producer": {
        "name": "modelopt",
        "version": "0.37.0"
    },
    "architecture": "Qwen3ForCausalLM",
    "dtype": "bfloat16",
    "logits_dtype": "float16",
    "num_hidden_layers": 36,
    "num_attention_heads": 32,
    "num_key_value_heads": 8,
    "hidden_size": 2560,
    "norm_epsilon": 1e-06,
    "vocab_size": 151936,
    "max_position_embeddings": 40960,
    "hidden_act": "silu",
    "use_parallel_embedding": true,
    "embedding_sharding_dim": 0,
    "head_size": 128,
    "intermediate_size": 9728,
    "position_embedding_type": "rope_gpt_neox",
    "share_embedding_table": false,
    "residual_mlp": false,
    "bias": false,
    "rotary_pct": 1.0,
    "rank": 0,
    "decoder": "qwen",
    "rmsnorm": true,
    "lm_head_bias": false,
    "mlp_bias": false,
    "attn_bias": false,
    "rotary_base": 1000000,
    "rotary_scaling": null,
    "disable_weight_only_quant_plugin": false,
    "num_labels": 1,
    "use_logn_attn": false,
    "mlp_only_layers": [],
    "decoder_sparse_step": 1,
    "moe": {
        "num_experts": 0,
        "shared_expert_intermediate_size": 0,
        "top_k": 0,
        "normalization_mode": 0,
        "sparse_mixer_epsilon": 0.01,
        "tp_mode": 0,
        "device_limited_n_group": 0,
        "device_limited_topk_group": 0,
        "device_limited_routed_scaling_factor": 1.0
    },
    "runtime_defaults": null,
    "mapping": {
        "world_size": 1,
        "gpus_per_node": 8,
        "cp_size": 1,
        "tp_size": 1,
        "pp_size": 1,
        "moe_tp_size": 1,
        "moe_cluster_size": 1,
        "moe_ep_size": 1,
        "attn_tp_size": 1,
        "attn_cp_size": 1,
        "cp_config": {},
        "enable_attention_dp": false,
        "enable_lm_head_tp_in_adp": false
    },
    "quantization": {
        "quant_algo": "FP8",
        "kv_cache_quant_algo": "FP8",
        "group_size": 128,
        "smoothquant_val": 0.5,
        "clamp_val": null,
        "use_meta_recipe": false,
        "has_zero_point": false,
        "pre_quant_scale": false,
        "exclude_modules": [
            "lm_head"
        ],
        "mamba_ssm_cache_dtype": null
    },
    "qk_layernorm": false,
    "rotary_embedding_dim": 128,
    "seq_length": 8192,
    "qwen_type": "qwen3",
    "moe_intermediate_size": 0,
    "moe_shared_expert_intermediate_size": 0,
    "tie_word_embeddings": true,
    "model_type": "qwen"
}