{ "producer": { "name": "modelopt", "version": "0.37.0" }, "architecture": "Qwen3ForCausalLM", "dtype": "bfloat16", "logits_dtype": "float16", "num_hidden_layers": 36, "num_attention_heads": 32, "num_key_value_heads": 8, "hidden_size": 2560, "norm_epsilon": 1e-06, "vocab_size": 151936, "max_position_embeddings": 40960, "hidden_act": "silu", "use_parallel_embedding": true, "embedding_sharding_dim": 0, "head_size": 128, "intermediate_size": 9728, "position_embedding_type": "rope_gpt_neox", "share_embedding_table": false, "residual_mlp": false, "bias": false, "rotary_pct": 1.0, "rank": 0, "decoder": "qwen", "rmsnorm": true, "lm_head_bias": false, "mlp_bias": false, "attn_bias": false, "rotary_base": 1000000, "rotary_scaling": null, "disable_weight_only_quant_plugin": false, "num_labels": 1, "use_logn_attn": false, "mlp_only_layers": [], "decoder_sparse_step": 1, "moe": { "num_experts": 0, "shared_expert_intermediate_size": 0, "top_k": 0, "normalization_mode": 0, "sparse_mixer_epsilon": 0.01, "tp_mode": 0, "device_limited_n_group": 0, "device_limited_topk_group": 0, "device_limited_routed_scaling_factor": 1.0 }, "runtime_defaults": null, "mapping": { "world_size": 1, "gpus_per_node": 8, "cp_size": 1, "tp_size": 1, "pp_size": 1, "moe_tp_size": 1, "moe_cluster_size": 1, "moe_ep_size": 1, "attn_tp_size": 1, "attn_cp_size": 1, "cp_config": {}, "enable_attention_dp": false, "enable_lm_head_tp_in_adp": false }, "quantization": { "quant_algo": "FP8", "kv_cache_quant_algo": "FP8", "group_size": 128, "smoothquant_val": 0.5, "clamp_val": null, "use_meta_recipe": false, "has_zero_point": false, "pre_quant_scale": false, "exclude_modules": [ "lm_head" ], "mamba_ssm_cache_dtype": null }, "qk_layernorm": false, "rotary_embedding_dim": 128, "seq_length": 8192, "qwen_type": "qwen3", "moe_intermediate_size": 0, "moe_shared_expert_intermediate_size": 0, "tie_word_embeddings": true, "model_type": "qwen" }