{
  "name_or_path": "tclf90/GLM-5.2-Int4-Int8Mix",
  "architectures": [
    "GlmMoeDsaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "dtype": "bfloat16",
  "eos_token_id": [
    154820,
    154827,
    154829
  ],
  "ep_size": 1,
  "first_k_dense_replace": 3,
  "head_dim": 192,
  "hidden_act": "silu",
  "hidden_size": 6144,
  "index_head_dim": 128,
  "index_n_heads": 32,
  "index_share_for_mtp_iteration": true,
  "index_skip_topk_offset": 3,
  "index_topk": 2048,
  "index_topk_freq": 4,
  "index_topk_pattern": null,
  "indexer_rope_interleave": true,
  "indexer_types": [
    "full", "full", "full",
    "shared", "shared", "shared", "full",
    "shared", "shared", "shared", "full",
    "shared", "shared", "shared", "full",
    "shared", "shared", "shared", "full",
    "shared", "shared", "shared", "full",
    "shared", "shared", "shared", "full",
    "shared", "shared", "shared", "full",
    "shared", "shared", "shared", "full",
    "shared", "shared", "shared", "full",
    "shared", "shared", "shared", "full",
    "shared", "shared", "shared", "full",
    "shared", "shared", "shared", "full",
    "shared", "shared", "shared", "full",
    "shared", "shared", "shared", "full",
    "shared", "shared", "shared", "full",
    "shared", "shared", "shared", "full",
    "shared", "shared", "shared", "full",
    "shared", "shared", "shared", "full",
    "shared", "shared", "shared"
  ],
  "initializer_range": 0.02,
  "intermediate_size": 12288,
  "kv_lora_rank": 512,
  "max_position_embeddings": 1048576,
  "mlp_layer_types": [
    "dense", "dense", "dense",
    "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse",
    "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse",
    "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse",
    "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse",
    "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse",
    "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse",
    "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse", "sparse",
    "sparse", "sparse", "sparse", "sparse", "sparse"
  ],
  "model_type": "glm_moe_dsa",
  "moe_intermediate_size": 2048,
  "moe_layer_freq": 1,
  "n_group": 1,
  "n_routed_experts": 256,
  "n_shared_experts": 1,
  "norm_topk_prob": true,
  "num_attention_heads": 64,
  "num_experts_per_tok": 8,
  "num_hidden_layers": 78,
  "num_key_value_heads": 64,
  "num_nextn_predict_layers": 1,
  "pad_token_id": 154820,
  "pretraining_tp": 1,
  "q_lora_rank": 2048,
  "qk_head_dim": 256,
  "qk_nope_head_dim": 192,
  "qk_rope_head_dim": 64,
  "rms_norm_eps": 1e-05,
  "rope_interleave": true,
  "rope_parameters": {
    "rope_theta": 8000000,
    "rope_type": "default"
  },
  "routed_scaling_factor": 2.5,
  "scoring_func": "sigmoid",
  "tie_word_embeddings": false,
  "topk_group": 1,
  "topk_method": "noaux_tc",
  "transformers_version": "5.12.0",
  "use_cache": true,
  "v_head_dim": 256,
  "vocab_size": 154880,
  "quantization_config": {
    "quant_method": "compressed-tensors",
    "format": "pack-quantized",
    "ignore": [
      "re:model[.]layers[.]0[.].*",
      "re:model[.]layers[.][1-9][0-9]*[.](?:mtp_block[.])?mlp[.]gate(?:$|[.].*)",
      "re:model[.]layers[.][1-9][0-9]*[.](?:mtp_block[.])?self_attn[.]indexer(?:$|[.].*)",
      "re:model[.]layers[.][1-9][0-9]*[.](?:mtp_block[.])?self_attn[.]indexers_proj(?:$|[.].*)",
      "re:model[.]layers[.][1-9][0-9]*[.](?:eh_proj|enorm|hnorm)[.].*",
      "re:model[.]layers[.][1-9][0-9]*[.]shared_head[.]norm[.].*",
      "re:model[.]layers[.][1-9][0-9]*[.]shared_head[.]head(?:$|[.].*)"
    ],
    "config_groups": {
      "w4a16_experts": {
        "targets": [
          "re:model[.]layers[.](?:3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59|60|61|62|63|64|65|66|67|68|69|70|71|72|73|74|75|76|77)[.]mlp[.]experts[.][0-9]+[.](?:gate_proj|up_proj|down_proj)$"
        ],
        "weights": {
          "num_bits": 4,
          "type": "int",
          "symmetric": true,
          "strategy": "group",
          "group_size": 128,
          "dynamic": false
        }
      },
      "w8a16_linears": {
        "targets": [
          "re:model[.]layers[.](?:1|2|3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|41|42|43|44|45|46|47|48|49|50|51|52|53|54|55|56|57|58|59|60|61|62|63|64|65|66|67|68|69|70|71|72|73|74|75|76|77)[.](?:self_attn[.](?:fused_qkv_a_proj_with_mqa|q_a_proj|q_b_proj|kv_a_proj_with_mqa|kv_b_proj|o_proj)|mlp[.](?:gate_up_proj|gate_proj|up_proj|down_proj|shared_experts[.](?:gate_up_proj|gate_proj|up_proj|down_proj)))$"
        ],
        "weights": {
          "num_bits": 8,
          "type": "int",
          "symmetric": true,
          "strategy": "group",
          "group_size": 128,
          "dynamic": false
        }
      },
      "w8a16_mtp_channel": {
        "targets": [
          "re:model[.]layers[.](?:78)[.](?:mtp_block[.])?(?:self_attn[.](?:fused_qkv_a_proj_with_mqa|q_a_proj|q_b_proj|kv_a_proj_with_mqa|kv_b_proj|o_proj)|mlp[.](?:experts[.][0-9]+[.](?:gate_proj|up_proj|down_proj)|gate_up_proj|gate_proj|up_proj|down_proj|shared_experts[.](?:gate_up_proj|gate_proj|up_proj|down_proj)))$"
        ],
        "weights": {
          "num_bits": 8,
          "type": "int",
          "symmetric": true,
          "strategy": "channel",
          "group_size": -1,
          "dynamic": false
        }
      }
    },
    "packed_modules_mapping": {
      "fused_qkv_a_proj_with_mqa": [
        "q_a_proj",
        "kv_a_proj_with_mqa"
      ],
      "gate_up_proj": [
        "gate_proj",
        "up_proj"
      ]
    }
  }
}