{
  "architectures": [
    "PanguProMoEV2ForCausalLM"
  ],
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "configuration_pangu_moe.PanguProMoEConfig",
    "AutoModel": "modeling_pangu_moe.PanguProMoEModel",
    "AutoModelForCausalLM": "modeling_pangu_moe.PanguProMoEV2ForCausalLM"
  },
  "mlp_only_layers": [
    0,
    1,
    2,
    3
  ],
  "bos_token_id": 1,
  "eos_token_id": 45892,
  "hidden_act": "silu",
  "hidden_size": 4608,
  "initializer_range": 0.02,
  "intermediate_size": 10240,
  "max_position_embeddings": 131072,
  "model_type": "PanguProMoE",
  "moe_intermediate_size": 1280,
  "num_attention_heads": 64,
  "num_experts": 80,
  "norm_topk_prob": true,
  "router_enable_expert_bias": true,
  "num_experts_per_tok": 8,
  "num_hidden_layers": 50,
  "num_key_value_heads": 4,
  "output_router_logits": false,
  "rms_norm_eps": 1e-05,
  "rope_theta": 25600000,
  "routed_scaling_factor": 2.5,
  "sandwich_norm": true,
  "shared_expert_intermediate_size": 2560,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.48.2",
  "use_cache": true,
  "vocab_size": 153600,
  "qk_nope_dim": 128,
  "qk_rope_dim": 64,
  "v_channels": 128,
  "param_sink_number": 128,
  "param_sink_with_value": true,
  "num_mtp_layers": 1,
  "mtp_quantize": "w8a8_dynamic",
  "quantize": "w8a8_dynamic",
  "quantization_config": {
    "config_groups": {
      "group_0": {
        "input_activations": {
          "actorder": null,
          "block_structure": null,
          "dynamic": true,
          "group_size": null,
          "num_bits": 8,
          "observer": "memoryless",
          "observer_kwargs": {},
          "strategy": "token",
          "symmetric": true,
          "type": "int"
        },
        "output_activations": null,
        "targets": [
          "Linear"
        ],
        "weights": {
          "actorder": null,
          "block_structure": null,
          "dynamic": false,
          "group_size": null,
          "num_bits": 8,
          "observer": "minmax",
          "observer_kwargs": {},
          "strategy": "channel",
          "symmetric": true,
          "type": "int"
        }
      }
    },
    "format": "int-quantized",
    "kv_cache_scheme": {
      "num_bits": 8,
      "type": "int",
      "strategy": "channel",
      "dynamic": false,
      "symmetric": true
    },
    "quant_method": "compressed-tensors",
    "quantization_status": "compressed"
  }
}
|
|