{ "architectures": [ "PanguEmbeddedForCausalLM" ], "auto_map": { "AutoConfig": "configuration_openpangu_dense.PanguEmbeddedConfig", "AutoModel": "modeling_openpangu_dense.PanguEmbeddedModel", "AutoModelForCausalLM": "modeling_openpangu_dense.PanguEmbeddedForCausalLM" }, "model_type": "PanguEmbedded", "hidden_size": 4096, "intermediate_size": 16384, "num_hidden_layers": 27, "num_attention_heads": 32, "num_nextn_predict_layers": 1, "num_key_value_groups": 4, "num_key_value_heads": 8, "head_dim": 128, "v_channels": 128, "qk_nope_dim": 96, "qk_rope_dim":32, "max_position_embeddings": 65536, "initializer_range": 0.02, "rms_norm_eps": 1e-05, "rotary_percent": 0.25, "rope_theta": 16000000.0, "vocab_size": 153376, "hidden_act": "silu", "attention_dropout": 0.0, "bias": true, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "transformers_version": "4.53.2", "use_cache": true, "bos_token_id": 1, "pad_token_id": 0, "eos_token_id": 45892, "_attn_implementation": "eager", "sliding_window": 128, "param_sink_number": 128, "param_sink_with_value": true, "layer_types": [ "sliding_attention", "full_attention", "sliding_attention", "full_attention", "sliding_attention", "full_attention", "sliding_attention", "full_attention", "sliding_attention", "full_attention", "sliding_attention", "full_attention", "sliding_attention", "full_attention", "sliding_attention", "full_attention", "sliding_attention", "full_attention", "sliding_attention", "full_attention", "sliding_attention", "full_attention", "sliding_attention", "full_attention", "sliding_attention", "full_attention", "sliding_attention" ], "num_sink_tokens": 128, "attn_groupnorm": true, "attn_elementwise_gate": true, "router_sliding_window": 3, "router_win_decay": 0.5 }