"""Monkey patches for ``vllm.config.ModelConfig``.

Extends expert-count detection, DeepSeek-style MLA detection, and attention
head-size resolution so that alternative HF config attribute names (e.g.
``attention_kv_lora_dim``, ``attention_qk_rope_dim``, ``moe_num_experts``)
and the ``pangu_ultra_moe`` model type are recognized alongside the
DeepSeek-style names.
"""

from vllm.config import ModelConfig


def get_attr_by_names(src_config, attrs, default_value):
    """Return the first positive value among ``attrs`` on ``src_config``,
    falling back to ``default_value`` when none is set."""
    for attr in attrs:
        value = getattr(src_config, attr, 0)
        # A None value is treated like a missing attribute (avoids "None > 0").
        if value is not None and value > 0:
            return value
    return default_value
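

# Illustrative usage (not executed): the helper lets one lookup serve config
# families that spell the same hyperparameter differently, e.g. a hypothetical
# config object exposing only ``kv_lora_rank``:
#
#     from types import SimpleNamespace
#     cfg = SimpleNamespace(kv_lora_rank=512)
#     get_attr_by_names(cfg, ['attention_kv_lora_dim', 'kv_lora_rank'], None)  # -> 512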


def _verify_with_expert_parallelism(self) -> None:
    """Verify that the HF config reports a positive expert count under any of
    the known attribute names; expert parallelism requires an MoE model."""
    num_expert_names = [
        "moe_num_experts",
        "num_experts",
        "n_routed_experts",
        "num_local_experts",
        "num_routed_experts",
    ]
    num_experts = 0
    for name in num_expert_names:
        num_experts = getattr(self.hf_text_config, name, 0)
        if num_experts > 0:
            break
    if num_experts < 1:
        raise ValueError(
            "Number of experts in the model must be greater than 0 "
            "when expert parallelism is enabled.")
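

# Illustrative check (not executed): any single attribute name from the list
# above is enough to pass the verification, e.g. a stand-in config object that
# only defines ``moe_num_experts``:
#
#     from types import SimpleNamespace
#     fake = SimpleNamespace(hf_text_config=SimpleNamespace(moe_num_experts=64))
#     _verify_with_expert_parallelism(fake)  # does not raise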


@property
def is_deepseek_mla(self) -> bool:
    """Whether the model uses DeepSeek-style multi-head latent attention."""
    kv_lora_dim_names = ['attention_kv_lora_dim', 'kv_lora_rank']
    kv_lora_dim = get_attr_by_names(self.hf_text_config, kv_lora_dim_names,
                                    None)
    if not hasattr(self.hf_text_config, "model_type"):
        return False
    elif self.hf_text_config.model_type in \
            ('deepseek_v2', 'deepseek_v3', 'deepseek_mtp', 'pangu_ultra_moe'):
        return kv_lora_dim is not None
    elif self.hf_text_config.model_type == 'eagle':
        # For EAGLE draft models, check the architecture of the wrapped
        # target model instead.
        return self.hf_text_config.model.model_type in \
                ('deepseek_v2', 'deepseek_v3', 'pangu_ultra_moe') \
            and kv_lora_dim is not None
    return False


def get_head_size(self) -> int:
    if self.is_deepseek_mla:
        qk_rope_dim_names = ['attention_qk_rope_dim', 'qk_rope_head_dim']
        kv_lora_dim_names = ['attention_kv_lora_dim', 'kv_lora_rank']
        qk_rope_dim = get_attr_by_names(self.hf_text_config,
                                        qk_rope_dim_names, 0)
        kv_lora_dim = get_attr_by_names(self.hf_text_config,
                                        kv_lora_dim_names, 0)
        if self.use_mla:
            # With MLA enabled, the KV cache stores the compressed latent
            # plus the rotary key dim per token.
            return kv_lora_dim + qk_rope_dim
        else:
            # Without MLA, fall back to the full query/key head size
            # (non-rotary + rotary parts).
            qk_dim_names = ['attention_qk_dim', 'qk_nope_head_dim']
            qk_dim = get_attr_by_names(self.hf_text_config, qk_dim_names, 0)
            if qk_rope_dim and qk_dim:
                return qk_rope_dim + qk_dim

    if hasattr(self.hf_text_config,
               "model_type") and (self.hf_text_config.model_type
                                  == "zamba2"):
        return self.hf_text_config.attention_head_dim

    if self.is_attention_free:
        return 0

    # Some configs set head_dim explicitly; prefer it when present.
    if getattr(self.hf_text_config, "head_dim", None) is not None:
        return self.hf_text_config.head_dim

    # Otherwise derive the head size from the hidden size.
    return (self.hf_text_config.hidden_size //
            self.hf_text_config.num_attention_heads)


# Install the patches on ModelConfig. ``is_deepseek_mla`` is already a
# property object, so the assignment attaches it as a class property.
ModelConfig._verify_with_expert_parallelism = _verify_with_expert_parallelism
ModelConfig.is_deepseek_mla = is_deepseek_mla
ModelConfig.get_head_size = get_head_size
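

# Minimal smoke-test sketch (illustrative only): the stand-in object below is
# a hypothetical DeepSeek-V3-style config, used instead of constructing a full
# ModelConfig, which would require a real model checkpoint.
if __name__ == "__main__":
    from types import SimpleNamespace

    fake = SimpleNamespace(
        hf_text_config=SimpleNamespace(model_type='deepseek_v3',
                                       kv_lora_rank=512,
                                       qk_rope_head_dim=64),
        use_mla=True,
        is_deepseek_mla=True,
    )
    # MLA path: head size is the compressed KV latent dim plus the rotary dim.
    assert ModelConfig.get_head_size(fake) == 512 + 64
    # Property form, evaluated against the stand-in config.
    assert ModelConfig.is_deepseek_mla.fget(fake) is True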