#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
# This file is a part of the vllm-ascend project.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from vllm.config import ModelConfig


def get_attr_by_names(src_config, attrs, default_value):
    """Return the first positive value found under any name in ``attrs``.

    HuggingFace configs name the same hyperparameter differently across
    model families, so callers pass every known alias. Attributes that are
    missing, None, or non-positive are skipped; ``default_value`` is
    returned when no alias matches.
    """
    for attr in attrs:
        value = getattr(src_config, attr, None)
        # Guard against configs that set an alias explicitly to None.
        if value is not None and value > 0:
            return value
    return default_value
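
# A minimal sketch of the lookup order, assuming a hypothetical
# SimpleNamespace config:
#
#     from types import SimpleNamespace
#     cfg = SimpleNamespace(kv_lora_rank=512)
#     get_attr_by_names(cfg, ['attention_kv_lora_dim', 'kv_lora_rank'], None)
#     # -> 512 (first alias is absent, second matches)
#     get_attr_by_names(SimpleNamespace(), ['kv_lora_rank'], None)
#     # -> None (no alias set, default returned)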


def _verify_with_expert_parallelism(self) -> None:
    # Replacement for ModelConfig._verify_with_expert_parallelism that also
    # recognizes Pangu's "num_routed_experts" alias.
    num_expert_names = [
        "moe_num_experts",  # Dbrx
        "num_experts",  # Jamba
        "n_routed_experts",  # DeepSeek
        "num_local_experts",  # Mixtral
        "num_routed_experts",  # Pangu
    ]
    num_experts = 0
    for name in num_expert_names:
        num_experts = getattr(self.hf_text_config, name, 0)
        if num_experts > 0:
            break
    if num_experts < 1:
        raise ValueError(
            "Number of experts in the model must be greater than 0 "
            "when expert parallelism is enabled.")


@property
def is_deepseek_mla(self) -> bool:
    kv_lora_dim_names = ['attention_kv_lora_dim', 'kv_lora_rank']
    kv_lora_dim = get_attr_by_names(self.hf_text_config, kv_lora_dim_names,
                                    None)
    if not hasattr(self.hf_text_config, "model_type"):
        return False
    elif self.hf_text_config.model_type in \
            ('deepseek_v2', 'deepseek_v3', 'deepseek_mtp', 'pangu_ultra_moe'):
        return kv_lora_dim is not None
    elif self.hf_text_config.model_type == 'eagle':
        # If the model is an EAGLE module, check the underlying architecture.
        return self.hf_text_config.model.model_type in \
            ('deepseek_v2', 'deepseek_v3', 'pangu_ultra_moe') \
            and kv_lora_dim is not None
    return False
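
# Illustration (hypothetical values): a config with model_type='deepseek_v2'
# and kv_lora_rank=512 reports True; for model_type='eagle' the decision is
# made on the wrapped self.hf_text_config.model config instead; any other
# model_type reports False.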


def get_head_size(self) -> int:
    if self.is_deepseek_mla:
        qk_rope_dim_names = ['attention_qk_rope_dim', 'qk_rope_head_dim']
        kv_lora_dim_names = ['attention_kv_lora_dim', 'kv_lora_rank']
        qk_rope_dim = get_attr_by_names(self.hf_text_config,
                                        qk_rope_dim_names, 0)
        kv_lora_dim = get_attr_by_names(self.hf_text_config,
                                        kv_lora_dim_names, 0)
        if self.use_mla:
            # MLA attends over the compressed KV latent plus the RoPE part.
            return kv_lora_dim + qk_rope_dim
        else:
            qk_dim_names = ['attention_qk_dim', 'qk_nope_head_dim']
            qk_dim = get_attr_by_names(self.hf_text_config, qk_dim_names, 0)
            if qk_rope_dim and qk_dim:
                return qk_rope_dim + qk_dim

    if hasattr(self.hf_text_config, "model_type") and (
            self.hf_text_config.model_type == "zamba2"):
        return self.hf_text_config.attention_head_dim

    if self.is_attention_free:
        return 0

    # NOTE: Some configs may set head_dim=None in the config
    if getattr(self.hf_text_config, "head_dim", None) is not None:
        return self.hf_text_config.head_dim

    # FIXME(woosuk): This may not be true for all models.
    return (self.hf_text_config.hidden_size //
            self.hf_text_config.num_attention_heads)
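
# Worked example, assuming typical DeepSeek-V2-style values (kv_lora_rank=512,
# qk_rope_head_dim=64, qk_nope_head_dim=128): with MLA enabled the head size
# is 512 + 64 = 576; with MLA disabled it is 128 + 64 = 192.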


# Install the patched implementations on vLLM's ModelConfig.
ModelConfig._verify_with_expert_parallelism = _verify_with_expert_parallelism
ModelConfig.is_deepseek_mla = is_deepseek_mla
ModelConfig.get_head_size = get_head_size