#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
# This file is a part of the vllm-ascend project.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from vllm.config import ModelConfig


def get_attr_by_names(src_config, attrs, default_value):
    """Return the first attribute in ``attrs`` with a positive value on
    ``src_config``; fall back to ``default_value`` if none is set."""
    for attr in attrs:
        value = getattr(src_config, attr, 0)
        if value > 0:
            return value
    return default_value


def _verify_with_expert_parallelism(self) -> None:
    num_expert_names = [
        "moe_num_experts",  # Dbrx
        "num_experts",  # Jamba
        "n_routed_experts",  # DeepSeek
        "num_local_experts",  # Mixtral
        "num_routed_experts",  # Pangu
    ]
    num_experts = 0
    for name in num_expert_names:
        num_experts = getattr(self.hf_text_config, name, 0)
        if num_experts > 0:
            break
    if num_experts < 1:
        raise ValueError(
            "Number of experts in the model must be greater than 0 "
            "when expert parallelism is enabled.")


@property
def is_deepseek_mla(self) -> bool:
    kv_lora_dim_names = ['attention_kv_lora_dim', 'kv_lora_rank']
    kv_lora_dim = get_attr_by_names(self.hf_text_config, kv_lora_dim_names,
                                    None)
    if not hasattr(self.hf_text_config, "model_type"):
        return False
    elif self.hf_text_config.model_type in \
            ('deepseek_v2', 'deepseek_v3', 'deepseek_mtp', 'pangu_ultra_moe'):
        return kv_lora_dim is not None
    elif self.hf_text_config.model_type == 'eagle':
        # if the model is an EAGLE module, check for the
        # underlying architecture
        return self.hf_text_config.model.model_type in \
            ('deepseek_v2', 'deepseek_v3', 'pangu_ultra_moe') \
            and kv_lora_dim is not None
    return False


def get_head_size(self) -> int:
    if self.is_deepseek_mla:
        qk_rope_dim_names = ['attention_qk_rope_dim', 'qk_rope_head_dim']
        kv_lora_dim_names = ['attention_kv_lora_dim', 'kv_lora_rank']
        qk_rope_dim = get_attr_by_names(self.hf_text_config,
                                        qk_rope_dim_names, 0)
        kv_lora_dim = get_attr_by_names(self.hf_text_config,
                                        kv_lora_dim_names, 0)
        if self.use_mla:
            return kv_lora_dim + qk_rope_dim
        else:
            qk_dim_names = ['attention_qk_dim', 'qk_nope_head_dim']
            qk_dim = get_attr_by_names(self.hf_text_config, qk_dim_names, 0)
            if qk_rope_dim and qk_dim:
                return qk_rope_dim + qk_dim
    if hasattr(self.hf_text_config, "model_type") and \
            (self.hf_text_config.model_type == "zamba2"):
        return self.hf_text_config.attention_head_dim

    if self.is_attention_free:
        return 0

    # NOTE: Some configs may set head_dim=None in the config
    if getattr(self.hf_text_config, "head_dim", None) is not None:
        return self.hf_text_config.head_dim

    # FIXME(woosuk): This may not be true for all models.
    return (self.hf_text_config.hidden_size //
            self.hf_text_config.num_attention_heads)


# Replace the upstream ModelConfig helpers with the patched versions above.
ModelConfig._verify_with_expert_parallelism = _verify_with_expert_parallelism
ModelConfig.is_deepseek_mla = is_deepseek_mla
ModelConfig.get_head_size = get_head_size
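
# Illustrative sketch (not part of the patch itself): how get_attr_by_names
# resolves the MLA dimensions that get_head_size adds together. The
# SimpleNamespace below is a hypothetical stand-in for an HF text config; the
# values 512 and 64 are the published DeepSeek-V2 kv_lora_rank and
# qk_rope_head_dim, so the MLA head size would come out to 512 + 64 = 576.
if __name__ == "__main__":
    from types import SimpleNamespace

    fake_hf_text_config = SimpleNamespace(kv_lora_rank=512,
                                          qk_rope_head_dim=64)
    kv_lora_dim = get_attr_by_names(fake_hf_text_config,
                                    ['attention_kv_lora_dim', 'kv_lora_rank'],
                                    0)
    qk_rope_dim = get_attr_by_names(
        fake_hf_text_config, ['attention_qk_rope_dim', 'qk_rope_head_dim'], 0)
    print(kv_lora_dim + qk_rope_dim)  # 576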