File size: 3,766 Bytes
88a424e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
# This file is a part of the vllm-ascend project.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from vllm.config import ModelConfig


def get_attr_by_names(src_config, attrs, default_value):
    """Return the first positive value found under any of several attribute names.

    Args:
        src_config: Object whose attributes are probed with ``getattr``.
        attrs: Iterable of candidate attribute names, tried in order.
        default_value: Value returned when no candidate yields a positive
            value.

    Returns:
        The value of the first attribute in ``attrs`` that is present, not
        ``None`` and greater than zero; otherwise ``default_value``.
    """
    for attr in attrs:
        value = getattr(src_config, attr, None)
        # An attribute may exist but be explicitly set to None; treat that the
        # same as "missing" instead of failing on the `None > 0` comparison.
        if value is not None and value > 0:
            return value
    return default_value


def _verify_with_expert_parallelism(self) -> None:
    """Check that the model config declares a positive number of experts.

    Probes ``self.hf_text_config`` for each known expert-count attribute name
    in order and accepts the first one holding a positive value.

    Raises:
        ValueError: If none of the known attributes holds a value of at
            least 1.
    """
    num_expert_names = [
        "moe_num_experts",  # Dbrx
        "num_experts",  # Jamba
        "n_routed_experts",  # DeepSeek
        "num_local_experts",  # Mixtral
        "num_routed_experts",  # Pangu
    ]
    num_experts = 0
    for name in num_expert_names:
        candidate = getattr(self.hf_text_config, name, None)
        # Skip attributes that are present but set to None so the check ends
        # in the ValueError below rather than a TypeError on `None > 0`.
        if candidate is not None and candidate > 0:
            num_experts = candidate
            break
    if num_experts < 1:
        raise ValueError(
            "Number of experts in the model must be greater than 0 "
            "when expert parallelism is enabled.")


@property
def is_deepseek_mla(self) -> bool:
    """Whether the text config describes a DeepSeek-style MLA model.

    True when ``hf_text_config.model_type`` is one of the recognised
    DeepSeek / Pangu types, or is ``'eagle'`` wrapping one of them, and a
    KV LoRA dimension is found under one of its known attribute names.
    """
    text_config = self.hf_text_config
    # The KV LoRA lookup runs first, before any model_type inspection.
    has_kv_lora = get_attr_by_names(
        text_config, ['attention_kv_lora_dim', 'kv_lora_rank'],
        None) is not None

    if not hasattr(text_config, "model_type"):
        return False

    model_type = text_config.model_type
    if model_type in ('deepseek_v2', 'deepseek_v3', 'deepseek_mtp',
                      'pangu_ultra_moe'):
        return has_kv_lora

    if model_type != 'eagle':
        return False

    # For an EAGLE module, the decision is based on the underlying
    # architecture stored on the nested `model` config.
    base_type = text_config.model.model_type
    return base_type in ('deepseek_v2', 'deepseek_v3',
                         'pangu_ultra_moe') and has_kv_lora


def get_head_size(self) -> int:
    """Return the attention head size derived from ``hf_text_config``.

    Resolution order:
      1. DeepSeek-style MLA models: KV LoRA dim + QK rope dim when
         ``use_mla`` is set; otherwise QK rope dim + QK (nope) dim if both
         are non-zero. If they are not, resolution continues below.
      2. ``zamba2`` models: ``attention_head_dim``.
      3. Attention-free models: 0.
      4. An explicit, non-None ``head_dim`` on the config.
      5. ``hidden_size // num_attention_heads``.
    """
    is_mla_model = self.is_deepseek_mla
    text_config = self.hf_text_config

    if is_mla_model:
        rope_dim = get_attr_by_names(
            text_config, ['attention_qk_rope_dim', 'qk_rope_head_dim'], 0)
        lora_dim = get_attr_by_names(
            text_config, ['attention_kv_lora_dim', 'kv_lora_rank'], 0)
        if self.use_mla:
            return lora_dim + rope_dim

        nope_dim = get_attr_by_names(
            text_config, ['attention_qk_dim', 'qk_nope_head_dim'], 0)
        if rope_dim and nope_dim:
            return rope_dim + nope_dim

    if getattr(text_config, "model_type", None) == "zamba2":
        return text_config.attention_head_dim

    if self.is_attention_free:
        return 0

    # NOTE: Some configs may set head_dim=None in the config
    explicit_head_dim = getattr(text_config, "head_dim", None)
    if explicit_head_dim is not None:
        return explicit_head_dim

    # FIXME(woosuk): This may not be true for all models.
    hidden_size = text_config.hidden_size
    return hidden_size // text_config.num_attention_heads


# Install the implementations defined in this module onto vLLM's ModelConfig,
# replacing the attributes of the same name on the class.
setattr(ModelConfig, "_verify_with_expert_parallelism",
        _verify_with_expert_parallelism)
setattr(ModelConfig, "is_deepseek_mla", is_deepseek_mla)
setattr(ModelConfig, "get_head_size", get_head_size)