# configuration_eve.py
from __future__ import annotations

from typing import Any
from transformers import PretrainedConfig


class EveConfig(PretrainedConfig):
    """Configuration for the Eve mixture-of-experts (MoE) language model.

    Uses GPT-style attribute names (n_layer, n_head, n_embd, block_size) and
    exposes the standard Hugging Face names through ``attribute_map``.
    """

    model_type = "eve_moe"
    attribute_map = {
        "num_hidden_layers": "n_layer",
        "num_attention_heads": "n_head",
        "hidden_size": "n_embd",
        "max_position_embeddings": "block_size",
    }

    def __init__(
        self,
        vocab_size: int = 50304,
        n_layer: int = 12,
        n_embd: int = 512,
        n_head: int = 8,
        head_dim: int = 64,
        block_size: int = 2048,
        num_experts: int = 8,
        top_k: int = 2,
        expert_intermediate_size: int = 1408,
        shared_expert_intermediate_size: int = 1408,
        router_aux_loss_coef: float = 0.01,
        use_checkpointing: bool = False,
        rope_theta: float = 10000.0,
        **kwargs: Any,
    ):
        # Model dimensions.
        self.vocab_size = vocab_size
        self.n_layer = n_layer
        self.n_embd = n_embd
        self.n_head = n_head
        self.head_dim = head_dim
        self.block_size = block_size
        # Mixture-of-experts routing.
        self.num_experts = num_experts
        self.top_k = top_k
        self.expert_intermediate_size = expert_intermediate_size
        self.shared_expert_intermediate_size = shared_expert_intermediate_size
        self.router_aux_loss_coef = router_aux_loss_coef
        # Training and positional-embedding options.
        self.use_checkpointing = use_checkpointing
        self.rope_theta = rope_theta
        # Any remaining kwargs (token ids, tie_word_embeddings, ...) are handled
        # by PretrainedConfig.
        super().__init__(**kwargs)


__all__ = ["EveConfig"]