# configuration_eve.py
from __future__ import annotations

from typing import Any

from transformers import PretrainedConfig

class EveConfig(PretrainedConfig):
    """Configuration for the Eve mixture-of-experts (MoE) language model."""

    model_type = "eve_moe"

    # Alias the standard Hugging Face attribute names to this config's
    # internal field names so generic Transformers utilities keep working.
    attribute_map = {
        "num_hidden_layers": "n_layer",
        "num_attention_heads": "n_head",
        "hidden_size": "n_embd",
        "max_position_embeddings": "block_size",
    }

    def __init__(
        self,
        vocab_size: int = 50304,
        n_layer: int = 12,
        n_embd: int = 512,
        n_head: int = 8,
        head_dim: int = 64,
        block_size: int = 2048,
        num_experts: int = 8,
        top_k: int = 2,
        expert_intermediate_size: int = 1408,
        shared_expert_intermediate_size: int = 1408,
        router_aux_loss_coef: float = 0.01,
        use_checkpointing: bool = False,
        rope_theta: float = 10000.0,
        **kwargs: Any,
    ):
        # Core transformer dimensions.
        self.vocab_size = vocab_size
        self.n_layer = n_layer
        self.n_embd = n_embd
        self.n_head = n_head
        self.head_dim = head_dim
        self.block_size = block_size
        # Mixture-of-experts routing: each token is dispatched to the
        # top_k of num_experts routed experts, alongside a shared expert.
        self.num_experts = num_experts
        self.top_k = top_k
        self.expert_intermediate_size = expert_intermediate_size
        self.shared_expert_intermediate_size = shared_expert_intermediate_size
        self.router_aux_loss_coef = router_aux_loss_coef
        # Gradient checkpointing and rotary position embedding settings.
        self.use_checkpointing = use_checkpointing
        self.rope_theta = rope_theta
        super().__init__(**kwargs)


__all__ = ["EveConfig"]
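
# A minimal usage sketch, assuming only a standard `transformers` install:
# it instantiates the config and checks that the attribute_map aliases
# read through to the underlying custom field names.
if __name__ == "__main__":
    config = EveConfig(n_layer=6, n_embd=256, n_head=4)
    # Standard HF names resolve via attribute_map to the custom fields.
    assert config.num_hidden_layers == config.n_layer == 6
    assert config.hidden_size == config.n_embd == 256
    assert config.num_attention_heads == config.n_head == 4
    assert config.max_position_embeddings == config.block_size == 2048
    # PretrainedConfig provides JSON serialization out of the box.
    print(config.to_json_string())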