# configuration_eve.py
from __future__ import annotations

from typing import Any

from transformers import PretrainedConfig


class EveConfig(PretrainedConfig):
    """Configuration for the Eve mixture-of-experts language model."""

    model_type = "eve_moe"

    # Map the standard transformers attribute names onto this config's
    # GPT-style field names so generic utilities keep working.
    attribute_map = {
        "num_hidden_layers": "n_layer",
        "num_attention_heads": "n_head",
        "hidden_size": "n_embd",
        "max_position_embeddings": "block_size",
    }

    def __init__(
        self,
        vocab_size: int = 50304,
        n_layer: int = 12,
        n_embd: int = 512,
        n_head: int = 8,
        head_dim: int = 64,
        block_size: int = 2048,
        num_experts: int = 8,
        top_k: int = 2,
        expert_intermediate_size: int = 1408,
        shared_expert_intermediate_size: int = 1408,
        router_aux_loss_coef: float = 0.01,
        use_checkpointing: bool = False,
        rope_theta: float = 10000.0,
        **kwargs: Any,
    ):
        self.vocab_size = vocab_size
        self.n_layer = n_layer
        self.n_embd = n_embd
        self.n_head = n_head
        self.head_dim = head_dim
        self.block_size = block_size
        # Mixture-of-experts routing: each token is dispatched to the
        # `top_k` highest-scoring of `num_experts` routed experts.
        self.num_experts = num_experts
        self.top_k = top_k
        self.expert_intermediate_size = expert_intermediate_size
        self.shared_expert_intermediate_size = shared_expert_intermediate_size
        # Weight of the load-balancing auxiliary loss added to the LM loss.
        self.router_aux_loss_coef = router_aux_loss_coef
        self.use_checkpointing = use_checkpointing
        self.rope_theta = rope_theta
        super().__init__(**kwargs)


__all__ = ["EveConfig"]
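

# A minimal usage sketch, runnable as `python configuration_eve.py`. It only
# exercises the standard transformers `PretrainedConfig` machinery used above
# (attribute_map aliasing and dict round-tripping); the override values below
# are illustrative, not canonical defaults for any trained checkpoint.
if __name__ == "__main__":
    config = EveConfig(n_layer=4, num_experts=4, top_k=1)

    # `attribute_map` aliases the standard transformers names onto the
    # GPT-style fields, for both reads and writes.
    assert config.num_hidden_layers == config.n_layer == 4
    config.hidden_size = 256
    assert config.n_embd == 256

    # Round-trip through the standard serialization path.
    restored = EveConfig.from_dict(config.to_dict())
    assert restored.num_experts == 4 and restored.top_k == 1
    print(config.to_json_string())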