anthonym21 committed on
Commit
92c2fb8
·
verified ·
1 Parent(s): 2308f67

Upload configuration_eve.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. configuration_eve.py +37 -0
configuration_eve.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from transformers import PretrainedConfig
3
+
4
class EveConfig(PretrainedConfig):
    """Configuration for the EVE mixture-of-experts (MoE) model.

    Holds the architecture hyperparameters read by the EVE model code and
    forwards any remaining keyword arguments to ``PretrainedConfig``
    (token ids, serialization metadata, etc.).

    Args:
        vocab_size: Size of the token vocabulary.
        n_layer: Number of transformer blocks.
        n_embd: Hidden (embedding) dimension.
        n_head: Number of attention heads.
        head_dim: Dimension of each attention head.
        block_size: Maximum sequence length.
        num_experts: Number of routed experts per MoE layer.
        top_k: Number of experts activated per token.
        expert_intermediate_size: Feed-forward width of each routed expert.
        shared_expert_intermediate_size: Feed-forward width of the shared expert.
        router_aux_loss_coef: Weight of the router auxiliary (load-balancing) loss.
        use_checkpointing: Whether gradient checkpointing is enabled.
        rope_theta: Base period for rotary position embeddings.
    """

    model_type = "eve_moe"

    def __init__(
        self,
        vocab_size=50304,
        n_layer=12,
        n_embd=512,
        n_head=8,
        head_dim=64,
        block_size=2048,
        num_experts=8,
        top_k=2,
        expert_intermediate_size=1408,
        shared_expert_intermediate_size=1408,
        router_aux_loss_coef=0.01,
        use_checkpointing=False,
        rope_theta=10000.0,
        **kwargs,
    ):
        # Transformer backbone dimensions.
        self.vocab_size, self.n_layer, self.n_embd = vocab_size, n_layer, n_embd
        self.n_head, self.head_dim, self.block_size = n_head, head_dim, block_size

        # Mixture-of-experts routing configuration.
        self.num_experts, self.top_k = num_experts, top_k
        self.expert_intermediate_size = expert_intermediate_size
        self.shared_expert_intermediate_size = shared_expert_intermediate_size
        self.router_aux_loss_coef = router_aux_loss_coef

        # Training and positional-encoding options.
        self.use_checkpointing = use_checkpointing
        self.rope_theta = rope_theta

        # Delegate last so base-class kwargs are applied on top of our fields.
        super().__init__(**kwargs)