dill-dev committed on
Commit
2f08f53
·
verified ·
1 Parent(s): 21be5fa

Create configuration_momo.py

Browse files
Files changed (1) hide show
  1. configuration_momo.py +26 -0
configuration_momo.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import PretrainedConfig
2
+
3
class MomoConfig(PretrainedConfig):
    """Configuration container registered under the ``"momo"`` model type.

    Every constructor argument is stored unchanged as an instance attribute
    of the same name. Any additional keyword arguments are forwarded to
    ``PretrainedConfig.__init__`` before those attributes are assigned.

    NOTE(review): the per-parameter descriptions below are inferred from the
    parameter names only; confirm them against the modeling code.

    Args:
        vocab_size: Presumably the tokenizer vocabulary size.
        hidden_size: Presumably the width of the hidden representations.
        intermediate_size: Presumably the feed-forward inner dimension.
        num_hidden_layers: Presumably the number of transformer layers.
        num_attention_heads: Presumably the number of query heads.
        num_key_value_heads: Presumably the number of key/value heads.
        max_position_embeddings: Presumably the maximum sequence length.
        rope_theta: Presumably the rotary position embedding base.
        rms_norm_eps: Presumably the epsilon used by RMS normalization.
        use_gradient_checkpointing: Flag stored on the config as-is.
        model_name: Free-form model label stored on the config.
        model_version: Free-form version label stored on the config.
        **kwargs: Passed through to ``PretrainedConfig.__init__``.
    """

    model_type = "momo"

    def __init__(
        self,
        vocab_size: int = 32000,
        hidden_size: int = 1024,
        intermediate_size: int = 2752,
        num_hidden_layers: int = 24,
        num_attention_heads: int = 16,
        num_key_value_heads: int = 8,
        max_position_embeddings: int = 512,
        rope_theta: float = 10000.0,
        rms_norm_eps: float = 1e-5,
        use_gradient_checkpointing: bool = False,
        model_name: str = 'Momo-336M',
        model_version: str = '1.0',
        **kwargs,
    ) -> None:
        # The base initializer runs first, exactly as in the original, so the
        # explicit assignments below are applied after it has handled kwargs.
        super().__init__(**kwargs)

        # Vocabulary and layer dimensions.
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size

        # Depth and attention head counts.
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.num_key_value_heads = num_key_value_heads

        # Position and normalization settings.
        self.max_position_embeddings = max_position_embeddings
        self.rope_theta = rope_theta
        self.rms_norm_eps = rms_norm_eps

        # Training flag and descriptive metadata.
        self.use_gradient_checkpointing = use_gradient_checkpointing
        self.model_name = model_name
        self.model_version = model_version