dill-dev committed on
Commit
4f1e40d
·
verified ·
1 Parent(s): ed64422

Create configuration_momo.py

Browse files
Files changed (1) hide show
  1. configuration_momo.py +38 -17
configuration_momo.py CHANGED
@@ -1,26 +1,47 @@
 
 
 
 
1
  from transformers import PretrainedConfig
2
 
 
3
  class MomoConfig(PretrainedConfig):
4
  model_type = "momo"
 
5
  def __init__(
6
  self,
7
- vocab_size=32000, hidden_size=1024, intermediate_size=2752,
8
- num_hidden_layers=24, num_attention_heads=16, num_key_value_heads=8,
9
- max_position_embeddings=512, rope_theta=10000.0, rms_norm_eps=1e-5,
 
 
 
 
 
 
10
  use_gradient_checkpointing=False,
11
- model_name='Momo-336M', model_version='1.0',
12
- **kwargs
 
 
 
 
13
  ):
14
- super().__init__(**kwargs)
15
- self.vocab_size = vocab_size
16
- self.hidden_size = hidden_size
17
- self.intermediate_size = intermediate_size
18
- self.num_hidden_layers = num_hidden_layers
19
- self.num_attention_heads = num_attention_heads
20
- self.num_key_value_heads = num_key_value_heads
21
- self.max_position_embeddings = max_position_embeddings
22
- self.rope_theta = rope_theta
23
- self.rms_norm_eps = rms_norm_eps
 
 
 
 
 
24
  self.use_gradient_checkpointing = use_gradient_checkpointing
25
- self.model_name = model_name
26
- self.model_version = model_version
 
1
+ # configuration_momo.py
2
+ # 🌸 Momo-336M — HuggingFace compatible config
3
+ # Upload this file to your HF repo alongside modeling_momo.py and config.json
4
+
5
  from transformers import PretrainedConfig
6
 
7
+
8
  class MomoConfig(PretrainedConfig):
9
  model_type = "momo"
10
+
11
  def __init__(
12
  self,
13
+ vocab_size=32000,
14
+ hidden_size=1024,
15
+ intermediate_size=2752,
16
+ num_hidden_layers=24,
17
+ num_attention_heads=16,
18
+ num_key_value_heads=8,
19
+ max_position_embeddings=512,
20
+ rope_theta=10000.0,
21
+ rms_norm_eps=1e-5,
22
  use_gradient_checkpointing=False,
23
+ model_name="Momo-336M",
24
+ model_version="1.0",
25
+ pad_token_id=3,
26
+ bos_token_id=1,
27
+ eos_token_id=0,
28
+ **kwargs,
29
  ):
30
+ super().__init__(
31
+ pad_token_id=pad_token_id,
32
+ bos_token_id=bos_token_id,
33
+ eos_token_id=eos_token_id,
34
+ **kwargs,
35
+ )
36
+ self.vocab_size = vocab_size
37
+ self.hidden_size = hidden_size
38
+ self.intermediate_size = intermediate_size
39
+ self.num_hidden_layers = num_hidden_layers
40
+ self.num_attention_heads = num_attention_heads
41
+ self.num_key_value_heads = num_key_value_heads
42
+ self.max_position_embeddings = max_position_embeddings
43
+ self.rope_theta = rope_theta
44
+ self.rms_norm_eps = rms_norm_eps
45
  self.use_gradient_checkpointing = use_gradient_checkpointing
46
+ self.model_name = model_name
47
+ self.model_version = model_version