KitsuVp commited on
Commit
2fa9200
·
verified ·
1 Parent(s): 6a6bfb0

Update configuration_neollm.py

Browse files
Files changed (1) hide show
  1. configuration_neollm.py +15 -8
configuration_neollm.py CHANGED
@@ -5,18 +5,19 @@ from transformers.utils import logging
5
 
6
  logger = logging.get_logger(__name__)
7
 
 
8
  class NeoLLMConfig(PretrainedConfig):
9
  r"""
10
- This is the configuration class to store the configuration of a [`NeoLLMModel`]. It is used to instantiate a
11
  NeoLLM model according to the specified arguments, defining the model architecture.
12
- Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs.
13
  """
14
  model_type = "neollm"
15
  keys_to_ignore_at_inference = []
16
 
17
  def __init__(
18
  self,
19
- vocab_size=151665,
20
  hidden_size=512,
21
  intermediate_size=1536,
22
  num_hidden_layers=12,
@@ -40,8 +41,9 @@ class NeoLLMConfig(PretrainedConfig):
40
  linear_num_value_heads=16,
41
  layer_types=None,
42
  fan_ratio=0.125,
43
- fan_ratio_ffn=0.0625, # NEW: Half of fan_ratio for FFN periodicity modeling
44
  dropout_rate=0.1,
 
45
  **kwargs,
46
  ):
47
  super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
@@ -61,7 +63,6 @@ class NeoLLMConfig(PretrainedConfig):
61
  self.attention_bias = attention_bias
62
  self.attention_dropout = attention_dropout
63
  self.head_dim = head_dim
64
-
65
  rope_config_validation(self)
66
 
67
  self.layer_types = layer_types
@@ -80,15 +81,21 @@ class NeoLLMConfig(PretrainedConfig):
80
  self.linear_num_value_heads = linear_num_value_heads
81
 
82
  # FANformer parameters
83
- self.fan_ratio = fan_ratio # Used in attention mechanisms
84
- self.fan_ratio_ffn = fan_ratio_ffn # NEW: Used in FFN for complementary periodicity
85
 
 
86
  self.dropout_rate = dropout_rate
87
 
 
 
 
 
88
  self.auto_map = {
89
  "AutoConfig": "configuration_neollm.NeoLLMConfig",
90
  "AutoModel": "modeling_neollm.NeoLLMModel",
91
  "AutoModelForCausalLM": "modeling_neollm.NeoLLMForCausalLM"
92
  }
93
 
94
- __all__ = ["NeoLLMConfig"]
 
 
5
 
6
  logger = logging.get_logger(__name__)
7
 
8
+
9
  class NeoLLMConfig(PretrainedConfig):
10
  r"""
11
+ This is the configuration class to store the configuration of a [`NeoLLMModel`]. It is used to instantiate a
12
  NeoLLM model according to the specified arguments, defining the model architecture.
13
+ Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs.
14
  """
15
  model_type = "neollm"
16
  keys_to_ignore_at_inference = []
17
 
18
  def __init__(
19
  self,
20
+ vocab_size=200005,
21
  hidden_size=512,
22
  intermediate_size=1536,
23
  num_hidden_layers=12,
 
41
  linear_num_value_heads=16,
42
  layer_types=None,
43
  fan_ratio=0.125,
44
+ fan_ratio_ffn=0.0625,
45
  dropout_rate=0.1,
46
+ pope_bias_init="zero",
47
  **kwargs,
48
  ):
49
  super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
 
63
  self.attention_bias = attention_bias
64
  self.attention_dropout = attention_dropout
65
  self.head_dim = head_dim
 
66
  rope_config_validation(self)
67
 
68
  self.layer_types = layer_types
 
81
  self.linear_num_value_heads = linear_num_value_heads
82
 
83
  # FANformer parameters
84
+ self.fan_ratio = fan_ratio
85
+ self.fan_ratio_ffn = fan_ratio_ffn
86
 
87
+ # Dropout
88
  self.dropout_rate = dropout_rate
89
 
90
+ # PoPE (Polar Positional Embedding) parameters
91
+ # rope_theta is reused as base wavelength for PoPE frequency components
92
+ self.pope_bias_init = pope_bias_init # "zero" (better for length extrapolation) or "uniform" (better in-distribution)
93
+
94
  self.auto_map = {
95
  "AutoConfig": "configuration_neollm.NeoLLMConfig",
96
  "AutoModel": "modeling_neollm.NeoLLMModel",
97
  "AutoModelForCausalLM": "modeling_neollm.NeoLLMForCausalLM"
98
  }
99
 
100
+
101
+ __all__ = ["NeoLLMConfig"]