KitsuVp committed on
Commit
a85fd6b
·
verified ·
1 Parent(s): bc927d2

Update configuration_neollm.py

Browse files
Files changed (1) hide show
  1. configuration_neollm.py +14 -11
configuration_neollm.py CHANGED
@@ -1,24 +1,19 @@
1
  # ==================== configuration_neollm.py ====================
2
-
3
  from transformers.configuration_utils import PretrainedConfig
4
  from transformers.modeling_rope_utils import rope_config_validation
5
  from transformers.utils import logging
6
 
7
-
8
  logger = logging.get_logger(__name__)
9
 
10
-
11
  class NeoLLMConfig(PretrainedConfig):
12
  r"""
13
  This is the configuration class to store the configuration of a [`NeoLLMModel`]. It is used to instantiate a
14
  NeoLLM model according to the specified arguments, defining the model architecture.
15
-
16
  Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs.
17
  """
18
-
19
  model_type = "neollm"
20
  keys_to_ignore_at_inference = []
21
-
22
  def __init__(
23
  self,
24
  vocab_size=151665,
@@ -45,6 +40,7 @@ class NeoLLMConfig(PretrainedConfig):
45
  linear_num_value_heads=16,
46
  layer_types=None,
47
  fan_ratio=0.125,
 
48
  dropout_rate=0.1,
49
  **kwargs,
50
  ):
@@ -65,8 +61,9 @@ class NeoLLMConfig(PretrainedConfig):
65
  self.attention_bias = attention_bias
66
  self.attention_dropout = attention_dropout
67
  self.head_dim = head_dim
 
68
  rope_config_validation(self)
69
-
70
  self.layer_types = layer_types
71
  if self.layer_types is None:
72
  interval_pattern = kwargs.get("full_attention_interval", 4)
@@ -74,18 +71,24 @@ class NeoLLMConfig(PretrainedConfig):
74
  "linear_attention" if bool((i + 1) % interval_pattern) else "full_attention"
75
  for i in range(self.num_hidden_layers)
76
  ]
77
-
78
- # linear attention part
79
  self.linear_conv_kernel_dim = linear_conv_kernel_dim
80
  self.linear_key_head_dim = linear_key_head_dim
81
  self.linear_value_head_dim = linear_value_head_dim
82
  self.linear_num_key_heads = linear_num_key_heads
83
  self.linear_num_value_heads = linear_num_value_heads
84
- self.fan_ratio = fan_ratio
 
 
 
 
85
  self.dropout_rate = dropout_rate
 
86
  self.auto_map = {
87
  "AutoConfig": "configuration_neollm.NeoLLMConfig",
88
  "AutoModel": "modeling_neollm.NeoLLMModel",
89
  "AutoModelForCausalLM": "modeling_neollm.NeoLLMForCausalLM"
90
  }
91
- __all__ = ["NeoLLMConfig"]
 
 
1
  # ==================== configuration_neollm.py ====================
 
2
  from transformers.configuration_utils import PretrainedConfig
3
  from transformers.modeling_rope_utils import rope_config_validation
4
  from transformers.utils import logging
5
 
 
6
  logger = logging.get_logger(__name__)
7
 
 
8
  class NeoLLMConfig(PretrainedConfig):
9
  r"""
10
  This is the configuration class to store the configuration of a [`NeoLLMModel`]. It is used to instantiate a
11
  NeoLLM model according to the specified arguments, defining the model architecture.
 
12
  Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs.
13
  """
 
14
  model_type = "neollm"
15
  keys_to_ignore_at_inference = []
16
+
17
  def __init__(
18
  self,
19
  vocab_size=151665,
 
40
  linear_num_value_heads=16,
41
  layer_types=None,
42
  fan_ratio=0.125,
43
+ fan_ratio_ffn=0.0625, # NEW: Half of fan_ratio for FFN periodicity modeling
44
  dropout_rate=0.1,
45
  **kwargs,
46
  ):
 
61
  self.attention_bias = attention_bias
62
  self.attention_dropout = attention_dropout
63
  self.head_dim = head_dim
64
+
65
  rope_config_validation(self)
66
+
67
  self.layer_types = layer_types
68
  if self.layer_types is None:
69
  interval_pattern = kwargs.get("full_attention_interval", 4)
 
71
  "linear_attention" if bool((i + 1) % interval_pattern) else "full_attention"
72
  for i in range(self.num_hidden_layers)
73
  ]
74
+
75
+ # Linear attention parameters
76
  self.linear_conv_kernel_dim = linear_conv_kernel_dim
77
  self.linear_key_head_dim = linear_key_head_dim
78
  self.linear_value_head_dim = linear_value_head_dim
79
  self.linear_num_key_heads = linear_num_key_heads
80
  self.linear_num_value_heads = linear_num_value_heads
81
+
82
+ # FANformer parameters
83
+ self.fan_ratio = fan_ratio # Used in attention mechanisms
84
+ self.fan_ratio_ffn = fan_ratio_ffn # NEW: Used in FFN for complementary periodicity
85
+
86
  self.dropout_rate = dropout_rate
87
+
88
  self.auto_map = {
89
  "AutoConfig": "configuration_neollm.NeoLLMConfig",
90
  "AutoModel": "modeling_neollm.NeoLLMModel",
91
  "AutoModelForCausalLM": "modeling_neollm.NeoLLMForCausalLM"
92
  }
93
+
94
+ __all__ = ["NeoLLMConfig"]