KitsuVp committed on
Commit
cb51961
·
verified ·
1 Parent(s): ee930f4

Update configuration_neollm.py

Browse files
Files changed (1) hide show
  1. configuration_neollm.py +8 -35
configuration_neollm.py CHANGED
@@ -5,12 +5,11 @@ from transformers.utils import logging
5
 
6
  logger = logging.get_logger(__name__)
7
 
8
-
9
  class NeoLLMConfig(PretrainedConfig):
10
  r"""
11
- This is the configuration class to store the configuration of a `NeoLLMModel`]. It is used to instantiate a
12
  NeoLLM model according to the specified arguments, defining the model architecture.
13
- Configuration objects inherit from `PretrainedConfig`] and can be used to control the model outputs.
14
  """
15
  model_type = "neollm"
16
  keys_to_ignore_at_inference = []
@@ -34,16 +33,10 @@ class NeoLLMConfig(PretrainedConfig):
34
  attention_bias=False,
35
  attention_dropout=0.1,
36
  head_dim=64,
37
- linear_conv_kernel_dim=4,
38
- linear_key_head_dim=32,
39
- linear_value_head_dim=32,
40
- linear_num_key_heads=8,
41
- linear_num_value_heads=16,
42
- layer_types=None,
43
  fan_ratio=0.125,
44
- fan_ratio_ffn=0.0625,
45
  dropout_rate=0.1,
46
- pope_bias_init="zero",
47
  **kwargs,
48
  ):
49
  super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
@@ -63,39 +56,19 @@ class NeoLLMConfig(PretrainedConfig):
63
  self.attention_bias = attention_bias
64
  self.attention_dropout = attention_dropout
65
  self.head_dim = head_dim
 
66
  rope_config_validation(self)
67
 
68
- self.layer_types = layer_types
69
- if self.layer_types is None:
70
- interval_pattern = kwargs.get("full_attention_interval", 4)
71
- self.layer_types = [
72
- "linear_attention" if bool((i + 1) % interval_pattern) else "full_attention"
73
- for i in range(self.num_hidden_layers)
74
- ]
75
-
76
- # Linear attention parameters
77
- self.linear_conv_kernel_dim = linear_conv_kernel_dim
78
- self.linear_key_head_dim = linear_key_head_dim
79
- self.linear_value_head_dim = linear_value_head_dim
80
- self.linear_num_key_heads = linear_num_key_heads
81
- self.linear_num_value_heads = linear_num_value_heads
82
-
83
  # FANformer parameters
84
- self.fan_ratio = fan_ratio
85
- self.fan_ratio_ffn = fan_ratio_ffn
86
 
87
- # Dropout
88
  self.dropout_rate = dropout_rate
89
 
90
- # PoPE (Polar Positional Embedding) parameters
91
- # rope_theta is reused as base wavelength for PoPE frequency components
92
- self.pope_bias_init = pope_bias_init # "zero" (better for length extrapolation) or "uniform" (better in-distribution)
93
-
94
  self.auto_map = {
95
  "AutoConfig": "configuration_neollm.NeoLLMConfig",
96
  "AutoModel": "modeling_neollm.NeoLLMModel",
97
  "AutoModelForCausalLM": "modeling_neollm.NeoLLMForCausalLM"
98
  }
99
 
100
-
101
- __all__ = ["NeoLLMConfig"]
 
5
 
6
  logger = logging.get_logger(__name__)
7
 
 
8
  class NeoLLMConfig(PretrainedConfig):
9
  r"""
10
+ This is the configuration class to store the configuration of a [`NeoLLMModel`]. It is used to instantiate a
11
  NeoLLM model according to the specified arguments, defining the model architecture.
12
+ Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs.
13
  """
14
  model_type = "neollm"
15
  keys_to_ignore_at_inference = []
 
33
  attention_bias=False,
34
  attention_dropout=0.1,
35
  head_dim=64,
36
+
 
 
 
 
 
37
  fan_ratio=0.125,
38
+ fan_ratio_ffn=0.0625, # NEW: Half of fan_ratio for FFN periodicity modeling
39
  dropout_rate=0.1,
 
40
  **kwargs,
41
  ):
42
  super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
 
56
  self.attention_bias = attention_bias
57
  self.attention_dropout = attention_dropout
58
  self.head_dim = head_dim
59
+
60
  rope_config_validation(self)
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  # FANformer parameters
63
+ self.fan_ratio = fan_ratio # Used in attention mechanisms
64
+ self.fan_ratio_ffn = fan_ratio_ffn # NEW: Used in FFN for complementary periodicity
65
 
 
66
  self.dropout_rate = dropout_rate
67
 
 
 
 
 
68
  self.auto_map = {
69
  "AutoConfig": "configuration_neollm.NeoLLMConfig",
70
  "AutoModel": "modeling_neollm.NeoLLMModel",
71
  "AutoModelForCausalLM": "modeling_neollm.NeoLLMForCausalLM"
72
  }
73
 
74
+ __all__ = ["NeoLLMConfig"]