KitsuVp committed on
Commit
d026a08
·
verified ·
1 Parent(s): 44526de

Update configuration_neollm.py

Browse files
Files changed (1) hide show
  1. configuration_neollm.py +24 -16
configuration_neollm.py CHANGED
@@ -7,9 +7,9 @@ logger = logging.get_logger(__name__)
7
 
8
  class NeoLLMConfig(PretrainedConfig):
9
  r"""
10
- This is the configuration class to store the configuration of a `NeoLLMModel`. It is used to instantiate a
11
  NeoLLM model according to the specified arguments, defining the model architecture.
12
- Configuration objects inherit from `PretrainedConfig` and can be used to control the model outputs.
13
  """
14
  model_type = "neollm"
15
  keys_to_ignore_at_inference = []
@@ -33,13 +33,17 @@ class NeoLLMConfig(PretrainedConfig):
33
  attention_bias=False,
34
  attention_dropout=0.1,
35
  head_dim=64,
 
 
 
 
 
 
 
 
36
  fan_ratio=0.125,
37
- fan_ratio_ffn=0.0625,
38
  dropout_rate=0.1,
39
- use_stack=True,
40
- num_stack_heads=4,
41
- stack_slots=24,
42
- stack_d_model=16,
43
  **kwargs,
44
  ):
45
  super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
@@ -59,24 +63,28 @@ class NeoLLMConfig(PretrainedConfig):
59
  self.attention_bias = attention_bias
60
  self.attention_dropout = attention_dropout
61
  self.head_dim = head_dim
 
 
 
 
 
 
 
 
 
 
62
  rope_config_validation(self)
63
 
64
  # FANformer parameters
65
- self.fan_ratio = fan_ratio
66
- self.fan_ratio_ffn = fan_ratio_ffn
67
 
68
  self.dropout_rate = dropout_rate
69
 
70
- # StackMemory parameters
71
- self.use_stack = use_stack
72
- self.num_stack_heads = num_stack_heads
73
- self.stack_slots = stack_slots
74
- self.stack_d_model = stack_d_model
75
-
76
  self.auto_map = {
77
  "AutoConfig": "configuration_neollm.NeoLLMConfig",
78
  "AutoModel": "modeling_neollm.NeoLLMModel",
79
  "AutoModelForCausalLM": "modeling_neollm.NeoLLMForCausalLM"
80
  }
81
 
82
- __all__ = ["NeoLLMConfig"]
 
7
 
8
  class NeoLLMConfig(PretrainedConfig):
9
  r"""
10
+ This is the configuration class to store the configuration of a [`NeoLLMModel`]. It is used to instantiate a
11
  NeoLLM model according to the specified arguments, defining the model architecture.
12
+ Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs.
13
  """
14
  model_type = "neollm"
15
  keys_to_ignore_at_inference = []
 
33
  attention_bias=False,
34
  attention_dropout=0.1,
35
  head_dim=64,
36
+ use_momentum_attention=True,
37
+ momentum_gamma=0.10,
38
+ use_mea_attention=True,
39
+ mea_component_key_value_heads=None,
40
+ mea_groupnorm_eps=1e-6,
41
+ use_lucid_attention=True,
42
+ lucid_attention_eps=1e-6,
43
+
44
  fan_ratio=0.125,
45
+ fan_ratio_ffn=0.0625, # NEW: Half of fan_ratio for FFN periodicity modeling
46
  dropout_rate=0.1,
 
 
 
 
47
  **kwargs,
48
  ):
49
  super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
 
63
  self.attention_bias = attention_bias
64
  self.attention_dropout = attention_dropout
65
  self.head_dim = head_dim
66
+ self.use_momentum_attention = use_momentum_attention
67
+ self.momentum_gamma = momentum_gamma
68
+ self.use_mea_attention = use_mea_attention
69
+ self.mea_component_key_value_heads = (
70
+ num_key_value_heads if mea_component_key_value_heads is None else int(mea_component_key_value_heads)
71
+ )
72
+ self.mea_groupnorm_eps = mea_groupnorm_eps
73
+ self.use_lucid_attention = use_lucid_attention
74
+ self.lucid_attention_eps = lucid_attention_eps
75
+
76
  rope_config_validation(self)
77
 
78
  # FANformer parameters
79
+ self.fan_ratio = fan_ratio # Used in attention mechanisms
80
+ self.fan_ratio_ffn = fan_ratio_ffn # NEW: Used in FFN for complementary periodicity
81
 
82
  self.dropout_rate = dropout_rate
83
 
 
 
 
 
 
 
84
  self.auto_map = {
85
  "AutoConfig": "configuration_neollm.NeoLLMConfig",
86
  "AutoModel": "modeling_neollm.NeoLLMModel",
87
  "AutoModelForCausalLM": "modeling_neollm.NeoLLMForCausalLM"
88
  }
89
 
90
+ __all__ = ["NeoLLMConfig"]