KitsuVp committed on
Commit
d026a08
·
verified ·
1 Parent(s): 44526de

Update configuration_neollm.py

Browse files
Files changed (1) hide show
  1. configuration_neollm.py +24 -16
configuration_neollm.py CHANGED
@@ -7,9 +7,9 @@ logger = logging.get_logger(__name__)
7
 
8
  class NeoLLMConfig(PretrainedConfig):
9
  r"""
10
- This is the configuration class to store the configuration of a `NeoLLMModel`. It is used to instantiate a
11
  NeoLLM model according to the specified arguments, defining the model architecture.
12
- Configuration objects inherit from `PretrainedConfig` and can be used to control the model outputs.
13
  """
14
  model_type = "neollm"
15
  keys_to_ignore_at_inference = []
@@ -33,13 +33,17 @@ class NeoLLMConfig(PretrainedConfig):
33
  attention_bias=False,
34
  attention_dropout=0.1,
35
  head_dim=64,
 
 
 
 
 
 
 
 
36
  fan_ratio=0.125,
37
- fan_ratio_ffn=0.0625,
38
  dropout_rate=0.1,
39
- use_stack=True,
40
- num_stack_heads=4,
41
- stack_slots=24,
42
- stack_d_model=16,
43
  **kwargs,
44
  ):
45
  super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
@@ -59,24 +63,28 @@ class NeoLLMConfig(PretrainedConfig):
59
  self.attention_bias = attention_bias
60
  self.attention_dropout = attention_dropout
61
  self.head_dim = head_dim
 
 
 
 
 
 
 
 
 
 
62
  rope_config_validation(self)
63
 
64
  # FANformer parameters
65
- self.fan_ratio = fan_ratio
66
- self.fan_ratio_ffn = fan_ratio_ffn
67
 
68
  self.dropout_rate = dropout_rate
69
 
70
- # StackMemory parameters
71
- self.use_stack = use_stack
72
- self.num_stack_heads = num_stack_heads
73
- self.stack_slots = stack_slots
74
- self.stack_d_model = stack_d_model
75
-
76
  self.auto_map = {
77
  "AutoConfig": "configuration_neollm.NeoLLMConfig",
78
  "AutoModel": "modeling_neollm.NeoLLMModel",
79
  "AutoModelForCausalLM": "modeling_neollm.NeoLLMForCausalLM"
80
  }
81
 
82
- __all__ = ["NeoLLMConfig"]
 
7
 
8
  class NeoLLMConfig(PretrainedConfig):
9
  r"""
10
+ This is the configuration class to store the configuration of a [`NeoLLMModel`]. It is used to instantiate a
11
  NeoLLM model according to the specified arguments, defining the model architecture.
12
+ Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs.
13
  """
14
  model_type = "neollm"
15
  keys_to_ignore_at_inference = []
 
33
  attention_bias=False,
34
  attention_dropout=0.1,
35
  head_dim=64,
36
+ use_momentum_attention=True,
37
+ momentum_gamma=0.10,
38
+ use_mea_attention=True,
39
+ mea_component_key_value_heads=None,
40
+ mea_groupnorm_eps=1e-6,
41
+ use_lucid_attention=True,
42
+ lucid_attention_eps=1e-6,
43
+
44
  fan_ratio=0.125,
45
+ fan_ratio_ffn=0.0625, # NEW: Half of fan_ratio for FFN periodicity modeling
46
  dropout_rate=0.1,
 
 
 
 
47
  **kwargs,
48
  ):
49
  super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
 
63
  self.attention_bias = attention_bias
64
  self.attention_dropout = attention_dropout
65
  self.head_dim = head_dim
66
+ self.use_momentum_attention = use_momentum_attention
67
+ self.momentum_gamma = momentum_gamma
68
+ self.use_mea_attention = use_mea_attention
69
+ self.mea_component_key_value_heads = (
70
+ num_key_value_heads if mea_component_key_value_heads is None else int(mea_component_key_value_heads)
71
+ )
72
+ self.mea_groupnorm_eps = mea_groupnorm_eps
73
+ self.use_lucid_attention = use_lucid_attention
74
+ self.lucid_attention_eps = lucid_attention_eps
75
+
76
  rope_config_validation(self)
77
 
78
  # FANformer parameters
79
+ self.fan_ratio = fan_ratio # Used in attention mechanisms
80
+ self.fan_ratio_ffn = fan_ratio_ffn # NEW: Used in FFN for complementary periodicity
81
 
82
  self.dropout_rate = dropout_rate
83
 
 
 
 
 
 
 
84
  self.auto_map = {
85
  "AutoConfig": "configuration_neollm.NeoLLMConfig",
86
  "AutoModel": "modeling_neollm.NeoLLMModel",
87
  "AutoModelForCausalLM": "modeling_neollm.NeoLLMForCausalLM"
88
  }
89
 
90
+ __all__ = ["NeoLLMConfig"]