KitsuVp
/

NeoLLM

@@ -7,9 +7,9 @@ logger = logging.get_logger(__name__)
 class NeoLLMConfig(PretrainedConfig):
     r"""
-    This is the configuration class to store the configuration of a [`NeoLLMModel`]. It is used to instantiate a
     NeoLLM model according to the specified arguments, defining the model architecture.
-    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs.
     """
     model_type = "neollm"
     keys_to_ignore_at_inference = []
@@ -33,10 +33,13 @@ class NeoLLMConfig(PretrainedConfig):
         attention_bias=False,
         attention_dropout=0.1,
         head_dim=64,
         fan_ratio=0.125,
-        fan_ratio_ffn=0.0625,  # NEW: Half of fan_ratio for FFN periodicity modeling
         dropout_rate=0.1,
         **kwargs,
     ):
         super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
@@ -56,19 +59,24 @@ class NeoLLMConfig(PretrainedConfig):
         self.attention_bias = attention_bias
         self.attention_dropout = attention_dropout
         self.head_dim = head_dim
         rope_config_validation(self)
         # FANformer parameters
-        self.fan_ratio = fan_ratio  # Used in attention mechanisms
-        self.fan_ratio_ffn = fan_ratio_ffn  # NEW: Used in FFN for complementary periodicity
         self.dropout_rate = dropout_rate
         self.auto_map = {
             "AutoConfig": "configuration_neollm.NeoLLMConfig",
             "AutoModel": "modeling_neollm.NeoLLMModel",
             "AutoModelForCausalLM": "modeling_neollm.NeoLLMForCausalLM"
         }
-__all__ = ["NeoLLMConfig"]

 class NeoLLMConfig(PretrainedConfig):
     r"""
+    This is the configuration class to store the configuration of a `NeoLLMModel`. It is used to instantiate a
     NeoLLM model according to the specified arguments, defining the model architecture.
+    Configuration objects inherit from `PretrainedConfig` and can be used to control the model outputs.
     """
     model_type = "neollm"
     keys_to_ignore_at_inference = []
         attention_bias=False,
         attention_dropout=0.1,
         head_dim=64,
         fan_ratio=0.125,
+        fan_ratio_ffn=0.0625,
         dropout_rate=0.1,
+        use_stack=True,
+        num_stack_heads=4,
+        stack_slots=24,
+        stack_d_model=16,
         **kwargs,
     ):
         super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
         self.attention_bias = attention_bias
         self.attention_dropout = attention_dropout
         self.head_dim = head_dim
         rope_config_validation(self)
         # FANformer parameters: fan_ratio is used in attention, fan_ratio_ffn in the FFN
+        self.fan_ratio = fan_ratio
+        self.fan_ratio_ffn = fan_ratio_ffn
         self.dropout_rate = dropout_rate
+        # StackMemory parameters
+        self.use_stack = use_stack
+        self.num_stack_heads = num_stack_heads
+        self.stack_slots = stack_slots
+        self.stack_d_model = stack_d_model
         self.auto_map = {
             "AutoConfig": "configuration_neollm.NeoLLMConfig",
             "AutoModel": "modeling_neollm.NeoLLMModel",
             "AutoModelForCausalLM": "modeling_neollm.NeoLLMForCausalLM"
         }
+__all__ = ["NeoLLMConfig"]