KitsuVp committed on
Commit
51cf4ed
·
verified ·
1 Parent(s): 9a60a23

Update configuration_neollm.py

Browse files
Files changed (1) hide show
  1. configuration_neollm.py +16 -8
configuration_neollm.py CHANGED
@@ -7,9 +7,9 @@ logger = logging.get_logger(__name__)
7
 
8
  class NeoLLMConfig(PretrainedConfig):
9
  r"""
10
- This is the configuration class to store the configuration of a [`NeoLLMModel`]. It is used to instantiate a
11
  NeoLLM model according to the specified arguments, defining the model architecture.
12
- Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs.
13
  """
14
  model_type = "neollm"
15
  keys_to_ignore_at_inference = []
@@ -33,10 +33,13 @@ class NeoLLMConfig(PretrainedConfig):
33
  attention_bias=False,
34
  attention_dropout=0.1,
35
  head_dim=64,
36
-
37
  fan_ratio=0.125,
38
- fan_ratio_ffn=0.0625, # NEW: Half of fan_ratio for FFN periodicity modeling
39
  dropout_rate=0.1,
 
 
 
 
40
  **kwargs,
41
  ):
42
  super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
@@ -56,19 +59,24 @@ class NeoLLMConfig(PretrainedConfig):
56
  self.attention_bias = attention_bias
57
  self.attention_dropout = attention_dropout
58
  self.head_dim = head_dim
59
-
60
  rope_config_validation(self)
61
 
62
  # FANformer parameters
63
- self.fan_ratio = fan_ratio # Used in attention mechanisms
64
- self.fan_ratio_ffn = fan_ratio_ffn # NEW: Used in FFN for complementary periodicity
65
 
66
  self.dropout_rate = dropout_rate
67
 
 
 
 
 
 
 
68
  self.auto_map = {
69
  "AutoConfig": "configuration_neollm.NeoLLMConfig",
70
  "AutoModel": "modeling_neollm.NeoLLMModel",
71
  "AutoModelForCausalLM": "modeling_neollm.NeoLLMForCausalLM"
72
  }
73
 
74
- __all__ = ["NeoLLMConfig"]
 
7
 
8
  class NeoLLMConfig(PretrainedConfig):
9
  r"""
10
+ This is the configuration class to store the configuration of a `NeoLLMModel`. It is used to instantiate a
11
  NeoLLM model according to the specified arguments, defining the model architecture.
12
+ Configuration objects inherit from `PretrainedConfig` and can be used to control the model outputs.
13
  """
14
  model_type = "neollm"
15
  keys_to_ignore_at_inference = []
 
33
  attention_bias=False,
34
  attention_dropout=0.1,
35
  head_dim=64,
 
36
  fan_ratio=0.125,
37
+ fan_ratio_ffn=0.0625,
38
  dropout_rate=0.1,
39
+ use_stack=True,
40
+ num_stack_heads=4,
41
+ stack_slots=24,
42
+ stack_d_model=16,
43
  **kwargs,
44
  ):
45
  super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
 
59
  self.attention_bias = attention_bias
60
  self.attention_dropout = attention_dropout
61
  self.head_dim = head_dim
 
62
  rope_config_validation(self)
63
 
64
  # FANformer parameters
65
+ self.fan_ratio = fan_ratio
66
+ self.fan_ratio_ffn = fan_ratio_ffn
67
 
68
  self.dropout_rate = dropout_rate
69
 
70
+ # StackMemory parameters
71
+ self.use_stack = use_stack
72
+ self.num_stack_heads = num_stack_heads
73
+ self.stack_slots = stack_slots
74
+ self.stack_d_model = stack_d_model
75
+
76
  self.auto_map = {
77
  "AutoConfig": "configuration_neollm.NeoLLMConfig",
78
  "AutoModel": "modeling_neollm.NeoLLMModel",
79
  "AutoModelForCausalLM": "modeling_neollm.NeoLLMForCausalLM"
80
  }
81
 
82
+ __all__ = ["NeoLLMConfig"]