Harryx2025 committed on
Commit
bb7d52f
·
verified ·
1 Parent(s): 06f8e4a

Rename configuration_patch_moe.py to configuration_FalconTST.py

Browse files
configuration_patch_moe.py → configuration_FalconTST.py RENAMED
@@ -1,24 +1,25 @@
1
  """
2
- Configuration class for PatchMoE model.
3
 
4
- This module defines the configuration for PatchMoE, a large-scale time series foundation model
5
  that utilizes Mixture of Experts (MoE) architecture with multiple patch tokenizers.
6
  """
7
 
8
- from typing import List, Optional
9
  from transformers import PretrainedConfig
 
10
 
11
 
12
- class PatchMoeConfig(PretrainedConfig):
13
  """
14
- Configuration class for PatchMoE model.
15
-
16
- PatchMoE is a time series foundation model that uses Mixture of Experts architecture
17
  with multiple patch tokenizers for efficient time series forecasting.
18
-
19
  This configuration inherits from [`PretrainedConfig`] and can be used to control the model
20
  output. Read the documentation from [`PretrainedConfig`] for more information.
21
-
22
  Args:
23
  hidden_size (`int`, *optional*, defaults to 1024):
24
  Dimensionality of the encoder layers and the pooler layer.
@@ -97,10 +98,10 @@ class PatchMoeConfig(PretrainedConfig):
97
  tie_word_embeddings (`bool`, *optional*, defaults to `False`):
98
  Whether to tie word embeddings.
99
  """
100
-
101
- model_type = "patch_moe"
102
  keys_to_ignore_at_inference = ["past_key_values"]
103
-
104
  def __init__(
105
  self,
106
  hidden_size: int = 1024,
@@ -113,6 +114,7 @@ class PatchMoeConfig(PretrainedConfig):
113
  mask_pad_value: float = 255.0,
114
  expert_num_layers: int = 4,
115
  shared_patch_size: int = 64,
 
116
  patch_size_list: Optional[List[int]] = None,
117
  multi_forecast_head_list: Optional[List[int]] = None,
118
  is_revin: bool = True,
@@ -126,6 +128,7 @@ class PatchMoeConfig(PretrainedConfig):
126
  test_data_test_len: int = 720,
127
  autoregressive_step_list: Optional[List[int]] = None,
128
  multi_forecast_head_type: str = "single",
 
129
  num_experts: int = 4,
130
  moe_router_topk: int = 2,
131
  moe_ffn_hidden_size: int = 4096,
@@ -142,7 +145,8 @@ class PatchMoeConfig(PretrainedConfig):
142
  tie_word_embeddings: bool = False,
143
  **kwargs,
144
  ):
145
- """Initialize PatchMoE configuration."""
 
146
  # Set default values for list parameters
147
  if patch_size_list is None:
148
  patch_size_list = [96, 64, 48, 24]
@@ -150,14 +154,15 @@ class PatchMoeConfig(PretrainedConfig):
150
  multi_forecast_head_list = [24, 96, 336]
151
  if autoregressive_step_list is None:
152
  autoregressive_step_list = [2, 4, 1]
153
- # patchmoe inference specific
 
154
  self.test_data_seq_len = test_data_seq_len
155
  self.inference_length = test_data_test_len
156
  self.autoregressive_step_list = autoregressive_step_list
157
  self.multi_forecast_head_type = multi_forecast_head_type
158
  self.use_cache = True
159
 
160
- # patchmoe specific
161
  self.hidden_size = hidden_size
162
  self.ffn_hidden_size = ffn_hidden_size
163
  self.num_attention_heads = num_attention_heads
@@ -165,7 +170,7 @@ class PatchMoeConfig(PretrainedConfig):
165
  self.initializer_range = initializer_range
166
  self.seq_length = seq_length
167
  self.multi_forecast_head_list = multi_forecast_head_list
168
- self.kv_channels = self.hidden_size // self.num_attention_heads
169
  self.rotary_base = rope_theta
170
  self.num_hidden_layers = num_hidden_layers
171
  self.mask_pad_value = mask_pad_value
@@ -188,7 +193,7 @@ class PatchMoeConfig(PretrainedConfig):
188
  self.moe_router_topk = moe_router_topk
189
  self.moe_router_score_function = moe_router_score_function
190
  self.moe_ffn_hidden_size = moe_ffn_hidden_size
191
- self.moe_shared_expert_intermediate_size = moe_shared_expert_intermediate_size
192
  self.moe_router_enable_expert_bias = moe_router_enable_expert_bias
193
  self.moe_expert_final_layernorm = moe_expert_final_layernorm
194
  self.transformer_input_layernorm = transformer_input_layernorm
@@ -196,8 +201,11 @@ class PatchMoeConfig(PretrainedConfig):
196
  self.q_layernorm = q_layernorm
197
  self.k_layernorm = k_layernorm
198
 
199
- kwargs.pop("tie_word_embeddings", None)
 
 
200
  super().__init__(
201
  tie_word_embeddings=tie_word_embeddings,
202
  **kwargs,
203
  )
 
 
1
  """
2
+ Configuration class for FalconTST model.
3
 
4
+ This module defines the configuration for FalconTST, a large-scale time series foundation model
5
  that utilizes Mixture of Experts (MoE) architecture with multiple patch tokenizers.
6
  """
7
 
8
+ from typing import List, Optional, Union
9
  from transformers import PretrainedConfig
10
+ import torch
11
 
12
 
13
+ class FalconTSTConfig(PretrainedConfig):
14
  """
15
+ Configuration class for FalconTST model.
16
+
17
+ FalconTST is a time series foundation model that uses Mixture of Experts architecture
18
  with multiple patch tokenizers for efficient time series forecasting.
19
+
20
  This configuration inherits from [`PretrainedConfig`] and can be used to control the model
21
  output. Read the documentation from [`PretrainedConfig`] for more information.
22
+
23
  Args:
24
  hidden_size (`int`, *optional*, defaults to 1024):
25
  Dimensionality of the encoder layers and the pooler layer.
 
98
  tie_word_embeddings (`bool`, *optional*, defaults to `False`):
99
  Whether to tie word embeddings.
100
  """
101
+
102
+ model_type = "FalconTST"
103
  keys_to_ignore_at_inference = ["past_key_values"]
104
+
105
  def __init__(
106
  self,
107
  hidden_size: int = 1024,
 
114
  mask_pad_value: float = 255.0,
115
  expert_num_layers: int = 4,
116
  shared_patch_size: int = 64,
117
+
118
  patch_size_list: Optional[List[int]] = None,
119
  multi_forecast_head_list: Optional[List[int]] = None,
120
  is_revin: bool = True,
 
128
  test_data_test_len: int = 720,
129
  autoregressive_step_list: Optional[List[int]] = None,
130
  multi_forecast_head_type: str = "single",
131
+
132
  num_experts: int = 4,
133
  moe_router_topk: int = 2,
134
  moe_ffn_hidden_size: int = 4096,
 
145
  tie_word_embeddings: bool = False,
146
  **kwargs,
147
  ):
148
+ """Initialize FalconTST configuration."""
149
+
150
  # Set default values for list parameters
151
  if patch_size_list is None:
152
  patch_size_list = [96, 64, 48, 24]
 
154
  multi_forecast_head_list = [24, 96, 336]
155
  if autoregressive_step_list is None:
156
  autoregressive_step_list = [2, 4, 1]
157
+
158
+ # FalconTST inference specific
159
  self.test_data_seq_len = test_data_seq_len
160
  self.inference_length = test_data_test_len
161
  self.autoregressive_step_list = autoregressive_step_list
162
  self.multi_forecast_head_type = multi_forecast_head_type
163
  self.use_cache = True
164
 
165
+ # FalconTST specific
166
  self.hidden_size = hidden_size
167
  self.ffn_hidden_size = ffn_hidden_size
168
  self.num_attention_heads = num_attention_heads
 
170
  self.initializer_range = initializer_range
171
  self.seq_length = seq_length
172
  self.multi_forecast_head_list = multi_forecast_head_list
173
+ self.kv_channels=self.hidden_size // self.num_attention_heads
174
  self.rotary_base = rope_theta
175
  self.num_hidden_layers = num_hidden_layers
176
  self.mask_pad_value = mask_pad_value
 
193
  self.moe_router_topk = moe_router_topk
194
  self.moe_router_score_function = moe_router_score_function
195
  self.moe_ffn_hidden_size = moe_ffn_hidden_size
196
+ self.moe_shared_expert_intermediate_size=moe_shared_expert_intermediate_size
197
  self.moe_router_enable_expert_bias = moe_router_enable_expert_bias
198
  self.moe_expert_final_layernorm = moe_expert_final_layernorm
199
  self.transformer_input_layernorm = transformer_input_layernorm
 
201
  self.q_layernorm = q_layernorm
202
  self.k_layernorm = k_layernorm
203
 
204
+
205
+
206
+ kwargs.pop('tie_word_embeddings', None)
207
  super().__init__(
208
  tie_word_embeddings=tie_word_embeddings,
209
  **kwargs,
210
  )
211
+