Update configuration_super_linear.py
Browse files
configuration_super_linear.py
CHANGED
|
@@ -30,24 +30,21 @@ class SuperLinearConfig(PretrainedConfig):
|
|
| 30 |
pred_len=96,
|
| 31 |
inf_pred_len=96,
|
| 32 |
max_horizon=96,
|
| 33 |
-
moe_n_experts=
|
| 34 |
top_k_experts=5,
|
| 35 |
moe =1,
|
| 36 |
-
freq_experts='mean_naive_1/6_1/7_1/8_1/12_1/14_1/16_1/21_1/24_1/28_1/30_1/32_1/36_1/42_1/48_1/52_1/56_1/60_1/72_1/84_1/96_1/120_1/144_1/168_1/180_1/224_1/252_1/288_1/336_1/365_1/504_1/672_1/1008_1/1440_1/2016_1/3600',
|
| 37 |
auto_regressive= 1,
|
| 38 |
-
con= 0,
|
| 39 |
d_model= 128,
|
| 40 |
dropout= 0.0,
|
| 41 |
-
fft_len=
|
| 42 |
freeze_experts= 1,
|
| 43 |
-
ker_len= 50,
|
| 44 |
layer_type= "RLinear",
|
| 45 |
linear_checkpoints_dir= "checkpoints5",
|
| 46 |
linear_checkpoints_path= "/cs/azencot_fsas/MoE/",
|
| 47 |
load_linear = 1,
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
mlp_gating = 1,
|
| 51 |
model_type= "super_linear",
|
| 52 |
moe_temp = 1,
|
| 53 |
noisy_gating_std = 0.1,
|
|
@@ -71,12 +68,9 @@ class SuperLinearConfig(PretrainedConfig):
|
|
| 71 |
self.linear_checkpoints_path = linear_checkpoints_path
|
| 72 |
self.linear_checkpoints_dir = linear_checkpoints_dir
|
| 73 |
self.load_linear = load_linear
|
| 74 |
-
self.manual_moe = manual_moe
|
| 75 |
self.misc_moe = misc_moe
|
| 76 |
self.noisy_gating_std = noisy_gating_std
|
| 77 |
self.noisy_gating_std_decay = noisy_gating_std_decay
|
| 78 |
-
self.ker_len = ker_len
|
| 79 |
-
self.con = con
|
| 80 |
self.d_model = d_model
|
| 81 |
self.mlp_gating = mlp_gating
|
| 82 |
self.moe_temp = moe_temp
|
|
|
|
| 30 |
pred_len=96,
|
| 31 |
inf_pred_len=96,
|
| 32 |
max_horizon=96,
|
| 33 |
+
moe_n_experts=12,
|
| 34 |
top_k_experts=5,
|
| 35 |
moe =1,
|
| 36 |
+
freq_experts= 'mean_naive_1/4_1/6_1/7_1/8_1/12_1/14_1/16_1/21_1/24_1/28_1/30_1/32_1/36_1/42_1/48_1/52_1/56_1/60_1/72_1/84_1/90_1/96_1/120_1/144_1/168_1/180_1/224_1/252_1/288_1/336_1/365_1/504_1/672_1/1008_1/1440_1/2016_1/3600',
|
| 37 |
auto_regressive= 1,
|
|
|
|
| 38 |
d_model= 128,
|
| 39 |
dropout= 0.0,
|
| 40 |
+
fft_len= 5000,
|
| 41 |
freeze_experts= 1,
|
|
|
|
| 42 |
layer_type= "RLinear",
|
| 43 |
linear_checkpoints_dir= "checkpoints5",
|
| 44 |
linear_checkpoints_path= "/cs/azencot_fsas/MoE/",
|
| 45 |
load_linear = 1,
|
| 46 |
+
misc_moe = 10,
|
| 47 |
+
mlp_gating = 0,
|
|
|
|
| 48 |
model_type= "super_linear",
|
| 49 |
moe_temp = 1,
|
| 50 |
noisy_gating_std = 0.1,
|
|
|
|
| 68 |
self.linear_checkpoints_path = linear_checkpoints_path
|
| 69 |
self.linear_checkpoints_dir = linear_checkpoints_dir
|
| 70 |
self.load_linear = load_linear
|
|
|
|
| 71 |
self.misc_moe = misc_moe
|
| 72 |
self.noisy_gating_std = noisy_gating_std
|
| 73 |
self.noisy_gating_std_decay = noisy_gating_std_decay
|
|
|
|
|
|
|
| 74 |
self.d_model = d_model
|
| 75 |
self.mlp_gating = mlp_gating
|
| 76 |
self.moe_temp = moe_temp
|