razmars commited on
Commit
1b82127
·
verified ·
1 Parent(s): 1e413b5

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +9 -9
config.json CHANGED
@@ -8,31 +8,31 @@
8
  "AutoModelForCausalLM": "modeling_super_linear.SuperLinearForCausalLM"
9
  },
10
  "auto_regressive": 1,
11
- "con": 0,
12
  "d_model": 128,
13
  "dropout": 0.0,
14
- "fft_len": 10000,
15
  "freeze_experts": 1,
16
- "freq_experts": "mean_naive_1/6_1/7_1/8_1/12_1/14_1/16_1/21_1/24_1/28_1/30_1/32_1/36_1/42_1/48_1/52_1/56_1/60_1/72_1/84_1/96_1/120_1/144_1/168_1/180_1/224_1/252_1/288_1/336_1/365_1/504_1/672_1/1008_1/1440_1/2016_1/3600",
17
  "inf_pred_len": 96,
18
- "ker_len": 50,
19
  "layer_type": "RLinear",
20
  "linear_checkpoints_dir": "checkpoints5",
21
  "linear_checkpoints_path": "/cs/azencot_fsas/MoE/",
22
  "load_linear": 0,
23
- "manual_moe": 0,
24
  "max_horizon": 96,
25
- "misc_moe": 1,
26
- "mlp_gating": 1,
27
  "model_type": "super_linear",
28
  "moe": 1,
29
- "moe_n_experts": 8,
30
  "moe_temp": 1,
31
  "noisy_gating_std": 0.1,
32
  "noisy_gating_std_decay": 1,
33
  "pred_len": 96,
34
  "seq_len": 512,
35
- "top_k_experts": 5,
36
  "torch_dtype": "float32",
37
  "transformers_version": "4.40.1",
38
  "use_fft": 1
 
8
  "AutoModelForCausalLM": "modeling_super_linear.SuperLinearForCausalLM"
9
  },
10
  "auto_regressive": 1,
11
+ "con": 0,
12
  "d_model": 128,
13
  "dropout": 0.0,
14
+ "fft_len": 5000,
15
  "freeze_experts": 1,
16
+ "freq_experts": "mean_naive_1/4_1/6_1/7_1/8_1/12_1/14_1/16_1/21_1/24_1/28_1/30_1/32_1/36_1/42_1/48_1/52_1/56_1/60_1/72_1/84_1/90_1/96_1/120_1/144_1/168_1/180_1/224_1/252_1/288_1/336_1/365_1/504_1/672_1/1008_1/1440_1/2016_1/3600",
17
  "inf_pred_len": 96,
18
+ "ker_len": 50,
19
  "layer_type": "RLinear",
20
  "linear_checkpoints_dir": "checkpoints5",
21
  "linear_checkpoints_path": "/cs/azencot_fsas/MoE/",
22
  "load_linear": 0,
23
+ "manual_moe": 0,
24
  "max_horizon": 96,
25
+ "misc_moe": 10,
26
+ "mlp_gating": 0,
27
  "model_type": "super_linear",
28
  "moe": 1,
29
+ "moe_n_experts": 12,
30
  "moe_temp": 1,
31
  "noisy_gating_std": 0.1,
32
  "noisy_gating_std_decay": 1,
33
  "pred_len": 96,
34
  "seq_len": 512,
35
+ "top_k_experts": 12,
36
  "torch_dtype": "float32",
37
  "transformers_version": "4.40.1",
38
  "use_fft": 1