{
  "_name_or_path": "super_linear",
  "architectures": [
    "SuperLinearForCausalLM"
  ],
  "auto_map": {
    "AutoConfig": "configuration_super_linear.SuperLinearConfig",
    "AutoModelForCausalLM": "modeling_super_linear.SuperLinearForCausalLM"
  },
  
  "_comment_model_architecture": "Model architecture parameters",
  "train_seq_len": 512,
  "train_pred_len": 96,
  "seq_len": 512,
  "pred_len": 96,
  "inf_pred_len": 96,
  "max_horizon": 96,
  "auto_regressive": 1,
  
  "_comment_moe": "MoE (Mixture of Experts) parameters",
  "moe": 1,
  "moe_n_experts": 4,
  "top_k_experts": 12,
  "noisy_gating_std": 0.1,
  "moe_temp": 1.0,
  "moe_norm": false,
  "layer_type": "RLinear",
  "n_experts": 4,
  "comp_moe": 12,
  "freeze_experts": true,
  
  "_comment_fft": "FFT-based gating parameters",
  "use_fft": true,
  "fft_len": 5000,
  
  "_comment_experts": "Expert configuration",
  "freq_experts": "mean_naive_1/4_1/6_1/7_1/8_1/12_1/14_1/16_1/21_1/24_1/28_1/30_1/32_1/36_1/42_1/48_1/52_1/56_1/60_1/72_1/84_1/90_1/96_1/120_1/144_1/168_1/180_1/224_1/252_1/288_1/336_1/365_1/504_1/672_1/1008_1/1440_1/2016_1/3600",
  
  "_comment_loading": "Model loading and saving parameters",
  "load_linear": true,
  "load_weights_full": true,
  "linear_freq_weights_path": "./weights/linear_freq_weights/",
  "full_weights_path": "./weights/full_weights/checkpoint.pth",
  
  "_comment_training": "Training parameters",
  "resample_long_lookback": false,
  
  "_comment_legacy": "Legacy parameters for backward compatibility",
  "linear_checkpoints_path": "/cs/azencot_fsas/MoE/",
  "linear_checkpoints_dir": "checkpoints5",
  "manual_moe": 0,
  "misc_moe": 1,
  "noisy_gating_std_decay": 1,
  "ker_len": 50,
  "con": 0,
  "d_model": 512,
  "mlp_gating": 1,
  "dropout": 0.0,
  
  "_comment_system": "System and framework parameters",
  "model_type": "super_linear",
  "torch_dtype": "float32",
  "transformers_version": "4.40.1"
}