{
  "_name_or_path": "super_linear",
  "architectures": [
    "SuperLinearForCausalLM"
  ],
  "auto_map": {
    "AutoConfig": "configuration_super_linear.SuperLinearConfig",
    "AutoModelForCausalLM": "modeling_super_linear.SuperLinearForCausalLM"
  },
  "_comment_model_architecture": "Model architecture parameters",
  "train_seq_len": 512,
  "train_pred_len": 96,
  "_comment_moe": "MoE (Mixture of Experts) parameters",
  "top_k_experts": 12,
  "noisy_gating_std": 0.1,
  "moe_temp": 1.0,
  "moe_norm": false,
  "layer_type": "RLinear",
  "comp_moe": 12,
  "freeze_experts": true,
  "_comment_fft": "FFT-based gating parameters",
  "use_fft": true,
  "fft_len": 5000,
  "_comment_experts": "Expert configuration",
  "freq_experts": "mean_naive_1/4_1/6_1/7_1/8_1/12_1/14_1/16_1/21_1/24_1/28_1/30_1/32_1/36_1/42_1/48_1/52_1/56_1/60_1/72_1/84_1/90_1/96_1/120_1/144_1/168_1/180_1/224_1/252_1/288_1/336_1/365_1/504_1/672_1/1008_1/1440_1/2016_1/3600",
  "_comment_training": "Training parameters",
  "resample_long_lookback": false,
  "_comment_horizon": "Auto-regressive and horizon parameters",
  "long_horizon_scaling": 1,
  "_comment_resampling": "Resampling and lookback-based parameters",
  "lookback_resampling": 1,
  "scale_list": "2,4,6",
  "threshold": 0.2,
  "freq_bound": 0.25,
  "penalty_scale": 2.0,
  "_comment_system": "System and framework parameters",
  "model_type": "super_linear",
  "torch_dtype": "float32",
  "transformers_version": "4.40.1"
}