# SuperLinear / configuration_super_linear.py
from transformers import PretrainedConfig

# 1) --------------------------------------------------------------------------
# CONFIG
# -----------------------------------------------------------------------------
class SuperLinearConfig(PretrainedConfig):
"""
Configuration for the SuperLinear MoE time–series foundation model.
Only *model_type* must be unique inside transformers; the rest mirrors
the __init__ arguments of your original Config object.
"""
model_type = "super_linear"
    def __init__(
        self,
        seq_len=512,
        pred_len=96,
        inf_pred_len=96,
        max_horizon=96,
        auto_regressive=1,
        moe_n_experts=8,
        top_k_experts=3,
        moe=1,
        # '_'-separated expert spec: the literal 'mean' and 'naive' experts
        # followed by reciprocal periods (see the parsing sketch below the class).
        freq_experts='mean_naive_1/6_1/7_1/8_1/12_1/14_1/16_1/21_1/24_1/28_1/30_1/32_1/36_1/42_1/48_1/52_1/56_1/60_1/72_1/84_1/96_1/120_1/144_1/168_1/180_1/224_1/252_1/288_1/336_1/365_1/504_1/672_1/1008_1/1440_1/2016_1/3600',
        **kwargs,  # any extra CLI args
    ):
        self.seq_len = seq_len
        self.moe = moe
        self.pred_len = pred_len
        self.inf_pred_len = inf_pred_len
        self.max_horizon = max_horizon
        self.auto_regressive = auto_regressive
        self.moe_n_experts = moe_n_experts
        self.top_k_experts = top_k_experts
        self.freq_experts = freq_experts
        # Fixed defaults (not exposed as __init__ arguments)
        self.freeze_experts = 1
        self.layer_type = "RLinear"
        self.linear_checkpoints_path = '/cs/azencot_fsas/MoE/'  # hard-coded checkpoint root
        self.linear_checkpoints_dir = "checkpoints5"
        self.load_linear = 0
        self.manual_moe = 0
        self.misc_moe = 1
        # Noisy gating / routing hyperparameters (see the gating sketch below the class)
        self.noisy_gating_std = 0.1
        self.noisy_gating_std_decay = 1
        self.ker_len = 50
        self.con = 0
        self.d_model = 512
        self.mlp_gating = 1
        self.moe_temp = 1
        self.use_fft = 1
        self.fft_len = 10000
        self.dropout = 0.0
        super().__init__(**kwargs)
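

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): how the '_'-separated
# `freq_experts` string above could be parsed. This is an assumption about the
# intended format -- literal expert names ('mean', 'naive') followed by
# reciprocal periods such as '1/24' -- and `_parse_freq_experts` is a
# hypothetical helper, not the model's actual parser.
def _parse_freq_experts(spec: str):
    """Split an expert spec into literal names and numeric frequencies."""
    names, freqs = [], []
    for token in spec.split("_"):
        if "/" in token:                      # e.g. '1/24' -> one cycle per 24 steps
            num, den = token.split("/")
            freqs.append(float(num) / float(den))
        else:                                 # e.g. 'mean', 'naive'
            names.append(token)
    return names, freqs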
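

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): a generic noisy top-k
# gate in the spirit of Shazeer et al. (2017), using the config fields
# `top_k_experts`, `noisy_gating_std`, and `moe_temp`. It illustrates the
# technique these hyperparameters name, not SuperLinear's actual router.
import torch
import torch.nn.functional as F


def _noisy_topk_gate(logits, k, noise_std=0.1, temperature=1.0, training=True):
    """Select k experts per token; return (gate weights, expert indices)."""
    if training and noise_std > 0:
        # Gaussian exploration noise encourages balanced expert usage early on.
        logits = logits + torch.randn_like(logits) * noise_std
    topk_vals, topk_idx = logits.topk(k, dim=-1)
    gates = F.softmax(topk_vals / temperature, dim=-1)
    return gates, topk_idx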
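

# ---------------------------------------------------------------------------
# Usage sketch (not part of the original file). The registration calls are
# standard transformers API; `SuperLinearModel` and the module name
# `modeling_super_linear` are assumptions about the companion modeling file.
#
#   from transformers import AutoConfig, AutoModelForCausalLM
#   from modeling_super_linear import SuperLinearModel  # hypothetical
#
#   AutoConfig.register("super_linear", SuperLinearConfig)
#   AutoModelForCausalLM.register(SuperLinearConfig, SuperLinearModel)
#
#   config = SuperLinearConfig(seq_len=512, pred_len=96, moe_n_experts=8)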