TimeSeriesHomework1_2 / src /lab4_functions.py
Kolesnikov Dmitry
feat: Sarima и VAR для третьей лабораторки
b34a74f
# lab4_functions.py
"""
Простейший набор для ЛР №4:
- Feature engineering (lags, rolling)
- Обучение Ridge, Lasso, RandomForest, LightGBM (опционально)
- TimeSeriesSplit wrapper
"""
import warnings
from typing import List, Dict, Sequence

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
warnings.filterwarnings("ignore")
try:
import lightgbm as lgb
LGB_AVAILABLE = True
except Exception:
LGB_AVAILABLE = False
def make_lag_features(
    df: pd.DataFrame,
    target: str,
    lags: Sequence[int] = (1, 7, 30),
) -> pd.DataFrame:
    """Add lagged copies of ``target`` as new feature columns.

    The frame is ordered by its ``timestamp`` column, then one
    ``{target}_lag_{k}`` column is added for every ``k`` in ``lags``. Each
    lag column is the target shifted by ``k`` *rows* (not by a time offset),
    so the result is only a true time lag when the rows are evenly spaced.
    Finally every row containing a missing value in *any* column is dropped
    and ``timestamp`` is restored as a regular column.

    Args:
        df: Input data with a ``timestamp`` column and the ``target`` column.
            It is not modified.
        target: Name of the column to lag.
        lags: Row offsets handed to ``Series.shift``. Any iterable of ints is
            accepted; the default is an immutable tuple so it cannot be
            altered between calls.

    Returns:
        A new DataFrame sorted by ``timestamp`` with the lag columns appended
        and incomplete rows removed (the index is reset to 0..n-1).

    Raises:
        KeyError: If ``timestamp`` is not a column of ``df``, or ``target``
            is missing while ``lags`` is non-empty.
    """
    # set_index() already returns a new frame, so the caller's df is never
    # touched and no explicit copy is required.
    frame = df.set_index('timestamp').sort_index()
    for lag in lags:
        frame[f'{target}_lag_{lag}'] = frame[target].shift(lag)
    # dropna() removes the leading rows that have no lag history yet, plus
    # any row that already had a gap in another column.
    return frame.dropna().reset_index()
def train_baselines(X_train, y_train):
    """Fit the baseline regressors on one training set.

    Ridge, Lasso and a 100-tree random forest (seeded with 42) are always
    trained; a 100-estimator LightGBM regressor is added only when the
    optional ``lightgbm`` import succeeded at module load time.

    Args:
        X_train: Training feature matrix, passed unchanged to each ``fit``.
        y_train: Training target values, passed unchanged to each ``fit``.

    Returns:
        Dict mapping the model label (``'Ridge'``, ``'Lasso'``, ``'RF'`` and
        optionally ``'LightGBM'``) to its fitted estimator.
    """
    candidates = [
        ('Ridge', Ridge()),
        ('Lasso', Lasso()),
        ('RF', RandomForestRegressor(n_estimators=100, random_state=42)),
    ]
    if LGB_AVAILABLE:
        candidates.append(('LightGBM', lgb.LGBMRegressor(n_estimators=100)))

    # fit() returns the estimator itself, so the dict holds fitted models.
    return {
        label: estimator.fit(X_train, y_train)
        for label, estimator in candidates
    }
def cv_score_ts(model, X, y, n_splits=5, scoring='neg_mean_absolute_error'):
    """Return the mean cross-validation score over time-ordered folds.

    The folds come from ``TimeSeriesSplit`` and the per-fold scores from
    ``cross_val_score``; their arithmetic mean is returned.

    Args:
        model: Estimator handed to ``cross_val_score``.
        X: Feature matrix.
        y: Target values.
        n_splits: Number of splits given to ``TimeSeriesSplit``.
        scoring: Scorer name given to ``cross_val_score``. With the default
            the result is a negated mean absolute error, so values closer
            to zero are better.

    Returns:
        Mean of the per-fold scores.
    """
    fold_scores = cross_val_score(
        model,
        X,
        y,
        cv=TimeSeriesSplit(n_splits=n_splits),
        scoring=scoring,
    )
    return fold_scores.mean()