# lab4_functions.py
"""
Minimal toolkit for lab work #4:
- Feature engineering (lags, rolling)
- Training Ridge, Lasso, RandomForest, LightGBM (optional)
- TimeSeriesSplit wrapper
"""
import warnings
from typing import Dict, List, Sequence

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso, Ridge
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import TimeSeriesSplit, cross_val_score

# NOTE: silences ALL warnings process-wide as soon as this module is
# imported. Kept because existing callers may rely on the quiet output.
warnings.filterwarnings("ignore")

# LightGBM is an optional dependency. The broad ``except Exception`` is kept
# on purpose (best-effort import): any failure while importing it simply
# disables the LightGBM model instead of breaking the whole module.
try:
    import lightgbm as lgb
    LGB_AVAILABLE = True
except Exception:
    LGB_AVAILABLE = False


def make_lag_features(
    df: pd.DataFrame,
    target: str,
    lags: Sequence[int] = (1, 7, 30),
) -> pd.DataFrame:
    """Return a copy of *df* with lagged copies of the *target* column.

    The frame is indexed by its ``'timestamp'`` column and sorted by that
    index before shifting, so a lag of ``k`` means "the value ``k`` rows
    earlier in timestamp order" (rows, not calendar units).

    Args:
        df: Input frame. Must contain a ``'timestamp'`` column and the
            *target* column. It is not modified.
        target: Name of the column to lag.
        lags: Row offsets to generate; one ``f'{target}_lag_{k}'`` column is
            added per entry. The default is an immutable tuple so it cannot
            be shared/mutated across calls.

    Returns:
        A new frame sorted by timestamp, with ``'timestamp'`` restored as a
        regular column and the lag columns appended. Rows containing NaN in
        ANY column are dropped (this removes the first ``max(lags)`` rows,
        and also any row that already had missing values).

    Raises:
        KeyError: If ``'timestamp'`` (or *target*, when *lags* is non-empty)
            is missing from *df*.
    """
    dfc = df.copy().set_index('timestamp').sort_index()
    for lag in lags:
        dfc[f'{target}_lag_{lag}'] = dfc[target].shift(lag)
    # dropna() is intentionally not restricted to the lag columns: the
    # original behaviour is to return a fully NaN-free frame.
    return dfc.dropna().reset_index()


def train_baselines(X_train, y_train) -> Dict[str, object]:
    """Fit the baseline regressors on the given training data.

    Args:
        X_train: Feature matrix passed unchanged to each model's ``fit``.
        y_train: Target values passed unchanged to each model's ``fit``.

    Returns:
        Dict mapping model name to the fitted estimator. Always contains
        ``'Ridge'``, ``'Lasso'`` and ``'RF'``; ``'LightGBM'`` is added only
        when the optional ``lightgbm`` import succeeded.
    """
    models: Dict[str, object] = {
        'Ridge': Ridge().fit(X_train, y_train),
        'Lasso': Lasso().fit(X_train, y_train),
        'RF': RandomForestRegressor(
            n_estimators=100, random_state=42
        ).fit(X_train, y_train),
    }
    if LGB_AVAILABLE:
        models['LightGBM'] = lgb.LGBMRegressor(
            n_estimators=100
        ).fit(X_train, y_train)
    return models


def cv_score_ts(model, X, y, n_splits=5, scoring='neg_mean_absolute_error'):
    """Return the mean cross-validation score using ``TimeSeriesSplit``.

    Args:
        model: Estimator passed to ``cross_val_score``.
        X: Feature matrix; rows are assumed to already be in time order,
            since ``TimeSeriesSplit`` splits by row position.
        y: Target values aligned with *X*.
        n_splits: Number of time-series splits.
        scoring: Scorer name forwarded to ``cross_val_score``. With the
            default, the result is the NEGATED mean absolute error, so
            values closer to zero are better.

    Returns:
        The mean of the per-split scores.
    """
    tscv = TimeSeriesSplit(n_splits=n_splits)
    scores = cross_val_score(model, X, y, cv=tscv, scoring=scoring)
    return scores.mean()