Spaces:
No application file
No application file
# lab4_functions.py
"""
Minimal toolkit for lab assignment No. 4:
- Feature engineering (lags, rolling)
- Training Ridge, Lasso, RandomForest, LightGBM (optional)
- TimeSeriesSplit wrapper
"""
import warnings
from typing import Dict, List, Optional

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge, Lasso
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
# Suppress all warnings globally from this point on.
warnings.filterwarnings("ignore")

# LightGBM is an optional dependency: record whether the import succeeded so
# that code using it can be skipped when the package is missing or fails to
# load for any reason.
try:
    import lightgbm as lgb
except Exception:
    LGB_AVAILABLE = False
else:
    LGB_AVAILABLE = True
def make_lag_features(
    df: pd.DataFrame,
    target: str,
    lags: Optional[List[int]] = None,
):
    """Add lagged copies of ``target`` as new columns, ordered by timestamp.

    The frame is copied, indexed by its ``'timestamp'`` column and sorted on
    that index, so each lag is a shift by ``lag`` rows in timestamp order
    (a row count, not a calendar offset).

    Args:
        df: Input frame; must contain a ``'timestamp'`` column and the
            ``target`` column. It is not modified.
        target: Name of the column to lag.
        lags: Row offsets to shift by. Defaults to ``[1, 7, 30]`` when
            omitted or ``None``.

    Returns:
        A new DataFrame with ``timestamp`` restored as a regular column and
        one ``f'{target}_lag_{lag}'`` column per requested lag. Every row
        that contains a NaN in *any* column is dropped; for positive lags
        this removes the leading rows whose lagged value is undefined.
    """
    # A None sentinel replaces the former mutable list default, which was a
    # single list object shared by every call.
    if lags is None:
        lags = [1, 7, 30]

    dfc = df.copy().set_index('timestamp').sort_index()
    for lag in lags:
        dfc[f'{target}_lag_{lag}'] = dfc[target].shift(lag)
    return dfc.dropna().reset_index()
def train_baselines(X_train, y_train):
    """Fit every baseline regressor on the same training data.

    Ridge and Lasso are created with their default settings, the random
    forest with 100 trees and ``random_state=42``. A LightGBM regressor with
    100 estimators is added only when ``LGB_AVAILABLE`` is true.

    Args:
        X_train: Training features, passed unchanged to each ``fit`` call.
        y_train: Training target, passed unchanged to each ``fit`` call.

    Returns:
        Dict mapping the model name (``'Ridge'``, ``'Lasso'``, ``'RF'`` and
        optionally ``'LightGBM'``) to the object returned by its ``fit``.
    """
    estimators = {
        'Ridge': Ridge(),
        'Lasso': Lasso(),
        'RF': RandomForestRegressor(n_estimators=100, random_state=42),
    }
    if LGB_AVAILABLE:
        estimators['LightGBM'] = lgb.LGBMRegressor(n_estimators=100)

    # Fit in insertion order so the result keeps the same key order.
    return {
        name: estimator.fit(X_train, y_train)
        for name, estimator in estimators.items()
    }
def cv_score_ts(model, X, y, n_splits=5, scoring='neg_mean_absolute_error'):
    """Return the mean cross-validation score over time-ordered splits.

    Args:
        model: Estimator handed to ``cross_val_score``.
        X: Feature matrix.
        y: Target values.
        n_splits: Number of splits given to ``TimeSeriesSplit``.
        scoring: Scoring name forwarded to ``cross_val_score``. With the
            default, the scores are negated mean absolute errors.

    Returns:
        The mean of the per-fold scores.
    """
    fold_scores = cross_val_score(
        model,
        X,
        y,
        cv=TimeSeriesSplit(n_splits=n_splits),
        scoring=scoring,
    )
    return fold_scores.mean()