File size: 1,522 Bytes
b34a74f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# lab4_functions.py
"""
Простейший набор для ЛР №4:
- Feature engineering (lags, rolling)
- Обучение Ridge, Lasso, RandomForest, LightGBM (опционально)
- TimeSeriesSplit wrapper
"""
import warnings
from typing import List, Dict, Optional

import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.metrics import mean_absolute_error
# Install a global "ignore" filter: from here on every warning category is
# suppressed for the whole process, not only for code in this module.
warnings.filterwarnings("ignore")

# LightGBM is an optional dependency. LGB_AVAILABLE records whether the import
# worked; train_baselines consults it before touching the `lgb` name.
try:
    import lightgbm as lgb
    LGB_AVAILABLE = True
except Exception:
    # Deliberately broad: any exception raised while importing (not just
    # ImportError) marks LightGBM as unavailable instead of failing the module.
    LGB_AVAILABLE = False

def make_lag_features(
    df: pd.DataFrame,
    target: str,
    lags: Optional[List[int]] = None,
    time_col: str = 'timestamp',
) -> pd.DataFrame:
    """Add lagged copies of ``target`` as new feature columns.

    The input frame is copied (the caller's frame is never modified), indexed
    by ``time_col`` and sorted by that index, so each lag looks back in
    chronological order. For every ``lag`` a column named
    ``f'{target}_lag_{lag}'`` is added.

    Note that the shift is positional: a lag of ``k`` means "``k`` rows
    earlier", which equals ``k`` time units only when rows are evenly spaced.

    Args:
        df: Source data; must contain the ``time_col`` and ``target`` columns.
        target: Name of the column to lag.
        lags: Row offsets to shift by. Defaults to ``[1, 7, 30]`` when omitted.
        time_col: Name of the column used for chronological ordering.

    Returns:
        A new frame with ``time_col`` restored as a regular column, the lag
        columns appended, and every row containing a NaN in *any* column
        removed (this drops the warm-up rows at the start of the series as
        well as rows that already had missing values).

    Raises:
        KeyError: If ``time_col`` or ``target`` is not a column of ``df``.
    """
    # A None sentinel avoids sharing one mutable default list between calls.
    if lags is None:
        lags = [1, 7, 30]

    frame = df.copy().set_index(time_col).sort_index()
    for lag in lags:
        frame[f'{target}_lag_{lag}'] = frame[target].shift(lag)

    # The first max(lags) rows have no history, so their lag values are NaN.
    return frame.dropna().reset_index()

def train_baselines(X_train, y_train):
    """Fit the baseline regressors on one training set.

    Ridge, Lasso (both with library-default settings) and a 100-tree random
    forest seeded with ``random_state=42`` are always trained. A 100-estimator
    LightGBM regressor is added only when ``LGB_AVAILABLE`` is true.

    Args:
        X_train: Training feature matrix, passed unchanged to each ``fit``.
        y_train: Training target values, passed unchanged to each ``fit``.

    Returns:
        Dict mapping the model label (``'Ridge'``, ``'Lasso'``, ``'RF'`` and,
        when available, ``'LightGBM'``) to whatever that estimator's ``fit``
        call returns.
    """
    # Listed in the order they are fitted and inserted into the result.
    candidates = [
        ('Ridge', Ridge()),
        ('Lasso', Lasso()),
        ('RF', RandomForestRegressor(n_estimators=100, random_state=42)),
    ]
    if LGB_AVAILABLE:
        candidates.append(('LightGBM', lgb.LGBMRegressor(n_estimators=100)))

    return {label: estimator.fit(X_train, y_train) for label, estimator in candidates}

def cv_score_ts(model, X, y, n_splits=5, scoring='neg_mean_absolute_error'):
    """Cross-validate ``model`` with time-ordered splits and average the scores.

    Args:
        model: Estimator handed to ``cross_val_score``.
        X: Feature matrix.
        y: Target values.
        n_splits: Number of splits given to ``TimeSeriesSplit``.
        scoring: Scoring name forwarded to ``cross_val_score``. With the
            default ``'neg_mean_absolute_error'`` the result is a negated
            error, so values closer to zero are better.

    Returns:
        The mean of the per-fold scores.
    """
    fold_scores = cross_val_score(
        model,
        X,
        y,
        cv=TimeSeriesSplit(n_splits=n_splits),
        scoring=scoring,
    )
    return fold_scores.mean()