| from __future__ import annotations |
| import numpy as np |
| import pandas as pd |
| from sklearn.ensemble import GradientBoostingRegressor |
| import emflow as ef |
|
|
|
|
class QuantileRegressionPredictor(ef.Predictor):
    """Per-column quantile forecaster using gradient boosting on calendar and lag features.

    For every column of the training frame a separate
    ``GradientBoostingRegressor(loss="quantile")`` is fitted. The features for
    a row are the hour, day of week and month taken from the frame's index,
    plus the column's own value shifted by each entry of ``lags``.

    The frame's index must expose ``.hour``, ``.dayofweek`` and ``.month``
    (e.g. a ``pandas.DatetimeIndex``).

    NOTE(review): ``ef.Predictor.__init__`` is not invoked here, matching the
    original implementation; confirm the base class needs no initialisation.
    """

    # Columns with fewer complete (NaN-free) rows than this are not modelled.
    _MIN_TRAIN_ROWS = 100

    def __init__(self, name: str = "quantile-regression-ar", quantile: float = 0.5, lags=(1, 24)):
        """Create an untrained predictor.

        Args:
            name: Identifier stored on the instance as ``self.name``.
            quantile: Target quantile, forwarded as ``alpha`` to the regressor.
            lags: Row offsets used for the autoregressive features. Any
                iterable of ints; it is copied into a list.
        """
        self.name = name
        self.lags = list(lags)
        self.quantile = quantile
        # Maps column name -> fitted regressor, or None when the column had
        # too little data to train on.
        self.models = {}

    def _prepare_features(self, df: pd.DataFrame, col: str) -> pd.DataFrame:
        """Build the feature matrix for one column.

        Args:
            df: Frame whose index provides ``hour``, ``dayofweek`` and ``month``.
            col: Name of the column the lag features are derived from.

        Returns:
            A frame indexed like ``df`` with columns ``hour``, ``day_of_week``,
            ``month`` and one ``lag_<k>h`` column per entry ``k`` of
            ``self.lags``. The first ``k`` rows of each lag column are NaN.
        """
        series = df[col]
        index = df.index

        features = {
            "hour": index.hour,
            "day_of_week": index.dayofweek,
            "month": index.month,
        }
        # ``shift`` moves by rows, not by time, so the "h" suffix is accurate
        # only if rows are hourly -- presumably the case; verify with callers.
        for lag in self.lags:
            features[f"lag_{lag}h"] = series.shift(lag)

        return pd.DataFrame(features, index=index)

    def train(self, train_df: pd.DataFrame) -> "QuantileRegressionPredictor":
        """Fit one quantile regressor per column of ``train_df``.

        Rows with a NaN in any feature or in the target are excluded. A column
        left with fewer than ``_MIN_TRAIN_ROWS`` usable rows gets ``None``
        stored in ``self.models`` instead of a model.

        Args:
            train_df: Training data; a ``Series`` is converted to a one-column
                frame.

        Returns:
            ``self``, so calls can be chained.
        """
        if isinstance(train_df, pd.Series):
            train_df = train_df.to_frame()

        for col in train_df.columns:
            X = self._prepare_features(train_df, col)
            y = train_df[col]

            # Pick complete rows with a positional mask instead of
            # concatenating X and y: a target column whose name equals a
            # feature name (e.g. "hour") would otherwise collide and corrupt
            # both the feature matrix and the target.
            complete = X.notna().all(axis=1).to_numpy() & y.notna().to_numpy()
            if complete.sum() < self._MIN_TRAIN_ROWS:
                self.models[col] = None
                continue

            model = GradientBoostingRegressor(
                loss="quantile",
                alpha=self.quantile,
                n_estimators=100,
                max_depth=5,
                random_state=42,
            )
            model.fit(X[complete], y[complete])
            self.models[col] = model
        return self

    def predict(self, input_df: pd.DataFrame) -> pd.DataFrame:
        """Predict the configured quantile for every row and column of ``input_df``.

        Rows whose features contain NaN (in particular the leading rows that
        lack lagged values) and columns without a trained model yield NaN.

        Args:
            input_df: Data to predict on; a ``Series`` is converted to a
                one-column frame.

        Returns:
            A frame with the same index and columns as ``input_df``.
        """
        if isinstance(input_df, pd.Series):
            input_df = input_df.to_frame()

        n_rows = len(input_df)
        preds = {}
        for col in input_df.columns:
            # Start from all-NaN so anything we cannot predict stays missing.
            out = np.full(n_rows, np.nan)

            model = self.models.get(col)
            if model is not None:
                X = self._prepare_features(input_df, col)
                valid = X.notna().all(axis=1).to_numpy()
                if valid.any():
                    out[valid] = model.predict(X[valid])

            preds[col] = out

        return pd.DataFrame(preds, index=input_df.index, columns=input_df.columns)
|
|
|
|
# Module-level predictor instance built with the default configuration.
# NOTE(review): presumably looked up by name by the emflow harness -- confirm
# before renaming or removing.
model = QuantileRegressionPredictor()
|
|