Spaces:
Sleeping
Sleeping
Daniel Varga
committed on
Commit
·
41bce18
1
Parent(s):
64b066d
test_predictor_statsforecast.py has MSTL and SeasonalNaive correctly set up.
Browse files- v2/test_predictor_statsforecast.py +55 -10
- v2/test_sf_vs_prophet.py +131 -0
v2/test_predictor_statsforecast.py
CHANGED
|
@@ -6,19 +6,21 @@ import matplotlib.pyplot as plt
|
|
| 6 |
|
| 7 |
from statsforecast import StatsForecast
|
| 8 |
from statsforecast.models import (
|
|
|
|
| 9 |
AutoARIMA,
|
| 10 |
AutoETS,
|
| 11 |
AutoCES,
|
| 12 |
DynamicOptimizedTheta,
|
|
|
|
| 13 |
SeasonalNaive,
|
| 14 |
)
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
os.environ["NIXTLA_NUMBA_RELEASE_GIL"] = "1"
|
| 18 |
os.environ["NIXTLA_NUMBA_CACHE"] = "1"
|
| 19 |
|
| 20 |
|
| 21 |
-
|
| 22 |
data = pd.read_csv("terheles_fixed.tsv", sep="\t")
|
| 23 |
data['ds'] = pd.to_datetime(data['Korrigált időpont'])
|
| 24 |
data['y'] = data['Hatásos teljesítmény']
|
|
@@ -26,10 +28,10 @@ data['y'] = data['Hatásos teljesítmény']
|
|
| 26 |
data = data[['ds', 'y']]
|
| 27 |
data['unique_id'] = 1
|
| 28 |
|
| 29 |
-
data = data[data['ds'] < '2019-
|
| 30 |
Y_df = data
|
| 31 |
|
| 32 |
-
train_df = Y_df[Y_df['ds'] < '2019-
|
| 33 |
|
| 34 |
|
| 35 |
horizon = 4 * 24 * 7 # 7 days
|
|
@@ -97,22 +99,65 @@ def run_statistical_ensemble(
|
|
| 97 |
return fcsts_df, total_time, model_name
|
| 98 |
|
| 99 |
|
| 100 |
-
|
|
|
|
|
|
|
| 101 |
models = [
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
AutoETS(season_length=seasonality),
|
| 104 |
AutoCES(season_length=seasonality),
|
| 105 |
DynamicOptimizedTheta(season_length=seasonality),
|
| 106 |
-
]
|
|
|
|
|
|
|
| 107 |
freq = '15min'
|
| 108 |
sf = StatsForecast(
|
| 109 |
-
models=models
|
| 110 |
freq=freq,
|
| 111 |
n_jobs=1,
|
| 112 |
)
|
| 113 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
print("starting forecast, dataset size", len(train_df))
|
| 115 |
-
Y_hat_df = sf.forecast(df=train_df, h=horizon, level=[68.27])
|
|
|
|
|
|
|
| 116 |
|
| 117 |
print(Y_hat_df)
|
| 118 |
|
|
@@ -123,10 +168,10 @@ fig, ax = plt.subplots(1, 1, figsize = (20, 7))
|
|
| 123 |
# plot_df = pd.concat([Y_df, Y_hat_df]).set_index('ds') # Concatenate the train and forecast dataframes
|
| 124 |
# plot_df[['y', 'LSTM', 'NHITS']].plot(ax=ax, linewidth=2)
|
| 125 |
|
| 126 |
-
plot_Y_df = Y_df[Y_df['ds'] > '2019-07-01']
|
| 127 |
plot_Y_df = plot_Y_df.set_index('ds')[['y']]
|
| 128 |
plot_Y_df.plot(ax=ax, linewidth=1)
|
| 129 |
-
Y_hat_df.set_index('ds')
|
| 130 |
|
| 131 |
|
| 132 |
ax.set_title('AirPassengers Forecast', fontsize=22)
|
|
|
|
| 6 |
|
| 7 |
from statsforecast import StatsForecast
|
| 8 |
from statsforecast.models import (
|
| 9 |
+
ARIMA,
|
| 10 |
AutoARIMA,
|
| 11 |
AutoETS,
|
| 12 |
AutoCES,
|
| 13 |
DynamicOptimizedTheta,
|
| 14 |
+
MSTL,
|
| 15 |
SeasonalNaive,
|
| 16 |
)
|
| 17 |
+
from datasetsforecast.losses import rmse, mae
|
| 18 |
|
| 19 |
|
| 20 |
os.environ["NIXTLA_NUMBA_RELEASE_GIL"] = "1"
|
| 21 |
os.environ["NIXTLA_NUMBA_CACHE"] = "1"
|
| 22 |
|
| 23 |
|
|
|
|
| 24 |
data = pd.read_csv("terheles_fixed.tsv", sep="\t")
|
| 25 |
data['ds'] = pd.to_datetime(data['Korrigált időpont'])
|
| 26 |
data['y'] = data['Hatásos teljesítmény']
|
|
|
|
| 28 |
data = data[['ds', 'y']]
|
| 29 |
data['unique_id'] = 1
|
| 30 |
|
| 31 |
+
data = data[data['ds'] < '2019-03-01']
|
| 32 |
Y_df = data
|
| 33 |
|
| 34 |
+
train_df = Y_df[Y_df['ds'] < '2019-02-01']
|
| 35 |
|
| 36 |
|
| 37 |
horizon = 4 * 24 * 7 # 7 days
|
|
|
|
| 99 |
return fcsts_df, total_time, model_name
|
| 100 |
|
| 101 |
|
| 102 |
+
# unlike MSTL, the others only allow a single season_length:
|
| 103 |
+
seasonality = 4 * 24 * 1 # 1 day
|
| 104 |
+
|
| 105 |
models = [
|
| 106 |
+
MSTL(
|
| 107 |
+
season_length=[4 * 24, 4 * 24 * 7], # seasonalities of the time series
|
| 108 |
+
trend_forecaster=AutoARIMA() # model used to forecast trend
|
| 109 |
+
),
|
| 110 |
+
SeasonalNaive(season_length=seasonality)
|
| 111 |
+
]
|
| 112 |
+
|
| 113 |
+
EXTENDED_TEST = False
|
| 114 |
+
if EXTENDED_TEST:
|
| 115 |
+
models += [
|
| 116 |
+
# AutoARIMA(season_length=4 * 24) is just too slow, never even finishes,
|
| 117 |
+
# spends all its time in scipy bfgs.
|
| 118 |
+
# which is weird, because it's works okay as trend-detector of MSTL.
|
| 119 |
+
AutoARIMA(),
|
| 120 |
AutoETS(season_length=seasonality),
|
| 121 |
AutoCES(season_length=seasonality),
|
| 122 |
DynamicOptimizedTheta(season_length=seasonality),
|
| 123 |
+
]
|
| 124 |
+
|
| 125 |
+
|
| 126 |
freq = '15min'
|
| 127 |
sf = StatsForecast(
|
| 128 |
+
models=models,
|
| 129 |
freq=freq,
|
| 130 |
n_jobs=1,
|
| 131 |
)
|
| 132 |
|
| 133 |
+
|
| 134 |
+
model_names = [repr(model) for model in models]
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
n_windows = len(train_df) // horizon - 1
|
| 138 |
+
print("crossvalidation with", n_windows, "windows")
|
| 139 |
+
print("models:", ", ".join(model_names))
|
| 140 |
+
crossvalidation_df = sf.cross_validation(df=train_df, h=horizon, step_size=horizon, n_windows=n_windows)
|
| 141 |
+
|
| 142 |
+
for model_name in model_names:
|
| 143 |
+
rmse_crossval = rmse(crossvalidation_df['y'], crossvalidation_df[model_name])
|
| 144 |
+
mae_crossval = mae(crossvalidation_df['y'], crossvalidation_df[model_name])
|
| 145 |
+
print(model_name, "RMSE", rmse_crossval, "MAE", mae_crossval)
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
exit()
|
| 149 |
+
|
| 150 |
+
sf.fit(train_df)
|
| 151 |
+
|
| 152 |
+
sf.fitted_[0, 0].model_.tail(4 * 24 * 7 * 2).plot(subplots=True, grid=True)
|
| 153 |
+
plt.tight_layout()
|
| 154 |
+
plt.show()
|
| 155 |
+
|
| 156 |
+
|
| 157 |
print("starting forecast, dataset size", len(train_df))
|
| 158 |
+
# Y_hat_df = sf.forecast(df=train_df, h=horizon, level=[68.27])
|
| 159 |
+
Y_hat_df = sf.predict(h=horizon, level=[68.27])
|
| 160 |
+
|
| 161 |
|
| 162 |
print(Y_hat_df)
|
| 163 |
|
|
|
|
| 168 |
# plot_df = pd.concat([Y_df, Y_hat_df]).set_index('ds') # Concatenate the train and forecast dataframes
|
| 169 |
# plot_df[['y', 'LSTM', 'NHITS']].plot(ax=ax, linewidth=2)
|
| 170 |
|
| 171 |
+
plot_Y_df = Y_df # [Y_df['ds'] > '2019-07-01']
|
| 172 |
plot_Y_df = plot_Y_df.set_index('ds')[['y']]
|
| 173 |
plot_Y_df.plot(ax=ax, linewidth=1)
|
| 174 |
+
Y_hat_df.set_index('ds').plot(ax=ax, linewidth=1)
|
| 175 |
|
| 176 |
|
| 177 |
ax.set_title('AirPassengers Forecast', fontsize=22)
|
v2/test_sf_vs_prophet.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# https://github.com/Nixtla/statsforecast/tree/main/experiments/mstl
|
| 2 |
+
# https://github.com/Nixtla/statsforecast/blob/main/experiments/mstl/src/main.py
|
| 3 |
+
|
| 4 |
+
from time import time
|
| 5 |
+
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import numpy as np
|
| 8 |
+
from datasetsforecast.losses import (
|
| 9 |
+
mae, mape, mase, rmse, smape
|
| 10 |
+
)
|
| 11 |
+
from fire import Fire
|
| 12 |
+
from neuralprophet import NeuralProphet
|
| 13 |
+
from prophet import Prophet
|
| 14 |
+
from prophet.diagnostics import cross_validation
|
| 15 |
+
from statsforecast import StatsForecast
|
| 16 |
+
from statsforecast.models import MSTL, AutoARIMA, SeasonalNaive
|
| 17 |
+
|
| 18 |
+
def evaluate_performace(y_hist, y_true, models):
|
| 19 |
+
cutoffs = y_true['cutoff'].unique()
|
| 20 |
+
eval_ = []
|
| 21 |
+
for cutoff in cutoffs:
|
| 22 |
+
evaluation = {}
|
| 23 |
+
for model in models:
|
| 24 |
+
evaluation[model] = {}
|
| 25 |
+
for metric in [mase, mae, mape, rmse, smape]:
|
| 26 |
+
metric_name = metric.__name__
|
| 27 |
+
if metric_name == 'mase':
|
| 28 |
+
evaluation[model][metric_name] = metric(
|
| 29 |
+
y_true.query('cutoff == @cutoff')['y'].values,
|
| 30 |
+
y_true.query('cutoff == @cutoff')[model].values,
|
| 31 |
+
y_hist.query('ds <= @cutoff')['y'].values,
|
| 32 |
+
seasonality=24
|
| 33 |
+
)
|
| 34 |
+
else:
|
| 35 |
+
evaluation[model][metric_name] = metric(
|
| 36 |
+
y_true.query('cutoff == @cutoff')['y'].values,
|
| 37 |
+
y_true.query('cutoff == @cutoff')[model].values
|
| 38 |
+
)
|
| 39 |
+
eval_cutoff = pd.DataFrame(evaluation).T
|
| 40 |
+
eval_cutoff.insert(0, 'cutoff', cutoff)
|
| 41 |
+
eval_cutoff.index = eval_cutoff.index.rename('model')
|
| 42 |
+
eval_.append(eval_cutoff)
|
| 43 |
+
return pd.concat(eval_)
|
| 44 |
+
|
| 45 |
+
def experiment():
|
| 46 |
+
filename = 'https://github.com/archd3sai/Hourly-Energy-Consumption-Prediction/raw/master/PJME_hourly.csv'
|
| 47 |
+
# 404: filename = 'https://raw.githubusercontent.com/jnagura/Energy-consumption-prediction-analysis/master/PJM_Load_hourly.csv'
|
| 48 |
+
df = pd.read_csv(filename)
|
| 49 |
+
df.columns = ['ds', 'y']
|
| 50 |
+
df.insert(0, 'unique_id', 'PJM_Load_hourly')
|
| 51 |
+
df['ds'] = pd.to_datetime(df['ds'])
|
| 52 |
+
df = df.sort_values(['unique_id', 'ds']).reset_index(drop=True)
|
| 53 |
+
|
| 54 |
+
# MSTL model
|
| 55 |
+
mstl = MSTL(
|
| 56 |
+
season_length=[24, 24 * 7], # seasonalities of the time series
|
| 57 |
+
trend_forecaster=AutoARIMA() # model used to forecast trend
|
| 58 |
+
)
|
| 59 |
+
sf = StatsForecast(
|
| 60 |
+
df=df,
|
| 61 |
+
models=[mstl],
|
| 62 |
+
freq='H'
|
| 63 |
+
)
|
| 64 |
+
init = time()
|
| 65 |
+
forecasts_cv = sf.cross_validation(h=24, n_windows=7, step_size=24)
|
| 66 |
+
end = time()
|
| 67 |
+
time_mstl = (end - init) / 60
|
| 68 |
+
print(f'MSTL Time: {time_mstl:.2f} minutes')
|
| 69 |
+
|
| 70 |
+
# SeasonalNaive model
|
| 71 |
+
sf = StatsForecast(
|
| 72 |
+
df=df,
|
| 73 |
+
models=[SeasonalNaive(season_length=24)],
|
| 74 |
+
freq='H'
|
| 75 |
+
)
|
| 76 |
+
init = time()
|
| 77 |
+
forecasts_cv_seas = sf.cross_validation(h=24, n_windows=7, step_size=24)
|
| 78 |
+
end = time()
|
| 79 |
+
time_seas = (end - init) / 60
|
| 80 |
+
print(f'SeasonalNaive Time: {time_seas:.2f} minutes')
|
| 81 |
+
forecasts_cv = forecasts_cv.merge(forecasts_cv_seas.drop(columns='y'), how='left', on=['unique_id', 'ds', 'cutoff'])
|
| 82 |
+
|
| 83 |
+
cutoffs = forecasts_cv['cutoff'].unique()
|
| 84 |
+
# Prophet model
|
| 85 |
+
forecasts_cv['Prophet'] = None
|
| 86 |
+
time_prophet = 0
|
| 87 |
+
for cutoff in cutoffs:
|
| 88 |
+
df_train = df.query('ds <= @cutoff')
|
| 89 |
+
prophet = Prophet()
|
| 90 |
+
# produce forecasts
|
| 91 |
+
init = time()
|
| 92 |
+
prophet.fit(df_train)
|
| 93 |
+
# produce forecasts
|
| 94 |
+
future = prophet.make_future_dataframe(periods=24, freq='H', include_history=False)
|
| 95 |
+
forecast_prophet = prophet.predict(future)
|
| 96 |
+
end = time()
|
| 97 |
+
assert (forecast_prophet['ds'].values == forecasts_cv.query('cutoff == @cutoff')['ds']).all()
|
| 98 |
+
forecasts_cv.loc[forecasts_cv['cutoff'] == cutoff, 'Prophet'] = forecast_prophet['yhat'].values
|
| 99 |
+
# data wrangling
|
| 100 |
+
time_prophet += (end - init) / 60
|
| 101 |
+
print(f'Prophet Time: {time_prophet:.2f} minutes')
|
| 102 |
+
times = pd.DataFrame({
|
| 103 |
+
'model': ['MSTL', 'SeasonalNaive', 'Prophet'],
|
| 104 |
+
'time (mins)': [time_mstl, time_seas, time_prophet]
|
| 105 |
+
})
|
| 106 |
+
|
| 107 |
+
# NeuralProphet
|
| 108 |
+
forecasts_cv['NeuralProphet'] = None
|
| 109 |
+
time_np = 0
|
| 110 |
+
for cutoff in cutoffs:
|
| 111 |
+
df_train = df.query('ds <= @cutoff')
|
| 112 |
+
neuralprophet = NeuralProphet()
|
| 113 |
+
init = time()
|
| 114 |
+
neuralprophet.fit(df_train.drop(columns='unique_id'))
|
| 115 |
+
future = neuralprophet.make_future_dataframe(df=df_train.drop(columns='unique_id'), periods=24)
|
| 116 |
+
forecast_np = neuralprophet.predict(future)
|
| 117 |
+
end = time()
|
| 118 |
+
assert (forecast_np['ds'].values == forecasts_cv.query('cutoff == @cutoff')['ds']).all()
|
| 119 |
+
forecasts_cv.loc[forecasts_cv['cutoff'] == cutoff, 'NeuralProphet'] = forecast_np['yhat1'].values
|
| 120 |
+
time_np += (end - init) / 60
|
| 121 |
+
print(f'NeuralProphet Time: {time_np:.2f} minutes')
|
| 122 |
+
times = times.append({'model': 'NeuralProphet', 'time (mins)': time_np}, ignore_index=True)
|
| 123 |
+
# Final evaluation
|
| 124 |
+
evaluation = evaluate_performace(df_train, forecasts_cv, models=['MSTL', 'NeuralProphet', 'Prophet', 'SeasonalNaive'])
|
| 125 |
+
print(times)
|
| 126 |
+
print(evaluation)
|
| 127 |
+
print(evaluation.groupby('model').mean(numeric_only=True))
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
if __name__=="__main__":
|
| 131 |
+
Fire(experiment)
|