Daniel Varga committed on
Commit
41bce18
·
1 Parent(s): 64b066d

test_predictor_statsforecast.py has MSTL and SeasonalNaive correctly set up.

Browse files
v2/test_predictor_statsforecast.py CHANGED
@@ -6,19 +6,21 @@ import matplotlib.pyplot as plt
6
 
7
  from statsforecast import StatsForecast
8
  from statsforecast.models import (
 
9
  AutoARIMA,
10
  AutoETS,
11
  AutoCES,
12
  DynamicOptimizedTheta,
 
13
  SeasonalNaive,
14
  )
 
15
 
16
 
17
  os.environ["NIXTLA_NUMBA_RELEASE_GIL"] = "1"
18
  os.environ["NIXTLA_NUMBA_CACHE"] = "1"
19
 
20
 
21
-
22
  data = pd.read_csv("terheles_fixed.tsv", sep="\t")
23
  data['ds'] = pd.to_datetime(data['Korrigált időpont'])
24
  data['y'] = data['Hatásos teljesítmény']
@@ -26,10 +28,10 @@ data['y'] = data['Hatásos teljesítmény']
26
  data = data[['ds', 'y']]
27
  data['unique_id'] = 1
28
 
29
- data = data[data['ds'] < '2019-09-01']
30
  Y_df = data
31
 
32
- train_df = Y_df[Y_df['ds'] < '2019-08-01']
33
 
34
 
35
  horizon = 4 * 24 * 7 # 7 days
@@ -97,22 +99,65 @@ def run_statistical_ensemble(
97
  return fcsts_df, total_time, model_name
98
 
99
 
100
- seasonality = 4 * 24 * 7 # 1 week
 
 
101
  models = [
102
- AutoARIMA(season_length=seasonality),
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  AutoETS(season_length=seasonality),
104
  AutoCES(season_length=seasonality),
105
  DynamicOptimizedTheta(season_length=seasonality),
106
- ]
 
 
107
  freq = '15min'
108
  sf = StatsForecast(
109
- models=models[:1],
110
  freq=freq,
111
  n_jobs=1,
112
  )
113
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  print("starting forecast, dataset size", len(train_df))
115
- Y_hat_df = sf.forecast(df=train_df, h=horizon, level=[68.27])
 
 
116
 
117
  print(Y_hat_df)
118
 
@@ -123,10 +168,10 @@ fig, ax = plt.subplots(1, 1, figsize = (20, 7))
123
  # plot_df = pd.concat([Y_df, Y_hat_df]).set_index('ds') # Concatenate the train and forecast dataframes
124
  # plot_df[['y', 'LSTM', 'NHITS']].plot(ax=ax, linewidth=2)
125
 
126
- plot_Y_df = Y_df[Y_df['ds'] > '2019-07-01']
127
  plot_Y_df = plot_Y_df.set_index('ds')[['y']]
128
  plot_Y_df.plot(ax=ax, linewidth=1)
129
- Y_hat_df.set_index('ds')[['PatchTST', 'NHITS']].plot(ax=ax, linewidth=1)
130
 
131
 
132
  ax.set_title('AirPassengers Forecast', fontsize=22)
 
6
 
7
import sys

from datasetsforecast.losses import rmse, mae
from statsforecast import StatsForecast
from statsforecast.models import (
    ARIMA,
    AutoARIMA,
    AutoETS,
    AutoCES,
    DynamicOptimizedTheta,
    MSTL,
    SeasonalNaive,
)
18
 
19
 
20
  os.environ["NIXTLA_NUMBA_RELEASE_GIL"] = "1"
21
  os.environ["NIXTLA_NUMBA_CACHE"] = "1"
22
 
23
 
 
24
# Load the quarter-hourly consumption dataset (TSV with Hungarian column headers)
# and rename the columns to the unique_id / ds / y schema that StatsForecast expects.
data = pd.read_csv("terheles_fixed.tsv", sep="\t")
data['ds'] = pd.to_datetime(data['Korrigált időpont'])
data['y'] = data['Hatásos teljesítmény']
data = data[['ds', 'y']]
# Single series, so a constant id is enough.
data['unique_id'] = 1
# Keep only data before March 2019; February is held out as the evaluation tail.
data = data[data['ds'] < '2019-03-01']
Y_df = data
train_df = Y_df[Y_df['ds'] < '2019-02-01']

# Forecast horizon in 15-minute steps: 4 per hour * 24 hours * 7 days.
horizon = 4 * 24 * 7 # 7 days
 
99
  return fcsts_df, total_time, model_name
100
 
101
 
102
# unlike MSTL, the others only allow a single season_length:
seasonality = 4 * 24 * 1 # 1 day

models = [
    # MSTL handles multiple seasonalities via seasonal-trend decomposition.
    MSTL(
        season_length=[4 * 24, 4 * 24 * 7], # seasonalities of the time series
        trend_forecaster=AutoARIMA() # model used to forecast trend
    ),
    # Baseline: repeat the value from one day (= season_length steps) earlier.
    SeasonalNaive(season_length=seasonality)
]

# Flip to True to also benchmark the slower single-seasonality models.
EXTENDED_TEST = False
if EXTENDED_TEST:
    models += [
        # AutoARIMA(season_length=4 * 24) is just too slow, never even finishes,
        # spends all its time in scipy bfgs.
        # which is weird, because it works okay as trend-detector of MSTL.
        AutoARIMA(),
        AutoETS(season_length=seasonality),
        AutoCES(season_length=seasonality),
        DynamicOptimizedTheta(season_length=seasonality),
    ]


# 15-minute sampling frequency, single-process execution.
freq = '15min'
sf = StatsForecast(
    models=models,
    freq=freq,
    n_jobs=1,
)
132
 
133
+
134
# Forecast columns are looked up by repr(model); assumes StatsForecast names
# its output columns the same way — confirmed by the indexing below working.
model_names = [repr(model) for model in models]


# Rolling-origin crossvalidation over as many non-overlapping horizon-sized
# windows as fit into the training data (minus one to leave room for history).
n_windows = len(train_df) // horizon - 1
print("crossvalidation with", n_windows, "windows")
print("models:", ", ".join(model_names))
crossvalidation_df = sf.cross_validation(df=train_df, h=horizon, step_size=horizon, n_windows=n_windows)

# Aggregate accuracy per model across all crossvalidation windows.
for model_name in model_names:
    rmse_crossval = rmse(crossvalidation_df['y'], crossvalidation_df[model_name])
    mae_crossval = mae(crossvalidation_df['y'], crossvalidation_df[model_name])
    print(model_name, "RMSE", rmse_crossval, "MAE", mae_crossval)


# Stop after the crossvalidation report.  Everything below is work in progress
# and intentionally unreachable for now.  sys.exit() instead of the bare
# exit() builtin, which is a site-module convenience not guaranteed to exist
# in all execution contexts.
sys.exit()

sf.fit(train_df)

# Plot the fitted MSTL decomposition (trend + seasonal components) for the
# last two weeks of the training data.
sf.fitted_[0, 0].model_.tail(4 * 24 * 7 * 2).plot(subplots=True, grid=True)
plt.tight_layout()
plt.show()


print("starting forecast, dataset size", len(train_df))
# Y_hat_df = sf.forecast(df=train_df, h=horizon, level=[68.27])
# level=[68.27] requests roughly a one-sigma prediction interval.
Y_hat_df = sf.predict(h=horizon, level=[68.27])


print(Y_hat_df)
163
 
 
168
# plot_df = pd.concat([Y_df, Y_hat_df]).set_index('ds') # Concatenate the train and forecast dataframes
# plot_df[['y', 'LSTM', 'NHITS']].plot(ax=ax, linewidth=2)

# Plot the full observed series (the date filter is disabled for now).
plot_Y_df = Y_df # [Y_df['ds'] > '2019-07-01']
plot_Y_df = plot_Y_df.set_index('ds')[['y']]
plot_Y_df.plot(ax=ax, linewidth=1)
# Overlay every forecast column (point forecasts and interval bounds).
Y_hat_df.set_index('ds').plot(ax=ax, linewidth=1)


# NOTE(review): title is a leftover from the AirPassengers tutorial — rename
# to match this dataset.
ax.set_title('AirPassengers Forecast', fontsize=22)
v2/test_sf_vs_prophet.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://github.com/Nixtla/statsforecast/tree/main/experiments/mstl
2
+ # https://github.com/Nixtla/statsforecast/blob/main/experiments/mstl/src/main.py
3
+
4
+ from time import time
5
+
6
+ import pandas as pd
7
+ import numpy as np
8
+ from datasetsforecast.losses import (
9
+ mae, mape, mase, rmse, smape
10
+ )
11
+ from fire import Fire
12
+ from neuralprophet import NeuralProphet
13
+ from prophet import Prophet
14
+ from prophet.diagnostics import cross_validation
15
+ from statsforecast import StatsForecast
16
+ from statsforecast.models import MSTL, AutoARIMA, SeasonalNaive
17
+
18
def evaluate_performace(y_hist, y_true, models):
    """Score each model at each crossvalidation cutoff.

    Builds one row per (model, cutoff) pair with one column per metric
    (mase, mae, mape, rmse, smape).  MASE additionally uses the in-sample
    history up to the cutoff, with seasonality=24 (hourly data, daily season).

    y_hist: DataFrame with 'ds' and 'y' (training history).
    y_true: crossvalidation frame with 'cutoff', 'y' and one column per model.
    models: list of column names in y_true to evaluate.
    Returns a DataFrame indexed by model name, with a leading 'cutoff' column.
    """
    per_cutoff_frames = []
    metrics = [mase, mae, mape, rmse, smape]
    for cutoff in y_true['cutoff'].unique():
        window = y_true.query('cutoff == @cutoff')
        actual = window['y'].values
        scores = {}
        for model in models:
            predicted = window[model].values
            row = {}
            for metric in metrics:
                name = metric.__name__
                if name == 'mase':
                    # MASE scales the error by the seasonal-naive error on history.
                    row[name] = metric(
                        actual,
                        predicted,
                        y_hist.query('ds <= @cutoff')['y'].values,
                        seasonality=24
                    )
                else:
                    row[name] = metric(actual, predicted)
            scores[model] = row
        frame = pd.DataFrame(scores).T
        frame.insert(0, 'cutoff', cutoff)
        frame.index = frame.index.rename('model')
        per_cutoff_frames.append(frame)
    return pd.concat(per_cutoff_frames)
44
+
45
+ def experiment():
46
+ filename = 'https://github.com/archd3sai/Hourly-Energy-Consumption-Prediction/raw/master/PJME_hourly.csv'
47
+ # 404: filename = 'https://raw.githubusercontent.com/jnagura/Energy-consumption-prediction-analysis/master/PJM_Load_hourly.csv'
48
+ df = pd.read_csv(filename)
49
+ df.columns = ['ds', 'y']
50
+ df.insert(0, 'unique_id', 'PJM_Load_hourly')
51
+ df['ds'] = pd.to_datetime(df['ds'])
52
+ df = df.sort_values(['unique_id', 'ds']).reset_index(drop=True)
53
+
54
+ # MSTL model
55
+ mstl = MSTL(
56
+ season_length=[24, 24 * 7], # seasonalities of the time series
57
+ trend_forecaster=AutoARIMA() # model used to forecast trend
58
+ )
59
+ sf = StatsForecast(
60
+ df=df,
61
+ models=[mstl],
62
+ freq='H'
63
+ )
64
+ init = time()
65
+ forecasts_cv = sf.cross_validation(h=24, n_windows=7, step_size=24)
66
+ end = time()
67
+ time_mstl = (end - init) / 60
68
+ print(f'MSTL Time: {time_mstl:.2f} minutes')
69
+
70
+ # SeasonalNaive model
71
+ sf = StatsForecast(
72
+ df=df,
73
+ models=[SeasonalNaive(season_length=24)],
74
+ freq='H'
75
+ )
76
+ init = time()
77
+ forecasts_cv_seas = sf.cross_validation(h=24, n_windows=7, step_size=24)
78
+ end = time()
79
+ time_seas = (end - init) / 60
80
+ print(f'SeasonalNaive Time: {time_seas:.2f} minutes')
81
+ forecasts_cv = forecasts_cv.merge(forecasts_cv_seas.drop(columns='y'), how='left', on=['unique_id', 'ds', 'cutoff'])
82
+
83
+ cutoffs = forecasts_cv['cutoff'].unique()
84
+ # Prophet model
85
+ forecasts_cv['Prophet'] = None
86
+ time_prophet = 0
87
+ for cutoff in cutoffs:
88
+ df_train = df.query('ds <= @cutoff')
89
+ prophet = Prophet()
90
+ # produce forecasts
91
+ init = time()
92
+ prophet.fit(df_train)
93
+ # produce forecasts
94
+ future = prophet.make_future_dataframe(periods=24, freq='H', include_history=False)
95
+ forecast_prophet = prophet.predict(future)
96
+ end = time()
97
+ assert (forecast_prophet['ds'].values == forecasts_cv.query('cutoff == @cutoff')['ds']).all()
98
+ forecasts_cv.loc[forecasts_cv['cutoff'] == cutoff, 'Prophet'] = forecast_prophet['yhat'].values
99
+ # data wrangling
100
+ time_prophet += (end - init) / 60
101
+ print(f'Prophet Time: {time_prophet:.2f} minutes')
102
+ times = pd.DataFrame({
103
+ 'model': ['MSTL', 'SeasonalNaive', 'Prophet'],
104
+ 'time (mins)': [time_mstl, time_seas, time_prophet]
105
+ })
106
+
107
+ # NeuralProphet
108
+ forecasts_cv['NeuralProphet'] = None
109
+ time_np = 0
110
+ for cutoff in cutoffs:
111
+ df_train = df.query('ds <= @cutoff')
112
+ neuralprophet = NeuralProphet()
113
+ init = time()
114
+ neuralprophet.fit(df_train.drop(columns='unique_id'))
115
+ future = neuralprophet.make_future_dataframe(df=df_train.drop(columns='unique_id'), periods=24)
116
+ forecast_np = neuralprophet.predict(future)
117
+ end = time()
118
+ assert (forecast_np['ds'].values == forecasts_cv.query('cutoff == @cutoff')['ds']).all()
119
+ forecasts_cv.loc[forecasts_cv['cutoff'] == cutoff, 'NeuralProphet'] = forecast_np['yhat1'].values
120
+ time_np += (end - init) / 60
121
+ print(f'NeuralProphet Time: {time_np:.2f} minutes')
122
+ times = times.append({'model': 'NeuralProphet', 'time (mins)': time_np}, ignore_index=True)
123
+ # Final evalaution
124
+ evaluation = evaluate_performace(df_train, forecasts_cv, models=['MSTL', 'NeuralProphet', 'Prophet', 'SeasonalNaive'])
125
+ print(times)
126
+ print(evaluation)
127
+ print(evaluation.groupby('model').mean(numeric_only=True))
128
+
129
+
130
if __name__ == "__main__":
    # Fire exposes experiment() as a command-line entry point.
    Fire(experiment)