Daniel Varga committed on
Commit
41bce18
·
1 Parent(s): 64b066d

test_predictor_statsforecast.py has MSTL and SeasonalNaive correctly set up.

Browse files
v2/test_predictor_statsforecast.py CHANGED
@@ -6,19 +6,21 @@ import matplotlib.pyplot as plt
6
 
7
  from statsforecast import StatsForecast
8
  from statsforecast.models import (
 
9
  AutoARIMA,
10
  AutoETS,
11
  AutoCES,
12
  DynamicOptimizedTheta,
 
13
  SeasonalNaive,
14
  )
 
15
 
16
 
17
  os.environ["NIXTLA_NUMBA_RELEASE_GIL"] = "1"
18
  os.environ["NIXTLA_NUMBA_CACHE"] = "1"
19
 
20
 
21
-
22
  data = pd.read_csv("terheles_fixed.tsv", sep="\t")
23
  data['ds'] = pd.to_datetime(data['Korrigált időpont'])
24
  data['y'] = data['Hatásos teljesítmény']
@@ -26,10 +28,10 @@ data['y'] = data['Hatásos teljesítmény']
26
  data = data[['ds', 'y']]
27
  data['unique_id'] = 1
28
 
29
- data = data[data['ds'] < '2019-09-01']
30
  Y_df = data
31
 
32
- train_df = Y_df[Y_df['ds'] < '2019-08-01']
33
 
34
 
35
  horizon = 4 * 24 * 7 # 7 days
@@ -97,22 +99,65 @@ def run_statistical_ensemble(
97
  return fcsts_df, total_time, model_name
98
 
99
 
100
- seasonality = 4 * 24 * 7 # 1 week
 
 
101
  models = [
102
- AutoARIMA(season_length=seasonality),
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  AutoETS(season_length=seasonality),
104
  AutoCES(season_length=seasonality),
105
  DynamicOptimizedTheta(season_length=seasonality),
106
- ]
 
 
107
  freq = '15min'
108
  sf = StatsForecast(
109
- models=models[:1],
110
  freq=freq,
111
  n_jobs=1,
112
  )
113
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  print("starting forecast, dataset size", len(train_df))
115
- Y_hat_df = sf.forecast(df=train_df, h=horizon, level=[68.27])
 
 
116
 
117
  print(Y_hat_df)
118
 
@@ -123,10 +168,10 @@ fig, ax = plt.subplots(1, 1, figsize = (20, 7))
123
  # plot_df = pd.concat([Y_df, Y_hat_df]).set_index('ds') # Concatenate the train and forecast dataframes
124
  # plot_df[['y', 'LSTM', 'NHITS']].plot(ax=ax, linewidth=2)
125
 
126
- plot_Y_df = Y_df[Y_df['ds'] > '2019-07-01']
127
  plot_Y_df = plot_Y_df.set_index('ds')[['y']]
128
  plot_Y_df.plot(ax=ax, linewidth=1)
129
- Y_hat_df.set_index('ds')[['PatchTST', 'NHITS']].plot(ax=ax, linewidth=1)
130
 
131
 
132
  ax.set_title('AirPassengers Forecast', fontsize=22)
 
6
 
7
import sys

from datasetsforecast.losses import rmse, mae
from statsforecast import StatsForecast
from statsforecast.models import (
    ARIMA,
    AutoARIMA,
    AutoETS,
    AutoCES,
    DynamicOptimizedTheta,
    MSTL,
    SeasonalNaive,
)
18
 
19
 
20
  os.environ["NIXTLA_NUMBA_RELEASE_GIL"] = "1"
21
  os.environ["NIXTLA_NUMBA_CACHE"] = "1"
22
 
23
 
 
24
# Load the quarter-hourly consumption dataset (TSV with Hungarian column headers)
# and rename the columns to the unique_id / ds / y schema that StatsForecast expects.
data = pd.read_csv("terheles_fixed.tsv", sep="\t")
data['ds'] = pd.to_datetime(data['Korrigált időpont'])
data['y'] = data['Hatásos teljesítmény']
data = data[['ds', 'y']]
# Single series, so a constant id is enough.
data['unique_id'] = 1
# Keep only data before March 2019; February is held out as the evaluation tail.
data = data[data['ds'] < '2019-03-01']
Y_df = data
train_df = Y_df[Y_df['ds'] < '2019-02-01']

# Forecast horizon in 15-minute steps: 4 per hour * 24 hours * 7 days.
horizon = 4 * 24 * 7 # 7 days
 
99
  return fcsts_df, total_time, model_name
100
 
101
 
102
# unlike MSTL, the others only allow a single season_length:
seasonality = 4 * 24 * 1 # 1 day

models = [
    # MSTL handles multiple seasonalities via seasonal-trend decomposition.
    MSTL(
        season_length=[4 * 24, 4 * 24 * 7], # seasonalities of the time series
        trend_forecaster=AutoARIMA() # model used to forecast trend
    ),
    # Baseline: repeat the value from one day (= season_length steps) earlier.
    SeasonalNaive(season_length=seasonality)
]

# Flip to True to also benchmark the slower single-seasonality models.
EXTENDED_TEST = False
if EXTENDED_TEST:
    models += [
        # AutoARIMA(season_length=4 * 24) is just too slow, never even finishes,
        # spends all its time in scipy bfgs.
        # which is weird, because it works okay as trend-detector of MSTL.
        AutoARIMA(),
        AutoETS(season_length=seasonality),
        AutoCES(season_length=seasonality),
        DynamicOptimizedTheta(season_length=seasonality),
    ]


# 15-minute sampling frequency, single-process execution.
freq = '15min'
sf = StatsForecast(
    models=models,
    freq=freq,
    n_jobs=1,
)
132
 
133
+
134
# Forecast columns are looked up by repr(model); assumes StatsForecast names
# its output columns the same way — confirmed by the indexing below working.
model_names = [repr(model) for model in models]


# Rolling-origin crossvalidation over as many non-overlapping horizon-sized
# windows as fit into the training data (minus one to leave room for history).
n_windows = len(train_df) // horizon - 1
print("crossvalidation with", n_windows, "windows")
print("models:", ", ".join(model_names))
crossvalidation_df = sf.cross_validation(df=train_df, h=horizon, step_size=horizon, n_windows=n_windows)

# Aggregate accuracy per model across all crossvalidation windows.
for model_name in model_names:
    rmse_crossval = rmse(crossvalidation_df['y'], crossvalidation_df[model_name])
    mae_crossval = mae(crossvalidation_df['y'], crossvalidation_df[model_name])
    print(model_name, "RMSE", rmse_crossval, "MAE", mae_crossval)


# Stop after the crossvalidation report.  Everything below is work in progress
# and intentionally unreachable for now.  sys.exit() instead of the bare
# exit() builtin, which is a site-module convenience not guaranteed to exist
# in all execution contexts.
sys.exit()

sf.fit(train_df)

# Plot the fitted MSTL decomposition (trend + seasonal components) for the
# last two weeks of the training data.
sf.fitted_[0, 0].model_.tail(4 * 24 * 7 * 2).plot(subplots=True, grid=True)
plt.tight_layout()
plt.show()


print("starting forecast, dataset size", len(train_df))
# Y_hat_df = sf.forecast(df=train_df, h=horizon, level=[68.27])
# level=[68.27] requests roughly a one-sigma prediction interval.
Y_hat_df = sf.predict(h=horizon, level=[68.27])


print(Y_hat_df)
163
 
 
168
# plot_df = pd.concat([Y_df, Y_hat_df]).set_index('ds') # Concatenate the train and forecast dataframes
# plot_df[['y', 'LSTM', 'NHITS']].plot(ax=ax, linewidth=2)

# Plot the full observed series (the date filter is disabled for now).
plot_Y_df = Y_df # [Y_df['ds'] > '2019-07-01']
plot_Y_df = plot_Y_df.set_index('ds')[['y']]
plot_Y_df.plot(ax=ax, linewidth=1)
# Overlay every forecast column (point forecasts and interval bounds).
Y_hat_df.set_index('ds').plot(ax=ax, linewidth=1)


# NOTE(review): title is a leftover from the AirPassengers tutorial — rename
# to match this dataset.
ax.set_title('AirPassengers Forecast', fontsize=22)
v2/test_sf_vs_prophet.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://github.com/Nixtla/statsforecast/tree/main/experiments/mstl
2
+ # https://github.com/Nixtla/statsforecast/blob/main/experiments/mstl/src/main.py
3
+
4
+ from time import time
5
+
6
+ import pandas as pd
7
+ import numpy as np
8
+ from datasetsforecast.losses import (
9
+ mae, mape, mase, rmse, smape
10
+ )
11
+ from fire import Fire
12
+ from neuralprophet import NeuralProphet
13
+ from prophet import Prophet
14
+ from prophet.diagnostics import cross_validation
15
+ from statsforecast import StatsForecast
16
+ from statsforecast.models import MSTL, AutoARIMA, SeasonalNaive
17
+
18
def evaluate_performace(y_hist, y_true, models):
    """Score each model at each crossvalidation cutoff.

    Builds one row per (model, cutoff) pair with one column per metric
    (mase, mae, mape, rmse, smape).  MASE additionally uses the in-sample
    history up to the cutoff, with seasonality=24 (hourly data, daily season).

    y_hist: DataFrame with 'ds' and 'y' (training history).
    y_true: crossvalidation frame with 'cutoff', 'y' and one column per model.
    models: list of column names in y_true to evaluate.
    Returns a DataFrame indexed by model name, with a leading 'cutoff' column.
    """
    per_cutoff_frames = []
    metrics = [mase, mae, mape, rmse, smape]
    for cutoff in y_true['cutoff'].unique():
        window = y_true.query('cutoff == @cutoff')
        actual = window['y'].values
        scores = {}
        for model in models:
            predicted = window[model].values
            row = {}
            for metric in metrics:
                name = metric.__name__
                if name == 'mase':
                    # MASE scales the error by the seasonal-naive error on history.
                    row[name] = metric(
                        actual,
                        predicted,
                        y_hist.query('ds <= @cutoff')['y'].values,
                        seasonality=24
                    )
                else:
                    row[name] = metric(actual, predicted)
            scores[model] = row
        frame = pd.DataFrame(scores).T
        frame.insert(0, 'cutoff', cutoff)
        frame.index = frame.index.rename('model')
        per_cutoff_frames.append(frame)
    return pd.concat(per_cutoff_frames)
44
+
45
+ def experiment():
46
+ filename = 'https://github.com/archd3sai/Hourly-Energy-Consumption-Prediction/raw/master/PJME_hourly.csv'
47
+ # 404: filename = 'https://raw.githubusercontent.com/jnagura/Energy-consumption-prediction-analysis/master/PJM_Load_hourly.csv'
48
+ df = pd.read_csv(filename)
49
+ df.columns = ['ds', 'y']
50
+ df.insert(0, 'unique_id', 'PJM_Load_hourly')
51
+ df['ds'] = pd.to_datetime(df['ds'])
52
+ df = df.sort_values(['unique_id', 'ds']).reset_index(drop=True)
53
+
54
+ # MSTL model
55
+ mstl = MSTL(
56
+ season_length=[24, 24 * 7], # seasonalities of the time series
57
+ trend_forecaster=AutoARIMA() # model used to forecast trend
58
+ )
59
+ sf = StatsForecast(
60
+ df=df,
61
+ models=[mstl],
62
+ freq='H'
63
+ )
64
+ init = time()
65
+ forecasts_cv = sf.cross_validation(h=24, n_windows=7, step_size=24)
66
+ end = time()
67
+ time_mstl = (end - init) / 60
68
+ print(f'MSTL Time: {time_mstl:.2f} minutes')
69
+
70
+ # SeasonalNaive model
71
+ sf = StatsForecast(
72
+ df=df,
73
+ models=[SeasonalNaive(season_length=24)],
74
+ freq='H'
75
+ )
76
+ init = time()
77
+ forecasts_cv_seas = sf.cross_validation(h=24, n_windows=7, step_size=24)
78
+ end = time()
79
+ time_seas = (end - init) / 60
80
+ print(f'SeasonalNaive Time: {time_seas:.2f} minutes')
81
+ forecasts_cv = forecasts_cv.merge(forecasts_cv_seas.drop(columns='y'), how='left', on=['unique_id', 'ds', 'cutoff'])
82
+
83
+ cutoffs = forecasts_cv['cutoff'].unique()
84
+ # Prophet model
85
+ forecasts_cv['Prophet'] = None
86
+ time_prophet = 0
87
+ for cutoff in cutoffs:
88
+ df_train = df.query('ds <= @cutoff')
89
+ prophet = Prophet()
90
+ # produce forecasts
91
+ init = time()
92
+ prophet.fit(df_train)
93
+ # produce forecasts
94
+ future = prophet.make_future_dataframe(periods=24, freq='H', include_history=False)
95
+ forecast_prophet = prophet.predict(future)
96
+ end = time()
97
+ assert (forecast_prophet['ds'].values == forecasts_cv.query('cutoff == @cutoff')['ds']).all()
98
+ forecasts_cv.loc[forecasts_cv['cutoff'] == cutoff, 'Prophet'] = forecast_prophet['yhat'].values
99
+ # data wrangling
100
+ time_prophet += (end - init) / 60
101
+ print(f'Prophet Time: {time_prophet:.2f} minutes')
102
+ times = pd.DataFrame({
103
+ 'model': ['MSTL', 'SeasonalNaive', 'Prophet'],
104
+ 'time (mins)': [time_mstl, time_seas, time_prophet]
105
+ })
106
+
107
+ # NeuralProphet
108
+ forecasts_cv['NeuralProphet'] = None
109
+ time_np = 0
110
+ for cutoff in cutoffs:
111
+ df_train = df.query('ds <= @cutoff')
112
+ neuralprophet = NeuralProphet()
113
+ init = time()
114
+ neuralprophet.fit(df_train.drop(columns='unique_id'))
115
+ future = neuralprophet.make_future_dataframe(df=df_train.drop(columns='unique_id'), periods=24)
116
+ forecast_np = neuralprophet.predict(future)
117
+ end = time()
118
+ assert (forecast_np['ds'].values == forecasts_cv.query('cutoff == @cutoff')['ds']).all()
119
+ forecasts_cv.loc[forecasts_cv['cutoff'] == cutoff, 'NeuralProphet'] = forecast_np['yhat1'].values
120
+ time_np += (end - init) / 60
121
+ print(f'NeuralProphet Time: {time_np:.2f} minutes')
122
+ times = times.append({'model': 'NeuralProphet', 'time (mins)': time_np}, ignore_index=True)
123
+ # Final evalaution
124
+ evaluation = evaluate_performace(df_train, forecasts_cv, models=['MSTL', 'NeuralProphet', 'Prophet', 'SeasonalNaive'])
125
+ print(times)
126
+ print(evaluation)
127
+ print(evaluation.groupby('model').mean(numeric_only=True))
128
+
129
+
130
if __name__ == "__main__":
    # Fire exposes experiment() as a command-line entry point.
    Fire(experiment)