Spaces:

danielvarga
/

pq

Sleeping

App Files Files Community

Daniel Varga commited on Apr 15, 2024

Commit

64b066d

1 Parent(s): 3fd575d

hangs

Browse files

Files changed (1) hide show

v2/test_predictor_statsforecast.py +156 -30

v2/test_predictor_statsforecast.py CHANGED Viewed

@@ -1,8 +1,22 @@
 import pandas as pd
 import matplotlib.pyplot as plt
-from neuralforecast import NeuralForecast
-from neuralforecast.models import LSTM, NHITS, RNN, PatchTST
 data = pd.read_csv("terheles_fixed.tsv", sep="\t")
@@ -14,39 +28,151 @@ data['unique_id'] = 1
 data = data[data['ds'] < '2019-09-01']
 Y_df = data
-print(Y_df)
 horizon = 4 * 24 * 7 # 7 days
-# Try different hyperparmeters to improve accuracy.
 models = [
-            PatchTST(h=horizon,                    # Forecast horizon
-               input_size=2 * horizon,      # Length of input sequence
-               max_steps=20,                # Number of steps to train
-               scaler_type='standard'),       # Type of scaler to normalize data
-            NHITS(h=horizon,                   # Forecast horizon
-                input_size=2 * horizon,      # Length of input sequence
-                max_steps=100,               # Number of steps to train
-                n_freq_downsample=[2, 1, 1]) # Downsampling factors for each stack output
-          ]
-'''
-            LSTM(h=horizon,                    # Forecast horizon
-               max_steps=100,                # Number of steps to train
-               scaler_type='standard',       # Type of scaler to normalize data
-               encoder_hidden_size=64,       # Defines the size of the hidden state of the LSTM
-               decoder_hidden_size=64,),     # Defines the number of hidden units of each layer of the MLP decoder
-'''
-nf = NeuralForecast(models=models, freq='15min')
-shorter_Y_df = Y_df[Y_df['ds'] < '2019-08-01']
-print("-=======-")
-print(len(shorter_Y_df))
-print(shorter_Y_df)
-nf.fit(df=shorter_Y_df)
 Y_hat_df = nf.predict()

+import os
+import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
+from statsforecast import StatsForecast
+from statsforecast.models import (
+    AutoARIMA,
+    AutoETS,
+    AutoCES,
+    DynamicOptimizedTheta,
+    SeasonalNaive,
+)
+os.environ["NIXTLA_NUMBA_RELEASE_GIL"] = "1"
+os.environ["NIXTLA_NUMBA_CACHE"] = "1"
 data = pd.read_csv("terheles_fixed.tsv", sep="\t")
 data = data[data['ds'] < '2019-09-01']
 Y_df = data
+train_df = Y_df[Y_df['ds'] < '2019-08-01']
 horizon = 4 * 24 * 7 # 7 days
+def ensemble_forecasts(
+    fcsts_df,
+    quantiles,
+    name_models,
+    model_name,
+) -> pd.DataFrame:
+    fcsts_df[model_name] = fcsts_df[name_models].mean(axis=1).values  # type: ignore
+    # compute quantiles based on the mean of the forecasts
+    sigma_models = []
+    for model in name_models:
+        fcsts_df[f"sigma_{model}"] = fcsts_df[f"{model}-hi-68.27"] - fcsts_df[model]
+        sigma_models.append(f"sigma_{model}")
+    fcsts_df[f"std_{model_name}"] = (
+        fcsts_df[sigma_models].pow(2).sum(axis=1).div(len(sigma_models) ** 2).pow(0.5)
+    )
+    z = norm.ppf(quantiles)
+    q_cols = []
+    for q, zq in zip(quantiles, z):
+        q_col = f"{model_name}-q-{q}"
+        fcsts_df[q_col] = fcsts_df[model_name] + zq * fcsts_df[f"std_{model_name}"]
+        q_cols.append(q_col)
+    fcsts_df = fcsts_df[["unique_id", "ds"] + [model_name] + q_cols]
+    return fcsts_df
+def run_statistical_ensemble(
+    train_df: pd.DataFrame,
+    horizon: int,
+    freq: str,
+    seasonality: int,
+    quantiles,
+):
+    os.environ["NIXTLA_ID_AS_COL"] = "true"
+    models = [
+        AutoARIMA(season_length=seasonality),
+        AutoETS(season_length=seasonality),
+        AutoCES(season_length=seasonality),
+        DynamicOptimizedTheta(season_length=seasonality),
+    ]
+    init_time = time()
+    series_per_core = 15
+    n_series = train_df["unique_id"].nunique()
+    n_jobs = min(n_series // series_per_core, os.cpu_count())
+    sf = StatsForecast(
+        models=models,
+        freq=freq,
+        n_jobs=n_jobs,
+    )
+    fcsts_df = sf.forecast(df=train_df, h=horizon, level=[68.27])
+    name_models = [repr(model) for model in models]
+    model_name = "StatisticalEnsemble"
+    fcsts_df = ensemble_forecasts(
+        fcsts_df,
+        quantiles,
+        name_models,
+        model_name,
+    )
+    total_time = time() - init_time
+    return fcsts_df, total_time, model_name
+seasonality = 4 * 24 * 7 # 1 week
 models = [
+        AutoARIMA(season_length=seasonality),
+        AutoETS(season_length=seasonality),
+        AutoCES(season_length=seasonality),
+        DynamicOptimizedTheta(season_length=seasonality),
+]
+freq = '15min'
+sf = StatsForecast(
+        models=models[:1],
+        freq=freq,
+        n_jobs=1,
+)
+print("starting forecast, dataset size", len(train_df))
+Y_hat_df = sf.forecast(df=train_df, h=horizon, level=[68.27])
+print(Y_hat_df)
+Y_hat_df = Y_hat_df.reset_index()
+fig, ax = plt.subplots(1, 1, figsize = (20, 7))
+# plot_df = pd.concat([Y_df, Y_hat_df]).set_index('ds') # Concatenate the train and forecast dataframes
+# plot_df[['y', 'LSTM', 'NHITS']].plot(ax=ax, linewidth=2)
+plot_Y_df = Y_df[Y_df['ds'] > '2019-07-01']
+plot_Y_df = plot_Y_df.set_index('ds')[['y']]
+plot_Y_df.plot(ax=ax, linewidth=1)
+Y_hat_df.set_index('ds')[['PatchTST', 'NHITS']].plot(ax=ax, linewidth=1)
+ax.set_title('AirPassengers Forecast', fontsize=22)
+ax.set_ylabel('Monthly Passengers', fontsize=20)
+ax.set_xlabel('Timestamp [t]', fontsize=20)
+ax.legend(prop={'size': 15})
+ax.grid()
+plt.savefig("neuralforecast.pdf")
+exit()
+quantiles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
+fcst_df, total_time, model_name = run_statistical_ensemble(
+    train_df,
+    horizon=horizon,
+    freq='15m',
+    seasonality=4 * 24 * 7,
+    quantiles=quantiles
+)
+nf.fit(df=train_df)
 Y_hat_df = nf.predict()