abnsol committed on
Commit
f32736b
·
verified ·
1 Parent(s): e7f10e1

refactor: remove ARIMA forecasting model

Browse files
Files changed (1) hide show
  1. src/forecast.py +0 -106
src/forecast.py CHANGED
@@ -1,6 +1,5 @@
1
  import pandas as pd
2
  import numpy as np
3
- from pmdarima import auto_arima
4
  from sklearn.metrics import mean_absolute_error, root_mean_squared_error, mean_squared_error
5
  from sklearn.preprocessing import MinMaxScaler
6
  from tensorflow.keras.models import Sequential
@@ -17,111 +16,6 @@ def train_test_split_series(series, split_date):
17
  test = series.loc[split_date:].dropna()
18
  return train, test
19
 
20
- def fit_arima_model(train, seasonal=False, m=1):
21
- """
22
- Fit an ARIMA/SARIMA model using auto_arima.
23
- - train: pandas Series
24
- - seasonal: whether to fit seasonal ARIMA
25
- - m: seasonal period (e.g., 12 for monthly seasonality)
26
- """
27
- model = auto_arima(
28
- train,
29
- start_p=1, start_q=1,
30
- max_p=5, max_q=5,
31
- d=None,
32
- seasonal=seasonal,
33
- m=m,
34
- trace=True,
35
- error_action='ignore',
36
- suppress_warnings=True,
37
- stepwise=True
38
- )
39
- return model
40
-
41
- def forecast_and_evaluate(model, test, return_conf_int=True, alpha=0.05):
42
- """
43
- Forecast on the test set, evaluate metrics, and return confidence intervals.
44
- This version is based on the user's provided working alignment logic.
45
- """
46
- print("TEST HEAD\n", test.head())
47
- actual_series = test.dropna()
48
- n_periods = len(actual_series)
49
-
50
- # Generate the forecast and optionally confidence intervals
51
- pred_out = model.predict(n_periods=n_periods, return_conf_int=return_conf_int, alpha=alpha)
52
-
53
- # model.predict may return just an array or (array, conf_int_array)
54
- if return_conf_int:
55
- try:
56
- forecast_vals, conf_int_array = pred_out
57
- except Exception:
58
- # unexpected format — try to coerce
59
- forecast_vals = np.asarray(pred_out)
60
- conf_int_array = None
61
- else:
62
- forecast_vals = np.asarray(pred_out)
63
- conf_int_array = None
64
-
65
- forecast_vals = np.asarray(forecast_vals)
66
- print("Forecast Values Head:\n", forecast_vals[:5])
67
-
68
- # Primary alignment: label-based using the test index (positional subset)
69
- test_index = actual_series.index
70
- if forecast_vals.shape[0] >= len(test_index):
71
- # take first len(test_index) predictions
72
- forecast_series = pd.Series(forecast_vals[: len(test_index)], index=test_index, name="forecast")
73
- else:
74
- # fewer predictions than test rows: align to the first N positions
75
- forecast_series = pd.Series(forecast_vals, index=test_index[: forecast_vals.shape[0]], name="forecast")
76
-
77
- # Build conf_int_df if available and match to the same index used for forecast_series
78
- if conf_int_array is not None:
79
- conf_arr = np.asarray(conf_int_array)
80
- # match rows to forecast_series length
81
- conf_len = conf_arr.shape[0]
82
- conf_idx = forecast_series.index[:conf_len]
83
- try:
84
- conf_int_df = pd.DataFrame(conf_arr[:conf_len], index=conf_idx, columns=['lower_ci', 'upper_ci'])
85
- except Exception:
86
- # fallback: create numeric columns without column names
87
- conf_int_df = pd.DataFrame(conf_arr[:conf_len], index=conf_idx)
88
- if conf_int_df.shape[1] >= 2:
89
- conf_int_df.columns = ['lower_ci', 'upper_ci']
90
- else:
91
- conf_int_df = pd.DataFrame(index=forecast_series.index)
92
-
93
- # Create evaluation DataFrame by joining on index
94
- df_eval = pd.concat([actual_series, forecast_series], axis=1)
95
- df_eval.columns = ["actual", "forecast"]
96
- df_eval.dropna(inplace=True)
97
-
98
- # If joining by labels produced an empty DataFrame, fallback to positional alignment
99
- if df_eval.empty:
100
- k = min(len(actual_series), len(forecast_vals))
101
- if k == 0:
102
- raise ValueError("No overlapping data to evaluate: actual or predicted series is empty.")
103
- pos_index = actual_series.index[:k]
104
- df_eval = pd.DataFrame({
105
- 'actual': actual_series.values[:k],
106
- 'forecast': forecast_vals[:k]
107
- }, index=pos_index)
108
- # update conf_int_df index to pos_index if possible
109
- if conf_int_array is not None:
110
- conf_int_df = conf_int_df.reindex(pos_index)
111
-
112
- # --- Metric Calculation ---
113
- non_zero_actuals = df_eval["actual"] != 0
114
-
115
- mae = mean_absolute_error(df_eval["actual"], df_eval["forecast"])
116
- rmse = mean_squared_error(df_eval["actual"], df_eval["forecast"])
117
- mape = np.mean(np.abs((df_eval["actual"][non_zero_actuals] - df_eval["forecast"][non_zero_actuals]) / df_eval["actual"][non_zero_actuals])) * 100
118
-
119
- metrics = {"MAE": mae, "RMSE": rmse, "MAPE": mape}
120
-
121
- # Return all three required components
122
- return forecast_series, conf_int_df, metrics
123
-
124
-
125
  # --- LSTM Functions ---
126
  def create_lstm_sequences(data, sequence_length):
127
  """Create input sequences and corresponding labels for LSTM."""
 
1
  import pandas as pd
2
  import numpy as np
 
3
  from sklearn.metrics import mean_absolute_error, root_mean_squared_error, mean_squared_error
4
  from sklearn.preprocessing import MinMaxScaler
5
  from tensorflow.keras.models import Sequential
 
16
  test = series.loc[split_date:].dropna()
17
  return train, test
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  # --- LSTM Functions ---
20
  def create_lstm_sequences(data, sequence_length):
21
  """Create input sequences and corresponding labels for LSTM."""