Spaces:
Sleeping
Sleeping
Daniel Varga
committed on
Commit
·
6f8bbb1
1
Parent(s):
5711d94
in progress: making predictor usable from architecture.py
Browse files- v2/predictor.py +84 -36
v2/predictor.py
CHANGED
|
@@ -7,20 +7,21 @@ import logging
|
|
| 7 |
from sklearn.metrics import mean_absolute_error
|
| 8 |
|
| 9 |
|
| 10 |
-
# kW
|
| 11 |
-
PREDICTION_LOWER_BOUND = 0 # 15
|
| 12 |
print("do not forget about hardwired prediction lower bound", PREDICTION_LOWER_BOUND, "kW")
|
| 13 |
|
| 14 |
-
hungarian_holidays = holidays.Hungary(years=range(2019, 2031))
|
| 15 |
-
HOLIDAY_DF = pd.DataFrame(list(hungarian_holidays.items()), columns=['ds', 'holiday'])
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
|
| 19 |
def prophet_backend(train_data, forecast_horizon):
|
| 20 |
# Initialize and train the Prophet model using the training data
|
| 21 |
model = Prophet(seasonality_mode='multiplicative', growth='flat',
|
| 22 |
yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True,
|
| 23 |
-
holidays=
|
| 24 |
|
| 25 |
# we can also play with setting daily_seasonality=False above, and then manually adding
|
| 26 |
# model.add_seasonality("daily", 1, fourier_order=10, prior_scale=100, mode="multiplicative")
|
|
@@ -36,8 +37,9 @@ def prophet_backend(train_data, forecast_horizon):
|
|
| 36 |
forecast = model.predict(future)
|
| 37 |
assert len(forecast) == forecast_horizon
|
| 38 |
|
|
|
|
| 39 |
for key in ('yhat', 'yhat_lower', 'yhat_upper'):
|
| 40 |
-
forecast[key] = np.maximum(forecast[key],
|
| 41 |
|
| 42 |
return forecast, model
|
| 43 |
|
|
@@ -79,46 +81,92 @@ def prediction_task(backend, df, split_date, forecast_horizon):
|
|
| 79 |
return mae, eval_data['y'].mean()
|
| 80 |
|
| 81 |
|
| 82 |
-
|
| 83 |
-
logger
|
| 84 |
-
logger.
|
| 85 |
-
logger.
|
|
|
|
| 86 |
|
| 87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
-
|
|
|
|
| 90 |
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
|
|
|
|
|
|
| 95 |
|
| 96 |
-
|
| 97 |
-
|
|
|
|
| 98 |
|
| 99 |
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
print("forecast horizon", forecast_horizon // 4, "hours")
|
| 103 |
|
|
|
|
| 104 |
|
| 105 |
-
start_date = '2021-06-01'
|
| 106 |
-
end_date = '2021-10-24'
|
| 107 |
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
-
maes = []
|
| 112 |
-
mean_values = []
|
| 113 |
-
for split_date in weekly_date_range:
|
| 114 |
-
# prophet_backend is the only backend currently
|
| 115 |
-
mae, mean_value = prediction_task(prophet_backend, df, split_date, forecast_horizon)
|
| 116 |
-
maes.append(mae)
|
| 117 |
-
mean_values.append(mean_value)
|
| 118 |
-
print(split_date, "Mean Absolute Error", mae, "MAE/true mean", mae / mean_value)
|
| 119 |
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
aggregate_mae = maes.mean()
|
| 123 |
-
print("Mean Absolute Error over whole date range", weekly_date_range[0], "-", weekly_date_range[-1], ":", aggregate_mae)
|
| 124 |
-
print("Mean Absolute Error / true mean over whole date range", aggregate_mae / mean_values.mean())
|
|
|
|
| 7 |
from sklearn.metrics import mean_absolute_error
|
| 8 |
|
| 9 |
|
| 10 |
+
# Hardwired lower bound of the predictions, in kW.
# NOTE(review): the original inline note mentions 15 [kW] as an alternative value.
PREDICTION_LOWER_BOUND = 0
# Printed at import time as a reminder that the bound is hardwired.
print("do not forget about hardwired prediction lower bound", PREDICTION_LOWER_BOUND, "kW")
|
| 12 |
|
|
|
|
|
|
|
| 13 |
|
| 14 |
+
def get_holidays():
    """Build the holiday table that is passed to Prophet.

    Returns:
        A DataFrame with the columns ``ds`` and ``holiday``, filled from the
        (key, value) items of the Hungarian holiday calendar for the years
        2019 through 2030.
    """
    calendar = holidays.Hungary(years=range(2019, 2031))
    rows = list(calendar.items())
    return pd.DataFrame(rows, columns=['ds', 'holiday'])
|
| 18 |
|
| 19 |
|
| 20 |
def prophet_backend(train_data, forecast_horizon):
|
| 21 |
# Initialize and train the Prophet model using the training data
|
| 22 |
model = Prophet(seasonality_mode='multiplicative', growth='flat',
|
| 23 |
yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True,
|
| 24 |
+
holidays=get_holidays())
|
| 25 |
|
| 26 |
# we can also play with setting daily_seasonality=False above, and then manually adding
|
| 27 |
# model.add_seasonality("daily", 1, fourier_order=10, prior_scale=100, mode="multiplicative")
|
|
|
|
| 37 |
forecast = model.predict(future)
|
| 38 |
assert len(forecast) == forecast_horizon
|
| 39 |
|
| 40 |
+
# we never predict below zero, although prophet happily does.
|
| 41 |
for key in ('yhat', 'yhat_lower', 'yhat_upper'):
|
| 42 |
+
forecast[key] = np.maximum(forecast[key], 0)
|
| 43 |
|
| 44 |
return forecast, model
|
| 45 |
|
|
|
|
| 81 |
return mae, eval_data['y'].mean()
|
| 82 |
|
| 83 |
|
| 84 |
+
def quiet_logging():
    """Silence the ``cmdstanpy`` logger.

    The logger gets a ``NullHandler``, stops propagating records to its
    ancestors and only lets ``CRITICAL`` records through.

    The function is idempotent: it is called from ``main()`` as well as from
    every ``build_predictor()`` call, so the handler is installed only if no
    ``NullHandler`` is attached yet. Otherwise the handler list would grow
    by one entry per call.
    """
    logger = logging.getLogger('cmdstanpy')
    already_silenced = any(
        isinstance(handler, logging.NullHandler) for handler in logger.handlers
    )
    if not already_silenced:
        logger.addHandler(logging.NullHandler())
    logger.propagate = False
    logger.setLevel(logging.CRITICAL)
|
| 89 |
|
| 90 |
|
| 91 |
+
def build_predictor(training_data: pd.Series):
    """Fit a Prophet model on a time series.

    Args:
        training_data: series of observed values. Its index is used as
            Prophet's ``ds`` column and the series itself as ``y``.

    Returns:
        The fitted Prophet model.
    """
    quiet_logging()

    history = pd.DataFrame({'ds': training_data.index, 'y': training_data})

    # Experiment notes: one can also set daily_seasonality=False and add the
    # daily component manually via
    #   model.add_seasonality("daily", 1, fourier_order=10, prior_scale=100, mode="multiplicative")
    # It didn't really work though. Bumping the fourier_order helps, but
    # makes the model slow; the rest didn't have much effect.
    predictor = Prophet(
        seasonality_mode='multiplicative',
        growth='flat',
        yearly_seasonality=False,
        weekly_seasonality=True,
        daily_seasonality=True,
        holidays=get_holidays(),
    )
    predictor.fit(history)
    return predictor
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def make_prediction(prophet_model: "Prophet", test_data: pd.Series, batch_size_in_days: int):
    """Forecast one batch of ``batch_size_in_days`` days with a fitted model.

    The forecast timestamps come from ``make_future_dataframe`` with
    ``include_history=False``, on a 15-minute grid.

    Args:
        prophet_model: an already fitted Prophet model.
        test_data: held-out series for the forecast period.
            NOTE(review): not consulted yet; the forecast timestamps are
            generated by the model, not taken from this series. Confirm
            that this matches the caller's expectation.
        batch_size_in_days: length of the forecast batch in days; must be
            at least 1.

    Returns:
        The Prophet forecast frame, with ``yhat``, ``yhat_lower`` and
        ``yhat_upper`` clipped at zero from below.

    Raises:
        ValueError: if ``batch_size_in_days`` is smaller than 1.
    """
    if batch_size_in_days < 1:
        raise ValueError(f"batch_size_in_days must be at least 1, got {batch_size_in_days}")

    # TODO 15 minutes timestep hardwired, both here and in the freq below.
    steps_per_day = 24 * 4
    forecast_horizon = batch_size_in_days * steps_per_day

    # TODO: only a single batch is forecast; walking over the whole of
    # test_data batch by batch is not implemented yet.
    # '15min' is the non-deprecated spelling of the '15T' offset alias.
    future = prophet_model.make_future_dataframe(
        periods=forecast_horizon, freq='15min', include_history=False)

    # Make predictions for the evaluation period
    forecast = prophet_model.predict(future)
    assert len(forecast) == forecast_horizon

    # we never predict below zero, although prophet happily does.
    for key in ('yhat', 'yhat_lower', 'yhat_upper'):
        forecast[key] = np.maximum(forecast[key], 0)

    return forecast
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def main():
    """Evaluate the Prophet backend on the 2021 consumption measurements.

    Reads the tab-separated measurement file, runs ``prediction_task`` for a
    sequence of split dates between 2021-06-01 and 2021-10-24, and prints
    the mean absolute error per split and aggregated over all splits.
    """
    quiet_logging()

    # The file uses decimal commas and marks missing values with 'n/a'.
    cons_filename = 'pq_terheles_2021_adatok.tsv'
    measurements = pd.read_csv(cons_filename, sep='\t', skipinitialspace=True, na_values='n/a', decimal=',')
    measurements['Time'] = pd.to_datetime(measurements['Korrigált időpont'], format='%m/%d/%y %H:%M')
    measurements = measurements.set_index('Time')
    measurements['Consumption'] = measurements['Hatásos teljesítmény [kW]']

    # 'ds' / 'y' are the column names the prediction code works with.
    measurements['ds'] = measurements.index
    measurements['y'] = measurements['Consumption']

    # Both the train and the test data are slightly altered: there is an
    # almost constant shift in the data and the model is multiplicative, so
    # the shift is removed here and added back to the means at the end.
    at_or_below_bound = (measurements['y'] <= PREDICTION_LOWER_BOUND).sum()
    print("values below PREDICTION_LOWER_BOUND", PREDICTION_LOWER_BOUND, ":",
          at_or_below_bound, "/", len(measurements['y']))
    measurements['y'] = (measurements['y'] - PREDICTION_LOWER_BOUND).clip(0.0)

    # TODO 15 minutes timestep hardwired!
    steps_per_hour = 4
    forecast_horizon = 7 * 24 * steps_per_hour
    print("forecast horizon", forecast_horizon // steps_per_hour, "hours")

    weekly_date_range = pd.date_range(start='2021-06-01', end='2021-10-24', freq='8d')

    maes, mean_values = [], []
    for split_date in weekly_date_range:
        # prophet_backend is the only backend currently
        mae, shifted_mean = prediction_task(prophet_backend, measurements, split_date, forecast_horizon)
        # Undo the shift so the ratio is relative to the true mean.
        true_mean = shifted_mean + PREDICTION_LOWER_BOUND
        maes.append(mae)
        mean_values.append(true_mean)
        print(split_date, "Mean Absolute Error", mae, "MAE/true mean", mae / true_mean)

    aggregate_mae = np.array(maes).mean()
    overall_mean = np.array(mean_values).mean()
    print("Mean Absolute Error over whole date range", weekly_date_range[0], "-", weekly_date_range[-1], ":", aggregate_mae)
    print("Mean Absolute Error / true mean over whole date range", aggregate_mae / overall_mean)
|
| 169 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
|
| 171 |
+
# Run the evaluation only when this file is executed as a script, so that
# importing the module does not start it.
if __name__ == "__main__":
    main()
|
|
|
|
|
|
|
|
|