Spaces:

danielvarga
/

pq

Sleeping

App Files Files Community

Daniel Varga committed on Dec 19, 2023

Commit

f1abbf3

1 Parent(s): a13327d

offline prediction

Browse files

Files changed (2) hide show

v2/architecture.py +14 -9
v2/predictor.py +124 -0

v2/architecture.py CHANGED Viewed

@@ -127,11 +127,13 @@ class DummyPredictor:
 # this function does not mutate its inputs.
 # it makes a clone of battery_model and modifies that.
-def simulator(battery_model, supplier, prod_cons, prod_predictor, cons_predictor, decider):
     battery_model = copy.copy(battery_model)
     demand_np = prod_cons['Consumption'].to_numpy()
     production_np = prod_cons['Production'].to_numpy()
     assert len(demand_np) == len(production_np)
     step_in_minutes = prod_cons.index.freq.n
     assert step_in_minutes == 5
@@ -170,8 +172,12 @@ def simulator(battery_model, supplier, prod_cons, prod_predictor, cons_predictor
         unsatisfied_demand = demand
         remaining_production = production
-        prod_prediction = prod_predictor.predict(i, decider.input_window_size)
-        cons_prediction = cons_predictor.predict(i, decider.input_window_size)
         decision = decider.decide(prod_prediction, cons_prediction, battery_model)
         production_used_to_charge = 0
@@ -246,18 +252,17 @@ def main():
     met_2021_data, cons_2021_data = read_datasets()
     add_production_field(met_2021_data, parameters)
-    all_2021_data = interpolate_and_join(met_2021_data, cons_2021_data)
-    time_interval_min = all_2021_data.index.freq.n
     time_interval_h = time_interval_min / 60
     battery_model = BatteryModel(capacity_Ah=600, time_interval_h=time_interval_h)
-    prod_predictor = DummyPredictor(pd.Series(all_2021_data['Production']))
-    cons_predictor = DummyPredictor(pd.Series(all_2021_data['Consumption']))
     decider = Decider()
-    results = simulator(battery_model, supplier, all_2021_data, prod_predictor, cons_predictor, decider)
     import matplotlib.pyplot as plt
     results['soc_series'].plot()

 # this function does not mutate its inputs.
 # it makes a clone of battery_model and modifies that.
+def simulator(battery_model, supplier, prod_cons, decider):
     battery_model = copy.copy(battery_model)
     demand_np = prod_cons['Consumption'].to_numpy()
     production_np = prod_cons['Production'].to_numpy()
+    demand_prediction_np = prod_cons['Consumption_prediction'].to_numpy()
+    production_prediction_np = prod_cons['Production_prediction'].to_numpy()
     assert len(demand_np) == len(production_np)
     step_in_minutes = prod_cons.index.freq.n
     assert step_in_minutes == 5
         unsatisfied_demand = demand
         remaining_production = production
+        # TODO what to call it, demand or consumption?
+        # 1. sometimes demand is inappropriate, like consumption_from_solar vs demand_from_solar.
+        # 2. sometimes consumption is inappropriate, like unsatisfied_demand vs unsatisfied_consumption.
+        # 3. there should not be two of them.
+        prod_prediction = production_prediction_np[i: i + decider.input_window_size]
+        cons_prediction = demand_prediction_np[i: i + decider.input_window_size]
         decision = decider.decide(prod_prediction, cons_prediction, battery_model)
         production_used_to_charge = 0
     met_2021_data, cons_2021_data = read_datasets()
     add_production_field(met_2021_data, parameters)
+    all_data = interpolate_and_join(met_2021_data, cons_2021_data)
+    all_data_with_predictions = all_data.copy()
+    time_interval_min = all_data.index.freq.n
     time_interval_h = time_interval_min / 60
     battery_model = BatteryModel(capacity_Ah=600, time_interval_h=time_interval_h)
     decider = Decider()
+    results = simulator(battery_model, supplier, all_data_with_predictions, decider)
     import matplotlib.pyplot as plt
     results['soc_series'].plot()

v2/predictor.py ADDED Viewed

	@@ -0,0 +1,124 @@

+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from prophet import Prophet
+import holidays
+import logging
+from sklearn.metrics import mean_absolute_error
+# kW
+PREDICTION_LOWER_BOUND = 0 # 15
+print("do not forget about hardwired prediction lower bound", PREDICTION_LOWER_BOUND, "kW")
+hungarian_holidays = holidays.Hungary(years=range(2019, 2031))
+HOLIDAY_DF = pd.DataFrame(list(hungarian_holidays.items()), columns=['ds', 'holiday'])
+def prophet_backend(train_data, forecast_horizon):
+    # Initialize and train the Prophet model using the training data
+    model = Prophet(seasonality_mode='multiplicative', growth='flat',
+        yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True,
+        holidays=HOLIDAY_DF)
+    # we can also play with setting daily_seasonality=False above, and then manually adding
+    # model.add_seasonality("daily", 1, fourier_order=10, prior_scale=100, mode="multiplicative")
+    # ...it didn't really work though. bumping the fourier_order helps, but makes the model slow.
+    # the rest didn't have much effect.
+    model.fit(train_data)
+    # Create a DataFrame with future timestamps for the evaluation period
+    future = model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=False)
+    # Make predictions for the evaluation period
+    forecast = model.predict(future)
+    assert len(forecast) == forecast_horizon
+    for key in ('yhat', 'yhat_lower', 'yhat_upper'):
+        forecast[key] = np.maximum(forecast[key], PREDICTION_LOWER_BOUND)
+    return forecast, model
+def prediction_task(backend, df, split_date, forecast_horizon):
+    # Split the data into training (past) and evaluation (future) sets
+    train_data = df[df['ds'] <= split_date]
+    eval_data = df[df['ds'] > split_date]
+    eval_data = eval_data.head(forecast_horizon)
+    forecast, model = backend(train_data, forecast_horizon)
+    mae = mean_absolute_error(eval_data['y'], forecast['yhat'])
+    do_vis = False
+    if do_vis:
+        future = model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=True)
+        forecast = model.predict(future)
+        plt.figure(figsize=(12, 6))
+        plt.plot(eval_data['ds'], eval_data['y'], label='Actual', color='blue')
+        plt.plot(forecast['ds'], forecast['yhat'], label='Predicted', color='red')
+        plt.fill_between(forecast['ds'], forecast['yhat_lower'], forecast['yhat_upper'], color='pink', alpha=0.5, label='Uncertainty')
+        plt.xlabel('Timestamp')
+        plt.ylabel('Value')
+        plt.title('Actual vs. Predicted Values')
+        plt.legend()
+        plt.grid(True)
+        plt.show()
+        fig1 = model.plot(forecast)
+        plt.plot(eval_data['ds'], eval_data['y'], c='r')
+        plt.show()
+        fig2 = model.plot_components(forecast)
+        plt.show()
+        exit()
+    return mae, eval_data['y'].mean()
+logger = logging.getLogger('cmdstanpy')
+logger.addHandler(logging.NullHandler())
+logger.propagate = False
+logger.setLevel(logging.CRITICAL)
+cons_filename = 'pq_terheles_2021_adatok.tsv'
+df = pd.read_csv(cons_filename, sep='\t', skipinitialspace=True, na_values='n/a', decimal=',')
+df['Time'] = pd.to_datetime(df['Korrigált időpont'], format='%m/%d/%y %H:%M')
+df = df.set_index('Time')
+df['Consumption'] = df['Hatásos teljesítmény [kW]']
+df['ds'] = df.index
+df['y'] = df['Consumption']
+# TODO 15 minutes timestep hardwired!
+forecast_horizon = 24 * 4
+print("forecast horizon", forecast_horizon // 4, "hours")
+start_date = '2021-06-01'
+end_date = '2021-10-24'
+weekly_date_range = pd.date_range(start=start_date, end=end_date, freq='8d')
+maes = []
+mean_values = []
+for split_date in weekly_date_range:
+    # prophet_backend is the only backend currently
+    mae, mean_value = prediction_task(prophet_backend, df, split_date, forecast_horizon)
+    maes.append(mae)
+    mean_values.append(mean_value)
+    print(split_date, "Mean Absolute Error", mae, "MAE/true mean", mae / mean_value)
+maes = np.array(maes)
+mean_values = np.array(mean_values)
+aggregate_mae = maes.mean()
+print("Mean Absolute Error over whole date range", weekly_date_range[0], "-", weekly_date_range[-1], ":", aggregate_mae)
+print("Mean Absolute Error / true mean over whole date range", aggregate_mae / mean_values.mean())