Spaces:

danielvarga
/

pq

Sleeping

App Files Files Community

Daniel Varga committed on Sep 20, 2023

Commit

0ea2c50

1 Parent(s): e9cac80

mean absolute error

Browse files

Files changed (1) hide show

demo_prophet.py +82 -33

demo_prophet.py CHANGED Viewed

@@ -1,58 +1,107 @@
 import pandas as pd
 import matplotlib.pyplot as plt
 from prophet import Prophet
-# df = pd.read_csv('https://raw.githubusercontent.com/facebook/prophet/main/examples/example_wp_log_peyton_manning.csv')
-cons_filename = 'pq_terheles_2021_adatok.tsv'
-df = pd.read_csv(cons_filename, sep='\t', skipinitialspace=True, na_values='n/a', decimal=',')
-df['Time'] = pd.to_datetime(df['Korrigált időpont'], format='%m/%d/%y %H:%M')
-df = df.set_index('Time')
-df['Consumption'] = df['Hatásos teljesítmény [kW]']
-df['ds'] = df.index
-df['y'] = df['Consumption']
-split_date = '2021-07-01'
-# TODO 15 minutes hardwired!
-forecast_horizon = 7 * 24 * 4
-# Split the data into training (past) and evaluation (future) sets
-train_data = df[df['ds'] <= split_date]
-eval_data = df[df['ds'] > split_date]
-# Initialize and train the Prophet model using the training data
-model = Prophet(seasonality_mode='multiplicative', growth='flat',
-    yearly_seasonality=True, weekly_seasonality=True, daily_seasonality=True)
-model.fit(train_data)
-# Create a DataFrame with future timestamps for the evaluation period
-future = model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=False)
-# Make predictions for the evaluation period
-forecast = model.predict(future)
-# Calculate evaluation metrics (e.g., MAE, MSE, RMSE) by comparing eval_predictions with eval_data['y']
-# For example, you can calculate MAE as follows:
-from sklearn.metrics import mean_absolute_error
-eval_data = eval_data[eval_data['ds'] <= forecast['ds'].max()]
-mae = mean_absolute_error(eval_data['y'], forecast['yhat'])
-# Print or store the evaluation metrics
-print(f"Mean Absolute Error (MAE): {mae}")
-fig1 = model.plot(forecast)
-plt.show()
-fig2 = model.plot_components(forecast)
-plt.show()

+import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 from prophet import Prophet
+import logging
+# Lower clamp, in kW, applied to the forecast columns (yhat, yhat_lower, yhat_upper).
+# NOTE(review): the trailing "15" looks like an alternative value that was tried; confirm.
+PREDICTION_LOWER_BOUND = 0 # 15
+# Printed whenever this module runs, as a reminder that the clamp is hard-coded.
+print("do not forget about hardwired prediction lower bound", PREDICTION_LOWER_BOUND, "kW")
+def prediction_task(df, split_date, forecast_horizon):
+    # Split the data into training (past) and evaluation (future) sets
+    train_data = df[df['ds'] <= split_date]
+    eval_data = df[df['ds'] > split_date]
+    # Initialize and train the Prophet model using the training data
+    model = Prophet(seasonality_mode='multiplicative', growth='flat',
+        yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True)
+    model.fit(train_data)
+    # Create a DataFrame with future timestamps for the evaluation period
+    future = model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=False)
+    # Make predictions for the evaluation period
+    forecast = model.predict(future)
+    # Calculate evaluation metrics (e.g., MAE, MSE, RMSE) by comparing eval_predictions with eval_data['y']
+    # For example, you can calculate MAE as follows:
+    from sklearn.metrics import mean_absolute_error
+    eval_data = eval_data[eval_data['ds'] <= forecast['ds'].max()]
+    for key in ('yhat', 'yhat_lower', 'yhat_upper'):
+        forecast[key] = np.maximum(forecast[key], PREDICTION_LOWER_BOUND)
+    mae = mean_absolute_error(eval_data['y'], forecast['yhat'])
+    # Print or store the evaluation metrics
+    do_vis = False
+    if do_vis:
+        plt.figure(figsize=(12, 6))
+        plt.plot(eval_data['ds'], eval_data['y'], label='Actual', color='blue')
+        plt.plot(forecast['ds'], forecast['yhat'], label='Predicted', color='red')
+        plt.fill_between(forecast['ds'], forecast['yhat_lower'], forecast['yhat_upper'], color='pink', alpha=0.5, label='Uncertainty')
+        plt.xlabel('Timestamp')
+        plt.ylabel('Value')
+        plt.title('Actual vs. Predicted Values')
+        plt.legend()
+        plt.grid(True)
+        plt.show()
+        '''
+        fig1 = model.plot(forecast)
+        plt.plot(eval_data['ds'], eval_data['y'], c='r')
+        plt.show()
+        '''
+        fig2 = model.plot_components(forecast)
+        plt.show()
+    return mae, eval_data['y'].mean()
+logger = logging.getLogger('cmdstanpy')
+logger.addHandler(logging.NullHandler())
+logger.propagate = False
+logger.setLevel(logging.CRITICAL)
+cons_filename = 'pq_terheles_2021_adatok.tsv'
+df = pd.read_csv(cons_filename, sep='\t', skipinitialspace=True, na_values='n/a', decimal=',')
+df['Time'] = pd.to_datetime(df['Korrigált időpont'], format='%m/%d/%y %H:%M')
+df = df.set_index('Time')
+df['Consumption'] = df['Hatásos teljesítmény [kW]']
+df['ds'] = df.index
+df['y'] = df['Consumption']
+# TODO 15 minutes timestep hardwired!
+forecast_horizon = 7 * 24 * 4
+start_date = '2021-06-01'
+end_date = '2021-10-24'
+weekly_date_range = pd.date_range(start=start_date, end=end_date, freq='8d')
+maes = []
+mean_values = []
+for split_date in weekly_date_range:
+    mae, mean_value = prediction_task(df, split_date, forecast_horizon)
+    maes.append(mae)
+    mean_values.append(mean_value)
+    print(split_date, "Mean Absolute Error", mae, "MAE/true mean", mae / mean_value)
+maes = np.array(maes)
+mean_values = np.array(mean_values)
+aggregate_mae = maes.mean()
+print("Mean Absolute Error over whole date range", weekly_date_range[0], "-", weekly_date_range[-1], ":", aggregate_mae)
+print("Mean Absolute Error / true mean over whole date range", aggregate_mae / mean_values.mean())