Spaces:

danielvarga
/

pq

Sleeping

pq / predictor.py

Daniel Varga

moving v2 to .

0202035 7 months ago

6.49 kB

	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	from prophet import Prophet
	import holidays
	import logging
	from sklearn.metrics import mean_absolute_error


	PREDICTION_LOWER_BOUND = 0 # 15 [kW]
	print("do not forget about hardwired prediction lower bound", PREDICTION_LOWER_BOUND, "kW")


	def get_holidays():
	hungarian_holidays = holidays.Hungary(years=range(2019, 2031))
	holiday_df = pd.DataFrame(list(hungarian_holidays.items()), columns=['ds', 'holiday'])
	return holiday_df


	def prophet_backend(train_data, forecast_horizon):
	# Initialize and train the Prophet model using the training data
	model = Prophet(seasonality_mode='multiplicative', growth='flat',
	yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True,
	holidays=get_holidays())

	# we can also play with setting daily_seasonality=False above, and then manually adding
	# model.add_seasonality("daily", 1, fourier_order=10, prior_scale=100, mode="multiplicative")
	# ...it didn't really work though. bumping the fourier_order helps, but makes the model slow.
	# the rest didn't have much effect.

	model.fit(train_data)

	# Create a DataFrame with future timestamps for the evaluation period
	future = model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=False)

	# Make predictions for the evaluation period
	forecast = model.predict(future)
	assert len(forecast) == forecast_horizon

	# we never predict below zero, although prophet happily does.
	for key in ('yhat', 'yhat_lower', 'yhat_upper'):
	forecast[key] = np.maximum(forecast[key], 0)

	return forecast, model


	def prediction_task(backend, df, split_date, forecast_horizon):
	# Split the data into training (past) and evaluation (future) sets
	train_data = df[df['ds'] <= split_date]
	eval_data = df[df['ds'] > split_date]
	eval_data = eval_data.head(forecast_horizon)

	forecast, model = backend(train_data, forecast_horizon)

	mae = mean_absolute_error(eval_data['y'], forecast['yhat'])

	do_vis = False
	if do_vis:
	future = model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=True)
	forecast = model.predict(future)

	plt.figure(figsize=(12, 6))
	plt.plot(eval_data['ds'], eval_data['y'], label='Actual', color='blue')
	plt.plot(forecast['ds'], forecast['yhat'], label='Predicted', color='red')
	plt.fill_between(forecast['ds'], forecast['yhat_lower'], forecast['yhat_upper'], color='pink', alpha=0.5, label='Uncertainty')
	plt.xlabel('Timestamp')
	plt.ylabel('Value')
	plt.title('Actual vs. Predicted Values')
	plt.legend()
	plt.grid(True)
	plt.show()

	fig1 = model.plot(forecast)
	plt.plot(eval_data['ds'], eval_data['y'], c='r')
	plt.show()

	fig2 = model.plot_components(forecast)
	plt.show()
	exit()

	return mae, eval_data['y'].mean()


	def quiet_logging():
	logger = logging.getLogger('cmdstanpy')
	logger.addHandler(logging.NullHandler())
	logger.propagate = False
	logger.setLevel(logging.CRITICAL)


	def build_predictor(training_data: pd.Series):
	quiet_logging()
	training_data_frame = pd.DataFrame({'ds': training_data.index, 'y': training_data})
	model = Prophet(seasonality_mode='multiplicative', growth='flat',
	yearly_seasonality=False, weekly_seasonality=True, daily_seasonality=True,
	holidays=get_holidays())

	# we can also play with setting daily_seasonality=False above, and then manually adding
	# model.add_seasonality("daily", 1, fourier_order=10, prior_scale=100, mode="multiplicative")
	# ...it didn't really work though. bumping the fourier_order helps, but makes the model slow.
	# the rest didn't have much effect.

	model.fit(training_data_frame)
	return model


	def make_prediction(prophet_model: Prophet, test_data: pd.Series, batch_size_in_days: int):
	date_range = pd.date_range(start=test_data.index[0], end=test_data.index[-1], freq=f'{batch_size_in_days}d')
	for split_date in date_range:
	future = prophet_model.make_future_dataframe(periods=forecast_horizon, freq='15T', include_history=False)

	# Make predictions for the evaluation period
	forecast = prophet_model.predict(future)
	assert len(forecast) == forecast_horizon

	# we never predict below zero, although prophet happily does.
	for key in ('yhat', 'yhat_lower', 'yhat_upper'):
	forecast[key] = np.maximum(forecast[key], 0)

	return forecast


	def main():
	quiet_logging()

	cons_filename = 'pq_terheles_2021_adatok.tsv'

	df = pd.read_csv(cons_filename, sep='\t', skipinitialspace=True, na_values='n/a', decimal=',')
	df['Time'] = pd.to_datetime(df['Korrigált időpont'], format='%m/%d/%y %H:%M')
	df = df.set_index('Time')
	df['Consumption'] = df['Hatásos teljesítmény [kW]']

	df['ds'] = df.index
	df['y'] = df['Consumption']

	# we slightly alter both the train and the test
	# because we have an almost constant shift, and the model is multiplicative.
	# we add it back in the end.
	print("values below PREDICTION_LOWER_BOUND", PREDICTION_LOWER_BOUND, ":",
	(df['y'] <= PREDICTION_LOWER_BOUND).sum(), "/", len(df['y']))
	df['y'] = (df['y'] - PREDICTION_LOWER_BOUND).clip(0.0)

	# TODO 15 minutes timestep hardwired!
	forecast_horizon = 7 * 24 * 4
	print("forecast horizon", forecast_horizon // 4, "hours")


	start_date = '2021-06-01'
	end_date = '2021-10-24'

	weekly_date_range = pd.date_range(start=start_date, end=end_date, freq='8d')


	maes = []
	mean_values = []
	for split_date in weekly_date_range:
	# prophet_backend is the only backend currently
	mae, mean_value = prediction_task(prophet_backend, df, split_date, forecast_horizon)
	mean_value += PREDICTION_LOWER_BOUND
	maes.append(mae)
	mean_values.append(mean_value)
	print(split_date, "Mean Absolute Error", mae, "MAE/true mean", mae / mean_value)

	maes = np.array(maes)
	mean_values = np.array(mean_values)
	aggregate_mae = maes.mean()
	print("Mean Absolute Error over whole date range", weekly_date_range[0], "-", weekly_date_range[-1], ":", aggregate_mae)
	print("Mean Absolute Error / true mean over whole date range", aggregate_mae / mean_values.mean())


	if __name__ == '__main__':
	main()