Spaces:

Shafeek
/

weather_prediction_tutorial

Build error

weather_prediction_tutorial / pages /3_Training the Model.py

Shafeek Saleem

initjj

c1a413c over 2 years ago

6.48 kB

	import time
	from utils.levels import complete_level, render_page, initialize_level
	from utils.login import get_login, initialize_login
	import streamlit as st
	import pandas as pd
	import numpy as np
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import r2_score
	from sklearn.ensemble import RandomForestRegressor
	from sklearn.model_selection import RandomizedSearchCV
	import matplotlib.pyplot as plt
	from matplotlib.backends.backend_agg import RendererAgg
	_lock = RendererAgg.lock
	import base64
	from io import BytesIO
	from PIL import Image, ImageFilter
	import lightgbm as lgb

	# Both calls run at import time, before any page function below is defined.
	# NOTE(review): the helpers come from utils.login / utils.levels, which are
	# not visible here — presumably they prepare per-session state; confirm.
	initialize_login()
	initialize_level()

	# Level number of this page; passed to complete_level() and render_page().
	LEVEL = 3

	# Path to a CSV dataset. Not referenced anywhere else in the visible part of
	# this file (the page reads an uploaded CSV instead).
	File_PATH = 'datasets/Building_forcasting.csv'

	def process_file(csv_file):
	    """Load a CSV into a DataFrame indexed by parsed timestamps.

	    Args:
	        csv_file: Anything ``pandas.read_csv`` accepts (path, buffer, or the
	            object returned by Streamlit's file uploader). It must contain a
	            ``Timestamp`` column, which becomes the index.

	    Returns:
	        pandas.DataFrame: The file contents with a datetime index and every
	        missing value replaced by 0.
	    """
	    frame = pd.read_csv(csv_file, index_col='Timestamp')
	    frame.index = pd.to_datetime(frame.index)
	    # NOTE(review): missing readings are turned into 0, which the model will
	    # treat as real measurements — confirm this is intended for the dataset.
	    return frame.fillna(0)


	def model_predict(data, model_choice, train_size, tune_model):
	    """Train the selected regressor and predict on the held-out tail of the data.

	    Args:
	        data: DataFrame accepted by ``create_model_inputs``.
	        model_choice: ``'LightGBM'`` or ``'Random Forest'``.
	        train_size: Percentage (not fraction) of rows used for training.
	        tune_model: When true, run a randomized hyperparameter search over the
	            grid returned by ``tuned_parameters`` instead of fitting the
	            default estimator.

	    Returns:
	        tuple: ``(y_test, y_pred, model)`` — the held-out target frame, the
	        predictions for it, and the fitted estimator (the best estimator of
	        the search when ``tune_model`` is true).

	    Raises:
	        ValueError: If ``model_choice`` is not one of the supported names.
	    """
	    if model_choice == 'LightGBM':
	        model = lgb.LGBMRegressor()
	        search_space_key = 'lgbm'
	    elif model_choice == 'Random Forest':
	        model = RandomForestRegressor(n_estimators=100, random_state=42)
	        search_space_key = 'rf'
	    else:
	        # Previously fell through and failed later with an unbound `model`.
	        raise ValueError(f"Unsupported model_choice: {model_choice!r}")

	    # Lag and rolling-window length are both 288 rows.
	    # NOTE(review): at the 5-minute spacing shown in the page's example table
	    # that would be 24 hours — confirm against the real dataset.
	    X, y = create_model_inputs(data, 288, 288)

	    # shuffle=False keeps chronological order: train on the head, test on the tail.
	    X_train, X_test, y_train, y_test = train_test_split(
	        X, y, train_size=train_size / 100, random_state=42, shuffle=False
	    )
	    # y is a one-column DataFrame; estimators expect a 1-D target.
	    target = y_train.to_numpy().ravel()

	    if tune_model:
	        # The grid holds lists of candidate values, so it must be searched,
	        # not splatted into the estimator constructor.
	        from sklearn.model_selection import TimeSeriesSplit

	        search = RandomizedSearchCV(
	            model,
	            param_distributions=tuned_parameters(search_space_key),
	            n_iter=10,
	            # Ordered folds, so validation data always follows its training data.
	            cv=TimeSeriesSplit(n_splits=3),
	            random_state=42,
	        )
	        search.fit(X_train, target)
	        # Return the refitted estimator so callers can read feature_importances_.
	        model = search.best_estimator_
	    else:
	        model.fit(X_train, target)

	    y_pred = model.predict(X_test)
	    return y_test, y_pred, model


	def create_model_inputs(data, lag, mean_period):
	    """Build the feature matrix and target frame used for training.

	    Two engineered columns are derived from ``PV_Output`` on a copy of
	    ``data`` (the caller's frame is not modified):

	    * ``PV_Output_lag``  — ``PV_Output`` shifted down by ``lag`` rows.
	    * ``PV_Output_mean`` — trailing rolling mean over ``mean_period`` rows.

	    Rows where any feature is NaN (the warm-up rows of the shift/rolling
	    window) are dropped, and the target is aligned to the surviving rows.

	    Args:
	        data: DataFrame containing ``Solar_Irradiance``, ``Temperature``,
	            ``Rain_Fall``, ``Wind_speed`` and ``PV_Output`` columns.
	        lag: Number of rows to shift ``PV_Output`` by.
	        mean_period: Window length, in rows, of the rolling mean.

	    Returns:
	        tuple: ``(X, y)`` where ``X`` is the feature DataFrame and ``y`` is a
	        one-column DataFrame (``PV_Output``) sharing ``X``'s index.
	    """
	    # NOTE(review): the example table on this page spells the column
	    # 'Wind_Speed'; this code requires 'Wind_speed' — confirm which one the
	    # real dataset uses.
	    feature_columns = [
	        'Solar_Irradiance',
	        'Temperature',
	        'Rain_Fall',
	        'Wind_speed',
	        'PV_Output_lag',
	        'PV_Output_mean',
	    ]

	    df_processed = data.copy()
	    pv_output = df_processed['PV_Output']
	    df_processed['PV_Output_lag'] = pv_output.shift(lag)
	    # NOTE(review): the rolling window ends at the current row, so this
	    # feature includes the very PV_Output value that is used as the target.
	    df_processed['PV_Output_mean'] = pv_output.rolling(window=mean_period).mean()

	    X = df_processed[feature_columns].dropna()
	    y = df_processed[['PV_Output']].loc[X.index]
	    return X, y


	def show_output(y_test, y_pred):
	    """Report the test R2 score in the sidebar and plot predictions vs actuals.

	    Draws three stacked subplots on one figure: predicted and actual together,
	    predicted only, and actual only. The figure is rendered with
	    ``st.pyplot`` and also returned so the caller can reuse it.

	    Args:
	        y_test: DataFrame with a ``PV_Output`` column; its index is used as
	            the x-axis.
	        y_pred: Predictions aligned row-for-row with ``y_test``.

	    Returns:
	        matplotlib.figure.Figure: The rendered figure (left open; the caller
	        owns it).
	    """
	    st.sidebar.subheader("Model Performance")
	    st.sidebar.write(f"Test R2 score: {r2_score(y_test, y_pred):.2f}")

	    timestamps = y_test.index
	    # Values are divided by 1000 and the axes are labelled kW.
	    # NOTE(review): presumably the inputs are in W — confirm.
	    predicted = y_pred / 1000
	    actual = y_test['PV_Output'] / 1000

	    fig, axs = plt.subplots(3, figsize=(12, 18))

	    axs[0].plot(timestamps, predicted, label='Predicted')
	    axs[0].plot(timestamps, actual, label='Actual')
	    axs[0].legend()
	    axs[0].set_title('Prediction vs Actual (Solar Power Generation)')

	    axs[1].plot(timestamps, predicted, label='Predicted')
	    axs[1].set_title('Predicted Solar Power Generation')

	    axs[2].plot(timestamps, actual, label='Actual')
	    axs[2].set_title('Actual Solar Power Generation')

	    # All three panels share the same axis labels.
	    for ax in axs:
	        ax.set_xlabel('Date')
	        ax.set_ylabel('Solar Power Generation (kW)')

	    fig.tight_layout()
	    # Rendering is serialized through the module-level lock.
	    with _lock:
	        st.pyplot(fig)

	    return fig


	def download_link(y_test, y_pred):
	    """Add a sidebar link that downloads predictions and actuals as a CSV.

	    The CSV is embedded in the link itself as a base64 ``data:`` URI, so no
	    file is written on the server.

	    Args:
	        y_test: DataFrame with a ``PV_Output`` column; its index supplies the
	            ``Timestamp`` column of the CSV.
	        y_pred: Predictions aligned row-for-row with ``y_test``.
	    """
	    # NOTE(review): the actual values are exported unscaled under a "(kW)"
	    # header, while the plots divide the same values by 1000 before labelling
	    # them kW — confirm which unit is correct.
	    results = pd.DataFrame({
	        'Timestamp': y_test.index,
	        'Predicted_Power': y_pred,
	        'Actual_Total_Power_(kW)': y_test['PV_Output'],
	    })
	    csv = results.to_csv(index=False)
	    b64 = base64.b64encode(csv.encode()).decode()
	    href = f'<a href="data:file/csv;base64,{b64}" download="Predicted_Solar_Power.csv">Download Predicted Power CSV File</a>'
	    # Raw HTML is required for the anchor tag to render as a link.
	    st.sidebar.markdown(href, unsafe_allow_html=True)


	def feature_importance_plot(model, feature_names):
	    """Create a bar chart of the model's feature importances as percentages.

	    Args:
	        model: Fitted estimator exposing ``feature_importances_``.
	        feature_names: Labels for the bars, in the same order as the
	            importances.

	    Returns:
	        matplotlib.figure.Figure: A new figure holding the bar chart. It is
	        not rendered or closed here.
	    """
	    importance = np.asarray(model.feature_importances_, dtype=float)
	    total = importance.sum()
	    # Normalize to percentages; an all-zero vector stays at zero instead of
	    # becoming NaN through a division by zero.
	    if total:
	        importance = 100.0 * (importance / total)

	    # Draw on an explicit Figure/Axes rather than pyplot's global "current
	    # figure", which another concurrently running session could replace.
	    fig, ax = plt.subplots(figsize=(10, 6))
	    ax.bar(feature_names, importance)
	    ax.set_title('Feature Importance')
	    ax.set_xlabel('Features')
	    ax.set_ylabel('Importance (%)')
	    return fig


	def download_plot(fig):
	    """Add a sidebar link that downloads ``fig`` as a PNG image.

	    The figure is saved to an in-memory buffer and embedded in the link as a
	    base64 ``data:`` URI.

	    Args:
	        fig: Object with a ``savefig(file, format=...)`` method (a matplotlib
	            figure in this file).
	    """
	    png_buffer = BytesIO()
	    fig.savefig(png_buffer, format='png')
	    encoded = base64.b64encode(png_buffer.getvalue()).decode('utf-8')

	    href = f'<a href="data:image/png;base64,{encoded}" download="plot.png">Download Result Plot</a>'
	    # Raw HTML is required for the anchor tag to render as a link.
	    st.sidebar.markdown(href, unsafe_allow_html=True)


	def tuned_parameters(model):
	    """Return the hyperparameter search space for a model key.

	    Despite the name, the values are lists of *candidate* settings (a search
	    grid), not a single tuned configuration, so the result cannot be passed
	    directly as estimator keyword arguments.

	    Args:
	        model: ``'lgbm'`` or ``'rf'``.

	    Returns:
	        dict | None: A freshly built mapping of parameter name to a list of
	        candidate values, or ``None`` when ``model`` is not a known key.
	    """
	    if model == 'lgbm':
	        return {
	            'num_leaves': [10, 20, 30, 40, 50],
	            'max_depth': [-1, 3, 5, 10],
	            'learning_rate': [0.01, 0.05, 0.1],
	            'n_estimators': [100, 500, 1000],
	        }

	    if model == 'rf':
	        return {
	            'n_estimators': [10, 100, 500, 1000],
	            'max_depth': [None, 10, 20, 30, 40, 50],
	            'min_samples_split': [2, 5, 10],
	            'min_samples_leaf': [1, 2, 4],
	        }

	    # Unknown key: kept as an explicit None to match the previous fall-through.
	    return None

	def step3_page():
	    """Render the "Training the Model" page.

	    Shows an example of the expected CSV layout, then — once a CSV has been
	    uploaded in the sidebar — trains the chosen model, optionally plots
	    feature importances, plots predictions against actuals, and offers the
	    results and the plot for download. A "Complete" button marks the level as
	    done.
	    """
	    st.header("Training the Model")
	    st.subheader("Exploring the data")
	    st.title("Solar Forecasting App")

	    # Single-column layout holding the example table.
	    col1 = st.columns([1])

	    with col1[0]:
	        example_rows = {
	            'Timestamp': ['11/1/2022 0:20', '11/1/2022 0:25'],
	            'Total_Power (kW)': [37337, 44590],
	            'PV_Output': [296.6, 298.4],
	            'Solar_Irradiance': [0, 0],
	            'Temperature': [25.1, 24.7],
	            'Rain_Fall': [42.6, 42.6],
	            'Wind_Speed': [0.6, 0.4]
	        }
	        st.subheader("Example of CSV file DataFrame")
	        st.table(pd.DataFrame(example_rows))

	    csv_file = st.sidebar.file_uploader("Upload CSV", type=['csv'])

	    if csv_file is not None:
	        data = process_file(csv_file)

	        train_size = st.sidebar.slider("Select Train Dataset Size (%)", min_value=10, max_value=90, value=70)

	        models = ['LightGBM', 'Random Forest']
	        model_choice = st.sidebar.selectbox('Choose Model', models)

	        tune_model = st.sidebar.checkbox('Tune Hyperparameters')

	        y_test, y_pred, model = model_predict(data, model_choice, train_size, tune_model)

	        # Display feature importance
	        if st.sidebar.checkbox('Show feature importance'):
	            feature_names = ['Solar_Irradiance', 'Temperature', 'Rain_Fall', 'Wind_speed', 'PV_Output_lag',
	                             'PV_Output_mean']
	            importance_fig = feature_importance_plot(model, feature_names)
	            with _lock:
	                st.pyplot(importance_fig)
	                # Release the figure; otherwise it stays registered in pyplot
	                # and accumulates every time the script reruns.
	                plt.close(importance_fig)

	        fig = show_output(y_test, y_pred)

	        download_link(y_test, y_pred)

	        download_plot(fig)

	        # The figure has been rendered and encoded; close it for the same
	        # reason as above.
	        with _lock:
	            plt.close(fig)

	    # NOTE(review): the original nesting of this button was lost; it is
	    # placed at page level (available without an upload) — confirm.
	    if st.button("Complete"):
	        complete_level(LEVEL)


	# Hand the page function and its level number to the shared renderer.
	# NOTE(review): render_page is imported from utils.levels (not shown here);
	# presumably it decides whether and how step3_page is called — confirm.
	render_page(step3_page, LEVEL)