Spaces:
Build error
Build error
| import time | |
| from utils.levels import complete_level, render_page, initialize_level | |
| from utils.login import get_login, initialize_login | |
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.metrics import r2_score | |
| from sklearn.ensemble import RandomForestRegressor | |
| from sklearn.model_selection import RandomizedSearchCV | |
| import matplotlib.pyplot as plt | |
| from matplotlib.backends.backend_agg import RendererAgg | |
| _lock = RendererAgg.lock | |
| import base64 | |
| from io import BytesIO | |
| from PIL import Image, ImageFilter | |
| import lightgbm as lgb | |
# Run the login and level initialisers imported from utils before any page
# code executes; the order of the two calls is kept as in the original.
initialize_login()
initialize_level()

# Level number handed to complete_level() and render_page() further down.
LEVEL = 3

# Dataset path; nothing in the visible part of this file reads it.
File_PATH = 'datasets/Building_forcasting.csv'
def process_file(csv_file):
    """Load a CSV into a DataFrame indexed by parsed timestamps.

    Args:
        csv_file: Path or file-like object accepted by ``pandas.read_csv``.
            It must contain a ``Timestamp`` column.

    Returns:
        pandas.DataFrame: The CSV contents with the ``Timestamp`` column as
        the index (converted with ``pandas.to_datetime``) and every missing
        value replaced by 0.
    """
    frame = pd.read_csv(csv_file, index_col='Timestamp')
    frame.index = pd.to_datetime(frame.index)
    return frame.fillna(0)
def model_predict(data, model_choice, train_size, tune_model):
    """Train the selected regressor and predict on the held-out tail of the data.

    Args:
        data: DataFrame passed to ``create_model_inputs`` to build the
            feature matrix and target.
        model_choice: ``'LightGBM'`` or ``'Random Forest'``.
        train_size: Percentage (0-100) of rows used for training; the split
            is not shuffled, so the test set is the final rows.
        tune_model: When true, run a randomized hyperparameter search over
            the grid returned by ``tuned_parameters`` instead of fitting the
            default estimator.

    Returns:
        tuple: ``(y_test, y_pred, model)`` where ``y_test`` is the held-out
        target DataFrame, ``y_pred`` the predictions for it, and ``model``
        the fitted estimator (the best estimator found when tuning).

    Raises:
        ValueError: If ``model_choice`` is not one of the supported names.
    """
    if model_choice == 'LightGBM':
        estimator = lgb.LGBMRegressor()
        grid_key = 'lgbm'
    elif model_choice == 'Random Forest':
        estimator = RandomForestRegressor(n_estimators=100, random_state=42)
        grid_key = 'rf'
    else:
        # Previously an unknown choice fell through and failed later with an
        # UnboundLocalError on ``model``.
        raise ValueError(f"Unsupported model choice: {model_choice!r}")

    # Lag and rolling-window length are both fixed at 288 rows.
    X, y = create_model_inputs(data, 288, 288)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, train_size=train_size / 100, random_state=42, shuffle=False
    )

    # y is a single-column DataFrame; flatten it so the estimators receive a
    # 1-D target instead of a column vector.
    target = np.ravel(y_train)

    if tune_model:
        # The grids from tuned_parameters() are lists of candidate values, so
        # they must be searched over rather than passed to the constructor.
        from sklearn.model_selection import TimeSeriesSplit

        search = RandomizedSearchCV(
            estimator,
            param_distributions=tuned_parameters(grid_key),
            n_iter=10,
            # Ordered splits: each validation fold follows its training rows,
            # matching the unshuffled train/test split above.
            cv=TimeSeriesSplit(n_splits=3),
            random_state=42,
        )
        search.fit(X_train, target)
        # Return the refitted estimator itself so callers can still read
        # ``feature_importances_`` from it.
        model = search.best_estimator_
    else:
        estimator.fit(X_train, target)
        model = estimator

    y_pred = model.predict(X_test)
    return y_test, y_pred, model
def create_model_inputs(data, lag, mean_period):
    """Build the feature matrix and target used by the forecasting models.

    Two derived columns are added to a copy of ``data`` (the input frame is
    not modified): ``PV_Output_lag`` is ``PV_Output`` shifted by ``lag`` rows
    and ``PV_Output_mean`` is its rolling mean over ``mean_period`` rows.

    Args:
        data: DataFrame containing ``PV_Output``, ``Solar_Irradiance``,
            ``Temperature``, ``Rain_Fall`` and ``Wind_speed`` columns.
        lag: Number of rows to shift ``PV_Output`` by.
        mean_period: Window length of the rolling mean.

    Returns:
        tuple: ``(X, y)`` where ``X`` holds the six feature columns with any
        row containing NaN removed, and ``y`` is a single-column
        ``PV_Output`` DataFrame restricted to the index of ``X``.
    """
    feature_columns = [
        'Solar_Irradiance',
        'Temperature',
        'Rain_Fall',
        'Wind_speed',
        'PV_Output_lag',
        'PV_Output_mean',
    ]
    pv_output = data['PV_Output']
    # NOTE(review): the rolling window ends on the current row, so the mean
    # includes the value being predicted - confirm this is intended.
    enriched = data.assign(
        PV_Output_lag=pv_output.shift(lag),
        PV_Output_mean=pv_output.rolling(window=mean_period).mean(),
    )
    # The shift and the rolling window leave NaN in the leading rows; drop them.
    X = enriched[feature_columns].dropna()
    y = enriched.loc[X.index, ['PV_Output']]
    return X, y
def show_output(y_test, y_pred):
    """Show the test R2 score in the sidebar and plot predictions vs. actuals.

    Three stacked panels are drawn against ``y_test.index``: predicted and
    actual together, predicted alone, and actual alone. Both series are
    divided by 1000 before plotting.

    Args:
        y_test: DataFrame with a ``PV_Output`` column holding the true values.
        y_pred: Predictions aligned with ``y_test``; must support division
            by a scalar.

    Returns:
        The matplotlib figure that was rendered with ``st.pyplot``.
    """
    sidebar = st.sidebar
    sidebar.subheader("Model Performance")
    sidebar.write(f"Test R2 score: {r2_score(y_test, y_pred):.2f}")

    fig, (ax_both, ax_predicted, ax_actual) = plt.subplots(3, figsize=(12, 18))

    timestamps = y_test.index
    predicted = y_pred / 1000
    actual = y_test['PV_Output'] / 1000

    # Top panel: both series overlaid, with a legend.
    ax_both.plot(timestamps, predicted, label='Predicted')
    ax_both.plot(timestamps, actual, label='Actual')
    ax_both.legend()
    ax_both.set_title('Prediction vs Actual (Solar Power Generation)')

    # Middle and bottom panels: one series each (no legend is drawn).
    ax_predicted.plot(timestamps, predicted, label='Predicted')
    ax_predicted.set_title('Predicted Solar Power Generation')

    ax_actual.plot(timestamps, actual, label='Actual')
    ax_actual.set_title('Actual Solar Power Generation')

    # All panels share the same axis labels.
    for axis in (ax_both, ax_predicted, ax_actual):
        axis.set_xlabel('Date')
        axis.set_ylabel('Solar Power Generation (kW)')

    fig.tight_layout()
    # Rendering is done while holding the module-level RendererAgg lock.
    with _lock:
        st.pyplot(fig)
    return fig
def download_link(y_test, y_pred):
    """Add a sidebar link that downloads predictions and actuals as a CSV.

    The CSV is embedded in the link itself as a base64 ``data:`` URI.

    Args:
        y_test: DataFrame with a ``PV_Output`` column; its index supplies the
            ``Timestamp`` column of the CSV.
        y_pred: Predictions, one per row of ``y_test``.
    """
    table = pd.DataFrame({
        'Timestamp': y_test.index,
        'Predicted_Power': y_pred,
        'Actual_Total_Power_(kW)': y_test['PV_Output'],
    })
    csv_text = table.to_csv(index=False)
    payload = base64.b64encode(csv_text.encode()).decode()
    anchor = f'<a href="data:file/csv;base64,{payload}" download="Predicted_Solar_Power.csv">Download Predicted Power CSV File</a>'
    # The anchor is raw HTML, so markdown must be told to render it unescaped.
    st.sidebar.markdown(anchor, unsafe_allow_html=True)
def feature_importance_plot(model, feature_names):
    """Draw a bar chart of the model's feature importances as percentages.

    Args:
        model: Fitted estimator exposing ``feature_importances_``.
        feature_names: Labels for the bars, one per importance value.

    Returns:
        The matplotlib figure containing the bar chart.
    """
    raw = model.feature_importances_
    # Rescale so the bars sum to 100.
    percent = 100.0 * (raw / raw.sum())

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(feature_names, percent)
    ax.set_title('Feature Importance')
    ax.set_xlabel('Features')
    ax.set_ylabel('Importance (%)')
    return fig
def download_plot(fig):
    """Add a sidebar link that downloads ``fig`` as a PNG image.

    The figure is saved to an in-memory buffer and embedded in the link as a
    base64 ``data:`` URI.

    Args:
        fig: Object with a ``savefig(file, format=...)`` method, such as a
            matplotlib figure.
    """
    buffer = BytesIO()
    fig.savefig(buffer, format='png')
    png_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
    # The anchor is raw HTML, so markdown must be told to render it unescaped.
    st.sidebar.markdown(
        f'<a href="data:image/png;base64,{png_b64}" download="plot.png">Download Result Plot</a>',
        unsafe_allow_html=True,
    )
def tuned_parameters(model):
    """Return the hyperparameter candidates for the given model key.

    Args:
        model: ``'lgbm'`` or ``'rf'``.

    Returns:
        dict | None: A new dict mapping each parameter name to a list of
        candidate values, or ``None`` when ``model`` is any other value.
    """
    if model == 'lgbm':
        return {
            'num_leaves': [10, 20, 30, 40, 50],
            'max_depth': [-1, 3, 5, 10],
            'learning_rate': [0.01, 0.05, 0.1],
            'n_estimators': [100, 500, 1000],
        }
    if model == 'rf':
        return {
            'n_estimators': [10, 100, 500, 1000],
            'max_depth': [None, 10, 20, 30, 40, 50],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4],
        }
    # Unknown keys yield None, as before.
    return None
def step3_page():
    """Render the model-training page.

    Shows an example of the expected CSV layout, then, once a CSV has been
    uploaded in the sidebar, trains the chosen model, optionally shows its
    feature importances, plots the predictions and offers the results and
    the plot for download. A "Complete" button marks the level as done.

    Figures created through pyplot stay registered until they are closed, so
    each one is closed here once it has been rendered (and, for the result
    plot, offered for download); otherwise every rerun of the page would
    leave more figures open in the server process.
    """
    st.header("Training the Model")
    st.subheader("Exploring the data")
    st.title("Solar Forecasting App")

    # Show a two-row sample of the expected CSV in a single full-width column.
    # NOTE(review): the sample uses 'Wind_Speed' while the model features use
    # 'Wind_speed' - confirm which spelling the real CSV files carry.
    (example_col,) = st.columns([1])
    with example_col:
        example_rows = {
            'Timestamp': ['11/1/2022 0:20', '11/1/2022 0:25'],
            'Total_Power (kW)': [37337, 44590],
            'PV_Output': [296.6, 298.4],
            'Solar_Irradiance': [0, 0],
            'Temperature': [25.1, 24.7],
            'Rain_Fall': [42.6, 42.6],
            'Wind_Speed': [0.6, 0.4],
        }
        st.subheader("Example of CSV file DataFrame")
        st.table(pd.DataFrame(example_rows))

    csv_file = st.sidebar.file_uploader("Upload CSV", type=['csv'])
    if csv_file is not None:
        data = process_file(csv_file)

        train_size = st.sidebar.slider(
            "Select Train Dataset Size (%)", min_value=10, max_value=90, value=70
        )
        model_choice = st.sidebar.selectbox('Choose Model', ['LightGBM', 'Random Forest'])
        tune_model = st.sidebar.checkbox('Tune Hyperparameters')

        y_test, y_pred, model = model_predict(data, model_choice, train_size, tune_model)

        # Display feature importance
        if st.sidebar.checkbox('Show feature importance'):
            feature_names = [
                'Solar_Irradiance',
                'Temperature',
                'Rain_Fall',
                'Wind_speed',
                'PV_Output_lag',
                'PV_Output_mean',
            ]
            importance_fig = feature_importance_plot(model, feature_names)
            with _lock:
                st.pyplot(importance_fig)
            # Rendered; release it so it does not pile up across reruns.
            plt.close(importance_fig)

        result_fig = show_output(y_test, y_pred)
        download_link(y_test, y_pred)
        download_plot(result_fig)
        # The PNG has been encoded into the download link; the figure itself
        # is no longer needed.
        plt.close(result_fig)

    if st.button("Complete"):
        complete_level(LEVEL)
# Pass this page's function and its level number to render_page (imported
# from utils.levels).
render_page(step3_page, LEVEL)