"""Streamlit app that predicts electricity demand with a trained XGBoost model.

The script loads a serialized booster and a CSV of held-out observations that
sit next to this file, then exposes two tabs: one that scores a random sample
of the held-out rows and one that scores a user-supplied date/temperature.
"""

import os
from datetime import datetime

import pandas as pd
import streamlit as st
import xgboost as xgb
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar

BASE_DIR = os.path.dirname(__file__)

# NOTE(review): Streamlit re-executes the script on every interaction, so the
# model and CSV are re-read each time. Consider wrapping these loads in
# Streamlit's caching decorators if the installed version provides them.

# Load your trained XGBoost model from a .bin file
model = xgb.Booster()
model.load_model(os.path.join(BASE_DIR, "xgb_model.bin"))

# Load the unseen data from a CSV file
df = pd.read_csv(os.path.join(BASE_DIR, "unseen_data.csv"))

# Define the expected columns for prediction (order matters: it is the column
# order handed to the DMatrix).
expected_columns = [
    'temperature', 'year', 'month', 'day', 'hr',
    'day_of_week', 'is_weekend', 'holiday',
]


def get_random_data(n: int = 5) -> pd.DataFrame:
    """Return a copy of up to *n* randomly selected rows of the unseen data.

    The sample size is capped at the number of available rows so that a short
    CSV does not make ``DataFrame.sample`` raise.
    """
    sample_size = min(n, len(df))
    return df.sample(n=sample_size).copy()


def predict_demand(input_df: pd.DataFrame) -> pd.DataFrame:
    """Score *input_df* with the model and return it with a prediction column.

    Args:
        input_df: Frame containing at least the columns in
            ``expected_columns``.

    Returns:
        A copy of *input_df* with an added integer ``predicted_demand``
        column. The caller's frame is left untouched.

    Raises:
        KeyError: If any of ``expected_columns`` is missing from *input_df*.
    """
    # Prepare data for prediction
    features = input_df[expected_columns]
    predictions = model.predict(xgb.DMatrix(features))

    # Add predictions to a copy so the argument is not mutated as a side effect
    result = input_df.copy()
    result['predicted_demand'] = predictions.round(0).astype(int)
    return result


def format_random_output(prediction_df: pd.DataFrame) -> pd.DataFrame:
    """Build the display table for the random-sample tab.

    Reads the ``predicted_demand``, ``demand``, ``date``, ``hr`` and
    ``temperature`` columns of *prediction_df* and returns a new frame with
    human-readable column names. *prediction_df* itself is not modified.
    """
    result = prediction_df.copy()

    # Percentage error is undefined when the actual demand is zero; mask those
    # rows to NaN instead of letting the division produce +/-inf.
    actual = result['demand'].where(result['demand'] != 0)
    result['error_percentage'] = (
        (result['predicted_demand'] - actual) / actual * 100
    ).round(2)

    # Format date and time (the string concatenation assumes 'date' holds
    # strings, as read from the CSV).
    result['datetime'] = pd.to_datetime(
        result['date'] + ' ' + result['hr'].astype(str) + ':00:00'
    )

    # Select and rename columns
    display_names = {
        'datetime': 'Date and Time',
        'temperature': 'Temperature (°C)',
        'predicted_demand': 'Predicted Demand (MW)',
        'demand': 'Actual Demand (MW)',
        'error_percentage': 'Error (%)',
    }
    return result[list(display_names)].rename(columns=display_names)


def custom_predict(date, temperature) -> pd.DataFrame:
    """Predict demand for a single user-supplied date and temperature.

    Args:
        date: Anything ``pd.to_datetime`` can parse into a single timestamp.
        temperature: Temperature value passed to the model as-is.

    Returns:
        A one-row frame with the date, the temperature and the prediction.
    """
    # Parse date
    dt = pd.to_datetime(date)
    day = dt.normalize()

    # Calculate additional parameters
    is_weekend = dt.dayofweek >= 5
    holidays = calendar().holidays(start=day, end=day)
    is_holiday = day in holidays

    # Create custom data
    # NOTE(review): when `date` carries no time component (e.g. a plain date
    # from st.date_input), dt.hour is 0, so the prediction is for midnight.
    custom_data = pd.DataFrame(
        [[
            temperature,
            dt.year,
            dt.month,
            dt.day,
            dt.hour,
            dt.dayofweek,
            int(is_weekend),
            int(is_holiday),
        ]],
        columns=expected_columns,
    )

    # Predict
    prediction_df = predict_demand(custom_data)

    # Format output
    return pd.DataFrame({
        'Date': [dt],
        'Temperature (°C)': [temperature],
        'Predicted Demand (MW)': prediction_df['predicted_demand'],
    })


# Streamlit app
st.title("Electricity Demand Prediction")
st.markdown("Predict electricity demand based on various factors.")

tab1, tab2 = st.tabs(["Random Predictions", "Custom Prediction"])

with tab1:
    st.header("Random Predictions")
    if st.button("Predict for 5 Random Data Points"):
        random_data = get_random_data()
        prediction_df = predict_demand(random_data)
        formatted_output = format_random_output(prediction_df)
        st.dataframe(formatted_output)

with tab2:
    st.header("Custom Prediction")
    date = st.date_input("Date", value=datetime.now())
    temperature = st.slider("Temperature (°C)", 0, 40)
    if st.button("Predict for Custom Input"):
        custom_output = custom_predict(date, temperature)
        st.dataframe(custom_output)