# nyc / app.py
# Sanket Kathrotiya
# v0
# 285a850
import xgboost as xgb
import pandas as pd
import os
from datetime import datetime
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar
import streamlit as st
# Directory holding this script; the model and data files are resolved
# relative to it.
_APP_DIR = os.path.dirname(__file__)

# Trained XGBoost booster, restored from its .bin file.
model = xgb.Booster()
model.load_model(os.path.join(_APP_DIR, "xgb_model.bin"))

# Unseen data that the random-prediction demo samples from.
df = pd.read_csv(os.path.join(_APP_DIR, "unseen_data.csv"))

# Feature columns handed to the model, selected in this order.
expected_columns = [
    'temperature',
    'year',
    'month',
    'day',
    'hr',
    'day_of_week',
    'is_weekend',
    'holiday',
]
def get_random_data(n=5):
    """Return a random sample of rows from the unseen data.

    Args:
        n: Number of rows to draw. Defaults to 5. When the data holds fewer
            than ``n`` rows, every available row is returned instead of
            raising.

    Returns:
        An independent copy of the sampled rows, so callers can add columns
        without touching the module-level ``df``.
    """
    # DataFrame.sample (without replacement) raises ValueError when asked for
    # more rows than exist, so cap the request at the population size.
    sample_size = min(n, len(df))
    return df.sample(n=sample_size).copy()
def predict_demand(input_df):
    """Score ``input_df`` with the loaded model and attach the predictions.

    The frame must contain every column listed in ``expected_columns``.
    A ``predicted_demand`` column (model output rounded to whole integers)
    is written onto ``input_df`` itself, and that same frame is returned.
    """
    # Only the model's feature columns go into the DMatrix.
    feature_matrix = xgb.DMatrix(input_df[expected_columns])
    raw_output = model.predict(feature_matrix)

    # Round to the nearest whole number before casting to int.
    input_df['predicted_demand'] = raw_output.round().astype(int)
    return input_df
def format_random_output(prediction_df):
    """Build the display table comparing predicted and actual demand.

    Reads the ``predicted_demand``, ``demand``, ``date``, ``hr`` and
    ``temperature`` columns. As a side effect, ``error_percentage`` and
    ``datetime`` columns are added to ``prediction_df`` itself.

    Returns:
        A new DataFrame with human-readable column headers.
    """
    predicted = prediction_df['predicted_demand']
    actual = prediction_df['demand']

    # Signed relative error, in percent, to two decimals.
    prediction_df['error_percentage'] = ((predicted - actual) / actual * 100).round(2)

    # Combine the date string and the hour into one timestamp.
    timestamp_text = prediction_df['date'] + ' ' + prediction_df['hr'].astype(str) + ':00:00'
    prediction_df['datetime'] = pd.to_datetime(timestamp_text)

    # Source column -> display header, in output order.
    display_names = {
        'datetime': 'Date and Time',
        'temperature': 'Temperature (°C)',
        'predicted_demand': 'Predicted Demand (MW)',
        'demand': 'Actual Demand (MW)',
        'error_percentage': 'Error (%)',
    }
    return prediction_df[list(display_names)].rename(columns=display_names)
def custom_predict(date, temperature):
    """Predict demand for a single user-supplied date and temperature.

    Args:
        date: Anything ``pd.to_datetime`` accepts; its hour becomes the
            hour feature.
        temperature: Temperature value passed to the model unchanged.

    Returns:
        A one-row DataFrame with ``Date``, ``Temperature (°C)`` and
        ``Predicted Demand (MW)`` columns.
    """
    moment = pd.to_datetime(date)
    day_start = moment.floor('D')

    # US federal holidays within the window spanning the requested day.
    federal_holidays = calendar().holidays(start=day_start, end=moment.ceil('D'))

    # Values are positional: they line up with expected_columns.
    feature_row = [
        temperature,
        moment.year,
        moment.month,
        moment.day,
        moment.hour,
        moment.dayofweek,
        int(moment.dayofweek >= 5),  # dayofweek 5/6 are Saturday/Sunday
        int(day_start in federal_holidays),
    ]
    scored = predict_demand(pd.DataFrame([feature_row], columns=expected_columns))

    return pd.DataFrame({
        'Date': [moment],
        'Temperature (°C)': [temperature],
        'Predicted Demand (MW)': scored['predicted_demand'],
    })
# Streamlit page: a title, a short description, and two tabs.
st.title("Electricity Demand Prediction")
st.markdown("Predict electricity demand based on various factors.")

random_tab, custom_tab = st.tabs(["Random Predictions", "Custom Prediction"])

with random_tab:
    st.header("Random Predictions")
    if st.button("Predict for 5 Random Data Points"):
        # Sample rows, score them, then render the predicted-vs-actual table.
        st.dataframe(format_random_output(predict_demand(get_random_data())))

with custom_tab:
    st.header("Custom Prediction")
    # NOTE(review): st.date_input yields a calendar date with no time of day,
    # so custom_predict presumably always sees hour 0 - confirm whether an
    # hour selector is wanted.
    selected_date = st.date_input("Date", value=datetime.now())
    selected_temperature = st.slider("Temperature (°C)", min_value=0, max_value=40)
    if st.button("Predict for Custom Input"):
        st.dataframe(custom_predict(selected_date, selected_temperature))