Spaces:

SanketAI
/

nyc

Sleeping

nyc

File size: 3,338 Bytes

import xgboost as xgb
import pandas as pd
import os
from datetime import datetime
from pandas.tseries.holiday import USFederalHolidayCalendar as calendar
import streamlit as st

# Directory that holds this script; the model and data files are resolved
# relative to it rather than to the current working directory.
_APP_DIR = os.path.dirname(__file__)

# Trained XGBoost booster, restored from its saved .bin file
model = xgb.Booster()
model.load_model(os.path.join(_APP_DIR, "xgb_model.bin"))

# Unseen data, read from a CSV file, that the random-prediction demo samples from
df = pd.read_csv(os.path.join(_APP_DIR, "unseen_data.csv"))

# Feature columns selected from a DataFrame before it is handed to the model
expected_columns = [
    'temperature',
    'year',
    'month',
    'day',
    'hr',
    'day_of_week',
    'is_weekend',
    'holiday',
]

def get_random_data(n=5):
    """Return a random sample of rows from the unseen data.

    Args:
        n: Number of rows to draw (default 5). The value is capped at the
            number of rows available, because sampling without replacement
            cannot return more rows than the frame holds.

    Returns:
        A copy of the sampled rows from the module-level ``df``.
    """
    # Cap the request so a short CSV yields a smaller sample instead of an error
    sample_size = min(n, len(df))
    return df.sample(sample_size).copy()

def predict_demand(input_df):
    """Run the model on ``input_df`` and return it with a prediction column.

    Args:
        input_df: DataFrame containing every column in ``expected_columns``.
            Extra columns are ignored by the model and carried through to
            the result unchanged.

    Returns:
        A copy of ``input_df`` with an added integer ``predicted_demand``
        column. The caller's DataFrame is left unmodified.
    """
    # Only the expected feature columns are passed to the model
    features = input_df[expected_columns]
    predictions = model.predict(xgb.DMatrix(features))

    # Attach the result to a copy so the argument is not mutated in place
    result_df = input_df.copy()
    # Round to whole units before casting so the value is not truncated
    result_df['predicted_demand'] = predictions.round(0).astype(int)
    return result_df

def format_random_output(prediction_df):
    """Build a display table comparing predicted and actual demand.

    Args:
        prediction_df: DataFrame with ``date`` (string), ``hr``,
            ``temperature``, ``demand`` and ``predicted_demand`` columns.

    Returns:
        A new DataFrame, indexed like the input, with the columns
        'Date and Time', 'Temperature (°C)', 'Predicted Demand (MW)',
        'Actual Demand (MW)' and 'Error (%)'. The input DataFrame is not
        modified.
    """
    # Derived columns go on a copy so the caller's frame keeps its shape
    working = prediction_df.copy()

    # Signed error of the prediction relative to the actual demand, in percent
    deviation = working['predicted_demand'] - working['demand']
    working['error_percentage'] = (deviation / working['demand'] * 100).round(2)

    # Join the date string and the hour into one "<date> <hr>:00:00" timestamp
    working['datetime'] = pd.to_datetime(
        working['date'] + ' ' + working['hr'].astype(str) + ':00:00'
    )

    # Select the display columns and give them human-readable headers
    output_df = working[
        ['datetime', 'temperature', 'predicted_demand', 'demand', 'error_percentage']
    ]
    output_df.columns = [
        'Date and Time',
        'Temperature (°C)',
        'Predicted Demand (MW)',
        'Actual Demand (MW)',
        'Error (%)',
    ]

    return output_df

def custom_predict(date, temperature):
    """Predict demand for one user-supplied date/time and temperature.

    Args:
        date: Any value ``pd.to_datetime`` can parse into a single timestamp.
        temperature: Temperature value used as the ``temperature`` feature.

    Returns:
        A one-row DataFrame with the columns 'Date', 'Temperature (°C)' and
        'Predicted Demand (MW)'.
    """
    when = pd.to_datetime(date)
    day_start = when.floor('D')

    # pandas numbers weekdays Monday=0 .. Sunday=6, so 5 and 6 are the weekend
    weekend_flag = int(when.dayofweek >= 5)

    # Fetch US federal holidays around this date and check the day itself
    federal_holidays = calendar().holidays(start=day_start, end=when.ceil('D'))
    holiday_flag = int(day_start in federal_holidays)

    # Values are positional: they are labelled with expected_columns below
    feature_row = [
        temperature,
        when.year,
        when.month,
        when.day,
        when.hour,
        when.dayofweek,
        weekend_flag,
        holiday_flag,
    ]
    features = pd.DataFrame([feature_row], columns=expected_columns)

    scored = predict_demand(features)

    # Present the inputs next to the predicted value
    return pd.DataFrame({
        'Date': [when],
        'Temperature (°C)': [temperature],
        'Predicted Demand (MW)': scored['predicted_demand']
    })

# Streamlit app: one tab scores random rows from the unseen data, the other
# scores a single user-specified date, hour and temperature.
st.title("Electricity Demand Prediction")
st.markdown("Predict electricity demand based on various factors.")

tab1, tab2 = st.tabs(["Random Predictions", "Custom Prediction"])

with tab1:
    st.header("Random Predictions")
    if st.button("Predict for 5 Random Data Points"):
        # Sample rows, score them, then format predicted vs. actual for display
        random_data = get_random_data()
        prediction_df = predict_demand(random_data)
        formatted_output = format_random_output(prediction_df)
        st.dataframe(formatted_output)

with tab2:
    st.header("Custom Prediction")
    date = st.date_input("Date", value=datetime.now())
    # The date picker yields a date without a time of day, so the hour is
    # collected separately; without it every custom prediction would be made
    # for hour 0. The slider starts at 0, matching the previous behaviour.
    hour = st.slider("Hour of day", 0, 23)
    temperature = st.slider("Temperature (°C)", 0, 40)

    if st.button("Predict for Custom Input"):
        # Combine the chosen date and hour into one timestamp for the model
        when = datetime(date.year, date.month, date.day, hour)
        custom_output = custom_predict(when, temperature)
        st.dataframe(custom_output)