Spaces:

sunnynazir
/

load_forecasting

Sleeping

File size: 2,553 Bytes

ff8feee
 
 
 
 
e38c149
ff8feee
e38c149
 
 
 
 
ff8feee
 
e38c149
ff8feee
e38c149
ff8feee
 
e38c149
 
 
 
 
 
 
 
 
 
 
 
ff8feee
e38c149
 
 
 
ff8feee
e38c149
 
 
 
 
ff8feee
 
e38c149
ff8feee
 
e38c149
 
ff8feee
 
e38c149

import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Name of the column in the uploaded CSV that holds the value to forecast.
TARGET_COLUMN = 'load'


def add_date_features(data):
    """Return ``data`` with its 'date' column expanded into calendar features.

    When a 'date' column is present it is parsed with ``pd.to_datetime`` and
    replaced by 'year', 'month', 'day' and 'day_of_week' columns. A frame
    without a 'date' column is returned unchanged. The input frame is not
    modified.

    Raises:
        ValueError / TypeError: propagated from ``pd.to_datetime`` when the
            column cannot be parsed.
    """
    if 'date' not in data.columns:
        return data
    dates = pd.to_datetime(data['date'])
    return data.drop(columns=['date']).assign(
        year=dates.dt.year,
        month=dates.dt.month,
        day=dates.dt.day,
        day_of_week=dates.dt.dayofweek,
    )


def fill_missing_with_mean(data):
    """Return ``data`` with missing numeric values replaced by column means.

    ``numeric_only=True`` keeps text columns from raising ``TypeError`` while
    the means are computed; such columns are left as they are.
    """
    return data.fillna(data.mean(numeric_only=True))


def split_features_target(data, target=TARGET_COLUMN):
    """Split ``data`` into model inputs and the target series.

    Only numeric and boolean columns are kept as features, because the
    regressor cannot be fitted on text columns.

    Returns:
        A ``(X, y, dropped)`` tuple: the feature frame, the target series and
        the list of non-numeric column names that were left out of ``X``.

    Raises:
        KeyError: if ``target`` is not a column of ``data``.
    """
    y = data[target]
    candidates = data.drop(columns=[target])
    X = candidates.select_dtypes(include=['number', 'bool'])
    dropped = [column for column in candidates.columns if column not in X.columns]
    return X, y, dropped


def main():
    """Render the load-forecasting page.

    Reads an uploaded CSV, trains a random-forest regressor on it, reports the
    test-set MSE and feature importances, and predicts the load for
    user-entered feature values. Problems with the uploaded data are shown
    with ``st.error`` instead of surfacing as tracebacks.
    """
    # Title of the Streamlit app
    st.title("Load Forecasting Application")

    # File upload section
    uploaded_file = st.file_uploader("Upload a CSV file containing historical load data", type=["csv"])
    if uploaded_file is None:
        return

    # Load the dataset
    try:
        data = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded file as CSV: {exc}")
        return
    st.write("Preview of the uploaded data:")
    st.write(data.head())

    # Without the target column there is nothing to train on.
    if TARGET_COLUMN not in data.columns:
        st.error(f"The uploaded file must contain a '{TARGET_COLUMN}' column.")
        return

    # Replace the date column (if any) with numeric calendar features
    try:
        data = add_date_features(data)
    except (ValueError, TypeError) as exc:
        st.error(f"Could not parse the 'date' column: {exc}")
        return

    # Check for missing values
    if data.isnull().values.any():
        st.write("The dataset contains missing values. They will be filled with the mean.")
        data = fill_missing_with_mean(data)

    # Define features and target variable
    X, y, dropped = split_features_target(data)
    if dropped:
        st.warning(f"Ignoring non-numeric columns: {', '.join(map(str, dropped))}")
    if not pd.api.types.is_numeric_dtype(y):
        st.error(f"The '{TARGET_COLUMN}' column must be numeric.")
        return
    # train_test_split needs at least one row on each side of the split.
    if len(X) < 2 or X.shape[1] == 0:
        st.error("At least two rows and one numeric feature column are required to train the model.")
        return

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Model training
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    # Model prediction
    y_pred = model.predict(X_test)

    # Calculate and display performance metrics
    mse = mean_squared_error(y_test, y_pred)
    st.write(f"Mean Squared Error (MSE): {mse:.2f}")

    # Feature importance
    feature_importance = pd.DataFrame({
        'Feature': X.columns,
        'Importance': model.feature_importances_
    }).sort_values(by='Importance', ascending=False)

    st.write("Feature Importance:")
    st.write(feature_importance)

    # Future prediction
    st.write("## Predict Future Load")
    user_input = {
        feature: st.number_input(f"Enter value for {feature}")
        for feature in X.columns
    }

    if st.button("Predict"):
        # Use a frame with the training column names so the model receives
        # the same feature layout it was fitted on.
        input_data = pd.DataFrame([user_input], columns=X.columns)
        prediction = model.predict(input_data)
        st.write(f"Predicted Load: {prediction[0]:.2f}")


# Streamlit executes the script as ``__main__``, so the page still renders on
# every run while importing this module has no side effects.
if __name__ == "__main__":
    main()