File size: 2,553 Bytes
ff8feee
 
 
 
 
e38c149
ff8feee
e38c149
 
 
 
 
ff8feee
 
e38c149
ff8feee
e38c149
ff8feee
 
e38c149
 
 
 
 
 
 
 
 
 
 
 
ff8feee
e38c149
 
 
 
ff8feee
e38c149
 
 
 
 
ff8feee
 
e38c149
ff8feee
 
e38c149
 
ff8feee
 
e38c149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Column names the app expects in the uploaded CSV.
TARGET_COLUMN = "load"
DATE_COLUMN = "date"


def add_date_features(data: pd.DataFrame) -> pd.DataFrame:
    """Replace the ``date`` column with numeric calendar features.

    If ``data`` has no ``date`` column it is returned unchanged. Otherwise
    the column is parsed with ``pd.to_datetime`` and replaced by ``year``,
    ``month``, ``day`` and ``day_of_week`` columns appended at the end.
    The input frame is not modified.

    Raises:
        ValueError / TypeError: propagated from ``pd.to_datetime`` when the
            column cannot be parsed as dates.
    """
    if DATE_COLUMN not in data.columns:
        return data

    dates = pd.to_datetime(data[DATE_COLUMN])
    return data.drop(columns=[DATE_COLUMN]).assign(
        year=dates.dt.year,
        month=dates.dt.month,
        day=dates.dt.day,
        day_of_week=dates.dt.dayofweek,
    )


def fill_missing_values(data: pd.DataFrame) -> pd.DataFrame:
    """Return ``data`` with missing numeric values replaced by column means.

    ``numeric_only=True`` is required: with pandas >= 2.0 a plain
    ``data.mean()`` raises ``TypeError`` as soon as the frame contains a
    non-numeric column. Missing values in non-numeric columns are left as-is.
    """
    return data.fillna(data.mean(numeric_only=True))


def split_features_target(data: pd.DataFrame):
    """Split ``data`` into ``(X, y)`` for model training.

    ``y`` is the ``load`` column. ``X`` is every other numeric or boolean
    column; non-numeric columns are excluded because the random forest
    cannot be fitted on them.

    Raises:
        KeyError: if ``data`` has no ``load`` column.
    """
    features = data.drop(columns=[TARGET_COLUMN])
    features = features.select_dtypes(include=["number", "bool"])
    return features, data[TARGET_COLUMN]


def train_and_evaluate(X: pd.DataFrame, y: pd.Series):
    """Fit a random forest on an 80/20 split and return ``(model, mse)``.

    ``mse`` is the mean squared error on the held-out 20% of the rows.
    Both the split and the forest use ``random_state=42``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)
    mse = mean_squared_error(y_test, model.predict(X_test))
    return model, mse


def main() -> None:
    """Render the app: upload a CSV, train a model, report metrics, predict."""
    # Title of the Streamlit app
    st.title("Load Forecasting Application")

    # File upload section
    uploaded_file = st.file_uploader(
        "Upload a CSV file containing historical load data", type=["csv"]
    )
    if uploaded_file is None:
        return

    # Load the dataset
    try:
        data = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        st.error(f"Could not read the uploaded CSV file: {exc}")
        return
    st.write("Preview of the uploaded data:")
    st.write(data.head())

    # Without the target column nothing below can work; fail with a clear
    # message instead of a KeyError traceback.
    if TARGET_COLUMN not in data.columns:
        st.error(f"The uploaded file must contain a '{TARGET_COLUMN}' column.")
        return

    # Convert the date column (if any) into numeric calendar features
    try:
        data = add_date_features(data)
    except (ValueError, TypeError) as exc:
        st.error(f"Could not parse the '{DATE_COLUMN}' column as dates: {exc}")
        return

    # Check for missing values
    if data.isnull().values.any():
        st.write("The dataset contains missing values. They will be filled with the mean.")
        data = fill_missing_values(data)

    if not pd.api.types.is_numeric_dtype(data[TARGET_COLUMN]):
        st.error(f"The '{TARGET_COLUMN}' column must contain numeric values.")
        return

    # Define features and target variable
    X, y = split_features_target(data)

    ignored = [
        column
        for column in data.columns
        if column != TARGET_COLUMN and column not in X.columns
    ]
    if ignored:
        st.warning(f"Ignoring non-numeric columns: {', '.join(map(str, ignored))}")

    if X.shape[1] == 0:
        st.error("The dataset has no numeric feature columns to train on.")
        return
    if len(X) < 2:
        # train_test_split needs at least one row on each side of the split.
        st.error("At least two rows of data are required to train the model.")
        return

    # Model training and evaluation
    model, mse = train_and_evaluate(X, y)
    st.write(f"Mean Squared Error (MSE): {mse:.2f}")

    # Feature importance
    feature_importance = pd.DataFrame({
        'Feature': X.columns,
        'Importance': model.feature_importances_
    }).sort_values(by='Importance', ascending=False)

    st.write("Feature Importance:")
    st.write(feature_importance)

    # Future prediction
    st.write("## Predict Future Load")
    user_input = {
        feature: st.number_input(f"Enter value for {feature}")
        for feature in X.columns
    }

    if st.button("Predict"):
        # Predict from a DataFrame carrying the training column names: the
        # model was fitted on a DataFrame, so a bare array would trigger
        # sklearn's "X does not have valid feature names" warning and depend
        # on implicit column order.
        input_data = pd.DataFrame([user_input], columns=X.columns)
        prediction = model.predict(input_data)
        st.write(f"Predicted Load: {prediction[0]:.2f}")


# Streamlit executes the script as the ``__main__`` module, so the app still
# runs under ``streamlit run`` while the helpers above stay importable.
if __name__ == "__main__":
    main()