sunnynazir committed on
Commit
e38c149
·
verified ·
1 Parent(s): 55ba687

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -41
app.py CHANGED
@@ -3,58 +3,72 @@ import pandas as pd
3
  import numpy as np
4
  from sklearn.model_selection import train_test_split
5
  from sklearn.ensemble import RandomForestRegressor
6
- from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
7
- import matplotlib.pyplot as plt
8
 
9
- # Title
10
- st.title("AI-Powered Load Forecasting")
 
 
 
11
 
12
- # Upload Dataset
13
- st.sidebar.header("Upload Data")
14
- uploaded_file = st.sidebar.file_uploader("Upload your CSV file", type=["csv"])
15
  if uploaded_file is not None:
 
16
  data = pd.read_csv(uploaded_file)
17
- st.write("Data Preview")
18
- st.write(data.head())
19
- else:
20
- st.info("Awaiting CSV file upload. You can use the sample dataset.")
21
- # Load sample data
22
- data = pd.read_csv("sample_data.csv")
23
- st.write("Using Sample Data")
24
  st.write(data.head())
25
 
26
- # Feature Selection
27
- st.sidebar.header("Feature Selection")
28
- target_variable = st.sidebar.selectbox("Select Target Variable", options=data.columns, index=len(data.columns) - 1)
29
- predictors = st.sidebar.multiselect("Select Predictor Variables", options=[col for col in data.columns if col != target_variable], default=data.columns[:-1])
 
 
 
 
 
 
 
 
30
 
31
- # Model Training
32
- if st.sidebar.button("Train Model"):
33
- st.subheader("Training the Model...")
 
34
 
35
- # Split data
36
- X = data[predictors]
37
- y = data[target_variable]
 
 
38
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
39
 
40
- # Train Random Forest Regressor
41
  model = RandomForestRegressor(n_estimators=100, random_state=42)
42
  model.fit(X_train, y_train)
 
 
43
  y_pred = model.predict(X_test)
44
 
45
- # Model Evaluation
46
- st.write("**Evaluation Metrics:**")
47
- st.write(f"Mean Absolute Error (MAE): {mean_absolute_error(y_test, y_pred):.2f}")
48
- st.write(f"Mean Squared Error (MSE): {mean_squared_error(y_test, y_pred):.2f}")
49
- st.write(f"R² Score: {r2_score(y_test, y_pred):.2f}")
50
-
51
- # Plot Results
52
- fig, ax = plt.subplots()
53
- ax.plot(y_test.values, label="Actual", marker="o")
54
- ax.plot(y_pred, label="Predicted", marker="x")
55
- ax.legend()
56
- ax.set_title("Actual vs. Predicted Load")
57
- st.pyplot(fig)
58
-
59
- # Footer
60
- st.sidebar.markdown("Developed by [Sunny Nazir](https://huggingface.co/spaces)")
 
 
 
 
 
 
 
 
3
  import numpy as np
4
  from sklearn.model_selection import train_test_split
5
  from sklearn.ensemble import RandomForestRegressor
6
+ from sklearn.metrics import mean_squared_error
 
7
 
8
+ # Title of the Streamlit app
9
+ st.title("Load Forecasting Application")
10
+
11
+ # File upload section
12
+ uploaded_file = st.file_uploader("Upload a CSV file containing historical load data", type=["csv"])
13
 
 
 
 
14
  if uploaded_file is not None:
15
+ # Load the dataset
16
  data = pd.read_csv(uploaded_file)
17
+ st.write("Preview of the uploaded data:")
 
 
 
 
 
 
18
  st.write(data.head())
19
 
20
+ # Ensure the date column is in datetime format
21
+ if 'date' in data.columns:
22
+ data['date'] = pd.to_datetime(data['date'])
23
+
24
+ # Extract useful features from the date column
25
+ data['year'] = data['date'].dt.year
26
+ data['month'] = data['date'].dt.month
27
+ data['day'] = data['date'].dt.day
28
+ data['day_of_week'] = data['date'].dt.dayofweek
29
+
30
+ # Drop the original date column
31
+ data = data.drop(columns=['date'])
32
 
33
+ # Check for missing values
34
+ if data.isnull().sum().sum() > 0:
35
+ st.write("The dataset contains missing values. They will be filled with the mean.")
36
+ data = data.fillna(data.mean())
37
 
38
+ # Define features and target variable
39
+ X = data.drop(columns=['load']) # Features (exclude the target 'load')
40
+ y = data['load'] # Target variable
41
+
42
+ # Split the data into training and testing sets
43
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
44
 
45
+ # Model training
46
  model = RandomForestRegressor(n_estimators=100, random_state=42)
47
  model.fit(X_train, y_train)
48
+
49
+ # Model prediction
50
  y_pred = model.predict(X_test)
51
 
52
+ # Calculate and display performance metrics
53
+ mse = mean_squared_error(y_test, y_pred)
54
+ st.write(f"Mean Squared Error (MSE): {mse:.2f}")
55
+
56
+ # Feature importance
57
+ feature_importance = pd.DataFrame({
58
+ 'Feature': X.columns,
59
+ 'Importance': model.feature_importances_
60
+ }).sort_values(by='Importance', ascending=False)
61
+
62
+ st.write("Feature Importance:")
63
+ st.write(feature_importance)
64
+
65
+ # Future prediction
66
+ st.write("## Predict Future Load")
67
+ user_input = {}
68
+ for feature in X.columns:
69
+ user_input[feature] = st.number_input(f"Enter value for {feature}")
70
+
71
+ if st.button("Predict"):
72
+ input_data = np.array([list(user_input.values())]).reshape(1, -1)
73
+ prediction = model.predict(input_data)
74
+ st.write(f"Predicted Load: {prediction[0]:.2f}")