V8055 committed on
Commit
faa3ed6
·
verified ·
1 Parent(s): 43dadb7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +176 -149
app.py CHANGED
@@ -1,12 +1,33 @@
1
- # app.py
2
- import streamlit as st
3
- import pandas as pd
4
- import numpy as np
5
- from sklearn.model_selection import train_test_split
6
- from sklearn.preprocessing import StandardScaler
7
- from sklearn.ensemble import RandomForestRegressor
8
- import matplotlib.pyplot as plt
9
- import seaborn as sns
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  # Seed for reproducibility
12
  np.random.seed(42)
@@ -40,143 +61,149 @@ def generate_student_data(n_samples=500):
40
 
41
  return pd.DataFrame(data)
42
 
43
- # Generate dataset and prepare model
44
- df = generate_student_data()
45
- X = df.drop('final_grade', axis=1)
46
- y = df['final_grade']
47
-
48
- # Split and scale data
49
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
50
- scaler = StandardScaler()
51
- X_train_scaled = scaler.fit_transform(X_train)
52
- X_test_scaled = scaler.transform(X_test)
53
-
54
- # Train Random Forest model
55
- rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
56
- rf_model.fit(X_train_scaled, y_train)
57
-
58
- # Streamlit App
59
- st.set_page_config(page_title="Student Performance Predictor", page_icon="🎓")
60
-
61
- # App Title
62
- st.title("🎓 Student Performance Prediction Model")
63
-
64
- # Sidebar for Input
65
- st.sidebar.header("Student Information")
66
-
67
- # Input Sliders and Selectors
68
- attendance_rate = st.sidebar.slider(
69
- "Attendance Rate",
70
- min_value=0.5,
71
- max_value=1.0,
72
- value=0.85,
73
- step=0.05
74
- )
75
-
76
- study_hours = st.sidebar.slider(
77
- "Weekly Study Hours",
78
- min_value=0,
79
- max_value=40,
80
- value=25
81
- )
82
-
83
- prev_gpa = st.sidebar.slider(
84
- "Previous Semester GPA",
85
- min_value=2.0,
86
- max_value=4.0,
87
- value=3.5,
88
- step=0.1
89
- )
90
-
91
- socioeconomic_status = st.sidebar.selectbox(
92
- "Socioeconomic Status",
93
- [1, 2, 3],
94
- index=1,
95
- format_func=lambda x: {1:"Low", 2:"Medium", 3:"High"}[x]
96
- )
97
-
98
- extracurricular_activities = st.sidebar.slider(
99
- "Extracurricular Activities",
100
- min_value=0,
101
- max_value=4,
102
- value=2
103
- )
104
-
105
- family_support = st.sidebar.selectbox(
106
- "Family Support",
107
- [0, 1],
108
- index=1,
109
- format_func=lambda x: "Yes" if x == 1 else "No"
110
- )
111
-
112
- # Prediction Function
113
- def predict_student_performance(input_data):
114
- input_scaled = scaler.transform(input_data)
115
- prediction = rf_model.predict(input_scaled)[0]
116
- return np.round(prediction, 2)
117
-
118
- # Predict Button
119
- if st.sidebar.button("Predict Performance"):
120
- # Prepare input data
121
- input_data = np.array([[
122
- attendance_rate,
123
- study_hours,
124
- prev_gpa,
125
- socioeconomic_status,
126
- extracurricular_activities,
127
- family_support
128
- ]])
129
-
130
- # Make prediction
131
- predicted_grade = predict_student_performance(input_data)
132
-
133
- # Display prediction
134
- st.subheader("Prediction Results")
135
-
136
- # Color-coded performance indicator
137
- if predicted_grade >= 8:
138
- color = "green"
139
- performance = "Excellent"
140
- elif predicted_grade >= 6:
141
- color = "blue"
142
- performance = "Good"
143
- elif predicted_grade >= 4:
144
- color = "orange"
145
- performance = "Average"
146
- else:
147
- color = "red"
148
- performance = "Needs Improvement"
149
-
150
- st.markdown(f"""
151
- <div style="background-color:{color}; padding:10px; border-radius:10px;">
152
- <h2 style="color:white; text-align:center;">
153
- Predicted Final Grade: {predicted_grade}/10
154
- <br>Performance: {performance}
155
- </h2>
156
- </div>
157
- """, unsafe_allow_html=True)
158
-
159
- # Feature Importance Visualization
160
- st.subheader("Feature Importance")
161
- feature_importance = pd.DataFrame({
162
- 'feature': X.columns,
163
- 'importance': rf_model.feature_importances_
164
- }).sort_values('importance', ascending=False)
165
-
166
- fig, ax = plt.subplots(figsize=(10, 6))
167
- sns.barplot(x='importance', y='feature', data=feature_importance, ax=ax)
168
- plt.title('Factors Influencing Student Performance')
169
- plt.xlabel('Importance Score')
170
- st.pyplot(fig)
171
-
172
- # Additional Information
173
- st.markdown("""
174
- ### About the Model
175
- This machine learning model predicts student performance based on:
176
- - Attendance Rate
177
- - Weekly Study Hours
178
- - Previous Semester GPA
179
- - Socioeconomic Status
180
- - Extracurricular Activities
181
- - Family Support
182
- """)
 
 
 
 
 
 
 
1
+ # Comprehensive imports with error handling
2
+ import sys
3
+ import subprocess
4
+
5
+ # Ensure required packages are installed
6
def install(package):
    """Install ``package`` with pip into the interpreter running this script.

    Args:
        package: Requirement string handed to ``pip install``. This is the
            distribution name as published on the package index, which is
            not always the same as the module's import name.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    import importlib

    # Run pip through the current interpreter so the package lands in the
    # same environment this script is executing in.
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])
    # Modules installed while the program is running may be invisible to the
    # import system until its finder caches are invalidated.
    importlib.invalidate_caches()


# Import names whose pip distribution is published under a different name.
_PIP_NAMES = {"sklearn": "scikit-learn"}

# Top-level modules already handed to pip. A module that still fails to import
# after one install attempt is re-raised instead of being retried forever.
_attempted = set()

# Retry the whole import group until it succeeds, installing one missing
# top-level package per pass, so several missing packages are all handled.
while True:
    try:
        import streamlit as st
        import pandas as pd
        import numpy as np
        from sklearn.model_selection import train_test_split
        from sklearn.preprocessing import StandardScaler
        from sklearn.ensemble import RandomForestRegressor
        import matplotlib.pyplot as plt
        import seaborn as sns
    except ImportError as e:
        # ``e.name`` is the module that could not be imported; parsing the
        # message text picks the wrong token for "cannot import name" errors.
        missing = (e.name or "").split(".")[0]
        if not missing or missing in _attempted:
            raise
        _attempted.add(missing)
        print(f"Missing package: {e}")
        install(_PIP_NAMES.get(missing, missing))
    else:
        break
31
 
32
  # Seed for reproducibility
33
  np.random.seed(42)
 
61
 
62
  return pd.DataFrame(data)
63
 
64
+ # Main application logic
65
def _train_model():
    """Generate the dataset and fit the scaler and random-forest regressor.

    Returns:
        tuple: ``(feature_names, scaler, rf_model)`` where ``feature_names``
        is the column index of the feature frame, ``scaler`` is the
        ``StandardScaler`` fitted on the training split and ``rf_model`` is
        the ``RandomForestRegressor`` fitted on the scaled training split.
    """
    df = generate_student_data()
    X = df.drop('final_grade', axis=1)
    y = df['final_grade']

    # The 20% hold-out is not used below; the split is kept so the model is
    # trained on exactly the same rows as before.
    X_train, _X_test, y_train, _y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)

    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
    rf_model.fit(X_train_scaled, y_train)
    return X.columns, scaler, rf_model


def _read_sidebar_inputs():
    """Render the sidebar widgets and return their values as one feature row.

    Returns:
        list: ``[attendance_rate, study_hours, prev_gpa, socioeconomic_status,
        extracurricular_activities, family_support]``.
    """
    st.sidebar.header("Student Information")

    attendance_rate = st.sidebar.slider(
        "Attendance Rate",
        min_value=0.5,
        max_value=1.0,
        value=0.85,
        step=0.05,
    )
    study_hours = st.sidebar.slider(
        "Weekly Study Hours",
        min_value=0,
        max_value=40,
        value=25,
    )
    prev_gpa = st.sidebar.slider(
        "Previous Semester GPA",
        min_value=2.0,
        max_value=4.0,
        value=3.5,
        step=0.1,
    )
    socioeconomic_status = st.sidebar.selectbox(
        "Socioeconomic Status",
        [1, 2, 3],
        index=1,
        format_func=lambda x: {1:"Low", 2:"Medium", 3:"High"}[x],
    )
    extracurricular_activities = st.sidebar.slider(
        "Extracurricular Activities",
        min_value=0,
        max_value=4,
        value=2,
    )
    family_support = st.sidebar.selectbox(
        "Family Support",
        [0, 1],
        index=1,
        format_func=lambda x: "Yes" if x == 1 else "No",
    )

    return [
        attendance_rate,
        study_hours,
        prev_gpa,
        socioeconomic_status,
        extracurricular_activities,
        family_support,
    ]


def _grade_band(predicted_grade):
    """Map a predicted grade to its ``(color, performance)`` display pair."""
    if predicted_grade >= 8:
        return "green", "Excellent"
    if predicted_grade >= 6:
        return "blue", "Good"
    if predicted_grade >= 4:
        return "orange", "Average"
    return "red", "Needs Improvement"


def _render_feature_importance(feature_names, rf_model):
    """Draw a bar chart of the forest's feature importances, largest first."""
    st.subheader("Feature Importance")
    feature_importance = pd.DataFrame({
        'feature': feature_names,
        'importance': rf_model.feature_importances_
    }).sort_values('importance', ascending=False)

    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x='importance', y='feature', data=feature_importance, ax=ax)
    # Label through the Axes object rather than pyplot's implicit current axes.
    ax.set_title('Factors Influencing Student Performance')
    ax.set_xlabel('Importance Score')
    st.pyplot(fig)
    # pyplot keeps every figure it creates until it is closed; close it so
    # figures do not pile up each time the button is pressed.
    plt.close(fig)


def main():
    """Train the model, render the Streamlit page and show a prediction.

    Each call trains the model, draws the title and sidebar inputs and, when
    the "Predict Performance" button is pressed, shows the predicted final
    grade with a colour-coded banner and a feature-importance chart. The
    "About the Model" section is always rendered last.
    """
    # Page configuration is issued before any other Streamlit command.
    st.set_page_config(page_title="Student Performance Predictor", page_icon="🎓")

    # NOTE(review): the model is refitted on every call of main(); if the
    # installed Streamlit version offers resource caching, consider applying
    # it to _train_model.
    feature_names, scaler, rf_model = _train_model()

    st.title("🎓 Student Performance Prediction Model")
    inputs = _read_sidebar_inputs()

    if st.sidebar.button("Predict Performance"):
        # The scaler was fitted on a DataFrame, so give it the same column
        # labels to avoid a feature-name mismatch warning on every prediction.
        # Assumes the feature columns follow the order returned by
        # _read_sidebar_inputs, as the positional array did — TODO confirm
        # against generate_student_data.
        input_data = pd.DataFrame(np.array([inputs]), columns=feature_names)
        input_scaled = scaler.transform(input_data)
        predicted_grade = np.round(rf_model.predict(input_scaled)[0], 2)

        st.subheader("Prediction Results")
        color, performance = _grade_band(predicted_grade)

        st.markdown(f"""
        <div style="background-color:{color}; padding:10px; border-radius:10px;">
        <h2 style="color:white; text-align:center;">
        Predicted Final Grade: {predicted_grade}/10
        <br>Performance: {performance}
        </h2>
        </div>
        """, unsafe_allow_html=True)

        _render_feature_importance(feature_names, rf_model)

    # Additional Information
    st.markdown("""
    ### About the Model
    This machine learning model predicts student performance based on:
    - Attendance Rate
    - Weekly Study Hours
    - Previous Semester GPA
    - Socioeconomic Status
    - Extracurricular Activities
    - Family Support
    """)


# Run the main application
if __name__ == "__main__":
    main()