V8055 committed on
Commit
59f6d73
·
verified ·
1 Parent(s): b81e5a5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +182 -0
app.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import streamlit as st
3
+ import pandas as pd
4
+ import numpy as np
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.preprocessing import StandardScaler
7
+ from sklearn.ensemble import RandomForestRegressor
8
+ import matplotlib.pyplot as plt
9
+ import seaborn as sns
10
+
11
# Seed for reproducibility: generate_student_data() below draws every feature
# and its noise term from NumPy's global random state, so fixing the seed here
# fixes the synthetic dataset.
np.random.seed(42)
13
+
14
+ # Function to generate student data
15
def generate_student_data(n_samples=500):
    """Build a synthetic student dataset with a derived ``final_grade`` column.

    All values are drawn from NumPy's global random state (``np.random``), so
    seed it beforehand for reproducible output. The target is a weighted sum of
    the features plus Gaussian noise, min-max rescaled onto the 0-10 range.

    Args:
        n_samples: Number of student rows to generate. ``0`` yields an empty
            frame with the full set of columns.

    Returns:
        pandas.DataFrame with the six feature columns followed by
        ``final_grade``.
    """
    # The draw order below is part of the contract: changing it would change
    # the dataset produced for a given seed.
    data = {
        'attendance_rate': np.random.uniform(0.5, 1, n_samples),
        'study_hours_per_week': np.random.uniform(0, 40, n_samples),
        'previous_semester_gpa': np.random.uniform(2.0, 4.0, n_samples),
        'socioeconomic_status': np.random.choice([1, 2, 3], n_samples),
        'extracurricular_activities': np.random.randint(0, 5, n_samples),
        'family_support': np.random.choice([0, 1], n_samples),
    }

    # Create target variable with dependencies
    raw_grade = (
        0.3 * data['attendance_rate'] * 10 +
        0.25 * data['study_hours_per_week'] +
        0.2 * data['previous_semester_gpa'] * 2 +
        0.1 * (data['socioeconomic_status'] * 2) +
        0.05 * data['extracurricular_activities'] +
        0.1 * (data['family_support'] * 3) +
        np.random.normal(0, 1, n_samples)
    )

    # Normalize final grade to be between 0 and 10
    if raw_grade.size == 0:
        # Nothing to rescale; min()/max() would raise on an empty array.
        final_grade = raw_grade
    else:
        lowest = np.min(raw_grade)
        span = np.max(raw_grade) - lowest
        if span > 0:
            final_grade = np.clip((raw_grade - lowest) / span * 10, 0, 10)
        else:
            # A single row (or all-identical grades) makes min-max scaling a
            # 0/0 division that would yield NaN; use the scale midpoint.
            final_grade = np.full_like(raw_grade, 5.0)

    data['final_grade'] = final_grade
    return pd.DataFrame(data)
42
+
43
# Streamlit App
# Page configuration comes first so that it remains the first Streamlit
# command of the script, ahead of the cached model preparation below.
st.set_page_config(page_title="Student Performance Predictor", page_icon="🎓")


# Generate dataset and prepare model
@st.cache_resource(show_spinner=False)
def _prepare_model():
    """Generate the synthetic dataset, fit the scaler and train the forest.

    Streamlit re-executes this script on every widget interaction; caching
    the result means the data is generated and the model trained once per
    server process instead of on every rerun.

    Returns:
        Tuple ``(X, scaler, rf_model)``: the feature frame (used for its
        column names), the fitted ``StandardScaler`` and the trained
        ``RandomForestRegressor``.
    """
    df = generate_student_data()
    X = df.drop('final_grade', axis=1)
    y = df['final_grade']

    # Split and scale data. The held-out 20% is not used by the app, but the
    # split is kept so the model trains on the same rows as before.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)

    # Train Random Forest model
    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
    rf_model.fit(X_train_scaled, y_train)
    return X, scaler, rf_model


X, scaler, rf_model = _prepare_model()
60
+
61
# App Title
st.title("🎓 Student Performance Prediction Model")

# Sidebar for Input: every widget below is attached to the same sidebar.
sidebar = st.sidebar
sidebar.header("Student Information")

# Continuous inputs, bounded like the ranges used to generate the training data.
attendance_rate = sidebar.slider(
    "Attendance Rate", min_value=0.5, max_value=1.0, value=0.85, step=0.05
)
study_hours = sidebar.slider(
    "Weekly Study Hours", min_value=0, max_value=40, value=25
)
prev_gpa = sidebar.slider(
    "Previous Semester GPA", min_value=2.0, max_value=4.0, value=3.5, step=0.1
)

# Categorical inputs: the numeric code is what the model receives, the label
# is only what the user sees.
ses_labels = {1: "Low", 2: "Medium", 3: "High"}
socioeconomic_status = sidebar.selectbox(
    "Socioeconomic Status",
    [1, 2, 3],
    index=1,
    format_func=lambda level: ses_labels[level],
)

extracurricular_activities = sidebar.slider(
    "Extracurricular Activities", min_value=0, max_value=4, value=2
)

family_support = sidebar.selectbox(
    "Family Support",
    [0, 1],
    index=1,
    format_func=lambda flag: "No" if flag != 1 else "Yes",
)
111
+
112
+ # Prediction Function
113
def predict_student_performance(input_data):
    """Predict the final grade for one student from raw feature values.

    Args:
        input_data: 2-D array-like or DataFrame whose first row holds the six
            feature values in the same column order the scaler was fitted on.

    Returns:
        The model's prediction for the first row, rounded to two decimals.
    """
    # The scaler was fitted on a DataFrame, so it remembers the column names.
    # Re-attach them to plain arrays; otherwise scikit-learn warns on every
    # call that the input lacks valid feature names.
    feature_names = getattr(scaler, "feature_names_in_", None)
    if feature_names is not None and not isinstance(input_data, pd.DataFrame):
        input_data = pd.DataFrame(input_data, columns=feature_names)

    input_scaled = scaler.transform(input_data)
    prediction = rf_model.predict(input_scaled)[0]
    return np.round(prediction, 2)
117
+
118
+ # Predict Button
119
if st.sidebar.button("Predict Performance"):
    # Assemble the single feature row in the column order used for training.
    feature_row = [
        attendance_rate,
        study_hours,
        prev_gpa,
        socioeconomic_status,
        extracurricular_activities,
        family_support,
    ]
    input_data = np.array([feature_row])

    # Make prediction
    predicted_grade = predict_student_performance(input_data)

    # Display prediction
    st.subheader("Prediction Results")

    # Grade bands, highest first: (minimum grade, banner color, label).
    # Anything below the last band falls through to the default.
    grade_bands = (
        (8, "green", "Excellent"),
        (6, "blue", "Good"),
        (4, "orange", "Average"),
    )
    color, performance = next(
        (
            (shade, label)
            for floor, shade, label in grade_bands
            if predicted_grade >= floor
        ),
        ("red", "Needs Improvement"),
    )

    # Color-coded banner; the HTML literal is kept flush-left so its text is
    # exactly what gets passed to st.markdown.
    st.markdown(f"""
<div style="background-color:{color}; padding:10px; border-radius:10px;">
<h2 style="color:white; text-align:center;">
Predicted Final Grade: {predicted_grade}/10
<br>Performance: {performance}
</h2>
</div>
""", unsafe_allow_html=True)
158
+
159
# Feature Importance Visualization
# NOTE(review): the listing this section comes from carries no indentation;
# confirm whether it is meant to run only after the Predict button is pressed.
st.subheader("Feature Importance")
feature_importance = pd.DataFrame({
    'feature': X.columns,
    'importance': rf_model.feature_importances_,
}).sort_values('importance', ascending=False)

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x='importance', y='feature', data=feature_importance, ax=ax)
# Label through the Axes object rather than pyplot's implicit "current axes".
ax.set_title('Factors Influencing Student Performance')
ax.set_xlabel('Importance Score')
st.pyplot(fig)
# Figures created via plt.subplots stay registered with pyplot until closed;
# release this one so figures do not accumulate across script reruns.
plt.close(fig)
171
+
172
# Additional Information: static description of the inputs the model uses.
about_text = """
### About the Model
This machine learning model predicts student performance based on:
- Attendance Rate
- Weekly Study Hours
- Previous Semester GPA
- Socioeconomic Status
- Extracurricular Activities
- Family Support
"""
st.markdown(about_text)