File size: 6,454 Bytes
faa3ed6 59f6d73 faa3ed6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
# Comprehensive imports with error handling
import sys
import subprocess
# Ensure required packages are installed
def install(package):
    """Install *package* with pip under the interpreter running this script.

    The command is ``<sys.executable> -m pip install <package>``, so the
    package lands in the same environment the script is executing in.
    ``subprocess.check_call`` raises ``subprocess.CalledProcessError`` when
    pip exits with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package]
    subprocess.check_call(pip_command)
import importlib

# Import names whose pip distribution is published under a different name.
_PIP_NAMES = {"sklearn": "scikit-learn"}

# Top-level packages we already tried to install. A package that still fails
# to import after installation is re-raised instead of being retried forever.
_attempted_installs = set()

while True:
    try:
        import streamlit as st
        import pandas as pd
        import numpy as np
        from sklearn.model_selection import train_test_split
        from sklearn.preprocessing import StandardScaler
        from sklearn.ensemble import RandomForestRegressor
        import matplotlib.pyplot as plt
        import seaborn as sns
        break
    except ImportError as e:
        # ImportError.name holds the module that failed to import; it is more
        # reliable than splitting the message text on quotes. Only the
        # top-level package is installable ("sklearn.ensemble" -> "sklearn").
        missing_root = (e.name or "").partition(".")[0]
        if not missing_root or missing_root in _attempted_installs:
            raise
        _attempted_installs.add(missing_root)
        print(f"Missing package: {e}")
        install(_PIP_NAMES.get(missing_root, missing_root))
        # Make the import system notice the freshly installed files before
        # the loop retries the imports.
        importlib.invalidate_caches()

# Seed for reproducibility
np.random.seed(42)
# Function to generate student data
def generate_student_data(n_samples: int = 500, random_state=None):
    """Build a synthetic student dataset with a derived ``final_grade`` column.

    Six feature columns are drawn at random, then ``final_grade`` is computed
    as a fixed weighted sum of those features plus standard-normal noise and
    min-max rescaled to the range 0-10.

    Args:
        n_samples: Number of rows to generate.
        random_state: Optional seed for a private ``numpy.random.RandomState``.
            When ``None`` (the default) the global ``numpy.random`` state is
            used, so a prior ``np.random.seed(...)`` call controls the output.

    Returns:
        A ``pandas.DataFrame`` with columns ``attendance_rate``,
        ``study_hours_per_week``, ``previous_semester_gpa``,
        ``socioeconomic_status``, ``extracurricular_activities``,
        ``family_support`` and ``final_grade``. With ``n_samples=0`` the frame
        is empty but keeps all columns.
    """
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # The draw order below is significant: changing it changes the values
    # produced for a given seed.
    data = {
        'attendance_rate': rng.uniform(0.5, 1, n_samples),
        'study_hours_per_week': rng.uniform(0, 40, n_samples),
        'previous_semester_gpa': rng.uniform(2.0, 4.0, n_samples),
        'socioeconomic_status': rng.choice([1, 2, 3], n_samples),
        'extracurricular_activities': rng.randint(0, 5, n_samples),
        'family_support': rng.choice([0, 1], n_samples),
    }

    # Create target variable with dependencies
    raw_grade = (
        0.3 * data['attendance_rate'] * 10 +
        0.25 * data['study_hours_per_week'] +
        0.2 * data['previous_semester_gpa'] * 2 +
        0.1 * (data['socioeconomic_status'] * 2) +
        0.05 * data['extracurricular_activities'] +
        0.1 * (data['family_support'] * 3) +
        rng.normal(0, 1, n_samples)
    )

    # Normalize final grade to be between 0 and 10
    if raw_grade.size == 0:
        # min/max of an empty array raise; nothing to rescale.
        final_grade = raw_grade
    else:
        lowest = raw_grade.min()
        span = raw_grade.max() - lowest
        if span == 0:
            # All grades identical (e.g. a single sample): avoid 0/0 -> NaN
            # and map them to the bottom of the range.
            final_grade = np.zeros_like(raw_grade)
        else:
            final_grade = np.clip((raw_grade - lowest) / span * 10, 0, 10)

    data['final_grade'] = final_grade
    return pd.DataFrame(data)
# Main application logic
# (minimum grade, banner colour, label) bands, checked from highest to lowest.
_GRADE_BANDS = (
    (8, "green", "Excellent"),
    (6, "blue", "Good"),
    (4, "orange", "Average"),
)

# HTML banner shown after a prediction; filled in with str.format.
_RESULT_BANNER_HTML = """
<div style="background-color:{color}; padding:10px; border-radius:10px;">
<h2 style="color:white; text-align:center;">
Predicted Final Grade: {grade}/10
<br>Performance: {performance}
</h2>
</div>
"""

_ABOUT_MARKDOWN = """
### About the Model
This machine learning model predicts student performance based on:
- Attendance Rate
- Weekly Study Hours
- Previous Semester GPA
- Socioeconomic Status
- Extracurricular Activities
- Family Support
"""


def _grade_band(predicted_grade):
    """Return the (colour, label) pair for a predicted grade."""
    for minimum, color, label in _GRADE_BANDS:
        if predicted_grade >= minimum:
            return color, label
    return "red", "Needs Improvement"


def main():
    """Train the grade model and render the Streamlit prediction page.

    On every run this generates the synthetic dataset, fits a
    ``StandardScaler`` and a ``RandomForestRegressor`` on an 80% training
    split, and draws the sidebar inputs. When the "Predict Performance"
    button is pressed it predicts a grade for the sidebar values, shows a
    colour-coded banner and plots the model's feature importances. The
    "About the Model" text is always shown.
    """
    # Streamlit App Configuration. Done before any other work so it is
    # guaranteed to be the first Streamlit call of the run.
    st.set_page_config(page_title="Student Performance Predictor", page_icon="🎓")

    # Generate dataset and prepare model
    df = generate_student_data()
    X = df.drop('final_grade', axis=1)
    y = df['final_grade']

    # Split and scale data. The held-out 20% is not used below; the split is
    # kept so the model is trained on the same rows as before.
    X_train, _X_test, y_train, _y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)

    # Train Random Forest model
    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
    rf_model.fit(X_train_scaled, y_train)

    # App Title
    st.title("🎓 Student Performance Prediction Model")

    # Sidebar for Input
    st.sidebar.header("Student Information")

    # Input Sliders and Selectors
    attendance_rate = st.sidebar.slider(
        "Attendance Rate",
        min_value=0.5,
        max_value=1.0,
        value=0.85,
        step=0.05,
    )
    study_hours = st.sidebar.slider(
        "Weekly Study Hours",
        min_value=0,
        max_value=40,
        value=25,
    )
    prev_gpa = st.sidebar.slider(
        "Previous Semester GPA",
        min_value=2.0,
        max_value=4.0,
        value=3.5,
        step=0.1,
    )
    socioeconomic_status = st.sidebar.selectbox(
        "Socioeconomic Status",
        [1, 2, 3],
        index=1,
        format_func=lambda x: {1: "Low", 2: "Medium", 3: "High"}[x],
    )
    extracurricular_activities = st.sidebar.slider(
        "Extracurricular Activities",
        min_value=0,
        max_value=4,
        value=2,
    )
    family_support = st.sidebar.selectbox(
        "Family Support",
        [0, 1],
        index=1,
        format_func=lambda x: "Yes" if x == 1 else "No",
    )

    # Prediction Function
    def predict_student_performance(input_data):
        input_scaled = scaler.transform(input_data)
        prediction = rf_model.predict(input_scaled)[0]
        return np.round(prediction, 2)

    # NOTE(review): indentation was lost in the source this was restored
    # from; the feature-importance chart is assumed to belong to the button
    # branch and the "About" text to sit outside it -- confirm.
    # Predict Button
    if st.sidebar.button("Predict Performance"):
        # Prepare input data as a one-row frame with the training column
        # names, because the scaler was fitted on a named DataFrame and a
        # bare array makes scikit-learn warn about missing feature names.
        input_data = pd.DataFrame(
            [[
                attendance_rate,
                study_hours,
                prev_gpa,
                socioeconomic_status,
                extracurricular_activities,
                family_support,
            ]],
            columns=X.columns,
        )

        # Make prediction
        predicted_grade = predict_student_performance(input_data)

        # Display prediction
        st.subheader("Prediction Results")

        # Color-coded performance indicator
        color, performance = _grade_band(predicted_grade)
        st.markdown(
            _RESULT_BANNER_HTML.format(
                color=color,
                grade=predicted_grade,
                performance=performance,
            ),
            unsafe_allow_html=True,
        )

        # Feature Importance Visualization
        st.subheader("Feature Importance")
        feature_importance = pd.DataFrame({
            'feature': X.columns,
            'importance': rf_model.feature_importances_,
        }).sort_values('importance', ascending=False)

        fig, ax = plt.subplots(figsize=(10, 6))
        sns.barplot(x='importance', y='feature', data=feature_importance, ax=ax)
        # Label through the Axes object rather than global pyplot state.
        ax.set_title('Factors Influencing Student Performance')
        ax.set_xlabel('Importance Score')
        st.pyplot(fig)
        # Streamlit reruns this function on every interaction; close the
        # figure so matplotlib does not accumulate one per rerun.
        plt.close(fig)

    # Additional Information
    st.markdown(_ABOUT_MARKDOWN)
# Run the main application
# Guarded so that importing this module does not start the app.
if __name__ == "__main__":
    main()