# project4 / app.py
# V8055's picture
# Update app.py
# faa3ed6 verified
# Comprehensive imports with error handling
import sys
import subprocess
# Ensure required packages are installed
def install(package):
    """Install *package* with pip, using the interpreter that runs this script.

    Going through ``sys.executable -m pip`` targets the same environment the
    script itself is running in.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package]
    subprocess.check_call(pip_command)
import importlib

# Importable module name -> pip distribution name, for the cases where the two
# differ. Anything not listed is installed under its module name.
_PIP_NAMES = {"sklearn": "scikit-learn"}

# Top-level modules this script needs before the real imports can succeed.
_REQUIRED_MODULES = (
    "streamlit",
    "pandas",
    "numpy",
    "sklearn",
    "matplotlib",
    "seaborn",
)

# Probe each dependency on its own so that several missing packages are all
# installed, instead of only the first one that fails.
for _module in _REQUIRED_MODULES:
    try:
        importlib.import_module(_module)
    except ModuleNotFoundError as e:
        print(f"Missing package: {e}")
        # e.name is the module that could not be found; it can be a
        # dependency of _module rather than _module itself.
        _missing = (e.name or _module).split(".")[0]
        install(_PIP_NAMES.get(_missing, _missing))
        # Let the import system see the files pip has just written.
        importlib.invalidate_caches()

import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
import seaborn as sns

# Seed for reproducibility
np.random.seed(42)
# Function to generate student data
def generate_student_data(n_samples=500):
    """Build a synthetic student dataset with a derived ``final_grade`` column.

    The six feature columns are drawn from NumPy's global random state. The
    target is a weighted sum of those features plus standard-normal noise,
    min-max rescaled to the range 0-10.

    Args:
        n_samples: Number of rows to generate. ``0`` yields an empty frame.

    Returns:
        A ``pandas.DataFrame`` with the six feature columns followed by
        ``final_grade``.
    """
    # The draw order below is part of the behaviour: changing it changes the
    # data produced for a given seed.
    data = {
        'attendance_rate': np.random.uniform(0.5, 1, n_samples),
        'study_hours_per_week': np.random.uniform(0, 40, n_samples),
        'previous_semester_gpa': np.random.uniform(2.0, 4.0, n_samples),
        'socioeconomic_status': np.random.choice([1, 2, 3], n_samples),
        'extracurricular_activities': np.random.randint(0, 5, n_samples),
        'family_support': np.random.choice([0, 1], n_samples),
    }

    # Target variable: weighted contribution of each feature plus noise.
    raw_grade = (
        0.3 * data['attendance_rate'] * 10 +
        0.25 * data['study_hours_per_week'] +
        0.2 * data['previous_semester_gpa'] * 2 +
        0.1 * (data['socioeconomic_status'] * 2) +
        0.05 * data['extracurricular_activities'] +
        0.1 * (data['family_support'] * 3) +
        np.random.normal(0, 1, n_samples)
    )

    if raw_grade.size == 0:
        # min()/max() raise on an empty array; there is nothing to rescale.
        data['final_grade'] = raw_grade
    else:
        lowest = raw_grade.min()
        span = raw_grade.max() - lowest
        if span > 0:
            scaled = (raw_grade - lowest) / span * 10
        else:
            # All raw grades are equal (e.g. a single sample): dividing by the
            # zero span would produce NaN, so map every row to 0 instead.
            scaled = np.zeros_like(raw_grade)
        data['final_grade'] = np.clip(scaled, 0, 10)

    return pd.DataFrame(data)
# Main application logic

# Graduation-cap emoji (U+1F393), written as an escape so it cannot be
# corrupted by a wrong file encoding.
_APP_ICON = "\U0001F393"

# (minimum grade, banner colour, label), checked from the highest band down.
_PERFORMANCE_BANDS = (
    (8, "green", "Excellent"),
    (6, "blue", "Good"),
    (4, "orange", "Average"),
)
# Band used when the grade is below every threshold above.
_LOWEST_BAND = ("red", "Needs Improvement")

# HTML banner for the prediction; filled in with str.format().
_RESULT_BANNER_HTML = """
<div style="background-color:{color}; padding:10px; border-radius:10px;">
<h2 style="color:white; text-align:center;">
Predicted Final Grade: {predicted_grade}/10
<br>Performance: {performance}
</h2>
</div>
"""

_ABOUT_MODEL_MARKDOWN = """
### About the Model
This machine learning model predicts student performance based on:
- Attendance Rate
- Weekly Study Hours
- Previous Semester GPA
- Socioeconomic Status
- Extracurricular Activities
- Family Support
"""


def _train_model(df):
    """Fit a scaler and random forest on an 80% split of *df*.

    Returns:
        ``(scaler, model, feature_names)`` where ``feature_names`` lists the
        feature columns in the order the scaler was fitted on.
    """
    X = df.drop('final_grade', axis=1)
    y = df['final_grade']
    # Only the training part of the split is used; the held-out 20% is not
    # evaluated anywhere in this app.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train_scaled, y_train)
    return scaler, model, list(X.columns)


def _classify_grade(grade):
    """Return the ``(colour, label)`` band for a predicted grade."""
    for threshold, color, label in _PERFORMANCE_BANDS:
        if grade >= threshold:
            return color, label
    return _LOWEST_BAND


def _render_feature_importance(feature_names, model):
    """Render the model's feature importances as a horizontal bar chart."""
    feature_importance = pd.DataFrame({
        'feature': feature_names,
        'importance': model.feature_importances_
    }).sort_values('importance', ascending=False)
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x='importance', y='feature', data=feature_importance, ax=ax)
    # Configure the Axes directly rather than through pyplot's global
    # "current axes" state.
    ax.set_title('Factors Influencing Student Performance')
    ax.set_xlabel('Importance Score')
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; without this a new
    # figure would accumulate on each prediction.
    plt.close(fig)


def main():
    """Train the model and render the Streamlit prediction UI.

    Generates the synthetic dataset, fits the scaler and random forest,
    collects the six student attributes from sidebar widgets and, when the
    "Predict Performance" button is pressed, shows the predicted grade with a
    colour-coded banner and a feature-importance chart.
    """
    # Generate dataset and prepare model
    df = generate_student_data()
    scaler, rf_model, feature_names = _train_model(df)

    # Streamlit App Configuration
    st.set_page_config(page_title="Student Performance Predictor", page_icon=_APP_ICON)
    st.title(f"{_APP_ICON} Student Performance Prediction Model")

    # Sidebar inputs
    st.sidebar.header("Student Information")
    attendance_rate = st.sidebar.slider(
        "Attendance Rate",
        min_value=0.5,
        max_value=1.0,
        value=0.85,
        step=0.05
    )
    study_hours = st.sidebar.slider(
        "Weekly Study Hours",
        min_value=0,
        max_value=40,
        value=25
    )
    prev_gpa = st.sidebar.slider(
        "Previous Semester GPA",
        min_value=2.0,
        max_value=4.0,
        value=3.5,
        step=0.1
    )
    socioeconomic_status = st.sidebar.selectbox(
        "Socioeconomic Status",
        [1, 2, 3],
        index=1,
        format_func=lambda x: {1:"Low", 2:"Medium", 3:"High"}[x]
    )
    extracurricular_activities = st.sidebar.slider(
        "Extracurricular Activities",
        min_value=0,
        max_value=4,
        value=2
    )
    family_support = st.sidebar.selectbox(
        "Family Support",
        [0, 1],
        index=1,
        format_func=lambda x: "Yes" if x == 1 else "No"
    )

    if st.sidebar.button("Predict Performance"):
        # Build the input as a named, single-row frame and order it by the
        # training columns: the scaler was fitted on a DataFrame, so a bare
        # array would trigger scikit-learn's feature-name warning and rely on
        # positional order alone.
        student = pd.DataFrame([{
            'attendance_rate': attendance_rate,
            'study_hours_per_week': study_hours,
            'previous_semester_gpa': prev_gpa,
            'socioeconomic_status': socioeconomic_status,
            'extracurricular_activities': extracurricular_activities,
            'family_support': family_support,
        }])[feature_names]
        prediction = rf_model.predict(scaler.transform(student))[0]
        predicted_grade = np.round(prediction, 2)

        # Display prediction with a colour-coded performance banner
        st.subheader("Prediction Results")
        color, performance = _classify_grade(predicted_grade)
        st.markdown(
            _RESULT_BANNER_HTML.format(
                color=color,
                predicted_grade=predicted_grade,
                performance=performance,
            ),
            unsafe_allow_html=True,
        )

        # NOTE(review): assumed to belong to the button branch, alongside the
        # prediction results - confirm against the intended layout.
        st.subheader("Feature Importance")
        _render_feature_importance(feature_names, rf_model)

    # Additional Information
    st.markdown(_ABOUT_MODEL_MARKDOWN)
# Entry point: build and render the app only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()