|
|
|
|
|
import sys |
|
|
import subprocess |
|
|
|
|
|
|
|
|
def install(package):
    """Install *package* with pip, using the interpreter running this script.

    Invoking ``<sys.executable> -m pip`` targets the same environment this
    script runs in, rather than whichever ``pip`` happens to be first on PATH.

    Raises:
        subprocess.CalledProcessError: if the pip process exits non-zero.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package]
    subprocess.check_call(pip_command)
|
|
|
|
|
import importlib

# Import name -> pip distribution name.  The two differ for scikit-learn:
# the module is ``sklearn`` but the installable distribution is
# ``scikit-learn``, so the pip name cannot be derived from the ImportError text.
_PIP_DISTRIBUTIONS = {
    "streamlit": "streamlit",
    "pandas": "pandas",
    "numpy": "numpy",
    "sklearn": "scikit-learn",
    "matplotlib": "matplotlib",
    "seaborn": "seaborn",
}

# Probe every dependency individually so that several missing packages are all
# installed in a single run, instead of stopping after the first one.
for _module_name, _distribution in _PIP_DISTRIBUTIONS.items():
    try:
        importlib.import_module(_module_name)
    except ImportError as e:
        print(f"Missing package: {e}")
        install(_distribution)
        # Let the running interpreter's import system see the new files.
        importlib.invalidate_caches()

import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
import matplotlib.pyplot as plt
import seaborn as sns
|
|
|
|
|
|
|
|
# Seed NumPy's global random generator so every run draws the same
# synthetic dataset.
np.random.seed(seed=42)
|
|
|
|
|
|
|
|
def generate_student_data(n_samples: int = 500) -> pd.DataFrame:
    """Build a synthetic student dataset with a 0-10 ``final_grade`` column.

    Six feature columns are drawn from NumPy's global random generator, then
    combined as a weighted sum plus unit-variance Gaussian noise.  The raw
    score is min-max rescaled to the range [0, 10].

    Args:
        n_samples: Number of rows to generate.

    Returns:
        A DataFrame with the six feature columns followed by ``final_grade``.
        With ``n_samples == 0`` the frame is empty.  When every raw score is
        identical (e.g. ``n_samples == 1``) the grade is 0.0, the value
        min-max scaling assigns to the minimum, rather than NaN.
    """
    # Draw order matters: it fixes the values produced for a given seed.
    data = {
        'attendance_rate': np.random.uniform(0.5, 1, n_samples),
        'study_hours_per_week': np.random.uniform(0, 40, n_samples),
        'previous_semester_gpa': np.random.uniform(2.0, 4.0, n_samples),
        'socioeconomic_status': np.random.choice([1, 2, 3], n_samples),
        'extracurricular_activities': np.random.randint(0, 5, n_samples),
        'family_support': np.random.choice([0, 1], n_samples),
    }

    raw_score = (
        0.3 * data['attendance_rate'] * 10 +
        0.25 * data['study_hours_per_week'] +
        0.2 * data['previous_semester_gpa'] * 2 +
        0.1 * (data['socioeconomic_status'] * 2) +
        0.05 * data['extracurricular_activities'] +
        0.1 * (data['family_support'] * 3) +
        np.random.normal(0, 1, n_samples)
    )

    if raw_score.size == 0:
        # Nothing to rescale; np.min/np.max would raise on an empty array.
        final_grade = raw_score
    else:
        lowest = np.min(raw_score)
        spread = np.max(raw_score) - lowest
        if spread > 0:
            final_grade = np.clip((raw_score - lowest) / spread * 10, 0, 10)
        else:
            # Degenerate range: avoid the 0/0 division that would yield NaN.
            final_grade = np.zeros_like(raw_score)

    data['final_grade'] = final_grade
    return pd.DataFrame(data)
|
|
|
|
|
|
|
|
# NOTE(review): the icon literal in the original source was mojibake (a lone
# Thai digit left over from a mis-decoded 4-byte emoji).  A graduation cap is
# assumed here; confirm the intended glyph.  The \N{...} escape keeps the
# source ASCII-only so the character cannot be corrupted again.
_PAGE_ICON = "\N{GRADUATION CAP}"


def _train_model(df):
    """Fit a scaler and a random forest on an 80/20 split of *df*.

    Returns ``(scaler, model, feature_names)`` where ``feature_names`` lists
    the training columns in the order the scaler was fitted on.
    """
    features = df.drop('final_grade', axis=1)
    target = df['final_grade']

    # The held-out 20% is not evaluated here; the split is kept so the model
    # is trained on the same 80% subset it always has been.
    X_train, _, y_train, _ = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    scaler = StandardScaler()
    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
    rf_model.fit(scaler.fit_transform(X_train), y_train)
    return scaler, rf_model, list(features.columns)


def _read_sidebar_inputs():
    """Render the sidebar widgets and return their values keyed by feature name."""
    st.sidebar.header("Student Information")

    attendance_rate = st.sidebar.slider(
        "Attendance Rate",
        min_value=0.5,
        max_value=1.0,
        value=0.85,
        step=0.05,
    )
    study_hours = st.sidebar.slider(
        "Weekly Study Hours",
        min_value=0,
        max_value=40,
        value=25,
    )
    prev_gpa = st.sidebar.slider(
        "Previous Semester GPA",
        min_value=2.0,
        max_value=4.0,
        value=3.5,
        step=0.1,
    )
    socioeconomic_status = st.sidebar.selectbox(
        "Socioeconomic Status",
        [1, 2, 3],
        index=1,
        format_func=lambda x: {1: "Low", 2: "Medium", 3: "High"}[x],
    )
    extracurricular_activities = st.sidebar.slider(
        "Extracurricular Activities",
        min_value=0,
        max_value=4,
        value=2,
    )
    family_support = st.sidebar.selectbox(
        "Family Support",
        [0, 1],
        index=1,
        format_func=lambda x: "Yes" if x == 1 else "No",
    )

    return {
        'attendance_rate': attendance_rate,
        'study_hours_per_week': study_hours,
        'previous_semester_gpa': prev_gpa,
        'socioeconomic_status': socioeconomic_status,
        'extracurricular_activities': extracurricular_activities,
        'family_support': family_support,
    }


def _grade_band(grade):
    """Return the ``(color, label)`` pair used to present a 0-10 grade."""
    bands = (
        (8, "green", "Excellent"),
        (6, "blue", "Good"),
        (4, "orange", "Average"),
    )
    for threshold, color, label in bands:
        if grade >= threshold:
            return color, label
    return "red", "Needs Improvement"


def _render_feature_importance(rf_model, feature_names):
    """Draw the model's feature importances as a horizontal bar chart."""
    st.subheader("Feature Importance")
    feature_importance = pd.DataFrame({
        'feature': feature_names,
        'importance': rf_model.feature_importances_,
    }).sort_values('importance', ascending=False)

    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x='importance', y='feature', data=feature_importance, ax=ax)
    # Label through the Axes object rather than pyplot's global "current axes".
    ax.set_title('Factors Influencing Student Performance')
    ax.set_xlabel('Importance Score')
    st.pyplot(fig)
    # Streamlit re-executes the script on every interaction; without closing,
    # pyplot keeps every figure ever created alive.
    plt.close(fig)


def main():
    """Run the Streamlit student-performance predictor.

    Generates the synthetic dataset, trains the model, renders the sidebar
    inputs, and, when the "Predict Performance" button is pressed, shows the
    predicted grade.  The feature-importance chart and the "About" text are
    always rendered.
    """
    # set_page_config has to be the first Streamlit command of the script.
    st.set_page_config(page_title="Student Performance Predictor", page_icon=_PAGE_ICON)
    st.title(f"{_PAGE_ICON} Student Performance Prediction Model")

    df = generate_student_data()
    scaler, rf_model, feature_names = _train_model(df)
    student = _read_sidebar_inputs()

    if st.sidebar.button("Predict Performance"):
        # Pass a named DataFrame: the scaler was fitted on one, so columns are
        # matched by name and scikit-learn's feature-name warning is avoided.
        input_frame = pd.DataFrame([student], columns=feature_names)
        input_scaled = scaler.transform(input_frame)
        predicted_grade = np.round(rf_model.predict(input_scaled)[0], 2)

        st.subheader("Prediction Results")
        color, performance = _grade_band(predicted_grade)

        st.markdown(f"""
        <div style="background-color:{color}; padding:10px; border-radius:10px;">
          <h2 style="color:white; text-align:center;">
            Predicted Final Grade: {predicted_grade}/10
            <br>Performance: {performance}
          </h2>
        </div>
        """, unsafe_allow_html=True)

    _render_feature_importance(rf_model, feature_names)

    st.markdown("""
    ### About the Model
    This machine learning model predicts student performance based on:
    - Attendance Rate
    - Weekly Study Hours
    - Previous Semester GPA
    - Socioeconomic Status
    - Extracurricular Activities
    - Family Support
    """)
|
|
|
|
|
|
|
|
# Run the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()