File size: 6,454 Bytes
faa3ed6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59f6d73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
faa3ed6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# Comprehensive imports with error handling
import sys
import subprocess

# Ensure required packages are installed
def install(package):
    """Install *package* with pip under the interpreter running this script.

    Args:
        package: Requirement string handed to ``pip install``.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    # Going through ``sys.executable -m pip`` targets the environment of the
    # running interpreter rather than whichever ``pip`` is first on PATH.
    pip_command = [sys.executable, "-m", "pip", "install", package]
    subprocess.check_call(pip_command)

# Third-party imports with a best-effort install of anything that is missing.
import importlib

# Import name -> PyPI distribution name, for modules where the two differ.
_PIP_NAMES = {"sklearn": "scikit-learn"}

# Top-level modules we have already tried to install; prevents retrying forever
# when an install "succeeds" but the import still fails.
_install_attempts = set()

while True:
    try:
        import streamlit as st
        import pandas as pd
        import numpy as np
        from sklearn.model_selection import train_test_split
        from sklearn.preprocessing import StandardScaler
        from sklearn.ensemble import RandomForestRegressor
        import matplotlib.pyplot as plt
        import seaborn as sns
        break
    except ImportError as e:
        # ``e.name`` carries the module that could not be imported; reduce a
        # dotted path such as ``sklearn.model_selection`` to its top-level
        # package, which is what has to be installed.
        missing = (e.name or "").partition(".")[0]
        if not missing or missing in _install_attempts:
            # Nothing installable was identified, or installing it did not
            # help: surface the original import error instead of looping.
            raise
        _install_attempts.add(missing)
        print(f"Missing package: {e}")
        install(_PIP_NAMES.get(missing, missing))
        # Let the import system notice files created by the install above.
        importlib.invalidate_caches()

# Seed NumPy's global RNG so the np.random draws made in
# generate_student_data yield the same synthetic dataset on every run.
np.random.seed(42)

# Function to generate student data
def generate_student_data(n_samples=500):
    """Build a synthetic student dataset with a derived ``final_grade`` column.

    Six feature columns are drawn from NumPy's global RNG. ``final_grade`` is
    a weighted sum of those features plus standard-normal noise, min-max
    rescaled so that the lowest sample maps to 0 and the highest to 10.

    Args:
        n_samples: Number of rows to generate. ``0`` yields an empty frame
            that still has all seven columns.

    Returns:
        pandas.DataFrame with columns ``attendance_rate``,
        ``study_hours_per_week``, ``previous_semester_gpa``,
        ``socioeconomic_status``, ``extracurricular_activities``,
        ``family_support`` and ``final_grade`` (in that order).
    """
    # NOTE: the order of the np.random calls below is part of the behavior;
    # reordering them changes the data produced for a given seed.
    data = {
        'attendance_rate': np.random.uniform(0.5, 1, n_samples),
        'study_hours_per_week': np.random.uniform(0, 40, n_samples),
        'previous_semester_gpa': np.random.uniform(2.0, 4.0, n_samples),
        'socioeconomic_status': np.random.choice([1, 2, 3], n_samples),
        'extracurricular_activities': np.random.randint(0, 5, n_samples),
        'family_support': np.random.choice([0, 1], n_samples),
    }

    # Target variable: weighted feature contributions plus Gaussian noise.
    raw_grade = (
        0.3 * data['attendance_rate'] * 10 +
        0.25 * data['study_hours_per_week'] +
        0.2 * data['previous_semester_gpa'] * 2 +
        0.1 * (data['socioeconomic_status'] * 2) +
        0.05 * data['extracurricular_activities'] +
        0.1 * (data['family_support'] * 3) +
        np.random.normal(0, 1, n_samples)
    )

    if raw_grade.size == 0:
        # min()/max() of an empty array raise; there is nothing to rescale.
        final_grade = raw_grade
    else:
        lowest = np.min(raw_grade)
        spread = np.max(raw_grade) - lowest
        if spread > 0:
            # Min-max rescale to the 0-10 range.
            final_grade = np.clip((raw_grade - lowest) / spread * 10, 0, 10)
        else:
            # Every sample equals the minimum (e.g. n_samples == 1). Dividing
            # by the zero spread would give NaN, so map them to the value the
            # minimum always gets: 0.
            final_grade = np.zeros_like(raw_grade)

    data['final_grade'] = final_grade
    return pd.DataFrame(data)

# Main application logic
def _train_model(df):
    """Fit a StandardScaler and a RandomForestRegressor on a training split of *df*.

    Returns:
        Tuple ``(scaler, rf_model, feature_names)`` where ``feature_names`` is
        the column index of the feature frame (everything but ``final_grade``).
    """
    X = df.drop('final_grade', axis=1)
    y = df['final_grade']

    # The 20% test partition is not used anywhere in the app; the split is
    # kept so the model is trained on exactly the same rows as before.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)

    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
    rf_model.fit(X_train_scaled, y_train)
    return scaler, rf_model, X.columns


def _collect_student_inputs():
    """Render the sidebar widgets and return their values keyed by feature column."""
    st.sidebar.header("Student Information")

    # Dict entries are evaluated top to bottom, which is also the order in
    # which the widgets appear in the sidebar.
    return {
        'attendance_rate': st.sidebar.slider(
            "Attendance Rate",
            min_value=0.5,
            max_value=1.0,
            value=0.85,
            step=0.05
        ),
        'study_hours_per_week': st.sidebar.slider(
            "Weekly Study Hours",
            min_value=0,
            max_value=40,
            value=25
        ),
        'previous_semester_gpa': st.sidebar.slider(
            "Previous Semester GPA",
            min_value=2.0,
            max_value=4.0,
            value=3.5,
            step=0.1
        ),
        'socioeconomic_status': st.sidebar.selectbox(
            "Socioeconomic Status",
            [1, 2, 3],
            index=1,
            format_func=lambda x: {1: "Low", 2: "Medium", 3: "High"}[x]
        ),
        'extracurricular_activities': st.sidebar.slider(
            "Extracurricular Activities",
            min_value=0,
            max_value=4,
            value=2
        ),
        'family_support': st.sidebar.selectbox(
            "Family Support",
            [0, 1],
            index=1,
            format_func=lambda x: "Yes" if x == 1 else "No"
        ),
    }


def _render_prediction(predicted_grade):
    """Show *predicted_grade* (0-10 scale) in a color-coded banner."""
    st.subheader("Prediction Results")

    # Color-coded performance indicator
    if predicted_grade >= 8:
        color = "green"
        performance = "Excellent"
    elif predicted_grade >= 6:
        color = "blue"
        performance = "Good"
    elif predicted_grade >= 4:
        color = "orange"
        performance = "Average"
    else:
        color = "red"
        performance = "Needs Improvement"

    st.markdown(f"""
    <div style="background-color:{color}; padding:10px; border-radius:10px;">
    <h2 style="color:white; text-align:center;">
    Predicted Final Grade: {predicted_grade}/10
    <br>Performance: {performance}
    </h2>
    </div>
    """, unsafe_allow_html=True)


def _render_feature_importance(feature_names, importances):
    """Draw a horizontal bar chart of *importances*, largest first."""
    st.subheader("Feature Importance")
    feature_importance = pd.DataFrame({
        'feature': feature_names,
        'importance': importances
    }).sort_values('importance', ascending=False)

    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x='importance', y='feature', data=feature_importance, ax=ax)
    # Label through the Axes object rather than pyplot's implicit
    # "current axes", so the labels cannot land on another figure.
    ax.set_title('Factors Influencing Student Performance')
    ax.set_xlabel('Importance Score')
    st.pyplot(fig)
    # pyplot keeps every figure it creates until it is closed; release this
    # one now that it has been rendered so figures do not pile up.
    plt.close(fig)


def main():
    """Train the model on synthetic data and render the Streamlit page.

    Steps: generate the dataset, fit the scaler and random forest, draw the
    sidebar inputs, show a prediction when the button is pressed, then show
    the feature-importance chart and a short description of the model.
    """
    # Generate dataset and prepare model
    # NOTE(review): this runs on every execution of main(); if the script is
    # re-executed per interaction, caching the trained model would avoid
    # refitting each time — confirm against the deployed Streamlit version.
    df = generate_student_data()
    scaler, rf_model, feature_names = _train_model(df)

    # Streamlit App Configuration (kept ahead of every other st.* call)
    st.set_page_config(page_title="Student Performance Predictor", page_icon="🎓")

    # App Title
    st.title("🎓 Student Performance Prediction Model")

    # Sidebar for Input
    inputs = _collect_student_inputs()

    # Predict Button
    if st.sidebar.button("Predict Performance"):
        # Build the row with the training column names and order so the
        # scaler sees the same feature layout it was fitted on.
        input_frame = pd.DataFrame([inputs], columns=feature_names)
        prediction = rf_model.predict(scaler.transform(input_frame))[0]
        _render_prediction(np.round(prediction, 2))

    # Feature Importance Visualization
    _render_feature_importance(feature_names, rf_model.feature_importances_)

    # Additional Information
    st.markdown("""
    ### About the Model
    This machine learning model predicts student performance based on:
    - Attendance Rate
    - Weekly Study Hours
    - Previous Semester GPA
    - Socioeconomic Status
    - Extracurricular Activities
    - Family Support
    """)

# Run the main application only when this file is executed as a script;
# importing it as a module defines the functions without starting the app.
if __name__ == "__main__":
    main()