# Comprehensive imports with error handling
import sys
import subprocess

# Ensure required packages are installed
def install(package):
    """Install ``package`` with pip, using the interpreter running this script.

    Raises ``subprocess.CalledProcessError`` if pip exits with a non-zero
    status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package]
    subprocess.check_call(pip_command)

import importlib

# Import names whose PyPI distribution is published under a different name.
_PIP_NAMES = {"sklearn": "scikit-learn"}

# Import the third-party stack, installing missing distributions on demand.
# A failed pass reveals only the first module that could not be found, so the
# imports are retried until all of them succeed. A module that is still
# missing after one install attempt is re-raised instead of looping forever.
# Only ModuleNotFoundError triggers an install: other ImportErrors (e.g. a
# broken installation) are not something pip-installing a name can repair.
_install_attempted = set()
while True:
    try:
        import streamlit as st
        import pandas as pd
        import numpy as np
        from sklearn.model_selection import train_test_split
        from sklearn.preprocessing import StandardScaler
        from sklearn.ensemble import RandomForestRegressor
        import matplotlib.pyplot as plt
        import seaborn as sns
        break
    except ModuleNotFoundError as e:
        # e.name is the dotted module that was not found; its first component
        # is the top-level package that has to be installed.
        missing = (e.name or "").partition(".")[0]
        if not missing or missing in _install_attempted:
            raise
        _install_attempted.add(missing)
        print(f"Missing package: {e}")
        install(_PIP_NAMES.get(missing, missing))
        # Make the import system notice the newly installed files.
        importlib.invalidate_caches()

# Seed NumPy's global random state so the synthetic data that
# generate_student_data() draws through np.random is reproducible.
np.random.seed(42)

# Function to generate student data
def generate_student_data(n_samples=500):
    """Build a synthetic student dataset with a 0-10 ``final_grade`` target.

    All values are drawn from NumPy's global random state, so seed it
    beforehand for reproducible output. The target is a weighted sum of the
    features plus Gaussian noise, min-max scaled onto [0, 10].

    Args:
        n_samples: Number of student rows to generate. ``0`` yields an empty
            frame with the same columns.

    Returns:
        A ``pandas.DataFrame`` with six feature columns followed by
        ``final_grade``.
    """
    # The order of these np.random calls fixes the values produced for a
    # given seed; keep it unchanged.
    data = {
        'attendance_rate': np.random.uniform(0.5, 1, n_samples),
        'study_hours_per_week': np.random.uniform(0, 40, n_samples),
        'previous_semester_gpa': np.random.uniform(2.0, 4.0, n_samples),
        'socioeconomic_status': np.random.choice([1, 2, 3], n_samples),
        'extracurricular_activities': np.random.randint(0, 5, n_samples),
        'family_support': np.random.choice([0, 1], n_samples),
    }

    # Create target variable with dependencies
    raw_grade = (
        0.3 * data['attendance_rate'] * 10 +
        0.25 * data['study_hours_per_week'] +
        0.2 * data['previous_semester_gpa'] * 2 +
        0.1 * (data['socioeconomic_status'] * 2) +
        0.05 * data['extracurricular_activities'] +
        0.1 * (data['family_support'] * 3) +
        np.random.normal(0, 1, n_samples)
    )

    # Normalize final grade to be between 0 and 10
    if raw_grade.size == 0:
        # Nothing to scale; min/max of an empty array would raise.
        final_grade = raw_grade
    else:
        lowest = raw_grade.min()
        spread = raw_grade.max() - lowest
        if spread == 0:
            # All grades identical (e.g. a single row): dividing by the zero
            # range would give NaN, so map them to the bottom of the scale.
            final_grade = np.zeros_like(raw_grade)
        else:
            final_grade = np.clip((raw_grade - lowest) / spread * 10, 0, 10)

    data['final_grade'] = final_grade
    return pd.DataFrame(data)

# Main application logic
def main():
    """Render the Streamlit student-performance predictor.

    Trains a random forest on the synthetic data from
    ``generate_student_data``, collects one student's attributes from sidebar
    widgets, shows the predicted final grade when the button is pressed, and
    always plots the model's feature importances.
    """
    # Generate dataset and prepare model
    df = generate_student_data()
    X = df.drop('final_grade', axis=1)
    y = df['final_grade']

    # Split and scale data. The 20% hold-out is kept so the fitted model is
    # the same as before; the held-out rows themselves are not used here.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)

    # Train Random Forest model
    rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
    rf_model.fit(X_train_scaled, y_train)

    # Streamlit App Configuration
    st.set_page_config(page_title="Student Performance Predictor", page_icon="🎓")

    # App Title
    st.title("🎓 Student Performance Prediction Model")

    # Sidebar for Input
    st.sidebar.header("Student Information")

    # Input Sliders and Selectors
    attendance_rate = st.sidebar.slider(
        "Attendance Rate",
        min_value=0.5,
        max_value=1.0,
        value=0.85,
        step=0.05
    )

    study_hours = st.sidebar.slider(
        "Weekly Study Hours",
        min_value=0,
        max_value=40,
        value=25
    )

    prev_gpa = st.sidebar.slider(
        "Previous Semester GPA",
        min_value=2.0,
        max_value=4.0,
        value=3.5,
        step=0.1
    )

    socioeconomic_status = st.sidebar.selectbox(
        "Socioeconomic Status",
        [1, 2, 3],
        index=1,
        format_func=lambda x: {1:"Low", 2:"Medium", 3:"High"}[x]
    )

    extracurricular_activities = st.sidebar.slider(
        "Extracurricular Activities",
        min_value=0,
        max_value=4,
        value=2
    )

    family_support = st.sidebar.selectbox(
        "Family Support",
        [0, 1],
        index=1,
        format_func=lambda x: "Yes" if x == 1 else "No"
    )

    # Prediction Function
    def predict_student_performance(input_data):
        """Scale one row of features and return the grade rounded to 2 places."""
        input_scaled = scaler.transform(input_data)
        prediction = rf_model.predict(input_scaled)[0]
        return np.round(prediction, 2)

    # Predict Button
    if st.sidebar.button("Predict Performance"):
        # Build the row as a DataFrame keyed by feature name. The scaler was
        # fitted on a DataFrame, so a bare array would make scikit-learn warn
        # about missing feature names and would depend on positional order.
        input_data = pd.DataFrame([{
            'attendance_rate': attendance_rate,
            'study_hours_per_week': study_hours,
            'previous_semester_gpa': prev_gpa,
            'socioeconomic_status': socioeconomic_status,
            'extracurricular_activities': extracurricular_activities,
            'family_support': family_support,
        }], columns=X.columns)

        # Make prediction
        predicted_grade = predict_student_performance(input_data)

        # Display prediction
        st.subheader("Prediction Results")

        # Color-coded performance indicator
        color, performance = _performance_band(predicted_grade)

        st.markdown(f"""
        <div style="background-color:{color}; padding:10px; border-radius:10px;">
        <h2 style="color:white; text-align:center;">
        Predicted Final Grade: {predicted_grade}/10
        <br>Performance: {performance}
        </h2>
        </div>
        """, unsafe_allow_html=True)

    # Feature Importance Visualization
    st.subheader("Feature Importance")
    feature_importance = pd.DataFrame({
        'feature': X.columns,
        'importance': rf_model.feature_importances_
    }).sort_values('importance', ascending=False)

    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x='importance', y='feature', data=feature_importance, ax=ax)
    # Label through the Axes object rather than pyplot's implicit
    # "current axes", which is shared global state.
    ax.set_title('Factors Influencing Student Performance')
    ax.set_xlabel('Importance Score')
    st.pyplot(fig)
    # Figures made via pyplot stay registered until closed; release this one
    # so repeated script runs do not accumulate open figures.
    plt.close(fig)

    # Additional Information
    st.markdown("""
    ### About the Model
    This machine learning model predicts student performance based on:
    - Attendance Rate
    - Weekly Study Hours
    - Previous Semester GPA
    - Socioeconomic Status
    - Extracurricular Activities
    - Family Support
    """)


def _performance_band(grade):
    """Return the (background color, label) pair for a 0-10 grade."""
    if grade >= 8:
        return "green", "Excellent"
    if grade >= 6:
        return "blue", "Good"
    if grade >= 4:
        return "orange", "Average"
    return "red", "Needs Improvement"

# Run the main application only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()