import joblib
import pandas as pd
import numpy as np
import gradio as gr
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Load your trained model
class _PlaceholderModel:
    """Stand-in estimator used only when the trained model cannot be loaded.

    It exposes the three members the rest of this file uses (``classes_``,
    ``predict`` and ``predict_proba``) and returns the same fixed answer for
    every input row, so the interface can be exercised without a real model.
    """

    classes_ = ["BSc Computer Science", "BSc Engineering", "Medicine", "Other Course"]
    # One probability per entry of ``classes_``, in the same order.
    _probabilities = [0.7, 0.1, 0.1, 0.1]

    def predict(self, X):
        """Return the first class label once per row of *X*."""
        return np.full(len(X), self.classes_[0])

    def predict_proba(self, X):
        """Return the fixed probability row once per row of *X*."""
        return np.tile(self._probabilities, (len(X), 1))


try:
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only point this at a model artifact you trust.
    model = joblib.load('trained_model.joblib')
except Exception as e:
    print(f"Error loading model: {e}")
    print("Creating a placeholder model for interface testing")
    # An unfitted scikit-learn estimator already defines predict/predict_proba
    # (and raises when they are called), so a purpose-built stub is used
    # instead of patching methods onto one.
    model = _PlaceholderModel()

# Known trait vocabularies, keyed by the input column they belong to.
# preprocess_data() turns each entry into one binary feature column.
all_traits = {
    'Interests': {
        'Reading', 'Dancing', 'Physics', 'Research',
        'Cooking', 'Art', 'Playing Football', 'Creativity',
        'Writing', 'Technology', 'Public Speaking', 'Music',
        'Mathematics', 'Leadership', 'Problem-Solving', 'Entrepreneurship',
    },
    'Strengths': {
        'Communication', 'Creativity', 'Logical Reasoning',
        'Innovative Thinking', 'Teamwork', 'Hands-on Skills',
        'Analytical Thinking', 'Leadership', 'Detail-Oriented',
    },
}

# Grade label -> point value (lower is better). Built once at import time
# instead of on every call.
_GRADE_POINTS = {
    "A1": 1, "B2": 2, "B3": 3, "C4": 4, "C5": 5, "C6": 6,
    "D7": 7, "E8": 8, "F9": 9,
}


# Function to convert grades to numerical values
def grade_to_numeric(grade):
    """Convert a grade label such as ``"A1"`` to its point value.

    Surrounding whitespace and letter case are ignored, so ``" b2 "`` maps
    to 2.

    Args:
        grade: The grade label. Anything that is not a string (``None``,
            NaN, numbers, containers) is treated as missing.

    Returns:
        The integer point value (1-9), or ``np.nan`` when the grade is
        missing, blank, or not a recognised label.
    """
    if not isinstance(grade, str):
        return np.nan
    return _GRADE_POINTS.get(grade.strip().upper(), np.nan)

# Function to extract interests and strengths into separate columns
def _split_traits(cell):
    """Return the trimmed, non-empty comma-separated items of *cell*.

    Non-string cells (``None``, NaN, numbers) yield an empty list.
    """
    if not isinstance(cell, str):
        return []
    return [item.strip() for item in cell.split(',') if item.strip()]


def extract_traits(df, column_name, prefix, all_traits=None):
    """Add one binary column per trait listed in a comma-separated column.

    A row gets 1 in ``"{prefix}_{trait}"`` when *trait* appears as a whole
    item of that row's comma-separated list (ignoring case and surrounding
    whitespace), and 0 otherwise. Matching whole items rather than
    substrings keeps "Art" from matching "Smart", and means trait names are
    never interpreted as regular expressions.

    Args:
        df: DataFrame holding *column_name*. It is modified in place.
        column_name: Name of the column with comma-separated traits.
        prefix: Prefix for the generated column names.
        all_traits: Iterable of trait names to create columns for. When
            ``None``, the traits are inferred from the values in the column.

    Returns:
        The same DataFrame object, with the binary columns added.
    """
    items_per_row = [_split_traits(cell) for cell in df[column_name]]

    # Infer all_traits if not provided
    if all_traits is None:
        all_traits = {item for items in items_per_row for item in items}

    # Case-folded items of each row, for case-insensitive membership tests.
    folded_rows = [{item.casefold() for item in items} for items in items_per_row]

    # Sorted so the generated columns come out in a reproducible order;
    # iterating a set of strings directly is not stable between runs.
    for name in sorted({trait.strip() for trait in all_traits}):
        if not name:
            continue
        key = name.casefold()
        df[f"{prefix}_{name}"] = [int(key in row) for row in folded_rows]

    return df

def preprocess_data(df, all_traits=None):
    """Return a copy of *df* with binary interest/strength columns added.

    Args:
        df: DataFrame with comma-separated ``Interests`` and ``Strengths``
            columns. It is not modified.
        all_traits: Optional dict with ``'Interests'`` and ``'Strengths'``
            entries listing the traits to create columns for. When ``None``,
            both vocabularies are inferred from the data.

    Returns:
        A new DataFrame with ``interest_*`` and ``strength_*`` columns
        appended by ``extract_traits``.
    """
    # Create a copy to avoid modifying the original
    processed_df = df.copy()

    if all_traits is None:
        all_traits = {'Interests': set(), 'Strengths': set()}
        for column, found in all_traits.items():
            for cell in processed_df[column]:
                if not isinstance(cell, str):
                    continue
                # Trim each item and drop blanks: an empty trait would match
                # every row, and " Dancing" would be a different trait from
                # "Dancing".
                found.update(
                    item.strip() for item in cell.split(',') if item.strip()
                )

    processed_df = extract_traits(processed_df, "Interests", "interest", all_traits.get('Interests'))
    processed_df = extract_traits(processed_df, "Strengths", "strength", all_traits.get('Strengths'))

    return processed_df

# Build the feature matrix passed to the model
def prepare_model_data(processed_df):
    """Select and convert the model's feature columns.

    The result contains, in order: ``Desired_Career``, ``Aggregate``, the
    subject grade columns, then every ``interest_*`` / ``strength_*`` column
    present in *processed_df*.

    ``Aggregate`` is a numeric score, so it is coerced with
    ``pd.to_numeric``; sending it through ``grade_to_numeric`` would turn
    every value into NaN because a number is not a grade label.
    NOTE(review): assumes the model was trained with a numeric Aggregate —
    confirm against the training code.

    Args:
        processed_df: DataFrame as returned by ``preprocess_data``. It is
            not modified.

    Returns:
        A DataFrame holding only the feature columns. Subject columns that
        are missing from the input are filled with NaN.
    """
    categorical_features = ["Desired_Career"]
    score_features = ["Aggregate"]
    grade_features = ["English", "Core Maths", "Science", "Social Studies",
                      "Physics", "Biology", "Elective Maths", "Chemistry",
                      "Economics", "E-ICT", "Literature", "Geography",
                      "Business Management", "Visual Arts", "Government"]

    # Work on a copy: converting in place would leave numeric grades in the
    # caller's frame, and a second call would then map them all to NaN.
    processed_df = processed_df.copy()

    # Add the binary interest and strength columns
    interest_strength_cols = [
        col for col in processed_df.columns
        if col.startswith(('interest_', 'strength_'))
    ]

    for col in score_features:
        if col in processed_df.columns:
            processed_df[col] = pd.to_numeric(processed_df[col], errors='coerce')
        else:
            processed_df[col] = np.nan

    # Convert grade labels to their point values
    for col in grade_features:
        if col in processed_df.columns:
            processed_df[col] = processed_df[col].apply(grade_to_numeric)
        else:
            # Add missing columns with NaN values
            processed_df[col] = np.nan

    X = processed_df[categorical_features + score_features + grade_features + interest_strength_cols]

    return X

def get_course_recommendation(student_info):
    """Return the top course recommendations for one student as text.

    The student record is run through ``preprocess_data`` and
    ``prepare_model_data`` and scored with the module-level ``model``.

    Args:
        student_info: Dict describing one student; it becomes a single-row
            DataFrame.

    Returns:
        A string listing up to three courses in descending order of
        predicted probability, each with its probability to two decimals.
        If anything fails, a string starting with
        ``"Error generating recommendations:"`` is returned instead of
        raising.
    """
    try:
        # Convert student data to DataFrame
        student_df = pd.DataFrame([student_info])

        # Preprocess student data
        processed_student = preprocess_data(student_df, all_traits)

        # Extract features
        student_features = prepare_model_data(processed_student)

        # Only the probabilities are needed for the ranking, so the model is
        # scored once (no separate predict() call).
        probabilities = model.predict_proba(student_features)[0]

        # Indices of the three most probable classes, best first
        top_indices = np.argsort(probabilities)[::-1][:3]
        classes = model.classes_

        # Format the output
        entries = [
            f"{rank}. {classes[idx]} (Confidence: {probabilities[idx]:.2f})\n\n"
            for rank, idx in enumerate(top_indices, 1)
        ]
        return "Top Course Recommendations:\n\n" + "".join(entries)
    except Exception as e:
        return f"Error generating recommendations: {str(e)}"

def explain_recommendation(student_info, top_recommendation):
    """Build a short text explaining why a course was recommended.

    Args:
        student_info: Mapping with ``Desired_Career``, ``Interests``,
            ``Strengths`` and ``Aggregate`` entries.
        top_recommendation: Sequence whose first element is the course name.

    Returns:
        The explanation, one bullet per line. An aggregate below 15 adds an
        "excellent" bullet, one below 20 a "good" bullet, anything else adds
        none. On any failure a string starting with
        ``"Error generating explanation:"`` is returned instead of raising.
    """
    try:
        course = top_recommendation[0]

        parts = [
            f"The course '{course}' was recommended based on:\n",
            # Career alignment
            f"- Your career interest in {student_info['Desired_Career']}\n",
            # Interests and strengths
            f"- Your interests in {student_info['Interests']}\n",
            f"- Your strengths in {student_info['Strengths']}\n",
        ]

        # Aggregate score context (lower aggregate is better)
        aggregate = student_info['Aggregate']
        if aggregate < 15:
            parts.append("- Your excellent aggregate score\n")
        elif aggregate < 20:
            parts.append("- Your good aggregate score\n")

        return "".join(parts)
    except Exception as e:
        return f"Error generating explanation: {str(e)}"

def predict_career(desired_career, aggregate, interests, strengths, english, core_maths, science, social_studies,
                  elective_maths, physics, biology, chemistry):
    """Handle one form submission and return the text shown to the user.

    Assembles a student record from the form values, obtains the formatted
    recommendations, then scores the record again to find the single most
    probable course and appends an explanation for it.

    Returns:
        The recommendations followed by the explanation. If anything raises,
        a string starting with ``"Error processing request:"`` that includes
        the traceback is returned instead.
    """
    try:
        # Fields collected by the form
        student_info = {
            "StudentID": "STU_TEMP",
            "Desired_Career": desired_career,
            "Recommended_Course": "",  # Will be predicted
            "Aggregate": aggregate,
            "Interests": interests,
            "Strengths": strengths,
            "English": english,
            "Core Maths": core_maths,
            "Science": science,
            "Social Studies": social_studies,
            "Elective Maths": elective_maths,
            "Physics": physics,
            "Biology": biology,
            "Chemistry": chemistry,
        }
        # Subjects the form does not ask about are sent as blank grades
        uncollected_subjects = (
            "Economics", "E-ICT", "Literature", "Geography",
            "Business Management", "Visual Arts", "Government",
        )
        for subject in uncollected_subjects:
            student_info[subject] = ""

        # Formatted top-3 list
        recommendations = get_course_recommendation(student_info)

        # Score once more to pick the best class for the explanation
        features = prepare_model_data(
            preprocess_data(pd.DataFrame([student_info]), all_traits)
        )
        probabilities = model.predict_proba(features)[0]
        best_indices = np.argsort(probabilities)[::-1][:1]
        classes = model.classes_
        ranked = [(classes[idx], probabilities[idx]) for idx in best_indices]
        top_recommendation = ranked[0]

        explanation = explain_recommendation(student_info, top_recommendation)

        return recommendations + "\n" + explanation
    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        return f"Error processing request: {str(e)}\n\nDetails:\n{error_details}"

# Choices offered by every grade dropdown. The leading blank entry lets a
# subject be left unselected.
grade_options = [""] + [
    "A1",
    "B2", "B3",
    "C4", "C5", "C6",
    "D7",
    "E8",
    "F9",
]

# Create Gradio interface
def _grade_dropdown(label, info):
    """Create a grade dropdown offering the shared ``grade_options``."""
    return gr.Dropdown(choices=grade_options, label=label, info=info)


with gr.Blocks(title="Career Course Recommendation System") as demo:
    gr.Markdown("# Career Course Recommendation System")
    gr.Markdown("Enter student information to get course recommendations")

    with gr.Row():
        # Left column: the input form
        with gr.Column(scale=2):
            gr.Markdown("### Student Information")
            desired_career = gr.Textbox(
                label="Desired Career",
                placeholder="Enter your desired career path (e.g. Medicine, Computer Science, Engineering)",
                info="Enter your desired career path",
            )
            aggregate = gr.Slider(
                minimum=6,
                maximum=37,
                value=15,
                step=1,
                label="Aggregate Score",
                info="Lower is better (6 is best, 37 is worst)",
            )
            interests = gr.Textbox(
                label="Interests (comma separated)",
                placeholder="Reading,Dancing,Physics",
                info="List your interests separated by commas",
            )
            strengths = gr.Textbox(
                label="Strengths (comma separated)",
                placeholder="Communication,Creativity",
                info="List your strengths or skills separated by commas",
                value="Communication,Creativity,Logical Reasoning,Analytical Thinking",
            )

            gr.Markdown("### Core Subjects (Required)")
            with gr.Row():
                with gr.Column(scale=1):
                    english = _grade_dropdown("English", "Grade in English")
                    core_maths = _grade_dropdown("Core Maths", "Grade in Core Mathematics")

                with gr.Column(scale=1):
                    science = _grade_dropdown("Science", "Grade in Integrated Science")
                    social_studies = _grade_dropdown("Social Studies", "Grade in Social Studies")

            gr.Markdown("### Elective Subjects")
            with gr.Row():
                with gr.Column(scale=1):
                    elective_maths = _grade_dropdown("Elective Maths", "Grade in Elective Mathematics")
                    physics = _grade_dropdown("Physics", "Grade in Physics")

                with gr.Column(scale=1):
                    chemistry = _grade_dropdown("Chemistry", "Grade in Chemistry")
                    biology = _grade_dropdown("Biology", "Grade in Biology")

        # Right column: grade reference, submit button and result box
        with gr.Column(scale=1):
            gr.Markdown("### Grade Scale Reference")
            gr.Markdown("""
            - A1: Excellent (1 point)
            - B2: Very Good (2 points)
            - B3: Good (3 points)
            - C4: Credit (4 points)
            - C5: Credit (5 points)
            - C6: Credit (6 points)
            - D7: Pass (7 points)
            - E8: Pass (8 points)
            - F9: Fail (9 points)
            
            *Lower points are better. Aggregate is the sum of your best subjects.*
            """)

            submit_btn = gr.Button("Get Recommendations", variant="primary", size="lg")
            output = gr.Textbox(label="Recommendations", lines=20)

    # The input order must match predict_career's positional parameters.
    submit_btn.click(
        fn=predict_career,
        inputs=[
            desired_career, aggregate, interests, strengths,
            english, core_maths, science, social_studies,
            elective_maths, physics, biology, chemistry,
        ],
        outputs=output,
    )

# Launch the app
if __name__ == "__main__":
    try:
        demo.launch()
    except Exception as e:
        import sys

        print(f"Error launching app: {e}")
        # Try alternative launch method
        print("Trying alternative launch method...")
        # Retry with a share link when Colab is loaded; otherwise bind to
        # all interfaces on port 7860.
        running_in_colab = 'google.colab' in sys.modules
        if running_in_colab:
            retry_options = {"share": True}
        else:
            retry_options = {"server_name": "0.0.0.0", "server_port": 7860}
        demo.launch(**retry_options)