Spaces:

msgasu
/

career-recommender

Sleeping

File size: 12,937 Bytes

8416eb2
 
 
fa6e48f
 
8416eb2
f3c7cc9
dc945d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8416eb2
fa6e48f
 
053cdda
 
 
 
fa6e48f
6cbbb17
fa6e48f
8416eb2
f3c7cc9
8416eb2
fa6e48f
8416eb2
 
 
 
 
 
fa6e48f
 
34fb376
fa6e48f
34fb376
fa6e48f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
053cdda
 
 
fa6e48f
 
 
 
 
 
 
 
053cdda
 
 
fa6e48f
 
 
 
 
 
 
 
 
 
dc945d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa6e48f
 
 
 
 
dc945d2
 
fa6e48f
dc945d2
6cbbb17
dc945d2
 
fa6e48f
331e788
dc945d2
 
fa6e48f
dc945d2
 
 
 
 
fa6e48f
dc945d2
 
 
34fb376
331e788
fa6e48f
 
dc945d2
053cdda
dc945d2
 
 
 
 
 
 
 
 
 
 
 
 
 
053cdda
 
 
 
 
 
 
 
 
dc945d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
331e788
dc945d2
053cdda
 
 
fa6e48f
dc945d2
fa6e48f
dc945d2
fa6e48f
8416eb2
fa6e48f
 
 
 
8416eb2
5acb9a8
fa6e48f
 
dfbc1a2
fa6e48f
dfbc1a2
 
5acb9a8
331e788
fa6e48f
331e788
 
 
2c20c04
331e788
 
5acb9a8
fa6e48f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
331e788
fa6e48f
 
 
 
601a5f7
fa6e48f
 
331e788
fa6e48f
5acb9a8
601a5f7
8416eb2
fa6e48f
 
dc945d2

import joblib
import pandas as pd
import numpy as np
import gradio as gr
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Load the trained model. If the artifact is missing or cannot be loaded, fall
# back to a stub so the interface can still be exercised end to end.
# NOTE(review): joblib.load unpickles the file, so 'trained_model.joblib' must
# come from a trusted source.
try:
    model = joblib.load('trained_model.joblib')
except Exception as e:
    print(f"Error loading model: {e}")
    print("Creating a placeholder model for interface testing")

    class _PlaceholderModel:
        """Stand-in exposing the part of the classifier API this app uses.

        An unfitted scikit-learn estimator cannot serve as the placeholder:
        it already defines predict/predict_proba, and those fail until fit()
        has been called. This stub returns fixed answers instead.
        """

        classes_ = ["BSc Computer Science", "BSc Engineering", "Medicine", "Other Course"]
        _probabilities = (0.7, 0.1, 0.1, 0.1)

        def predict(self, X):
            """Return the first class once per input row."""
            return np.array([self.classes_[0]] * len(X))

        def predict_proba(self, X):
            """Return the same fixed probability row for every input row."""
            return np.tile(self._probabilities, (len(X), 1))

    model = _PlaceholderModel()

# Known trait vocabulary, keyed by the student-record column it belongs to.
# It is handed to preprocess_data so the indicator columns are built from this
# fixed list rather than from whatever text a single student typed.
all_traits = {
    'Interests': {
        'Reading', 'Dancing', 'Physics', 'Research',
        'Cooking', 'Art', 'Playing Football', 'Creativity',
        'Writing', 'Technology', 'Public Speaking', 'Music',
        'Mathematics', 'Leadership', 'Problem-Solving', 'Entrepreneurship',
    },
    'Strengths': {
        'Communication', 'Creativity', 'Logical Reasoning',
        'Innovative Thinking', 'Teamwork', 'Hands-on Skills',
        'Analytical Thinking', 'Leadership', 'Detail-Oriented',
    },
}

# Map a grade code (A1 ... F9) to its point value (1 ... 9)
def grade_to_numeric(grade):
    """Return the point value for a grade code, or NaN when it is blank or unknown."""
    grade_codes = ("A1", "B2", "B3", "C4", "C5", "C6", "D7", "E8", "F9")
    points_by_grade = {code: points for points, code in enumerate(grade_codes, start=1)}

    # Missing values and the empty string both mean "no grade entered".
    no_grade = pd.isna(grade) or grade == ""
    return np.nan if no_grade else points_by_grade.get(grade, np.nan)

# Function to extract interests and strengths into separate columns
def extract_traits(df, column_name, prefix, all_traits=None):
    """
    Add one 0/1 indicator column per trait for a comma-separated text column.

    Args:
        df: DataFrame containing ``column_name``. It is modified in place and
            also returned.
        column_name: Column whose cells hold text such as ``"Reading, Art"``.
        prefix: Each new column is named ``f"{prefix}_{trait}"``.
        all_traits: Iterable of trait names to encode. When ``None``, the
            traits are collected from the column's own values.

    Returns:
        ``df`` with the indicator columns appended in sorted trait order.
    """
    raw = df[column_name]
    # Missing cells become empty text so they match no trait and the .str
    # accessor works even when the whole column is empty.
    cells = raw.astype(object).where(raw.notna(), "").astype(str)

    # Only split the column when the vocabulary has to be inferred from it.
    if all_traits is None:
        all_traits = {piece for cell in cells for piece in cell.split(',')}

    # Normalise before iterating: strip padding, drop empty pieces (e.g. from
    # a trailing comma) and de-duplicate, so each column is written exactly
    # once. Sorting keeps the column order stable between runs.
    traits = sorted({trait.strip() for trait in all_traits} - {""})

    for trait in traits:
        # regex=False: trait names are literal text, so names such as "C++"
        # must not be interpreted as regular expressions.
        # NOTE(review): this remains a case-insensitive substring match (as
        # before), so "Art" also matches "Martial Arts".
        matches = cells.str.contains(trait, case=False, na=False, regex=False)
        df[f"{prefix}_{trait}"] = matches.astype(int)

    return df

def preprocess_data(df, all_traits=None):
    """
    Return a copy of the student data with interest/strength indicator columns.

    Args:
        df: DataFrame with 'Interests' and 'Strengths' text columns.
        all_traits: Optional dict with 'Interests' and 'Strengths' entries
            listing the traits to encode. When omitted, the traits are
            gathered from the comma-separated values found in ``df``.

    Returns:
        A new DataFrame; ``df`` itself is left unchanged.
    """
    # Work on a copy so the caller's frame is never modified
    frame = df.copy()

    if all_traits is None:
        # Collect every comma-separated piece from the string cells of each column
        all_traits = {
            key: {
                piece
                for cell in frame[key]
                if isinstance(cell, str)
                for piece in cell.split(',')
            }
            for key in ('Interests', 'Strengths')
        }

    for column, prefix in (("Interests", "interest"), ("Strengths", "strength")):
        frame = extract_traits(frame, column, prefix, all_traits.get(column))

    return frame

# Build the feature matrix passed to the model
def prepare_model_data(processed_df):
    """
    Select and encode the model's feature columns.

    Args:
        processed_df: Frame produced by preprocess_data. It must contain
            'Desired_Career'; missing subject columns are filled with NaN.
            The frame is not modified.

    Returns:
        DataFrame with columns in this order: 'Desired_Career', 'Aggregate',
        the subject grades as point values, then every 'interest_*' and
        'strength_*' indicator column.
    """
    categorical_features = ["Desired_Career"]
    # "Aggregate" is already a number; the remaining columns hold grade codes.
    score_features = ["Aggregate"]
    grade_features = ["English", "Core Maths", "Science", "Social Studies",
                      "Physics", "Biology", "Elective Maths", "Chemistry",
                      "Economics", "E-ICT", "Literature", "Geography",
                      "Business Management", "Visual Arts", "Government"]
    numerical_features = score_features + grade_features

    # Copy so the new or converted columns do not leak into the caller's frame.
    frame = processed_df.copy()

    # The binary interest and strength columns
    interest_strength_cols = [col for col in frame.columns
                              if col.startswith(('interest_', 'strength_'))]

    # Convert grade codes to point values; absent subjects become NaN
    for col in grade_features:
        if col in frame.columns:
            frame[col] = frame[col].apply(grade_to_numeric)
        else:
            frame[col] = np.nan

    # Keep the aggregate numeric. Passing it through the grade map would turn
    # every value into NaN because no number is a grade code.
    # NOTE(review): confirm the training pipeline also kept Aggregate numeric.
    for col in score_features:
        if col in frame.columns:
            frame[col] = pd.to_numeric(frame[col], errors="coerce")
        else:
            frame[col] = np.nan

    return frame[categorical_features + numerical_features + interest_strength_cols]

def get_course_recommendation(student_info):
    """
    Build the text listing the model's most probable courses for one student.

    Args:
        student_info: Mapping of student-record fields to values. It is
            wrapped in a one-row DataFrame and passed through
            preprocess_data and prepare_model_data.

    Returns:
        A numbered list of up to three courses with their predicted
        probability (two decimals), or an
        "Error generating recommendations: ..." message if any step raises.
    """
    try:
        # Convert student data to model features
        student_df = pd.DataFrame([student_info])
        processed_student = preprocess_data(student_df, all_traits)
        student_features = prepare_model_data(processed_student)

        # A single predict_proba call is sufficient: the ranking is derived
        # from it, so no separate model.predict() pass is needed.
        probabilities = model.predict_proba(student_features)[0]
        classes = model.classes_

        # Indices of the three highest probabilities, best first
        top_indices = np.argsort(probabilities)[::-1][:3]

        entries = [
            f"{rank}. {classes[idx]} (Confidence: {probabilities[idx]:.2f})\n\n"
            for rank, idx in enumerate(top_indices, 1)
        ]
        return "Top Course Recommendations:\n\n" + "".join(entries)
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"Error generating recommendations: {str(e)}"

def explain_recommendation(student_info, top_recommendation):
    """
    Describe which student inputs accompany the recommended course.

    Args:
        student_info: Mapping with 'Desired_Career', 'Interests', 'Strengths'
            and a numeric 'Aggregate'.
        top_recommendation: Sequence whose first item is the course name.

    Returns:
        A multi-line explanation, or an "Error generating explanation: ..."
        message if anything raises while building it.
    """
    try:
        course = top_recommendation[0]

        reasons = [
            f"The course '{course}' was recommended based on:",
            f"- Your career interest in {student_info['Desired_Career']}",
            f"- Your interests in {student_info['Interests']}",
            f"- Your strengths in {student_info['Strengths']}",
        ]

        # Lower aggregates are better; 20 and above adds no remark
        aggregate = student_info['Aggregate']
        if aggregate < 15:
            reasons.append("- Your excellent aggregate score")
        elif aggregate < 20:
            reasons.append("- Your good aggregate score")

        return "\n".join(reasons) + "\n"
    except Exception as e:
        return f"Error generating explanation: {str(e)}"

def predict_career(
    desired_career, aggregate, interests, strengths,
    english, core_maths, science, social_studies,
    elective_maths, physics, biology, chemistry,
):
    """
    Gradio callback: turn the form values into recommendation text.

    The arguments arrive positionally in the order of the ``inputs`` list
    wired to the submit button.

    Returns:
        The recommendation list followed by an explanation of the top course,
        or an error message with traceback details if anything raises.
    """
    try:
        # Subjects that were in the training data but are not on the form
        subjects_not_on_form = (
            "Economics", "E-ICT", "Literature", "Geography",
            "Business Management", "Visual Arts", "Government",
        )

        # Full student record with every field the pipeline expects
        student_info = {
            "StudentID": "STU_TEMP",
            "Desired_Career": desired_career,
            "Recommended_Course": "",  # Will be predicted
            "Aggregate": aggregate,
            "Interests": interests,
            "Strengths": strengths,
            "English": english,
            "Core Maths": core_maths,
            "Science": science,
            "Social Studies": social_studies,
            "Elective Maths": elective_maths,
            "Physics": physics,
            "Biology": biology,
            "Chemistry": chemistry,
            **dict.fromkeys(subjects_not_on_form, ""),
        }

        recommendations = get_course_recommendation(student_info)

        # Re-run the pipeline to find the single best class for the explanation
        features = prepare_model_data(
            preprocess_data(pd.DataFrame([student_info]), all_traits)
        )
        probabilities = model.predict_proba(features)[0]
        best_first = np.argsort(probabilities)[::-1][:1]
        classes = model.classes_
        ranked = [(classes[i], probabilities[i]) for i in best_first]
        top_recommendation = ranked[0]

        explanation = explain_recommendation(student_info, top_recommendation)

        return recommendations + "\n" + explanation
    except Exception as e:
        import traceback
        return f"Error processing request: {str(e)}\n\nDetails:\n{traceback.format_exc()}"

# Choices offered by every grade dropdown; the leading "" means no grade chosen
grade_options = [""] + ["A1", "B2", "B3", "C4", "C5", "C6", "D7", "E8", "F9"]

# Create Gradio interface
# Layout: one row with a wider input column (scale=2) on the left and a
# narrower column (scale=1) holding the grade reference, button and output.
with gr.Blocks(title="Career Course Recommendation System") as demo:
    gr.Markdown("# Career Course Recommendation System")
    gr.Markdown("Enter student information to get course recommendations")
    
    with gr.Row():
        # Left column: all student inputs
        with gr.Column(scale=2):
            gr.Markdown("### Student Information")
            desired_career = gr.Textbox(
                label="Desired Career",
                placeholder="Enter your desired career path (e.g. Medicine, Computer Science, Engineering)",
                info="Enter your desired career path"
            )
            aggregate = gr.Slider(minimum=6, maximum=37, value=15, step=1, label="Aggregate Score", info="Lower is better (6 is best, 37 is worst)")
            interests = gr.Textbox(label="Interests (comma separated)", placeholder="Reading,Dancing,Physics", info="List your interests separated by commas")
            # Unlike the interests box, this one starts pre-filled with a default value
            strengths = gr.Textbox(
                label="Strengths (comma separated)", 
                placeholder="Communication,Creativity", 
                info="List your strengths or skills separated by commas",
                value="Communication,Creativity,Logical Reasoning,Analytical Thinking"
            )
            
            # Every subject dropdown shares grade_options as its choices
            gr.Markdown("### Core Subjects (Required)")
            with gr.Row():
                with gr.Column(scale=1):
                    english = gr.Dropdown(choices=grade_options, label="English", info="Grade in English")
                    core_maths = gr.Dropdown(choices=grade_options, label="Core Maths", info="Grade in Core Mathematics")
                
                with gr.Column(scale=1):
                    science = gr.Dropdown(choices=grade_options, label="Science", info="Grade in Integrated Science")
                    social_studies = gr.Dropdown(choices=grade_options, label="Social Studies", info="Grade in Social Studies")
            
            gr.Markdown("### Elective Subjects")
            with gr.Row():
                with gr.Column(scale=1):
                    elective_maths = gr.Dropdown(choices=grade_options, label="Elective Maths", info="Grade in Elective Mathematics")
                    physics = gr.Dropdown(choices=grade_options, label="Physics", info="Grade in Physics")
                
                with gr.Column(scale=1):
                    chemistry = gr.Dropdown(choices=grade_options, label="Chemistry", info="Grade in Chemistry")
                    biology = gr.Dropdown(choices=grade_options, label="Biology", info="Grade in Biology")
        
        # Right column: grade reference, submit button and result box
        with gr.Column(scale=1):
            gr.Markdown("### Grade Scale Reference")
            gr.Markdown("""
            - A1: Excellent (1 point)
            - B2: Very Good (2 points)
            - B3: Good (3 points)
            - C4: Credit (4 points)
            - C5: Credit (5 points)
            - C6: Credit (6 points)
            - D7: Pass (7 points)
            - E8: Pass (8 points)
            - F9: Fail (9 points)
            
            *Lower points are better. Aggregate is the sum of your best subjects.*
            """)
            
            submit_btn = gr.Button("Get Recommendations", variant="primary", size="lg")
            output = gr.Textbox(label="Recommendations", lines=20)
    
    # The inputs list is in the same order as predict_career's parameters
    # (note biology comes before chemistry here, unlike the on-screen layout).
    submit_btn.click(
        fn=predict_career,
        inputs=[desired_career, aggregate, interests, strengths, english, core_maths, science, social_studies, 
                elective_maths, physics, biology, chemistry],
        outputs=output
    )

# Start the app when run as a script; retry once with explicit settings on failure
if __name__ == "__main__":
    try:
        demo.launch()
    except Exception as launch_error:
        import sys

        print(f"Error launching app: {launch_error}")
        print("Trying alternative launch method...")
        # In Colab use a public share link; otherwise bind to all interfaces on 7860
        running_in_colab = 'google.colab' in sys.modules
        fallback_options = (
            {"share": True}
            if running_in_colab
            else {"server_name": "0.0.0.0", "server_port": 7860}
        )
        demo.launch(**fallback_options)