Spaces:

msgasu
/

career-recommender

Sleeping

App Files Files Community

msgasu committed on May 11, 2025

Commit

fa6e48f

verified ·

1 Parent(s): 34fb376

Update app.py

Browse files

Files changed (1) hide show

app.py +221 -158

app.py CHANGED Viewed

@@ -1,206 +1,269 @@
-import gradio as gr
 import joblib
 import pandas as pd
 import numpy as np
 # Load your trained model
 model = joblib.load('trained_model.joblib')
-# Define the four fixed electives
-FIXED_ELECTIVES = ["Elective Maths", "Biology", "Chemistry", "Physics"]
-# Define grade to numeric conversion
 def grade_to_numeric(grade):
     if pd.isna(grade) or grade == "":
         return np.nan
     grade_map = {
         "A1": 1, "B2": 2, "B3": 3, "C4": 4, "C5": 5, "C6": 6,
         "D7": 7, "E8": 8, "F9": 9
     }
     return grade_map.get(grade, np.nan)
-def create_student_data(desired_career, aggregate, english, core_maths, science,
-                       social_studies, interests, strengths, elective_grades):
     """
-    Creates a complete student data record with:
-    - The four fixed electives
-    - Properly formatted grades
-    - Processed interests and strengths
     """
-    # Start with core academic data
-    student_data = {
-        "StudentID": "STU_GRADIO",
-        "Desired_Career": desired_career,
-        "Aggregate": aggregate,
-        "English": english,
-        "Core Maths": core_maths,
-        "Science": science,
-        "Social Studies": social_studies,
-        "Interests": interests,
-        "Strengths": strengths,
-        "Electives": ", ".join(FIXED_ELECTIVES)  # Fixed electives list
-    }
-    # Add all four elective grades
-    for elective in FIXED_ELECTIVES:
-        student_data[elective] = elective_grades.get(elective, "")
-    # Convert to DataFrame
-    df = pd.DataFrame([student_data])
-    # Convert all grades to numerical
-    grade_cols = ['English', 'Core Maths', 'Science', 'Social Studies'] + FIXED_ELECTIVES
-    for col in grade_cols:
-        df[col] = df[col].apply(grade_to_numeric)
-    # Process interests and strengths into binary features
-    for interest in [i.strip() for i in interests.split(',')]:
-        df[f"interest_{interest}"] = 1
-    for strength in [s.strip() for s in strengths.split(',')]:
-        df[f"strength_{strength}"] = 1
-    # Fill missing binary features with 0
-    for col in df.columns:
-        if col.startswith(('interest_', 'strength_')):
-            df[col] = df[col].fillna(0)
-    return df
-def get_recommendations(student_df):
-    """Get course recommendations from the model"""
-    try:
-        probabilities = model.predict_proba(student_df)[0]
-        classes = model.classes_
-        top5_idx = np.argsort(probabilities)[::-1][:5]
-        return [(classes[i], float(probabilities[i])) for i in top5_idx]
-    except Exception as e:
-        return [(f"Error: {str(e)}", 0.0)]
-def explain_recommendation(student_df, top_course):
-    """Generate explanation for the top recommended course"""
-    explanation = f"## Recommended Course: {top_course[0]}\n\n"
-    explanation += "### Basis for Recommendation:\n"
     # Career alignment
-    explanation += f"- Career Interest: {student_df['Desired_Career'].iloc[0]}\n"
     # Academic strengths
-    strong_subjects = []
-    for subject in ['Core Maths', 'English', 'Science'] + FIXED_ELECTIVES:
-        if subject in student_df.columns and student_df[subject].iloc[0] <= 3:  # Good grades (A1-B3)
-            strong_subjects.append(subject)
-    if strong_subjects:
-        explanation += f"- Strong Performance in: {', '.join(strong_subjects)}\n"
-    # Interests and strengths
-    explanation += f"- Key Interests: {student_df['Interests'].iloc[0]}\n"
-    explanation += f"- Core Strengths: {student_df['Strengths'].iloc[0]}\n"
-    # Aggregate score
-    agg = student_df['Aggregate'].iloc[0]
-    if agg < 10:
-        explanation += f"- Exceptional Academic Performance (Aggregate: {agg})\n"
-    elif agg < 15:
-        explanation += f"- Strong Academic Performance (Aggregate: {agg})\n"
-    else:
-        explanation += f"- Good Academic Foundation (Aggregate: {agg})\n"
     return explanation
-def predict_and_explain(desired_career, aggregate, english, core_maths, science,
-                       social_studies, interests, strengths,
-                       elective_maths, chemistry, physics, biology):
-    # Prepare elective grades dictionary
-    elective_grades = {
         "Elective Maths": elective_maths,
-        "Chemistry": chemistry,
         "Physics": physics,
-        "Biology": biology
     }
-    # Create complete student data
-    student_df = create_student_data(
-        desired_career=desired_career,
-        aggregate=aggregate,
-        english=english,
-        core_maths=core_maths,
-        science=science,
-        social_studies=social_studies,
-        interests=interests,
-        strengths=strengths,
-        elective_grades=elective_grades
-    )
     # Get recommendations
-    recommendations = get_recommendations(student_df)
-    # Prepare output
-    output = "## Top 5 Course Recommendations\n\n"
-    for course, prob in recommendations:
-        output += f"- {course} ({prob*100:.1f}% match)\n"
-    # Add explanation for top recommendation if available
-    if recommendations and recommendations[0][1] > 0:
-        output += "\n" + explain_recommendation(student_df, recommendations[0])
-    return output
-# Gradio Interface
-with gr.Blocks(title="Career Advisor") as interface:
-    gr.Markdown("# University Course Recommendation System")
     with gr.Row():
-        with gr.Column():
             desired_career = gr.Dropdown(
-                ["Medicine", "Engineering", "Computer Science", "Business",
-                 "Law", "Agriculture", "Journalism", "Education"],
-                label="Desired Career"
             )
-            aggregate = gr.Number(label="Aggregate Score", minimum=6, maximum=36)
-            interests = gr.Textbox(label="Your Interests (comma separated)")
-            strengths = gr.Textbox(label="Your Strengths (comma separated)")
-        with gr.Column():
-            english = gr.Dropdown(["A1", "B2", "B3", "C4", "C5", "C6", "D7", "E8", "F9"], label="English")
-            core_maths = gr.Dropdown(["A1", "B2", "B3", "C4", "C5", "C6", "D7", "E8", "F9"], label="Core Maths")
-            science = gr.Dropdown(["A1", "B2", "B3", "C4", "C5", "C6", "D7", "E8", "F9"], label="Science")
-            social_studies = gr.Dropdown(["A1", "B2", "B3", "C4", "C5", "C6", "D7", "E8", "F9"], label="Social Studies")
-    # Electives section with the four fixed options
-    gr.Markdown("## Elective Subjects")
-    with gr.Row():
-        elective_maths = gr.Dropdown(["A1", "B2", "B3", "C4", "C5", "C6", "D7", "E8", "F9", ""], label="Elective Maths")
-        chemistry = gr.Dropdown(["A1", "B2", "B3", "C4", "C5", "C6", "D7", "E8", "F9", ""], label="Chemistry")
-        physics = gr.Dropdown(["A1", "B2", "B3", "C4", "C5", "C6", "D7", "E8", "F9", ""], label="Physics")
-        biology = gr.Dropdown(["A1", "B2", "B3", "C4", "C5", "C6", "D7", "E8", "F9", ""], label="Biology")
-    submit = gr.Button("Get Recommendations")
-    output = gr.Markdown()
-    submit.click(
-        fn=predict_and_explain,
-        inputs=[
-            desired_career, aggregate, english, core_maths, science,
-            social_studies, interests, strengths,
-            elective_maths, chemistry, physics, biology
-        ],
         outputs=output
     )
-    # Medical school example matching your Colab test case
-    gr.Examples(
-        examples=[[
-            "Medicine", 6, "A1", "A1", "A1", "A1",
-            "research, medicine, helping people",
-            "analytical thinking, attention to detail",
-            "A1", "A1", "A1", "A1"  # Elective grades in order: Maths, Chem, Physics, Bio
-        ]],
-        inputs=[
-            desired_career, aggregate, english, core_maths, science,
-            social_studies, interests, strengths,
-            elective_maths, chemistry, physics, biology
-        ]
-    )
-interface.launch()

 import joblib
 import pandas as pd
 import numpy as np
+import gradio as gr
+from sklearn.preprocessing import OneHotEncoder, StandardScaler
 # Load the trained model once at module import; the functions below read this global.
 # NOTE(review): joblib.load unpickles the file, so 'trained_model.joblib' must
 # come from a trusted source.
 model = joblib.load('trained_model.joblib')
+# Define all_traits dictionary (needed for preprocessing)
+all_traits = {
+    'Interests': set(['Reading', 'Dancing', 'Physics', 'Research', 'Cooking', 'Art', 'Playing Football', 'Creativity', 'Writing']),
+    'Strengths': set(['Communication', 'Creativity', 'Logical Reasoning', 'Innovative Thinking', 'Teamwork', 'Hands-on Skills', 'Analytical Thinking'])
+}
+# Function to convert grades to numerical values
 def grade_to_numeric(grade):
     """Translate a grade label ("A1" ... "F9") into its point value (1 ... 9).

     Missing values, the empty string and any unrecognised input yield NaN.
     """
     # Keep the NA check first: it short-circuits before comparing NA to "".
     missing = pd.isna(grade) or grade == ""
     if not missing:
         labels = ("A1", "B2", "B3", "C4", "C5", "C6", "D7", "E8", "F9")
         points = dict(zip(labels, range(1, 10)))
         return points.get(grade, np.nan)
     return np.nan
+# Function to extract interests and strengths into separate columns
+def extract_traits(df, column_name, prefix, all_traits=None):
     """
+    Extracts traits from a column, creating binary columns for each trait.
     """
+    # Split the comma-separated values
+    trait_series = df[column_name].str.split(',', expand=True)
+    # Infer all_traits if not provided
+    if all_traits is None:
+        all_traits = set()
+        for col in trait_series.columns:
+            all_traits.update(trait_series[col].dropna().unique())
+    # Create binary columns for each trait
+    for trait in all_traits:
+        col_name = f"{prefix}_{trait.strip()}"
+        df[col_name] = df[column_name].str.contains(trait, case=False, na=False).astype(int)
+    return df
+def preprocess_data(df, all_traits=None):
+    """
+    Preprocesses the student data.
+    """
+    # Create a copy to avoid modifying the original
+    processed_df = df.copy()
+    if all_traits is None:
+        all_traits = {
+            'Interests': set(),
+            'Strengths': set()
+        }
+        for _, row in processed_df.iterrows():
+            all_traits['Interests'].update(row['Interests'].split(',') if isinstance(row['Interests'], str) else [])
+            all_traits['Strengths'].update(row['Strengths'].split(',') if isinstance(row['Strengths'], str) else [])
+    processed_df = extract_traits(processed_df, "Interests", "interest", all_traits.get('Interests'))
+    processed_df = extract_traits(processed_df, "Strengths", "strength", all_traits.get('Strengths'))
+    return processed_df
+# Create features and target variable
+def prepare_model_data(processed_df):
+    # Identify categorical and numerical features
+    categorical_features = ["Desired_Career"]
+    numerical_features = ["Aggregate", "English", "Core Maths", "Science", "Social Studies",
+                          "Physics", "Biology", "Elective Maths", "Chemistry"]
+    # Add the binary interest and strength columns
+    interest_strength_cols = [col for col in processed_df.columns if col.startswith('interest_') or col.startswith('strength_')]
+    # Convert grade columns to numerical values
+    for col in numerical_features:
+        if col in processed_df.columns:  # Check if column exists
+            processed_df[col] = processed_df[col].apply(grade_to_numeric)
+    # Get features (using processed_df with numerical grades)
+    X = processed_df[categorical_features + numerical_features + interest_strength_cols]
+    return X
+def get_course_recommendation(student_info):
+    """
+    Get course recommendations for a student based on their information.
+    """
+    # Convert student data to DataFrame
+    student_df = pd.DataFrame([student_info])
+    # Preprocess student data
+    processed_student = preprocess_data(student_df, all_traits)
+    # Extract features
+    student_features = prepare_model_data(processed_student)
+    # Make prediction
+    recommended_course = model.predict(student_features)[0]
+    probabilities = model.predict_proba(student_features)[0]
+    # Get top 3 recommendations with probabilities
+    class_indices = np.argsort(probabilities)[::-1][:3]
+    classes = model.classes_
+    top_recommendations = [(classes[idx], f"{probabilities[idx]:.2f}") for idx in class_indices]
+    # Format the output
+    result = "Top Course Recommendations:\n\n"
+    for i, (course, prob) in enumerate(top_recommendations, 1):
+        result += f"{i}. {course} (Confidence: {prob})\n\n"
+    return result
+def explain_recommendation(student_info, top_recommendation):
+    """
+    Provide an explanation for why a particular course was recommended.
+    """
+    course = top_recommendation[0]
+    explanation = f"The course '{course}' was recommended based on:\n"
     # Career alignment
+    explanation += f"- Your career interest in {student_info['Desired_Career']}\n"
     # Academic strengths
+    subjects = []
+    if grade_to_numeric(student_info.get('Core Maths', '')) <= 3:
+        subjects.append("Mathematics")
+    if grade_to_numeric(student_info.get('English', '')) <= 3:
+        subjects.append("English")
+    if grade_to_numeric(student_info.get('Science', '')) <= 3:
+        subjects.append("Science")
+    if subjects:
+        explanation += f"- Your strong performance in {', '.join(subjects)}\n"
+    # Interests and strengths match
+    explanation += f"- Your interests in {student_info['Interests']}\n"
+    explanation += f"- Your strengths in {student_info['Strengths']}\n"
+    # Aggregate score context
+    if student_info['Aggregate'] < 15:
+        explanation += "- Your excellent aggregate score\n"
+    elif student_info['Aggregate'] < 20:
+        explanation += "- Your good aggregate score\n"
     return explanation
+def predict_career(desired_career, aggregate, interests, strengths, english, core_maths, science, social_studies,
+                  elective_maths, physics, biology, chemistry):
+    # Create student data dictionary
+    student_info = {
+        "StudentID": "STU_TEMP",
+        "Desired_Career": desired_career,
+        "Recommended_Course": "",  # Will be predicted
+        "Aggregate": aggregate,
+        "Interests": interests,
+        "Strengths": strengths,
+        "English": english,
+        "Core Maths": core_maths,
+        "Science": science,
+        "Social Studies": social_studies,
         "Elective Maths": elective_maths,
         "Physics": physics,
+        "Biology": biology,
+        "Chemistry": chemistry
     }
     # Get recommendations
+    recommendations = get_course_recommendation(student_info)
+    # Get top recommendation for explanation
+    student_df = pd.DataFrame([student_info])
+    processed_student = preprocess_data(student_df, all_traits)
+    student_features = prepare_model_data(processed_student)
+    probabilities = model.predict_proba(student_features)[0]
+    class_indices = np.argsort(probabilities)[::-1][:1]
+    classes = model.classes_
+    top_recommendation = [(classes[idx], probabilities[idx]) for idx in class_indices][0]
+    # Get explanation
+    explanation = explain_recommendation(student_info, top_recommendation)
+    return recommendations + "\n" + explanation
+# Define grade options with descriptions
+grade_options = [
+    ("", "Select Grade"),
+    ("A1", "A1 - Excellent (1)"),
+    ("B2", "B2 - Very Good (2)"),
+    ("B3", "B3 - Good (3)"),
+    ("C4", "C4 - Credit (4)"),
+    ("C5", "C5 - Credit (5)"),
+    ("C6", "C6 - Credit (6)"),
+    ("D7", "D7 - Pass (7)"),
+    ("E8", "E8 - Pass (8)"),
+    ("F9", "F9 - Fail (9)")
+]
+# Create Gradio interface
+# One row with two columns: the first (scale=2) holds every input widget, the
+# second (scale=1) holds the grade reference, the submit button and the output.
+with gr.Blocks(title="Career Course Recommendation System") as demo:
+    gr.Markdown("# Career Course Recommendation System")
+    gr.Markdown("Enter student information to get course recommendations")
     with gr.Row():
+        # First column: student details and subject grades
+        with gr.Column(scale=2):
+            gr.Markdown("### Student Information")
             desired_career = gr.Dropdown(
+                choices=["Medicine", "Pharmacy", "Law", "Computer Science", "Engineering", "Business", "Nursing", "Agriculture", "Journalism", "Education"],
+                label="Desired Career",
+                info="Select your desired career path"
             )
+            aggregate = gr.Slider(minimum=6, maximum=37, value=15, step=1, label="Aggregate Score", info="Lower is better (6 is best, 37 is worst)")
+            # Free-text, comma-separated fields
+            interests = gr.Textbox(label="Interests (comma separated)", placeholder="Reading,Dancing,Physics", info="List your interests separated by commas")
+            strengths = gr.Textbox(label="Strengths (comma separated)", placeholder="Communication,Creativity", info="List your strengths separated by commas")
+            gr.Markdown("### Core Subjects (Required)")
+            # Every grade dropdown shares the grade_options choice list
+            with gr.Row():
+                with gr.Column(scale=1):
+                    english = gr.Dropdown(choices=grade_options, label="English", info="Grade in English")
+                    core_maths = gr.Dropdown(choices=grade_options, label="Core Maths", info="Grade in Core Mathematics")
+                with gr.Column(scale=1):
+                    science = gr.Dropdown(choices=grade_options, label="Science", info="Grade in Integrated Science")
+                    social_studies = gr.Dropdown(choices=grade_options, label="Social Studies", info="Grade in Social Studies")
+            gr.Markdown("### Elective Subjects")
+            with gr.Row():
+                with gr.Column(scale=1):
+                    elective_maths = gr.Dropdown(choices=grade_options, label="Elective Maths", info="Grade in Elective Mathematics")
+                    physics = gr.Dropdown(choices=grade_options, label="Physics", info="Grade in Physics")
+                with gr.Column(scale=1):
+                    chemistry = gr.Dropdown(choices=grade_options, label="Chemistry", info="Grade in Chemistry")
+                    biology = gr.Dropdown(choices=grade_options, label="Biology", info="Grade in Biology")
+        # Second column: static grade reference, submit button and result box
+        with gr.Column(scale=1):
+            gr.Markdown("### Grade Scale Reference")
+            gr.Markdown("""
+            - A1: Excellent (1 point)
+            - B2: Very Good (2 points)
+            - B3: Good (3 points)
+            - C4: Credit (4 points)
+            - C5: Credit (5 points)
+            - C6: Credit (6 points)
+            - D7: Pass (7 points)
+            - E8: Pass (8 points)
+            - F9: Fail (9 points)
+            *Lower points are better. Aggregate is the sum of your best subjects.*
+            """)
+            submit_btn = gr.Button("Get Recommendations", variant="primary", size="lg")
+            output = gr.Textbox(label="Recommendations", lines=20)
+    # The inputs list follows the parameter order of predict_career exactly
+    # (note: biology comes before chemistry here, unlike the on-screen order)
+    submit_btn.click(
+        fn=predict_career,
+        inputs=[desired_career, aggregate, interests, strengths, english, core_maths, science, social_studies,
+                elective_maths, physics, biology, chemistry],
         outputs=output
     )
+# Launch the app
+# Guarded so the server starts only when this file is run as a script
+if __name__ == "__main__":
+    demo.launch()