"""Gradio app that recommends courses from a student's grades and traits.

The module loads a trained classifier from ``trained_model.joblib`` (falling
back to a fixed placeholder so the interface can still be exercised), turns
the form input into the feature frame built by ``prepare_model_data`` and
shows the three most probable courses together with a short explanation.
"""

import sys
import traceback

import gradio as gr
import joblib
import numpy as np
import pandas as pd
# Kept from the original file; these names are not referenced directly below.
from sklearn.preprocessing import OneHotEncoder, StandardScaler  # noqa: F401

MODEL_PATH = 'trained_model.joblib'

# Classes reported by the placeholder model when the real one is unavailable.
PLACEHOLDER_CLASSES = [
    "BSc Computer Science",
    "BSc Engineering",
    "Medicine",
    "Other Course",
]


class _PlaceholderModel:
    """Stand-in classifier used only when the trained model cannot be loaded.

    An unfitted scikit-learn estimator already exposes ``predict`` and
    ``predict_proba`` and raises when they are called, so it cannot serve as
    a placeholder. This class returns fixed answers instead.
    """

    classes_ = np.array(PLACEHOLDER_CLASSES)
    _probabilities = np.array([0.7, 0.1, 0.1, 0.1])

    def predict(self, X):
        """Return the first placeholder class for every row of ``X``."""
        return np.array([self.classes_[0]] * len(X))

    def predict_proba(self, X):
        """Return the same fixed probability vector for every row of ``X``."""
        return np.tile(self._probabilities, (len(X), 1))


def _load_model(path=MODEL_PATH):
    """Load the trained model, or return a placeholder if loading fails."""
    try:
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code. Only load model files from a trusted source.
        return joblib.load(path)
    except Exception as e:  # best effort: keep the interface usable
        print(f"Error loading model: {e}")
        print("Creating a placeholder model for interface testing")
        return _PlaceholderModel()


model = _load_model()

# Known interests and strengths; one binary feature column is made per entry.
all_traits = {
    'Interests': {
        'Reading', 'Dancing', 'Physics', 'Research', 'Cooking', 'Art',
        'Playing Football', 'Creativity', 'Writing', 'Technology',
        'Public Speaking', 'Music', 'Mathematics', 'Leadership',
        'Problem-Solving', 'Entrepreneurship',
    },
    'Strengths': {
        'Communication', 'Creativity', 'Logical Reasoning',
        'Innovative Thinking', 'Teamwork', 'Hands-on Skills',
        'Analytical Thinking', 'Leadership', 'Detail-Oriented',
    },
}

# Letter grade -> points (lower is better).
GRADE_MAP = {
    "A1": 1, "B2": 2, "B3": 3, "C4": 4, "C5": 5,
    "C6": 6, "D7": 7, "E8": 8, "F9": 9,
}

CATEGORICAL_FEATURES = ["Desired_Career"]
AGGREGATE_FEATURE = "Aggregate"
# Subject columns that hold letter grades.
GRADE_FEATURES = [
    "English", "Core Maths", "Science", "Social Studies", "Physics",
    "Biology", "Elective Maths", "Chemistry", "Economics", "E-ICT",
    "Literature", "Geography", "Business Management", "Visual Arts",
    "Government",
]
# Column order is unchanged from the original feature list.
NUMERICAL_FEATURES = [AGGREGATE_FEATURE] + GRADE_FEATURES


def grade_to_numeric(grade):
    """Convert a letter grade such as ``"B2"`` to its point value.

    Args:
        grade: The grade as entered; surrounding whitespace and letter case
            are ignored.

    Returns:
        The integer points from ``GRADE_MAP``, or ``np.nan`` when ``grade`` is
        missing, empty, not a string or not a known grade.
    """
    if not isinstance(grade, str):
        return np.nan
    return GRADE_MAP.get(grade.strip().upper(), np.nan)


def extract_traits(df, column_name, prefix, all_traits=None):
    """Add one 0/1 column per trait found in a comma-separated text column.

    Args:
        df: Frame to extend; it is modified in place and also returned.
        column_name: Column holding comma-separated traits.
        prefix: Prefix of the new columns (``f"{prefix}_{trait}"``).
        all_traits: Iterable of trait names. When ``None`` the traits are
            inferred from the values present in ``column_name``.

    Returns:
        ``df`` with the binary trait columns added.
    """
    # Non-string cells (None/NaN) count as "no traits" so ``.str`` is always
    # available, whatever dtype the column has.
    text = pd.Series(
        [cell if isinstance(cell, str) else "" for cell in df[column_name]],
        index=df.index,
        dtype=object,
    )

    if all_traits is None:
        all_traits = {piece for cell in text for piece in cell.split(",")}

    # Strip names and drop empties: an empty trait would match every row.
    # Sorting gives a column order that does not depend on set iteration.
    cleaned = sorted({trait.strip() for trait in all_traits} - {""})

    for trait in cleaned:
        # regex=False: trait names are literal text, not patterns.
        df[f"{prefix}_{trait}"] = text.str.contains(
            trait, case=False, regex=False, na=False
        ).astype(int)

    return df


def preprocess_data(df, all_traits=None):
    """Return a copy of ``df`` with interest and strength indicator columns.

    Args:
        df: Student records with ``Interests`` and ``Strengths`` columns.
        all_traits: Mapping with optional ``'Interests'`` and ``'Strengths'``
            entries listing the traits to encode. A missing entry, or
            ``None`` for the whole mapping, means the traits are inferred
            from the data.

    Returns:
        A new frame; the input frame is left untouched.
    """
    processed_df = df.copy()
    known = {} if all_traits is None else all_traits

    processed_df = extract_traits(
        processed_df, "Interests", "interest", known.get('Interests')
    )
    processed_df = extract_traits(
        processed_df, "Strengths", "strength", known.get('Strengths')
    )
    return processed_df


def prepare_model_data(processed_df):
    """Build the feature frame from preprocessed student data.

    Letter grades are converted to points, missing subject columns are added
    as NaN and the aggregate score is coerced to a number. ``processed_df``
    is modified in place.

    Args:
        processed_df: Output of ``preprocess_data``.

    Returns:
        Frame with the categorical, numerical and trait indicator columns,
        in that order.
    """
    for col in GRADE_FEATURES:
        if col in processed_df.columns:
            processed_df[col] = processed_df[col].apply(grade_to_numeric)
        else:
            # Subject not supplied: represent it as missing.
            processed_df[col] = np.nan

    # The aggregate is already a score, not a letter grade; passing it through
    # the grade mapping would turn every value into NaN.
    if AGGREGATE_FEATURE in processed_df.columns:
        processed_df[AGGREGATE_FEATURE] = pd.to_numeric(
            processed_df[AGGREGATE_FEATURE], errors="coerce"
        )
    else:
        processed_df[AGGREGATE_FEATURE] = np.nan

    interest_strength_cols = [
        col for col in processed_df.columns
        if col.startswith(('interest_', 'strength_'))
    ]

    return processed_df[
        CATEGORICAL_FEATURES + NUMERICAL_FEATURES + interest_strength_cols
    ]


def _align_to_model(features):
    """Reorder columns to the model's recorded feature order when possible.

    Only reorders when the model exposes ``feature_names_in_`` and it names
    exactly the columns present; otherwise ``features`` is returned as is.
    """
    expected = getattr(model, "feature_names_in_", None)
    if expected is None:
        return features
    expected = list(expected)
    same_columns = (
        len(expected) == len(features.columns)
        and set(expected) == set(features.columns)
    )
    return features[expected] if same_columns else features


def _rank_courses(student_info, top_n=3):
    """Return the ``top_n`` ``(course, probability)`` pairs, best first."""
    student_df = pd.DataFrame([student_info])
    features = prepare_model_data(preprocess_data(student_df, all_traits))
    probabilities = model.predict_proba(_align_to_model(features))[0]
    best = np.argsort(probabilities)[::-1][:top_n]
    classes = model.classes_
    return [(classes[idx], float(probabilities[idx])) for idx in best]


def _format_recommendations(ranked):
    """Render ranked ``(course, probability)`` pairs as the result text."""
    lines = [
        f"{i}. {course} (Confidence: {prob:.2f})\n\n"
        for i, (course, prob) in enumerate(ranked, 1)
    ]
    return "Top Course Recommendations:\n\n" + "".join(lines)


def get_course_recommendation(student_info):
    """Return the top three course recommendations as display text.

    Args:
        student_info: Mapping of column name to value for one student.

    Returns:
        The formatted recommendations, or an error message if anything in the
        prediction pipeline raises.
    """
    try:
        return _format_recommendations(_rank_courses(student_info))
    except Exception as e:
        return f"Error generating recommendations: {str(e)}"


def explain_recommendation(student_info, top_recommendation):
    """Explain why the top course was recommended.

    Args:
        student_info: Mapping with ``Desired_Career``, ``Interests``,
            ``Strengths`` and ``Aggregate`` entries.
        top_recommendation: Sequence whose first item is the course name.

    Returns:
        The explanation text, or an error message if building it fails.
    """
    try:
        course = top_recommendation[0]
        parts = [
            f"The course '{course}' was recommended based on:\n",
            f"- Your career interest in {student_info['Desired_Career']}\n",
            f"- Your interests in {student_info['Interests']}\n",
            f"- Your strengths in {student_info['Strengths']}\n",
        ]

        # Lower aggregates are better; 20 and above adds no remark.
        aggregate = student_info['Aggregate']
        if aggregate < 15:
            parts.append("- Your excellent aggregate score\n")
        elif aggregate < 20:
            parts.append("- Your good aggregate score\n")

        return "".join(parts)
    except Exception as e:
        return f"Error generating explanation: {str(e)}"


def predict_career(desired_career, aggregate, interests, strengths,
                   english, core_maths, science, social_studies,
                   elective_maths, physics, biology, chemistry):
    """Gradio callback: build the student record and return the result text.

    The arguments are the values of the form widgets, in the order they are
    wired to the submit button.

    Returns:
        The recommendations followed by the explanation, or an error message
        with traceback details if processing fails.
    """
    try:
        student_info = {
            "StudentID": "STU_TEMP",
            "Desired_Career": desired_career,
            "Recommended_Course": "",  # Will be predicted
            "Aggregate": aggregate,
            "Interests": interests,
            "Strengths": strengths,
            "English": english,
            "Core Maths": core_maths,
            "Science": science,
            "Social Studies": social_studies,
            "Elective Maths": elective_maths,
            "Physics": physics,
            "Biology": biology,
            "Chemistry": chemistry,
            # Subjects that are not on the form are sent as empty grades.
            "Economics": "",
            "E-ICT": "",
            "Literature": "",
            "Geography": "",
            "Business Management": "",
            "Visual Arts": "",
            "Government": "",
        }

        # Rank once and reuse the result for both the list and the
        # explanation instead of running the pipeline twice.
        ranked = _rank_courses(student_info)
        recommendations = _format_recommendations(ranked)
        explanation = explain_recommendation(student_info, ranked[0])

        return recommendations + "\n" + explanation
    except Exception as e:
        error_details = traceback.format_exc()
        return f"Error processing request: {str(e)}\n\nDetails:\n{error_details}"


# Grade choices for the dropdowns; the empty entry means "not taken".
grade_options = ["", "A1", "B2", "B3", "C4", "C5", "C6", "D7", "E8", "F9"]

# Built from explicit lines so source indentation cannot leak into the text.
GRADE_SCALE_MARKDOWN = "\n".join([
    "- A1: Excellent (1 point)",
    "- B2: Very Good (2 points)",
    "- B3: Good (3 points)",
    "- C4: Credit (4 points)",
    "- C5: Credit (5 points)",
    "- C6: Credit (6 points)",
    "- D7: Pass (7 points)",
    "- E8: Pass (8 points)",
    "- F9: Fail (9 points)",
    "",
    "*Lower points are better. Aggregate is the sum of your best subjects.*",
])

# Create Gradio interface
with gr.Blocks(title="Career Course Recommendation System") as demo:
    gr.Markdown("# Career Course Recommendation System")
    gr.Markdown("Enter student information to get course recommendations")

    with gr.Row():
        with gr.Column(scale=2):
            gr.Markdown("### Student Information")
            desired_career = gr.Textbox(
                label="Desired Career",
                placeholder="Enter your desired career path (e.g. Medicine, Computer Science, Engineering)",
                info="Enter your desired career path",
            )
            aggregate = gr.Slider(
                minimum=6, maximum=37, value=15, step=1,
                label="Aggregate Score",
                info="Lower is better (6 is best, 37 is worst)",
            )
            interests = gr.Textbox(
                label="Interests (comma separated)",
                placeholder="Reading,Dancing,Physics",
                info="List your interests separated by commas",
            )
            strengths = gr.Textbox(
                label="Strengths (comma separated)",
                placeholder="Communication,Creativity",
                info="List your strengths or skills separated by commas",
                value="Communication,Creativity,Logical Reasoning,Analytical Thinking",
            )

            gr.Markdown("### Core Subjects (Required)")
            with gr.Row():
                with gr.Column(scale=1):
                    english = gr.Dropdown(
                        choices=grade_options, label="English",
                        info="Grade in English",
                    )
                    core_maths = gr.Dropdown(
                        choices=grade_options, label="Core Maths",
                        info="Grade in Core Mathematics",
                    )
                with gr.Column(scale=1):
                    science = gr.Dropdown(
                        choices=grade_options, label="Science",
                        info="Grade in Integrated Science",
                    )
                    social_studies = gr.Dropdown(
                        choices=grade_options, label="Social Studies",
                        info="Grade in Social Studies",
                    )

            gr.Markdown("### Elective Subjects")
            with gr.Row():
                with gr.Column(scale=1):
                    elective_maths = gr.Dropdown(
                        choices=grade_options, label="Elective Maths",
                        info="Grade in Elective Mathematics",
                    )
                    physics = gr.Dropdown(
                        choices=grade_options, label="Physics",
                        info="Grade in Physics",
                    )
                with gr.Column(scale=1):
                    chemistry = gr.Dropdown(
                        choices=grade_options, label="Chemistry",
                        info="Grade in Chemistry",
                    )
                    biology = gr.Dropdown(
                        choices=grade_options, label="Biology",
                        info="Grade in Biology",
                    )

        with gr.Column(scale=1):
            gr.Markdown("### Grade Scale Reference")
            gr.Markdown(GRADE_SCALE_MARKDOWN)

    submit_btn = gr.Button("Get Recommendations", variant="primary", size="lg")
    output = gr.Textbox(label="Recommendations", lines=20)

    submit_btn.click(
        fn=predict_career,
        inputs=[
            desired_career, aggregate, interests, strengths,
            english, core_maths, science, social_studies,
            elective_maths, physics, biology, chemistry,
        ],
        outputs=output,
    )

# Launch the app
if __name__ == "__main__":
    try:
        demo.launch()
    except Exception as e:
        print(f"Error launching app: {e}")
        # Retry with settings suited to the detected environment.
        print("Trying alternative launch method...")
        if 'google.colab' in sys.modules:
            demo.launch(share=True)
        else:
            demo.launch(server_name="0.0.0.0", server_port=7860)