Spaces:

msgasu
/

career-recommender

Sleeping

App Files Files Community

career-recommender / app.py

msgasu

Update app.py

2c20c04 verified 9 months ago

raw

history blame contribute delete

12.9 kB

	import joblib
	import pandas as pd
	import numpy as np
	import gradio as gr
	from sklearn.preprocessing import OneHotEncoder, StandardScaler

	# Load the trained model, falling back to a stand-in so the interface can
	# still be exercised when the model file is unavailable.
	class _PlaceholderModel:
	    """Minimal stand-in exposing the estimator attributes this app reads.

	    It always favours the first class and returns fixed probabilities, one
	    row of output per row of input. An unfitted scikit-learn estimator is
	    not usable here: it already has ``predict``/``predict_proba`` methods
	    (so ``hasattr`` checks never replace them) and they fail until fitted.
	    """

	    classes_ = np.array(["BSc Computer Science", "BSc Engineering", "Medicine", "Other Course"])
	    _probabilities = np.array([0.7, 0.1, 0.1, 0.1])

	    def predict(self, X):
	        """Return the first class label once for every row of ``X``."""
	        return np.full(len(X), self.classes_[0])

	    def predict_proba(self, X):
	        """Return the fixed probability vector once for every row of ``X``."""
	        return np.tile(self._probabilities, (len(X), 1))


	try:
	    # NOTE(security): joblib.load unpickles the file, which can execute
	    # arbitrary code. Only ship a model file from a trusted source.
	    model = joblib.load('trained_model.joblib')
	except Exception as e:
	    # Deliberately broad: a missing file, a corrupt pickle or a library
	    # version mismatch should all degrade to the placeholder, not crash.
	    print(f"Error loading model: {e}")
	    print("Creating a placeholder model for interface testing")
	    model = _PlaceholderModel()

	# Fixed vocabularies of interests and strengths; preprocessing turns each
	# entry into its own binary column.
	all_traits = {
	    'Interests': {
	        'Reading', 'Dancing', 'Physics', 'Research', 'Cooking', 'Art',
	        'Playing Football', 'Creativity', 'Writing', 'Technology',
	        'Public Speaking', 'Music', 'Mathematics', 'Leadership',
	        'Problem-Solving', 'Entrepreneurship',
	    },
	    'Strengths': {
	        'Communication', 'Creativity', 'Logical Reasoning',
	        'Innovative Thinking', 'Teamwork', 'Hands-on Skills',
	        'Analytical Thinking', 'Leadership', 'Detail-Oriented',
	    },
	}

	# Letter grades mapped to their point values (lower is better). Built once
	# at import time instead of on every call.
	_GRADE_POINTS = {
	    "A1": 1, "B2": 2, "B3": 3, "C4": 4, "C5": 5, "C6": 6,
	    "D7": 7, "E8": 8, "F9": 9,
	}


	def grade_to_numeric(grade):
	    """Convert a letter grade such as ``"B3"`` to its point value.

	    Args:
	        grade: The grade label. Surrounding whitespace and letter case are
	            ignored, so ``" b3 "`` is treated like ``"B3"``.

	    Returns:
	        The integer point value (1-9), or ``np.nan`` when the grade is
	        missing, empty, not a string, or not a recognised label.
	    """
	    # Anything that is not text (None, NaN, numbers, lists, ...) has no
	    # grade mapping; checking the type first also avoids hashing errors.
	    if not isinstance(grade, str):
	        return np.nan
	    return _GRADE_POINTS.get(grade.strip().upper(), np.nan)

	def extract_traits(df, column_name, prefix, all_traits=None):
	    """Add one 0/1 indicator column per trait found in a comma-separated column.

	    A row is flagged for a trait when the trait name occurs in the row's
	    text as a case-insensitive literal substring.

	    Args:
	        df: DataFrame to extend. It is modified in place and also returned.
	        column_name: Name of the column holding comma-separated trait text.
	        prefix: Prefix of the generated columns, named ``f"{prefix}_{trait}"``.
	        all_traits: Iterable of trait names to look for. When ``None``, the
	            traits are inferred from the values present in ``column_name``.

	    Returns:
	        The same DataFrame with the indicator columns added.
	    """
	    text = df[column_name]

	    # Only split the column when the trait list has to be inferred.
	    if all_traits is None:
	        all_traits = text.str.split(',').explode().dropna().unique()

	    # Trim names so "A, B" and "B" agree, drop blanks (an empty pattern
	    # matches every row), and sort so the column order does not depend on
	    # set iteration order, which varies between interpreter runs.
	    traits = sorted({trait.strip() for trait in all_traits} - {""})

	    for trait in traits:
	        # regex=False: trait names are plain text, not patterns, so names
	        # such as "C++" must not be interpreted as regular expressions.
	        df[f"{prefix}_{trait}"] = text.str.contains(
	            trait, case=False, regex=False, na=False
	        ).astype(int)

	    return df

	def _collect_traits(values):
	    """Return the distinct, whitespace-trimmed traits in comma-separated strings."""
	    found = set()
	    for cell in values:
	        # Non-string cells (None, NaN) contribute no traits.
	        if isinstance(cell, str):
	            found.update(part.strip() for part in cell.split(','))
	    # "a,,b" or a trailing comma would otherwise yield an empty trait.
	    found.discard("")
	    return found


	def preprocess_data(df, all_traits=None):
	    """Expand the ``Interests`` and ``Strengths`` columns into binary trait columns.

	    Args:
	        df: Student records with comma-separated ``Interests`` and
	            ``Strengths`` columns. The input is not modified.
	        all_traits: Optional dict with ``'Interests'`` and ``'Strengths'``
	            entries listing the traits to encode. When ``None``, the traits
	            are inferred from the data itself.

	    Returns:
	        A copy of ``df`` with ``interest_*`` and ``strength_*`` columns added.
	    """
	    # Work on a copy so the caller's frame is left untouched.
	    processed_df = df.copy()

	    if all_traits is None:
	        all_traits = {
	            'Interests': _collect_traits(processed_df['Interests']),
	            'Strengths': _collect_traits(processed_df['Strengths']),
	        }

	    processed_df = extract_traits(processed_df, "Interests", "interest", all_traits.get('Interests'))
	    processed_df = extract_traits(processed_df, "Strengths", "strength", all_traits.get('Strengths'))

	    return processed_df

	def prepare_model_data(processed_df):
	    """Assemble the feature frame passed to the model.

	    The result holds, in order: ``Desired_Career``, the numeric
	    ``Aggregate``, one point-value column per subject, then every
	    ``interest_*`` / ``strength_*`` column present in the input. Subjects
	    missing from the input are added as all-NaN columns.

	    Args:
	        processed_df: Output of ``preprocess_data``. It is not modified.

	    Returns:
	        A new DataFrame containing only the feature columns.

	    Raises:
	        KeyError: If ``Desired_Career`` is absent from ``processed_df``.
	    """
	    categorical_features = ["Desired_Career"]
	    grade_features = ["English", "Core Maths", "Science", "Social Studies",
	                      "Physics", "Biology", "Elective Maths", "Chemistry",
	                      "Economics", "E-ICT", "Literature", "Geography",
	                      "Business Management", "Visual Arts", "Government"]

	    # Copy so the conversions below do not leak into the caller's frame.
	    features = processed_df.copy()

	    trait_cols = [col for col in features.columns
	                  if col.startswith(('interest_', 'strength_'))]

	    # Aggregate is already a number, not a letter grade. Sending it through
	    # grade_to_numeric turned every value into NaN.
	    # NOTE(review): confirm the training pipeline also kept Aggregate numeric.
	    if "Aggregate" in features.columns:
	        features["Aggregate"] = pd.to_numeric(features["Aggregate"], errors="coerce")
	    else:
	        features["Aggregate"] = np.nan

	    # Convert letter grades to points; absent subjects become NaN columns.
	    for col in grade_features:
	        if col in features.columns:
	            features[col] = features[col].apply(grade_to_numeric)
	        else:
	            features[col] = np.nan

	    return features[categorical_features + ["Aggregate"] + grade_features + trait_cols]

	def get_course_recommendation(student_info):
	    """Return the top three course recommendations as display text.

	    Args:
	        student_info: Dict describing one student (desired career,
	            aggregate, interests, strengths and subject grades).

	    Returns:
	        A formatted string listing up to three courses with the model's
	        probability for each, most likely first. If anything fails, a
	        string starting with ``"Error generating recommendations:"`` is
	        returned instead of raising, so the UI always has text to show.
	    """
	    try:
	        student_df = pd.DataFrame([student_info])
	        processed_student = preprocess_data(student_df, all_traits)
	        student_features = prepare_model_data(processed_student)

	        # The probability ranking already identifies the top course, so a
	        # separate model.predict call (whose result was unused) is skipped.
	        probabilities = model.predict_proba(student_features)[0]
	        top_indices = np.argsort(probabilities)[::-1][:3]
	        classes = model.classes_

	        entries = [
	            f"{rank}. {classes[idx]} (Confidence: {probabilities[idx]:.2f})\n\n"
	            for rank, idx in enumerate(top_indices, 1)
	        ]
	        return "Top Course Recommendations:\n\n" + "".join(entries)
	    except Exception as e:
	        # UI boundary: report the problem as text rather than crashing.
	        return f"Error generating recommendations: {str(e)}"

	def explain_recommendation(student_info, top_recommendation):
	    """Describe which student inputs are cited for the recommended course.

	    Args:
	        student_info: Dict with ``Desired_Career``, ``Interests``,
	            ``Strengths`` and ``Aggregate`` entries.
	        top_recommendation: Sequence whose first item is the course name.

	    Returns:
	        A multi-line explanation. An aggregate below 15 adds an
	        "excellent" line, below 20 a "good" line, otherwise none. On any
	        failure a string starting with ``"Error generating explanation:"``
	        is returned instead of raising.
	    """
	    try:
	        course = top_recommendation[0]

	        parts = [
	            f"The course '{course}' was recommended based on:\n",
	            f"- Your career interest in {student_info['Desired_Career']}\n",
	            f"- Your interests in {student_info['Interests']}\n",
	            f"- Your strengths in {student_info['Strengths']}\n",
	        ]

	        # Lower aggregates are better, so only the low bands earn a mention.
	        aggregate = student_info['Aggregate']
	        if aggregate < 15:
	            parts.append("- Your excellent aggregate score\n")
	        elif aggregate < 20:
	            parts.append("- Your good aggregate score\n")

	        return "".join(parts)
	    except Exception as e:
	        return f"Error generating explanation: {str(e)}"

	def predict_career(desired_career, aggregate, interests, strengths, english, core_maths, science, social_studies,
	                   elective_maths, physics, biology, chemistry):
	    """Gradio callback: turn the form values into recommendation text.

	    The arguments arrive in the order of the ``inputs`` list wired to the
	    submit button. Returns the top-three recommendation text followed by an
	    explanation of the single most probable course. On failure, returns an
	    error message that includes the traceback.
	    """
	    try:
	        # One record shaped like a training row; key order is kept because
	        # it becomes the DataFrame column order.
	        student_info = {
	            "StudentID": "STU_TEMP",
	            "Desired_Career": desired_career,
	            "Recommended_Course": "",  # Will be predicted
	            "Aggregate": aggregate,
	            "Interests": interests,
	            "Strengths": strengths,
	            "English": english,
	            "Core Maths": core_maths,
	            "Science": science,
	            "Social Studies": social_studies,
	            "Elective Maths": elective_maths,
	            "Physics": physics,
	            "Biology": biology,
	            "Chemistry": chemistry,
	        }
	        # Subjects present in the training data but not collected by the form.
	        student_info.update(dict.fromkeys(
	            ["Economics", "E-ICT", "Literature", "Geography",
	             "Business Management", "Visual Arts", "Government"],
	            "",
	        ))

	        recommendations = get_course_recommendation(student_info)

	        # Re-run the feature pipeline to find the most probable course.
	        frame = pd.DataFrame([student_info])
	        features = prepare_model_data(preprocess_data(frame, all_traits))
	        probabilities = model.predict_proba(features)[0]
	        best_first = np.argsort(probabilities)[::-1][:1]
	        classes = model.classes_
	        candidates = [(classes[idx], probabilities[idx]) for idx in best_first]
	        top_recommendation = candidates[0]

	        explanation = explain_recommendation(student_info, top_recommendation)

	        return f"{recommendations}\n{explanation}"
	    except Exception as e:
	        import traceback
	        error_details = traceback.format_exc()
	        return f"Error processing request: {str(e)}\n\nDetails:\n{error_details}"

	# Choices offered by every grade dropdown; the leading empty string is the
	# blank option.
	grade_options = ["", *"A1 B2 B3 C4 C5 C6 D7 E8 F9".split()]

	def _grade_dropdown(subject, help_text):
	    """Create a grade dropdown for one subject inside the active layout context."""
	    return gr.Dropdown(choices=grade_options, label=subject, info=help_text)


	# Build the Gradio interface: the input form in a wide left column, the grade
	# scale reference in a narrow right column, and the submit button and output
	# box underneath.
	# NOTE(review): the nesting below was reconstructed from the component order
	# and the column scale values; confirm it against the deployed layout.
	with gr.Blocks(title="Career Course Recommendation System") as demo:
	    gr.Markdown("# Career Course Recommendation System")
	    gr.Markdown("Enter student information to get course recommendations")

	    with gr.Row():
	        with gr.Column(scale=2):
	            gr.Markdown("### Student Information")
	            desired_career = gr.Textbox(
	                label="Desired Career",
	                placeholder="Enter your desired career path (e.g. Medicine, Computer Science, Engineering)",
	                info="Enter your desired career path",
	            )
	            aggregate = gr.Slider(
	                minimum=6,
	                maximum=37,
	                value=15,
	                step=1,
	                label="Aggregate Score",
	                info="Lower is better (6 is best, 37 is worst)",
	            )
	            interests = gr.Textbox(
	                label="Interests (comma separated)",
	                placeholder="Reading,Dancing,Physics",
	                info="List your interests separated by commas",
	            )
	            strengths = gr.Textbox(
	                label="Strengths (comma separated)",
	                placeholder="Communication,Creativity",
	                info="List your strengths or skills separated by commas",
	                value="Communication,Creativity,Logical Reasoning,Analytical Thinking",
	            )

	            gr.Markdown("### Core Subjects (Required)")
	            with gr.Row():
	                with gr.Column(scale=1):
	                    english = _grade_dropdown("English", "Grade in English")
	                    core_maths = _grade_dropdown("Core Maths", "Grade in Core Mathematics")

	                with gr.Column(scale=1):
	                    science = _grade_dropdown("Science", "Grade in Integrated Science")
	                    social_studies = _grade_dropdown("Social Studies", "Grade in Social Studies")

	            gr.Markdown("### Elective Subjects")
	            with gr.Row():
	                with gr.Column(scale=1):
	                    elective_maths = _grade_dropdown("Elective Maths", "Grade in Elective Mathematics")
	                    physics = _grade_dropdown("Physics", "Grade in Physics")

	                with gr.Column(scale=1):
	                    chemistry = _grade_dropdown("Chemistry", "Grade in Chemistry")
	                    biology = _grade_dropdown("Biology", "Grade in Biology")

	        with gr.Column(scale=1):
	            gr.Markdown("### Grade Scale Reference")
	            gr.Markdown("""
	- A1: Excellent (1 point)
	- B2: Very Good (2 points)
	- B3: Good (3 points)
	- C4: Credit (4 points)
	- C5: Credit (5 points)
	- C6: Credit (6 points)
	- D7: Pass (7 points)
	- E8: Pass (8 points)
	- F9: Fail (9 points)

	Lower points are better. Aggregate is the sum of your best subjects.
	""")

	    submit_btn = gr.Button("Get Recommendations", variant="primary", size="lg")
	    output = gr.Textbox(label="Recommendations", lines=20)

	    # The order here must match predict_career's parameter order.
	    form_inputs = [
	        desired_career, aggregate, interests, strengths,
	        english, core_maths, science, social_studies,
	        elective_maths, physics, biology, chemistry,
	    ]
	    submit_btn.click(fn=predict_career, inputs=form_inputs, outputs=output)

	# Start the app when run as a script. If the default launch fails, retry once
	# with settings chosen for the detected environment.
	if __name__ == "__main__":
	    try:
	        demo.launch()
	    except Exception as e:
	        import sys

	        print(f"Error launching app: {e}")
	        print("Trying alternative launch method...")
	        # Use a share link when Colab is loaded; otherwise bind to all
	        # interfaces on port 7860.
	        if 'google.colab' in sys.modules:
	            fallback_options = {"share": True}
	        else:
	            fallback_options = {"server_name": "0.0.0.0", "server_port": 7860}
	        demo.launch(**fallback_options)