# msgasu's picture
# Update app.py
# 2c20c04 verified
import joblib
import pandas as pd
import numpy as np
import gradio as gr
from sklearn.preprocessing import OneHotEncoder, StandardScaler
# Load the trained model; fall back to a stub so the interface can still be exercised.
try:
    # NOTE: joblib.load unpickles the file, so only load artifacts from a trusted source.
    model = joblib.load('trained_model.joblib')
except Exception as e:
    print(f"Error loading model: {e}")
    print("Creating a placeholder model for interface testing")

    class _PlaceholderModel:
        """Stand-in estimator that returns fixed predictions for interface testing.

        It exposes only what the rest of this file reads from ``model``:
        ``classes_``, ``predict`` and ``predict_proba``. An unfitted
        scikit-learn estimator cannot serve as the placeholder because it
        already defines both methods (so "add if missing" guards never fire)
        and fails as soon as a prediction is requested.
        """

        classes_ = ["BSc Computer Science", "BSc Engineering", "Medicine", "Other Course"]
        _fixed_probabilities = [0.7, 0.1, 0.1, 0.1]

        def predict(self, X):
            """Return the first class label once per input row."""
            return np.array([self.classes_[0]] * len(X))

        def predict_proba(self, X):
            """Return the same fixed probability row once per input row."""
            return np.array([self._fixed_probabilities] * len(X))

    model = _PlaceholderModel()
# Known trait vocabulary, keyed by the DataFrame column the traits are read from.
# It is passed to preprocess_data so that the same indicator columns are
# produced for every student.
all_traits = {
    'Interests': {
        'Reading',
        'Dancing',
        'Physics',
        'Research',
        'Cooking',
        'Art',
        'Playing Football',
        'Creativity',
        'Writing',
        'Technology',
        'Public Speaking',
        'Music',
        'Mathematics',
        'Leadership',
        'Problem-Solving',
        'Entrepreneurship',
    },
    'Strengths': {
        'Communication',
        'Creativity',
        'Logical Reasoning',
        'Innovative Thinking',
        'Teamwork',
        'Hands-on Skills',
        'Analytical Thinking',
        'Leadership',
        'Detail-Oriented',
    },
}
# Points assigned to each grade label (lower is better).
_GRADE_POINTS = {
    "A1": 1, "B2": 2, "B3": 3, "C4": 4, "C5": 5, "C6": 6,
    "D7": 7, "E8": 8, "F9": 9,
}


def grade_to_numeric(grade):
    """Convert a grade label such as "A1" to its point value.

    String labels are matched after trimming surrounding whitespace and
    upper-casing, so " b3 " is read as "B3".

    Args:
        grade: A grade label, or a missing value (None, NaN, "").

    Returns:
        The integer point value (1-9) for a recognised label, otherwise
        ``np.nan``. Missing values, blank strings, unknown labels and
        non-string values that are not keys of the grade table all give NaN.
    """
    if isinstance(grade, str):
        # A blank string falls through to the lookup and yields NaN there.
        grade = grade.strip().upper()
    elif pd.isna(grade):
        return np.nan
    return _GRADE_POINTS.get(grade, np.nan)
def extract_traits(df, column_name, prefix, all_traits=None):
    """Add one 0/1 indicator column per trait found in a comma-separated column.

    For every trait a column named ``f"{prefix}_{trait}"`` is written to
    ``df``. The value is 1 when the trait text occurs in the row's
    ``column_name`` entry and 0 otherwise (including missing entries).

    Matching is a case-insensitive *substring* test, so a trait also matches
    when it appears inside a longer entry. The trait is matched literally,
    not as a regular expression, so punctuation in a trait (for example "+")
    is safe.

    Args:
        df: DataFrame holding ``column_name``. It is modified in place.
        column_name: Name of the column with comma-separated trait text.
        prefix: Prefix for the generated indicator column names.
        all_traits: Iterable of trait names to encode. When None, the traits
            are inferred from the distinct comma-separated pieces present in
            the column.

    Returns:
        The same ``df`` object, with the indicator columns added.
    """
    if all_traits is None:
        # Infer the vocabulary from the data itself.
        all_traits = set()
        for cell in df[column_name].dropna():
            if isinstance(cell, str):
                all_traits.update(cell.split(','))

    # Strip before matching so "Dancing" and " Dancing" are one trait rather
    # than two patterns racing for the same column name; drop empty pieces
    # (an empty pattern would match every row); sort so the column order does
    # not depend on set iteration order.
    cleaned_traits = sorted({trait.strip() for trait in all_traits} - {""})

    values = df[column_name]
    for trait in cleaned_traits:
        df[f"{prefix}_{trait}"] = values.str.contains(
            trait, case=False, na=False, regex=False
        ).astype(int)
    return df
def preprocess_data(df, all_traits=None):
    """
    Return a copy of the student data with interest/strength indicator columns.

    The input frame is copied first. When ``all_traits`` is None, the trait
    vocabulary for 'Interests' and 'Strengths' is collected from the
    comma-separated text in those columns (non-string cells are ignored).
    Each column is then expanded by ``extract_traits`` using the prefixes
    "interest" and "strength".
    """
    # Work on a copy so the caller's frame keeps its original columns
    processed_df = df.copy()

    if all_traits is None:
        collected = {}
        for key in ('Interests', 'Strengths'):
            pieces = set()
            for cell in processed_df[key]:
                if isinstance(cell, str):
                    pieces.update(cell.split(','))
            collected[key] = pieces
        all_traits = collected

    for column, prefix in (("Interests", "interest"), ("Strengths", "strength")):
        processed_df = extract_traits(processed_df, column, prefix, all_traits.get(column))
    return processed_df
def prepare_model_data(processed_df):
    """Build the feature frame passed to the model.

    The result holds, in this order: the categorical column
    ("Desired_Career"), the fixed list of grade columns converted with
    ``grade_to_numeric`` (columns absent from the input are added as NaN),
    and every column whose name starts with "interest_" or "strength_".

    Args:
        processed_df: Student data that already contains the trait indicator
            columns and "Desired_Career". It is not modified.

    Returns:
        A new DataFrame containing only the feature columns.

    Raises:
        KeyError: If "Desired_Career" is missing from the input.
    """
    categorical_features = ["Desired_Career"]
    # NOTE(review): "Aggregate" is converted with grade_to_numeric like the
    # grade columns. A numeric aggregate is not a grade label, so it becomes
    # NaN. Left as-is because the trained model may expect exactly this;
    # confirm against the training pipeline before changing it.
    numerical_features = ["Aggregate", "English", "Core Maths", "Science", "Social Studies",
                          "Physics", "Biology", "Elective Maths", "Chemistry",
                          "Economics", "E-ICT", "Literature", "Geography",
                          "Business Management", "Visual Arts", "Government"]

    # Copy first: the conversions below overwrite and add columns, and the
    # caller's frame should not change as a side effect.
    features_df = processed_df.copy()

    interest_strength_cols = [
        col for col in features_df.columns
        if col.startswith(('interest_', 'strength_'))
    ]

    for col in numerical_features:
        if col in features_df.columns:
            features_df[col] = features_df[col].apply(grade_to_numeric)
        else:
            # Subject not supplied: keep the column so the feature set is stable
            features_df[col] = np.nan

    return features_df[categorical_features + numerical_features + interest_strength_cols]
def get_course_recommendation(student_info):
    """
    Get course recommendations for a student based on their information.

    Args:
        student_info: Dict of student fields (one row of data).

    Returns:
        A text block listing up to three courses with the highest predicted
        probability, each with its probability to two decimals. If anything
        fails, a string starting with "Error generating recommendations:" is
        returned instead of raising.
    """
    try:
        # One-row frame -> trait indicators -> model feature columns
        student_df = pd.DataFrame([student_info])
        processed_student = preprocess_data(student_df, all_traits)
        student_features = prepare_model_data(processed_student)

        # Only the probabilities are needed: ranking them already yields the
        # top class, so a separate model.predict pass would be wasted work.
        probabilities = model.predict_proba(student_features)[0]
        top_indices = np.argsort(probabilities)[::-1][:3]
        classes = model.classes_

        entries = [
            f"{rank}. {classes[idx]} (Confidence: {probabilities[idx]:.2f})\n\n"
            for rank, idx in enumerate(top_indices, 1)
        ]
        return "Top Course Recommendations:\n\n" + "".join(entries)
    except Exception as e:
        # Errors are reported as text because the result is shown in the UI
        return f"Error generating recommendations: {str(e)}"
def explain_recommendation(student_info, top_recommendation):
    """
    Build a short text explaining why a course was recommended.

    ``top_recommendation`` is indexed at position 0 for the course name.
    The explanation lists the student's desired career, interests and
    strengths, plus a remark on the aggregate score when it is below 15
    ("excellent") or below 20 ("good"). Any failure is returned as a string
    starting with "Error generating explanation:".
    """
    try:
        course = top_recommendation[0]
        lines = [
            f"The course '{course}' was recommended based on:",
            f"- Your career interest in {student_info['Desired_Career']}",
            f"- Your interests in {student_info['Interests']}",
            f"- Your strengths in {student_info['Strengths']}",
        ]

        # Lower aggregate is better, so only low scores earn a remark
        aggregate = student_info['Aggregate']
        if aggregate < 15:
            lines.append("- Your excellent aggregate score")
        elif aggregate < 20:
            lines.append("- Your good aggregate score")

        return "".join(f"{line}\n" for line in lines)
    except Exception as e:
        return f"Error generating explanation: {str(e)}"
def predict_career(desired_career, aggregate, interests, strengths, english, core_maths, science, social_studies,
                   elective_maths, physics, biology, chemistry):
    """
    Gradio callback: turn the form values into recommendations plus an explanation.

    The form values are assembled into a single student record, with the
    subjects that have no form field set to "". The record is passed to
    get_course_recommendation for the ranked list; the most probable class is
    then determined again from the model to feed explain_recommendation.
    Returns both texts joined by a newline, or an error message with a
    traceback if anything raises.
    """
    try:
        profile = {
            "StudentID": "STU_TEMP",
            "Desired_Career": desired_career,
            "Recommended_Course": "",  # Will be predicted
            "Aggregate": aggregate,
            "Interests": interests,
            "Strengths": strengths,
        }
        entered_grades = {
            "English": english,
            "Core Maths": core_maths,
            "Science": science,
            "Social Studies": social_studies,
            "Elective Maths": elective_maths,
            "Physics": physics,
            "Biology": biology,
            "Chemistry": chemistry,
        }
        # Subjects that were in the training data but are not asked for in the form
        blank_subjects = dict.fromkeys(
            ["Economics", "E-ICT", "Literature", "Geography",
             "Business Management", "Visual Arts", "Government"],
            "",
        )
        student_info = {**profile, **entered_grades, **blank_subjects}

        # Ranked list shown to the user
        recommendations = get_course_recommendation(student_info)

        # Work out the single best class for the explanation text
        student_features = prepare_model_data(
            preprocess_data(pd.DataFrame([student_info]), all_traits)
        )
        probabilities = model.predict_proba(student_features)[0]
        best_first = np.argsort(probabilities)[::-1]
        labels = model.classes_
        top_recommendation = [(labels[i], probabilities[i]) for i in best_first[:1]][0]

        explanation = explain_recommendation(student_info, top_recommendation)
        return recommendations + "\n" + explanation
    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        return f"Error processing request: {str(e)}\n\nDetails:\n{error_details}"
# Choices offered by the grade dropdowns: a blank entry first, then the nine
# grade labels from best to worst.
grade_options = [""] + "A1 B2 B3 C4 C5 C6 D7 E8 F9".split()
# Create Gradio interface
# NOTE(review): the nesting of the `with` blocks below was reconstructed from a
# flattened copy of this file; confirm the layout matches the intended design.
with gr.Blocks(title="Career Course Recommendation System") as demo:
    gr.Markdown("# Career Course Recommendation System")
    gr.Markdown("Enter student information to get course recommendations")
    with gr.Row():
        # Left column: all input fields
        with gr.Column(scale=2):
            gr.Markdown("### Student Information")
            desired_career = gr.Textbox(
                label="Desired Career",
                placeholder="Enter your desired career path (e.g. Medicine, Computer Science, Engineering)",
                info="Enter your desired career path"
            )
            aggregate = gr.Slider(minimum=6, maximum=37, value=15, step=1, label="Aggregate Score", info="Lower is better (6 is best, 37 is worst)")
            interests = gr.Textbox(label="Interests (comma separated)", placeholder="Reading,Dancing,Physics", info="List your interests separated by commas")
            strengths = gr.Textbox(
                label="Strengths (comma separated)",
                placeholder="Communication,Creativity",
                info="List your strengths or skills separated by commas",
                value="Communication,Creativity,Logical Reasoning,Analytical Thinking"
            )
            gr.Markdown("### Core Subjects (Required)")
            with gr.Row():
                with gr.Column(scale=1):
                    english = gr.Dropdown(choices=grade_options, label="English", info="Grade in English")
                    core_maths = gr.Dropdown(choices=grade_options, label="Core Maths", info="Grade in Core Mathematics")
                with gr.Column(scale=1):
                    science = gr.Dropdown(choices=grade_options, label="Science", info="Grade in Integrated Science")
                    social_studies = gr.Dropdown(choices=grade_options, label="Social Studies", info="Grade in Social Studies")
            gr.Markdown("### Elective Subjects")
            with gr.Row():
                with gr.Column(scale=1):
                    elective_maths = gr.Dropdown(choices=grade_options, label="Elective Maths", info="Grade in Elective Mathematics")
                    physics = gr.Dropdown(choices=grade_options, label="Physics", info="Grade in Physics")
                with gr.Column(scale=1):
                    chemistry = gr.Dropdown(choices=grade_options, label="Chemistry", info="Grade in Chemistry")
                    biology = gr.Dropdown(choices=grade_options, label="Biology", info="Grade in Biology")
        # Right column: static reference text for the grade labels
        with gr.Column(scale=1):
            gr.Markdown("### Grade Scale Reference")
            gr.Markdown("""
- A1: Excellent (1 point)
- B2: Very Good (2 points)
- B3: Good (3 points)
- C4: Credit (4 points)
- C5: Credit (5 points)
- C6: Credit (6 points)
- D7: Pass (7 points)
- E8: Pass (8 points)
- F9: Fail (9 points)
*Lower points are better. Aggregate is the sum of your best subjects.*
""")
    submit_btn = gr.Button("Get Recommendations", variant="primary", size="lg")
    output = gr.Textbox(label="Recommendations", lines=20)
    # The inputs list must stay in the same order as predict_career's parameters
    submit_btn.click(
        fn=predict_career,
        inputs=[desired_career, aggregate, interests, strengths, english, core_maths, science, social_studies,
                elective_maths, physics, biology, chemistry],
        outputs=output
    )
# Start the app when run as a script; if the default launch raises, retry once
# with explicit settings chosen by environment.
if __name__ == "__main__":
    try:
        demo.launch()
    except Exception as e:
        import sys

        print(f"Error launching app: {e}")
        print("Trying alternative launch method...")
        # Under Google Colab use a share link; elsewhere bind all interfaces on port 7860
        running_in_colab = 'google.colab' in sys.modules
        if running_in_colab:
            fallback_options = {"share": True}
        else:
            fallback_options = {"server_name": "0.0.0.0", "server_port": 7860}
        demo.launch(**fallback_options)