# NOTE: This file appears to have been captured from a web file viewer
# (header text: "Spaces: Sleeping"; "File size: 12,937 Bytes").
# The viewer's per-line commit-hash gutter and its 1-312 line-number gutter
# were page chrome rather than source code and are not reproduced here.
import joblib
import pandas as pd
import numpy as np
import gradio as gr
from sklearn.preprocessing import OneHotEncoder, StandardScaler
# Load the trained model; fall back to a stand-in so the interface can still start.
try:
    model = joblib.load('trained_model.joblib')
except Exception as e:  # deliberate best-effort boundary: any load failure -> placeholder
    print(f"Error loading model: {e}")
    print("Creating a placeholder model for interface testing")

    class _PlaceholderModel:
        """Minimal stand-in exposing the three estimator members this app uses.

        The previous fallback built an unfitted ``RandomForestClassifier`` and
        attached dummy methods only ``if not hasattr(...)``. The estimator class
        already defines ``predict`` and ``predict_proba``, so the dummies were
        never installed and the first prediction would fail on the unfitted
        model. This class always returns the fixed placeholder answers.
        """

        classes_ = np.array(
            ["BSc Computer Science", "BSc Engineering", "Medicine", "Other Course"]
        )
        # One probability per entry of ``classes_``, in the same order.
        _probabilities = (0.7, 0.1, 0.1, 0.1)

        def predict(self, X):
            """Return the first class label once for every row of ``X``."""
            return np.repeat(self.classes_[:1], len(X))

        def predict_proba(self, X):
            """Return the fixed probability row once for every row of ``X``."""
            return np.tile(self._probabilities, (len(X), 1))

    model = _PlaceholderModel()
# Fixed trait vocabulary handed to preprocessing: one set of known values per
# free-text column ('Interests' and 'Strengths').
all_traits = {
    'Interests': {
        'Reading', 'Dancing', 'Physics', 'Research',
        'Cooking', 'Art', 'Playing Football', 'Creativity',
        'Writing', 'Technology', 'Public Speaking', 'Music',
        'Mathematics', 'Leadership', 'Problem-Solving', 'Entrepreneurship',
    },
    'Strengths': {
        'Communication', 'Creativity', 'Logical Reasoning',
        'Innovative Thinking', 'Teamwork', 'Hands-on Skills',
        'Analytical Thinking', 'Leadership', 'Detail-Oriented',
    },
}
# Translate a grade code into its point value.
def grade_to_numeric(grade):
    """Return the point value (1-9) for a grade code such as ``"A1"``.

    Missing values, the empty string, and any value that is not one of the
    nine known codes all produce ``np.nan``.
    """
    if pd.isna(grade) or grade == "":
        return np.nan

    # Codes are listed best-to-worst, so a code's 1-based position is its points.
    ordered_codes = ("A1", "B2", "B3", "C4", "C5", "C6", "D7", "E8", "F9")
    points_by_code = {
        code: points for points, code in enumerate(ordered_codes, start=1)
    }
    return points_by_code.get(grade, np.nan)
# Function to extract interests and strengths into separate binary columns
def extract_traits(df, column_name, prefix, all_traits=None):
    """Add one 0/1 column per trait, flagging rows whose text mentions it.

    Args:
        df: DataFrame holding the comma-separated text column. It is modified
            in place (new columns are added) and also returned.
        column_name: Name of the text column to scan.
        prefix: Prefix for the new columns, which are named
            ``f"{prefix}_{trait}"``.
        all_traits: Iterable of trait names. When ``None`` the vocabulary is
            inferred from the comma-separated tokens found in the column.

    Returns:
        The same ``df`` with the trait indicator columns added.

    Matching is a case-insensitive substring test, as before. The trait is now
    treated as literal text rather than a regular expression, so a name such
    as "C++" no longer raises or mis-matches.
    """
    if all_traits is None:
        # Infer the vocabulary from string cells only; missing cells are skipped.
        all_traits = set()
        for cell in df[column_name]:
            if isinstance(cell, str):
                all_traits.update(cell.split(','))

    # Strip first, then de-duplicate: " Dancing" and "Dancing" name the same
    # column, and the un-stripped form used to be matched against the text,
    # letting one variant overwrite the other. Empty tokens are dropped because
    # an empty pattern matches every row. Sorting keeps column order stable
    # (set iteration order for strings can differ between interpreter runs).
    vocabulary = sorted({trait.strip() for trait in all_traits} - {""})

    for trait in vocabulary:
        mentioned = df[column_name].str.contains(
            trait, case=False, na=False, regex=False
        )
        df[f"{prefix}_{trait}"] = mentioned.astype(int)
    return df
def preprocess_data(df, all_traits=None):
    """Return a copy of ``df`` with binary interest/strength columns added.

    Args:
        df: Student records with comma-separated 'Interests' and 'Strengths'
            text columns. The input frame is not modified.
        all_traits: Optional dict with 'Interests' and 'Strengths' entries,
            each an iterable of trait names. When ``None`` the vocabulary is
            inferred from the data.

    Returns:
        A new DataFrame with ``interest_*`` and ``strength_*`` columns added
        by ``extract_traits``.
    """
    # Work on a copy so the caller's frame is left untouched.
    processed_df = df.copy()

    if all_traits is None:
        all_traits = {}
        for trait_column in ('Interests', 'Strengths'):
            found = set()
            for cell in processed_df[trait_column]:
                if isinstance(cell, str):
                    # Strip each token so "Reading, Dancing" yields "Dancing",
                    # not " Dancing"; the un-stripped variant would compete
                    # with "Dancing" for the same output column.
                    found.update(token.strip() for token in cell.split(','))
            # An empty token (from "" or a trailing comma) is not a trait.
            found.discard("")
            all_traits[trait_column] = found

    processed_df = extract_traits(processed_df, "Interests", "interest", all_traits.get('Interests'))
    processed_df = extract_traits(processed_df, "Strengths", "strength", all_traits.get('Strengths'))
    return processed_df
# Assemble the feature frame passed to the model.
def prepare_model_data(processed_df):
    """Select and convert the feature columns from a preprocessed frame.

    Every column listed in ``numerical_features`` is passed through
    ``grade_to_numeric``; listed columns that are absent are created and
    filled with NaN. ``processed_df`` is modified in place by these
    assignments.

    Returns:
        A DataFrame holding "Desired_Career", the numerical columns, and every
        ``interest_*`` / ``strength_*`` column, in that order.
    """
    categorical_features = ["Desired_Career"]
    # NOTE(review): "Aggregate" is in this list, so it also goes through
    # grade_to_numeric, which maps anything that is not a grade code to NaN.
    # Confirm this matches how the model was trained before changing it.
    numerical_features = [
        "Aggregate", "English", "Core Maths", "Science", "Social Studies",
        "Physics", "Biology", "Elective Maths", "Chemistry",
        "Economics", "E-ICT", "Literature", "Geography",
        "Business Management", "Visual Arts", "Government",
    ]

    # Binary trait indicators created during preprocessing.
    trait_columns = [
        name
        for name in processed_df.columns
        if name.startswith('interest_') or name.startswith('strength_')
    ]

    for subject in numerical_features:
        if subject not in processed_df.columns:
            # Subject not supplied at all: add it as missing.
            processed_df[subject] = np.nan
            continue
        processed_df[subject] = processed_df[subject].apply(grade_to_numeric)

    selected = categorical_features + numerical_features + trait_columns
    return processed_df[selected]
def get_course_recommendation(student_info):
    """Format the three most probable courses for one student as text.

    Args:
        student_info: Mapping of field name to value for a single student.

    Returns:
        A numbered list of up to three courses with their probabilities shown
        to two decimals, or an "Error generating recommendations: ..." string
        if any step raises.
    """
    try:
        # One-row frame -> trait columns -> model feature frame.
        features = prepare_model_data(
            preprocess_data(pd.DataFrame([student_info]), all_traits)
        )

        # The predicted label itself is not used below; the call is kept so the
        # function behaves exactly as before.
        _predicted_label = model.predict(features)[0]
        scores = model.predict_proba(features)[0]

        # Indices of the three highest probabilities, best first.
        best_three = np.argsort(scores)[::-1][:3]
        labels = model.classes_

        pieces = ["Top Course Recommendations:\n\n"]
        for rank, idx in enumerate(best_three, 1):
            pieces.append(f"{rank}. {labels[idx]} (Confidence: {scores[idx]:.2f})\n\n")
        return "".join(pieces)
    except Exception as e:
        return f"Error generating recommendations: {str(e)}"
def explain_recommendation(student_info, top_recommendation):
    """Build a short text explaining why a course was recommended.

    Args:
        student_info: Mapping with 'Desired_Career', 'Interests', 'Strengths'
            and 'Aggregate' entries.
        top_recommendation: Sequence whose first element is the course name.

    Returns:
        The explanation text, or an "Error generating explanation: ..." string
        if anything raises (for example a missing key).
    """
    try:
        course = top_recommendation[0]
        parts = [
            f"The course '{course}' was recommended based on:\n",
            f"- Your career interest in {student_info['Desired_Career']}\n",
            f"- Your interests in {student_info['Interests']}\n",
            f"- Your strengths in {student_info['Strengths']}\n",
        ]

        # Lower aggregates are better; 20 and above adds no remark.
        if student_info['Aggregate'] < 15:
            parts.append("- Your excellent aggregate score\n")
        elif student_info['Aggregate'] < 20:
            parts.append("- Your good aggregate score\n")

        return "".join(parts)
    except Exception as e:
        return f"Error generating explanation: {str(e)}"
def predict_career(desired_career, aggregate, interests, strengths, english, core_maths, science, social_studies,
                   elective_maths, physics, biology, chemistry):
    """Gradio callback: turn the form values into recommendations plus an explanation.

    Returns:
        The recommendation text followed by the explanation for the most
        probable course, or an "Error processing request: ..." string that
        includes the traceback if anything raises here.
    """
    try:
        # Full student record; the course field is left blank.
        student_info = {
            "StudentID": "STU_TEMP",
            "Desired_Career": desired_career,
            "Recommended_Course": "",
            "Aggregate": aggregate,
            "Interests": interests,
            "Strengths": strengths,
            "English": english,
            "Core Maths": core_maths,
            "Science": science,
            "Social Studies": social_studies,
            "Elective Maths": elective_maths,
            "Physics": physics,
            "Biology": biology,
            "Chemistry": chemistry,
        }
        # Subjects the form does not collect are passed as empty strings
        # (the original comment says they were present in the training data).
        for uncollected in ("Economics", "E-ICT", "Literature", "Geography",
                            "Business Management", "Visual Arts", "Government"):
            student_info[uncollected] = ""

        recommendations = get_course_recommendation(student_info)

        # Re-run the pipeline to find the single most probable course.
        features = prepare_model_data(
            preprocess_data(pd.DataFrame([student_info]), all_traits)
        )
        probabilities = model.predict_proba(features)[0]
        leading = np.argsort(probabilities)[::-1][:1]
        classes = model.classes_
        ranked = [(classes[idx], probabilities[idx]) for idx in leading]
        top_recommendation = ranked[0]

        explanation = explain_recommendation(student_info, top_recommendation)
        return f"{recommendations}\n{explanation}"
    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        return f"Error processing request: {str(e)}\n\nDetails:\n{error_details}"
# Choices for every grade dropdown: an empty entry first, then the nine grade
# codes from best (A1) to worst (F9).
grade_options = [""] + "A1 B2 B3 C4 C5 C6 D7 E8 F9".split()
# Build the Gradio interface: an input form, a grade reference panel, a submit
# button and a results box.
# NOTE(review): the source had lost its indentation; the nesting below is
# reconstructed from the `with` statements and column scales. Confirm the
# layout against the running app.
with gr.Blocks(title="Career Course Recommendation System") as demo:
    gr.Markdown("# Career Course Recommendation System")
    gr.Markdown("Enter student information to get course recommendations")

    with gr.Row():
        # Wider column: the student form.
        with gr.Column(scale=2):
            gr.Markdown("### Student Information")
            desired_career = gr.Textbox(
                label="Desired Career",
                placeholder="Enter your desired career path (e.g. Medicine, Computer Science, Engineering)",
                info="Enter your desired career path",
            )
            aggregate = gr.Slider(
                minimum=6,
                maximum=37,
                value=15,
                step=1,
                label="Aggregate Score",
                info="Lower is better (6 is best, 37 is worst)",
            )
            interests = gr.Textbox(
                label="Interests (comma separated)",
                placeholder="Reading,Dancing,Physics",
                info="List your interests separated by commas",
            )
            strengths = gr.Textbox(
                label="Strengths (comma separated)",
                placeholder="Communication,Creativity",
                info="List your strengths or skills separated by commas",
                value="Communication,Creativity,Logical Reasoning,Analytical Thinking",
            )

            gr.Markdown("### Core Subjects (Required)")
            with gr.Row():
                with gr.Column(scale=1):
                    english = gr.Dropdown(
                        choices=grade_options, label="English", info="Grade in English"
                    )
                    core_maths = gr.Dropdown(
                        choices=grade_options, label="Core Maths", info="Grade in Core Mathematics"
                    )
                with gr.Column(scale=1):
                    science = gr.Dropdown(
                        choices=grade_options, label="Science", info="Grade in Integrated Science"
                    )
                    social_studies = gr.Dropdown(
                        choices=grade_options, label="Social Studies", info="Grade in Social Studies"
                    )

            gr.Markdown("### Elective Subjects")
            with gr.Row():
                with gr.Column(scale=1):
                    elective_maths = gr.Dropdown(
                        choices=grade_options, label="Elective Maths", info="Grade in Elective Mathematics"
                    )
                    physics = gr.Dropdown(
                        choices=grade_options, label="Physics", info="Grade in Physics"
                    )
                with gr.Column(scale=1):
                    chemistry = gr.Dropdown(
                        choices=grade_options, label="Chemistry", info="Grade in Chemistry"
                    )
                    biology = gr.Dropdown(
                        choices=grade_options, label="Biology", info="Grade in Biology"
                    )

        # Narrower column: static grade reference.
        with gr.Column(scale=1):
            gr.Markdown("### Grade Scale Reference")
            gr.Markdown("""
- A1: Excellent (1 point)
- B2: Very Good (2 points)
- B3: Good (3 points)
- C4: Credit (4 points)
- C5: Credit (5 points)
- C6: Credit (6 points)
- D7: Pass (7 points)
- E8: Pass (8 points)
- F9: Fail (9 points)
*Lower points are better. Aggregate is the sum of your best subjects.*
""")

    submit_btn = gr.Button("Get Recommendations", variant="primary", size="lg")
    output = gr.Textbox(label="Recommendations", lines=20)

    # Component order must match predict_career's parameter order.
    submit_btn.click(
        fn=predict_career,
        inputs=[
            desired_career, aggregate, interests, strengths,
            english, core_maths, science, social_studies,
            elective_maths, physics, biology, chemistry,
        ],
        outputs=output,
    )
# Launch the app when run as a script.
if __name__ == "__main__":
    try:
        demo.launch()
    except Exception as e:
        print(f"Error launching app: {e}")
        # The default launch failed; retry with explicit settings.
        import sys
        print("Trying alternative launch method...")
        if 'google.colab' in sys.modules:
            # google.colab is loaded: request a shareable link.
            demo.launch(share=True)
        else:
            # Otherwise bind to all interfaces on port 7860.
            # (A stray trailing "|" on this call was removed; it was a syntax error.)
            demo.launch(server_name="0.0.0.0", server_port=7860)