File size: 12,937 Bytes
8416eb2
 
 
fa6e48f
 
8416eb2
f3c7cc9
dc945d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8416eb2
fa6e48f
 
053cdda
 
 
 
fa6e48f
6cbbb17
fa6e48f
8416eb2
f3c7cc9
8416eb2
fa6e48f
8416eb2
 
 
 
 
 
fa6e48f
 
34fb376
fa6e48f
34fb376
fa6e48f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
053cdda
 
 
fa6e48f
 
 
 
 
 
 
 
053cdda
 
 
fa6e48f
 
 
 
 
 
 
 
 
 
dc945d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa6e48f
 
 
 
 
dc945d2
 
fa6e48f
dc945d2
6cbbb17
dc945d2
 
fa6e48f
331e788
dc945d2
 
fa6e48f
dc945d2
 
 
 
 
fa6e48f
dc945d2
 
 
34fb376
331e788
fa6e48f
 
dc945d2
053cdda
dc945d2
 
 
 
 
 
 
 
 
 
 
 
 
 
053cdda
 
 
 
 
 
 
 
 
dc945d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
331e788
dc945d2
053cdda
 
 
fa6e48f
dc945d2
fa6e48f
dc945d2
fa6e48f
8416eb2
fa6e48f
 
 
 
8416eb2
5acb9a8
fa6e48f
 
dfbc1a2
fa6e48f
dfbc1a2
 
5acb9a8
331e788
fa6e48f
331e788
 
 
2c20c04
331e788
 
5acb9a8
fa6e48f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
331e788
fa6e48f
 
 
 
601a5f7
fa6e48f
 
331e788
fa6e48f
5acb9a8
601a5f7
8416eb2
fa6e48f
 
dc945d2
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
import joblib
import pandas as pd
import numpy as np
import gradio as gr
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Load your trained model
class _PlaceholderModel:
    """Stand-in classifier used when the trained model cannot be loaded.

    It exposes the three members the rest of this file relies on --
    ``classes_``, ``predict`` and ``predict_proba`` -- and always answers with
    the same fixed ranking, so the interface can be exercised without a real
    model.  A plain class is used instead of an unfitted scikit-learn
    estimator because an unfitted estimator already has ``predict`` /
    ``predict_proba`` attributes but raises when they are called.
    """

    classes_ = ["BSc Computer Science", "BSc Engineering", "Medicine", "Other Course"]
    _probabilities = [0.7, 0.1, 0.1, 0.1]

    def predict(self, X):
        """Return the first placeholder class once for every row of *X*."""
        return np.array([self.classes_[0]] * len(X))

    def predict_proba(self, X):
        """Return the fixed probability vector once for every row of *X*."""
        return np.array([self._probabilities] * len(X))


try:
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only point this at a model file you trust.
    model = joblib.load('trained_model.joblib')
except Exception as e:
    # Deliberately broad: any load failure (missing file, incompatible pickle,
    # missing dependency) falls back to the placeholder so the UI still starts.
    print(f"Error loading model: {e}")
    print("Creating a placeholder model for interface testing")
    model = _PlaceholderModel()

# Known trait vocabularies, keyed by the input column they belong to.
# preprocess_data() receives this mapping and creates one binary column per
# entry.
all_traits = {
    'Interests': {
        'Reading', 'Dancing', 'Physics', 'Research',
        'Cooking', 'Art', 'Playing Football', 'Creativity',
        'Writing', 'Technology', 'Public Speaking', 'Music',
        'Mathematics', 'Leadership', 'Problem-Solving', 'Entrepreneurship',
    },
    'Strengths': {
        'Communication', 'Creativity', 'Logical Reasoning',
        'Innovative Thinking', 'Teamwork', 'Hands-on Skills',
        'Analytical Thinking', 'Leadership', 'Detail-Oriented',
    },
}

# Letter grade -> point value; built once instead of on every call.
_GRADE_POINTS = {
    "A1": 1, "B2": 2, "B3": 3, "C4": 4, "C5": 5, "C6": 6,
    "D7": 7, "E8": 8, "F9": 9,
}


# Function to convert grades to numerical values
def grade_to_numeric(grade):
    """Convert a letter grade such as ``"A1"`` to its point value.

    Args:
        grade: The grade to convert.  Strings are matched after trimming
            surrounding whitespace and upper-casing, so ``" b2 "`` is
            accepted.

    Returns:
        The integer point value (1 for ``"A1"`` up to 9 for ``"F9"``), or
        ``np.nan`` when *grade* is empty, missing, not a string, or not a
        recognised grade.
    """
    # Anything that is not text (None, NaN, numbers, lists, ...) has no grade
    # mapping; answering NaN here also avoids hashing unhashable values.
    if not isinstance(grade, str):
        return np.nan

    return _GRADE_POINTS.get(grade.strip().upper(), np.nan)

# Function to extract interests and strengths into separate columns
def extract_traits(df, column_name, prefix, all_traits=None):
    """Add one 0/1 indicator column per trait to *df*.

    For every trait a column named ``f"{prefix}_{trait}"`` is written to
    *df*; its value is 1 where the text in ``df[column_name]`` contains the
    trait (case-insensitive, literal substring match) and 0 otherwise,
    including for missing values.

    Args:
        df: Frame to extend.  It is modified in place and also returned.
        column_name: Column holding comma-separated trait text.
        prefix: Prefix for the generated column names.
        all_traits: Iterable of trait names to look for.  When ``None`` the
            traits are inferred by splitting the column's own values on
            commas.

    Returns:
        The same *df* object with the indicator columns added.
    """
    # Infer all_traits if not provided
    if all_traits is None:
        all_traits = set()
        for cell in df[column_name].dropna():
            if isinstance(cell, str):
                all_traits.update(cell.split(','))

    # Trim each trait so the column name and the matched text agree, drop
    # blanks (an empty pattern would match every row), and sort so the
    # generated columns come out in the same order on every run.
    trait_names = sorted({trait.strip() for trait in all_traits} - {""})

    # Create binary columns for each trait
    for trait in trait_names:
        # regex=False: trait names are plain text, not patterns ("C++" etc.).
        df[f"{prefix}_{trait}"] = (
            df[column_name]
            .str.contains(trait, case=False, na=False, regex=False)
            .astype(int)
        )

    return df

def _split_traits(values):
    """Return the set of trimmed, non-empty comma-separated items in *values*."""
    found = set()
    for cell in values:
        # Non-string cells (e.g. NaN for a missing answer) contribute nothing.
        if isinstance(cell, str):
            found.update(part.strip() for part in cell.split(','))
    # "" comes from empty cells or stray commas and is not a real trait.
    found.discard("")
    return found


def preprocess_data(df, all_traits=None):
    """Return a copy of *df* with binary interest/strength columns added.

    Args:
        df: Student data with ``Interests`` and ``Strengths`` columns holding
            comma-separated text.  The input frame is not modified.
        all_traits: Optional mapping with ``'Interests'`` and ``'Strengths'``
            entries listing the traits to encode.  When ``None`` the traits
            are inferred from the values present in *df*, trimmed of
            surrounding whitespace and with empty entries dropped.

    Returns:
        A new DataFrame containing the original columns plus the
        ``interest_*`` and ``strength_*`` columns produced by
        ``extract_traits``.
    """
    # Create a copy to avoid modifying the original
    processed_df = df.copy()

    if all_traits is None:
        all_traits = {
            'Interests': _split_traits(processed_df['Interests']),
            'Strengths': _split_traits(processed_df['Strengths']),
        }

    processed_df = extract_traits(processed_df, "Interests", "interest", all_traits.get('Interests'))
    processed_df = extract_traits(processed_df, "Strengths", "strength", all_traits.get('Strengths'))

    return processed_df

# Build the feature matrix handed to the model
def prepare_model_data(processed_df):
    """Select and numerically encode the model's input features.

    Args:
        processed_df: Frame returned by ``preprocess_data``; it must contain
            ``Desired_Career`` and may contain ``Aggregate``, any of the
            subject grade columns, and ``interest_*`` / ``strength_*``
            columns.  The frame passed in is left unmodified.

    Returns:
        A DataFrame with ``Desired_Career``, then ``Aggregate`` and the
        subject columns as numbers (NaN where missing or unrecognised), then
        every ``interest_*`` / ``strength_*`` column.

    Raises:
        KeyError: If ``Desired_Career`` is absent from *processed_df*.
    """
    # Work on a copy: converting grades in the caller's frame would make a
    # second call see numbers instead of letter grades and turn them to NaN.
    processed_df = processed_df.copy()

    # Identify categorical and numerical features
    categorical_features = ["Desired_Career"]
    score_feature = "Aggregate"
    grade_features = ["English", "Core Maths", "Science", "Social Studies",
                      "Physics", "Biology", "Elective Maths", "Chemistry",
                      "Economics", "E-ICT", "Literature", "Geography",
                      "Business Management", "Visual Arts", "Government"]
    numerical_features = [score_feature] + grade_features

    # Add the binary interest and strength columns
    interest_strength_cols = [col for col in processed_df.columns
                              if col.startswith(('interest_', 'strength_'))]

    # Aggregate is already a number (the form supplies it from a slider), so
    # it must not go through the letter-grade map, which would yield NaN.
    # NOTE(review): confirm the training pipeline treated Aggregate the same way.
    if score_feature in processed_df.columns:
        processed_df[score_feature] = pd.to_numeric(processed_df[score_feature], errors="coerce")
    else:
        processed_df[score_feature] = np.nan

    # Convert grade columns to numerical values
    for col in grade_features:
        if col in processed_df.columns:  # Check if column exists
            processed_df[col] = processed_df[col].apply(grade_to_numeric)
        else:
            # Add missing columns with NaN values
            processed_df[col] = np.nan

    # Get features (using processed_df with numerical grades)
    X = processed_df[categorical_features + numerical_features + interest_strength_cols]

    return X

def get_course_recommendation(student_info):
    """Return a formatted list of the top three course recommendations.

    *student_info* is a mapping describing one student.  It is turned into a
    one-row frame, preprocessed, and scored with the module-level ``model``.
    The three classes with the highest predicted probability are listed with
    their probability to two decimals.  Any failure is reported as an error
    string instead of being raised.
    """
    try:
        # One-row frame -> trait columns -> model features
        features = prepare_model_data(
            preprocess_data(pd.DataFrame([student_info]), all_traits)
        )

        # The hard prediction is requested but its value is not used below;
        # the ranking comes from the class probabilities.
        recommended_course = model.predict(features)[0]
        class_probs = model.predict_proba(features)[0]

        # Indices of the three most probable classes, best first
        best_first = np.argsort(class_probs)[::-1][:3]
        labels = model.classes_

        pieces = ["Top Course Recommendations:\n\n"]
        for rank, idx in enumerate(best_first, 1):
            pieces.append(f"{rank}. {labels[idx]} (Confidence: {class_probs[idx]:.2f})\n\n")

        return "".join(pieces)
    except Exception as e:
        return f"Error generating recommendations: {str(e)}"

def explain_recommendation(student_info, top_recommendation):
    """Build a short text explaining the top recommendation.

    ``top_recommendation[0]`` is taken as the course name.  The text quotes
    the student's desired career, interests and strengths, and adds a remark
    about the aggregate when it is below 15 ("excellent") or below 20
    ("good").  Any failure is returned as an error string rather than raised.
    """
    try:
        course = top_recommendation[0]

        # Header, career alignment, then the interests/strengths match
        parts = [
            f"The course '{course}' was recommended based on:\n",
            f"- Your career interest in {student_info['Desired_Career']}\n",
            f"- Your interests in {student_info['Interests']}\n",
            f"- Your strengths in {student_info['Strengths']}\n",
        ]

        # Aggregate score context
        if student_info['Aggregate'] < 15:
            parts.append("- Your excellent aggregate score\n")
        elif student_info['Aggregate'] < 20:
            parts.append("- Your good aggregate score\n")

        return "".join(parts)
    except Exception as e:
        return f"Error generating explanation: {str(e)}"

def predict_career(desired_career, aggregate, interests, strengths, english, core_maths, science, social_studies,
                  elective_maths, physics, biology, chemistry):
    """Gradio callback: turn the form values into recommendations plus an explanation.

    The arguments are the form fields in the order they are wired to the
    submit button.  Returns the recommendation text followed by the
    explanation for the most probable course.  If anything raises, an error
    message with the traceback is returned instead.
    """
    try:
        # Fields collected by the form
        student_info = {
            "StudentID": "STU_TEMP",
            "Desired_Career": desired_career,
            "Recommended_Course": "",  # Will be predicted
            "Aggregate": aggregate,
            "Interests": interests,
            "Strengths": strengths,
            "English": english,
            "Core Maths": core_maths,
            "Science": science,
            "Social Studies": social_studies,
            "Elective Maths": elective_maths,
            "Physics": physics,
            "Biology": biology,
            "Chemistry": chemistry,
        }
        # Subjects the form does not ask about are sent as empty strings
        for subject in ("Economics", "E-ICT", "Literature", "Geography",
                        "Business Management", "Visual Arts", "Government"):
            student_info[subject] = ""

        recommendations = get_course_recommendation(student_info)

        # Score the student again to obtain the single most probable class
        # as a (label, probability) pair for the explanation.
        features = prepare_model_data(
            preprocess_data(pd.DataFrame([student_info]), all_traits)
        )
        class_probs = model.predict_proba(features)[0]
        best_idx = np.argsort(class_probs)[::-1][:1]
        labels = model.classes_
        top_recommendation = [(labels[i], class_probs[i]) for i in best_idx][0]

        explanation = explain_recommendation(student_info, top_recommendation)

        return f"{recommendations}\n{explanation}"
    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        return f"Error processing request: {str(e)}\n\nDetails:\n{error_details}"

# Choices shared by every grade dropdown; the leading "" is the blank option.
grade_options = [""] + "A1 B2 B3 C4 C5 C6 D7 E8 F9".split()

# Create Gradio interface
def _grade_dropdown(label, info):
    """Build one grade dropdown offering the shared ``grade_options`` choices."""
    return gr.Dropdown(choices=grade_options, label=label, info=info)


with gr.Blocks(title="Career Course Recommendation System") as demo:
    gr.Markdown("# Career Course Recommendation System")
    gr.Markdown("Enter student information to get course recommendations")

    with gr.Row():
        # Wider column: the input form
        with gr.Column(scale=2):
            gr.Markdown("### Student Information")
            desired_career = gr.Textbox(
                label="Desired Career",
                placeholder="Enter your desired career path (e.g. Medicine, Computer Science, Engineering)",
                info="Enter your desired career path",
            )
            aggregate = gr.Slider(
                minimum=6,
                maximum=37,
                value=15,
                step=1,
                label="Aggregate Score",
                info="Lower is better (6 is best, 37 is worst)",
            )
            interests = gr.Textbox(
                label="Interests (comma separated)",
                placeholder="Reading,Dancing,Physics",
                info="List your interests separated by commas",
            )
            strengths = gr.Textbox(
                label="Strengths (comma separated)",
                placeholder="Communication,Creativity",
                info="List your strengths or skills separated by commas",
                value="Communication,Creativity,Logical Reasoning,Analytical Thinking",
            )

            gr.Markdown("### Core Subjects (Required)")
            with gr.Row():
                with gr.Column(scale=1):
                    english = _grade_dropdown("English", "Grade in English")
                    core_maths = _grade_dropdown("Core Maths", "Grade in Core Mathematics")

                with gr.Column(scale=1):
                    science = _grade_dropdown("Science", "Grade in Integrated Science")
                    social_studies = _grade_dropdown("Social Studies", "Grade in Social Studies")

            gr.Markdown("### Elective Subjects")
            with gr.Row():
                with gr.Column(scale=1):
                    elective_maths = _grade_dropdown("Elective Maths", "Grade in Elective Mathematics")
                    physics = _grade_dropdown("Physics", "Grade in Physics")

                with gr.Column(scale=1):
                    chemistry = _grade_dropdown("Chemistry", "Grade in Chemistry")
                    biology = _grade_dropdown("Biology", "Grade in Biology")

        # Narrower column: grade reference, submit button and result box
        with gr.Column(scale=1):
            gr.Markdown("### Grade Scale Reference")
            gr.Markdown("""
            - A1: Excellent (1 point)
            - B2: Very Good (2 points)
            - B3: Good (3 points)
            - C4: Credit (4 points)
            - C5: Credit (5 points)
            - C6: Credit (6 points)
            - D7: Pass (7 points)
            - E8: Pass (8 points)
            - F9: Fail (9 points)
            
            *Lower points are better. Aggregate is the sum of your best subjects.*
            """)

            submit_btn = gr.Button("Get Recommendations", variant="primary", size="lg")
            output = gr.Textbox(label="Recommendations", lines=20)

    # The input order must match predict_career's parameter order.
    form_inputs = [
        desired_career, aggregate, interests, strengths,
        english, core_maths, science, social_studies,
        elective_maths, physics, biology, chemistry,
    ]
    submit_btn.click(fn=predict_career, inputs=form_inputs, outputs=output)

# Launch the app
if __name__ == "__main__":
    try:
        demo.launch()
    except Exception as launch_error:
        print(f"Error launching app: {launch_error}")
        # Retry with settings chosen by environment: a public share link when
        # the google.colab module is loaded, otherwise bind to all interfaces
        # on port 7860.
        import sys
        print("Trying alternative launch method...")
        if 'google.colab' in sys.modules:
            fallback_options = {"share": True}
        else:
            fallback_options = {"server_name": "0.0.0.0", "server_port": 7860}
        demo.launch(**fallback_options)