markobinario committed on
Commit
0d57887
·
verified ·
1 Parent(s): 59efa55

Delete recommender.py

Browse files
Files changed (1) hide show
  1. recommender.py +0 -641
recommender.py DELETED
@@ -1,641 +0,0 @@
1
- import pandas as pd
2
- import numpy as np
3
- from sklearn.neighbors import KNeighborsClassifier
4
- from sklearn.preprocessing import LabelEncoder, StandardScaler
5
- import joblib
6
- import json
7
- import os
8
- import requests
9
-
10
class CourseRecommender:
    """Recommend degree programs from stanine, GWA, strand and hobbies.

    A K-nearest-neighbours classifier is trained on feedback records fetched
    from ``database_url`` (falling back to a built-in static data set).  When
    no model is available, or prediction fails, a rule-based fallback
    (``get_default_recommendations``) is used instead.

    NOTE(review): ``strand`` and ``hobbies`` are label-encoded as opaque
    strings, so the model only recognises hobby strings it saw verbatim during
    training; unseen strings go through ``_find_similar_hobby``.
    """

    # Column layout shared by the database loader and the static data set.
    _TRAINING_COLUMNS = ['stanine', 'gwa', 'strand', 'course', 'rating', 'hobbies']

    # Keyword groups used to map an unseen hobby string onto a training hobby.
    _HOBBY_KEYWORD_GROUPS = (
        ('programming', 'computers', 'technology', 'coding', 'ict'),
        ('teaching', 'education', 'helping', 'children'),
        ('business', 'entrepreneurship', 'management', 'leadership'),
        ('hospitality', 'tourism', 'service', 'events'),
        ('farming', 'agriculture', 'plants', 'environment'),
        ('communication', 'media', 'writing', 'language'),
    )

    def __init__(self, database_url="https://database-dhe2.onrender.com"):
        """Load the course catalog and training data, then train the model.

        Args:
            database_url: Base URL of the service exposing
                ``/student_feedback_counts``.
        """
        self.model = None
        self.label_encoders = {}
        self.scaler = StandardScaler()
        self.database_url = database_url
        self.courses = self.get_courses()
        self.training_data = self.get_training_data()
        # Train on the frame just loaded instead of fetching it a second time.
        self.train_model(self.training_data)

    def get_courses(self):
        """Return the catalog as a ``{course_code: course_name}`` dict."""
        return {
            # CASTECH
            'BSA': 'Bachelor of Science in Agriculture',
            'BSFish': 'Bachelor of Science in Fisheries',
            'BSFoodTech': 'Bachelor of Science in Food Technology',

            # CFA
            'BSFo': 'Bachelor of Science in Forestry',
            'BSAgFo': 'Bachelor of Science in Agroforestry',

            # CBEE
            'BSHM': 'Bachelor of Science in Hospitality Management',
            'BSEntrep': 'Bachelor of Science in Entrepreneurship',
            'BSAgribus': 'Bachelor of Science in Agriculture Business',
            'BSAgEcon': 'Bachelor of Science in Agricultural Economics',

            # CAS
            'BSBio': 'Bachelor of Science in Biology',
            'BSMath': 'Bachelor of Science in Mathematics',
            'BAELS': 'Bachelor of Arts in English Language Studies',
            'BSDevComm': 'Bachelor of Science in Development Communication',

            # COECS
            'BSABE': 'Bachelor of Science in Agricultural and Biosystems Engineering',
            'BSGE': 'Bachelor of Science in Geodetic Engineering',
            'BSCE': 'Bachelor of Science in Civil Engineering',
            'BSCpE': 'Bachelor of Science in Computer Engineering',
            'BSIT': 'Bachelor of Science in Information Technology',

            # COED
            'BTLEd': 'Bachelor of Technology and Livelihood Education',
            'BSED': 'Bachelor of Secondary Education',
            'BEED': 'Bachelor of Elementary Education',
            'BPE': 'Bachelor of Physical Education',

            # CVM
            'DVM': 'Doctor of Veterinary Medicine',
        }

    def save_student_data(self, stanine, gwa, strand, course, rating, hobbies=None):
        """Log student feedback; nothing is persisted (read-only mode).

        Returns:
            Always ``True``.
        """
        print(f"Student feedback (read-only mode): Stanine={stanine}, GWA={gwa}, Strand={strand}, Course={course}, Rating={rating}, Hobbies={hobbies}")
        return True

    def get_training_data(self):
        """Return training data from the database, else the static data set.

        Returns:
            A ``pandas.DataFrame`` with the columns in ``_TRAINING_COLUMNS``.
        """
        try:
            db_data = self.get_training_data_from_database()
            if db_data is not None and not db_data.empty:
                print(f"Loaded {len(db_data)} training records from database")
                return db_data
            print("No database data available, using static training data")
            return self.get_static_training_data()
        except Exception as e:
            print(f"Error loading database data: {e}, using static training data")
            return self.get_static_training_data()

    @staticmethod
    def _parse_feedback_record(record):
        """Convert one database record into a training tuple.

        Returns:
            ``(stanine, gwa, strand, course, rating, hobbies)`` or ``None``
            when the record is not a dict, lacks a required field, or holds a
            value that cannot be converted.
        """
        if not isinstance(record, dict):
            return None
        try:
            return (
                int(record['stanine']),
                float(record['gwa']),
                str(record['strand']),
                str(record['course']),
                int(record['rating']),
                # A null hobbies value becomes an empty string.
                str(record['hobbies'] or ''),
            )
        except (KeyError, TypeError, ValueError):
            return None

    def get_training_data_from_database(self):
        """Fetch feedback records from ``/student_feedback_counts``.

        Records that are malformed are skipped individually so one bad row
        does not discard the rest.

        Returns:
            A ``pandas.DataFrame`` of valid records, or ``None`` when the
            request fails, returns a non-200 status, or yields no valid rows.
        """
        url = f"{self.database_url}/student_feedback_counts"
        try:
            print(f"Attempting to fetch data from database: {url}")
            response = requests.get(url, timeout=10)
            print(f"Database response status: {response.status_code}")

            if response.status_code != 200:
                print(f"Database endpoint returned status {response.status_code}")
                print(f"Response text: {response.text}")
                return None

            data = response.json()
            print(f"Response data keys: {list(data.keys())}")

            feedback_counts = data.get('feedback_counts', [])
            print(f"Number of records in database: {len(feedback_counts)}")

            if not feedback_counts:
                print("No student feedback data found in database")
                return None

            training_data = []
            for position, record in enumerate(feedback_counts, start=1):
                print(f"Processing record {position}: {record}")

                if not isinstance(record, dict):
                    print(f"Record {position} is not an object, skipping")
                    continue

                missing_fields = [key for key in self._TRAINING_COLUMNS if key not in record]
                if missing_fields:
                    print(f"Record {position} missing fields: {missing_fields}")
                    continue

                row = self._parse_feedback_record(record)
                if row is None:
                    print(f"Record {position} has invalid values, skipping")
                    continue
                training_data.append(row)

            print(f"Valid records processed: {len(training_data)}/{len(feedback_counts)}")

            if not training_data:
                print("No valid training records found in database")
                return None

            print(f"Successfully loaded {len(training_data)} records from database")
            return pd.DataFrame(training_data, columns=self._TRAINING_COLUMNS)

        except requests.exceptions.ConnectionError:
            print("Cannot connect to database - using static data")
            return None
        except Exception as e:
            print(f"Error fetching training data from database: {e}")
            import traceback
            traceback.print_exc()
            return None

    def get_static_training_data(self):
        """Return the built-in training set (covers STEM, ABM, HUMSS, GAS, TVL).

        Each row is ``(stanine, gwa, strand, course, rating, hobbies)``.
        """
        training_data = [
            # CASTECH - Agriculture Science and Technology
            (8, 92, 'STEM', 'BSA', 5, 'farming, agriculture, plants, environment'),
            (7, 88, 'STEM', 'BSFish', 4, 'fishing, marine life, aquaculture, water'),
            (8, 90, 'STEM', 'BSFoodTech', 5, 'food processing, nutrition, cooking, science'),
            (6, 80, 'STEM', 'BSA', 3, 'farming, plants, agriculture'),
            (7, 85, 'STEM', 'BSFish', 3, 'fishing, marine, aquaculture'),
            (8, 89, 'STEM', 'BSFoodTech', 4, 'food, nutrition, science'),

            # CFA - Forestry and Agroforestry
            (7, 85, 'STEM', 'BSFo', 4, 'forests, trees, environment, conservation'),
            (6, 82, 'STEM', 'BSAgFo', 3, 'forestry, agriculture, sustainable farming'),
            (8, 88, 'STEM', 'BSFo', 4, 'forestry, environment, conservation'),
            (7, 82, 'STEM', 'BSAgFo', 3, 'agroforestry, sustainable farming'),

            # CBEE - Business and Economics Education
            (8, 89, 'ABM', 'BSHM', 5, 'hospitality, tourism, service, management'),
            (7, 87, 'ABM', 'BSEntrep', 4, 'business, entrepreneurship, innovation, startups'),
            (8, 91, 'ABM', 'BSAgribus', 5, 'agriculture business, farming, economics'),
            (7, 86, 'ABM', 'BSAgEcon', 4, 'agriculture economics, farming, business'),
            (7, 83, 'ABM', 'BSHM', 3, 'hospitality, service, tourism'),
            (7, 84, 'ABM', 'BSEntrep', 3, 'business, entrepreneurship, innovation'),
            (8, 86, 'ABM', 'BSAgribus', 4, 'agriculture business, farming'),
            (8, 87, 'ABM', 'BSAgEcon', 4, 'agriculture economics, business'),

            # CAS - Arts and Sciences
            (8, 93, 'STEM', 'BSBio', 5, 'biology, science, research, nature'),
            (9, 95, 'STEM', 'BSMath', 5, 'mathematics, numbers, problem solving, logic'),
            (7, 85, 'HUMSS', 'BAELS', 4, 'english, language, literature, communication'),
            (7, 84, 'HUMSS', 'BSDevComm', 4, 'communication, media, development, social work'),
            (8, 88, 'STEM', 'BSBio', 4, 'biology, science, research'),
            (9, 94, 'STEM', 'BSMath', 4, 'mathematics, problem solving, logic'),
            (7, 82, 'HUMSS', 'BAELS', 3, 'english, language, communication'),
            (6, 80, 'HUMSS', 'BSDevComm', 3, 'communication, media, development'),

            # HUMSS students with programming interests
            (8, 90, 'HUMSS', 'BSIT', 5, 'programming, computers, technology, coding'),
            (7, 85, 'HUMSS', 'BSCpE', 4, 'programming, computer engineering, hardware'),
            (8, 88, 'HUMSS', 'BSDevComm', 4, 'programming, media, communication, technology'),
            (6, 80, 'HUMSS', 'BSIT', 3, 'programming, computers, ict'),

            # GAS - General Academic Strand (broad interests)
            (7, 85, 'GAS', 'BAELS', 4, 'communication, language, writing, literature'),
            (7, 84, 'GAS', 'BSDevComm', 4, 'media, community, development, communication'),
            (7, 83, 'GAS', 'BSHM', 3, 'hospitality, service, events'),
            (7, 86, 'GAS', 'BSIT', 3, 'computers, technology, ict basics'),

            # COECS - Engineering and Computer Studies
            (8, 92, 'STEM', 'BSABE', 5, 'engineering, agriculture, technology, machines'),
            (7, 88, 'STEM', 'BSGE', 4, 'surveying, mapping, engineering, technology'),
            (8, 90, 'STEM', 'BSCE', 5, 'civil engineering, construction, buildings, infrastructure'),
            (9, 94, 'STEM', 'BSCpE', 5, 'computer engineering, programming, hardware, technology'),
            (8, 91, 'STEM', 'BSIT', 5, 'information technology, computers, programming, software'),
            (7, 83, 'STEM', 'BSABE', 3, 'agricultural engineering, technology'),
            (7, 84, 'STEM', 'BSGE', 3, 'surveying, mapping, engineering'),
            (8, 90, 'STEM', 'BSCE', 4, 'engineering, construction, buildings'),
            (8, 88, 'STEM', 'BSCpE', 4, 'computer engineering, programming'),
            (8, 87, 'STEM', 'BSIT', 4, 'computers, programming, technology'),

            # COED - Education
            (7, 86, 'HUMSS', 'BTLEd', 4, 'teaching, technology, livelihood, education'),
            (8, 89, 'HUMSS', 'BSED', 5, 'teaching, education, helping students, secondary'),
            (7, 87, 'HUMSS', 'BEED', 4, 'teaching, elementary education, children, helping'),
            (7, 85, 'HUMSS', 'BPE', 4, 'physical education, sports, fitness, teaching'),
            (6, 79, 'HUMSS', 'BTLEd', 3, 'technology education, livelihood'),
            (6, 79, 'HUMSS', 'BSED', 3, 'teaching, education, helping'),
            (6, 81, 'HUMSS', 'BEED', 3, 'elementary education, teaching'),
            (8, 89, 'HUMSS', 'BPE', 4, 'physical education, sports, fitness'),

            # CVM - Veterinary Medicine
            (9, 96, 'STEM', 'DVM', 5, 'veterinary, animals, medicine, healthcare, pets'),
            (8, 91, 'STEM', 'DVM', 4, 'veterinary medicine, animals, healthcare'),

            # TVL - Technical-Vocational-Livelihood (use hobbies to infer subtrack)
            (7, 84, 'TVL', 'BSIT', 4, 'ict, computers, programming, networking'),
            (7, 83, 'TVL', 'BSCpE', 3, 'ict, hardware, electronics, robotics'),
            (7, 82, 'TVL', 'BSHM', 3, 'he, hospitality, cooking, baking, services'),
            (7, 82, 'TVL', 'BTLEd', 3, 'he, livelihood, crafts, home economics'),
            (7, 83, 'TVL', 'BSABE', 3, 'ia, industrial, mechanics, tools, machining'),
            (7, 83, 'TVL', 'BSCE', 3, 'ia, construction, drafting, carpentry, welding'),
            (7, 84, 'TVL', 'BSA', 3, 'agri-fishery, farming, crops, livestock'),
            (7, 84, 'TVL', 'BSFish', 3, 'agri-fishery, aquaculture, fishing'),
            (7, 83, 'TVL', 'BSAgFo', 3, 'agri-fishery, agroforestry, environment'),
        ]

        return pd.DataFrame(training_data, columns=self._TRAINING_COLUMNS)

    def train_model(self, training_data=None):
        """Fit the encoders, scaler and KNN classifier.

        Args:
            training_data: Optional ``DataFrame`` to train on.  When omitted
                the data is fetched via ``get_training_data()``.

        On any failure the error is printed and ``self.model`` is set to
        ``None`` so that recommendations fall back to the rule-based path.
        """
        try:
            if training_data is None:
                training_data = self.get_training_data()

            if training_data is None or training_data.empty:
                print("No training data available - using default recommendations")
                return

            # Prepare features (hobbies required)
            feature_columns = ['stanine', 'gwa', 'strand', 'hobbies']
            X = training_data[feature_columns].copy()
            y = training_data['course']

            # Refit encoders on every training run to pick up new categories.
            for col in ('strand', 'hobbies'):
                encoder = LabelEncoder()
                X[col] = encoder.fit_transform(X[col].fillna('unknown'))
                self.label_encoders[col] = encoder

            numerical_columns = ['stanine', 'gwa']
            X[numerical_columns] = self.scaler.fit_transform(X[numerical_columns])

            # KNN cannot query more neighbours than there are samples.
            n_neighbors = min(3, len(X))
            self.model = KNeighborsClassifier(n_neighbors=n_neighbors, weights='distance')
            self.model.fit(X, y)

            print("Model trained successfully")
            print(f"Model classes: {self.model.classes_}")
            print(f"Training data size: {len(training_data)} records")

        except Exception as e:
            print(f"Error training model: {e}")
            self.model = None

    def refresh_training_data(self):
        """Reload the training data and retrain the model.

        Returns:
            ``True`` on success, ``False`` if reloading raised.
        """
        try:
            print("Refreshing training data from database...")
            self.training_data = self.get_training_data()
            self.train_model(self.training_data)
            return True
        except Exception as e:
            print(f"Error refreshing training data: {e}")
            return False

    def test_database_connection(self):
        """Print a diagnostic report about the feedback endpoint."""
        print("=== DATABASE CONNECTION TEST ===")
        try:
            response = requests.get(f"{self.database_url}/student_feedback_counts", timeout=10)
            print(f"Database URL: {self.database_url}")
            print(f"Response Status: {response.status_code}")

            if response.status_code != 200:
                print(f"Database Error: {response.status_code}")
                print(f"Response: {response.text}")
                return

            data = response.json()
            print(f"Response Keys: {list(data.keys())}")

            if 'feedback_counts' not in data:
                print("'feedback_counts' key not found in response")
                return

            records = data['feedback_counts']
            print(f"Number of Records: {len(records)}")

            if len(records) == 0:
                print("Database is empty")
                return

            sample_record = records[0]
            print(f"Sample Record: {sample_record}")

            # Only the first record is inspected for required fields.
            missing_fields = [field for field in self._TRAINING_COLUMNS if field not in sample_record]
            if missing_fields:
                print(f"Missing Required Fields: {missing_fields}")
                print(f"Available Fields: {list(sample_record.keys())}")
            else:
                print("All required fields present")

        except Exception as e:
            print(f"Connection Error: {e}")
            import traceback
            traceback.print_exc()

    @staticmethod
    def _matches_any_keyword(text, keywords):
        """Return True when ``text`` mentions any of ``keywords``.

        Keywords of up to three characters (``'he'``, ``'ia'``, ``'ict'``)
        must appear as a whole word; otherwise ``'he'`` would match inside
        "fishery" or "teacher".  Longer keywords act as stems and match as
        substrings (``'cook'`` matches "cooking").
        """
        lowered = (text or '').lower()
        tokens = set(''.join(ch if ch.isalnum() else ' ' for ch in lowered).split())
        for keyword in keywords:
            if len(keyword) <= 3:
                if keyword in tokens:
                    return True
            elif keyword in lowered:
                return True
        return False

    def _priority_courses(self, stanine, gwa, strand, hobbies):
        """Pick the ordered candidate course codes for the rule-based path."""
        mentions = lambda *keywords: self._matches_any_keyword(hobbies, keywords)
        top_performer = stanine >= 8 and gwa >= 90

        if strand == 'STEM':
            if top_performer:
                return ['DVM', 'BSMath', 'BSCpE', 'BSIT', 'BSBio']
            if stanine >= 6 and gwa >= 80:
                return ['BSIT', 'BSCE', 'BSABE', 'BSA', 'BSFoodTech']
            return ['BSIT', 'BSGE', 'BSFish', 'BSFo', 'BSAgFo']

        if strand == 'ABM':
            if top_performer:
                return ['BSAgribus', 'BSHM', 'BSEntrep', 'BSAgEcon']
            return ['BSHM', 'BSEntrep', 'BSAgribus']

        if strand == 'HUMSS':
            # HUMSS students with programming interests should get tech courses
            if mentions('programming', 'computer', 'ict', 'tech', 'coding'):
                return ['BSIT', 'BSCpE', 'BSDevComm']
            if mentions('teaching', 'education', 'helping', 'children'):
                return ['BSED', 'BEED', 'BTLEd']
            if mentions('communication', 'media', 'writing', 'language'):
                return ['BAELS', 'BSDevComm', 'BSHM']
            if top_performer:
                return ['BSED', 'BEED', 'BAELS', 'BSDevComm', 'BPE']
            return ['BSED', 'BEED', 'BAELS', 'BTLEd']

        if strand == 'GAS':
            # Broad options leaning to comms/IT/hospitality based on hobbies
            if mentions('ict', 'computer', 'programming', 'tech'):
                return ['BSIT', 'BSCpE', 'BSDevComm']
            if mentions('hospitality', 'events', 'service', 'tourism'):
                return ['BSHM', 'BSEntrep', 'BAELS']
            return ['BAELS', 'BSDevComm', 'BSIT']

        if strand == 'TVL':
            # Use hobbies to infer subtrack: ICT/HE/IA/Agri-Fishery
            if mentions('ict', 'computer', 'network', 'programming', 'coding'):
                return ['BSIT', 'BSCpE']
            if mentions('he', 'cook', 'bake', 'hospitality', 'food'):
                return ['BSHM', 'BTLEd', 'BSFoodTech']
            if mentions('ia', 'industrial', 'mechanic', 'weld', 'draft'):
                return ['BSABE', 'BSCE', 'BTLEd']
            if mentions('agri', 'farm', 'fish', 'aquaculture', 'forestry'):
                return ['BSA', 'BSFish', 'BSAgFo']
            return ['BSIT', 'BSHM', 'BSA']

        return ['BSIT', 'BSHM', 'BSED', 'BSA']

    def get_default_recommendations(self, stanine, gwa, strand, hobbies: str = ""):
        """Rule-based recommendations used when the model is unavailable.

        Args:
            stanine: Stanine score (compared against 6 and 8).
            gwa: General weighted average (compared against 80 and 90).
            strand: Senior-high strand code, e.g. ``'STEM'`` or ``'TVL'``.
            hobbies: Free-text hobbies used to refine HUMSS/GAS/TVL choices.

        Returns:
            Up to three dicts with ``code``, ``name`` and ``rating`` keys.
            ``rating`` is 60 + 5 per stanine above 5 + 0.5 per GWA point above
            80, minus 10 per rank position, clamped to the range 10..95.
        """
        courses = self.courses
        recommendations = []

        priority_courses = self._priority_courses(stanine, gwa, strand, hobbies or "")

        for rank, course in enumerate(priority_courses[:3]):  # Only take top 3
            if course not in courses:
                continue
            base_rating = 60
            stanine_bonus = (stanine - 5) * 5
            gwa_bonus = (gwa - 80) * 0.5
            final_rating = min(95, base_rating + stanine_bonus + gwa_bonus - (rank * 10))

            recommendations.append({
                'code': course,
                'name': courses[course],
                'rating': round(max(10, final_rating), 1),  # Minimum 10% rating
            })

        return recommendations

    def recommend_courses(self, stanine, gwa, strand, hobbies=None, top_n=5):
        """Recommend courses for a student profile.

        Args:
            stanine: Stanine score.
            gwa: General weighted average.
            strand: Senior-high strand code.
            hobbies: Free-text hobbies; required on the model path.
            top_n: Maximum number of model predictions to return.  A value
                of zero or less yields no model predictions, so the rule-based
                defaults are returned.

        Returns:
            A list of dicts with ``code``, ``name`` and ``rating`` (percent).
            Falls back to ``get_default_recommendations`` when no model is
            trained, when no class exceeds a 1% probability, or on any error.
        """
        try:
            if self.model is None:
                return self.get_default_recommendations(stanine, gwa, strand, hobbies or "")

            hobbies_text = (hobbies or '').strip()
            if not hobbies_text:
                raise ValueError('hobbies is required for recommendations')

            # Column order must match the training feature order.
            input_data = pd.DataFrame([{
                'stanine': stanine,
                'gwa': gwa,
                'strand': strand,
                'hobbies': hobbies_text,
            }])

            for col in ('strand', 'hobbies'):
                encoder = self.label_encoders.get(col)
                if encoder is None:
                    continue
                value = input_data[col].iloc[0]
                if value in encoder.classes_:
                    encoded = encoder.transform([value])[0]
                elif col == 'hobbies':
                    # Map an unseen hobby string onto a known training hobby.
                    similar_hobby = self._find_similar_hobby(value)
                    if similar_hobby:
                        print(f"Warning: Unseen hobby '{value}', using similar: '{similar_hobby}'")
                        encoded = encoder.transform([similar_hobby])[0]
                    else:
                        print(f"Warning: Unseen hobby '{value}', using default encoding")
                        encoded = 0
                else:
                    print(f"Warning: Unseen value '{value}' in {col}, using default encoding")
                    encoded = 0
                input_data[col] = encoded

            numerical_columns = ['stanine', 'gwa']
            input_data[numerical_columns] = self.scaler.transform(input_data[numerical_columns])

            predictions = self.model.predict_proba(input_data)
            courses = self.model.classes_

            print(f"Prediction probabilities: {predictions[0]}")
            print(f"Available courses: {courses}")

            # Highest probability first; slicing after the reversal avoids the
            # ``[-0:]`` pitfall that returned every class for top_n == 0.
            limit = max(int(top_n), 0)
            top_indices = np.argsort(predictions[0])[::-1][:limit]

            course_map = self.courses
            recommendations = []
            for idx in top_indices:
                code = str(courses[idx])
                confidence = float(predictions[0][idx])

                # Only include courses with meaningful probabilities
                if confidence > 0.01:
                    recommendations.append({
                        'code': code,
                        'name': course_map.get(code, code),
                        'rating': round(confidence * 100, 1),
                    })

            if not recommendations:
                print("No meaningful predictions, using default recommendations")
                return self.get_default_recommendations(stanine, gwa, strand, hobbies or "")

            return recommendations

        except Exception as e:
            print(f"Error recommending courses: {e}")
            return self.get_default_recommendations(stanine, gwa, strand, hobbies or "")

    def _find_similar_hobby(self, hobby):
        """Return a training hobby sharing a keyword group with ``hobby``.

        Returns:
            The first training hobby string (in encoder class order) that
            matches the first keyword group ``hobby`` also matches, or
            ``None`` when there is no hobbies encoder or no match.
        """
        encoder = self.label_encoders.get('hobbies')
        if encoder is None:
            return None

        training_hobbies = encoder.classes_
        for keywords in self._HOBBY_KEYWORD_GROUPS:
            if not self._matches_any_keyword(hobby, keywords):
                continue
            for training_hobby in training_hobbies:
                if self._matches_any_keyword(training_hobby, keywords):
                    return training_hobby

        return None

    def _get_recommendation_reason(self, course, stanine, gwa, strand, hobbies, interests, personality_type, learning_style, career_goals):
        """Build a short, human-readable justification for a recommendation.

        Returns:
            Up to three reasons joined by `` • ``.

        NOTE(review): the course codes checked below (BSCS, BSBA, BSArch,
        BSN, BSPsych, BSAgri, BSIE) are not in ``get_courses()``, and ``BSA``
        is treated as a business course although in this catalog it is
        Agriculture.  The lists look inherited from another catalog and need
        a product decision before being remapped.  ``interests`` is accepted
        but not used.
        """
        reasons = []

        def mentions(text, words):
            """True when ``text`` is non-empty and contains any of ``words``."""
            return bool(text) and any(word in text.lower() for word in words)

        # Academic performance reasons
        if stanine >= 8:
            reasons.append("Excellent academic performance")
        elif stanine >= 6:
            reasons.append("Good academic foundation")

        if gwa >= 85:
            reasons.append("High academic achievement")
        elif gwa >= 80:
            reasons.append("Strong academic record")

        # Strand alignment
        if strand == "STEM" and course in ["BSCS", "BSIT", "BSArch", "BSIE", "BSN"]:
            reasons.append("Perfect match with your STEM background")
        elif strand == "ABM" and course in ["BSBA", "BSA"]:
            reasons.append("Excellent alignment with your ABM strand")
        elif strand == "HUMSS" and course in ["BSED", "BSPsych"]:
            reasons.append("Great fit with your HUMSS background")

        # Hobbies and interests alignment
        if mentions(hobbies, ["gaming", "programming", "technology", "computers"]) and course in ["BSCS", "BSIT"]:
            reasons.append("Matches your technology interests")

        if mentions(hobbies, ["business", "leadership", "management"]) and course in ["BSBA", "BSA"]:
            reasons.append("Aligns with your business interests")

        if mentions(hobbies, ["helping", "teaching", "caring"]) and course in ["BSED", "BSN", "BSPsych"]:
            reasons.append("Perfect for your helping nature")

        # Personality type alignment
        if personality_type == "introvert" and course in ["BSCS", "BSA", "BSArch"]:
            reasons.append("Suits your introverted personality")
        elif personality_type == "extrovert" and course in ["BSBA", "BSED", "BSHM"]:
            reasons.append("Great for your outgoing personality")

        # Learning style alignment
        if learning_style == "hands-on" and course in ["BSIT", "BSHM", "BSAgri"]:
            reasons.append("Matches your hands-on learning preference")
        elif learning_style == "visual" and course in ["BSArch", "BSCS"]:
            reasons.append("Perfect for your visual learning style")

        # Career goals alignment
        if mentions(career_goals, ["developer", "programmer", "software"]) and course in ["BSCS", "BSIT"]:
            reasons.append("Direct path to your career goals")

        if mentions(career_goals, ["business", "entrepreneur", "manager"]) and course in ["BSBA", "BSA"]:
            reasons.append("Direct path to your business goals")

        # Default reason if no specific matches
        if not reasons:
            reasons.append("Good academic and personal fit")

        return " • ".join(reasons[:3])  # Limit to top 3 reasons

    def save_model(self, model_path='course_recommender_model.joblib'):
        """Persist the model, scaler and label encoders with joblib.

        Raises:
            RuntimeError: If no model has been trained.
        """
        if self.model is None:
            raise RuntimeError("No model to save!")

        model_data = {
            'model': self.model,
            'scaler': self.scaler,
            'label_encoders': self.label_encoders,
        }
        joblib.dump(model_data, model_path)

    def load_model(self, model_path='course_recommender_model.joblib'):
        """Restore the model, scaler and label encoders saved by ``save_model``.

        SECURITY: joblib files are pickle-based; only load files from a
        trusted source.
        """
        model_data = joblib.load(model_path)
        self.model = model_data['model']
        self.scaler = model_data['scaler']
        self.label_encoders = model_data['label_encoders']
588
-
589
-
590
- # ===== UI helper for Hugging Face integration =====
591
def get_course_recommendations_ui(recommender: "CourseRecommender", stanine, gwa, strand, hobbies) -> str:
    """Validate raw UI inputs and render recommendations as Markdown.

    Returns a user-facing message in every case: a validation hint, an
    availability notice, an error description, or the formatted list of
    recommended courses with their match scores.
    """
    if recommender is None:
        return "Sorry, the recommendation system is not available at the moment. Please try again later."

    def _to_number(raw, convert):
        # Strings are trimmed before conversion; other values convert directly.
        return convert(raw.strip() if isinstance(raw, str) else raw)

    try:
        try:
            stanine = _to_number(stanine, int)
        except (ValueError, TypeError, AttributeError):
            return "Stanine score must be a valid number between 1 and 9"

        try:
            gwa = _to_number(gwa, float)
        except (ValueError, TypeError, AttributeError):
            return "GWA must be a valid number between 75 and 100"

        stanine_in_range = 1 <= stanine <= 9
        if not stanine_in_range:
            return "Stanine score must be between 1 and 9"

        gwa_in_range = 75 <= gwa <= 100
        if not gwa_in_range:
            return "GWA must be between 75 and 100"

        if not strand:
            return "Please select a strand"

        if not hobbies or not str(hobbies).strip():
            return "Please enter your hobbies/interests"

        results = recommender.recommend_courses(
            stanine=stanine,
            gwa=gwa,
            strand=strand,
            hobbies=str(hobbies),
        )
        if not results:
            return "No recommendations available at the moment."

        # Collect the Markdown fragments and join them once at the end.
        parts = [
            "## Course Recommendations for You\n\n",
            f"**Profile:** Stanine {stanine}, GWA {gwa}, {strand} Strand\n",
            f"**Interests:** {hobbies}\n\n",
        ]
        for position, entry in enumerate(results, 1):
            parts.append(f"### {position}. {entry['code']} - {entry['name']}\n")
            score = entry.get('rating', entry.get('probability', 0))
            parts.append(f"**Match Score:** {score:.1f}%\n\n")
        return "".join(parts)
    except Exception as err:
        return f"Error getting recommendations: {str(err)}"
629
-
630
- # Example usage
631
if __name__ == "__main__":
    # Demo run: build a recommender (this contacts the feedback database and
    # trains the model) and print the suggestions for one sample profile.
    sample_profile = {
        'stanine': 8,
        'gwa': 95,
        'strand': 'STEM',
        'hobbies': 'programming, gaming, technology',
    }
    recommender = CourseRecommender()
    recommendations = recommender.recommend_courses(**sample_profile)
    print("Recommended courses:", json.dumps(recommendations, indent=2))