markobinario committed on
Commit
0562a5c
·
verified ·
1 Parent(s): 2a8adba

Update recommender.py

Browse files
Files changed (1) hide show
  1. recommender.py +205 -26
recommender.py CHANGED
@@ -8,10 +8,11 @@ import os
8
  import requests
9
 
10
  class CourseRecommender:
11
- def __init__(self):
12
  self.model = None
13
  self.label_encoders = {}
14
  self.scaler = StandardScaler()
 
15
  self.courses = self.get_courses()
16
  self.training_data = self.get_training_data()
17
  self.train_model()
@@ -58,17 +59,92 @@ class CourseRecommender:
58
  }
59
 
60
  def save_student_data(self, stanine, gwa, strand, course, rating, hobbies=None):
61
- """Save student feedback to in-memory storage (for demonstration purposes)"""
62
- try:
63
- # In a real implementation, you could save this to a file or external storage
64
- print(f"Student feedback saved: Stanine={stanine}, GWA={gwa}, Strand={strand}, Course={course}, Rating={rating}, Hobbies={hobbies}")
65
  return True
66
- except Exception as e:
67
- print(f"Error saving student feedback: {e}")
68
- return False
69
 
70
  def get_training_data(self):
71
- """Get training data for the provided courses only (including GAS, TVL)"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  training_data = [
73
  # CASTECH - Agriculture Science and Technology
74
  (8, 92, 'STEM', 'BSA', 5, 'farming, agriculture, plants, environment'),
@@ -103,6 +179,12 @@ class CourseRecommender:
103
  (9, 94, 'STEM', 'BSMath', 4, 'mathematics, problem solving, logic'),
104
  (7, 82, 'HUMSS', 'BAELS', 3, 'english, language, communication'),
105
  (6, 80, 'HUMSS', 'BSDevComm', 3, 'communication, media, development'),
 
 
 
 
 
 
106
 
107
  # GAS - General Academic Strand (broad interests)
108
  (7, 85, 'GAS', 'BAELS', 4, 'communication, language, writing, literature'),
@@ -187,11 +269,65 @@ class CourseRecommender:
187
 
188
  print("Model trained successfully")
189
  print(f"Model classes: {self.model.classes_}")
 
190
 
191
  except Exception as e:
192
  print(f"Error training model: {e}")
193
  self.model = None
194
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  def get_default_recommendations(self, stanine, gwa, strand, hobbies: str = ""):
196
  """Provide default recommendations based on basic rules when no training data is available"""
197
  courses = self.courses
@@ -212,7 +348,14 @@ class CourseRecommender:
212
  else:
213
  priority_courses = ['BSHM', 'BSEntrep', 'BSAgribus']
214
  elif strand == 'HUMSS':
215
- if stanine >= 8 and gwa >= 90:
 
 
 
 
 
 
 
216
  priority_courses = ['BSED', 'BEED', 'BAELS', 'BSDevComm', 'BPE']
217
  else:
218
  priority_courses = ['BSED', 'BEED', 'BAELS', 'BTLEd']
@@ -279,9 +422,18 @@ class CourseRecommender:
279
  if col in input_data.columns and col in self.label_encoders:
280
  value = input_data[col].iloc[0]
281
  if value not in self.label_encoders[col].classes_:
282
- # Use default encoding for unseen values
283
- print(f"Warning: Unseen value '{value}' in {col}, using default encoding")
284
- input_data[col] = 0 # Default to first encoded value
 
 
 
 
 
 
 
 
 
285
  else:
286
  input_data[col] = self.label_encoders[col].transform([value])[0]
287
 
@@ -308,11 +460,11 @@ class CourseRecommender:
308
 
309
  # Only include courses with meaningful probabilities
310
  if confidence > 0.01: # Threshold for meaningful recommendations
311
- recommendations.append({
312
- 'code': code,
313
- 'name': course_map.get(code, code),
314
- 'rating': round(confidence * 100, 1)
315
- })
316
 
317
  # If no meaningful recommendations, fall back to default
318
  if not recommendations:
@@ -325,6 +477,33 @@ class CourseRecommender:
325
  print(f"Error recommending courses: {e}")
326
  return self.get_default_recommendations(stanine, gwa, strand, hobbies or "")
327
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
328
  def _get_recommendation_reason(self, course, stanine, gwa, strand, hobbies, interests, personality_type, learning_style, career_goals):
329
  """Generate personalized reason for recommendation"""
330
  reasons = []
@@ -416,19 +595,19 @@ def get_course_recommendations_ui(recommender: "CourseRecommender", stanine, gwa
416
  try:
417
  stanine = int(stanine.strip()) if isinstance(stanine, str) else int(stanine)
418
  except (ValueError, TypeError, AttributeError):
419
- return "Stanine score must be a valid number between 1 and 9"
420
  try:
421
  gwa = float(gwa.strip()) if isinstance(gwa, str) else float(gwa)
422
  except (ValueError, TypeError, AttributeError):
423
- return "GWA must be a valid number between 75 and 100"
424
  if not (1 <= stanine <= 9):
425
- return "Stanine score must be between 1 and 9"
426
  if not (75 <= gwa <= 100):
427
- return "GWA must be between 75 and 100"
428
  if not strand:
429
- return "Please select a strand"
430
  if not hobbies or not str(hobbies).strip():
431
- return "Please enter your hobbies/interests"
432
 
433
  recommendations = recommender.recommend_courses(
434
  stanine=stanine,
@@ -438,7 +617,7 @@ def get_course_recommendations_ui(recommender: "CourseRecommender", stanine, gwa
438
  )
439
  if not recommendations:
440
  return "No recommendations available at the moment."
441
- response = f"## 🎯 Course Recommendations for You\n\n"
442
  response += f"**Profile:** Stanine {stanine}, GWA {gwa}, {strand} Strand\n"
443
  response += f"**Interests:** {hobbies}\n\n"
444
  for i, rec in enumerate(recommendations, 1):
@@ -446,7 +625,7 @@ def get_course_recommendations_ui(recommender: "CourseRecommender", stanine, gwa
446
  response += f"**Match Score:** {rec.get('rating', rec.get('probability', 0)):.1f}%\n\n"
447
  return response
448
  except Exception as e:
449
- return f"Error getting recommendations: {str(e)}"
450
 
451
  # Example usage
452
  if __name__ == "__main__":
 
8
  import requests
9
 
10
  class CourseRecommender:
11
+ def __init__(self, database_url="https://database-dhe2.onrender.com"):
12
  self.model = None
13
  self.label_encoders = {}
14
  self.scaler = StandardScaler()
15
+ self.database_url = database_url
16
  self.courses = self.get_courses()
17
  self.training_data = self.get_training_data()
18
  self.train_model()
 
59
  }
60
 
61
  def save_student_data(self, stanine, gwa, strand, course, rating, hobbies=None):
62
+ """Save student feedback (disabled - read-only mode)"""
63
+ print(f"Student feedback (read-only mode): Stanine={stanine}, GWA={gwa}, Strand={strand}, Course={course}, Rating={rating}, Hobbies={hobbies}")
 
 
64
  return True
 
 
 
65
 
66
  def get_training_data(self):
67
+ """Get training data from database, fallback to static data"""
68
+ try:
69
+ # Try to get data from database first
70
+ db_data = self.get_training_data_from_database()
71
+ if db_data is not None and not db_data.empty:
72
+ print(f"Loaded {len(db_data)} training records from database")
73
+ return db_data
74
+ else:
75
+ print("No database data available, using static training data")
76
+ return self.get_static_training_data()
77
+ except Exception as e:
78
+ print(f"Error loading database data: {e}, using static training data")
79
+ return self.get_static_training_data()
80
+
81
def get_training_data_from_database(self):
    """Fetch student feedback from the database service as a training frame.

    Issues a GET to ``{self.database_url}/student_feedback_counts`` and reads
    the ``feedback_counts`` list from the JSON body. Each record that has all
    required fields and convertible values becomes one row; records that are
    incomplete or malformed are reported and skipped individually, so one bad
    record does not discard the rest.

    Returns:
        A pandas DataFrame with columns stanine, gwa, strand, course, rating
        and hobbies, or None when the request fails, the status is not 200,
        or no usable record is found.
    """
    required_fields = ['stanine', 'gwa', 'strand', 'course', 'rating', 'hobbies']
    endpoint = f"{self.database_url}/student_feedback_counts"

    try:
        print(f"Attempting to fetch data from database: {endpoint}")

        # Try to get student feedback data from the existing endpoint
        response = requests.get(endpoint, timeout=10)
        print(f"Database response status: {response.status_code}")

        if response.status_code != 200:
            print(f"Database endpoint returned status {response.status_code}")
            print(f"Response text: {response.text}")
            return None

        data = response.json()
        print(f"Response data keys: {list(data.keys())}")

        # `or []` also covers a JSON null stored under the key
        feedback_counts = data.get('feedback_counts') or []
        print(f"Number of records in database: {len(feedback_counts)}")

        if not feedback_counts:
            print("No student feedback data found in database")
            return None

        # Convert database data to training format
        training_data = []
        for i, record in enumerate(feedback_counts, 1):
            print(f"Processing record {i}: {record}")
            try:
                missing_fields = [key for key in required_fields if key not in record]
                if missing_fields:
                    print(f"Record {i} missing fields: {missing_fields}")
                    continue
                row = (
                    int(record['stanine']),
                    float(record['gwa']),
                    str(record['strand']),
                    str(record['course']),
                    int(record['rating']),
                    str(record['hobbies'] or ''),
                )
            except (TypeError, ValueError, KeyError, IndexError) as e:
                # Skip only this record; the remaining ones are still usable
                print(f"Record {i} skipped, invalid value: {e}")
                continue
            training_data.append(row)

        print(f"Valid records processed: {len(training_data)}/{len(feedback_counts)}")

        if not training_data:
            print("No valid training records found in database")
            return None

        print(f"Successfully loaded {len(training_data)} records from database")
        return pd.DataFrame(training_data, columns=required_fields)

    except requests.exceptions.ConnectionError:
        print("Cannot connect to database - using static data")
        return None
    except Exception as e:
        print(f"Error fetching training data from database: {e}")
        import traceback
        traceback.print_exc()
        return None
145
+
146
+ def get_static_training_data(self):
147
+ """Get static training data for the provided courses only (including GAS, TVL)"""
148
  training_data = [
149
  # CASTECH - Agriculture Science and Technology
150
  (8, 92, 'STEM', 'BSA', 5, 'farming, agriculture, plants, environment'),
 
179
  (9, 94, 'STEM', 'BSMath', 4, 'mathematics, problem solving, logic'),
180
  (7, 82, 'HUMSS', 'BAELS', 3, 'english, language, communication'),
181
  (6, 80, 'HUMSS', 'BSDevComm', 3, 'communication, media, development'),
182
+
183
+ # HUMSS students with programming interests
184
+ (8, 90, 'HUMSS', 'BSIT', 5, 'programming, computers, technology, coding'),
185
+ (7, 85, 'HUMSS', 'BSCpE', 4, 'programming, computer engineering, hardware'),
186
+ (8, 88, 'HUMSS', 'BSDevComm', 4, 'programming, media, communication, technology'),
187
+ (6, 80, 'HUMSS', 'BSIT', 3, 'programming, computers, ict'),
188
 
189
  # GAS - General Academic Strand (broad interests)
190
  (7, 85, 'GAS', 'BAELS', 4, 'communication, language, writing, literature'),
 
269
 
270
  print("Model trained successfully")
271
  print(f"Model classes: {self.model.classes_}")
272
+ print(f"Training data size: {len(training_data)} records")
273
 
274
  except Exception as e:
275
  print(f"Error training model: {e}")
276
  self.model = None
277
 
278
def refresh_training_data(self):
    """Reload the training data and retrain the model (read-only mode).

    Stores the result of get_training_data() in self.training_data and then
    calls train_model().

    Returns:
        True when a model is available in self.model after retraining;
        False when reloading raised or retraining left self.model as None.
    """
    try:
        print("Refreshing training data from database...")
        self.training_data = self.get_training_data()
        self.train_model()
    except Exception as e:
        print(f"Error refreshing training data: {e}")
        return False

    # train_model()'s own error handler prints the error and sets self.model
    # to None rather than raising, so success has to be read off the model.
    return self.model is not None
288
+
289
def test_database_connection(self):
    """Probe the feedback endpoint and print a diagnostic report to stdout.

    Sends a GET to ``{self.database_url}/student_feedback_counts`` and prints
    the status, the top-level JSON keys, the number of records and, using the
    first record as a sample, which required fields are missing. Any
    exception is printed together with its traceback instead of propagating.

    Returns:
        None; the report is only printed.
    """
    required_fields = ['stanine', 'gwa', 'strand', 'course', 'rating', 'hobbies']

    print("=== DATABASE CONNECTION TEST ===")
    # Printed before the request so the target is visible even when it fails
    print(f"Database URL: {self.database_url}")
    try:
        response = requests.get(f"{self.database_url}/student_feedback_counts", timeout=10)
        print(f"Response Status: {response.status_code}")

        if response.status_code != 200:
            print(f"Database Error: {response.status_code}")
            print(f"Response: {response.text}")
            return

        data = response.json()
        print(f"Response Keys: {list(data.keys())}")

        if 'feedback_counts' not in data:
            print("'feedback_counts' key not found in response")
            return

        records = data['feedback_counts']
        print(f"Number of Records: {len(records)}")

        if not records:
            print("Database is empty")
            return

        sample_record = records[0]
        print(f"Sample Record: {sample_record}")

        # Check required fields on the sample record only
        missing_fields = [field for field in required_fields if field not in sample_record]
        if missing_fields:
            print(f"Missing Required Fields: {missing_fields}")
            print(f"Available Fields: {list(sample_record.keys())}")
        else:
            print("All required fields present")

    except Exception as e:
        print(f"Connection Error: {e}")
        import traceback
        traceback.print_exc()
330
+
331
  def get_default_recommendations(self, stanine, gwa, strand, hobbies: str = ""):
332
  """Provide default recommendations based on basic rules when no training data is available"""
333
  courses = self.courses
 
348
  else:
349
  priority_courses = ['BSHM', 'BSEntrep', 'BSAgribus']
350
  elif strand == 'HUMSS':
351
+ # HUMSS students with programming interests should get tech courses
352
+ if any(k in hobbies_lc for k in ['programming', 'computer', 'ict', 'tech', 'coding']):
353
+ priority_courses = ['BSIT', 'BSCpE', 'BSDevComm']
354
+ elif any(k in hobbies_lc for k in ['teaching', 'education', 'helping', 'children']):
355
+ priority_courses = ['BSED', 'BEED', 'BTLEd']
356
+ elif any(k in hobbies_lc for k in ['communication', 'media', 'writing', 'language']):
357
+ priority_courses = ['BAELS', 'BSDevComm', 'BSHM']
358
+ elif stanine >= 8 and gwa >= 90:
359
  priority_courses = ['BSED', 'BEED', 'BAELS', 'BSDevComm', 'BPE']
360
  else:
361
  priority_courses = ['BSED', 'BEED', 'BAELS', 'BTLEd']
 
422
  if col in input_data.columns and col in self.label_encoders:
423
  value = input_data[col].iloc[0]
424
  if value not in self.label_encoders[col].classes_:
425
+ # For hobbies, try to find a similar existing hobby
426
+ if col == 'hobbies':
427
+ similar_hobby = self._find_similar_hobby(value)
428
+ if similar_hobby:
429
+ print(f"Warning: Unseen hobby '{value}', using similar: '{similar_hobby}'")
430
+ input_data[col] = self.label_encoders[col].transform([similar_hobby])[0]
431
+ else:
432
+ print(f"Warning: Unseen hobby '{value}', using default encoding")
433
+ input_data[col] = 0
434
+ else:
435
+ print(f"Warning: Unseen value '{value}' in {col}, using default encoding")
436
+ input_data[col] = 0
437
  else:
438
  input_data[col] = self.label_encoders[col].transform([value])[0]
439
 
 
460
 
461
  # Only include courses with meaningful probabilities
462
  if confidence > 0.01: # Threshold for meaningful recommendations
463
+ recommendations.append({
464
+ 'code': code,
465
+ 'name': course_map.get(code, code),
466
+ 'rating': round(confidence * 100, 1)
467
+ })
468
 
469
  # If no meaningful recommendations, fall back to default
470
  if not recommendations:
 
477
  print(f"Error recommending courses: {e}")
478
  return self.get_default_recommendations(stanine, gwa, strand, hobbies or "")
479
 
480
def _find_similar_hobby(self, hobby):
    """Map an unseen hobby string to a known training hobby of the same theme.

    The hobby is compared, case-insensitively and by substring, against a
    fixed list of keyword groups. For the first group that matches, the first
    entry of ``self.label_encoders['hobbies'].classes_`` containing any
    keyword of that group is returned; if that group has no such entry the
    next matching group is tried.

    Args:
        hobby: Free-text hobby string that the encoder has not seen.

    Returns:
        The matching entry from the encoder's classes, or None when the hobby
        is empty or not a string, no hobbies encoder exists, or nothing
        matches.
    """
    if not isinstance(hobby, str) or not hobby.strip():
        return None

    encoder = self.label_encoders.get('hobbies')
    if encoder is None:
        return None

    hobby_lower = hobby.lower()

    # Keyword groups for similar hobbies, in priority order.
    # NOTE(review): matching is by substring, so a short keyword such as
    # 'ict' also matches inside longer words (e.g. "fiction") - confirm
    # whether whole-word matching is wanted.
    keyword_groups = (
        ('programming', 'computers', 'technology', 'coding', 'ict'),
        ('teaching', 'education', 'helping', 'children'),
        ('business', 'entrepreneurship', 'management', 'leadership'),
        ('hospitality', 'tourism', 'service', 'events'),
        ('farming', 'agriculture', 'plants', 'environment'),
        ('communication', 'media', 'writing', 'language'),
    )

    # Lowercase every known hobby once instead of once per keyword group
    known_hobbies = [(known, str(known).lower()) for known in encoder.classes_]

    for keywords in keyword_groups:
        if not any(keyword in hobby_lower for keyword in keywords):
            continue
        for training_hobby, training_lower in known_hobbies:
            if any(keyword in training_lower for keyword in keywords):
                return training_hobby

    return None
506
+
507
  def _get_recommendation_reason(self, course, stanine, gwa, strand, hobbies, interests, personality_type, learning_style, career_goals):
508
  """Generate personalized reason for recommendation"""
509
  reasons = []
 
595
  try:
596
  stanine = int(stanine.strip()) if isinstance(stanine, str) else int(stanine)
597
  except (ValueError, TypeError, AttributeError):
598
+ return "Stanine score must be a valid number between 1 and 9"
599
  try:
600
  gwa = float(gwa.strip()) if isinstance(gwa, str) else float(gwa)
601
  except (ValueError, TypeError, AttributeError):
602
+ return "GWA must be a valid number between 75 and 100"
603
  if not (1 <= stanine <= 9):
604
+ return "Stanine score must be between 1 and 9"
605
  if not (75 <= gwa <= 100):
606
+ return "GWA must be between 75 and 100"
607
  if not strand:
608
+ return "Please select a strand"
609
  if not hobbies or not str(hobbies).strip():
610
+ return "Please enter your hobbies/interests"
611
 
612
  recommendations = recommender.recommend_courses(
613
  stanine=stanine,
 
617
  )
618
  if not recommendations:
619
  return "No recommendations available at the moment."
620
+ response = f"## Course Recommendations for You\n\n"
621
  response += f"**Profile:** Stanine {stanine}, GWA {gwa}, {strand} Strand\n"
622
  response += f"**Interests:** {hobbies}\n\n"
623
  for i, rec in enumerate(recommendations, 1):
 
625
  response += f"**Match Score:** {rec.get('rating', rec.get('probability', 0)):.1f}%\n\n"
626
  return response
627
  except Exception as e:
628
+ return f"Error getting recommendations: {str(e)}"
629
 
630
  # Example usage
631
  if __name__ == "__main__":