markobinario committed on
Commit
3939d46
·
verified ·
1 Parent(s): c6af53f

Update course_recommender.py

Browse files
Files changed (1) hide show
  1. course_recommender.py +99 -23
course_recommender.py CHANGED
@@ -17,6 +17,10 @@ class CourseRecommender:
17
  self.scaler = StandardScaler()
18
  self.db_connection = DatabaseConnection()
19
  self.is_trained = False
 
 
 
 
20
 
21
  def preprocess_data(self, df: pd.DataFrame) -> pd.DataFrame:
22
  """Preprocess the data for training"""
@@ -85,23 +89,96 @@ class CourseRecommender:
85
 
86
  return df_features
87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  def train_model(self, use_database: bool = True):
89
  """Train the recommendation model using student feedback data"""
90
  print("Loading training data from student feedback...")
91
 
92
- # Get available courses from /courses endpoint
93
- available_courses = self.db_connection.get_available_courses()
94
- if not available_courses:
95
- print("No courses found in /courses endpoint. Using courses from student feedback data...")
96
- # Get courses from student feedback data
97
- df_temp = self.db_connection.get_student_feedback_counts()
98
- if df_temp.empty:
99
- raise ValueError("No courses available in /courses endpoint and no student feedback data found.")
100
- available_courses = df_temp['course'].unique().tolist()
101
- print(f"Using courses from student feedback: {available_courses}")
102
-
103
- print(f"Available courses from /courses: {len(available_courses)}")
104
- print(f"Available courses: {available_courses}")
105
 
106
  # Get training data from student feedback
107
  df = self.db_connection.get_student_feedback_counts()
@@ -170,6 +247,9 @@ class CourseRecommender:
170
  # Save model
171
  self.save_model()
172
 
 
 
 
173
  return accuracy
174
 
175
  def predict_course(self, stanine: int, gwa: float, strand: str, hobbies: str) -> List[Tuple[str, float]]:
@@ -179,16 +259,8 @@ class CourseRecommender:
179
  if not self.is_trained:
180
  raise ValueError("Model not trained. Please train the model first.")
181
 
182
- # Get available courses from /courses endpoint
183
- available_courses = self.db_connection.get_available_courses()
184
- if not available_courses:
185
- print("No courses found in /courses endpoint. Using courses from student feedback data...")
186
- # Get courses from student feedback data
187
- df_temp = self.db_connection.get_student_feedback_counts()
188
- if df_temp.empty:
189
- raise ValueError("No courses available in /courses endpoint and no student feedback data found.")
190
- available_courses = df_temp['course'].unique().tolist()
191
- print(f"Using courses from student feedback: {available_courses}")
192
 
193
  # Create input data
194
  input_data = pd.DataFrame({
@@ -243,6 +315,10 @@ class CourseRecommender:
243
  self.label_encoders = joblib.load('models/label_encoders.pkl')
244
  self.scaler = joblib.load('models/scaler.pkl')
245
  self.is_trained = True
 
 
 
 
246
  print("Model loaded successfully")
247
  except FileNotFoundError:
248
  print("No saved model found. Please train the model first.")
 
17
  self.scaler = StandardScaler()
18
  self.db_connection = DatabaseConnection()
19
  self.is_trained = False
20
+ self._available_courses = None # Cache for available courses
21
+ self._last_data_count = 0 # Track data count for auto-retraining
22
+ self._auto_retrain_threshold = 5 # Retrain every 5 new feedbacks
23
+ self._min_samples_for_training = 10 # Minimum samples needed to train
24
 
25
  def preprocess_data(self, df: pd.DataFrame) -> pd.DataFrame:
26
  """Preprocess the data for training"""
 
89
 
90
  return df_features
91
 
92
def get_available_courses(self):
    """Return the list of available courses, fetching it at most once.

    The first call asks the database connection for its course list. When
    that list is empty, the distinct values of the ``course`` column in the
    student feedback data are used instead. The result is stored on the
    instance and returned as-is by every later call.

    Returns:
        The cached list of courses.

    Raises:
        ValueError: If the course lookup returns nothing and the student
            feedback data is empty as well.
    """
    # Fast path: an earlier call already populated the cache.
    if self._available_courses is not None:
        return self._available_courses

    course_list = self.db_connection.get_available_courses()
    if not course_list:
        print("No courses found in /courses endpoint. Using courses from student feedback data...")
        # Fall back to the courses that appear in the feedback records.
        feedback_df = self.db_connection.get_student_feedback_counts()
        if feedback_df.empty:
            raise ValueError("No courses available in /courses endpoint and no student feedback data found.")
        course_list = feedback_df['course'].unique().tolist()
        print(f"Using courses from student feedback: {course_list}")

    self._available_courses = course_list
    print(f"Available courses cached: {len(course_list)} courses")
    return self._available_courses
110
+
111
def refresh_courses_cache(self):
    """Discard the cached course list and rebuild it.

    Returns:
        Whatever ``get_available_courses`` returns once the cache has been
        cleared.
    """
    # Clearing the cache forces get_available_courses to fetch again.
    self._available_courses = None
    refreshed = self.get_available_courses()
    return refreshed
115
+
116
def get_current_data_count(self):
    """Return the number of rows in the student feedback data.

    This is a best-effort lookup: any ordinary error raised while reading
    or measuring the feedback data is reported and treated as "no records".
    Interpreter-level signals such as ``KeyboardInterrupt`` and
    ``SystemExit`` are deliberately not caught, so the process can still be
    stopped while a lookup is in progress.

    Returns:
        int: ``len`` of the feedback DataFrame, or 0 when the DataFrame is
        empty or the lookup failed.
    """
    try:
        df = self.db_connection.get_student_feedback_counts()
        return 0 if df.empty else len(df)
    except Exception as exc:
        # Keep the original fallback of 0, but surface the cause instead of
        # hiding it completely.
        print(f"Could not read feedback data count: {exc}")
        return 0
123
+
124
def check_and_auto_retrain(self):
    """Retrain the model when enough new feedback has accumulated.

    Training is skipped when the current record count is below
    ``_min_samples_for_training``, or when fewer than
    ``_auto_retrain_threshold`` records have been added since
    ``_last_data_count`` was last recorded.

    Returns:
        bool: True only when a retraining run was started and completed
        without raising; False in every other case, including a failed
        training attempt.
    """
    total = self.get_current_data_count()

    # Guard 1: not enough data overall.
    if total < self._min_samples_for_training:
        print(f"Not enough data for training: {total} < {self._min_samples_for_training}")
        return False

    # Guard 2: not enough new records since the last recorded count.
    added = total - self._last_data_count
    if added < self._auto_retrain_threshold:
        return False

    print(f"Auto-retraining triggered: {added} new feedbacks")
    try:
        accuracy = self.train_model(use_database=True)
        # Record the count this run was based on so the next check measures
        # growth from here.
        self._last_data_count = total
        print(f"Auto-retraining completed with accuracy: {accuracy:.3f}")
        return True
    except Exception as e:
        # A failed retrain is reported but never propagated to the caller.
        print(f"Auto-retraining failed: {e}")
        return False
144
+
145
def add_feedback_with_learning(self, course: str, stanine: int, gwa: float, strand: str,
                               rating: int, hobbies: str) -> bool:
    """Store one feedback record and then run the auto-retrain check.

    All arguments are forwarded unchanged to the database connection's
    ``add_feedback``. The auto-retrain check only runs when that call
    reports success, and its outcome does not affect the return value.

    Returns:
        The value returned by ``add_feedback``.
    """
    stored = self.db_connection.add_feedback(course, stanine, gwa, strand, rating, hobbies)
    if not stored:
        # Nothing was saved, so there is no new data to learn from.
        return stored

    print(f"Feedback added for course: {course}")
    self.check_and_auto_retrain()
    return stored
157
+
158
def configure_auto_learning(self, retrain_threshold=5, min_samples=10):
    """Set the parameters that control automatic retraining.

    Args:
        retrain_threshold: Number of new feedback records that must
            accumulate before a retrain is triggered. Must be at least 1;
            a value of 0 or below would make every check trigger a retrain
            even when no new data has arrived.
        min_samples: Minimum total number of feedback records required
            before any training is attempted. Must not be negative.

    Raises:
        ValueError: If ``retrain_threshold`` is below 1 or ``min_samples``
            is negative. The existing configuration is left untouched.
    """
    # Validate both values before mutating state so that a rejected call
    # cannot leave the configuration half-updated.
    if retrain_threshold < 1:
        raise ValueError(f"retrain_threshold must be at least 1, got {retrain_threshold}")
    if min_samples < 0:
        raise ValueError(f"min_samples must not be negative, got {min_samples}")

    self._auto_retrain_threshold = retrain_threshold
    self._min_samples_for_training = min_samples
    print(f"Auto-learning configured: retrain every {retrain_threshold} new feedbacks, minimum {min_samples} samples")
163
+
164
def get_learning_status(self):
    """Return a snapshot of the auto-learning state.

    Returns:
        dict: with the keys

        - ``current_data_count``: value of ``get_current_data_count()``.
        - ``last_trained_count``: the stored ``_last_data_count``.
        - ``new_feedbacks``: difference between the two counts above.
        - ``retrain_threshold``: the configured ``_auto_retrain_threshold``.
        - ``min_samples``: the configured ``_min_samples_for_training``.
        - ``ready_for_retrain``: True when both conditions enforced by
          ``check_and_auto_retrain`` hold, i.e. the total count reaches
          ``min_samples`` and the number of new feedbacks reaches
          ``retrain_threshold``.
    """
    current_count = self.get_current_data_count()
    new_feedbacks = current_count - self._last_data_count

    # Mirror the two gates used by check_and_auto_retrain so this flag is
    # never True in a state where a retrain would be refused.
    has_enough_samples = current_count >= self._min_samples_for_training
    has_enough_new = new_feedbacks >= self._auto_retrain_threshold

    return {
        'current_data_count': current_count,
        'last_trained_count': self._last_data_count,
        'new_feedbacks': new_feedbacks,
        'retrain_threshold': self._auto_retrain_threshold,
        'min_samples': self._min_samples_for_training,
        'ready_for_retrain': has_enough_samples and has_enough_new,
    }
175
+
176
  def train_model(self, use_database: bool = True):
177
  """Train the recommendation model using student feedback data"""
178
  print("Loading training data from student feedback...")
179
 
180
+ # Get available courses with caching
181
+ available_courses = self.get_available_courses()
 
 
 
 
 
 
 
 
 
 
 
182
 
183
  # Get training data from student feedback
184
  df = self.db_connection.get_student_feedback_counts()
 
247
  # Save model
248
  self.save_model()
249
 
250
+ # Update data count tracking
251
+ self._last_data_count = len(df_clean)
252
+
253
  return accuracy
254
 
255
  def predict_course(self, stanine: int, gwa: float, strand: str, hobbies: str) -> List[Tuple[str, float]]:
 
259
  if not self.is_trained:
260
  raise ValueError("Model not trained. Please train the model first.")
261
 
262
+ # Get available courses with caching
263
+ available_courses = self.get_available_courses()
 
 
 
 
 
 
 
 
264
 
265
  # Create input data
266
  input_data = pd.DataFrame({
 
315
  self.label_encoders = joblib.load('models/label_encoders.pkl')
316
  self.scaler = joblib.load('models/scaler.pkl')
317
  self.is_trained = True
318
+
319
+ # Initialize data count tracking
320
+ self._last_data_count = self.get_current_data_count()
321
+
322
  print("Model loaded successfully")
323
  except FileNotFoundError:
324
  print("No saved model found. Please train the model first.")