Ahmed694200 committed on
Commit
7f47e42
·
1 Parent(s): e8eed09

Complete 3-approach evaluation comparison in API and UI

Browse files

- Added _get_relevant_for_user to Evaluator for Precision/Recall ground truth
- Extended /api/evaluate to compare CF, Content-Based, and Knowledge-Based
- Updated evaluation dashboard to highlight best approach with data-driven insights

Files changed (3) hide show
  1. app.py +58 -14
  2. recommender/evaluation.py +18 -0
  3. templates/evaluation.html +17 -6
app.py CHANGED
@@ -222,32 +222,76 @@ def api_evaluate():
222
  best_rmse = r["RMSE"]
223
  best_cf = r["method"]
224
 
 
 
 
225
  approach_results = []
226
 
227
- def run_cf_for_user(uid):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
  return cf.recommend("svd", uid, n_recommendations=10)
229
 
230
- cf_precisions = []
231
- cf_recalls = []
232
- test_users = TEST["user_id"].unique()[:20]
233
- for uid in test_users:
234
- recs = run_cf_for_user(uid)
235
- rec_items = [r[0] for r in recs]
236
- relevant = TEST[(TEST["user_id"] == uid) & (TEST["rating"] >= 3.5)]["product_id"].tolist()
237
- if relevant:
238
- cf_precisions.append(evaluator.precision_at_k(rec_items, relevant, 5))
239
- cf_recalls.append(evaluator.recall_at_k(rec_items, relevant, 5))
240
- if cf_precisions:
 
 
 
 
 
 
 
 
241
  approach_results.append({
242
  "approach": "Collaborative Filtering",
243
- "Precision@5": round(np.mean(cf_precisions), 4),
244
- "Recall@5": round(np.mean(cf_recalls), 4),
 
 
 
 
 
 
 
 
245
  })
246
 
 
 
 
 
 
 
 
 
 
 
247
  return jsonify({
248
  "cf_methods": cf_results,
249
  "best_cf_method": best_cf,
250
  "approaches": approach_results,
 
251
  })
252
  except Exception as e:
253
  return jsonify({"error": str(e)}), 500
 
222
  best_rmse = r["RMSE"]
223
  best_cf = r["method"]
224
 
225
+ evaluator.set_test_ratings(TEST)
226
+ test_users = TEST["user_id"].unique()[:20]
227
+
228
  approach_results = []
229
 
230
+ def approach_precision_recall(recommender_fn, test_users):
231
+ precisions, recalls = [], []
232
+ for uid in test_users:
233
+ try:
234
+ recs = recommender_fn(uid)
235
+ except Exception:
236
+ recs = []
237
+ rec_items = [r[0] for r in recs]
238
+ relevant = evaluator._get_relevant_for_user(uid)
239
+ if relevant:
240
+ precisions.append(evaluator.precision_at_k(rec_items, relevant, 5))
241
+ recalls.append(evaluator.recall_at_k(rec_items, relevant, 5))
242
+ return precisions, recalls
243
+
244
+ def cf_recommender(uid):
245
  return cf.recommend("svd", uid, n_recommendations=10)
246
 
247
+ train_ratings = ratings[~ratings.index.isin(TEST.index)]
248
+ def cb_recommender(uid):
249
+ profile = train_ratings[
250
+ (train_ratings["user_id"] == uid) & (train_ratings["rating"] >= 3.5)
251
+ ]["product_id"].tolist()
252
+ return cb.recommend("tfidf", user_profile_items=profile, n_recommendations=10)
253
+
254
+ def kb_recommender(uid):
255
+ prefs = get_user_preferences(users, uid)
256
+ constraints = {
257
+ "budget_min": prefs.get("budget_min", 0),
258
+ "budget_max": prefs.get("budget_max", 999999),
259
+ "category": list(prefs.get("preferred_categories", set())),
260
+ "brand": list(prefs.get("favorite_brands", set())),
261
+ }
262
+ return kb.recommend("constraint", constraints=constraints, n_recommendations=10)
263
+
264
+ cf_p, cf_r = approach_precision_recall(cf_recommender, test_users)
265
+ if cf_p:
266
  approach_results.append({
267
  "approach": "Collaborative Filtering",
268
+ "Precision@5": round(np.mean(cf_p), 4),
269
+ "Recall@5": round(np.mean(cf_r), 4),
270
+ })
271
+
272
+ cb_p, cb_r = approach_precision_recall(cb_recommender, test_users)
273
+ if cb_p:
274
+ approach_results.append({
275
+ "approach": "Content-Based",
276
+ "Precision@5": round(np.mean(cb_p), 4),
277
+ "Recall@5": round(np.mean(cb_r), 4),
278
  })
279
 
280
+ kb_p, kb_r = approach_precision_recall(kb_recommender, test_users)
281
+ if kb_p:
282
+ approach_results.append({
283
+ "approach": "Knowledge-Based",
284
+ "Precision@5": round(np.mean(kb_p), 4),
285
+ "Recall@5": round(np.mean(kb_r), 4),
286
+ })
287
+
288
+ best_approach = max(approach_results, key=lambda a: a.get("Precision@5", 0))["approach"] if approach_results else None
289
+
290
  return jsonify({
291
  "cf_methods": cf_results,
292
  "best_cf_method": best_cf,
293
  "approaches": approach_results,
294
+ "best_approach": best_approach,
295
  })
296
  except Exception as e:
297
  return jsonify({"error": str(e)}), 500
recommender/evaluation.py CHANGED
@@ -7,6 +7,23 @@ class Evaluator:
7
  def __init__(self, ratings_df, predictions_df=None):
8
  self.ratings = ratings_df
9
  self.predictions = predictions_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  def rmse(self, y_true, y_pred):
12
  return float(np.sqrt(mean_squared_error(y_true, y_pred)))
@@ -128,6 +145,7 @@ class Evaluator:
128
  }
129
 
130
  def compare_approaches(self, cf_instance, cb_instance, kb_instance, test_ratings, products_df, k=5):
 
131
  test_users = test_ratings["user_id"].unique()[:20]
132
 
133
  def cf_recommender(uid):
 
7
  def __init__(self, ratings_df, predictions_df=None):
8
  self.ratings = ratings_df
9
  self.predictions = predictions_df
10
+ self._test_ratings = None
11
+
12
+ def set_test_ratings(self, test_ratings):
13
+ self._test_ratings = test_ratings
14
+
15
+ def _get_relevant_for_user(self, user_id, rating_threshold=3.5):
16
+ if self._test_ratings is not None:
17
+ relevant = self._test_ratings[
18
+ (self._test_ratings["user_id"] == user_id) &
19
+ (self._test_ratings["rating"] >= rating_threshold)
20
+ ]["product_id"].tolist()
21
+ else:
22
+ relevant = self.ratings[
23
+ (self.ratings["user_id"] == user_id) &
24
+ (self.ratings["rating"] >= rating_threshold)
25
+ ]["product_id"].tolist()
26
+ return relevant
27
 
28
  def rmse(self, y_true, y_pred):
29
  return float(np.sqrt(mean_squared_error(y_true, y_pred)))
 
145
  }
146
 
147
  def compare_approaches(self, cf_instance, cb_instance, kb_instance, test_ratings, products_df, k=5):
148
+ self.set_test_ratings(test_ratings)
149
  test_users = test_ratings["user_id"].unique()[:20]
150
 
151
  def cf_recommender(uid):
templates/evaluation.html CHANGED
@@ -270,9 +270,11 @@
270
  tbody.appendChild(tr);
271
  return;
272
  }
 
273
  const tr = document.createElement('tr');
 
274
  tr.innerHTML = `
275
- <td><strong>${row.approach}</strong></td>
276
  <td>${row['Precision@5']?.toFixed(4) || 'N/A'}</td>
277
  <td>${row['Recall@5']?.toFixed(4) || 'N/A'}</td>
278
  `;
@@ -286,16 +288,25 @@
286
  const methodLabels = { user_based: 'User-Based', item_based: 'Item-Based', svd: 'SVD', knn: 'KNN', slope_one: 'Slope One' };
287
 
288
  if (data.best_cf_method) {
 
 
289
  document.getElementById('analysisBestMethod').innerHTML =
290
- `<strong>${methodLabels[data.best_cf_method] || data.best_cf_method}</strong> achieves the lowest RMSE among all CF methods. ` +
291
  `SVD (Matrix Factorization) typically performs best because it captures latent factors in the user-item interaction matrix, ` +
292
  `handling sparsity better than memory-based methods like User-Based or Item-Based CF.`;
293
  }
294
 
295
- document.getElementById('analysisBestApproach').innerHTML =
296
- `<strong>Collaborative Filtering</strong> generally performs best when sufficient rating data exists for a user. ` +
297
- `Content-Based works well for new items but suffers from overspecialization. ` +
298
- `Knowledge-Based excels in cold-start scenarios and when users have explicit constraints.`;
 
 
 
 
 
 
 
299
 
300
  document.getElementById('analysisConditions').innerHTML = `
301
  <b>• Dense user data:</b> Collaborative Filtering (leverages peer patterns)<br>
 
270
  tbody.appendChild(tr);
271
  return;
272
  }
273
+ const isBest = data.best_approach && row.approach === data.best_approach;
274
  const tr = document.createElement('tr');
275
+ if (isBest) tr.className = 'best-row';
276
  tr.innerHTML = `
277
+ <td><strong>${row.approach}</strong> ${isBest ? '<span class="badge-best">BEST</span>' : ''}</td>
278
  <td>${row['Precision@5']?.toFixed(4) || 'N/A'}</td>
279
  <td>${row['Recall@5']?.toFixed(4) || 'N/A'}</td>
280
  `;
 
288
  const methodLabels = { user_based: 'User-Based', item_based: 'Item-Based', svd: 'SVD', knn: 'KNN', slope_one: 'Slope One' };
289
 
290
  if (data.best_cf_method) {
291
+ const best = data.cf_methods.find(r => r.method === data.best_cf_method);
292
+ const rmseVal = best ? best.RMSE.toFixed(4) : '—';
293
  document.getElementById('analysisBestMethod').innerHTML =
294
+ `<strong>${methodLabels[data.best_cf_method] || data.best_cf_method}</strong> achieves the lowest RMSE (${rmseVal}) among all CF methods. ` +
295
  `SVD (Matrix Factorization) typically performs best because it captures latent factors in the user-item interaction matrix, ` +
296
  `handling sparsity better than memory-based methods like User-Based or Item-Based CF.`;
297
  }
298
 
299
+ if (data.best_approach && data.approaches && data.approaches.length > 0) {
300
+ const best = data.approaches.find(a => a.approach === data.best_approach);
301
+ const precVal = best ? best['Precision@5'].toFixed(4) : '—';
302
+ document.getElementById('analysisBestApproach').innerHTML =
303
+ `<strong>${data.best_approach}</strong> achieves the highest Precision@5 (${precVal}) on this dataset. ` +
304
+ `Collaborative Filtering generally performs best when sufficient rating data exists. ` +
305
+ `Content-Based works well for new items but suffers from overspecialization. ` +
306
+ `Knowledge-Based excels in cold-start scenarios and when users have explicit constraints.`;
307
+ } else {
308
+ document.getElementById('analysisBestApproach').textContent = 'Run evaluation to compare approaches.';
309
+ }
310
 
311
  document.getElementById('analysisConditions').innerHTML = `
312
  <b>• Dense user data:</b> Collaborative Filtering (leverages peer patterns)<br>