Spaces:

Abdallah4Zain
/

TasteEngine

Sleeping

Ahmed694200 committed 23 days ago

Commit

7f47e42

1 Parent(s): e8eed09

Complete 3-approach evaluation comparison in API and UI

- Added _get_relevant_for_user to Evaluator for Precision/Recall ground truth
- Extended /api/evaluate to compare CF, Content-Based, and Knowledge-Based
- Updated evaluation dashboard to highlight best approach with data-driven insights

Files changed (3) hide show

app.py +58 -14
recommender/evaluation.py +18 -0
templates/evaluation.html +17 -6

app.py CHANGED Viewed

@@ -222,32 +222,76 @@ def api_evaluate():
                 best_rmse = r["RMSE"]
                 best_cf = r["method"]
         approach_results = []
-        def run_cf_for_user(uid):
             return cf.recommend("svd", uid, n_recommendations=10)
-        cf_precisions = []
-        cf_recalls = []
-        test_users = TEST["user_id"].unique()[:20]
-        for uid in test_users:
-            recs = run_cf_for_user(uid)
-            rec_items = [r[0] for r in recs]
-            relevant = TEST[(TEST["user_id"] == uid) & (TEST["rating"] >= 3.5)]["product_id"].tolist()
-            if relevant:
-                cf_precisions.append(evaluator.precision_at_k(rec_items, relevant, 5))
-                cf_recalls.append(evaluator.recall_at_k(rec_items, relevant, 5))
-        if cf_precisions:
             approach_results.append({
                 "approach": "Collaborative Filtering",
-                "Precision@5": round(np.mean(cf_precisions), 4),
-                "Recall@5": round(np.mean(cf_recalls), 4),
             })
         return jsonify({
             "cf_methods": cf_results,
             "best_cf_method": best_cf,
             "approaches": approach_results,
         })
     except Exception as e:
         return jsonify({"error": str(e)}), 500

                 best_rmse = r["RMSE"]
                 best_cf = r["method"]
+        evaluator.set_test_ratings(TEST)
+        test_users = TEST["user_id"].unique()[:20]
         approach_results = []
+        def approach_precision_recall(recommender_fn, test_users):
+            precisions, recalls = [], []
+            for uid in test_users:
+                try:
+                    recs = recommender_fn(uid)
+                except Exception:
+                    recs = []
+                rec_items = [r[0] for r in recs]
+                relevant = evaluator._get_relevant_for_user(uid)
+                if relevant:
+                    precisions.append(evaluator.precision_at_k(rec_items, relevant, 5))
+                    recalls.append(evaluator.recall_at_k(rec_items, relevant, 5))
+            return precisions, recalls
+        def cf_recommender(uid):
             return cf.recommend("svd", uid, n_recommendations=10)
+        train_ratings = ratings[~ratings.index.isin(TEST.index)]
+        def cb_recommender(uid):
+            profile = train_ratings[
+                (train_ratings["user_id"] == uid) & (train_ratings["rating"] >= 3.5)
+            ]["product_id"].tolist()
+            return cb.recommend("tfidf", user_profile_items=profile, n_recommendations=10)
+        def kb_recommender(uid):
+            prefs = get_user_preferences(users, uid)
+            constraints = {
+                "budget_min": prefs.get("budget_min", 0),
+                "budget_max": prefs.get("budget_max", 999999),
+                "category": list(prefs.get("preferred_categories", set())),
+                "brand": list(prefs.get("favorite_brands", set())),
+            }
+            return kb.recommend("constraint", constraints=constraints, n_recommendations=10)
+        cf_p, cf_r = approach_precision_recall(cf_recommender, test_users)
+        if cf_p:
             approach_results.append({
                 "approach": "Collaborative Filtering",
+                "Precision@5": round(np.mean(cf_p), 4),
+                "Recall@5": round(np.mean(cf_r), 4),
+            })
+        cb_p, cb_r = approach_precision_recall(cb_recommender, test_users)
+        if cb_p:
+            approach_results.append({
+                "approach": "Content-Based",
+                "Precision@5": round(np.mean(cb_p), 4),
+                "Recall@5": round(np.mean(cb_r), 4),
             })
+        kb_p, kb_r = approach_precision_recall(kb_recommender, test_users)
+        if kb_p:
+            approach_results.append({
+                "approach": "Knowledge-Based",
+                "Precision@5": round(np.mean(kb_p), 4),
+                "Recall@5": round(np.mean(kb_r), 4),
+            })
+        best_approach = max(approach_results, key=lambda a: a.get("Precision@5", 0))["approach"] if approach_results else None
         return jsonify({
             "cf_methods": cf_results,
             "best_cf_method": best_cf,
             "approaches": approach_results,
+            "best_approach": best_approach,
         })
     except Exception as e:
         return jsonify({"error": str(e)}), 500

recommender/evaluation.py CHANGED Viewed

@@ -7,6 +7,23 @@ class Evaluator:
     def __init__(self, ratings_df, predictions_df=None):
         self.ratings = ratings_df
         self.predictions = predictions_df
     def rmse(self, y_true, y_pred):
         return float(np.sqrt(mean_squared_error(y_true, y_pred)))
@@ -128,6 +145,7 @@ class Evaluator:
         }
     def compare_approaches(self, cf_instance, cb_instance, kb_instance, test_ratings, products_df, k=5):
         test_users = test_ratings["user_id"].unique()[:20]
         def cf_recommender(uid):

     def __init__(self, ratings_df, predictions_df=None):
         self.ratings = ratings_df
         self.predictions = predictions_df
+        self._test_ratings = None
+    def set_test_ratings(self, test_ratings):
+        self._test_ratings = test_ratings
+    def _get_relevant_for_user(self, user_id, rating_threshold=3.5):
+        if self._test_ratings is not None:
+            relevant = self._test_ratings[
+                (self._test_ratings["user_id"] == user_id) &
+                (self._test_ratings["rating"] >= rating_threshold)
+            ]["product_id"].tolist()
+        else:
+            relevant = self.ratings[
+                (self.ratings["user_id"] == user_id) &
+                (self.ratings["rating"] >= rating_threshold)
+            ]["product_id"].tolist()
+        return relevant
     def rmse(self, y_true, y_pred):
         return float(np.sqrt(mean_squared_error(y_true, y_pred)))
         }
     def compare_approaches(self, cf_instance, cb_instance, kb_instance, test_ratings, products_df, k=5):
+        self.set_test_ratings(test_ratings)
         test_users = test_ratings["user_id"].unique()[:20]
         def cf_recommender(uid):

templates/evaluation.html CHANGED Viewed

@@ -270,9 +270,11 @@
                     tbody.appendChild(tr);
                     return;
                 }
                 const tr = document.createElement('tr');
                 tr.innerHTML = `
-                    <td><strong>${row.approach}</strong></td>
                     <td>${row['Precision@5']?.toFixed(4) || 'N/A'}</td>
                     <td>${row['Recall@5']?.toFixed(4) || 'N/A'}</td>
                 `;
@@ -286,16 +288,25 @@
             const methodLabels = { user_based: 'User-Based', item_based: 'Item-Based', svd: 'SVD', knn: 'KNN', slope_one: 'Slope One' };
             if (data.best_cf_method) {
                 document.getElementById('analysisBestMethod').innerHTML =
-                    `<strong>${methodLabels[data.best_cf_method] || data.best_cf_method}</strong> achieves the lowest RMSE among all CF methods. ` +
                     `SVD (Matrix Factorization) typically performs best because it captures latent factors in the user-item interaction matrix, ` +
                     `handling sparsity better than memory-based methods like User-Based or Item-Based CF.`;
             }
-            document.getElementById('analysisBestApproach').innerHTML =
-                `<strong>Collaborative Filtering</strong> generally performs best when sufficient rating data exists for a user. ` +
-                `Content-Based works well for new items but suffers from overspecialization. ` +
-                `Knowledge-Based excels in cold-start scenarios and when users have explicit constraints.`;
             document.getElementById('analysisConditions').innerHTML = `
                 <b>• Dense user data:</b> Collaborative Filtering (leverages peer patterns)<br>

                     tbody.appendChild(tr);
                     return;
                 }
+                const isBest = data.best_approach && row.approach === data.best_approach;
                 const tr = document.createElement('tr');
+                if (isBest) tr.className = 'best-row';
                 tr.innerHTML = `
+                    <td><strong>${row.approach}</strong> ${isBest ? '<span class="badge-best">BEST</span>' : ''}</td>
                     <td>${row['Precision@5']?.toFixed(4) || 'N/A'}</td>
                     <td>${row['Recall@5']?.toFixed(4) || 'N/A'}</td>
                 `;
             const methodLabels = { user_based: 'User-Based', item_based: 'Item-Based', svd: 'SVD', knn: 'KNN', slope_one: 'Slope One' };
             if (data.best_cf_method) {
+                const best = data.cf_methods.find(r => r.method === data.best_cf_method);
+                const rmseVal = best ? best.RMSE.toFixed(4) : '—';
                 document.getElementById('analysisBestMethod').innerHTML =
+                    `<strong>${methodLabels[data.best_cf_method] || data.best_cf_method}</strong> achieves the lowest RMSE (${rmseVal}) among all CF methods. ` +
                     `SVD (Matrix Factorization) typically performs best because it captures latent factors in the user-item interaction matrix, ` +
                     `handling sparsity better than memory-based methods like User-Based or Item-Based CF.`;
             }
+            if (data.best_approach && data.approaches && data.approaches.length > 0) {
+                const best = data.approaches.find(a => a.approach === data.best_approach);
+                const precVal = best ? best['Precision@5'].toFixed(4) : '—';
+                document.getElementById('analysisBestApproach').innerHTML =
+                    `<strong>${data.best_approach}</strong> achieves the highest Precision@5 (${precVal}) on this dataset. ` +
+                    `Collaborative Filtering generally performs best when sufficient rating data exists. ` +
+                    `Content-Based works well for new items but suffers from overspecialization. ` +
+                    `Knowledge-Based excels in cold-start scenarios and when users have explicit constraints.`;
+            } else {
+                document.getElementById('analysisBestApproach').textContent = 'Run evaluation to compare approaches.';
+            }
             document.getElementById('analysisConditions').innerHTML = `
                 <b>• Dense user data:</b> Collaborative Filtering (leverages peer patterns)<br>