Spaces:
Sleeping
Sleeping
| import numpy as np | |
| import pandas as pd | |
| import logging | |
# Configure root logging at import time: INFO and above, each record
# prefixed with its timestamp.
logging.basicConfig(
    format="%(asctime)s - %(message)s",
    level=logging.INFO,
)

# Module-scoped logger used by the evaluation routines in this file.
log = logging.getLogger(__name__)
def precision_at_k(recommended, relevant, k):
    """Return the fraction of the top-``k`` recommendations that are relevant.

    Args:
        recommended: Ranked, sliceable sequence of recommended items.
        relevant: Container of ground-truth items; only ``len()`` and
            membership tests are used on it.
        k: Cutoff rank. The denominator is always ``k``, even when fewer
            than ``k`` recommendations are supplied.

    Returns:
        ``hits / k``, or ``0.0`` when ``k`` is zero or ``relevant`` is empty.
    """
    if k != 0 and len(relevant) != 0:
        n_hits = 0
        for candidate in recommended[:k]:
            if candidate in relevant:
                n_hits += 1
        return n_hits / k
    return 0.0
def recall_at_k(recommended, relevant, k):
    """Return the fraction of relevant items found in the top-``k`` list.

    Each relevant item is counted at most once, so a recommendation list
    that repeats an item cannot push the result above 1.0.

    Args:
        recommended: Ranked, sliceable sequence of recommended items.
            Items must be hashable.
        relevant: Container of ground-truth items; only ``len()`` and
            membership tests are used on it.
        k: Cutoff rank.

    Returns:
        ``distinct_hits / len(relevant)``, or ``0.0`` when ``k`` is not
        positive or ``relevant`` is empty.
    """
    # A non-positive cutoff selects nothing; without this guard a negative
    # k would silently slice from the end of the list instead.
    if k <= 0 or len(relevant) == 0:
        return 0.0
    # A set collapses repeated recommendations of the same item into one hit.
    distinct_hits = {item for item in recommended[:k] if item in relevant}
    return len(distinct_hits) / len(relevant)
def ndcg_at_k(recommended, relevant, k):
    """Return binary-relevance NDCG of the top-``k`` recommendations.

    A relevant item at zero-based rank ``i`` contributes ``1 / log2(i + 2)``
    to the DCG. The ideal DCG places ``min(len(relevant), k)`` relevant
    items at the top ranks. Each relevant item is credited only at its
    first occurrence, so repeated recommendations cannot push the score
    above 1.0.

    Args:
        recommended: Ranked, sliceable sequence of recommended items.
            Items must be hashable.
        relevant: Container of ground-truth items; only ``len()`` and
            membership tests are used on it.
        k: Cutoff rank.

    Returns:
        ``dcg / idcg`` as a float, or ``0.0`` when ``k`` is not positive or
        ``relevant`` is empty.
    """
    # A non-positive cutoff selects nothing; a negative k would otherwise
    # slice from the end of the list.
    if k <= 0 or len(relevant) == 0:
        return 0.0

    credited = set()
    dcg = 0.0
    for rank, item in enumerate(recommended[:k]):
        if item in relevant and item not in credited:
            credited.add(item)
            dcg += 1.0 / np.log2(rank + 2)

    ideal_hits = min(len(relevant), k)
    idcg = sum(1.0 / np.log2(rank + 2) for rank in range(ideal_hits))
    return float(dcg / idcg) if idcg > 0 else 0.0
def hit_rate_at_k(recommended, relevant, k):
    """Return 1.0 if any top-``k`` recommendation is relevant, else 0.0.

    Args:
        recommended: Ranked, sliceable sequence of recommended items.
        relevant: Container of ground-truth items; only membership tests
            are used on it.
        k: Cutoff rank applied as ``recommended[:k]``.

    Returns:
        ``1.0`` on the first relevant item found within the cutoff,
        otherwise ``0.0``.
    """
    for candidate in recommended[:k]:
        if candidate in relevant:
            return 1.0
    return 0.0
def _mean_or_zero(values):
    """Return the mean of ``values`` as a float, or 0.0 when there are none."""
    # np.mean([]) would emit a RuntimeWarning and produce NaN.
    return float(np.mean(values)) if len(values) > 0 else 0.0


def evaluate_model(model, test_df, train_df, k_values=None, n_users=None):
    """Compute mean ranking metrics for ``model`` over held-out interactions.

    Only users present in both ``train_df`` and ``test_df`` are scored. For
    each such user the model is asked once for ``max(k_values)``
    recommendations, and precision, recall, NDCG and hit rate are computed
    at every cutoff from that single list.

    Args:
        model: Object exposing ``recommend(user_idx, k=...)`` whose result
            is a ranked, sliceable sequence of items that can be tested
            for membership against ``item_idx`` values.
        test_df: DataFrame with ``user_idx`` and ``item_idx`` columns; each
            user's set of ``item_idx`` values is that user's ground truth.
        train_df: DataFrame with a ``user_idx`` column, used only to
            restrict evaluation to users seen in training.
        k_values: Cutoffs to evaluate. Defaults to ``[5, 10, 20]``.
        n_users: If given, evaluate only the first ``n_users`` eligible
            users.

    Returns:
        Dict with ``precision@k``, ``recall@k``, ``ndcg@k`` and
        ``hit_rate@k`` (floats) for every ``k`` in ``k_values``, plus
        ``n_users_evaluated`` (int). When no user could be evaluated the
        metric values are ``0.0``.

    Raises:
        ValueError: If ``k_values`` is empty.
    """
    if k_values is None:
        k_values = [5, 10, 20]
    if len(k_values) == 0:
        raise ValueError("k_values must contain at least one cutoff")
    max_k = max(k_values)

    train_users = set(train_df["user_idx"].unique())
    test_users = test_df[test_df["user_idx"].isin(train_users)]["user_idx"].unique()
    if n_users is not None:
        test_users = test_users[:n_users]
    log.info("Evaluating %s users at K=%s", f"{len(test_users):,}", k_values)

    ground_truth = (
        test_df[test_df["user_idx"].isin(test_users)]
        .groupby("user_idx")["item_idx"]
        .apply(set)
        .to_dict()
    )

    # Insertion order here fixes the key order of the returned summary.
    metric_fns = {
        "precision": precision_at_k,
        "recall": recall_at_k,
        "ndcg": ndcg_at_k,
        "hit_rate": hit_rate_at_k,
    }
    results = {k: {name: [] for name in metric_fns} for k in k_values}

    skipped = 0
    for user_idx in test_users:
        relevant = ground_truth.get(user_idx)
        if not relevant:
            skipped += 1
            continue
        try:
            recs = model.recommend(user_idx, k=max_k)
        except Exception:
            # Best-effort: a user the model cannot score is skipped, but the
            # cause is kept available at DEBUG level.
            log.debug("recommend() failed for user %s", user_idx, exc_info=True)
            skipped += 1
            continue
        for k in k_values:
            for name, metric in metric_fns.items():
                results[k][name].append(metric(recs, relevant, k))

    if skipped > 0:
        log.warning("Skipped %d users", skipped)

    summary = {}
    for k in k_values:
        for name in metric_fns:
            summary[f"{name}@{k}"] = _mean_or_zero(results[k][name])
    summary["n_users_evaluated"] = int(len(test_users) - skipped)
    return summary
def catalog_coverage(model, test_users, n_items, k=10):
    """Return the share of the catalog that appears in any top-``k`` list.

    Args:
        model: Object exposing ``recommend(user, k=...)`` whose result is an
            iterable of hashable items.
        test_users: Iterable of users to request recommendations for.
        n_items: Total number of items in the catalog (the denominator).
        k: Number of recommendations requested per user.

    Returns:
        ``distinct_recommended_items / n_items``, or ``0.0`` when
        ``n_items`` is not positive.
    """
    if n_items <= 0:
        # An empty catalog has nothing to cover; avoid dividing by zero.
        return 0.0

    recommended_items = set()
    for user in test_users:
        try:
            recommended_items.update(model.recommend(user, k=k))
        except Exception:
            # Best-effort: users the model cannot produce a usable list for
            # simply contribute nothing to coverage.
            continue
    return len(recommended_items) / n_items
def intra_list_diversity(model, test_users, item_feats, k=10, sample=200):
    """Return the mean pairwise popularity gap within recommendation lists.

    For each of the first ``sample`` users, the recommended items are mapped
    to their ``popularity_score`` (``0.0`` for items missing from
    ``item_feats``) and the absolute difference is averaged over every
    unordered pair of items in the list. The per-user averages are then
    averaged across users.

    Args:
        model: Object exposing ``recommend(user, k=...)`` whose result is an
            iterable of items.
        test_users: Iterable of users; only the first ``sample`` are used.
        item_feats: DataFrame with ``item_idx`` and ``popularity_score``
            columns.
        k: Number of recommendations requested per user.
        sample: Maximum number of users to evaluate.

    Returns:
        The mean per-user diversity as a float, or ``0.0`` when no user
        produced a list of at least two items.
    """
    popularity = item_feats.set_index("item_idx")["popularity_score"].to_dict()
    sampled_users = list(test_users)[:sample]

    per_user = []
    for user in sampled_users:
        try:
            rec_list = model.recommend(user, k=k)
        except Exception:
            # Users the model cannot score are left out of the average.
            continue
        values = np.array([popularity.get(item, 0.0) for item in rec_list])
        n_values = len(values)
        if n_values >= 2:
            gaps = np.abs(values[:, None] - values[None, :])
            # Strict upper triangle: each unordered pair exactly once.
            rows, cols = np.triu_indices(n_values, k=1)
            per_user.append(gaps[rows, cols].mean())

    if not per_user:
        return 0.0
    return float(np.mean(per_user))