# recsys-ecommerce / src / evaluator.py
# dscsdvdfsvs's picture
# fix: upload src folder with model classes
# 80843b0 verified
import numpy as np
import pandas as pd
import logging
# Configure the root logger when this module is imported: INFO level and
# above, each record rendered as "<timestamp> - <message>".
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(message)s")
# Module-level logger, named after this module, used by the functions below.
log = logging.getLogger(__name__)
def precision_at_k(recommended, relevant, k):
    """Return the share of the top-``k`` recommendations that are relevant.

    Args:
        recommended: Ranked, sliceable sequence of recommended items.
        relevant: Sized container of ground-truth items (supports ``in``).
        k: Cutoff rank.

    Returns:
        Number of hits among ``recommended[:k]`` divided by ``k``. The
        denominator is ``k`` even when fewer than ``k`` items were
        recommended. ``0.0`` when ``k`` is zero or ``relevant`` is empty.
    """
    if k == 0:
        return 0.0
    if len(relevant) == 0:
        return 0.0

    hit_count = 0
    for candidate in recommended[:k]:
        if candidate in relevant:
            hit_count += 1
    return hit_count / k
def recall_at_k(recommended, relevant, k):
    """Return the share of relevant items found in the top-``k`` recommendations.

    Each relevant item is counted at most once, so a recommendation list
    containing duplicates can never push recall above 1.0.

    Args:
        recommended: Ranked, sliceable sequence of recommended items.
        relevant: Collection of ground-truth items (hashable).
        k: Cutoff rank.

    Returns:
        ``|relevant ∩ recommended[:k]| / |relevant|`` as a float in
        ``[0.0, 1.0]``. ``0.0`` when ``k`` is not positive or ``relevant``
        is empty.
    """
    # A non-positive cutoff selects nothing; a negative k would otherwise
    # slice from the end of the list and produce a meaningless score.
    if k <= 0 or len(relevant) == 0:
        return 0.0

    # Work on a set so membership is O(1) and duplicate ground-truth entries
    # do not inflate the denominator.
    if isinstance(relevant, (set, frozenset)):
        relevant_set = relevant
    else:
        relevant_set = set(relevant)

    found = relevant_set.intersection(recommended[:k])
    return len(found) / len(relevant_set)
def ndcg_at_k(recommended, relevant, k):
    """Return binary-relevance NDCG for the top-``k`` recommendations.

    A hit at zero-based rank ``i`` contributes ``1 / log2(i + 2)`` to DCG.
    The ideal DCG places ``min(|relevant|, k)`` hits at the top ranks. Each
    relevant item is credited only at its first occurrence, so duplicated
    recommendations cannot push the score above 1.0.

    Args:
        recommended: Ranked, sliceable sequence of recommended items.
        relevant: Collection of ground-truth items (hashable).
        k: Cutoff rank.

    Returns:
        DCG divided by ideal DCG as a plain ``float`` in ``[0.0, 1.0]``.
        ``0.0`` when ``k`` is not positive or ``relevant`` is empty.
    """
    # A non-positive cutoff selects nothing; negative k would slice from the
    # end of the list.
    if k <= 0 or len(relevant) == 0:
        return 0.0

    if isinstance(relevant, (set, frozenset)):
        relevant_set = relevant
    else:
        relevant_set = set(relevant)

    credited = set()
    dcg = 0.0
    for rank, item in enumerate(recommended[:k]):
        # Only the first occurrence of a relevant item earns gain.
        if item in relevant_set and item not in credited:
            credited.add(item)
            dcg += 1.0 / np.log2(rank + 2)

    ideal_hits = min(len(relevant_set), k)
    idcg = sum(1.0 / np.log2(rank + 2) for rank in range(ideal_hits))
    return float(dcg / idcg) if idcg > 0 else 0.0
def hit_rate_at_k(recommended, relevant, k):
    """Return ``1.0`` if any of the top-``k`` recommendations is relevant.

    Args:
        recommended: Ranked, sliceable sequence of recommended items.
        relevant: Container of ground-truth items (supports ``in``).
        k: Cutoff rank.

    Returns:
        ``1.0`` when at least one item of ``recommended[:k]`` is in
        ``relevant``, otherwise ``0.0``.
    """
    for candidate in recommended[:k]:
        if candidate in relevant:
            return 1.0
    return 0.0
def _mean_or_zero(values):
    """Return the arithmetic mean of ``values`` as a float, or 0.0 if empty."""
    # np.mean of an empty sequence yields NaN and emits a RuntimeWarning.
    return float(np.mean(values)) if len(values) > 0 else 0.0


def evaluate_model(model, test_df, train_df, k_values=None, n_users=None):
    """Compute mean ranking metrics for ``model`` over held-out interactions.

    Only users that occur in both ``train_df`` and ``test_df`` are evaluated.
    For each such user the model is asked once for ``max(k_values)``
    recommendations, which are then scored at every cutoff.

    Args:
        model: Object exposing ``recommend(user_idx, k=...)`` that returns a
            ranked sequence of item ids.
        test_df: DataFrame with ``user_idx`` and ``item_idx`` columns; a
            user's test items form that user's relevant set.
        train_df: DataFrame with a ``user_idx`` column.
        k_values: Iterable of cutoffs; defaults to ``[5, 10, 20]``.
        n_users: If given, evaluate only the first ``n_users`` eligible users.

    Returns:
        Dict with ``precision@k``, ``recall@k``, ``ndcg@k`` and
        ``hit_rate@k`` for every cutoff, plus ``n_users_evaluated``. Metric
        values are ``0.0`` when no user could be evaluated.
    """
    if k_values is None:
        k_values = [5, 10, 20]
    max_k = max(k_values)

    train_users = set(train_df["user_idx"].unique())
    test_users = test_df[test_df["user_idx"].isin(train_users)]["user_idx"].unique()
    if n_users is not None:
        test_users = test_users[:n_users]
    log.info(f"Evaluating {len(test_users):,} users at K={k_values}")

    ground_truth = (
        test_df[test_df["user_idx"].isin(test_users)]
        .groupby("user_idx")["item_idx"]
        .apply(set).to_dict()
    )

    # Insertion order fixes the key order of the returned summary.
    metric_fns = {
        "precision": precision_at_k,
        "recall": recall_at_k,
        "ndcg": ndcg_at_k,
        "hit_rate": hit_rate_at_k,
    }
    results = {k: {name: [] for name in metric_fns} for k in k_values}

    skipped = 0
    failed = 0
    first_error = None
    for user_idx in test_users:
        relevant = ground_truth.get(user_idx, set())
        if not relevant:
            skipped += 1
            continue
        try:
            recs = model.recommend(user_idx, k=max_k)
        except Exception as exc:
            # Best effort: one failing user must not abort the whole run, but
            # remember the cause so it can be reported below.
            skipped += 1
            failed += 1
            if first_error is None:
                first_error = exc
            continue
        for k in k_values:
            for name, metric in metric_fns.items():
                results[k][name].append(metric(recs, relevant, k))

    if skipped > 0:
        log.warning(f"Skipped {skipped} users")
    if failed > 0:
        log.warning(
            "model.recommend() raised for %d users; first error: %r",
            failed,
            first_error,
        )

    summary = {}
    for k in k_values:
        for name in metric_fns:
            summary[f"{name}@{k}"] = _mean_or_zero(results[k][name])
    summary["n_users_evaluated"] = int(len(test_users) - skipped)
    return summary
def catalog_coverage(model, test_users, n_items, k=10):
    """Return the fraction of the catalog that appears in any user's top-``k``.

    Args:
        model: Object exposing ``recommend(user, k=...)`` that returns an
            iterable of hashable item ids.
        test_users: Iterable of user ids to request recommendations for.
        n_items: Total number of items in the catalog.
        k: Number of recommendations requested per user.

    Returns:
        Count of distinct recommended items divided by ``n_items``, or
        ``0.0`` when ``n_items`` is not positive.
    """
    # An empty catalog has no coverage to measure; this also avoids a
    # ZeroDivisionError and skips the model calls entirely.
    if n_items <= 0:
        return 0.0

    recommended_items = set()
    failures = 0
    for u in test_users:
        try:
            recs = model.recommend(u, k=k)
            recommended_items.update(recs)
        except Exception:
            # Best effort: users the model cannot serve are left out.
            failures += 1
            continue

    if failures:
        log.warning(
            "catalog_coverage: recommendations failed for %d users", failures
        )
    return len(recommended_items) / n_items
def intra_list_diversity(model, test_users, item_feats, k=10, sample=200,
                         feature_col="popularity_score"):
    """Return the mean pairwise feature spread within recommendation lists.

    For each sampled user, the diversity of the top-``k`` list is the mean
    absolute difference of ``feature_col`` over all unordered pairs of
    recommended items. The result is the mean of those per-user values.

    Args:
        model: Object exposing ``recommend(user, k=...)`` that returns an
            iterable of item ids.
        test_users: Iterable of user ids; only the first ``sample`` are used.
        item_feats: DataFrame with an ``item_idx`` column and a numeric
            ``feature_col`` column.
        k: Number of recommendations requested per user.
        sample: Maximum number of users to evaluate.
        feature_col: Name of the ``item_feats`` column used as the item
            feature. Defaults to ``"popularity_score"``.

    Returns:
        Mean per-user diversity as a float, or ``0.0`` when no user produced
        a list of at least two items.
    """
    feat_map = item_feats.set_index("item_idx")[feature_col].to_dict()

    diversities = []
    failures = 0
    for u in list(test_users)[:sample]:
        try:
            recs = model.recommend(u, k=k)
        except Exception:
            # Best effort: users the model cannot serve are left out.
            failures += 1
            continue

        # Items missing from item_feats are treated as having feature 0.0.
        scores = np.array([feat_map.get(i, 0.0) for i in recs])
        if len(scores) < 2:
            continue

        # Index the upper-triangle pairs directly rather than building the
        # full n x n difference matrix.
        left, right = np.triu_indices(len(scores), k=1)
        diversities.append(np.abs(scores[left] - scores[right]).mean())

    if failures:
        log.warning(
            "intra_list_diversity: recommendations failed for %d users", failures
        )
    return float(np.mean(diversities)) if diversities else 0.0