# chore: remove obsolete files and update project structure (commit 6ad997d)
import sys
import os
sys.path.append(os.getcwd())
import pandas as pd
import numpy as np
import logging
from tqdm import tqdm
from src.services.recommend_service import RecommendationService
from src.data.stores.metadata_store import metadata_store
from src.core.diversity_metrics import compute_diversity_metrics
# Configure root logging once at import time so every module logger emits timestamped records.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)  # module-level logger named after this module
def _get_category(isbn: str) -> str:
    """Look up the simple category label for a book by ISBN.

    Args:
        isbn: Book identifier; coerced to ``str`` before the store lookup.

    Returns:
        The stripped ``simple_categories`` value, or ``"Unknown"`` when the
        book has no metadata record or an empty category.
    """
    meta = metadata_store.get_book_metadata(str(isbn))
    if not meta:
        # Guard: the original raised AttributeError when no metadata record
        # exists (presumably the store returns None/{} then — TODO confirm).
        return "Unknown"
    return (meta.get("simple_categories", "") or "Unknown").strip()
def _load_title_map():
    """Best-effort load of an ISBN -> title map for relaxed title matching.

    Returns an empty dict when the books file cannot be read, so evaluation
    degrades to exact-ISBN matching instead of crashing.
    """
    try:
        books_df = pd.read_csv('data/books_processed.csv', usecols=['isbn13', 'title'])
        # Strip a trailing ".0" left over from float-parsed ISBN columns.
        books_df['isbn13'] = books_df['isbn13'].astype(str).str.replace(r'\.0$', '', regex=True)
        title_map = pd.Series(books_df.title.values, index=books_df.isbn13.values).to_dict()
        logger.info("Loaded ISBN-Title map for relaxed evaluation.")
        return title_map
    except Exception as e:
        logger.warning(f"Could not load books for evaluation: {e}")
        return {}


def _find_rank(target_isbn, rec_isbns, isbn_to_title):
    """Return the 0-based rank of the target in the rec list, or -1 if absent.

    Tries an exact ISBN match first, then a relaxed case-insensitive title
    match (different editions share a title but not an ISBN).
    """
    # 1. Exact match
    if target_isbn in rec_isbns:
        return rec_isbns.index(target_isbn)
    # 2. Relaxed title match (only if exact failed and a title is known)
    target_title = isbn_to_title.get(str(target_isbn), "").lower().strip()
    if target_title:
        for r_idx, r_isbn in enumerate(rec_isbns):
            r_title = isbn_to_title.get(str(r_isbn), "").lower().strip()
            if r_title and r_title == target_title:
                return r_idx
    return -1


def evaluate_baseline(sample_n=1000):
    """Evaluate the recommendation service on the held-out test split.

    Samples up to ``sample_n`` rows from ``data/rec/test.csv`` (fixed seed for
    reproducibility), requests top-50 recommendations per user, and reports
    HR@10, MRR@5 and P3 diversity metrics (category coverage / ILSD at 10).

    Args:
        sample_n: Maximum number of test users to evaluate; pass 0/None to
            evaluate the whole test set.
    """
    logger.info("Initializing Evaluation...")
    # 1. Load Test Data
    test_df = pd.read_csv('data/rec/test.csv')
    # Sample users
    if sample_n and sample_n < len(test_df):
        eval_df = test_df.sample(n=sample_n, random_state=42)
    else:
        eval_df = test_df
    logger.info(f"Evaluating on {len(eval_df)} users...")
    # 2. Init Service
    service = RecommendationService()
    service.load_resources()
    # Load ISBN -> Title map for relaxed (title-level) evaluation
    isbn_to_title = _load_title_map()
    # 3. Predict & Metric
    hits = 0
    mrr_sum = 0.0
    # P3: Diversity metrics (aggregate over all users)
    diversity_cov_sum = 0.0
    diversity_ilsd_sum = 0.0
    diversity_count = 0
    for idx, (_, row) in tqdm(enumerate(eval_df.iterrows()), total=len(eval_df), desc="Evaluating"):
        user_id = row['user_id']
        target_isbn = row['isbn']
        try:
            # We disable favorite filtering for evaluation to handle potential data leakage in test set splits
            recs = service.get_recommendations(user_id, top_k=50, filter_favorites=False)
            # P3: Optional A/B test diversity: enable_diversity_rerank=True by default
            if not recs:
                if idx < 5:  # only log the first few to avoid log spam
                    logger.warning(f"Empty recs for user {user_id}")
                continue
            rec_isbns = [r[0] for r in recs]
            rank = _find_rank(target_isbn, rec_isbns, isbn_to_title)
            # P3: Diversity metrics on top-10
            if rec_isbns:
                d = compute_diversity_metrics(rec_isbns, _get_category, top_k=10)
                diversity_cov_sum += d["category_coverage"]
                diversity_ilsd_sum += d["ilsd"]
                diversity_count += 1
            if rank >= 0:
                # HR@10: hit only counts when the match lands in the top 10
                if rank < 10:
                    hits += 1
                # MRR@5 (Strict): only ranks within the top 5 (1-indexed) contribute
                if (rank + 1) <= 5:
                    mrr_sum += 1.0 / (rank + 1)
            else:
                if idx < 5:
                    logger.info(f"MISS USER {user_id}: Target {target_isbn} not in top {len(rec_isbns)} recs.")
                    logger.info(f"Top 5 Recs: {rec_isbns[:5]}")
                    logger.info(f"Type check - Target: {type(target_isbn)}, Recs: {type(rec_isbns[0]) if rec_isbns else 'N/A'}")
        except Exception as e:
            logger.error(f"Error for user {user_id}: {e}")
            continue
    # 4. Report — guard the denominators: an empty test set previously raised
    # ZeroDivisionError; now it reports zeros instead.
    n_users = max(len(eval_df), 1)
    hr_10 = hits / n_users
    mean_mrr = mrr_sum / n_users
    div_n = max(diversity_count, 1)
    logger.info("==============================")
    logger.info(" EVALUATION RESULTS (Strict)")
    logger.info("==============================")
    logger.info(f"Users Evaluated: {len(eval_df)}")
    logger.info(f"Hit Rate@10: {hr_10:.4f}")
    logger.info(f"MRR@5: {mean_mrr:.4f}")
    logger.info(f"P3 Category Coverage@10: {diversity_cov_sum / div_n:.4f}")
    logger.info(f"P3 ILSD@10: {diversity_ilsd_sum / div_n:.4f}")
    logger.info("==============================")
if __name__ == "__main__":
    # Smaller sample than the default (1000) for a quick smoke-test run.
    evaluate_baseline(sample_n=500) # Fast check