"""Evaluate the personalization recommendation endpoint against the catalog.

For each evaluation query an author with enough books is picked at random,
three of their titles are sent as the user's history, and a fourth title is
held out as the target.  Two figures are reported:

* exact target hit rate: share of completed queries whose recommendations
  contained the held-out title;
* same-author relevance: share of returned recommendations whose title is
  listed in the catalog under the sampled author (an approximation, since
  titles are matched by exact string equality).
"""
import argparse
import random
import sys
from pathlib import Path

import pandas as pd
import requests
from tqdm import tqdm

# Append the parent of this file's directory to sys.path before the `src`
# import below (presumably the repository root -- confirm against layout).
sys.path.append(str(Path(__file__).parent.parent))

from src.personalization.config import settings

CATALOG_PATH = Path("data/catalog/books_catalog.csv")
NUM_SAMPLES = 100

# Number of recommendations requested per query.
TOP_K = 10
# An author needs this many catalog rows to be eligible for sampling.
MIN_BOOKS_PER_AUTHOR = 5
# Seconds to wait for the API before treating the request as failed.
REQUEST_TIMEOUT_S = 30


def main():
    """Run the evaluation queries and print the aggregate metrics.

    Command-line options:
        --host / --port: location of the API (defaults come from ``settings``).
        --samples: number of evaluation queries to attempt.

    Returns ``None`` in all cases; problems (missing catalog, no eligible
    authors, connection failure) are reported on stdout.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--host", type=str, default=settings.HOST)
    parser.add_argument("--port", type=int, default=settings.PORT)
    parser.add_argument(
        "--samples",
        type=int,
        default=NUM_SAMPLES,
        help="Number of evaluation queries",
    )
    args = parser.parse_args()

    api_url = f"http://{args.host}:{args.port}/personalize/recommend"

    print("Loading catalog for ground truth...")
    if not CATALOG_PATH.exists():
        print("Catalog not found!")
        return

    df = pd.read_csv(CATALOG_PATH)

    author_counts = df["authors"].value_counts()
    valid_authors = author_counts[
        author_counts >= MIN_BOOKS_PER_AUTHOR
    ].index.tolist()
    print(
        f"Found {len(valid_authors)} authors with "
        f"{MIN_BOOKS_PER_AUTHOR}+ books."
    )
    if not valid_authors:
        # random.choice() would raise IndexError on an empty list.
        print("No eligible authors in catalog; nothing to evaluate.")
        return

    # title -> set of author strings, built once so each recommendation can
    # be scored individually instead of re-scanning the catalog per query.
    title_to_authors = df.groupby("title")["authors"].agg(set).to_dict()

    hits = 0
    genre_matches = 0
    total_recs = 0
    completed = 0  # queries that produced a usable response

    print(f"Running {args.samples} evaluation queries against {api_url}...")

    for _ in tqdm(range(args.samples)):
        author = random.choice(valid_authors)
        books = df[df["authors"] == author]

        # Four distinct rows: the first three form the history, the fourth
        # is the held-out target.
        sample = books.sample(n=4, replace=False)
        history = sample.iloc[:3]["title"].tolist()
        target_title = sample.iloc[3]["title"]

        payload = {"user_history": history, "top_k": TOP_K}
        try:
            resp = requests.post(
                api_url, json=payload, timeout=REQUEST_TIMEOUT_S
            )
        except requests.RequestException as e:
            # The server is unreachable or timed out; further queries would
            # fail the same way, so stop here.
            print(f"Connection Error: {e}")
            break

        if resp.status_code != 200:
            continue

        try:
            rec_titles = [r["title"] for r in resp.json()]
        except (ValueError, KeyError, TypeError) as e:
            # Body was not JSON or not a list of objects with a "title" key.
            print(f"Malformed response: {e}")
            continue

        completed += 1
        if target_title in rec_titles:
            hits += 1

        # Count recommendations (not catalog rows) attributed to the sampled
        # author, so the ratio below cannot exceed 100%.
        genre_matches += sum(
            author in title_to_authors.get(title, ())
            for title in rec_titles
            if isinstance(title, str)
        )
        total_recs += len(rec_titles)

    if total_recs > 0:
        print("\n--- Evaluation Results ---")
        # Divide by completed queries: skipped or aborted queries never had
        # a chance to hit and must not dilute the rate.
        print(f"Exact Target Hit Rate @ {TOP_K}: {hits / completed:.2%}")
        print(
            f"Same Author Relevance: {genre_matches / total_recs:.2%} (Approx)"
        )
    else:
        print("No results obtained. Check API connection.")


if __name__ == "__main__":
    main()