# File size: 2,716 Bytes (7964128)
import pandas as pd
import requests
import random
import argparse
import sys
from tqdm import tqdm
from pathlib import Path
sys.path.append(str(Path(__file__).parent.parent))
from src.personalization.config import settings
# Catalog CSV that main() loads as ground truth. The path is relative, so it
# resolves against the current working directory of the process.
CATALOG_PATH = Path("data/catalog/books_catalog.csv")
# Number of evaluation queries per run; same value as the --samples default.
NUM_SAMPLES = 100
def main() -> None:
    """Evaluate the recommendation endpoint with a leave-one-out author test.

    For each query, an author with at least five catalog rows is picked at
    random and four of their books are sampled: three titles are sent as
    ``user_history`` and the fourth is held out as the target. Two figures
    are printed at the end:

    * Exact Target Hit Rate: share of successfully evaluated queries whose
      recommendations contain the held-out title.
    * Same Author Relevance: share of all returned recommendations whose
      title belongs to a catalog book by the sampled author.

    Command-line options: ``--host``, ``--port`` (defaults come from
    ``settings``) and ``--samples`` (number of queries).

    Returns:
        None. Results and diagnostics are printed to stdout.
    """
    top_k = 10
    # Seconds to wait for the API; without a timeout a stalled server would
    # block the evaluation forever.
    request_timeout = 10

    parser = argparse.ArgumentParser()
    parser.add_argument("--host", type=str, default=settings.HOST)
    parser.add_argument("--port", type=int, default=settings.PORT)
    parser.add_argument(
        "--samples",
        type=int,
        default=NUM_SAMPLES,
        help="Number of evaluation queries",
    )
    args = parser.parse_args()
    api_url = f"http://{args.host}:{args.port}/personalize/recommend"

    print("Loading catalog for ground truth...")
    if not CATALOG_PATH.exists():
        print("Catalog not found!")
        return
    df = pd.read_csv(CATALOG_PATH)

    author_counts = df['authors'].value_counts()
    valid_authors = author_counts[author_counts >= 5].index.tolist()
    print(f"Found {len(valid_authors)} authors with 5+ books.")
    if not valid_authors:
        # random.choice() raises IndexError on an empty list.
        print("No author has enough books to build an evaluation query.")
        return

    hits = 0
    genre_matches = 0
    total_recs = 0
    evaluated = 0  # queries that produced a usable response

    print(f"Running {args.samples} evaluation queries against {api_url}...")
    for _ in tqdm(range(args.samples)):
        author = random.choice(valid_authors)
        # Every entry of valid_authors has >= 5 rows, so sampling 4 is safe.
        books = df[df['authors'] == author]
        sample = books.sample(n=4, replace=False)
        history = sample.iloc[:3]['title'].tolist()
        target_title = sample.iloc[3]['title']

        payload = {"user_history": history, "top_k": top_k}
        try:
            resp = requests.post(api_url, json=payload, timeout=request_timeout)
        except requests.RequestException as e:
            # The server is unreachable or timed out; further queries would
            # fail the same way, so stop early.
            print(f"Connection Error: {e}")
            break

        if resp.status_code != 200:
            continue

        try:
            recs = resp.json()
            rec_titles = [r['title'] for r in recs]
        except (ValueError, KeyError, TypeError) as e:
            # Not a connection problem: the body was not JSON or did not have
            # the expected list-of-dicts-with-'title' shape.
            print(f"Malformed response: {e}")
            continue

        evaluated += 1
        if target_title in rec_titles:
            hits += 1

        # Count recommendations (not catalog rows) that match one of this
        # author's titles, so the ratio below can never exceed 100%.
        author_titles = set(books['title'])
        genre_matches += sum(1 for title in rec_titles if title in author_titles)
        total_recs += len(recs)

    if total_recs > 0:
        print("\n--- Evaluation Results ---")
        print(f"Queries evaluated: {evaluated} / {args.samples}")
        # Divide by the queries actually evaluated; failed or skipped
        # requests must not be counted as misses.
        print(f"Exact Target Hit Rate @ {top_k}: {hits / evaluated:.2%}")
        print(f"Same Author Relevance: {genre_matches / total_recs:.2%} (Approx)")
    else:
        print("No results obtained. Check API connection.")
# Run the evaluation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()