# utils.py
from typing import Dict, Set


def text_blob(b: Dict) -> str:
    """Build a single searchable text string from a record's fields.

    Reads the "title", "authors", "categories", "subjects" and
    "description" keys of *b* (in that order), drops any that are missing
    or falsy, and joins the rest with " | ".

    Args:
        b: Mapping holding the record's fields; every key is optional.

    Returns:
        The joined text, or "" when no field has a truthy value.
    """
    parts = [
        b.get("title", ""),
        b.get("authors", ""),
        b.get("categories", ""),
        b.get("subjects", ""),
        b.get("description", ""),
    ]
    # str() keeps string fields unchanged while preventing a TypeError from
    # str.join when a field holds a truthy non-string value (list, number).
    return " | ".join(str(p) for p in parts if p)


def _to_tagset(s: str) -> Set[str]:
    """Split a ";"-separated string into a set of normalized tags.

    Each tag is stripped of surrounding whitespace and lower-cased; empty
    tags are discarded. A falsy *s* (None or "") yields an empty set.
    """
    return {t.strip().lower() for t in (s or "").split(";") if t.strip()}


def jaccard(a: str, b: str) -> float:
    """Return the Jaccard similarity of two ";"-separated tag strings.

    Both strings are converted to tag sets with ``_to_tagset``; the result
    is ``|A & B| / |A | B|``.

    Args:
        a: First ";"-separated tag string.
        b: Second ";"-separated tag string.

    Returns:
        A value in [0.0, 1.0]; 0.0 when either tag set is empty.
    """
    A, B = _to_tagset(a), _to_tagset(b)
    # Guard first: an empty side means no overlap, and it also rules out
    # dividing by an empty union.
    if not A or not B:
        return 0.0
    return len(A & B) / len(A | B)