| # utils.py | |
| from typing import Dict, Set | |
def text_blob(b: Dict) -> str:
    """Build a single searchable string from a record's descriptive fields.

    The "title", "authors", "categories", "subjects" and "description"
    entries of *b* are looked up in that order. Keys that are missing, or
    whose value is falsy (for example ``""`` or ``None``), are skipped.
    The remaining values are joined with ``" | "``.

    Args:
        b: Mapping holding the record's fields.

    Returns:
        The joined text, or an empty string when no field has a value.
    """
    field_names = ("title", "authors", "categories", "subjects", "description")
    kept = []
    for key in field_names:
        value = b.get(key, "")
        # Drop absent/empty fields so the output has no dangling separators.
        if value:
            kept.append(value)
    return " | ".join(kept)
def _to_tagset(s: str) -> Set[str]:
    """Split a semicolon-separated tag string into a set of normalized tags.

    Each piece is stripped of surrounding whitespace and lower-cased;
    pieces that are empty after stripping are discarded. A falsy *s*
    (such as ``""`` or ``None``) yields an empty set.

    Args:
        s: Tags separated by ``";"``.

    Returns:
        The set of distinct, normalized tags.
    """
    tags: Set[str] = set()
    if not s:
        return tags
    for piece in s.split(";"):
        cleaned = piece.strip()
        # Skip blanks produced by leading, trailing or doubled separators.
        if cleaned:
            tags.add(cleaned.lower())
    return tags
def jaccard(a: str, b: str) -> float:
    """Return the Jaccard similarity of two semicolon-separated tag strings.

    Both arguments are converted to tag sets with ``_to_tagset``. The
    result is the size of the sets' intersection divided by the size of
    their union.

    Args:
        a: First tag string.
        b: Second tag string.

    Returns:
        The similarity ratio, or ``0.0`` when either tag set is empty.
    """
    left = _to_tagset(a)
    right = _to_tagset(b)
    # Only divide when both sides have tags; an empty side scores 0.0.
    if left and right:
        shared = left & right
        combined = left | right
        return len(shared) / len(combined)
    return 0.0