File size: 580 Bytes
# utils.py
from typing import Dict, Set
def text_blob(b: Dict) -> str:
    """Build a single display string from selected fields of *b*.

    The values stored under "title", "authors", "categories", "subjects"
    and "description" are looked up in that order (a missing key counts as
    an empty string). Falsy values are dropped and the remaining ones are
    joined with " | ".

    Args:
        b: Mapping to read the fields from.

    Returns:
        The joined string, or "" when every field is missing or falsy.
    """
    field_names = ("title", "authors", "categories", "subjects", "description")
    values = (b.get(name, "") for name in field_names)
    # Skip empty fields so the separator never appears twice in a row.
    return " | ".join(value for value in values if value)
def _to_tagset(s: str) -> Set[str]:
return {t.strip().lower() for t in (s or "").split(";") if t.strip()}
def jaccard(a: str, b: str) -> float:
    """Return the Jaccard similarity of two ";"-separated tag strings.

    Both strings are converted to tag sets with ``_to_tagset``; the result
    is the size of the intersection divided by the size of the union.

    Args:
        a: First tag string.
        b: Second tag string.

    Returns:
        A value between 0.0 and 1.0. If either string yields no tags the
        result is 0.0.
    """
    left = _to_tagset(a)
    right = _to_tagset(b)
    # With either side empty there is nothing to compare; report no overlap.
    if not (left and right):
        return 0.0
    shared = left & right
    combined = left | right
    return len(shared) / len(combined)
|