File size: 580 Bytes
7eef73f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# utils.py
from typing import Dict, Set

def text_blob(b: Dict) -> str:
    """Build a single searchable string from a record's descriptive fields.

    The keys "title", "authors", "categories", "subjects" and "description"
    are looked up in *b* in that order. Missing keys and falsy values (empty
    string, ``None``) are skipped; the remaining values are joined with
    " | ". An empty string is returned when no field has a value.

    The kept values are passed straight to ``str.join``, so they must be
    strings.
    """
    field_names = ("title", "authors", "categories", "subjects", "description")
    present = []
    for key in field_names:
        value = b.get(key, "")
        # Drop empty fields so the output never contains dangling separators.
        if value:
            present.append(value)
    return " | ".join(present)

def _to_tagset(s: str) -> Set[str]:
    """Split a semicolon-separated tag string into a normalized set.

    Each piece is stripped of surrounding whitespace and lower-cased; pieces
    that are empty after stripping are discarded. A falsy *s* (empty string
    or ``None``) yields an empty set.
    """
    tags: Set[str] = set()
    if not s:
        return tags
    for raw in s.split(";"):
        cleaned = raw.strip()
        # Skip blanks produced by leading, trailing or doubled semicolons.
        if cleaned:
            tags.add(cleaned.lower())
    return tags

def jaccard(a: str, b: str) -> float:
    """Return the Jaccard similarity of two semicolon-separated tag strings.

    Both arguments are converted to tag sets with ``_to_tagset``. The result
    is the size of the intersection divided by the size of the union. If
    either tag set is empty the similarity is defined as 0.0, which also
    avoids dividing by zero.
    """
    left = _to_tagset(a)
    right = _to_tagset(b)
    if left and right:
        shared = left.intersection(right)
        combined = left.union(right)
        return len(shared) / len(combined)
    return 0.0