Sairii committed on
Commit
7eef73f
·
verified ·
1 Parent(s): 59277a9

Upload utils.py

Browse files
Files changed (1) hide show
  1. utils.py +21 -0
utils.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # utils.py
2
+ from typing import Dict, Set
3
+
4
def text_blob(b: Dict) -> str:
    """Concatenate a record's descriptive fields into one string.

    Reads the ``title``, ``authors``, ``categories``, ``subjects`` and
    ``description`` keys of *b*, in that order, and joins the usable values
    with ``" | "``.

    Args:
        b: Mapping holding the record's fields. Missing keys and falsy
            values (``None``, ``""``, ``0``, empty containers) are skipped.

    Returns:
        The joined text, or ``""`` when no field has a usable value.
    """
    field_names = (
        "title",
        "authors",
        "categories",
        "subjects",
        "description",
    )
    values = (b.get(name) for name in field_names)
    # str() keeps str.join from raising TypeError when a field holds a
    # non-string value such as a number; strings pass through unchanged.
    return " | ".join(str(value) for value in values if value)
13
+
14
+ def _to_tagset(s: str) -> Set[str]:
15
+ return {t.strip().lower() for t in (s or "").split(";") if t.strip()}
16
+
17
def jaccard(a: str, b: str) -> float:
    """Return the Jaccard similarity of two ``;``-separated tag strings.

    Both arguments are converted to tag sets with ``_to_tagset``; the score
    is the size of their intersection divided by the size of their union.

    Args:
        a: First tag string.
        b: Second tag string.

    Returns:
        The similarity as a float; ``0.0`` when either side has no tags.
    """
    tags_a, tags_b = _to_tagset(a), _to_tagset(b)
    if not tags_a or not tags_b:
        # No tags on one side means no similarity; this also avoids
        # dividing by an empty union when both sides are empty.
        return 0.0
    return len(tags_a & tags_b) / len(tags_a | tags_b)