Corin1998 committed on
Commit
89620fa
·
verified ·
1 Parent(s): 98813e5

Create scoring.py

Browse files
Files changed (1) hide show
  1. pipelines/scoring.py +34 -0
pipelines/scoring.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from typing import Dict
3
+
4
+ def _coverage_flags(normalized: Dict) -> Dict[str, bool]:
5
+ return {
6
+ "has_work": bool(normalized.get("work_experience")),
7
+ "has_edu": bool(normalized.get("education")),
8
+ "has_certs": bool(normalized.get("certifications")),
9
+ "has_skills": bool(normalized.get("skills")),
10
+ }
11
+
12
+ def _blank_gaps(normalized: Dict) -> int:
13
+ periods = [w.get("period", "") for w in normalized.get("work_experience", [])]
14
+ return max(0,0 if not periods else len([p for p in periods if ("~" in p or "-" in p ) and "現在" not in p]))
15
+
16
+ def _lexical_diversity(text: str) -> float:
17
+ tokens = re.findall(r"\w+", text.lower())
18
+ if not tokens:
19
+ return 0.0
20
+ uniq = len(set(tokens))
21
+ return round(uniq / len(tokens), 4)
22
+
23
def compute_quality_score(text: str, normalized: Dict) -> Dict:
    """Combine section coverage, gap count and lexical diversity into a score.

    The total is a weighted sum:

    * ``0.5 * coverage`` where coverage is the fraction of the four coverage
      flags that are true;
    * ``0.1 * max(0.0, 0.5 - min(0.5, gaps * 0.05))``, a term that shrinks
      by 0.005 per counted gap and reaches 0 at ten gaps;
    * ``0.4 * lexical_diversity``.

    NOTE(review): the gap term tops out at 0.05, not 0.1, so the maximum
    attainable total is 0.95. Confirm whether that ceiling is intended.

    Args:
        text: Raw text passed to ``_lexical_diversity``.
        normalized: Normalized record passed to ``_coverage_flags`` and
            ``_blank_gaps``.

    Returns:
        A dict with ``coverage_flags`` (the flag dict), ``gap_count``,
        ``lexical_diversity`` and ``total_score`` (rounded to 4 decimals).
    """
    flags = _coverage_flags(normalized)
    # Fixed: the original called ``flags.vbalues()``, which raised
    # AttributeError on every invocation.
    coverage = sum(1 for present in flags.values() if present) / 4.0
    gaps = _blank_gaps(normalized)
    lexdiv = _lexical_diversity(text)

    gap_term = max(0.0, 0.5 - min(0.5, gaps * 0.05))
    total = round(0.5 * coverage + 0.1 * gap_term + 0.4 * lexdiv, 4)

    return {
        "coverage_flags": flags,
        "gap_count": gaps,
        "lexical_diversity": lexdiv,
        "total_score": total,
    }