# Uploaded by chirag1121: "Update utils/scorer.py" (commit 59d43ff, verified)
"""
scorer.py β€” Resume scoring module.
Computes:
1. Resume Base Score (0–100) based on resume content analysis
2. ATS Score (0–100) combining base score + job match similarity
Scoring rubric (Base Score):
- Skills richness : up to 20 pts
- Experience section: up to 30 pts
- Projects section : up to 20 pts
- Education section : up to 10 pts
- Resume length : up to 10 pts
- Skill diversity : up to 10 pts
TOTAL : 100 pts
"""
import math
def compute_base_score(
    text: str,
    sections: dict,
    skills: dict,
) -> dict:
    """
    Compute the resume base score from its content.

    Rubric (the six categories add up to at most 100):
        Skills      0-20  two points per technical skill, full marks at 10+
        Experience  0-30  scaled by the words found under the experience
                          heading (100+ words = 30); never below 10 once the
                          section is detected
        Projects    0-20  scaled by the words found under the projects
                          heading (60+ words = 20); never below 8 once the
                          section is detected
        Education   0/10  10 when the section is detected
        Length      0-10  based on the total word count of the resume
        Diversity   0-10  5 for >= 3 technical skills, 3 for >= 1 soft skill,
                          2 for a summary section

    Args:
        text    : full resume text
        sections: output of nlp_utils.detect_sections(); only the truthiness
                  of the 'experience', 'projects', 'education' and 'summary'
                  entries is used
        skills  : output of nlp_utils.extract_skills(); only the lengths of
                  the 'technical' and 'soft' entries are used

    Returns:
        dict with:
            'total'     : overall score (0-100)
            'breakdown' : per-category score dict keyed by 'Skills',
                          'Experience', 'Projects', 'Education', 'Length'
                          and 'Diversity'
    """
    # Treat a missing or None entry as "no skills" so len() cannot fail.
    technical = skills.get("technical") or []
    soft = skills.get("soft") or []

    breakdown = {}

    # -- 1. Skills richness (0-20) ------------------------------------------
    # 0 skills -> 0, 5 skills -> 10, 10+ skills -> 20
    tech_count = len(technical)
    breakdown["Skills"] = min(20, int((tech_count / 10) * 20))

    # -- 2. Experience section (0-30) ---------------------------------------
    if sections.get("experience"):
        # More experience-related content = higher score (100+ words = 30).
        exp_text = _extract_section_text(
            text, ["experience", "employment", "work history"]
        )
        exp_words = len(exp_text.split())
        exp_score = min(30, int((exp_words / 100) * 30))
        # A detected section is worth at least 10 points even when the
        # heading-based text extraction finds little or nothing under it.
        exp_score = max(exp_score, 10)
    else:
        exp_score = 0
    breakdown["Experience"] = exp_score

    # -- 3. Projects section (0-20) -----------------------------------------
    if sections.get("projects"):
        proj_text = _extract_section_text(text, ["project"])
        proj_words = len(proj_text.split())
        proj_score = min(20, int((proj_words / 60) * 20))
        # Same floor idea as for experience: detected section => at least 8.
        proj_score = max(proj_score, 8)
    else:
        proj_score = 0
    breakdown["Projects"] = proj_score

    # -- 4. Education section (0/10) ----------------------------------------
    breakdown["Education"] = 10 if sections.get("education") else 0

    # -- 5. Resume length (0-10) --------------------------------------------
    # 700+ words -> 10; 300-699 -> 5..9 (linear); 100-299 -> 1..4; <100 -> 0
    word_count = len(text.split())
    if word_count >= 700:
        length_score = 10
    elif word_count >= 300:
        length_score = int(5 + ((word_count - 300) / 400) * 5)
    elif word_count >= 100:
        length_score = int((word_count / 300) * 5)
    else:
        length_score = 0
    breakdown["Length"] = length_score

    # -- 6. Skill diversity (0-10) ------------------------------------------
    # Reward having both technical AND soft skills, plus a summary section.
    # Each condition is evaluated as a plain boolean: the 'summary' entry may
    # be None, a string or a count, and multiplying such a value directly
    # would either raise TypeError or distort the weighting.
    diversity_score = 0
    if tech_count >= 3:
        diversity_score += 5
    if len(soft) >= 1:
        diversity_score += 3
    if sections.get("summary"):
        diversity_score += 2
    breakdown["Diversity"] = min(10, diversity_score)

    total = sum(breakdown.values())
    return {
        "total": min(100, total),
        "breakdown": breakdown,
    }
def compute_ats_score(base_score: float, job_match_score: float) -> float:
    """
    Blend the resume base score and the job-match score into one ATS score.

    The base score is weighted 0.6 and the job-match score 0.4. The weighted
    sum is limited to a maximum of 100 and rounded to one decimal place.

    Args:
        base_score     : resume base score (0-100)
        job_match_score: job description match percentage (0-100)

    Returns:
        The ATS score as a float, at most 100.0, with one decimal place.
    """
    base_part = 0.6 * base_score
    match_part = 0.4 * job_match_score
    capped = min(100.0, base_part + match_part)
    return round(capped, 1)
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _extract_section_text(text: str, keywords: list) -> str:
"""
Attempt to extract the content under a section heading.
Searches for lines containing any of the keywords and returns
all text until the next section-like heading.
Args:
text : full resume text
keywords: list of lowercase keywords to identify the section heading
Returns:
Extracted section text (may be empty string).
"""
lines = text.splitlines()
in_section = False
collected = []
# Common heading indicators (short, possibly title-cased lines)
def _is_heading(line: str) -> bool:
stripped = line.strip()
return (
len(stripped) < 60
and stripped
and stripped == stripped.upper()
or any(
kw in stripped.lower()
for kw in [
"skills", "education", "experience", "project",
"certification", "summary", "objective", "awards",
"contact", "languages", "interests",
]
)
)
for line in lines:
line_lower = line.lower().strip()
if any(kw in line_lower for kw in keywords) and len(line.strip()) < 60:
in_section = True
continue
if in_section:
# Stop collecting at the next major heading
if _is_heading(line) and not any(kw in line.lower() for kw in keywords):
break
collected.append(line)
return " ".join(collected)