Spaces:

chirag1121
/

Resume_Screening_Model

Sleeping

App Files Files Community

Resume_Screening_Model / utils /scorer.py

chirag1121

Update utils/scorer.py

59d43ff verified about 2 months ago

raw

history blame contribute delete

6.12 kB

	"""
	scorer.py — Resume scoring module.

	Computes:
	1. Resume Base Score (0–100) based on resume content analysis
	2. ATS Score (0–100) combining base score + job match similarity

	Scoring rubric (Base Score):
	- Skills richness : up to 20 pts
	- Experience section: up to 30 pts
	- Projects section : up to 20 pts
	- Education section : up to 10 pts
	- Resume length : up to 10 pts
	- Skill diversity : up to 10 pts
	TOTAL : 100 pts
	"""

	import math


	def _scaled_points(amount: int, full_at: int, max_points: int) -> int:
	    """Return ``amount / full_at`` of ``max_points``, floored and capped.

	    Integer arithmetic is used on purpose: with ``int((a / b) * c)`` an
	    exact multiple can be truncated one point low by float rounding.
	    """
	    return min(max_points, (amount * max_points) // full_at)


	def compute_base_score(
	    text: str,
	    sections: dict,
	    skills: dict,
	) -> dict:
	    """
	    Compute the resume base score from its content.

	    Args:
	        text    : full resume text
	        sections: output of nlp_utils.detect_sections(); only the truthiness
	                  of the 'experience', 'projects', 'education' and 'summary'
	                  entries is used
	        skills  : output of nlp_utils.extract_skills(); the 'technical' and
	                  'soft' entries are read as collections (missing or None
	                  entries count as empty)

	    Returns:
	        dict with:
	            'total'     : overall score (0–100)
	            'breakdown' : per-category score dict with the keys 'Skills',
	                          'Experience', 'Projects', 'Education', 'Length'
	                          and 'Diversity'
	    """
	    # `or []` also covers an explicit None value, not only a missing key.
	    tech_count = len(skills.get("technical") or [])
	    soft_count = len(skills.get("soft") or [])

	    breakdown = {}

	    # ── 1. Skills richness (0–20) ─────────────────────────────────────────
	    # 0 skills → 0, 5 skills → 10, 10+ skills → 20
	    breakdown["Skills"] = _scaled_points(tech_count, 10, 20)

	    # ── 2. Experience section (0–30) ──────────────────────────────────────
	    if sections.get("experience"):
	        # More experience-related content = higher score (100+ words = 30).
	        exp_text = _extract_section_text(
	            text, ["experience", "employment", "work history"]
	        )
	        exp_words = len(exp_text.split())
	        # A detected section is worth at least 10 points even when the
	        # text extraction heuristic finds little or nothing under it.
	        breakdown["Experience"] = max(10, _scaled_points(exp_words, 100, 30))
	    else:
	        breakdown["Experience"] = 0

	    # ── 3. Projects section (0–20) ────────────────────────────────────────
	    if sections.get("projects"):
	        proj_text = _extract_section_text(text, ["project"])
	        proj_words = len(proj_text.split())
	        # Same idea as above: 60+ words = 20, floor of 8 once detected.
	        breakdown["Projects"] = max(8, _scaled_points(proj_words, 60, 20))
	    else:
	        breakdown["Projects"] = 0

	    # ── 4. Education section (0–10) ───────────────────────────────────────
	    breakdown["Education"] = 10 if sections.get("education") else 0

	    # ── 5. Resume length (0–10) ───────────────────────────────────────────
	    # Below 100 words scores 0; points ramp up to 5 at 300 words and to the
	    # full 10 at 700 words. Longer resumes are not penalised.
	    word_count = len(text.split())
	    if word_count >= 700:
	        length_score = 10
	    elif word_count >= 300:
	        length_score = 5 + ((word_count - 300) * 5) // 400
	    elif word_count >= 100:
	        length_score = (word_count * 5) // 300
	    else:
	        length_score = 0
	    breakdown["Length"] = length_score

	    # ── 6. Skill diversity (0–10) ─────────────────────────────────────────
	    # Reward having both technical AND soft skills, plus a summary section.
	    has_tech = tech_count >= 3
	    has_soft = soft_count >= 1
	    # Coerce to bool: the section entry may be any truthy value, and
	    # multiplying e.g. a string or a line index by the weight would either
	    # raise in the sum or inflate the score.
	    has_summary = bool(sections.get("summary"))
	    diversity_score = 5 * has_tech + 3 * has_soft + 2 * has_summary
	    breakdown["Diversity"] = min(10, diversity_score)

	    total = sum(breakdown.values())

	    return {
	        "total": min(100, total),
	        "breakdown": breakdown,
	    }


	def compute_ats_score(base_score: float, job_match_score: float) -> float:
	    """
	    Compute final ATS score.

	    Formula: ATS = 0.6 × base_score + 0.4 × job_match_score
	    The result is clamped to the range 0–100.

	    Args:
	        base_score     : resume base score (0–100)
	        job_match_score: job description match percentage (0–100)

	    Returns:
	        ATS score as a float (0–100), rounded to 1 decimal place.
	    """
	    weighted = (0.6 * base_score) + (0.4 * job_match_score)
	    # Clamp on both sides so out-of-range inputs (e.g. a negative similarity)
	    # cannot produce a score outside the documented 0–100 range.
	    clamped = max(0.0, min(100.0, weighted))
	    return round(clamped, 1)


	# ---------------------------------------------------------------------------
	# Internal helpers
	# ---------------------------------------------------------------------------

	def _extract_section_text(text: str, keywords: list) -> str:
	"""
	Attempt to extract the content under a section heading.

	Searches for lines containing any of the keywords and returns
	all text until the next section-like heading.

	Args:
	text : full resume text
	keywords: list of lowercase keywords to identify the section heading

	Returns:
	Extracted section text (may be empty string).
	"""
	lines = text.splitlines()
	in_section = False
	collected = []

	# Common heading indicators (short, possibly title-cased lines)
	def _is_heading(line: str) -> bool:
	stripped = line.strip()
	return (
	len(stripped) < 60
	and stripped
	and stripped == stripped.upper()
	or any(
	kw in stripped.lower()
	for kw in [
	"skills", "education", "experience", "project",
	"certification", "summary", "objective", "awards",
	"contact", "languages", "interests",
	]
	)
	)

	for line in lines:
	line_lower = line.lower().strip()
	if any(kw in line_lower for kw in keywords) and len(line.strip()) < 60:
	in_section = True
	continue
	if in_section:
	# Stop collecting at the next major heading
	if _is_heading(line) and not any(kw in line.lower() for kw in keywords):
	break
	collected.append(line)

	return " ".join(collected)