nsbecf / src /scoring /matcher.py
acarey5
build AI Career Fair Matcher
fa6caa6
import re
from collections import defaultdict
from typing import Dict, List
from src.models import CompanyRanking, JobMatch, JobPosting, ResumeProfile
# Title keywords that earn a posting the entry-level bonus in score_job_match.
ENTRY_LEVEL_TERMS = {
    "intern",
    "internship",
    "entry",
    "junior",
    "new grad",
    "associate",
    "graduate",
}

# Title keywords that trigger the senior-level penalty in score_job_match.
SENIOR_TERMS = {
    "senior",
    "staff",
    "principal",
    "lead",
    "manager",
    "director",
    "architect",
}
def _tokenize(text: str) -> set[str]:
return set(re.findall(r"[a-zA-Z0-9\+#\.]+", text.lower()))
def _contains_phrase(text: str, phrases: List[str]) -> bool:
lowered = text.lower()
return any(phrase.lower() in lowered for phrase in phrases)
def _mentions_term(text: str, term: str, allow_plural: bool = False) -> bool:
    """Return True if *term* occurs in *text* as a whole word or phrase.

    Unlike a raw substring test, this does not let ``"intern"`` match
    ``"international"`` or the skill ``"c"`` match every word containing a
    ``c``. Word characters plus ``+`` and ``#`` count as part of a word, so
    ``"c"`` does not match inside ``"c++"`` or ``"c#"`` either.

    Args:
        text: Text to search; expected to be lowercased by the caller.
        term: Word or multi-word phrase; case and repeated whitespace are
            ignored. Empty or whitespace-only terms never match.
        allow_plural: Also accept the term followed by a single ``s``.

    Returns:
        True when the term is found delimited by non-word characters.
    """
    words = term.lower().split()
    if not words:
        return False
    pattern = r"\s+".join(re.escape(word) for word in words)
    if allow_plural:
        pattern += "s?"
    return re.search(rf"(?<![\w+#]){pattern}(?![\w+#])", text) is not None


def score_job_match(job: JobPosting, profile: ResumeProfile) -> JobMatch:
    """Score how well a single job posting fits a resume profile.

    The score is the sum of four components, clamped to ``[0, 100]``:

    * skills: 8 points per distinct profile skill/language/framework/tool
      mentioned in the posting, capped at 40;
    * role: 25 if the title contains one of ``profile.target_titles``; 12 if
      the profile has no target titles and the title contains a generic
      technical role word;
    * seniority: +20 for an entry-level title term, -25 for a senior title
      term (both may apply to the same title);
    * location: 10 if any profile location is mentioned in the posting.

    Skills, seniority terms and locations are matched as whole words or
    phrases, not raw substrings.

    Args:
        job: The posting to evaluate.
        profile: The candidate's parsed resume profile.

    Returns:
        A ``JobMatch`` carrying the rounded score and a ``"; "``-joined list
        of the reasons that contributed to it.
    """
    # Skip empty/None fields defensively so one missing field cannot break
    # the join.
    title_lower = (job.title or "").lower()
    blob = " ".join(
        part
        for part in (job.title, job.department, job.description, job.location)
        if part
    ).lower()

    # Deduplicate case-insensitively so "Python" and "python" count once;
    # the first spelling seen is kept for display.
    skills_by_key: Dict[str, str] = {}
    for item in (
        *profile.skills,
        *profile.languages,
        *profile.frameworks,
        *profile.tools,
    ):
        label = item.strip()
        if label:
            skills_by_key.setdefault(label.lower(), label)
    overlap = sorted(
        label for key, label in skills_by_key.items() if _mentions_term(blob, key)
    )
    skill_score = min(40.0, 8.0 * len(overlap))

    role_score = 0.0
    if profile.target_titles:
        if _contains_phrase(title_lower, profile.target_titles):
            role_score = 25.0
    elif _contains_phrase(title_lower, ["engineer", "analyst", "developer", "scientist"]):
        role_score = 12.0

    # Plurals are accepted so titles like "Summer Interns" still qualify.
    entry_score = 0.0
    if any(_mentions_term(title_lower, term, allow_plural=True) for term in ENTRY_LEVEL_TERMS):
        entry_score += 20.0
    if any(_mentions_term(title_lower, term, allow_plural=True) for term in SENIOR_TERMS):
        entry_score -= 25.0

    location_score = 0.0
    if any(_mentions_term(blob, loc) for loc in profile.locations or ()):
        location_score = 10.0

    total = max(0.0, min(100.0, skill_score + role_score + entry_score + location_score))

    reasons = []
    if overlap:
        reasons.append(f"skill overlap ({', '.join(overlap[:4])})")
    if role_score > 0:
        reasons.append("role alignment")
    if entry_score > 0:
        reasons.append("entry-level title")
    if entry_score < 0:
        reasons.append("senior-level penalty")
    if location_score > 0:
        reasons.append("location fit")
    if not reasons:
        reasons.append("limited overlap but still relevant board")

    return JobMatch(
        company=job.company,
        title=job.title,
        location=job.location,
        url=job.url,
        score=round(total, 2),
        explanation="; ".join(reasons),
        ats=job.ats,
    )
def rank_companies(
    matches: List[JobMatch],
    *,
    top_n: int = 5,
    min_match_score: float = 20.0,
) -> List[CompanyRanking]:
    """Aggregate per-job matches into one ranking entry per company.

    Each company's score is the mean score of its ``top_n`` best matches.
    The best single match supplies the ranking's role title, ATS and
    explanation.

    Args:
        matches: Scored job matches, in any order.
        top_n: How many of a company's best matches are averaged into its
            score. Must be at least 1.
        min_match_score: Minimum score for a match to be counted in
            ``match_count``.

    Returns:
        One ``CompanyRanking`` per distinct company, sorted by company score
        from highest to lowest. Empty when *matches* is empty.

    Raises:
        ValueError: If ``top_n`` is less than 1.
    """
    if top_n < 1:
        raise ValueError("top_n must be at least 1")

    grouped: Dict[str, List[JobMatch]] = defaultdict(list)
    for match in matches:
        grouped[match.company].append(match)

    rankings: List[CompanyRanking] = []
    for company, company_matches in grouped.items():
        # A group only exists because a match was appended to it, so `top`
        # always holds at least one element here.
        top = sorted(company_matches, key=lambda m: m.score, reverse=True)[:top_n]
        best = top[0]
        rankings.append(
            CompanyRanking(
                company=company,
                company_score=round(sum(m.score for m in top) / len(top), 2),
                match_count=sum(1 for m in company_matches if m.score >= min_match_score),
                best_role=best.title,
                ats=best.ats,
                explanation=best.explanation,
            )
        )

    return sorted(rankings, key=lambda item: item.company_score, reverse=True)