"""Score job postings against a resume profile and rank companies by fit."""

import re
from collections import defaultdict
from typing import Dict, List

from src.models import CompanyRanking, JobMatch, JobPosting, ResumeProfile

ENTRY_LEVEL_TERMS = {"intern", "internship", "entry", "junior", "new grad", "associate", "graduate"}
SENIOR_TERMS = {"senior", "staff", "principal", "lead", "manager", "director", "architect"}

# Characters that make up a token: letters, digits and the punctuation that
# appears in technology names such as "c++", "c#" or "node.js".
_TOKEN_RE = re.compile(r"[a-zA-Z0-9\+#\.]+")

# Title words that earn partial role credit when the profile names no targets.
_GENERIC_ROLE_WORDS = ["engineer", "analyst", "developer", "scientist"]

# Number of best matches averaged into a company's score.
_TOP_MATCHES_PER_COMPANY = 5

# Minimum score for a match to be included in a company's ``match_count``.
_MIN_COUNTED_SCORE = 20


def _tokenize(text: str) -> set[str]:
    """Return the set of lower-cased tokens found in *text*."""
    return set(_TOKEN_RE.findall(text.lower()))


def _contains_phrase(text: str, phrases: List[str]) -> bool:
    """Return True if any non-blank phrase occurs in *text*, ignoring case.

    Matching is by substring. Blank phrases are skipped because an empty
    string is a substring of every text and would always match.
    """
    lowered = text.lower()
    return any(phrase.strip() and phrase.lower() in lowered for phrase in phrases)


def _has_term(text: str, term: str) -> bool:
    """Return True if *term* occurs in *text* as a whole term, ignoring case.

    A hit may not be directly preceded or followed by a word character,
    ``+`` or ``#``. This keeps "intern" from matching "internal" and "c"
    from matching "c++" or "c#", while still matching before punctuation
    such as a trailing full stop. Blank terms never match.
    """
    needle = term.strip()
    if not needle:
        return False
    pattern = rf"(?<![\w+#]){re.escape(needle)}(?![\w+#])"
    return re.search(pattern, text, re.IGNORECASE) is not None


def score_job_match(job: JobPosting, profile: ResumeProfile) -> JobMatch:
    """Score how well *job* fits *profile* and explain the result.

    The score is the sum of four components, clamped to the range 0-100:

    * skills: 8 points per distinct profile skill, language, framework or
      tool mentioned in the posting, capped at 40;
    * role: 25 if the title contains one of ``profile.target_titles``; when
      the profile lists no target titles, 12 if the title contains a generic
      role word;
    * seniority: +20 if the title contains an entry-level term and -25 if it
      contains a senior term (both can apply to the same title);
    * location: 10 if any of ``profile.locations`` is mentioned in the
      posting.

    Skills, seniority terms and locations are matched as whole terms (see
    ``_has_term``), not as raw substrings.

    Returns:
        A ``JobMatch`` carrying the job's identifying fields, the score
        rounded to two decimals and a "; "-joined list of reasons.
    """
    # Tolerate missing (None or empty) text fields rather than failing on them.
    title = job.title or ""
    blob = " ".join(
        part for part in (job.title, job.department, job.description, job.location) if part
    )

    # Collect matched skills once each, comparing case-insensitively so that
    # e.g. "Python" and "python" listed in two categories are not double-counted.
    overlap: List[str] = []
    seen: set[str] = set()
    for item in profile.skills + profile.languages + profile.frameworks + profile.tools:
        key = item.strip().lower()
        if not key or key in seen:
            continue
        seen.add(key)
        if _has_term(blob, key):
            overlap.append(item)
    skill_score = min(40.0, 8.0 * len(overlap))

    role_score = 0.0
    if profile.target_titles:
        if _contains_phrase(title, profile.target_titles):
            role_score = 25.0
    elif _contains_phrase(title, _GENERIC_ROLE_WORDS):
        role_score = 12.0

    entry_score = 0.0
    if any(_has_term(title, term) for term in ENTRY_LEVEL_TERMS):
        entry_score += 20.0
    if any(_has_term(title, term) for term in SENIOR_TERMS):
        entry_score -= 25.0

    location_score = 0.0
    if profile.locations and any(_has_term(blob, loc) for loc in profile.locations):
        location_score = 10.0

    total = max(0.0, min(100.0, skill_score + role_score + entry_score + location_score))

    reasons = []
    if overlap:
        # Only the first four skills (alphabetically) are listed to keep it short.
        reasons.append(f"skill overlap ({', '.join(sorted(overlap)[:4])})")
    if role_score > 0:
        reasons.append("role alignment")
    if entry_score > 0:
        reasons.append("entry-level title")
    if entry_score < 0:
        reasons.append("senior-level penalty")
    if location_score > 0:
        reasons.append("location fit")
    if not reasons:
        reasons.append("limited overlap but still relevant board")

    return JobMatch(
        company=job.company,
        title=job.title,
        location=job.location,
        url=job.url,
        score=round(total, 2),
        explanation="; ".join(reasons),
        ats=job.ats,
    )


def rank_companies(matches: List[JobMatch]) -> List[CompanyRanking]:
    """Group *matches* by company and rank the companies, best first.

    A company's score is the mean score of its five best matches (or of all
    its matches when it has fewer), rounded to two decimals. ``match_count``
    counts that company's matches scoring at least 20. ``best_role``, ``ats``
    and ``explanation`` are taken from its highest-scoring match.

    Returns:
        One ``CompanyRanking`` per company, sorted by descending score.
        An empty input yields an empty list.
    """
    grouped: Dict[str, List[JobMatch]] = defaultdict(list)
    for match in matches:
        grouped[match.company].append(match)

    rankings: List[CompanyRanking] = []
    for company, company_matches in grouped.items():
        # Every group holds at least one match, so ``top`` is never empty.
        ordered = sorted(company_matches, key=lambda m: m.score, reverse=True)
        top = ordered[:_TOP_MATCHES_PER_COMPANY]
        best = top[0]
        avg_top = sum(match.score for match in top) / len(top)
        rankings.append(
            CompanyRanking(
                company=company,
                company_score=round(avg_top, 2),
                match_count=sum(1 for m in company_matches if m.score >= _MIN_COUNTED_SCORE),
                best_role=best.title,
                ats=best.ats,
                explanation=best.explanation,
            )
        )
    return sorted(rankings, key=lambda item: item.company_score, reverse=True)