Spaces:
Sleeping
Sleeping
| import re | |
| from collections import defaultdict | |
| from typing import Dict, List | |
| from src.models import CompanyRanking, JobMatch, JobPosting, ResumeProfile | |
# Terms looked for in a job title to recognise an entry-level role
# (a hit raises the match score).
ENTRY_LEVEL_TERMS = {
    "intern",
    "internship",
    "entry",
    "junior",
    "new grad",
    "associate",
    "graduate",
}

# Terms looked for in a job title to recognise a senior role
# (a hit lowers the match score).
SENIOR_TERMS = {
    "senior",
    "staff",
    "principal",
    "lead",
    "manager",
    "director",
    "architect",
}
| def _tokenize(text: str) -> set[str]: | |
| return set(re.findall(r"[a-zA-Z0-9\+#\.]+", text.lower())) | |
| def _contains_phrase(text: str, phrases: List[str]) -> bool: | |
| lowered = text.lower() | |
| return any(phrase.lower() in lowered for phrase in phrases) | |
def _has_term(lowered_text: str, term: str) -> bool:
    """Return True when *term* occurs in *lowered_text* as a whole term.

    The term must not be glued to a neighbouring letter, digit, ``+`` or
    ``#``, so ``intern`` does not match ``international``, ``go`` does not
    match ``google`` and ``c`` does not match ``c++``. Blank terms never match.
    ``lowered_text`` must already be lowercase; ``term`` is normalised here.
    """
    needle = term.strip().lower() if term else ""
    if not needle:
        return False
    pattern = r"(?<![a-z0-9+#])" + re.escape(needle) + r"(?![a-z0-9+#])"
    return re.search(pattern, lowered_text) is not None


def score_job_match(job: JobPosting, profile: ResumeProfile) -> JobMatch:
    """Score how well a job posting fits a resume profile.

    The score is the sum of four components, clamped to the range 0-100:

    * skills: 8 points per distinct profile skill/language/framework/tool
      found in the posting text, capped at 40;
    * role: 25 points when the title contains one of the profile's target
      titles, or 12 points when the profile has no target titles and the
      title contains a generic technical role word;
    * level: +20 when the title contains an entry-level term, -25 when it
      contains a senior term (both may apply);
    * location: 10 points when one of the profile's locations appears in
      the posting text.

    Skills, level terms and locations are matched as whole terms rather than
    raw substrings, blank entries are ignored, and skills differing only in
    letter case are counted once.

    Args:
        job: The posting to score. Its title, department, description and
            location are searched; fields that are ``None`` or empty are
            skipped.
        profile: The candidate profile supplying skills, languages,
            frameworks, tools, target titles and locations.

    Returns:
        A ``JobMatch`` carrying the posting's identifying fields, the score
        rounded to two decimals, and a "; "-joined explanation of the
        components that contributed.
    """
    title = job.title or ""
    title_lower = title.lower()
    # Skip missing fields so one empty/None attribute cannot break the join.
    blob = " ".join(
        part
        for part in (title, job.department, job.description, job.location)
        if part
    ).lower()

    # Key skills by their lowercase form so "Python" and "python" count once;
    # keep the first spelling seen for display in the explanation.
    skill_names: Dict[str, str] = {}
    for item in profile.skills + profile.languages + profile.frameworks + profile.tools:
        key = item.strip().lower()
        if key:
            skill_names.setdefault(key, item.strip())
    overlap = {name for key, name in skill_names.items() if _has_term(blob, key)}
    skill_score = min(40.0, 8.0 * len(overlap))

    role_score = 0.0
    if profile.target_titles and _contains_phrase(title, profile.target_titles):
        role_score = 25.0
    elif not profile.target_titles and _contains_phrase(
        title, ["engineer", "analyst", "developer", "scientist"]
    ):
        role_score = 12.0

    # A title can carry both kinds of term (e.g. "Senior Associate"); the
    # bonus and the penalty are applied independently.
    entry_score = 0.0
    if any(_has_term(title_lower, term) for term in ENTRY_LEVEL_TERMS):
        entry_score += 20.0
    if any(_has_term(title_lower, term) for term in SENIOR_TERMS):
        entry_score -= 25.0

    location_score = 0.0
    if profile.locations and any(_has_term(blob, loc) for loc in profile.locations):
        location_score = 10.0

    total = max(0.0, min(100.0, skill_score + role_score + entry_score + location_score))

    reasons = []
    if overlap:
        # Only the first four matched skills (alphabetically) are listed.
        reasons.append(f"skill overlap ({', '.join(sorted(overlap)[:4])})")
    if role_score > 0:
        reasons.append("role alignment")
    if entry_score > 0:
        reasons.append("entry-level title")
    if entry_score < 0:
        reasons.append("senior-level penalty")
    if location_score > 0:
        reasons.append("location fit")
    if not reasons:
        reasons.append("limited overlap but still relevant board")

    return JobMatch(
        company=job.company,
        title=job.title,
        location=job.location,
        url=job.url,
        score=round(total, 2),
        explanation="; ".join(reasons),
        ats=job.ats,
    )
def rank_companies(matches: List[JobMatch]) -> List[CompanyRanking]:
    """Aggregate job matches into one ranking entry per company.

    For each company the matches are ordered by score (highest first) and the
    five best are averaged to give the company score. The best match also
    supplies the entry's role title, ATS and explanation. ``match_count`` is
    the number of that company's matches scoring at least 20.

    Args:
        matches: Scored job matches, in any order.

    Returns:
        Company rankings ordered by company score, highest first. An empty
        input yields an empty list.
    """
    by_company: Dict[str, List[JobMatch]] = defaultdict(list)
    for job_match in matches:
        by_company[job_match.company].append(job_match)

    rankings: List[CompanyRanking] = []
    for name, postings in by_company.items():
        # A company only appears here after at least one append, so the
        # ordered list (and therefore `best`) is never empty.
        ordered = sorted(postings, key=lambda m: m.score, reverse=True)
        top_five = ordered[:5]
        best = top_five[0]
        mean_score = sum(item.score for item in top_five) / len(top_five)
        strong_matches = sum(1 for item in postings if item.score >= 20)
        entry = CompanyRanking(
            company=name,
            company_score=round(mean_score, 2),
            match_count=strong_matches,
            best_role=best.title,
            ats=best.ats,
            explanation=best.explanation,
        )
        rankings.append(entry)

    rankings.sort(key=lambda item: item.company_score, reverse=True)
    return rankings