| import re |
| import math |
| from typing import Any |
|
|
|
|
# Job-title keyword -> numeric seniority rank (higher means more senior).
# Titles that match none of these keywords are treated as rank 2 by
# _extract_seniority.
SENIORITY_MAP = {
    "intern": 0,
    "trainee": 0,
    "junior": 1,
    "associate": 1,
    "mid": 2,
    "senior": 3,
    "lead": 4,
    "staff": 4,
    "principal": 5,
    "architect": 5,
    "manager": 4,
    "director": 6,
    "vp": 7,
    "cto": 8,
}


# Lower-case institution keywords that earn the education bonus in
# education_match.
TIER1_EDU = {
    "iit",
    "iim",
    "nit",
    "bits",
    "iiit",
    "mit",
    "stanford",
    "cmu",
    "berkeley",
}
|
|
|
|
def build_candidate_text(candidate: dict[str, Any]) -> str:
    """Flatten a candidate record into a single " | "-separated text blob.

    The pieces are emitted in a fixed order: summary, skills, programming
    languages, frameworks (backend followed by frontend), up to three work
    experience entries, and the most recent company description. Missing or
    empty fields are skipped.

    Args:
        candidate: Candidate record. ``parsed_skills`` may be a string or a
            list/tuple of skills; ``parsed_work_experience`` may be a list of
            dicts or a JSON-encoded string of such a list.

    Returns:
        The joined text, or an empty string when nothing usable is present.
    """
    import json  # local: only needed when work experience arrives serialized

    parts = []

    if candidate.get("parsed_summary"):
        parts.append(candidate["parsed_summary"])

    skills = candidate.get("parsed_skills")
    if isinstance(skills, (list, tuple)):
        # Join sequences explicitly; interpolating the list would leak the
        # Python repr ("['python', 'go']") into the text.
        skills = ", ".join(str(skill) for skill in skills if skill)
    if skills:
        parts.append(f"Skills: {skills}")

    langs = candidate.get("programming_languages") or []
    if langs:
        parts.append(f"Languages: {', '.join(langs)}")

    frameworks = (candidate.get("backend_frameworks") or []) + (
        candidate.get("frontend_technologies") or []
    )
    if frameworks:
        parts.append(f"Frameworks: {', '.join(frameworks)}")

    work_exp = candidate.get("parsed_work_experience") or []
    if isinstance(work_exp, str):
        # Same tolerance as compute_growth_velocity: the field may be stored
        # as a JSON string. Undecodable text simply contributes nothing.
        try:
            work_exp = json.loads(work_exp)
        except ValueError:
            work_exp = []
    if not isinstance(work_exp, (list, tuple)):
        work_exp = []

    for we in work_exp[:3]:
        if not isinstance(we, dict):
            continue
        desc = we.get("description") or we.get("role") or ""
        company = we.get("company") or ""
        if desc or company:
            # strip(": ") removes the dangling separator when one side is empty.
            parts.append(f"{company}: {desc}".strip(": "))

    if candidate.get("most_recent_company_description"):
        parts.append(candidate["most_recent_company_description"])

    return " | ".join(filter(None, parts))
|
|
|
|
| def _parse_duration_months(entry: dict) -> float: |
| duration = entry.get("duration") or entry.get("tenure") or "" |
| if not duration: |
| return 12.0 |
| years = re.findall(r"(\d+\.?\d*)\s*(?:year|yr)", duration, re.IGNORECASE) |
| months = re.findall(r"(\d+\.?\d*)\s*(?:month|mo)", duration, re.IGNORECASE) |
| total = sum(float(y) * 12 for y in years) + sum(float(m) for m in months) |
| return total if total > 0 else 12.0 |
|
|
|
|
def _extract_seniority(title: str) -> int:
    """Map a job title to a numeric seniority rank using SENIORITY_MAP.

    The title is split into lower-case alphabetic words and the highest rank
    among the words present in SENIORITY_MAP is returned. Whole-word matching
    is deliberate: substring matching would read "director" or "contractor"
    as "cto", "internal" as "intern" and "middleware" as "mid".

    Args:
        title: Free-text job title; ``None`` or empty is accepted.

    Returns:
        The highest matching rank, or 2 when no keyword is found.
    """
    words = re.findall(r"[a-z]+", (title or "").lower())
    ranks = [SENIORITY_MAP[word] for word in words if word in SENIORITY_MAP]
    return max(ranks, default=2)
|
|
|
|
def compute_growth_velocity(work_experience: list[dict], is_funded: bool = False) -> float:
    """Score how quickly a candidate's seniority has grown, in [0, 1].

    Entries that are dicts with a ``title`` or ``role`` are ordered by their
    ``start_date`` (compared as text) and the seniority change between the
    first and last entry is divided by the total tenure in years. That
    velocity is mapped linearly so that -1 level/year -> 0.0 and
    +2 levels/year -> 1.0, then clamped.

    Args:
        work_experience: List of work-experience dicts, or a JSON string
            encoding such a list. Anything else is treated as no data.
        is_funded: Adds a 0.1 bonus (capped at 1.0) when true.

    Returns:
        0.5 (0.6 when ``is_funded``) if fewer than two usable entries exist;
        otherwise the normalized velocity rounded to 4 decimals.
    """
    import json as _json

    if isinstance(work_experience, str):
        try:
            work_experience = _json.loads(work_experience)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            work_experience = []

    # Valid JSON can still decode to a scalar (e.g. "5"); iterating that
    # would raise TypeError, so treat any non-iterable as "no data".
    try:
        candidates = list(work_experience or [])
    except TypeError:
        candidates = []

    valid_entries = [
        e for e in candidates
        if isinstance(e, dict) and (e.get("title") or e.get("role"))
    ]

    if len(valid_entries) < 2:
        # Not enough history to measure growth: neutral score plus the
        # funded bonus.
        return 0.6 if is_funded else 0.5

    # str() keeps the sort from failing when start_date types are mixed
    # (e.g. a missing value next to a non-string one).
    entries = sorted(valid_entries, key=lambda e: str(e.get("start_date") or ""))

    seniority_levels = []
    total_months = 0.0
    for entry in entries:
        title = entry.get("title") or entry.get("role") or ""
        seniority_levels.append(_extract_seniority(title))
        total_months += _parse_duration_months(entry)

    seniority_gain = seniority_levels[-1] - seniority_levels[0]
    # Floor at half a year so very short tenures do not blow up the ratio.
    years_elapsed = max(total_months / 12, 0.5)
    velocity = seniority_gain / years_elapsed

    normalized = min(max((velocity + 1) / 3, 0.0), 1.0)
    if is_funded:
        normalized = min(normalized + 0.1, 1.0)

    return round(normalized, 4)
|
|
|
|
def _normalized_skill_set(skills) -> set[str]:
    """Return the lower-cased, trimmed, non-blank string skills as a set."""
    return {
        skill.strip().lower()
        for skill in (skills or [])
        if isinstance(skill, str) and skill.strip()
    }


def skill_jaccard(jd_skills: list[str], candidate_skills: list[str]) -> float:
    """Jaccard similarity between JD skills and candidate skills.

    Comparison is case-insensitive and ignores surrounding whitespace. Blank
    and non-string entries are discarded before comparing so they cannot
    count as a phantom skill.

    Args:
        jd_skills: Skills required by the job description.
        candidate_skills: Skills listed by the candidate; ``None`` is treated
            as empty.

    Returns:
        0.5 when the JD names no usable skills (nothing to compare against),
        0.0 when the candidate has none, otherwise
        ``|intersection| / |union|``.
    """
    jd_set = _normalized_skill_set(jd_skills)
    if not jd_set:
        return 0.5

    cand_set = _normalized_skill_set(candidate_skills)
    if not cand_set:
        return 0.0

    return len(jd_set & cand_set) / len(jd_set | cand_set)
|
|
|
|
| def yoe_match(min_yoe: float | None, max_yoe: float | None, candidate_yoe: float | None) -> float: |
| if candidate_yoe is None: |
| return 0.5 |
| if min_yoe is None and max_yoe is None: |
| return 0.7 |
| candidate_yoe = float(candidate_yoe) |
| if min_yoe is not None and candidate_yoe < min_yoe: |
| gap = min_yoe - candidate_yoe |
| return max(0.0, 1.0 - gap * 0.2) |
| if max_yoe is not None and candidate_yoe > max_yoe + 3: |
| return 0.7 |
| return 1.0 |
|
|
|
|
def company_quality_signal(candidate: dict[str, Any]) -> float:
    """Score the candidate's most recent company, starting from 0.5.

    Bonuses: +0.2 for a product company, +0.15 for a funded company, +0.1
    when total funding exceeds 10M and a further +0.05 when it exceeds 100M.
    The result is capped at 1.0.
    """
    funding = candidate.get("most_recent_company_total_funding") or 0

    # (condition, bonus) pairs, applied in this order.
    bonuses = (
        (candidate.get("most_recent_company_is_product_company"), 0.2),
        (candidate.get("most_recent_company_is_funded"), 0.15),
        (funding > 10_000_000, 0.1),
        (funding > 100_000_000, 0.05),
    )

    score = 0.5
    for applies, bonus in bonuses:
        if applies:
            score += bonus
    return min(score, 1.0)
|
|
|
|
def education_match(candidate: dict[str, Any]) -> float:
    """Score a candidate's education from the ``degree`` and
    ``education_status`` fields.

    Base score is 0.5, raised to 0.6 for a bachelor's, 0.8 for a master's or
    MBA, and 0.9 for a PhD/doctorate (the highest level found wins). A
    further 0.15 (capped at 1.0) is added when either field names a
    TIER1_EDU institution.

    Short keywords ("be", "mba", institution names) are matched as whole
    words; substring matching would find "mba" in "mumbai", "nit" in
    "community" and "mit" in "admitted".
    """
    degree = (candidate.get("degree") or "").lower()
    status = (candidate.get("education_status") or "").lower()

    degree_words = set(re.findall(r"[a-z0-9]+", degree))
    status_words = set(re.findall(r"[a-z0-9]+", status))

    score = 0.5
    # "be" / "b.e" / "b.e." as a standalone token, including at end of string.
    if "bachelor" in degree or "b.tech" in degree or re.search(r"\bb\.?e\b", degree):
        score = 0.6
    if "master" in degree or "m.tech" in degree or "mba" in degree_words:
        score = 0.8
    if "phd" in degree or "doctorate" in degree:
        score = 0.9

    # The bonus is applied once, however many institutions are mentioned.
    if not TIER1_EDU.isdisjoint(degree_words | status_words):
        score = min(score + 0.15, 1.0)
    return score
|
|
|
|
def compute_jd_quality(raw_text: str, parsed: dict[str, Any], candidate_count: int = 0) -> dict[str, Any]:
    """Assess how usable a job description is for matching.

    Args:
        raw_text: The JD text; ``None`` is treated as empty.
        parsed: Parsed requirements; reads ``required_skills`` and
            ``min_yoe``. ``None`` is treated as empty.
        candidate_count: Size of the candidate pool. The breadth check only
            applies when it is positive.

    Returns:
        A dict with keys ``overall`` ("good", "fair" or "poor"),
        ``vagueness_score``, ``breadth_score``, ``skill_count``,
        ``contradictions`` and ``warnings``.
    """
    raw_text = raw_text or ""
    parsed = parsed or {}
    text_lower = raw_text.lower()

    required_skills = parsed.get("required_skills") or []
    skill_count = len(required_skills)

    # Fewer named skills -> vaguer JD.
    if skill_count >= 5:
        vagueness_score = 0.2
    elif skill_count >= 3:
        vagueness_score = 0.5
    elif skill_count >= 1:
        vagueness_score = 0.75
    else:
        vagueness_score = 1.0

    # Very short descriptions are penalized further.
    word_count = len(raw_text.split())
    if word_count < 50:
        vagueness_score = min(vagueness_score + 0.3, 1.0)

    contradictions = []
    min_yoe = parsed.get("min_yoe")
    if min_yoe and min_yoe >= 5 and "junior" in text_lower:
        contradictions.append("Requires 5+ YOE but mentions junior role")
    if min_yoe and min_yoe <= 1 and "senior" in text_lower:
        contradictions.append("Entry-level YOE but expects senior candidate")

    breadth_score = 0.0
    if candidate_count > 0 and skill_count < 2:
        breadth_score = 0.9

    warnings = []
    if vagueness_score > 0.6:
        warnings.append("JD is too vague — add more specific skill requirements for better match quality")
    if contradictions:
        warnings.append(f"Contradictions detected: {'; '.join(contradictions)}")
    if breadth_score > 0.7:
        warnings.append("Requirements are too broad — almost all candidates will match")

    overall = "good"
    if vagueness_score > 0.6 or contradictions or breadth_score > 0.7:
        overall = "poor"
    elif vagueness_score > 0.35:
        overall = "fair"

    return {
        "overall": overall,
        "vagueness_score": round(vagueness_score, 3),
        "breadth_score": round(breadth_score, 3),
        "skill_count": skill_count,
        "contradictions": contradictions,
        "warnings": warnings,
    }
|
|
|
|
def parse_jd_requirements(raw_text: str) -> dict[str, Any]:
    """Extract structured requirements from free-text job-description text.

    Purely regex/keyword based.

    Args:
        raw_text: The JD text; ``None`` is treated as empty.

    Returns:
        A dict with:
        - ``required_skills``: lower-cased, de-duplicated known skill
          keywords found in the text.
        - ``min_yoe`` / ``max_yoe``: years of experience as floats. A range
          such as "3-5 years of experience" fills both; a single figure such
          as "5+ years of experience" fills only ``min_yoe``. ``None`` when
          nothing is found.
        - ``role_type``: "full-time", "contract", "part-time" or ``None``.
        - ``engineer_type``: "backend", "frontend", "fullstack", "ai",
          "data" or ``None`` (first matching category in that order).
        - ``remote_allowed``: whether the word "remote" appears.
        - ``location``: first known location mentioned in the text, or
          ``None``.
    """
    raw_text = raw_text or ""

    skill_patterns = [
        # A plain trailing \b can never match after "c++" when a space or
        # punctuation follows (both sides are non-word characters), so accept
        # either "just consumed a +" or a normal word boundary.
        r"\b(python|javascript|typescript|java|go|golang|rust|c\+\+|ruby|php|scala|kotlin|swift)(?:(?<=\+)|\b)",
        r"\b(react|angular|vue|nextjs|fastapi|django|flask|express|springboot|rails)\b",
        r"\b(postgresql|mysql|mongodb|redis|elasticsearch|kafka|rabbitmq|cassandra)\b",
        r"\b(aws|gcp|azure|docker|kubernetes|terraform|ansible|ci\/cd|devops)\b",
        r"\b(machine learning|deep learning|nlp|llm|rag|vector|embedding|pytorch|tensorflow)\b",
        r"\b(sql|nosql|graphql|rest|grpc|microservices|api)\b",
    ]
    skills = []
    for pattern in skill_patterns:
        skills.extend(hit.lower() for hit in re.findall(pattern, raw_text, re.IGNORECASE))
    skills = list(dict.fromkeys(skills))  # de-duplicate, keep first-seen order

    # Optional "<low> - " / "<low> to " prefix so that "3-5 years" is read as
    # a range rather than as a minimum of 5.
    yoe_hit = re.search(
        r"(?:(\d+)\s*(?:[-\u2013]|to)\s*)?(\d+)\+?\s*(?:years?|yrs?)\s*(?:of\s*)?(?:experience|exp)",
        raw_text,
        re.IGNORECASE,
    )
    min_yoe = None
    max_yoe = None
    if yoe_hit:
        low, high = yoe_hit.groups()
        if low is None:
            min_yoe = float(high)
        else:
            min_yoe, max_yoe = sorted((float(low), float(high)))

    role_type = None
    if re.search(r"\bfull.?time\b", raw_text, re.IGNORECASE):
        role_type = "full-time"
    elif re.search(r"\bcontract\b", raw_text, re.IGNORECASE):
        role_type = "contract"
    elif re.search(r"\bpart.?time\b", raw_text, re.IGNORECASE):
        role_type = "part-time"

    engineer_type = None
    if re.search(r"\bbackend\b", raw_text, re.IGNORECASE):
        engineer_type = "backend"
    elif re.search(r"\bfrontend\b", raw_text, re.IGNORECASE):
        engineer_type = "frontend"
    elif re.search(r"\b(?:fullstack|full.?stack)\b", raw_text, re.IGNORECASE):
        engineer_type = "fullstack"
    # Grouped so the leading \b applies to every alternative; otherwise
    # "html engineer" matches "ml engineer".
    elif re.search(r"\b(?:ai\s+engineer|ml\s+engineer|machine\s+learning)", raw_text, re.IGNORECASE):
        engineer_type = "ai"
    elif re.search(r"\bdata\s+engineer\b", raw_text, re.IGNORECASE):
        engineer_type = "data"

    remote_allowed = bool(re.search(r"\bremote\b", raw_text, re.IGNORECASE))

    # "US" must be upper-case so the pronoun "us" ("join us") is not taken
    # for a country; every other name is matched case-insensitively.
    location_hit = re.search(
        r"\b(?:(?i:bangalore|mumbai|delhi|hyderabad|chennai|pune|kolkata|remote|india"
        r"|usa|uk|london|new york|san francisco)|US)\b",
        raw_text,
    )
    location = None
    if location_hit:
        found = location_hit.group(0)
        # Country codes stay upper-case; title() would produce "Uk" / "Usa".
        location = found.upper() if found.lower() in {"us", "usa", "uk"} else found.title()

    return {
        "required_skills": skills,
        "min_yoe": min_yoe,
        "max_yoe": max_yoe,
        "role_type": role_type,
        "engineer_type": engineer_type,
        "remote_allowed": remote_allowed,
        "location": location,
    }
|
|