""" Scoring Engine Takes match analysis and produces raw probability scores. v1: LLM-based scoring with structured prompts v2: Will use trained ML model with LLM features as inputs """ import json from typing import Optional from .feature_extractor import LLMClient, _extract_json from .prompts.scoring import PROBABILITY_SCORING_PROMPT class ScoringEngine: """Produces probability scores from match analysis.""" def __init__(self, llm_client: Optional[LLMClient] = None): self.llm = llm_client or LLMClient() def score(self, match_analysis: dict) -> dict: """Generate probability scores from match analysis.""" prompt = PROBABILITY_SCORING_PROMPT.format( match_analysis=json.dumps(match_analysis, indent=2), ) response = self.llm.complete(prompt, temperature=0.1) return _extract_json(response) def score_deterministic(self, match_analysis: dict) -> dict: """ Rule-based scoring fallback. No LLM needed. Useful for testing, offline mode, and as v2 baseline. """ skill_match = match_analysis.get("skill_match_analysis", {}) seniority = match_analysis.get("seniority_alignment", {}) experience = match_analysis.get("experience_depth", {}) context = match_analysis.get("context_fit", {}) risks = match_analysis.get("risk_flags", []) # Shortlist scoring coverage = skill_match.get("coverage_ratio", 0.0) missing_critical = len(skill_match.get("missing_must_haves", [])) shortlist_base = coverage * 70 # 0-70 from skill coverage # Seniority alignment bonus/penalty alignment = seniority.get("alignment", "aligned") seniority_mod = { "aligned": 15, "slightly_under": 5, "slightly_over": 0, "underqualified": -20, "overqualified": -10, }.get(alignment, 0) # Experience depth bonus depth = experience.get("depth_assessment", "adequate") depth_mod = { "exceptional": 15, "strong": 10, "adequate": 0, "insufficient": -20, }.get(depth, 0) shortlist_raw = max(5, min(92, shortlist_base + seniority_mod + depth_mod)) # Apply hard caps shortlist_caps = [] if missing_critical >= 3: shortlist_raw = min(shortlist_raw, 10) shortlist_caps.append("3+ missing critical skills -> cap 10%") elif missing_critical >= 2: shortlist_raw = min(shortlist_raw, 25) shortlist_caps.append("2 missing critical skills -> cap 25%") elif missing_critical >= 1: shortlist_raw = min(shortlist_raw, 45) shortlist_caps.append("1 missing critical skill -> cap 45%") # Offer acceptance scoring comp_fit = context.get("compensation_alignment_estimate", "unclear") comp_score = { "likely_aligned": 70, "unclear": 50, "likely_below": 30, "likely_above": 25, }.get(comp_fit, 50) location_fit = context.get("remote_fit", "possible") location_score = { "compatible": 80, "possible": 50, "incompatible": 15, }.get(location_fit, 50) stage_fit = context.get("company_stage_fit", "some_experience") stage_score = { "strong_experience": 75, "some_experience": 55, "no_experience": 35, }.get(stage_fit, 55) offer_raw = (comp_score * 0.35 + location_score * 0.25 + stage_score * 0.20 + 50 * 0.20) # 50 for unknowns offer_raw = max(5, min(92, offer_raw)) offer_caps = [] if location_fit == "incompatible": offer_raw = min(offer_raw, 15) offer_caps.append("location incompatible -> cap 15%") if comp_fit == "likely_above": offer_raw = min(offer_raw, 35) offer_caps.append("comp likely above band -> cap 35%") # Retention scoring (uses risk flags) retention_raw = 65 # Base rate assumption high_risks = [r for r in risks if r.get("severity") == "high" and r.get("category") == "retention"] medium_risks = [r for r in risks if r.get("severity") == "medium" and r.get("category") == "retention"] retention_raw -= len(high_risks) * 20 retention_raw -= len(medium_risks) * 10 if alignment == "overqualified": retention_raw -= 15 retention_raw = max(5, min(92, retention_raw)) retention_caps = [] # Overall hire probability (weighted blend, not multiplicative) interview_pass = min(80, shortlist_raw * 0.6 + 15) overall_raw = ( shortlist_raw * 0.45 + interview_pass * 0.25 + offer_raw * 0.30 ) overall_raw = max(5, min(92, overall_raw)) # Confidence data_signals = ( len(skill_match.get("matched_must_haves", [])) + len(skill_match.get("matched_preferred", [])) + len(match_analysis.get("positive_signals", [])) + len(match_analysis.get("risk_flags", [])) # risks are still data + (1 if experience.get("years_relevant") else 0) + (1 if context.get("industry_overlap") else 0) + (1 if context.get("compensation_alignment_estimate", "unclear") != "unclear" else 0) ) if data_signals >= 12 and coverage >= 0.7: confidence = "high" elif data_signals >= 5 and coverage >= 0.4: confidence = "medium" else: confidence = "low" return { "shortlist_probability": { "value": round(shortlist_raw, 1), "hard_caps_applied": shortlist_caps, }, "interview_pass_estimate": { "value": round(interview_pass, 1), }, "offer_acceptance_probability": { "value": round(offer_raw, 1), "hard_caps_applied": offer_caps, }, "retention_6m_probability": { "value": round(retention_raw, 1), "hard_caps_applied": retention_caps, }, "overall_hire_probability": { "value": round(overall_raw, 1), "formula_inputs": { "p_shortlist": round(shortlist_raw, 1), "p_interview_pass": round(interview_pass, 1), "p_offer_accept": round(offer_raw, 1), }, }, "confidence_level": confidence, }