"""
Scoring Engine
Takes match analysis and produces raw probability scores.
v1: LLM-based scoring with structured prompts
v2: Will use trained ML model with LLM features as inputs
"""
import json
from typing import Optional
from .feature_extractor import LLMClient, _extract_json
from .prompts.scoring import PROBABILITY_SCORING_PROMPT
class ScoringEngine:
    """Produces probability scores from match analysis.

    `score` delegates to the LLM (v1 behavior); `score_deterministic` is a
    pure rule-based fallback that emits the same output schema without any
    LLM call.
    """

    def __init__(self, llm_client: Optional["LLMClient"] = None):
        # Forward-reference annotation avoids evaluating LLMClient at def
        # time; callers may inject a fake client for testing.
        self.llm = llm_client or LLMClient()

    def score(self, match_analysis: dict) -> dict:
        """Generate probability scores from match analysis via the LLM.

        Args:
            match_analysis: Structured match-analysis dict from the
                upstream feature-extraction stage.

        Returns:
            The JSON object parsed out of the LLM completion.
        """
        prompt = PROBABILITY_SCORING_PROMPT.format(
            match_analysis=json.dumps(match_analysis, indent=2),
        )
        # Low temperature keeps the numeric scoring near-deterministic.
        response = self.llm.complete(prompt, temperature=0.1)
        return _extract_json(response)

    @staticmethod
    def _clamp(value: float) -> float:
        """Clamp a raw score to [5, 92] — never fully certain either way."""
        return max(5, min(92, value))

    def _score_shortlist(self, skill_match: dict, alignment: str,
                         experience: dict, coverage: float) -> tuple:
        """Score P(shortlist) from coverage, seniority, and depth.

        Returns:
            (raw_score, list_of_hard_cap_descriptions)
        """
        shortlist_base = coverage * 70  # 0-70 from skill coverage

        # Seniority alignment bonus/penalty
        seniority_mod = {
            "aligned": 15,
            "slightly_under": 5,
            "slightly_over": 0,
            "underqualified": -20,
            "overqualified": -10,
        }.get(alignment, 0)

        # Experience depth bonus
        depth = experience.get("depth_assessment", "adequate")
        depth_mod = {
            "exceptional": 15,
            "strong": 10,
            "adequate": 0,
            "insufficient": -20,
        }.get(depth, 0)

        raw = self._clamp(shortlist_base + seniority_mod + depth_mod)

        # Hard caps: missing must-have skills dominate any bonuses.
        caps = []
        missing_critical = len(skill_match.get("missing_must_haves", []))
        if missing_critical >= 3:
            raw = min(raw, 10)
            caps.append("3+ missing critical skills -> cap 10%")
        elif missing_critical >= 2:
            raw = min(raw, 25)
            caps.append("2 missing critical skills -> cap 25%")
        elif missing_critical >= 1:
            raw = min(raw, 45)
            caps.append("1 missing critical skill -> cap 45%")
        return raw, caps

    def _score_offer(self, context: dict) -> tuple:
        """Score P(offer accepted) from comp, location, and stage fit.

        Returns:
            (raw_score, list_of_hard_cap_descriptions)
        """
        comp_fit = context.get("compensation_alignment_estimate", "unclear")
        comp_score = {
            "likely_aligned": 70,
            "unclear": 50,
            "likely_below": 30,
            "likely_above": 25,
        }.get(comp_fit, 50)

        location_fit = context.get("remote_fit", "possible")
        location_score = {
            "compatible": 80,
            "possible": 50,
            "incompatible": 15,
        }.get(location_fit, 50)

        stage_fit = context.get("company_stage_fit", "some_experience")
        stage_score = {
            "strong_experience": 75,
            "some_experience": 55,
            "no_experience": 35,
        }.get(stage_fit, 55)

        raw = self._clamp(comp_score * 0.35 + location_score * 0.25
                          + stage_score * 0.20 + 50 * 0.20)  # 50 for unknowns

        caps = []
        if location_fit == "incompatible":
            raw = min(raw, 15)
            caps.append("location incompatible -> cap 15%")
        if comp_fit == "likely_above":
            raw = min(raw, 35)
            caps.append("comp likely above band -> cap 35%")
        return raw, caps

    def _score_retention(self, risks: list, alignment: str) -> tuple:
        """Score P(retained at 6 months) from retention-category risk flags.

        Returns:
            (raw_score, list_of_hard_cap_descriptions)
        """
        raw = 65  # Base rate assumption
        high = sum(1 for r in risks
                   if r.get("severity") == "high" and r.get("category") == "retention")
        medium = sum(1 for r in risks
                     if r.get("severity") == "medium" and r.get("category") == "retention")
        raw -= high * 20 + medium * 10
        if alignment == "overqualified":
            # Overqualified hires are assumed to churn faster.
            raw -= 15
        return self._clamp(raw), []

    @staticmethod
    def _confidence(skill_match: dict, experience: dict, context: dict,
                    positive_signals: list, risks: list, coverage: float) -> str:
        """Bucket confidence by how many concrete data signals were present."""
        data_signals = (
            len(skill_match.get("matched_must_haves", []))
            + len(skill_match.get("matched_preferred", []))
            + len(positive_signals)
            + len(risks)  # risks are still data
            + (1 if experience.get("years_relevant") else 0)
            + (1 if context.get("industry_overlap") else 0)
            + (1 if context.get("compensation_alignment_estimate", "unclear") != "unclear" else 0)
        )
        if data_signals >= 12 and coverage >= 0.7:
            return "high"
        if data_signals >= 5 and coverage >= 0.4:
            return "medium"
        return "low"

    def score_deterministic(self, match_analysis: dict) -> dict:
        """
        Rule-based scoring fallback. No LLM needed.

        Useful for testing, offline mode, and as v2 baseline. Produces the
        same schema as `score`: probabilities for shortlist, interview pass,
        offer acceptance, 6-month retention, and an overall blend, plus a
        confidence bucket.
        """
        skill_match = match_analysis.get("skill_match_analysis", {})
        seniority = match_analysis.get("seniority_alignment", {})
        experience = match_analysis.get("experience_depth", {})
        context = match_analysis.get("context_fit", {})
        risks = match_analysis.get("risk_flags", [])

        alignment = seniority.get("alignment", "aligned")
        # Defensive clamp: malformed upstream output can report a coverage
        # ratio outside [0, 1], which would overflow the 0-70 base band.
        coverage = max(0.0, min(1.0, skill_match.get("coverage_ratio", 0.0)))

        shortlist_raw, shortlist_caps = self._score_shortlist(
            skill_match, alignment, experience, coverage)
        offer_raw, offer_caps = self._score_offer(context)
        retention_raw, retention_caps = self._score_retention(risks, alignment)

        # Overall hire probability (weighted blend, not multiplicative);
        # interview-pass is derived from shortlist strength, capped at 80.
        interview_pass = min(80, shortlist_raw * 0.6 + 15)
        overall_raw = self._clamp(
            shortlist_raw * 0.45
            + interview_pass * 0.25
            + offer_raw * 0.30
        )

        confidence = self._confidence(
            skill_match, experience, context,
            match_analysis.get("positive_signals", []), risks, coverage,
        )

        return {
            "shortlist_probability": {
                "value": round(shortlist_raw, 1),
                "hard_caps_applied": shortlist_caps,
            },
            "interview_pass_estimate": {
                "value": round(interview_pass, 1),
            },
            "offer_acceptance_probability": {
                "value": round(offer_raw, 1),
                "hard_caps_applied": offer_caps,
            },
            "retention_6m_probability": {
                "value": round(retention_raw, 1),
                "hard_caps_applied": retention_caps,
            },
            "overall_hire_probability": {
                "value": round(overall_raw, 1),
                "formula_inputs": {
                    "p_shortlist": round(shortlist_raw, 1),
                    "p_interview_pass": round(interview_pass, 1),
                    "p_offer_accept": round(offer_raw, 1),
                },
            },
            "confidence_level": confidence,
        }