# recruitment-intelligence/src/scoring_engine.py
"""
Scoring Engine
Takes match analysis and produces raw probability scores.
v1: LLM-based scoring with structured prompts
v2: Will use trained ML model with LLM features as inputs
"""
import json
from typing import Optional
from .feature_extractor import LLMClient, _extract_json
from .prompts.scoring import PROBABILITY_SCORING_PROMPT
class ScoringEngine:
    """Produces probability scores from match analysis.

    Two modes:
      * ``score``           -- v1: LLM-backed scoring via a structured prompt.
      * ``score_deterministic`` -- rule-based fallback that needs no LLM;
        used for testing, offline mode, and as the v2 baseline.
    """

    def __init__(self, llm_client: Optional[LLMClient] = None):
        # Default client keeps the LLM dependency optional for callers.
        self.llm = llm_client or LLMClient()

    def score(self, match_analysis: dict) -> dict:
        """Generate probability scores from match analysis via the LLM.

        Args:
            match_analysis: Structured match-analysis dict (the keys consulted
                are the same ones ``score_deterministic`` reads).

        Returns:
            dict parsed from the JSON embedded in the LLM response.
        """
        prompt = PROBABILITY_SCORING_PROMPT.format(
            match_analysis=json.dumps(match_analysis, indent=2),
        )
        # Low temperature keeps scoring as repeatable as the LLM allows.
        response = self.llm.complete(prompt, temperature=0.1)
        return _extract_json(response)

    # ------------------------------------------------------------------
    # Deterministic fallback: helpers below are pure functions of their
    # arguments and carry no instance state.
    # ------------------------------------------------------------------

    def _score_shortlist(self, skill_match: dict, alignment: str,
                         experience: dict):
        """Shortlist probability from skill coverage plus seniority/depth
        modifiers, then hard caps for missing must-have skills.

        Returns:
            (raw_score clamped to [5, 92] then capped, list of cap messages)
        """
        coverage = skill_match.get("coverage_ratio", 0.0)
        missing_critical = len(skill_match.get("missing_must_haves", []))
        base = coverage * 70  # 0-70 from skill coverage

        # Seniority alignment bonus/penalty.
        seniority_mod = {
            "aligned": 15,
            "slightly_under": 5,
            "slightly_over": 0,
            "underqualified": -20,
            "overqualified": -10,
        }.get(alignment, 0)

        # Experience depth bonus/penalty.
        depth_mod = {
            "exceptional": 15,
            "strong": 10,
            "adequate": 0,
            "insufficient": -20,
        }.get(experience.get("depth_assessment", "adequate"), 0)

        raw = max(5, min(92, base + seniority_mod + depth_mod))

        # Hard caps: missing must-have skills dominate any earned bonuses.
        caps = []
        if missing_critical >= 3:
            raw = min(raw, 10)
            caps.append("3+ missing critical skills -> cap 10%")
        elif missing_critical >= 2:
            raw = min(raw, 25)
            caps.append("2 missing critical skills -> cap 25%")
        elif missing_critical >= 1:
            raw = min(raw, 45)
            caps.append("1 missing critical skill -> cap 45%")
        return raw, caps

    def _score_offer(self, context: dict):
        """Offer-acceptance probability from compensation, location and
        company-stage fit.

        Returns:
            (raw_score clamped to [5, 92] then capped, list of cap messages)
        """
        comp_fit = context.get("compensation_alignment_estimate", "unclear")
        comp_score = {
            "likely_aligned": 70,
            "unclear": 50,
            "likely_below": 30,
            "likely_above": 25,
        }.get(comp_fit, 50)

        location_fit = context.get("remote_fit", "possible")
        location_score = {
            "compatible": 80,
            "possible": 50,
            "incompatible": 15,
        }.get(location_fit, 50)

        stage_score = {
            "strong_experience": 75,
            "some_experience": 55,
            "no_experience": 35,
        }.get(context.get("company_stage_fit", "some_experience"), 55)

        raw = (comp_score * 0.35 + location_score * 0.25
               + stage_score * 0.20 + 50 * 0.20)  # 50 for unknowns
        raw = max(5, min(92, raw))

        caps = []
        if location_fit == "incompatible":
            raw = min(raw, 15)
            caps.append("location incompatible -> cap 15%")
        if comp_fit == "likely_above":
            raw = min(raw, 35)
            caps.append("comp likely above band -> cap 35%")
        return raw, caps

    def _score_retention(self, risks: list, alignment: str):
        """6-month retention probability: base-rate assumption minus
        retention-category risk flags and an overqualification penalty."""
        raw = 65  # base rate assumption
        retention_risks = [r for r in risks if r.get("category") == "retention"]
        raw -= 20 * sum(1 for r in retention_risks if r.get("severity") == "high")
        raw -= 10 * sum(1 for r in retention_risks if r.get("severity") == "medium")
        if alignment == "overqualified":
            raw -= 15
        return max(5, min(92, raw))

    def _confidence(self, match_analysis: dict, skill_match: dict,
                    experience: dict, context: dict, coverage) -> str:
        """Confidence bucket ('high'/'medium'/'low') from how many distinct
        data signals were present in the analysis."""
        data_signals = (
            len(skill_match.get("matched_must_haves", []))
            + len(skill_match.get("matched_preferred", []))
            + len(match_analysis.get("positive_signals", []))
            + len(match_analysis.get("risk_flags", []))  # risks are still data
            + (1 if experience.get("years_relevant") else 0)
            + (1 if context.get("industry_overlap") else 0)
            + (1 if context.get("compensation_alignment_estimate", "unclear") != "unclear" else 0)
        )
        if data_signals >= 12 and coverage >= 0.7:
            return "high"
        if data_signals >= 5 and coverage >= 0.4:
            return "medium"
        return "low"

    def score_deterministic(self, match_analysis: dict) -> dict:
        """
        Rule-based scoring fallback. No LLM needed.
        Useful for testing, offline mode, and as v2 baseline.

        Args:
            match_analysis: dict with optional keys ``skill_match_analysis``,
                ``seniority_alignment``, ``experience_depth``, ``context_fit``,
                ``risk_flags``, ``positive_signals``. Missing keys fall back
                to neutral defaults.

        Returns:
            dict of probability buckets (each with a rounded ``value`` and,
            where applicable, ``hard_caps_applied``) plus ``confidence_level``.
        """
        skill_match = match_analysis.get("skill_match_analysis", {})
        seniority = match_analysis.get("seniority_alignment", {})
        experience = match_analysis.get("experience_depth", {})
        context = match_analysis.get("context_fit", {})
        risks = match_analysis.get("risk_flags", [])

        coverage = skill_match.get("coverage_ratio", 0.0)
        alignment = seniority.get("alignment", "aligned")

        shortlist_raw, shortlist_caps = self._score_shortlist(
            skill_match, alignment, experience)
        offer_raw, offer_caps = self._score_offer(context)
        retention_raw = self._score_retention(risks, alignment)
        retention_caps = []  # no hard caps defined for retention (yet)

        # Overall hire probability (weighted blend, not multiplicative).
        interview_pass = min(80, shortlist_raw * 0.6 + 15)
        overall_raw = (
            shortlist_raw * 0.45
            + interview_pass * 0.25
            + offer_raw * 0.30
        )
        overall_raw = max(5, min(92, overall_raw))

        confidence = self._confidence(
            match_analysis, skill_match, experience, context, coverage)

        return {
            "shortlist_probability": {
                "value": round(shortlist_raw, 1),
                "hard_caps_applied": shortlist_caps,
            },
            "interview_pass_estimate": {
                "value": round(interview_pass, 1),
            },
            "offer_acceptance_probability": {
                "value": round(offer_raw, 1),
                "hard_caps_applied": offer_caps,
            },
            "retention_6m_probability": {
                "value": round(retention_raw, 1),
                "hard_caps_applied": retention_caps,
            },
            "overall_hire_probability": {
                "value": round(overall_raw, 1),
                "formula_inputs": {
                    "p_shortlist": round(shortlist_raw, 1),
                    "p_interview_pass": round(interview_pass, 1),
                    "p_offer_accept": round(offer_raw, 1),
                },
            },
            "confidence_level": confidence,
        }