# recruitment-intelligence/src/scoring_engine.py
"""
Scoring Engine
Takes match analysis and produces raw probability scores.
v1: LLM-based scoring with structured prompts
v2: Will use trained ML model with LLM features as inputs
"""
import json
from typing import Optional
from .feature_extractor import LLMClient, _extract_json
from .prompts.scoring import PROBABILITY_SCORING_PROMPT
class ScoringEngine:
    """Produces probability scores from match analysis.

    Two modes:
      * ``score``           -- v1: LLM-backed scoring via a structured prompt.
      * ``score_deterministic`` -- rule-based fallback that needs no LLM;
        used for testing, offline mode, and as the v2 baseline.
    """

    def __init__(self, llm_client: Optional[LLMClient] = None):
        # Default client keeps the LLM dependency optional for callers.
        self.llm = llm_client or LLMClient()

    def score(self, match_analysis: dict) -> dict:
        """Generate probability scores from match analysis via the LLM.

        Args:
            match_analysis: Structured match-analysis dict (the keys consulted
                are the same ones ``score_deterministic`` reads).

        Returns:
            dict parsed from the JSON embedded in the LLM response.
        """
        prompt = PROBABILITY_SCORING_PROMPT.format(
            match_analysis=json.dumps(match_analysis, indent=2),
        )
        # Low temperature keeps scoring as repeatable as the LLM allows.
        response = self.llm.complete(prompt, temperature=0.1)
        return _extract_json(response)

    # ------------------------------------------------------------------
    # Deterministic fallback: helpers below are pure functions of their
    # arguments and carry no instance state.
    # ------------------------------------------------------------------

    def _score_shortlist(self, skill_match: dict, alignment: str,
                         experience: dict):
        """Shortlist probability from skill coverage plus seniority/depth
        modifiers, then hard caps for missing must-have skills.

        Returns:
            (raw_score clamped to [5, 92] then capped, list of cap messages)
        """
        coverage = skill_match.get("coverage_ratio", 0.0)
        missing_critical = len(skill_match.get("missing_must_haves", []))
        base = coverage * 70  # 0-70 from skill coverage

        # Seniority alignment bonus/penalty.
        seniority_mod = {
            "aligned": 15,
            "slightly_under": 5,
            "slightly_over": 0,
            "underqualified": -20,
            "overqualified": -10,
        }.get(alignment, 0)

        # Experience depth bonus/penalty.
        depth_mod = {
            "exceptional": 15,
            "strong": 10,
            "adequate": 0,
            "insufficient": -20,
        }.get(experience.get("depth_assessment", "adequate"), 0)

        raw = max(5, min(92, base + seniority_mod + depth_mod))

        # Hard caps: missing must-have skills dominate any earned bonuses.
        caps = []
        if missing_critical >= 3:
            raw = min(raw, 10)
            caps.append("3+ missing critical skills -> cap 10%")
        elif missing_critical >= 2:
            raw = min(raw, 25)
            caps.append("2 missing critical skills -> cap 25%")
        elif missing_critical >= 1:
            raw = min(raw, 45)
            caps.append("1 missing critical skill -> cap 45%")
        return raw, caps

    def _score_offer(self, context: dict):
        """Offer-acceptance probability from compensation, location and
        company-stage fit.

        Returns:
            (raw_score clamped to [5, 92] then capped, list of cap messages)
        """
        comp_fit = context.get("compensation_alignment_estimate", "unclear")
        comp_score = {
            "likely_aligned": 70,
            "unclear": 50,
            "likely_below": 30,
            "likely_above": 25,
        }.get(comp_fit, 50)

        location_fit = context.get("remote_fit", "possible")
        location_score = {
            "compatible": 80,
            "possible": 50,
            "incompatible": 15,
        }.get(location_fit, 50)

        stage_score = {
            "strong_experience": 75,
            "some_experience": 55,
            "no_experience": 35,
        }.get(context.get("company_stage_fit", "some_experience"), 55)

        raw = (comp_score * 0.35 + location_score * 0.25
               + stage_score * 0.20 + 50 * 0.20)  # 50 for unknowns
        raw = max(5, min(92, raw))

        caps = []
        if location_fit == "incompatible":
            raw = min(raw, 15)
            caps.append("location incompatible -> cap 15%")
        if comp_fit == "likely_above":
            raw = min(raw, 35)
            caps.append("comp likely above band -> cap 35%")
        return raw, caps

    def _score_retention(self, risks: list, alignment: str):
        """6-month retention probability: base-rate assumption minus
        retention-category risk flags and an overqualification penalty."""
        raw = 65  # base rate assumption
        retention_risks = [r for r in risks if r.get("category") == "retention"]
        raw -= 20 * sum(1 for r in retention_risks if r.get("severity") == "high")
        raw -= 10 * sum(1 for r in retention_risks if r.get("severity") == "medium")
        if alignment == "overqualified":
            raw -= 15
        return max(5, min(92, raw))

    def _confidence(self, match_analysis: dict, skill_match: dict,
                    experience: dict, context: dict, coverage) -> str:
        """Confidence bucket ('high'/'medium'/'low') from how many distinct
        data signals were present in the analysis."""
        data_signals = (
            len(skill_match.get("matched_must_haves", []))
            + len(skill_match.get("matched_preferred", []))
            + len(match_analysis.get("positive_signals", []))
            + len(match_analysis.get("risk_flags", []))  # risks are still data
            + (1 if experience.get("years_relevant") else 0)
            + (1 if context.get("industry_overlap") else 0)
            + (1 if context.get("compensation_alignment_estimate", "unclear") != "unclear" else 0)
        )
        if data_signals >= 12 and coverage >= 0.7:
            return "high"
        if data_signals >= 5 and coverage >= 0.4:
            return "medium"
        return "low"

    def score_deterministic(self, match_analysis: dict) -> dict:
        """
        Rule-based scoring fallback. No LLM needed.
        Useful for testing, offline mode, and as v2 baseline.

        Args:
            match_analysis: dict with optional keys ``skill_match_analysis``,
                ``seniority_alignment``, ``experience_depth``, ``context_fit``,
                ``risk_flags``, ``positive_signals``. Missing keys fall back
                to neutral defaults.

        Returns:
            dict of probability buckets (each with a rounded ``value`` and,
            where applicable, ``hard_caps_applied``) plus ``confidence_level``.
        """
        skill_match = match_analysis.get("skill_match_analysis", {})
        seniority = match_analysis.get("seniority_alignment", {})
        experience = match_analysis.get("experience_depth", {})
        context = match_analysis.get("context_fit", {})
        risks = match_analysis.get("risk_flags", [])

        coverage = skill_match.get("coverage_ratio", 0.0)
        alignment = seniority.get("alignment", "aligned")

        shortlist_raw, shortlist_caps = self._score_shortlist(
            skill_match, alignment, experience)
        offer_raw, offer_caps = self._score_offer(context)
        retention_raw = self._score_retention(risks, alignment)
        retention_caps = []  # no hard caps defined for retention (yet)

        # Overall hire probability (weighted blend, not multiplicative).
        interview_pass = min(80, shortlist_raw * 0.6 + 15)
        overall_raw = (
            shortlist_raw * 0.45
            + interview_pass * 0.25
            + offer_raw * 0.30
        )
        overall_raw = max(5, min(92, overall_raw))

        confidence = self._confidence(
            match_analysis, skill_match, experience, context, coverage)

        return {
            "shortlist_probability": {
                "value": round(shortlist_raw, 1),
                "hard_caps_applied": shortlist_caps,
            },
            "interview_pass_estimate": {
                "value": round(interview_pass, 1),
            },
            "offer_acceptance_probability": {
                "value": round(offer_raw, 1),
                "hard_caps_applied": offer_caps,
            },
            "retention_6m_probability": {
                "value": round(retention_raw, 1),
                "hard_caps_applied": retention_caps,
            },
            "overall_hire_probability": {
                "value": round(overall_raw, 1),
                "formula_inputs": {
                    "p_shortlist": round(shortlist_raw, 1),
                    "p_interview_pass": round(interview_pass, 1),
                    "p_offer_accept": round(offer_raw, 1),
                },
            },
            "confidence_level": confidence,
        }