Niketjain2002 committed on
Commit
ab7dfd9
·
verified ·
1 Parent(s): ef92999

Upload src/scoring_engine.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. src/scoring_engine.py +178 -0
src/scoring_engine.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Scoring Engine
3
+
4
+ Takes match analysis and produces raw probability scores.
5
+ v1: LLM-based scoring with structured prompts
6
+ v2: Will use trained ML model with LLM features as inputs
7
+ """
8
+
9
+ import json
10
+ from typing import Optional
11
+
12
+ from .feature_extractor import LLMClient, _extract_json
13
+ from .prompts.scoring import PROBABILITY_SCORING_PROMPT
14
+
15
+
16
class ScoringEngine:
    """Produces probability scores from match analysis.

    Two scoring paths are available: :meth:`score` sends the analysis to the
    LLM client, while :meth:`score_deterministic` applies fixed rules and
    never touches the LLM.
    """

    # Every percentage produced by the rule-based path is clamped to this band.
    _MIN_SCORE = 5
    _MAX_SCORE = 92

    # Shortlist modifier per seniority alignment label.
    _SENIORITY_MODS = {
        "aligned": 15,
        "slightly_under": 5,
        "slightly_over": 0,
        "underqualified": -20,
        "overqualified": -10,
    }

    # Shortlist modifier per experience depth label.
    _DEPTH_MODS = {
        "exceptional": 15,
        "strong": 10,
        "adequate": 0,
        "insufficient": -20,
    }

    # Offer-acceptance component scores.
    _COMP_SCORES = {
        "likely_aligned": 70,
        "unclear": 50,
        "likely_below": 30,
        "likely_above": 25,
    }
    _LOCATION_SCORES = {
        "compatible": 80,
        "possible": 50,
        "incompatible": 15,
    }
    _STAGE_SCORES = {
        "strong_experience": 75,
        "some_experience": 55,
        "no_experience": 35,
    }

    def __init__(self, llm_client: Optional["LLMClient"] = None):
        """Store the LLM client, creating a default ``LLMClient`` if none is given."""
        self.llm = llm_client or LLMClient()

    def score(self, match_analysis: dict) -> dict:
        """Generate probability scores from match analysis via the LLM.

        The analysis is serialized to indented JSON, substituted into
        ``PROBABILITY_SCORING_PROMPT``, completed at temperature 0.1, and the
        response is passed through ``_extract_json``.
        """
        prompt = PROBABILITY_SCORING_PROMPT.format(
            match_analysis=json.dumps(match_analysis, indent=2),
        )
        response = self.llm.complete(prompt, temperature=0.1)
        return _extract_json(response)

    @staticmethod
    def _as_dict(value) -> dict:
        """Return *value* if it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _as_list(value) -> list:
        """Return *value* as a list if it is a list/tuple, otherwise ``[]``."""
        return list(value) if isinstance(value, (list, tuple)) else []

    @staticmethod
    def _as_number(value, default):
        """Return *value* as a number, or *default* if it is not usable.

        Ints and floats are returned unchanged; anything else is parsed with
        ``float()``. Unparseable values and NaN yield *default*.
        """
        if not isinstance(value, (int, float)):
            try:
                value = float(value)
            except (TypeError, ValueError):
                return default
        # NaN is the only value that compares unequal to itself.
        return default if value != value else value

    @classmethod
    def _clamp(cls, value):
        """Clamp *value* to the [_MIN_SCORE, _MAX_SCORE] band."""
        return max(cls._MIN_SCORE, min(cls._MAX_SCORE, value))

    def score_deterministic(self, match_analysis: dict) -> dict:
        """
        Rule-based scoring fallback. No LLM needed.
        Useful for testing, offline mode, and as v2 baseline.

        Sections of ``match_analysis`` that are missing, ``None``, or of the
        wrong container type are treated as empty, so a partially filled
        analysis is scored instead of raising.

        Returns a dict with the keys ``shortlist_probability``,
        ``interview_pass_estimate``, ``offer_acceptance_probability``,
        ``retention_6m_probability``, ``overall_hire_probability`` (each a
        dict holding a ``value`` rounded to one decimal) and
        ``confidence_level`` (``"high"``, ``"medium"`` or ``"low"``).
        """
        analysis = self._as_dict(match_analysis)
        skill_match = self._as_dict(analysis.get("skill_match_analysis"))
        seniority = self._as_dict(analysis.get("seniority_alignment"))
        experience = self._as_dict(analysis.get("experience_depth"))
        context = self._as_dict(analysis.get("context_fit"))
        risks = self._as_list(analysis.get("risk_flags"))

        # Shortlist scoring
        coverage = self._as_number(skill_match.get("coverage_ratio", 0.0), 0.0)
        missing_critical = len(self._as_list(skill_match.get("missing_must_haves")))

        shortlist_base = coverage * 70  # 0-70 from skill coverage

        # Seniority alignment bonus/penalty
        alignment = seniority.get("alignment", "aligned")
        seniority_mod = self._SENIORITY_MODS.get(alignment, 0)

        # Experience depth bonus
        depth = experience.get("depth_assessment", "adequate")
        depth_mod = self._DEPTH_MODS.get(depth, 0)

        shortlist_raw = self._clamp(shortlist_base + seniority_mod + depth_mod)

        # Apply hard caps
        shortlist_caps = []
        if missing_critical >= 3:
            shortlist_raw = min(shortlist_raw, 10)
            shortlist_caps.append("3+ missing critical skills -> cap 10%")
        elif missing_critical >= 2:
            shortlist_raw = min(shortlist_raw, 25)
            shortlist_caps.append("2 missing critical skills -> cap 25%")
        elif missing_critical >= 1:
            shortlist_raw = min(shortlist_raw, 45)
            shortlist_caps.append("1 missing critical skill -> cap 45%")

        # Offer acceptance scoring
        comp_fit = context.get("compensation_alignment_estimate", "unclear")
        comp_score = self._COMP_SCORES.get(comp_fit, 50)

        location_fit = context.get("remote_fit", "possible")
        location_score = self._LOCATION_SCORES.get(location_fit, 50)

        stage_fit = context.get("company_stage_fit", "some_experience")
        stage_score = self._STAGE_SCORES.get(stage_fit, 55)

        offer_raw = (comp_score * 0.35 + location_score * 0.25
                     + stage_score * 0.20 + 50 * 0.20)  # 50 for unknowns
        offer_raw = self._clamp(offer_raw)

        offer_caps = []
        if location_fit == "incompatible":
            offer_raw = min(offer_raw, 15)
            offer_caps.append("location incompatible -> cap 15%")
        if comp_fit == "likely_above":
            offer_raw = min(offer_raw, 35)
            offer_caps.append("comp likely above band -> cap 35%")

        # Retention scoring (uses risk flags)
        retention_raw = 65  # Base rate assumption

        # Only dict-shaped flags can carry severity/category; other entries
        # are skipped here but still count as data signals below.
        retention_risks = [
            r for r in risks
            if isinstance(r, dict) and r.get("category") == "retention"
        ]
        high_risks = [r for r in retention_risks if r.get("severity") == "high"]
        medium_risks = [r for r in retention_risks if r.get("severity") == "medium"]

        retention_raw -= len(high_risks) * 20
        retention_raw -= len(medium_risks) * 10

        if alignment == "overqualified":
            retention_raw -= 15

        retention_raw = self._clamp(retention_raw)
        retention_caps = []

        # Overall hire probability
        interview_pass = min(80, shortlist_raw * 0.6 + 15)
        overall_raw = (shortlist_raw / 100) * (interview_pass / 100) * (offer_raw / 100) * 100
        overall_raw = self._clamp(overall_raw)

        # Confidence
        data_signals = (
            len(self._as_list(skill_match.get("matched_must_haves")))
            + len(self._as_list(skill_match.get("matched_preferred")))
            + len(self._as_list(analysis.get("positive_signals")))
            + len(risks)  # risks are still data
            + (1 if experience.get("years_relevant") else 0)
            + (1 if context.get("industry_overlap") else 0)
            + (1 if comp_fit != "unclear" else 0)
        )

        if data_signals >= 12 and coverage >= 0.7:
            confidence = "high"
        elif data_signals >= 5 and coverage >= 0.4:
            confidence = "medium"
        else:
            confidence = "low"

        return {
            "shortlist_probability": {
                "value": round(shortlist_raw, 1),
                "hard_caps_applied": shortlist_caps,
            },
            "interview_pass_estimate": {
                "value": round(interview_pass, 1),
            },
            "offer_acceptance_probability": {
                "value": round(offer_raw, 1),
                "hard_caps_applied": offer_caps,
            },
            "retention_6m_probability": {
                "value": round(retention_raw, 1),
                "hard_caps_applied": retention_caps,
            },
            "overall_hire_probability": {
                "value": round(overall_raw, 1),
                "formula_inputs": {
                    "p_shortlist": round(shortlist_raw, 1),
                    "p_interview_pass": round(interview_pass, 1),
                    "p_offer_accept": round(offer_raw, 1),
                },
            },
            "confidence_level": confidence,
        }