File size: 5,591 Bytes
ab13a8a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import asyncio
import json
import re
from typing import List, Dict, Any
from app.utils.groq_client import get_groq_completion
from app.models.schemas import (
    Candidate, NormalizedCandidate, RerankResult, 
    DeepReview, FinalShortlist, FinalRank, EvaluationResponse
)
from app.services.matching_service import match_service
from app.prompts.templates import (
    STAGE1_NORMALIZATION_PROMPT,
    STAGE3_RERANK_PROMPT,
    STAGE4_DEEP_REVIEW_PROMPT,
    STAGE5_FINAL_SELECTION_PROMPT
)

# Concurrency throttling: shared semaphore acquired by get_completion_with_sem
# around every get_groq_completion call, so no more than 3 of the calls made
# through that wrapper are in flight at the same time.
sem = asyncio.Semaphore(3)

async def get_completion_with_sem(messages):
    """Call ``get_groq_completion(messages)`` while holding the module semaphore.

    Args:
        messages: Chat messages forwarded unchanged to ``get_groq_completion``.

    Returns:
        Whatever ``get_groq_completion`` returns for ``messages``.
    """
    async with sem:
        completion = await get_groq_completion(messages)
    return completion

async def normalize_candidate(jd: str, candidate: Candidate) -> NormalizedCandidate:
    """Run Stage 1 normalization for a single candidate.

    Sends the job description and the candidate's JSON dump to the model via
    ``STAGE1_NORMALIZATION_PROMPT``, extracts the JSON object from the reply
    and builds a ``NormalizedCandidate`` from it.

    Args:
        jd: Job description text interpolated into the prompt.
        candidate: Raw candidate record to normalize.

    Returns:
        The parsed ``NormalizedCandidate``. When the reply cannot be parsed or
        validated, a zeroed placeholder carrying the ``"Parsing Error"`` flag
        is returned instead of raising.
    """
    candidate_raw = candidate.model_dump_json()
    resp = await get_completion_with_sem([
        {"role": "system", "content": "You are a professional data normalizer. Output JSON ONLY."},
        {"role": "user", "content": STAGE1_NORMALIZATION_PROMPT.format(jd=jd, candidate_raw=candidate_raw)}
    ])
    try:
        # Grab the outermost {...} span so prose or code fences around the
        # JSON do not break parsing; fall back to the raw reply otherwise.
        match = re.search(r'\{.*\}', resp, re.DOTALL)
        data = json.loads(match.group() if match else resp)
        # The pipeline looks normalized records up by the original candidate
        # id, so never trust the id echoed back by the model (same approach
        # as review_candidate).
        data["candidate_id"] = candidate.id
        return NormalizedCandidate(**data)
    except Exception as e:
        # Deliberate best-effort: one bad reply must not abort the batch.
        print(f"Failed to normalize {candidate.name}: {e}")
        # Return a fallback object
        return NormalizedCandidate(
            candidate_id=candidate.id, name=candidate.name, normalized_title="Unknown",
            experience_years=0, primary_skills=[], secondary_skills=[],
            backend_score=0, frontend_score=0, cloud_score=0, database_score=0,
            notice_period_days=0, location="Unknown", employment_status="Unknown",
            salary_expectation="Unknown", flags=["Parsing Error"]
        )

async def rerank_candidate(jd: str, normalized: NormalizedCandidate) -> RerankResult:
    """Run Stage 3 scoring for one normalized candidate.

    Sends the job description and the normalized candidate JSON to the model
    via ``STAGE3_RERANK_PROMPT`` and parses the reply into a ``RerankResult``.

    Args:
        jd: Job description text interpolated into the prompt.
        normalized: Stage 1 output for the candidate being scored.

    Returns:
        The parsed ``RerankResult``. When the reply cannot be parsed or
        validated, a result with ``final_score=0`` and ``decision="reject"``
        is returned instead of raising.
    """
    resp = await get_completion_with_sem([
        {"role": "system", "content": "You are a recruitment scoring engine. Output JSON ONLY."},
        {"role": "user", "content": STAGE3_RERANK_PROMPT.format(jd=jd, normalized_candidate=normalized.model_dump_json())}
    ])
    try:
        # Grab the outermost {...} span so prose or code fences around the
        # JSON do not break parsing; fall back to the raw reply otherwise.
        match = re.search(r'\{.*\}', resp, re.DOTALL)
        data = json.loads(match.group() if match else resp)
        # Callers look the result up by id, so pin it to the candidate that
        # was actually scored rather than whatever the model echoed back.
        data["candidate_id"] = normalized.candidate_id
        return RerankResult(**data)
    except Exception as e:
        # Deliberate best-effort, but no longer a bare ``except:`` (which
        # would also swallow KeyboardInterrupt/SystemExit) and no longer silent.
        print(f"Failed to rerank {normalized.candidate_id}: {e}")
        return RerankResult(candidate_id=normalized.candidate_id, scores={}, final_score=0, decision="reject")

async def review_candidate(jd: str, candidate_data: str, score: float, cand_id: str) -> DeepReview:
    """Run the Stage 4 deep review for one candidate.

    Sends the job description, the candidate data and the rerank score to the
    model via ``STAGE4_DEEP_REVIEW_PROMPT`` and parses the reply into a
    ``DeepReview``.

    Args:
        jd: Job description text interpolated into the prompt.
        candidate_data: Candidate payload (already serialized) for the prompt.
        score: Score interpolated into the prompt.
        cand_id: Identifier stamped onto the returned review, overriding any
            id present in the model's reply.

    Returns:
        The parsed ``DeepReview``. When the reply cannot be parsed or
        validated, a ``verdict="reject"`` review with
        ``why="Error in evaluation"`` is returned instead of raising.
    """
    resp = await get_completion_with_sem([
        {"role": "system", "content": "You are a senior hiring evaluator. Output JSON ONLY."},
        {"role": "user", "content": STAGE4_DEEP_REVIEW_PROMPT.format(jd=jd, candidate_data=candidate_data, score=score)}
    ])
    try:
        # Grab the outermost {...} span so prose or code fences around the
        # JSON do not break parsing; fall back to the raw reply otherwise.
        match = re.search(r'\{.*\}', resp, re.DOTALL)
        data = json.loads(match.group() if match else resp)
        data["candidate_id"] = cand_id
        return DeepReview(**data)
    except Exception as e:
        # Deliberate best-effort, but no longer a bare ``except:`` (which
        # would also swallow KeyboardInterrupt/SystemExit) and no longer silent.
        print(f"Failed to review {cand_id}: {e}")
        return DeepReview(candidate_id=cand_id, verdict="reject", why="Error in evaluation", strengths=[], risks=[], hidden_signal="", confidence=0)

async def perform_hybrid_evaluation(jd: str, candidates: List[Candidate]) -> EvaluationResponse:
    """Run the five-stage evaluation pipeline over ``candidates``.

    Stages, in order:
        1. Normalize every candidate with ``normalize_candidate``.
        2. Ask ``match_service.get_top_candidates`` for a shortlist.
        3. Score each shortlisted candidate with ``rerank_candidate`` and keep
           the ten highest ``final_score`` values.
        4. Deep-review the five best of those with ``review_candidate``.
        5. Ask the model for a final ranking over the reviews.

    Args:
        jd: Job description text used by every stage.
        candidates: Raw candidate records to evaluate.

    Returns:
        An ``EvaluationResponse`` whose ``shortlist`` is the final ranking and
        whose ``details`` maps candidate id to the dumped deep review. When
        the Stage 5 reply cannot be parsed, the ranking falls back to the
        Stage 3 score order of the reviewed candidates.
    """
    # 1. Normalization (Stage 1) - All candidates
    normalized_candidates = await asyncio.gather(
        *(normalize_candidate(jd, c) for c in candidates)
    )

    # asyncio.gather returns results in argument order, so key each
    # normalized record by the id of the candidate it was built from instead
    # of the id echoed back by the model (which may not match and would make
    # the lookup below fail).
    normalized_map = {c.id: n for c, n in zip(candidates, normalized_candidates)}
    candidate_map = {c.id: c for c in candidates}

    # 2. Embedding Matching (Stage 2) - retrieves the shortlist (Top 20).
    # Note: the raw candidates, not the normalized records, are passed here.
    top_20 = list(await match_service.get_top_candidates(jd, candidates))

    # 3. LLM Reranking (Stage 3) - Top 20 -> Top 10
    rerank_results = await asyncio.gather(
        *(rerank_candidate(jd, normalized_map[c.id]) for c in top_20)
    )
    # Keep every score paired with the candidate it belongs to, so later
    # stages never depend on an id coming back from the model. sorted() is
    # stable, so ties keep the Stage 2 order exactly as list.sort() would.
    ranked = sorted(
        zip(top_20, rerank_results),
        key=lambda pair: pair[1].final_score,
        reverse=True,
    )
    top_10_results = ranked[:10]

    # 4. LLM Deep Review (Stage 4) - Top 5 Only
    top_5_for_review = top_10_results[:5]
    review_results = await asyncio.gather(
        *(
            review_candidate(
                jd,
                candidate_map[c.id].model_dump_json(),
                r.final_score,
                c.id,
            )
            for c, r in top_5_for_review
        )
    )

    # 5. Final Selection (Stage 5)
    reviews_json = json.dumps([rev.model_dump() for rev in review_results])
    final_resp = await get_completion_with_sem([
        {"role": "system", "content": "You are the final hiring decision officer. Output JSON ONLY."},
        {"role": "user", "content": STAGE5_FINAL_SELECTION_PROMPT.format(all_top_5_results=reviews_json)}
    ])

    try:
        # Grab the outermost {...} span so prose or code fences around the
        # JSON do not break parsing; fall back to the raw reply otherwise.
        match = re.search(r'\{.*\}', final_resp, re.DOTALL)
        final_data = json.loads(match.group() if match else final_resp)
        shortlist = FinalShortlist(**final_data)
    except Exception as e:
        # Fallback ranking if synthesis fails: reuse the Stage 3 order.
        print(f"Failed to parse final selection: {e}")
        shortlist = FinalShortlist(final_ranking=[
            FinalRank(
                rank=position,
                candidate_id=c.id,
                name=candidate_map[c.id].name,
                decision=r.decision,
                reason="Automatic ranking",
            )
            for position, (c, r) in enumerate(top_5_for_review, start=1)
        ])

    return EvaluationResponse(
        shortlist=shortlist.final_ranking,
        details={rev.candidate_id: rev.model_dump() for rev in review_results}
    )