File size: 1,990 Bytes
ab13a8a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer
from typing import List
from app.models.schemas import Candidate

class MatchService:
    """Semantic matcher that ranks candidates against a job description.

    Candidate text is embedded with a local SentenceTransformer model, the
    vectors are upserted into a Pinecone index, and the index is then queried
    with the embedded job description.

    Configuration is read from environment variables when the service is
    constructed: ``PINECONE_API_KEY``, ``PINECONE_INDEX`` (default
    ``"coderound"``), ``EMBEDDING_MODEL`` (default ``"BAAI/bge-m3"``) and
    ``STAGE2_TOP_K`` (default ``20``).
    """

    # Pinecone caps the size of a single upsert request, so large candidate
    # lists are sent in chunks instead of one oversized call.
    UPSERT_BATCH_SIZE = 100

    def __init__(self):
        """Create the Pinecone index handle and load the embedding model.

        Raises:
            ValueError: If ``STAGE2_TOP_K`` is set to a non-integer value.
        """
        self.pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
        self.index_name = os.getenv("PINECONE_INDEX", "coderound")
        self.index = self.pc.Index(self.index_name)

        # Load local embedding model
        model_name = os.getenv("EMBEDDING_MODEL", "BAAI/bge-m3")
        self.model = SentenceTransformer(model_name)

        self.top_k = int(os.getenv("STAGE2_TOP_K", "20"))

    def get_embedding(self, text: str) -> List[float]:
        """Encode ``text`` with the loaded model and return it as a plain list."""
        return self.model.encode(text).tolist()

    async def get_top_candidates(self, jd: str, candidates: List[Candidate]) -> List[Candidate]:
        """Return the candidates most similar to the job description.

        Args:
            jd: Job description text used as the query.
            candidates: Candidates to index and rank.

        Returns:
            Up to ``self.top_k`` of the given candidates, in the order the
            index returned them. An empty input yields an empty list without
            touching the model or the index.

        NOTE(review): the model and Pinecone calls below are synchronous, so
        this coroutine blocks the event loop while they run.
        """
        # Nothing to rank: skip the embedding work and the network round-trips
        # (an upsert with no vectors is not a meaningful request).
        if not candidates:
            return []

        # 1. Embed all candidates in one batched model call rather than one
        #    call per candidate. Fields are combined for semantic weight.
        search_texts = [
            f"{c.name} {c.skills} {c.experience} {c.projects} {c.resume_text}"
            for c in candidates
        ]
        embeddings = self.model.encode(search_texts).tolist()

        vectors = []
        candidate_map = {}
        for c, embedding in zip(candidates, embeddings):
            # Pinecone record IDs are strings; normalise so the lookup key
            # below matches the ``match.id`` values the query returns.
            record_id = str(c.id)
            # Pinecone does not accept null metadata values; omit missing
            # fields instead of sending None.
            metadata = {
                key: value
                for key, value in (("name", c.name), ("email", c.email))
                if value is not None
            }
            vectors.append({
                "id": record_id,
                "values": embedding,
                "metadata": metadata,
            })
            candidate_map[record_id] = c

        # 2. Upsert to Pinecone in bounded batches.
        batch_size = self.UPSERT_BATCH_SIZE
        for start in range(0, len(vectors), batch_size):
            self.index.upsert(vectors=vectors[start:start + batch_size])

        # 3. Embed JD and query.
        # NOTE(review): the query is not restricted to the IDs upserted above,
        # so vectors left in the index by earlier calls can occupy top_k
        # slots. They are dropped in step 4, which means fewer than top_k
        # candidates may come back. Consider a per-request namespace or a
        # metadata filter if that matters.
        jd_embedding = self.get_embedding(jd)
        query_results = self.index.query(
            vector=jd_embedding,
            top_k=self.top_k,
            include_metadata=True
        )

        # 4. Map back to Candidate objects, keeping the index's ranking order.
        return [
            candidate_map[match.id]
            for match in query_results.matches
            if match.id in candidate_map
        ]

# Module-level shared instance. Constructing it here runs
# MatchService.__init__ at import time, so the Pinecone client/index handle is
# created and the embedding model is loaded as soon as this module is imported.
match_service = MatchService()