# File size: 1,990 Bytes | ab13a8a
import os
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer
from typing import List
from app.models.schemas import Candidate
class MatchService:
    """Shortlist candidates for a job description via vector similarity.

    Candidate text and the job description are embedded with a local
    SentenceTransformer model; candidate vectors are upserted into a
    Pinecone index, which is then queried with the job-description vector.

    Configuration is read from environment variables in ``__init__``:
    ``PINECONE_API_KEY``, ``PINECONE_INDEX`` (default ``"coderound"``),
    ``EMBEDDING_MODEL`` (default ``"BAAI/bge-m3"``) and ``STAGE2_TOP_K``
    (default ``20``).
    """

    # Number of vectors sent per upsert request, so one large candidate list
    # is not pushed as a single oversized request.
    # NOTE(review): 100 is a conservative chunk size -- confirm against the
    # current Pinecone upsert limits for this index.
    UPSERT_BATCH_SIZE = 100

    def __init__(self):
        """Create the Pinecone client/index handle and load the embedding model."""
        self.pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
        self.index_name = os.getenv("PINECONE_INDEX", "coderound")
        self.index = self.pc.Index(self.index_name)
        # Load local embedding model
        model_name = os.getenv("EMBEDDING_MODEL", "BAAI/bge-m3")
        self.model = SentenceTransformer(model_name)
        self.top_k = int(os.getenv("STAGE2_TOP_K", "20"))

    def get_embedding(self, text: str) -> List[float]:
        """Return the embedding of ``text`` as a plain Python list."""
        return self.model.encode(text).tolist()

    def _get_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Embed several texts in one ``encode`` call; one vector per text."""
        return self.model.encode(texts).tolist()

    async def get_top_candidates(self, jd: str, candidates: List[Candidate]) -> List[Candidate]:
        """Return the candidates that best match the job description ``jd``.

        Args:
            jd: Job-description text used as the query.
            candidates: Candidates to index and rank.

        Returns:
            The matching ``Candidate`` objects, in the order the index query
            returned them. An empty ``candidates`` list yields ``[]`` without
            touching the index.

        NOTE(review): the encode/upsert/query calls below are synchronous, so
        this coroutine does not yield to the event loop while they run.
        NOTE(review): the query is not restricted to the vectors upserted by
        this call; matches whose id is not among ``candidates`` are dropped,
        so fewer than ``top_k`` candidates may come back.
        """
        # Nothing to index or rank; avoid sending an empty upsert request.
        if not candidates:
            return []

        # 1. Prepare vectors for batch upload
        # Combine fields for semantic weight
        search_texts = [
            f"{c.name} {c.skills} {c.experience} {c.projects} {c.resume_text}"
            for c in candidates
        ]
        embeddings = self._get_embeddings(search_texts)

        vectors = []
        candidate_map = {}
        for c, embedding in zip(candidates, embeddings):
            # Use the string form of the id both as the record id and as the
            # lookup key, so matches map back even if Candidate.id is not a
            # str (its declared type is not visible in this file).
            record_id = str(c.id)
            vectors.append({
                "id": record_id,
                "values": embedding,
                "metadata": {"name": c.name, "email": c.email},
            })
            candidate_map[record_id] = c

        # 2. Upsert to Pinecone, in bounded chunks
        batch_size = self.UPSERT_BATCH_SIZE
        for start in range(0, len(vectors), batch_size):
            self.index.upsert(vectors=vectors[start:start + batch_size])

        # 3. Embed JD and Query
        jd_embedding = self.get_embedding(jd)
        query_results = self.index.query(
            vector=jd_embedding,
            top_k=self.top_k,
            include_metadata=True,
        )

        # 4. Map back to Candidate objects
        return [
            candidate_map[match.id]
            for match in query_results.matches
            if match.id in candidate_map
        ]
# Module-level instance created when this module is imported, so
# MatchService.__init__ runs at import time.
match_service = MatchService()
|