| import os |
| from pinecone import Pinecone |
| from sentence_transformers import SentenceTransformer |
| from typing import List |
| from app.models.schemas import Candidate |
|
|
class MatchService:
    """Shortlist candidates for a job description by vector similarity.

    Candidates are embedded with a SentenceTransformer model and written to a
    Pinecone index; the index is then queried with the embedding of the job
    description and the closest matches are mapped back to the candidates
    that were passed in.
    """

    # Number of vectors sent per upsert request. Pinecone limits the size of a
    # single request, so large candidate lists are written in slices.
    UPSERT_BATCH_SIZE = 100

    def __init__(self):
        """Create the Pinecone client, open the index and load the model.

        Configuration is read from environment variables:

        * ``PINECONE_API_KEY`` - API key handed to the Pinecone client.
        * ``PINECONE_INDEX`` - index name (default ``"coderound"``).
        * ``EMBEDDING_MODEL`` - SentenceTransformer model name
          (default ``"BAAI/bge-m3"``).
        * ``STAGE2_TOP_K`` - number of matches requested per query
          (default ``20``).
        """
        self.pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
        self.index_name = os.getenv("PINECONE_INDEX", "coderound")
        self.index = self.pc.Index(self.index_name)

        model_name = os.getenv("EMBEDDING_MODEL", "BAAI/bge-m3")
        self.model = SentenceTransformer(model_name)

        self.top_k = int(os.getenv("STAGE2_TOP_K", "20"))

    def get_embedding(self, text: str):
        """Return the embedding of ``text`` as a plain Python list."""
        return self.model.encode(text).tolist()

    async def get_top_candidates(
        self, jd: str, candidates: List["Candidate"]
    ) -> List["Candidate"]:
        """Return the candidates whose embeddings are closest to ``jd``.

        Args:
            jd: Job-description text used as the query.
            candidates: Candidates to index and rank. Each one must expose
                ``id``, ``name``, ``email``, ``skills``, ``experience``,
                ``projects`` and ``resume_text``.

        Returns:
            At most ``self.top_k`` of the given candidates, in the order the
            index returned the matches. An empty input yields an empty list
            without contacting the model or the index.

        Notes:
            * The query is not restricted to the vectors written by this
              call. Matches whose id does not belong to ``candidates`` are
              dropped, so fewer than ``top_k`` results may come back when the
              index also holds other records.
            * NOTE(review): the query runs right after the upsert; confirm
              the index makes freshly written vectors visible quickly enough
              for this flow.
            * NOTE(review): ``encode``, ``upsert`` and ``query`` are called
              synchronously inside this coroutine, so the event loop is
              blocked while they run.
        """
        if not candidates:
            # Nothing to rank; also avoids sending an empty upsert request.
            return []

        search_texts = [
            f"{c.name} {c.skills} {c.experience} {c.projects} {c.resume_text}"
            for c in candidates
        ]
        # One batched encode call instead of one model invocation per candidate.
        embeddings = self.model.encode(search_texts).tolist()

        vectors = []
        candidate_map = {}
        for c, embedding in zip(candidates, embeddings):
            # Pinecone record ids are strings; normalising here keeps the map
            # keys comparable with the ``match.id`` values returned below.
            record_id = str(c.id)
            # Pinecone does not accept null metadata values, so unset fields
            # are left out instead of being sent as None.
            metadata = {
                key: value
                for key, value in (("name", c.name), ("email", c.email))
                if value is not None
            }
            vectors.append(
                {"id": record_id, "values": embedding, "metadata": metadata}
            )
            candidate_map[record_id] = c

        batch_size = self.UPSERT_BATCH_SIZE
        for start in range(0, len(vectors), batch_size):
            self.index.upsert(vectors=vectors[start:start + batch_size])

        jd_embedding = self.get_embedding(jd)
        query_results = self.index.query(
            vector=jd_embedding,
            top_k=self.top_k,
            include_metadata=True,
        )

        return [
            candidate_map[match.id]
            for match in query_results.matches
            if match.id in candidate_map
        ]
|
|
# Shared module-level instance, constructed when this module is imported.
# Building it runs MatchService.__init__, which creates the Pinecone client
# and loads the embedding model, so the environment variables it reads are
# evaluated at import time.
match_service = MatchService()
|
|