Coderound / proj /backend /app /services /matching_service.py
cloud450's picture
Upload 42 files
ab13a8a verified
import os
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer
from typing import List
from app.models.schemas import Candidate
class MatchService:
    """Shortlist candidates for a job description using Pinecone vector search.

    Candidate profiles and the job description are embedded with a local
    SentenceTransformer model. Candidate vectors are upserted into a Pinecone
    index, which is then queried with the job-description vector.
    """

    # Upserts are sent in chunks so a single request never carries an
    # unbounded number of vectors (Pinecone limits upsert request size).
    _UPSERT_BATCH_SIZE = 100

    def __init__(self):
        """Create the Pinecone client/index handle and load the embedding model.

        Configuration is read from environment variables:
        ``PINECONE_API_KEY``, ``PINECONE_INDEX`` (default ``"coderound"``),
        ``EMBEDDING_MODEL`` (default ``"BAAI/bge-m3"``) and ``STAGE2_TOP_K``
        (default ``"20"``, converted to ``int``).
        """
        self.pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
        self.index_name = os.getenv("PINECONE_INDEX", "coderound")
        self.index = self.pc.Index(self.index_name)

        # Load local embedding model
        model_name = os.getenv("EMBEDDING_MODEL", "BAAI/bge-m3")
        self.model = SentenceTransformer(model_name)
        self.top_k = int(os.getenv("STAGE2_TOP_K", "20"))

    def get_embedding(self, text: str) -> List[float]:
        """Return the embedding of ``text`` as a plain Python list."""
        return self.model.encode(text).tolist()

    def _embed_batch(self, texts: List[str]) -> List[List[float]]:
        """Embed several texts with one ``encode`` call, one list per text."""
        return [vector.tolist() for vector in self.model.encode(texts)]

    async def get_top_candidates(
        self, jd: str, candidates: List["Candidate"]
    ) -> List["Candidate"]:
        """Return the candidates that best match the job description.

        Args:
            jd: Job-description text used as the search query.
            candidates: Candidates to index and rank. Each one must expose
                ``id``, ``name``, ``email``, ``skills``, ``experience``,
                ``projects`` and ``resume_text``.

        Returns:
            The supplied candidates whose ids appear among the top
            ``self.top_k`` index matches, in the order the index returned
            them. An empty ``candidates`` list yields ``[]`` without
            contacting Pinecone.

        NOTE(review): every call here is synchronous (model inference and
        Pinecone requests), so awaiting this coroutine blocks the event loop
        for its whole duration; consider offloading to a worker thread.

        NOTE(review): the query searches the whole index, so vectors left
        there by earlier calls compete for the ``top_k`` slots and are then
        discarded, which can shrink the result. Freshly upserted vectors may
        also not be immediately visible to queries; confirm against the
        Pinecone consistency guarantees for this index.
        """
        # Nothing to rank; an empty upsert request would only produce an error.
        if not candidates:
            return []

        # 1. Embed all candidate profiles in a single batched model call.
        search_texts = [
            # Combine fields for semantic weight
            f"{c.name} {c.skills} {c.experience} {c.projects} {c.resume_text}"
            for c in candidates
        ]
        embeddings = self._embed_batch(search_texts)

        vectors = []
        candidate_map = {}
        for c, embedding in zip(candidates, embeddings):
            # Pinecone ids are strings; use the same string form for the
            # vector id and the lookup key so matches always map back.
            vector_id = str(c.id)
            # Pinecone metadata does not accept null values, so drop them.
            metadata = {
                key: value
                for key, value in (("name", c.name), ("email", c.email))
                if value is not None
            }
            vectors.append(
                {"id": vector_id, "values": embedding, "metadata": metadata}
            )
            candidate_map[vector_id] = c

        # 2. Upsert to Pinecone in bounded chunks.
        batch_size = self._UPSERT_BATCH_SIZE
        for start in range(0, len(vectors), batch_size):
            self.index.upsert(vectors=vectors[start:start + batch_size])

        # 3. Embed JD and query.
        query_results = self.index.query(
            vector=self.get_embedding(jd),
            top_k=self.top_k,
            include_metadata=True,
        )

        # 4. Map back to Candidate objects, ignoring ids not from this call.
        return [
            candidate_map[match.id]
            for match in query_results.matches
            if match.id in candidate_map
        ]
# Module-level instance created at import time: importing this module runs
# MatchService.__init__, which builds the Pinecone client/index handle and
# loads the SentenceTransformer embedding model.
match_service = MatchService()