Spaces:

cloud450
/

Coderound

No application file

Upload 42 files

ab13a8a verified about 2 months ago

1.99 kB

	import os
	from pinecone import Pinecone
	from sentence_transformers import SentenceTransformer
	from typing import List
	from app.models.schemas import Candidate

	class MatchService:
	    """Rank candidates against a job description via Pinecone vector search.

	    Candidates and the job description are embedded with a local
	    SentenceTransformer model; candidate vectors are upserted into a
	    Pinecone index, which is then queried with the job-description vector.
	    """

	    # Vectors sent per upsert request. Pinecone caps request size, so large
	    # candidate lists are uploaded in chunks rather than in one call.
	    UPSERT_BATCH_SIZE = 100

	    def __init__(self):
	        """Create the Pinecone index handle and load the embedding model.

	        Configuration comes from environment variables:
	        ``PINECONE_API_KEY``, ``PINECONE_INDEX`` (default ``"coderound"``),
	        ``EMBEDDING_MODEL`` (default ``"BAAI/bge-m3"``) and
	        ``STAGE2_TOP_K`` (default ``20``).

	        Raises:
	            ValueError: if ``STAGE2_TOP_K`` is set to a non-integer value.
	        """
	        self.pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
	        self.index_name = os.getenv("PINECONE_INDEX", "coderound")
	        self.index = self.pc.Index(self.index_name)

	        # Load local embedding model
	        model_name = os.getenv("EMBEDDING_MODEL", "BAAI/bge-m3")
	        self.model = SentenceTransformer(model_name)

	        self.top_k = int(os.getenv("STAGE2_TOP_K", "20"))

	    def get_embedding(self, text: str):
	        """Return the embedding of ``text`` converted with ``.tolist()``."""
	        return self.model.encode(text).tolist()

	    async def get_top_candidates(self, jd: str, candidates: List[Candidate]) -> List[Candidate]:
	        """Return the candidates whose vectors best match the job description.

	        Args:
	            jd: Job-description text used as the query.
	            candidates: Candidates to index and rank. Each must expose
	                ``id``, ``name``, ``email``, ``skills``, ``experience``,
	                ``projects`` and ``resume_text``.

	        Returns:
	            The matched candidates, in the order Pinecone returned them.
	            At most ``self.top_k`` items; empty when ``candidates`` is empty.

	        NOTE(review): ``encode``, ``upsert`` and ``query`` are called
	        synchronously and this coroutine never awaits, so the event loop is
	        held for the full duration of the call. Consider offloading to a
	        worker thread if this runs inside a busy async server.
	        """
	        # Nothing to rank: skip the upsert (an empty vector list is not a
	        # useful request) and the query round-trip.
	        if not candidates:
	            return []

	        # Pinecone record IDs are strings, and matches come back with string
	        # IDs, so normalise here to keep the lookup below consistent.
	        candidate_map = {str(c.id): c for c in candidates}

	        # Combine fields for semantic weight
	        search_texts = [
	            f"{c.name} {c.skills} {c.experience} {c.projects} {c.resume_text}"
	            for c in candidates
	        ]
	        # One batched encode call instead of one model call per candidate.
	        embeddings = self.model.encode(search_texts).tolist()

	        vectors = [
	            {
	                "id": str(c.id),
	                "values": embedding,
	                # Null metadata values are not accepted by Pinecone, so
	                # unset fields are left out instead of being sent as None.
	                "metadata": {
	                    key: value
	                    for key, value in (("name", c.name), ("email", c.email))
	                    if value is not None
	                },
	            }
	            for c, embedding in zip(candidates, embeddings)
	        ]

	        # Upsert to Pinecone in bounded chunks.
	        batch_size = self.UPSERT_BATCH_SIZE
	        for start in range(0, len(vectors), batch_size):
	            self.index.upsert(vectors=vectors[start:start + batch_size])

	        # Embed JD and query.
	        # NOTE(review): the query is not restricted to the vectors upserted
	        # above. If the index already holds other records they can take
	        # top_k slots and are then dropped by the ID filter below, giving
	        # fewer than top_k results. Freshly upserted vectors may also not be
	        # visible to an immediate query. Confirm against the index setup.
	        query_results = self.index.query(
	            vector=self.get_embedding(jd),
	            top_k=self.top_k,
	            include_metadata=True,
	        )

	        # Map back to Candidate objects, ignoring IDs not in this request.
	        return [
	            candidate_map[match.id]
	            for match in query_results.matches
	            if match.id in candidate_map
	        ]

	# Shared module-level instance, created when this module is imported.
	# Constructing it runs MatchService.__init__, which builds the Pinecone
	# client/index handle and loads the SentenceTransformer model, so importing
	# this module requires the related environment variables to be set.
	match_service = MatchService()