"""Granular candidate/job match scoring based on per-section embeddings."""

from typing import Any, Dict, List, Optional

import numpy as np

try:
    from supabase import Client
except ImportError:  # pragma: no cover
    # ``Client`` is only used as a type annotation in this module; falling
    # back keeps the pure-numeric helper importable without the SDK.
    Client = Any  # type: ignore[misc,assignment]


# Relative weight of each section in the total score.
# The original author notes these match the SQL function
# ``public.match_profile_job``.
_MATCH_WEIGHTS: Dict[str, float] = {
    "technical_skills": 0.35,
    "experience": 0.20,
    "projects": 0.15,
    "skills": 0.10,
    "education": 0.10,
    "certifications": 0.10,
}

# score key -> (profile embedding column, job embedding column).
# "projects" is deliberately compared against the job's technical skills.
# "certifications" is handled separately because its job column has a
# fallback.
_SECTION_COLUMNS: Dict[str, tuple] = {
    "technical_skills": ("technical_skills", "technical_skills"),
    "experience": ("experience", "work_experience"),
    "projects": ("projects", "technical_skills"),
    "skills": ("skills", "skills"),
    "education": ("education", "education"),
}


def _to_vector(value: Any) -> Optional[np.ndarray]:
    """Convert an embedding to a finite, non-empty 1-D float array.

    Accepts a pgvector-style string (``"[0.1, 0.2]"``), a list, a tuple or
    a NumPy array. Returns ``None`` when the value cannot be interpreted
    as such a vector.
    """
    if isinstance(value, str):
        # Tolerate surrounding whitespace before dropping the brackets.
        pieces = (piece.strip() for piece in value.strip().strip("[]").split(","))
        try:
            value = [float(piece) for piece in pieces if piece]
        except ValueError:
            return None
    elif not isinstance(value, (list, tuple, np.ndarray)):
        return None

    try:
        vector = np.asarray(value, dtype=float)
    except (TypeError, ValueError):
        # Non-numeric or ragged content.
        return None

    # ``None`` items become NaN under dtype=float, so the finiteness check
    # also rejects those.
    if vector.ndim != 1 or vector.size == 0 or not np.isfinite(vector).all():
        return None
    return vector


def cosine_similarity(v1: Any, v2: Any) -> float:
    """Return the cosine similarity between two embedding vectors.

    Each argument may be a list, tuple, NumPy array or a pgvector-style
    string such as ``"[0.1, 0.2, 0.3]"``.

    Returns:
        The similarity as a float, or ``0.0`` when either input is
        missing, unparsable, non-finite, not one-dimensional, a zero
        vector, or when the two vectors differ in length.
    """
    a = _to_vector(v1)
    b = _to_vector(v2)
    if a is None or b is None or a.shape != b.shape:
        return 0.0

    norm_a = np.linalg.norm(a)
    norm_b = np.linalg.norm(b)
    if norm_a == 0 or norm_b == 0:
        return 0.0

    similarity = float(np.dot(a, b) / (norm_a * norm_b))
    # Extreme magnitudes can overflow to inf/NaN; never leak that to callers.
    return similarity if np.isfinite(similarity) else 0.0


async def calculate_granular_match_score(client: Client, candidate_id: str, job_id: str) -> Dict[str, Any]:
    """Score how well a candidate profile matches a job, section by section.

    Fetches the candidate row from ``profile_embeddings`` (filtered on
    ``id``) and the job row from ``job_embeddings`` (filtered on
    ``job_id``), computes a cosine similarity per section, and combines
    them into a weighted total on a 0-100 scale.

    Returns:
        On success, a dict with ``total_score`` (rounded to one decimal),
        ``breakdown`` (per-section similarity * 100, rounded to one
        decimal) and ``weights``. When a row is missing or the query
        fails, a dict with ``total_score`` 0, an empty ``breakdown`` and
        an ``error`` message.
    """
    print(f"📊 Calculating granular match score for Candidate: {candidate_id}, Job: {job_id}")

    # NOTE(review): the queries below are not awaited. If ``client`` is the
    # synchronous Supabase client they block the event loop while running;
    # confirm this is acceptable for the callers.
    try:
        profile_rows = client.table("profile_embeddings").select("*").eq("id", candidate_id).execute().data
        job_rows = client.table("job_embeddings").select("*").eq("job_id", job_id).execute().data
    except Exception as e:
        print(f"❌ Database error in match score: {e}")
        return {"total_score": 0, "breakdown": {}, "error": str(e)}

    if not profile_rows:
        print(f"⚠️ No profile embeddings found for {candidate_id}")
        return {"total_score": 0, "breakdown": {}, "error": "Profile embeddings missing"}
    if not job_rows:
        print(f"⚠️ No job embeddings found for {job_id}")
        return {"total_score": 0, "breakdown": {}, "error": "Job embeddings missing"}

    profile_emb = profile_rows[0]
    job_emb = job_rows[0]

    # Per-section similarities, in the same key order as the weights.
    scores: Dict[str, float] = {
        section: cosine_similarity(profile_emb.get(profile_col), job_emb.get(job_col))
        for section, (profile_col, job_col) in _SECTION_COLUMNS.items()
    }
    # Certifications are compared against the job's technical skills,
    # falling back to its general skills when those are absent or empty.
    cert_target = job_emb.get("technical_skills") or job_emb.get("skills")
    scores["certifications"] = cosine_similarity(profile_emb.get("certifications"), cert_target)

    # Weighted average on a 0-100 scale. A section whose embedding is
    # missing scores 0.0 and still counts toward the weight total, so it
    # lowers the result rather than being excluded.
    weighted_sum = 0.0
    weight_total = 0.0
    for section, weight in _MATCH_WEIGHTS.items():
        weighted_sum += (scores[section] * 100) * weight
        weight_total += weight
    final_score = weighted_sum / weight_total if weight_total > 0 else 0

    return {
        "total_score": round(final_score, 1),
        "breakdown": {section: round(value * 100, 1) for section, value in scores.items()},
        # Return a copy so callers cannot mutate the module-level weights.
        "weights": dict(_MATCH_WEIGHTS),
    }