# Source path: iris_backend/backend/src/matching/similarity.py
# Residue from the repository web view (not part of the module):
#   Saandraahh's picture
#   Implemented clustering
#   4b3a33f
import numpy as np
from typing import Dict, Any, List
from supabase import Client
def cosine_similarity(v1: Any, v2: Any) -> float:
    """Return the cosine similarity of two embedding vectors.

    Each argument may be a list, tuple, or NumPy array of numbers, or a
    pgvector-style text value such as ``"[0.1, 0.2, 0.3]"``.

    Args:
        v1: First vector (sequence of numbers or bracketed, comma-separated string).
        v2: Second vector, in any of the same forms.

    Returns:
        The cosine similarity as a Python ``float``. ``0.0`` is returned when
        either input is missing, empty, unparseable, not one-dimensional,
        contains NaN/inf, differs in length from the other, or has zero
        magnitude.
    """

    def parse_vector(v: Any) -> np.ndarray:
        """Convert *v* to a 1-D float array; return an empty array on failure."""
        try:
            if isinstance(v, str):
                # Strip surrounding whitespace first so the brackets are
                # actually at the ends, then split the comma-separated body.
                body = v.strip().strip("[]")
                arr = np.asarray(
                    [float(x) for x in body.split(",") if x.strip()],
                    dtype=float,
                )
            elif isinstance(v, (list, tuple, np.ndarray)):
                arr = np.asarray(v, dtype=float)
            else:
                return np.empty(0)
        except (TypeError, ValueError):
            # Non-numeric content: treat as "no usable vector".
            return np.empty(0)
        # Reject nested/ragged shapes and NaN/inf, which would otherwise
        # raise later or leak NaN into the score.
        if arr.ndim != 1 or not np.all(np.isfinite(arr)):
            return np.empty(0)
        return arr

    a = parse_vector(v1)
    b = parse_vector(v2)
    if a.size == 0 or a.shape != b.shape:
        return 0.0

    # A zero denominator covers zero vectors (and underflowed norms), so no
    # separate all-zeros scan is needed.
    denominator = float(np.linalg.norm(a) * np.linalg.norm(b))
    if denominator == 0.0:
        return 0.0
    return float(np.dot(a, b) / denominator)
async def calculate_granular_match_score(client: Client, candidate_id: str, job_id: str) -> Dict[str, Any]:
    """
    Score a candidate against a job from their stored section embeddings.

    Reads one row from ``profile_embeddings`` (matched on ``id``) and one from
    ``job_embeddings`` (matched on ``job_id``), computes a cosine similarity
    per section, and combines them into a weighted score on a 0-100 scale.

    Returns a dict with ``total_score``, a per-section ``breakdown`` (each
    similarity times 100, rounded to one decimal) and the ``weights`` used.
    When a row is missing or the lookup raises, returns ``total_score`` 0, an
    empty ``breakdown`` and an ``error`` message instead.
    """
    print(f"📊 Calculating granular match score for Candidate: {candidate_id}, Job: {job_id}")

    def failure(reason: str) -> Dict[str, Any]:
        # Shared shape for every early-exit result.
        return {"total_score": 0, "breakdown": {}, "error": reason}

    # Step 1: load the embedding rows. Both queries run before either result
    # is inspected; any exception in this section becomes an error result.
    try:
        candidate_rows = client.table("profile_embeddings").select("*").eq("id", candidate_id).execute()
        posting_rows = client.table("job_embeddings").select("*").eq("job_id", job_id).execute()

        if not candidate_rows.data:
            print(f"⚠️ No profile embeddings found for {candidate_id}")
            return failure("Profile embeddings missing")

        if not posting_rows.data:
            print(f"⚠️ No job embeddings found for {job_id}")
            return failure("Job embeddings missing")

        profile_emb = candidate_rows.data[0]
        job_emb = posting_rows.data[0]
    except Exception as e:
        print(f"❌ Database error in match score: {e}")
        return failure(str(e))

    # Step 2: section weights (kept in line with the SQL function
    # public.match_profile_job).
    section_weights = {
        "technical_skills": 0.35,
        "experience": 0.20,
        "projects": 0.15,
        "skills": 0.10,
        "education": 0.10,
        "certifications": 0.10,
    }

    # Step 3: per-section similarities.
    # Each entry is (breakdown key, profile column, job column). Projects are
    # compared against the job's technical skills.
    column_pairs = (
        ("technical_skills", "technical_skills", "technical_skills"),
        ("experience", "experience", "work_experience"),
        ("projects", "projects", "technical_skills"),
        ("skills", "skills", "skills"),
        ("education", "education", "education"),
    )
    scores = {
        label: cosine_similarity(profile_emb.get(profile_col), job_emb.get(job_col))
        for label, profile_col, job_col in column_pairs
    }

    # Certifications are compared against the job's technical skills, falling
    # back to its general skills when the former is empty or missing.
    cert_target = job_emb.get("technical_skills") or job_emb.get("skills")
    scores["certifications"] = cosine_similarity(profile_emb.get("certifications"), cert_target)

    # Step 4: weighted total, scaled to 0-100 like the SQL version.
    # NOTE(review): the None check below only skips a section if
    # cosine_similarity can return None; if it always returns a float, every
    # section is counted and missing embeddings weigh in as 0 -- confirm intent.
    weighted_sum = 0
    weight_seen = 0
    for key, weight in section_weights.items():
        similarity = scores.get(key)
        if similarity is None:
            continue
        weighted_sum += (similarity * 100) * weight
        weight_seen += weight

    # Normalise by the weight actually accumulated.
    final_score = weighted_sum / weight_seen if weight_seen > 0 else 0

    return {
        "total_score": round(final_score, 1),
        "breakdown": {name: round(value * 100, 1) for name, value in scores.items()},
        "weights": section_weights,
    }