Spaces:

sameer2026
/

iris_backend

Sleeping

App Files Files Community

iris_backend / backend /src /matching /similarity.py

Saandraahh

Implemented clustering

4b3a33f 2 months ago

raw

history blame contribute delete

4.23 kB


	import numpy as np
	from typing import Dict, Any, List
	from supabase import Client

	def cosine_similarity(v1: Any, v2: Any) -> float:
	    """Return the cosine similarity of two embedding vectors.

	    Each argument may be a pgvector text literal such as ``"[0.1, 0.2]"``,
	    a list or tuple of numbers, or a one-dimensional NumPy array.

	    Args:
	        v1: First vector.
	        v2: Second vector.

	    Returns:
	        The cosine of the angle between the vectors, clamped to
	        ``[-1.0, 1.0]``. ``0.0`` is returned instead of raising when either
	        input is missing, empty, unparsable, not one-dimensional, contains
	        NaN/Inf, has zero magnitude, or when the two lengths differ.
	    """
	    def to_array(raw):
	        # Normalise any supported input to a finite 1-D float array, or
	        # None when the input cannot be used as a vector.
	        if isinstance(raw, str):
	            # Tolerate surrounding whitespace before dropping the brackets.
	            body = raw.strip().strip('[]')
	            try:
	                values = [float(part) for part in body.split(',') if part.strip()]
	            except ValueError:
	                return None
	        elif isinstance(raw, (list, tuple, np.ndarray)):
	            values = raw
	        else:
	            return None

	        try:
	            arr = np.asarray(values, dtype=float)
	        except (TypeError, ValueError):
	            # Non-numeric or ragged content.
	            return None

	        if arr.ndim != 1 or arr.size == 0 or not np.all(np.isfinite(arr)):
	            return None
	        return arr

	    a = to_array(v1)
	    b = to_array(v2)
	    if a is None or b is None or a.shape != b.shape:
	        return 0.0

	    # A zero denominator covers zero vectors as well as underflow of the
	    # product; a non-finite one covers overflow of either norm.
	    denominator = float(np.linalg.norm(a)) * float(np.linalg.norm(b))
	    if denominator == 0.0 or not np.isfinite(denominator):
	        return 0.0

	    similarity = float(np.dot(a, b)) / denominator
	    if not np.isfinite(similarity):
	        return 0.0

	    # Floating-point rounding can land a hair outside the valid range.
	    return max(-1.0, min(1.0, similarity))

	async def calculate_granular_match_score(client: Client, candidate_id: str, job_id: str) -> Dict[str, Any]:
	    """Score how well a candidate profile matches a job, section by section.

	    Loads the candidate's row from ``profile_embeddings`` (matched on ``id``)
	    and the job's row from ``job_embeddings`` (matched on ``job_id``), takes
	    the cosine similarity of each paired embedding column, and combines the
	    similarities (scaled by 100) into a weighted total.

	    Args:
	        client: Supabase client used for both table lookups.
	        candidate_id: Value matched against ``profile_embeddings.id``.
	        job_id: Value matched against ``job_embeddings.job_id``.

	    Returns:
	        On success, a dict with ``total_score`` (weighted total rounded to
	        one decimal), ``breakdown`` (per-section similarity times 100,
	        rounded to one decimal) and ``weights``. When either row is missing
	        or the lookup raises, a dict with ``total_score`` 0, an empty
	        ``breakdown`` and an ``error`` message.
	    """
	    print(f"📊 Calculating granular match score for Candidate: {candidate_id}, Job: {job_id}")

	    def failure(message: str) -> Dict[str, Any]:
	        # Shared shape for every early-exit error payload.
	        return {"total_score": 0, "breakdown": {}, "error": message}

	    # Step 1: load one embedding row per side. Both queries run before
	    # either result is inspected.
	    try:
	        candidate_result = client.table("profile_embeddings").select("*").eq("id", candidate_id).execute()
	        job_result = client.table("job_embeddings").select("*").eq("job_id", job_id).execute()

	        if not candidate_result.data:
	            print(f"⚠️ No profile embeddings found for {candidate_id}")
	            return failure("Profile embeddings missing")

	        if not job_result.data:
	            print(f"⚠️ No job embeddings found for {job_id}")
	            return failure("Job embeddings missing")

	        candidate_vectors = candidate_result.data[0]
	        job_vectors = job_result.data[0]
	    except Exception as e:
	        print(f"❌ Database error in match score: {e}")
	        return failure(str(e))

	    # Step 2: section weights (the original comment states these match the
	    # SQL function public.match_profile_job).
	    section_weights = {
	        "technical_skills": 0.35,
	        "experience": 0.20,
	        "projects": 0.15,
	        "skills": 0.10,
	        "education": 0.10,
	        "certifications": 0.10,
	    }

	    # Step 3: per-section similarity. Each entry is
	    # (section, profile column, job column); projects are compared against
	    # the job's technical skills.
	    column_pairs = (
	        ("technical_skills", "technical_skills", "technical_skills"),
	        ("experience", "experience", "work_experience"),
	        ("projects", "projects", "technical_skills"),
	        ("skills", "skills", "skills"),
	        ("education", "education", "education"),
	    )
	    scores = {
	        section: cosine_similarity(candidate_vectors.get(profile_column), job_vectors.get(job_column))
	        for section, profile_column, job_column in column_pairs
	    }

	    # Certifications are compared against the job's technical skills,
	    # falling back to its general skills when those are empty or absent.
	    certification_target = job_vectors.get("technical_skills") or job_vectors.get("skills")
	    scores["certifications"] = cosine_similarity(candidate_vectors.get("certifications"), certification_target)

	    # Step 4: weighted total, normalised by the weight of scored sections.
	    # NOTE(review): cosine_similarity returns 0.0 rather than None for
	    # missing vectors, so this guard never skips a section - confirm
	    # whether sections without embeddings were meant to be excluded.
	    weighted_sum = 0
	    weight_sum = 0
	    for section, weight in section_weights.items():
	        similarity = scores.get(section)
	        if similarity is None:
	            continue
	        # Scale to 100 like the SQL function.
	        weighted_sum += (similarity * 100) * weight
	        weight_sum += weight

	    final_score = weighted_sum / weight_sum if weight_sum > 0 else 0

	    return {
	        "total_score": round(final_score, 1),
	        "breakdown": {section: round(value * 100, 1) for section, value in scores.items()},
	        "weights": section_weights,
	    }