# Source: Hugging Face Space sameer2026/iris_backend (status: Sleeping)
# File size: 4,230 bytes


import numpy as np
from typing import Dict, Any, List
from supabase import Client

def cosine_similarity(v1: Any, v2: Any) -> float:
    """Return the cosine similarity of two embedding vectors.

    Each argument may be a list, tuple or NumPy array of numbers, or a
    pgvector-style text value such as ``"[0.1, 0.2, 0.3]"``.

    Args:
        v1: First vector (sequence of numbers or pgvector string).
        v2: Second vector (sequence of numbers or pgvector string).

    Returns:
        The similarity clamped to ``[-1.0, 1.0]``. ``0.0`` is returned when
        either input is missing, cannot be read as a flat numeric vector,
        contains NaN/inf, differs in length from the other, or has zero
        magnitude.
    """
    def parse_vector(v: Any) -> np.ndarray:
        """Convert *v* to a 1-D float array; an empty array means "unusable"."""
        unusable = np.empty(0, dtype=float)

        if isinstance(v, str):
            # pgvector text form: drop surrounding whitespace and brackets,
            # then split on commas, ignoring empty fragments.
            pieces = (p.strip() for p in v.strip().strip('[]').split(','))
            v = [p for p in pieces if p]
        elif not isinstance(v, (list, tuple, np.ndarray)):
            return unusable

        try:
            arr = np.asarray(v, dtype=float)
        except (TypeError, ValueError):
            # Non-numeric or ragged content: treat as "no vector".
            return unusable

        # Only flat vectors of finite numbers are comparable.
        if arr.ndim != 1 or not np.all(np.isfinite(arr)):
            return unusable
        return arr

    a = parse_vector(v1)
    b = parse_vector(v2)

    if a.size == 0 or a.shape != b.shape:
        return 0.0

    # A zero norm also covers the all-zeros vector case.
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0 or not np.isfinite(denom):
        return 0.0

    similarity = np.dot(a, b) / denom
    if not np.isfinite(similarity):
        return 0.0

    # Floating-point rounding can push the ratio marginally outside [-1, 1].
    return float(np.clip(similarity, -1.0, 1.0))

async def calculate_granular_match_score(client: Client, candidate_id: str, job_id: str) -> Dict[str, Any]:
    """
    Fetch the stored embeddings for a candidate and a job, compute a cosine
    similarity per section, and combine them into a weighted total.

    Args:
        client: Supabase client used to read the ``profile_embeddings`` and
            ``job_embeddings`` tables.
        candidate_id: Value matched against ``profile_embeddings.id``.
        job_id: Value matched against ``job_embeddings.job_id``.

    Returns:
        On success, a dict with ``total_score`` (weighted score, rounded to
        one decimal), ``breakdown`` (per-section similarity times 100,
        rounded to one decimal) and ``weights``.
        If a row is missing or the lookup raises, a dict with
        ``total_score`` 0, an empty ``breakdown`` and an ``error`` message.
        Exceptions from the lookup are reported this way, not raised.
    """
    # Function-scope import so the block brings its own dependency with it.
    import asyncio

    print(f"📊 Calculating granular match score for Candidate: {candidate_id}, Job: {job_id}")

    # 1. Fetch Embeddings
    # The query builder's execute() is used synchronously (its result is read
    # directly, without awaiting), so it is run in the default executor to
    # keep the event loop free while the requests are in flight.
    try:
        loop = asyncio.get_running_loop()

        profile_query = client.table("profile_embeddings").select("*").eq("id", candidate_id)
        profile_resp = await loop.run_in_executor(None, profile_query.execute)

        job_query = client.table("job_embeddings").select("*").eq("job_id", job_id)
        job_resp = await loop.run_in_executor(None, job_query.execute)

        if not profile_resp.data:
            print(f"⚠️ No profile embeddings found for {candidate_id}")
            return {"total_score": 0, "breakdown": {}, "error": "Profile embeddings missing"}

        if not job_resp.data:
            print(f"⚠️ No job embeddings found for {job_id}")
            return {"total_score": 0, "breakdown": {}, "error": "Job embeddings missing"}

        profile_emb = profile_resp.data[0]
        job_emb = job_resp.data[0]

    except Exception as e:
        # Boundary handler: any lookup failure becomes an error result.
        print(f"❌ Database error in match score: {e}")
        return {"total_score": 0, "breakdown": {}, "error": str(e)}

    # 2. Define Weights (Matching SQL function public.match_profile_job)
    WEIGHTS = {
        "technical_skills": 0.35,
        "experience": 0.20,
        "projects": 0.15,
        "skills": 0.10,
        "education": 0.10,
        "certifications": 0.10
    }

    # 3. Calculate Individual Similarities
    # Certifications are compared against the job's technical skills, falling
    # back to its general skills when the former is absent or empty.
    certifications_target = job_emb.get("technical_skills") or job_emb.get("skills")

    # section -> (candidate embedding, job embedding it is compared against)
    section_pairs = {
        "technical_skills": (profile_emb.get("technical_skills"), job_emb.get("technical_skills")),
        "experience": (profile_emb.get("experience"), job_emb.get("work_experience")),
        # Projects are compared against the job's technical skills.
        "projects": (profile_emb.get("projects"), job_emb.get("technical_skills")),
        "skills": (profile_emb.get("skills"), job_emb.get("skills")),
        "education": (profile_emb.get("education"), job_emb.get("education")),
        "certifications": (profile_emb.get("certifications"), certifications_target),
    }
    scores = {
        section: cosine_similarity(candidate_vec, job_vec)
        for section, (candidate_vec, job_vec) in section_pairs.items()
    }

    # 4. Calculate Weighted Total
    # NOTE(review): cosine_similarity is annotated to return a float, so the
    # `is not None` guard below never excludes a section; a missing embedding
    # therefore contributes 0 at full weight instead of being left out of the
    # normalisation. Confirm that this is the intended scoring behaviour.
    total_score = 0
    available_weight = 0

    for key, weight in WEIGHTS.items():
        if scores.get(key) is not None:
            # Scale to 100 like SQL
            total_score += (scores[key] * 100) * weight
            available_weight += weight

    # Normalize by the weight that actually took part in the total.
    final_score = total_score / available_weight if available_weight > 0 else 0

    return {
        "total_score": round(final_score, 1),
        "breakdown": {k: round(v * 100, 1) for k, v in scores.items()},
        "weights": WEIGHTS
    }