File size: 4,230 Bytes
ad01d65
 
 
 
 
4b3a33f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad01d65
 
4b3a33f
 
ad01d65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4b3a33f
ad01d65
 
 
4b3a33f
 
 
ad01d65
 
 
 
 
 
4b3a33f
ad01d65
 
4b3a33f
 
 
 
 
 
 
 
 
 
 
ad01d65
4b3a33f
 
 
ad01d65
 
 
 
 
 
 
4b3a33f
 
ad01d65
 
4b3a33f
ad01d65
4b3a33f
ad01d65
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119

import numpy as np
from typing import Dict, Any, List
from supabase import Client

def cosine_similarity(v1: Any, v2: Any) -> float:
    """Return the cosine similarity between two embedding vectors.

    Each argument may be a list, tuple, or one-dimensional NumPy array of
    numbers, or a pgvector-style string such as ``"[0.1, 0.2, 0.3]"``.

    Returns:
        The cosine of the angle between the two vectors, clamped to
        ``[-1.0, 1.0]``. ``0.0`` is returned whenever a similarity cannot be
        computed: an input is missing, empty, unparsable, non-numeric,
        non-finite, not one-dimensional, the lengths differ, or either
        vector has zero norm.
    """
    def parse_vector(v):
        """Convert a raw input into a 1-D float array, or None if unusable."""
        if isinstance(v, str):
            try:
                # Drop surrounding whitespace and brackets, then split on commas.
                v = [float(x) for x in v.strip().strip('[]').split(',') if x.strip()]
            except ValueError:
                return None
        elif not isinstance(v, (list, tuple, np.ndarray)):
            return None

        try:
            arr = np.asarray(v, dtype=float)
        except (TypeError, ValueError):
            # Non-numeric items or ragged nesting cannot form a vector.
            return None

        # Only non-empty, flat, fully finite vectors are comparable.
        if arr.ndim != 1 or arr.size == 0 or not np.all(np.isfinite(arr)):
            return None
        return arr

    a = parse_vector(v1)
    b = parse_vector(v2)

    if a is None or b is None or a.shape != b.shape:
        return 0.0

    # A zero denominator also covers the zero-vector case.
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0 or not np.isfinite(denominator):
        return 0.0

    # Rounding can push the ratio marginally outside [-1, 1]; clamp it back.
    return float(np.clip(np.dot(a, b) / denominator, -1.0, 1.0))

async def calculate_granular_match_score(client: Client, candidate_id: str, job_id: str) -> Dict[str, Any]:
    """
    Score a candidate against a job from their stored embedding rows.

    Reads one row from the "profile_embeddings" table (filtered on "id") and
    one from the "job_embeddings" table (filtered on "job_id"), computes a
    cosine similarity per category, and combines them with fixed weights.

    Returns a dict with:
      - "total_score": weighted score on a 0-100 scale, rounded to 1 decimal
      - "breakdown": per-category similarity scaled by 100, rounded to 1 decimal
      - "weights": the weight table that was applied
    If either row is missing or the lookup raises, the dict instead holds
    "total_score" 0, an empty "breakdown", and an "error" message.
    """
    print(f"📊 Calculating granular match score for Candidate: {candidate_id}, Job: {job_id}")

    def failure(reason: str) -> Dict[str, Any]:
        # Common payload shape for every early exit.
        return {"total_score": 0, "breakdown": {}, "error": reason}

    # Step 1: load the embedding rows for both sides.
    try:
        profile_resp = client.table("profile_embeddings").select("*").eq("id", candidate_id).execute()
        job_resp = client.table("job_embeddings").select("*").eq("job_id", job_id).execute()

        candidate_rows = profile_resp.data
        if not candidate_rows:
            print(f"⚠️ No profile embeddings found for {candidate_id}")
            return failure("Profile embeddings missing")

        job_rows = job_resp.data
        if not job_rows:
            print(f"⚠️ No job embeddings found for {job_id}")
            return failure("Job embeddings missing")

        candidate_row = candidate_rows[0]
        job_row = job_rows[0]

    except Exception as e:
        print(f"❌ Database error in match score: {e}")
        return failure(str(e))

    # Step 2: category weights (noted in the original as matching the SQL
    # function public.match_profile_job).
    weights = {
        "technical_skills": 0.35,
        "experience": 0.20,
        "projects": 0.15,
        "skills": 0.10,
        "education": 0.10,
        "certifications": 0.10
    }

    # Step 3: per-category similarity.
    # Each entry is (score key, profile column, job column). Projects are
    # compared against the job's technical skills.
    column_pairs = (
        ("technical_skills", "technical_skills", "technical_skills"),
        ("experience", "experience", "work_experience"),
        ("projects", "projects", "technical_skills"),
        ("skills", "skills", "skills"),
        ("education", "education", "education"),
    )
    similarities = {
        category: cosine_similarity(candidate_row.get(profile_col), job_row.get(job_col))
        for category, profile_col, job_col in column_pairs
    }

    # Certifications compare against the job's technical skills, falling back
    # to its general skills when the former is empty or absent.
    cert_target = job_row.get("technical_skills") or job_row.get("skills")
    similarities["certifications"] = cosine_similarity(candidate_row.get("certifications"), cert_target)

    # Step 4: weighted total on a 0-100 scale.
    # NOTE(review): cosine_similarity always returns a float, so this guard
    # never skips a category as written — confirm whether that is intended.
    weighted_sum = 0
    weight_used = 0
    for category, weight in weights.items():
        similarity = similarities.get(category)
        if similarity is None:
            continue
        weighted_sum += (similarity * 100) * weight
        weight_used += weight

    # Normalise by the weight that actually contributed.
    final_score = weighted_sum / weight_used if weight_used > 0 else 0

    breakdown = {}
    for category, similarity in similarities.items():
        breakdown[category] = round(similarity * 100, 1)

    return {
        "total_score": round(final_score, 1),
        "breakdown": breakdown,
        "weights": weights
    }