File size: 2,428 Bytes
f15d7db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
"""
Matching algorithms for HRHUB.
Contains cosine similarity and matching logic.
"""

import numpy as np
from typing import List, Tuple
from sklearn.metrics.pairwise import cosine_similarity


def compute_similarity(
    candidate_embedding: np.ndarray,
    company_embeddings: np.ndarray
) -> np.ndarray:
    """
    Score one candidate against every company using cosine similarity.

    Args:
        candidate_embedding: Vector for a single candidate; it is reshaped
            into one row before scoring
        company_embeddings: Company vectors, one row per company

    Returns:
        1D array of similarity scores, one entry per company row
    """

    # sklearn's cosine_similarity works on 2D inputs, so the candidate is
    # presented as a single-row matrix. The (1, N) result is then collapsed
    # into a flat array of N scores.
    return cosine_similarity(
        candidate_embedding.reshape(1, -1),
        company_embeddings,
    ).flatten()


def find_top_matches(
    candidate_embedding: np.ndarray,
    company_embeddings: np.ndarray,
    top_k: int = 10,
    min_score: float = 0.0
) -> List[Tuple[int, float]]:
    """
    Find top K company matches for a candidate.

    Args:
        candidate_embedding: Candidate vector
        company_embeddings: All company vectors, one row per company
        top_k: Maximum number of matches to return; values <= 0 yield
            an empty list
        min_score: Minimum similarity score threshold (inclusive)

    Returns:
        List of (company_index, similarity_score) tuples ordered by
        descending score. Companies with equal scores are ordered by
        ascending index. The list is empty when there are no companies,
        when no company reaches min_score, or when top_k <= 0.
    """

    # Nothing to rank: a non-positive top_k must not reach the slice below
    # (a negative value would drop matches from the end instead of limiting
    # the result), and an empty company matrix has no rows to score.
    companies_shape = np.shape(company_embeddings)
    has_no_companies = len(companies_shape) > 0 and companies_shape[0] == 0
    if top_k <= 0 or has_no_companies:
        return []

    # Compute all similarities
    similarities = compute_similarity(candidate_embedding, company_embeddings)

    # Keep only the companies that reach the threshold
    valid_indices = np.flatnonzero(similarities >= min_score)
    valid_scores = similarities[valid_indices]

    # Sort by score (descending). A stable sort on the negated scores keeps
    # tied companies in ascending index order, so results are deterministic.
    order = np.argsort(-valid_scores, kind="stable")[:top_k]

    # Return as list of tuples of plain Python int/float values
    return list(zip(valid_indices[order].tolist(), valid_scores[order].tolist()))


def compute_match_strength(score: float) -> str:
    """
    Map a similarity score onto a human-readable strength label.

    Args:
        score: Similarity score (expected range 0-1)

    Returns:
        Label of the first tier whose lower bound the score reaches,
        or the weakest label when it reaches none of them
    """

    # Tiers run from strongest to weakest; every lower bound is inclusive.
    tiers = (
        (0.8, "🔥 Excellent"),
        (0.7, "✨ Very Good"),
        (0.6, "👍 Good"),
        (0.5, "✓ Fair"),
    )

    for lower_bound, label in tiers:
        if score >= lower_bound:
            return label

    # Below every tier
    return "⚠ Weak"