Spaces:
Sleeping
Sleeping
| """ | |
| Matching algorithms for HRHUB. | |
| Contains cosine similarity and matching logic. | |
| """ | |
| import numpy as np | |
| from typing import List, Tuple | |
| from sklearn.metrics.pairwise import cosine_similarity | |
def compute_similarity(
    candidate_embedding: np.ndarray,
    company_embeddings: np.ndarray
) -> np.ndarray:
    """
    Compute cosine similarity between one candidate and every company.

    Args:
        candidate_embedding: Single candidate vector, e.g. shape (384,).
            Any array-like is accepted; it is reshaped to a single row.
        company_embeddings: Company vectors, one per row, e.g. shape (N, 384).

    Returns:
        1-D array with one similarity score per company row, shape (N,).
        When the company matrix has no rows, an empty float array is
        returned instead of raising.
    """
    candidate_embedding = np.asarray(candidate_embedding)
    company_embeddings = np.asarray(company_embeddings)

    # With zero company rows there is nothing to score. sklearn's pairwise
    # helpers reject 0-sample input, so answer directly with an empty result.
    if company_embeddings.ndim == 2 and company_embeddings.shape[0] == 0:
        return np.empty(0, dtype=np.float64)

    # sklearn expects 2-D input, so present the candidate as one row: (1, D)
    candidate_row = candidate_embedding.reshape(1, -1)

    # Result has shape (1, N): one row for the candidate, one column per company
    similarities = cosine_similarity(candidate_row, company_embeddings)

    # Collapse (1, N) to (N,)
    return similarities.flatten()
def find_top_matches(
    candidate_embedding: np.ndarray,
    company_embeddings: np.ndarray,
    top_k: int = 10,
    min_score: float = 0.0
) -> List[Tuple[int, float]]:
    """
    Find the top K company matches for a candidate.

    Args:
        candidate_embedding: Candidate vector.
        company_embeddings: All company vectors, one per row.
        top_k: Maximum number of matches to return. Zero or a negative
            value yields an empty list.
        min_score: Minimum similarity score (inclusive) a company must
            reach to be considered.

    Returns:
        List of (company_index, similarity_score) tuples, ordered by
        descending score. Companies with equal scores are ordered by
        ascending company index.
    """
    # A non-positive top_k asks for nothing. Without this guard a negative
    # value would reach the slice below and drop matches from the *end* of
    # the ranking instead of limiting its length. (None still means
    # "no limit", as it does for a plain slice bound.)
    if top_k is not None and top_k <= 0:
        return []

    # Score every company against the candidate
    similarities = compute_similarity(candidate_embedding, company_embeddings)

    # Keep only companies that reach the threshold
    valid_indices = np.where(similarities >= min_score)[0]
    valid_scores = similarities[valid_indices]

    # Stable sort on negated scores gives descending order while keeping
    # tied scores in ascending company-index order, so output is deterministic.
    ranking = np.argsort(-valid_scores, kind="stable")[:top_k]

    top_indices = valid_indices[ranking]
    top_scores = valid_scores[ranking]

    # .tolist() converts numpy scalars to plain Python int / float
    return list(zip(top_indices.tolist(), top_scores.tolist()))
def compute_match_strength(score: float) -> str:
    """
    Map a similarity score to a human-readable strength label.

    Args:
        score: Similarity score, nominally in the range 0-1.

    Returns:
        The label of the highest tier whose lower bound the score reaches,
        or the weak label when it reaches none of them.
    """
    # Tiers are listed from strongest to weakest; the first lower bound
    # that the score meets (inclusive) decides the label.
    tiers = (
        (0.8, "🔥 Excellent"),
        (0.7, "✨ Very Good"),
        (0.6, "👍 Good"),
        (0.5, "✓ Fair"),
    )
    for lower_bound, label in tiers:
        if score >= lower_bound:
            return label
    return "⚠ Weak"