Spaces:

Rogersurf
/

hrhub

Running

File size: 1,490 Bytes

100f669

import numpy as np
import pickle
from sklearn.metrics.pairwise import cosine_similarity

def load_embeddings(data_dir='data/processed'):
    """Load pre-computed embeddings and their metadata from disk.

    Reads four files from ``data_dir``:

    * ``candidate_embeddings.npy`` and ``company_embeddings.npy`` via
      ``np.load``;
    * ``candidates_processed.pkl`` and ``companies_processed.pkl`` via
      ``pickle.load``.

    Args:
        data_dir: Directory containing the four files (``str`` or path-like).
            Defaults to ``'data/processed'``, which is resolved against the
            current working directory; pass an explicit directory when the
            process may be started from somewhere else.

    Returns:
        A 4-tuple ``(candidate_embeddings, company_embeddings,
        candidates_df, companies_df)``. The last two are whatever objects
        were pickled (presumably pandas DataFrames -- confirm against the
        code that writes them).

    Raises:
        FileNotFoundError: If any of the four files is missing.
    """
    import os  # local import: keeps the block self-contained

    def _unpickle(filename):
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only point ``data_dir`` at artifacts produced by a trusted
        # pipeline.
        with open(os.path.join(data_dir, filename), 'rb') as f:
            return pickle.load(f)

    # Embedding matrices
    candidate_embeddings = np.load(os.path.join(data_dir, 'candidate_embeddings.npy'))
    company_embeddings = np.load(os.path.join(data_dir, 'company_embeddings.npy'))

    # Metadata objects
    candidates_df = _unpickle('candidates_processed.pkl')
    companies_df = _unpickle('companies_processed.pkl')

    return candidate_embeddings, company_embeddings, candidates_df, companies_df

def find_top_matches(candidate_idx, candidate_embeddings, company_embeddings, companies_df, top_k=10):
    """Find the top K company matches for a candidate by cosine similarity.

    Args:
        candidate_idx: Index of the candidate's row in
            ``candidate_embeddings``.
        candidate_embeddings: Array of candidate vectors; row
            ``candidate_idx`` is reshaped to ``(1, -1)`` and used as the
            query.
        company_embeddings: Array of company vectors compared against the
            query.
        companies_df: Object supporting positional ``.iloc[i]`` lookup whose
            rows expose ``.get(key, default)``.
            NOTE(review): assumes row ``i`` describes the company embedded at
            ``company_embeddings[i]`` -- confirm against the preprocessing
            step.
        top_k: Maximum number of matches to return. ``None`` returns every
            company; zero or a negative value returns an empty list.

    Returns:
        A list of dicts, best match first, each with keys ``'company_id'``
        (``int`` position in ``company_embeddings``), ``'company_name'``,
        ``'job_title'`` and ``'score'`` (``float`` cosine similarity).
    """
    # Query vector as a single-row 2-D array, as cosine_similarity expects.
    candidate_vec = candidate_embeddings[candidate_idx].reshape(1, -1)

    # One similarity score per company.
    similarities = cosine_similarity(candidate_vec, company_embeddings)[0]

    # A negative top_k would otherwise slice from the end ("all but the
    # worst |top_k|"), which is never what a caller wants; clamp it to 0.
    limit = None if top_k is None else max(top_k, 0)
    top_indices = np.argsort(similarities)[::-1][:limit]

    matches = []
    for idx in top_indices:
        # Plain int so the result is JSON-serializable (numpy.int64 is not).
        position = int(idx)
        row = companies_df.iloc[position]  # look the row up once, not twice
        matches.append({
            'company_id': position,
            'company_name': row.get('name', f'Company {position}'),
            'job_title': row.get('title', 'N/A'),
            'score': float(similarities[position])
        })

    return matches