File size: 1,490 Bytes
100f669
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import numpy as np
import pickle
from sklearn.metrics.pairwise import cosine_similarity

def load_embeddings(data_dir='data/processed'):
    """Load pre-computed embeddings and their accompanying metadata.

    Args:
        data_dir: Directory containing ``candidate_embeddings.npy``,
            ``company_embeddings.npy``, ``candidates_processed.pkl`` and
            ``companies_processed.pkl``. Defaults to ``'data/processed'``,
            resolved relative to the current working directory.

    Returns:
        A 4-tuple ``(candidate_embeddings, company_embeddings,
        candidates_df, companies_df)``. The first two are the arrays read
        with ``np.load``; the last two are whatever objects were pickled
        (presumably pandas DataFrames -- confirm against the producer).

    Raises:
        FileNotFoundError: If any of the four files is missing.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    from pathlib import Path

    base = Path(data_dir)

    # Load embeddings
    candidate_embeddings = np.load(base / 'candidate_embeddings.npy')
    company_embeddings = np.load(base / 'company_embeddings.npy')

    # Load metadata
    # NOTE(security): pickle.load executes arbitrary code embedded in the
    # file. Only point data_dir at files produced by a trusted pipeline.
    with open(base / 'candidates_processed.pkl', 'rb') as f:
        candidates_df = pickle.load(f)

    with open(base / 'companies_processed.pkl', 'rb') as f:
        companies_df = pickle.load(f)

    return candidate_embeddings, company_embeddings, candidates_df, companies_df

def find_top_matches(candidate_idx, candidate_embeddings, company_embeddings, companies_df, top_k=10):
    """Find the top K company matches for a candidate.

    Every row of ``company_embeddings`` is scored by cosine similarity
    against the candidate's embedding; the ``top_k`` highest-scoring rows
    are returned in descending score order.

    Args:
        candidate_idx: Row index of the candidate in ``candidate_embeddings``.
        candidate_embeddings: Array of candidate embeddings, one per row.
        company_embeddings: 2-D array of company embeddings. Row ``i`` is
            looked up as ``companies_df.iloc[i]``, so both must share the
            same row order.
        companies_df: Positional-indexable table (``.iloc``) whose rows
            support ``.get``; the ``'name'`` and ``'title'`` fields are
            read when present.
        top_k: Maximum number of matches to return. ``None`` returns all
            companies; zero or a negative value returns an empty list.

    Returns:
        A list of dicts with keys ``'company_id'`` (int row position),
        ``'company_name'``, ``'job_title'`` and ``'score'`` (float).
    """
    # A negative top_k would otherwise slice as [:-k] and silently return
    # "everything except the worst k" rather than nothing.
    if top_k is not None and top_k <= 0:
        return []

    # Get candidate embedding as a single-row 2-D array for sklearn
    candidate_vec = candidate_embeddings[candidate_idx].reshape(1, -1)

    # Calculate similarities against every company
    similarities = cosine_similarity(candidate_vec, company_embeddings)[0]

    # Get top K indices, highest similarity first
    top_indices = np.argsort(similarities)[::-1][:top_k]

    # Build results
    matches = []
    for idx in top_indices:
        # Convert numpy.int64 to a builtin int so the result is
        # JSON-serializable, matching the float() cast applied to the score.
        idx = int(idx)
        company = companies_df.iloc[idx]  # look the row up once, not per field
        matches.append({
            'company_id': idx,
            'company_name': company.get('name', f'Company {idx}'),
            'job_title': company.get('title', 'N/A'),
            'score': float(similarities[idx]),
        })

    return matches