|
|
import numpy as np |
|
|
import pickle |
|
|
from sklearn.metrics.pairwise import cosine_similarity |
|
|
|
|
|
def load_embeddings(): |
|
|
"""Load pre-computed embeddings and metadata.""" |
|
|
|
|
|
|
|
|
candidate_embeddings = np.load('data/processed/candidate_embeddings.npy') |
|
|
company_embeddings = np.load('data/processed/company_embeddings.npy') |
|
|
|
|
|
|
|
|
with open('data/processed/candidates_processed.pkl', 'rb') as f: |
|
|
candidates_df = pickle.load(f) |
|
|
|
|
|
with open('data/processed/companies_processed.pkl', 'rb') as f: |
|
|
companies_df = pickle.load(f) |
|
|
|
|
|
return candidate_embeddings, company_embeddings, candidates_df, companies_df |
|
|
|
|
|
def find_top_matches(candidate_idx, candidate_embeddings, company_embeddings, companies_df, top_k=10): |
|
|
"""Find top K company matches for a candidate.""" |
|
|
|
|
|
|
|
|
candidate_vec = candidate_embeddings[candidate_idx].reshape(1, -1) |
|
|
|
|
|
|
|
|
similarities = cosine_similarity(candidate_vec, company_embeddings)[0] |
|
|
|
|
|
|
|
|
top_indices = np.argsort(similarities)[::-1][:top_k] |
|
|
|
|
|
|
|
|
matches = [] |
|
|
for idx in top_indices: |
|
|
matches.append({ |
|
|
'company_id': idx, |
|
|
'company_name': companies_df.iloc[idx].get('name', f'Company {idx}'), |
|
|
'job_title': companies_df.iloc[idx].get('title', 'N/A'), |
|
|
'score': float(similarities[idx]) |
|
|
}) |
|
|
|
|
|
return matches |