Spaces:

Rogersurf
/

hrhub

Running

hrhub / data /data_loader.py

Roger Surf

feat: integrate real data - app working with embeddings

100f669 13 days ago

1.49 kB

	import numpy as np
	import pickle
	from sklearn.metrics.pairwise import cosine_similarity

	def load_embeddings():
	"""Load pre-computed embeddings and metadata."""

	# Load embeddings
	candidate_embeddings = np.load('data/processed/candidate_embeddings.npy')
	company_embeddings = np.load('data/processed/company_embeddings.npy')

	# Load metadata
	with open('data/processed/candidates_processed.pkl', 'rb') as f:
	candidates_df = pickle.load(f)

	with open('data/processed/companies_processed.pkl', 'rb') as f:
	companies_df = pickle.load(f)

	return candidate_embeddings, company_embeddings, candidates_df, companies_df

	def find_top_matches(candidate_idx, candidate_embeddings, company_embeddings, companies_df, top_k=10):
	"""Find top K company matches for a candidate."""

	# Get candidate embedding
	candidate_vec = candidate_embeddings[candidate_idx].reshape(1, -1)

	# Calculate similarities
	similarities = cosine_similarity(candidate_vec, company_embeddings)[0]

	# Get top K indices
	top_indices = np.argsort(similarities)[::-1][:top_k]

	# Build results
	matches = []
	for idx in top_indices:
	matches.append({
	'company_id': idx,
	'company_name': companies_df.iloc[idx].get('name', f'Company {idx}'),
	'job_title': companies_df.iloc[idx].get('title', 'N/A'),
	'score': float(similarities[idx])
	})

	return matches