Spaces:

Aqdas
/

CV-Extractor

Build error

App Files Files Community

CV-Extractor / resume_recommender.py

Aqdas

Update resume_recommender.py

7cf3d3b verified over 1 year ago

raw

history blame contribute delete

2.64 kB

	from sklearn.metrics.pairwise import cosine_similarity
	from transformers import BertTokenizer, BertModel
	import torch
	import nltk
	from nltk.corpus import stopwords
	from nltk.stem import WordNetLemmatizer

	import pandas as pd
	import numpy as np

	# The tokenizer and the encoder are both created at import time from the
	# same pretrained checkpoint name, so it is spelled out only once here.
	_BERT_CHECKPOINT = 'bert-base-uncased'
	tokenizer = BertTokenizer.from_pretrained(_BERT_CHECKPOINT)
	model = BertModel.from_pretrained(_BERT_CHECKPOINT)

	_text_resources = None  # lazily-built (stop_words, lemmatizer) pair shared by all calls


	def _get_text_resources():
	    """Return the cached ``(stop_words, lemmatizer)`` pair, building it on first use.

	    An NLTK corpus is downloaded only when looking it up raises
	    ``LookupError``, so repeated calls do not re-run ``nltk.download`` or
	    rebuild the stop-word set.
	    """
	    global _text_resources
	    if _text_resources is None:
	        try:
	            stop_words = frozenset(stopwords.words('english'))
	        except LookupError:
	            # Corpus is not installed yet: fetch it once, then retry.
	            nltk.download('stopwords')
	            stop_words = frozenset(stopwords.words('english'))

	        lemmatizer = WordNetLemmatizer()
	        try:
	            # Probe call: forces the lazily-loaded WordNet corpus to resolve
	            # now, so a missing corpus is detected here rather than mid-run.
	            lemmatizer.lemmatize('probe')
	        except LookupError:
	            nltk.download('wordnet')

	        _text_resources = (stop_words, lemmatizer)
	    return _text_resources


	def preprocess_text(text: str) -> str:
	    """Lower-case *text*, drop English stop words and lemmatize what is left.

	    Args:
	        text: Raw text; it is split on whitespace.

	    Returns:
	        The surviving lemmatized words joined by single spaces. An empty or
	        all-stop-word input yields an empty string.
	    """
	    stop_words, lemmatizer = _get_text_resources()
	    lowered_words = (word.lower() for word in text.split())
	    return ' '.join(
	        lemmatizer.lemmatize(word) for word in lowered_words if word not in stop_words
	    )

	def get_bert_embedding(text: str):
	    """Encode *text* with the module-level BERT model into one pooled vector.

	    The text is tokenized (truncated to at most 512 tokens) and passed
	    through ``model``; the final-layer token states are then averaged over
	    the positions marked as real tokens by the attention mask, so padding
	    positions never contribute to the result.

	    Args:
	        text: The text to embed.

	    Returns:
	        A NumPy array holding the mean-pooled final hidden states, with the
	        token axis (dim 1) reduced away.
	    """
	    # ``padding=True`` pads only up to the longest sequence in the batch, so a
	    # single text is not inflated to 512 positions before the forward pass.
	    inputs = tokenizer(text, return_tensors='pt', max_length=512, truncation=True, padding=True)

	    # Inference only: skip autograd bookkeeping to save memory and time.
	    with torch.no_grad():
	        hidden_states = model(**inputs).last_hidden_state

	    # Mask-weighted mean: zero out padding positions and divide by the number
	    # of real tokens instead of the padded sequence length.
	    mask = inputs['attention_mask'].unsqueeze(-1).to(hidden_states.dtype)
	    token_counts = mask.sum(dim=1).clamp(min=1)  # guard against division by zero
	    pooled = (hidden_states * mask).sum(dim=1) / token_counts
	    return pooled.numpy()


	def _cell_to_text(cell) -> str:
	    """Flatten one DataFrame cell (a string, an iterable of items, or a missing value) into text."""
	    if isinstance(cell, str):
	        # Joining a string would put a space between every character.
	        return cell
	    try:
	        return " ".join(map(str, cell))
	    except TypeError:
	        # Non-iterable scalar: missing values become empty text.
	        return "" if pd.isna(cell) else str(cell)


	def recommend_resume(resume_df: pd.DataFrame, job_desc: str, matching_threshold: float = 0.0) -> pd.DataFrame:
	    """Return the resumes from *resume_df* that align with the job description.

	    For every row, the ``skills`` and ``about_section`` cells are combined
	    into one text, preprocessed, embedded, and compared to the embedding of
	    *job_desc* by cosine similarity. A row is kept when its
	    ``past_company_experience`` value is ``>= 0`` and its similarity is at
	    least *matching_threshold*.

	    Args:
	        resume_df: Frame with ``skills``, ``about_section`` and
	            ``past_company_experience`` columns.
	        job_desc: Job description text to match against.
	        matching_threshold: Minimum cosine similarity for a resume to be
	            recommended. Defaults to 0.0, the value previously hard-coded.

	    Returns:
	        A copy of the matching rows of *resume_df* (original index kept) with
	        an added ``similarity_score`` column. Empty when nothing matches.
	    """
	    job_description_embedding = get_bert_embedding(job_desc)

	    # Positions and scores are collected together so the score column always
	    # lines up with the rows that are actually returned.
	    matched_positions = []
	    matched_scores = []

	    for row in range(resume_df.shape[0]):
	        skills = resume_df['skills'].iloc[row]
	        print('This is row', row)
	        print("This is row skill: ", skills)

	        about_section = resume_df['about_section'].iloc[row]
	        resume_text = preprocess_text(_cell_to_text(skills) + " " + _cell_to_text(about_section))
	        resume_embedding = get_bert_embedding(resume_text)
	        similarity_score = cosine_similarity(resume_embedding, job_description_embedding).flatten()[0]

	        # NOTE(review): ">= 0" accepts every non-negative value and rejects
	        # NaN; confirm whether a real minimum experience was intended.
	        experience_match = resume_df['past_company_experience'].iloc[row] >= 0
	        print('similarity_score: ', similarity_score, 'experience_match: ', experience_match)

	        if experience_match:
	            if similarity_score >= matching_threshold:
	                matched_positions.append(row)
	                matched_scores.append(similarity_score)
	                print("Resume matches the job description.")
	            else:
	                print("similarity score is out of threshold")
	                continue
	        else:
	            print("Resume does not match the job description.")
	        print('one complete')

	    # Select all matches in one step instead of concatenating inside the loop.
	    recommended_resumes = resume_df.iloc[matched_positions].copy()
	    recommended_resumes['similarity_score'] = matched_scores
	    return recommended_resumes