Spaces:

avimittal30
/

candidate-recommender

Sleeping

App Files Files Community

candidate-recommender / helper.py

avimittal30

pushing final changes

6c5d253 9 months ago

raw

history blame contribute delete

6.98 kB

	import re
	from datetime import datetime
	from collections import defaultdict
	from fuzzywuzzy import process, fuzz
	from parse_job_description import extract_job_details
	from data import resumes_data
	import pandas as pd
	# import multiprocessing as mp
	# from functools import partial

	def extract_experience(text):
	    """
	    Estimate years of experience as the current year minus a graduation year found in the text.

	    Bachelor's keywords are tried first, then Master's keywords. For each keyword the
	    four-digit years found in the text are tried from latest to earliest, and the first
	    year that appears after the keyword on the same line is used ('.' does not cross
	    newlines). If no keyword/year pair is found, the latest year in the text is used.

	    Parameters:
	    - text (str): Resume text to scan.

	    Returns:
	    - int: Non-negative number of years; 0 when the text contains no four-digit number
	      or when the selected year lies in the future.
	    """
	    bachelors_patterns = [
	        'bachelor', 'be', 'b.e.', 'b.tech', 'btech',
	        'bachelor of engineering', 'graduation'
	    ]
	    masters_patterns = [
	        'master', 'm.e.', 'me', 'master of engineering'
	    ]

	    current_year = datetime.now().year

	    # Every standalone four-digit number counts as a candidate year, latest first.
	    all_years = sorted(map(int, re.findall(r'\b(\d{4})\b', text)), reverse=True)

	    # Bachelor's keywords keep precedence over Master's keywords.
	    for pattern in bachelors_patterns + masters_patterns:
	        keyword = _degree_keyword_regex(pattern)
	        for year in all_years:
	            # \b around the year stops it matching inside a longer digit run.
	            if re.search(rf'{keyword}.*?\b{year}\b', text, re.IGNORECASE):
	                # A future (expected) graduation year must not give negative experience.
	                return max(0, current_year - year)

	    if all_years:
	        return max(0, current_year - all_years[0])

	    return 0


	def _degree_keyword_regex(keyword):
	    """
	    Build a regex fragment that matches a degree keyword as a standalone token.

	    The keyword is escaped so that dots are literal, and letter lookarounds stop short
	    abbreviations such as 'be' / 'me' from matching inside words like "member" or
	    "resume". A final dot is optional ("B.E" and "B.E." both match) and a trailing
	    plural 's' is tolerated ("Bachelors", "Masters"). Intended for use with
	    re.IGNORECASE, which makes the [a-z] lookarounds cover upper case as well.
	    """
	    core = re.escape(keyword.rstrip('.'))
	    tail = r'\.?' if keyword.endswith('.') else r's?'
	    return rf'(?<![a-z]){core}{tail}(?![a-z])'

	# current_time=datetime.now()
	# df=resumes_data()
	# exp=extract_experience(df['Resume'][10])
	# print (exp)
	# end_time=datetime.now()
	# print('total time:', end_time-current_time)



	def extract_skills(text, job_details):
	    """
	    Return the job skills that fuzzy-match some whitespace-separated token of the resume.

	    Parameters:
	    - text (str): Resume text.
	    - job_details (dict): Parsed job description; job_details['Skills'] is the list of skill names.

	    Returns:
	    - list of str: Skills (as spelled in the job description) whose best-matching resume
	      token scores at least 95. Empty when the resume has no tokens.
	    """
	    job_skills = job_details['Skills']

	    # Tokenise the resume once instead of once per skill.
	    resume_tokens = text.lower().split()
	    if not resume_tokens:
	        # extractOne returns None when there are no choices; nothing can match.
	        return []

	    found_skills = []
	    for skill in job_skills:
	        best_match = process.extractOne(skill.lower(), resume_tokens)
	        # best_match is a (token, score) pair; guard against None defensively.
	        if best_match is not None and best_match[1] >= 95:
	            found_skills.append(skill)
	    return found_skills


	# current_time=datetime.now()
	# df=resumes_data()
	# exp=extract_skills(df['Resume'][10], job_details)
	# print (exp)
	# end_time=datetime.now()
	# print('total time:', end_time-current_time)


	def extract_education(text, job_details):
	    """
	    Score how well the resume text matches any of the degrees required by the job.

	    Parameters:
	    - text (str): Resume text.
	    - job_details (dict): Parsed job description; job_details['Education'] lists the degree strings.

	    Returns:
	    - int: Highest fuzz.partial_ratio between a lower-cased degree and the lower-cased
	      text, or 0 when no degree is listed.
	    """
	    degree_scores = (
	        fuzz.partial_ratio(required_degree.lower(), text.lower())
	        for required_degree in job_details['Education']
	    )
	    # default=0 reproduces the zero starting value of a running maximum.
	    return max(degree_scores, default=0)


	# current_time=datetime.now()
	# df=resumes_data()
	# exp=extract_education(df['Resume'][10], job_details)
	# print (exp)
	# end_time=datetime.now()
	# print('total time:', end_time-current_time)



	def match_personality_traits(resume_traits,job_details, threshold=70):
	    """
	    Matches personality traits from a job description with those in a candidate's resume using fuzzy matching.

	    Parameters:
	    - resume_traits (list of str or str): Personality traits from the candidate's resume.
	      A plain string is split on whitespace into candidate tokens; otherwise it would be
	      compared character by character.
	    - job_details (dict): Parsed job description; job_details['Personality Traits'] lists the job traits.
	    - threshold (int): Minimum similarity score (0-100) for a valid match.

	    Returns:
	    - dict: Mapping of each job trait to {'Matched Trait': ..., 'Score': ...}. 'Matched Trait'
	      is the best resume candidate when its score meets the threshold, otherwise
	      "No suitable match". When there are no resume candidates the score is 0.
	    """
	    job_traits = job_details['Personality Traits']

	    if isinstance(resume_traits, str):
	        # Same whitespace tokenisation that extract_skills applies to resume text.
	        resume_traits = resume_traits.split()

	    if len(resume_traits) == 0:
	        # extractOne returns None without choices, which would break the unpacking below.
	        return {
	            job_trait: {'Matched Trait': "No suitable match", 'Score': 0}
	            for job_trait in job_traits
	        }

	    matches = {}
	    for job_trait in job_traits:
	        best_match, score = process.extractOne(job_trait, resume_traits, scorer=fuzz.token_sort_ratio)

	        # Only report the candidate if the score meets the threshold
	        if score >= threshold:
	            matches[job_trait] = {'Matched Trait': best_match, 'Score': score}
	        else:
	            matches[job_trait] = {'Matched Trait': "No suitable match", 'Score': score}

	    return matches


	# current_time=datetime.now()
	# df=resumes_data()
	# exp=match_personality_traits(df['Resume'][10], job_details)
	# print (exp)
	# end_time=datetime.now()
	# print('total time:', end_time-current_time)

	def scoring(resume_text, job_description):
	    """
	    Score one resume against a parsed job description.

	    Parameters:
	    - resume_text (str): Raw resume text.
	    - job_description (dict): Parsed job details, passed through to extract_skills,
	      match_personality_traits and extract_education.

	    Returns:
	    - dict: 'matched_skills' (number of job skills found by extract_skills),
	      'experience' (value returned by extract_experience),
	      'education_relevance' (extract_education result divided by 100),
	      'trait_flag' (1 if the first job trait has a match, otherwise 0; 0 when the
	      job description yields no traits).
	    """
	    matched_skills = len(extract_skills(resume_text, job_description))
	    # NOTE(review): resume_text is a raw string, while match_personality_traits
	    # documents resume_traits as a list of str - confirm how the callee treats it.
	    traits = match_personality_traits(resume_text, job_description)
	    experience = extract_experience(resume_text)
	    education_relevance = extract_education(resume_text, job_description) / 100

	    # Only the first job trait drives the flag (behaviour kept as-is).
	    # NOTE(review): confirm whether "any trait matched" was intended instead.
	    # An empty trait mapping used to raise IndexError; it now yields 0.
	    first_trait = next(iter(traits.values()), None)
	    if first_trait is None or first_trait['Matched Trait'] == 'No suitable match':
	        trait_flag = 0
	    else:
	        trait_flag = 1

	    return {
	        'matched_skills': matched_skills,
	        'experience': experience,
	        'education_relevance': education_relevance,
	        'trait_flag': trait_flag
	    }


	# current_time=datetime.now()
	# df=resumes_data()
	# exp=scoring(df['Resume'][10], job_details)
	# print (exp)
	# end_time=datetime.now()
	# print('total time:', end_time-current_time)



	def get_scores_optimized(df, job_description):
	    """
	    Append the scoring() outputs for every resume to the input DataFrame.

	    Parameters:
	    - df (pandas.DataFrame): Must contain a 'Resume' column with the resume texts.
	    - job_description (dict): Parsed job details forwarded to scoring().

	    Returns:
	    - pandas.DataFrame: df with one extra column per key of the dict returned by
	      scoring(), aligned on df's index.
	    """
	    # One scoring() call per resume, collected in row order.
	    per_resume_scores = [scoring(resume, job_description) for resume in df['Resume']]

	    # Re-use df's index so the score columns line up with the original rows.
	    score_columns = pd.DataFrame(per_resume_scores, index=df.index)

	    return pd.concat([df, score_columns], axis=1)


	# import pandas as pd
	# import multiprocessing as mp
	# from functools import partial
	# import time

	# First, ensure all the helper functions are defined at the module level
	# These are the functions called by scoring(): extract_skills, match_personality_traits,
	# extract_experience, and extract_education

	# def get_scores_optimized(df, job_description):
	# print('inside scores optimized..............')
	# start_time = time.time()

	# # Method 1: Use chunking with the original apply method
	# chunk_size = 32
	# results = []

	# for i in range(0, len(df), chunk_size):
	# chunk = df.iloc[i:i+chunk_size]
	# chunk_results = chunk['Resume'].apply(lambda x: scoring(x, job_description))
	# results.extend(chunk_results.tolist())

	# # Convert the list of dictionaries into a DataFrame and join with original
	# scores_df = pd.DataFrame(results, index=df.index)

	# end_time = time.time()
	# print(f"Processing took {end_time - start_time:.2f} seconds")

	# # Return the original dataframe with the new columns
	# return pd.concat([df, scores_df], axis=1)


	# current_time=datetime.now()
	# df=resumes_data()
	# exp=get_scores_optimized(df, job_details)
	# print (exp)
	# end_time=datetime.now()
	# print('total time:', end_time-current_time)