avimittal30's picture
pushing final changes
6c5d253
import re
from datetime import datetime
from collections import defaultdict
from fuzzywuzzy import process, fuzz
from parse_job_description import extract_job_details
from data import resumes_data
import pandas as pd
# import multiprocessing as mp
# from functools import partial
def extract_experience(text):
    """Estimate years of experience from the year attached to a degree.

    Heuristic: every standalone four-digit number between 1900 and the
    current year is treated as a candidate year.  Bachelor's keywords are
    tried first, then Master's keywords; for each keyword the most recent
    candidate year that follows it on the same line wins.  When no
    keyword/year pair is found, the most recent candidate year anywhere in
    the text is used.

    Args:
        text: Raw resume text.

    Returns:
        ``current_year - matched_year`` as an int, or 0 when ``text`` is not
        a non-empty string or contains no plausible year.
    """
    if not isinstance(text, str) or not text:
        return 0

    bachelors_patterns = [
        'bachelor', 'be', 'b.e.', 'b.tech', 'btech',
        'bachelor of engineering', 'graduation',
    ]
    masters_patterns = [
        'master', 'm.e.', 'me', 'master of engineering',
    ]

    current_year = datetime.now().year
    # Drop numbers that cannot be a completion year: future years would
    # produce negative experience, and values such as "1000" are not years.
    candidate_years = sorted(
        {
            int(token)
            for token in re.findall(r'\b(\d{4})\b', text)
            if 1900 <= int(token) <= current_year
        },
        reverse=True,
    )

    for keyword in bachelors_patterns + masters_patterns:
        # re.escape keeps the dots in "b.e." literal.  The lookarounds stop
        # short keywords such as "be"/"me" from matching inside ordinary
        # words ("member"); \b cannot be used because some keywords end in
        # ".".  The optional "s" keeps "Masters"/"Bachelors" matching.
        degree = rf'(?<!\w){re.escape(keyword)}s?(?!\w)'
        for year in candidate_years:
            # "." does not match newlines, so keyword and year must share a line.
            if re.search(rf'{degree}.*?\b{year}\b', text, re.IGNORECASE):
                return current_year - year

    if candidate_years:
        return current_year - candidate_years[0]
    return 0
# current_time=datetime.now()
# df=resumes_data()
# exp=extract_experience(df['Resume'][10])
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)
def extract_skills(text, job_details):
    """Return the job skills that fuzzily appear in the resume text.

    Each skill in ``job_details['Skills']`` is compared with
    ``process.extractOne`` against the lower-cased, whitespace-split resume
    words.  Multi-word skills are additionally compared against every run of
    the same number of consecutive words, because a single word can never
    score high enough against a multi-word skill.

    Args:
        text: Raw resume text.
        job_details: Mapping with a ``'Skills'`` entry (iterable of str).

    Returns:
        List of the skills, in job order and original spelling, whose best
        match scores at least 95.  Empty when ``text`` is not a string or
        contains no words.
    """
    job_skills = job_details['Skills']
    if not isinstance(text, str):
        return []

    # Tokenize once; the resume does not change between skills.
    tokens = text.lower().split()
    if not tokens:
        # extractOne returns None for an empty choice list, so bail out early.
        return []

    # Candidate phrases keyed by the number of words in the skill.
    phrases_by_width = {1: tokens}
    found_skills = []
    for skill in job_skills:
        query = skill.lower()
        width = max(len(query.split()), 1)
        if width not in phrases_by_width:
            phrases_by_width[width] = tokens + [
                ' '.join(tokens[start:start + width])
                for start in range(len(tokens) - width + 1)
            ]
        best_match = process.extractOne(query, phrases_by_width[width])
        if best_match is not None and best_match[1] >= 95:
            found_skills.append(skill)
    return found_skills
# current_time=datetime.now()
# df=resumes_data()
# exp=extract_skills(df['Resume'][10], job_details)
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)
def extract_education(text, job_details):
    """Return the best fuzzy-match score between the resume and the job's degrees.

    Args:
        text: Raw resume text.
        job_details: Mapping with an ``'Education'`` entry (iterable of str).

    Returns:
        The highest ``fuzz.partial_ratio`` score of any listed degree against
        the lower-cased resume text, or 0 when no degrees are listed or
        ``text`` is not a string.
    """
    education_patterns = job_details['Education']
    if not isinstance(text, str):
        # Missing resume values (None/NaN) cannot match any degree.
        return 0

    # Lower-case the resume once instead of once per degree.
    resume_lower = text.lower()
    return max(
        (
            fuzz.partial_ratio(degree.lower(), resume_lower)
            for degree in education_patterns
        ),
        default=0,
    )
# current_time=datetime.now()
# df=resumes_data()
# exp=extract_education(df['Resume'][10], job_details)
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)
def _word_windows(words, width):
    """Return every run of ``width`` consecutive words joined by single spaces."""
    if width <= 1:
        return list(words)
    if len(words) < width:
        # Text shorter than the trait: compare against the whole text.
        return [' '.join(words)] if words else []
    return [
        ' '.join(words[start:start + width])
        for start in range(len(words) - width + 1)
    ]


def match_personality_traits(resume_traits, job_details, threshold=70):
    """Fuzzy-match the job's personality traits against a candidate's resume.

    Parameters:
    - resume_traits (list of str | str | None): Personality traits taken from
      the resume. A raw resume string is also accepted; it is split into
      phrases with as many words as the job trait being matched, because
      fuzzywuzzy would otherwise iterate over the string character by
      character.
    - job_details (dict): Job description data; the traits to look for are
      read from ``job_details['Personality Traits']``.
    - threshold (int): Minimum similarity score (0-100) for a valid match.

    Returns:
    - dict: Maps each job trait to ``{'Matched Trait': ..., 'Score': ...}``.
      'Matched Trait' is the best-matching resume phrase, or
      "No suitable match" when the score is below ``threshold`` or there is
      nothing to compare against (Score 0 in that case).
    """
    job_traits = job_details['Personality Traits']

    resume_words = None
    listed_traits = []
    if isinstance(resume_traits, str):
        resume_words = resume_traits.split()
    elif resume_traits is not None:
        listed_traits = list(resume_traits)

    windows_by_width = {}
    matches = {}
    for job_trait in job_traits:
        if resume_words is None:
            candidates = listed_traits
        else:
            width = max(len(job_trait.split()), 1)
            if width not in windows_by_width:
                windows_by_width[width] = _word_windows(resume_words, width)
            candidates = windows_by_width[width]

        # extractOne returns None for an empty choice list; skip the call.
        best = None
        if candidates:
            best = process.extractOne(
                job_trait, candidates, scorer=fuzz.token_sort_ratio
            )

        if best is not None and best[1] >= threshold:
            matches[job_trait] = {'Matched Trait': best[0], 'Score': best[1]}
        else:
            matches[job_trait] = {
                'Matched Trait': "No suitable match",
                'Score': best[1] if best is not None else 0,
            }
    return matches
# current_time=datetime.now()
# df=resumes_data()
# exp=match_personality_traits(df['Resume'][10], job_details)
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)
def scoring(resume_text, job_description):
    """Compute the per-resume features used to rank a candidate.

    Args:
        resume_text: Raw resume text.
        job_description: Parsed job details, passed through to
            ``extract_skills``, ``match_personality_traits`` and
            ``extract_education``.

    Returns:
        dict with keys:
        - 'matched_skills': number of skills returned by ``extract_skills``.
        - 'experience': value returned by ``extract_experience``.
        - 'education_relevance': ``extract_education`` score divided by 100.
        - 'trait_flag': 1 if the first job trait found a match, else 0
          (0 as well when the job lists no traits).
    """
    matched_skills = len(extract_skills(resume_text, job_description))
    # NOTE(review): the raw resume text is passed where
    # match_personality_traits documents a list of traits - confirm that the
    # helper handles raw text as intended.
    traits = match_personality_traits(resume_text, job_description)
    experience = extract_experience(resume_text)
    education_relevance = extract_education(resume_text, job_description) / 100

    # Only the first job trait (dicts keep insertion order) drives the flag.
    # Guard against an empty result, which previously raised IndexError.
    first_trait = next(iter(traits.values()), None)
    if first_trait is None or first_trait['Matched Trait'] == 'No suitable match':
        trait_flag = 0
    else:
        trait_flag = 1

    return {
        'matched_skills': matched_skills,
        'experience': experience,
        'education_relevance': education_relevance,
        'trait_flag': trait_flag,
    }
# current_time=datetime.now()
# df=resumes_data()
# exp=scoring(df['Resume'][10], job_details)
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)
def get_scores_optimized(df, job_description):
    """Score every resume in ``df`` and append the scores as new columns.

    Args:
        df: DataFrame with a ``'Resume'`` column.
        job_description: Job details forwarded unchanged to ``scoring``.

    Returns:
        A new DataFrame: ``df`` followed by one column per key of the dict
        that ``scoring`` returns, aligned on ``df``'s index.
    """
    # One score dict per resume, in row order.
    per_resume = [scoring(resume, job_description) for resume in df['Resume']]

    # Reuse the original index so the column-wise concat lines rows up.
    score_columns = pd.DataFrame(per_resume, index=df.index)
    return pd.concat([df, score_columns], axis=1)
# import pandas as pd
# import multiprocessing as mp
# from functools import partial
# import time
# First, ensure all the helper functions are defined at the module level
# These are the functions called by scoring(): extract_skills, match_personality_traits,
# extract_experience, and extract_education
# def get_scores_optimized(df, job_description):
# print('inside scores optimized..............')
# start_time = time.time()
# # Method 1: Use chunking with the original apply method
# chunk_size = 32
# results = []
# for i in range(0, len(df), chunk_size):
# chunk = df.iloc[i:i+chunk_size]
# chunk_results = chunk['Resume'].apply(lambda x: scoring(x, job_description))
# results.extend(chunk_results.tolist())
# # Convert the list of dictionaries into a DataFrame and join with original
# scores_df = pd.DataFrame(results, index=df.index)
# end_time = time.time()
# print(f"Processing took {end_time - start_time:.2f} seconds")
# # Return the original dataframe with the new columns
# return pd.concat([df, scores_df], axis=1)
# current_time=datetime.now()
# df=resumes_data()
# exp=get_scores_optimized(df, job_details)
# print (exp)
# end_time=datetime.now()
# print('total time:', end_time-current_time)