"""Score resumes against a parsed job description using fuzzy matching."""

import re
from collections import defaultdict
from datetime import datetime

import pandas as pd
from fuzzywuzzy import fuzz, process

# Not referenced by the functions below; kept because other parts of the
# project (or ad-hoc debugging sessions) may rely on them being importable here.
from parse_job_description import extract_job_details
from data import resumes_data

# Degree keywords, tried in this order: every bachelor's keyword is checked
# before any master's keyword.
_BACHELORS_PATTERNS = (
    'bachelor', 'be', 'b.e.', 'b.tech', 'btech',
    'bachelor of engineering', 'graduation',
)
_MASTERS_PATTERNS = ('master', 'm.e.', 'me', 'master of engineering')

_YEAR_RE = re.compile(r'\b(\d{4})\b')
# Four-digit numbers outside [_EARLIEST_PLAUSIBLE_YEAR, current year] are not
# treated as years (they would yield negative or absurd experience values).
_EARLIEST_PLAUSIBLE_YEAR = 1900

# A '.' inside an abbreviation may be written as '.', whitespace, '-' or omitted
# ("B.Tech", "B Tech", "B-Tech", "BTech").
_ABBREV_SEPARATOR = r'[.\s-]?'

# Minimum fuzzy score (0-100) for a job skill to count as present in a resume.
_SKILL_MATCH_THRESHOLD = 95

_NO_MATCH = 'No suitable match'

# Keys of the dict returned by scoring(); also the columns that
# get_scores_optimized() appends.
_SCORE_COLUMNS = ('matched_skills', 'experience', 'education_relevance', 'trait_flag')


def _degree_keyword_regex(keyword):
    """Return a regex fragment matching `keyword` as a whole word.

    The keyword is escaped, so its characters are literal, except that each
    '.' becomes an optional separator.  An optional plural/possessive suffix
    is allowed so "bachelors" and "master's" still match, while two-letter
    abbreviations such as 'be'/'me' no longer match inside longer words.
    """
    fragment = re.escape(keyword).replace(r'\.', _ABBREV_SEPARATOR)
    return rf"(?<!\w){fragment}(?:'?s)?(?!\w)"


_DEGREE_KEYWORD_REGEXES = tuple(
    _degree_keyword_regex(keyword)
    for keyword in _BACHELORS_PATTERNS + _MASTERS_PATTERNS
)


def _with_phrases(tokens, size):
    """Return `tokens` plus every run of `size` consecutive tokens joined by spaces.

    For `size` <= 1 (or fewer tokens than `size`) the token list is returned
    unchanged.
    """
    if size <= 1 or len(tokens) < size:
        return tokens
    phrases = [
        ' '.join(tokens[start:start + size])
        for start in range(len(tokens) - size + 1)
    ]
    return tokens + phrases


def extract_experience(text):
    """Estimate years of experience from the years mentioned in a resume.

    The text is scanned for four-digit years.  For each degree keyword
    (bachelor's keywords first, then master's), the most recent year that
    follows the keyword on the same line is taken as the reference year.
    If no keyword/year pair is found, the most recent year anywhere in the
    text is used.

    Parameters:
    - text (str): Resume text.

    Returns:
    - int: Current year minus the reference year; 0 when `text` is empty,
      not a string, or contains no plausible year.
    """
    if not isinstance(text, str) or not text:
        return 0

    current_year = datetime.now().year
    # Most recent first, so the first keyword hit uses the latest matching year.
    years = sorted(
        {
            year
            for year in map(int, _YEAR_RE.findall(text))
            if _EARLIEST_PLAUSIBLE_YEAR <= year <= current_year
        },
        reverse=True,
    )

    for keyword_regex in _DEGREE_KEYWORD_REGEXES:
        for year in years:
            # '.' does not match newlines, so keyword and year must share a line.
            pattern = rf"{keyword_regex}.*?(?<!\d){year}(?!\d)"
            if re.search(pattern, text, re.IGNORECASE):
                return current_year - year

    if years:
        return current_year - years[0]
    return 0


def extract_skills(text, job_details):
    """Return the job skills that fuzzily match something in the resume.

    Each skill is compared with every whitespace-separated token of the
    resume; multi-word skills are additionally compared with every run of
    the same number of consecutive tokens, so a skill such as
    "machine learning" can match the phrase as a whole.

    Parameters:
    - text (str): Resume text.
    - job_details (dict): Must contain 'Skills', an iterable of skill strings.

    Returns:
    - list of str: Skills (as spelled in `job_details`) whose best match
      scores at least 95.
    """
    job_skills = job_details['Skills']
    tokens = text.lower().split() if isinstance(text, str) else []
    if not tokens:
        # extractOne returns None for an empty choice list; nothing can match.
        return []

    candidates_by_size = {}
    found_skills = []
    for skill in job_skills:
        query = skill.lower()
        size = len(query.split())
        if size not in candidates_by_size:
            candidates_by_size[size] = _with_phrases(tokens, size)
        best_match = process.extractOne(query, candidates_by_size[size])
        if best_match is not None and best_match[1] >= _SKILL_MATCH_THRESHOLD:
            found_skills.append(skill)
    return found_skills


def extract_education(text, job_details):
    """Return the best partial-ratio score between the resume and any required degree.

    Parameters:
    - text (str): Resume text.
    - job_details (dict): Must contain 'Education', an iterable of degree strings.

    Returns:
    - int: Highest `fuzz.partial_ratio` score (0-100) over all degrees; 0 when
      there are no degrees or `text` is not a string.
    """
    education_patterns = job_details['Education']
    if not isinstance(text, str):
        return 0
    haystack = text.lower()
    return max(
        (fuzz.partial_ratio(degree.lower(), haystack) for degree in education_patterns),
        default=0,
    )


def match_personality_traits(resume_traits, job_details, threshold=70):
    """Match the job's personality traits against a candidate's resume.

    Parameters:
    - resume_traits (list of str or str): Personality traits from the
      candidate's resume.  A plain string (e.g. the whole resume text) is
      split on whitespace into words and word runs first; passed as-is,
      fuzzywuzzy would iterate it character by character.
    - job_details (dict): Must contain 'Personality Traits', an iterable of
      trait strings from the job description.
    - threshold (int): Minimum similarity score (0-100) for a valid match.

    Returns:
    - dict: Maps each job trait to {'Matched Trait': ..., 'Score': ...}.
      'Matched Trait' is the best resume candidate, or "No suitable match"
      when the best score is below `threshold` or there are no candidates.
    """
    job_traits = job_details['Personality Traits']
    tokens = resume_traits.split() if isinstance(resume_traits, str) else None

    candidates_by_size = {}
    matches = {}
    for job_trait in job_traits:
        if tokens is None:
            candidates = resume_traits
        else:
            size = len(job_trait.split())
            if size not in candidates_by_size:
                candidates_by_size[size] = _with_phrases(tokens, size)
            candidates = candidates_by_size[size]

        # extractOne returns None when there is nothing to compare against.
        best = process.extractOne(job_trait, candidates, scorer=fuzz.token_sort_ratio)
        if best is not None and best[1] >= threshold:
            matches[job_trait] = {'Matched Trait': best[0], 'Score': best[1]}
        else:
            score = best[1] if best is not None else 0
            matches[job_trait] = {'Matched Trait': _NO_MATCH, 'Score': score}
    return matches


def scoring(resume_text, job_description):
    """Compute all score components for one resume.

    Parameters:
    - resume_text (str): Resume text; a non-string value (for example a
      missing cell) is treated as an empty resume.
    - job_description (dict): Parsed job details with the keys 'Skills',
      'Education' and 'Personality Traits'.

    Returns:
    - dict: 'matched_skills' (number of matched skills), 'experience'
      (years), 'education_relevance' (0.0-1.0) and 'trait_flag' (0 or 1).
    """
    if not isinstance(resume_text, str):
        resume_text = ''

    matched_skills = len(extract_skills(resume_text, job_description))
    traits = match_personality_traits(resume_text, job_description)
    experience = extract_experience(resume_text)
    education_relevance = extract_education(resume_text, job_description) / 100

    # NOTE(review): the flag reflects only the FIRST job trait, as before.
    # Confirm whether "any trait matched" was intended.
    first_trait = next(iter(traits.values()), None)
    trait_flag = 0
    if first_trait is not None and first_trait['Matched Trait'] != _NO_MATCH:
        trait_flag = 1

    return {
        'matched_skills': matched_skills,
        'experience': experience,
        'education_relevance': education_relevance,
        'trait_flag': trait_flag,
    }


def get_scores_optimized(df, job_description):
    """Append the score columns for every resume in `df`.

    Parameters:
    - df (pandas.DataFrame): Must contain a 'Resume' column of resume texts.
    - job_description (dict): Parsed job details, passed to `scoring`.

    Returns:
    - pandas.DataFrame: `df` with the columns 'matched_skills', 'experience',
      'education_relevance' and 'trait_flag' appended (present even when
      `df` has no rows).
    """
    results = df['Resume'].apply(lambda resume: scoring(resume, job_description))
    # Pin the columns so an empty input still yields the full schema.
    scores_df = pd.DataFrame(
        results.tolist(), index=df.index, columns=list(_SCORE_COLUMNS)
    )
    return pd.concat([df, scores_df], axis=1)