Spaces:
Sleeping
Sleeping
| import re | |
| from datetime import datetime | |
| from collections import defaultdict | |
| from fuzzywuzzy import process, fuzz | |
| from parse_job_description import extract_job_details | |
| from data import resumes_data | |
| import pandas as pd | |
| # import multiprocessing as mp | |
| # from functools import partial | |
def extract_experience(text):
    """Estimate years of experience from the graduation year in a resume.

    The most recent plausible year that follows a Bachelor's-degree keyword
    on the same line is taken as the graduation year. If no Bachelor's
    keyword is followed by a year, the same search is repeated with
    Master's-degree keywords, and finally the most recent plausible year
    anywhere in the text is used.

    Parameters:
    - text (str): Raw resume text.

    Returns:
    - int: ``current year - detected year``; 0 when ``text`` is not a string
      or contains no plausible year.
    """
    if not isinstance(text, str):
        return 0

    # Regex fragments for degree keywords. Full words may carry a suffix
    # ("Bachelors", "Masters"); abbreviations are written with escaped dots.
    # Every fragment is anchored below so that short abbreviations such as
    # "be" / "me" no longer match inside ordinary words ("Member",
    # "December"). They can still match the standalone English words.
    bachelors_patterns = [
        r'bachelor\w*', r'be', r'b\.e\.?', r'b\.tech', r'btech',
        r'graduation',
    ]
    masters_patterns = [
        r'master\w*', r'm\.e\.?', r'me',
    ]

    current_year = datetime.now().year
    earliest_year = 1900

    # Keep only 4-digit numbers that can be a calendar year in the past;
    # this drops IDs, pin codes and future dates that would otherwise
    # produce negative experience.
    years = sorted(
        {
            int(token)
            for token in re.findall(r'\b(\d{4})\b', text)
            if earliest_year <= int(token) <= current_year
        },
        reverse=True,
    )
    if not years:
        return 0

    # Bachelor's keywords take priority over Master's keywords.
    for patterns in (bachelors_patterns, masters_patterns):
        for pattern in patterns:
            for year in years:
                # '.' does not match newlines, so the year has to appear
                # on the same line as the keyword.
                keyword_then_year = (
                    fr'(?<!\w)(?:{pattern})(?![a-z]).*?\b{year}\b'
                )
                if re.search(keyword_then_year, text, re.IGNORECASE):
                    return current_year - year

    # No degree keyword found next to a year: use the latest year overall.
    return current_year - years[0]
| # current_time=datetime.now() | |
| # df=resumes_data() | |
| # exp=extract_experience(df['Resume'][10]) | |
| # print (exp) | |
| # end_time=datetime.now() | |
| # print('total time:', end_time-current_time) | |
def extract_skills(text, job_details, threshold=95):
    """Return the job skills that fuzzy-match a word of the resume text.

    Parameters:
    - text (str): Raw resume text; it is lower-cased and split on whitespace.
    - job_details (dict): Job description data; ``job_details['Skills']`` is
      the iterable of skill names to look for.
    - threshold (int): Minimum ``process.extractOne`` score for a skill to
      count as found. Defaults to 95, the value previously hard-coded.

    Returns:
    - list of str: Skills from ``job_details['Skills']`` (original spelling,
      original order) whose best-matching resume word scores at least
      ``threshold``.
    """
    job_skills = job_details['Skills']

    # Tokenise once instead of once per skill.
    resume_tokens = text.lower().split()
    if not resume_tokens:
        # With no candidate words extractOne returns None, which used to
        # crash on best_match[1]; nothing can match an empty resume.
        return []

    found_skills = []
    for skill in job_skills:
        best_match = process.extractOne(skill.lower(), resume_tokens)
        if best_match is not None and best_match[1] >= threshold:
            found_skills.append(skill)
    return found_skills
| # current_time=datetime.now() | |
| # df=resumes_data() | |
| # exp=extract_skills(df['Resume'][10], job_details) | |
| # print (exp) | |
| # end_time=datetime.now() | |
| # print('total time:', end_time-current_time) | |
def extract_education(text, job_details):
    """Score how well the resume text matches the required education.

    Parameters:
    - text (str): Raw resume text.
    - job_details (dict): Job description data; ``job_details['Education']``
      is the iterable of degree names to compare against.

    Returns:
    - The highest ``fuzz.partial_ratio`` obtained between any lower-cased
      degree name and the lower-cased resume text, or 0 when no degree
      scores above 0 (including when the list is empty).
    """
    required_degrees = job_details['Education']
    # Seed with 0 so an empty degree list still yields 0.
    ratios = (
        fuzz.partial_ratio(degree.lower(), text.lower())
        for degree in required_degrees
    )
    return max([0, *ratios])
| # current_time=datetime.now() | |
| # df=resumes_data() | |
| # exp=extract_education(df['Resume'][10], job_details) | |
| # print (exp) | |
| # end_time=datetime.now() | |
| # print('total time:', end_time-current_time) | |
def _word_windows(words, width):
    """Return every run of ``width`` consecutive words joined by spaces."""
    if len(words) <= width:
        return [' '.join(words)] if words else []
    return [
        ' '.join(words[start:start + width])
        for start in range(len(words) - width + 1)
    ]


def match_personality_traits(resume_traits, job_details, threshold=70):
    """
    Matches personality traits from a job description with those in a candidate's resume using fuzzy matching.
    Parameters:
    - resume_traits (list of str, or str): Personality traits from the
      candidate's resume. A plain string is treated as raw resume text and
      each job trait is compared against every run of consecutive words of
      the same length as the trait. Without this, fuzzywuzzy would iterate
      the string character by character and nothing could match.
    - job_details (dict): Job description data; ``job_details['Personality Traits']``
      is the iterable of traits to look for.
    - threshold (int): Minimum similarity score (0-100) for a valid match.
    Returns:
    - dict: Mapping of each job trait to ``{'Matched Trait': ..., 'Score': ...}``.
      'Matched Trait' is the best-matching resume trait, or the string
      "No suitable match" when the best score is below ``threshold`` or
      there is nothing to compare against (score 0 in that case).
    """
    job_traits = job_details['Personality Traits']
    if resume_traits is None:
        resume_traits = []

    is_raw_text = isinstance(resume_traits, str)
    text_words = resume_traits.split() if is_raw_text else None
    windows_by_width = {}  # cache: trait word count -> candidate phrases

    matches = {}
    for job_trait in job_traits:
        if is_raw_text:
            width = max(1, len(job_trait.split()))
            if width not in windows_by_width:
                windows_by_width[width] = _word_windows(text_words, width)
            candidates = windows_by_width[width]
        else:
            candidates = resume_traits

        best = None
        # extractOne returns None for an empty candidate collection, which
        # used to crash the tuple unpacking; skip the call in that case.
        if not (hasattr(candidates, '__len__') and len(candidates) == 0):
            best = process.extractOne(
                job_trait, candidates, scorer=fuzz.token_sort_ratio
            )

        if best is None:
            matches[job_trait] = {'Matched Trait': "No suitable match", 'Score': 0}
            continue

        best_match, score = best[0], best[1]
        # Only return a match if the score meets the threshold
        if score >= threshold:
            matches[job_trait] = {'Matched Trait': best_match, 'Score': score}
        else:
            matches[job_trait] = {'Matched Trait': "No suitable match", 'Score': score}
    return matches
| # current_time=datetime.now() | |
| # df=resumes_data() | |
| # exp=match_personality_traits(df['Resume'][10], job_details) | |
| # print (exp) | |
| # end_time=datetime.now() | |
| # print('total time:', end_time-current_time) | |
def scoring(resume_text, job_description):
    """Compute the matching features of one resume against a job description.

    Parameters:
    - resume_text (str): Raw resume text.
    - job_description (dict): Parsed job details, forwarded unchanged to
      extract_skills, match_personality_traits and extract_education.

    Returns:
    - dict with the keys:
      'matched_skills'      number of skills returned by extract_skills,
      'experience'          value returned by extract_experience,
      'education_relevance' value returned by extract_education divided by 100,
      'trait_flag'          1 if the first job trait has a matched resume
                            trait, otherwise 0 (also 0 when the job lists
                            no traits).
    """
    matched_skills = len(extract_skills(resume_text, job_description))
    # NOTE(review): the raw resume text is passed here although the callee's
    # docstring describes a list of traits - confirm the callee handles
    # plain text.
    traits = match_personality_traits(resume_text, job_description)
    experience = extract_experience(resume_text)
    education_relevance = extract_education(resume_text, job_description) / 100

    # Only the first trait's result feeds the flag, as before. The default
    # avoids the IndexError previously raised when there were no traits.
    # NOTE(review): confirm whether the flag should consider every trait.
    first_trait = next(iter(traits.values()), None)
    if first_trait is None or first_trait['Matched Trait'] == 'No suitable match':
        trait_flag = 0
    else:
        trait_flag = 1

    return {
        'matched_skills': matched_skills,
        'experience': experience,
        'education_relevance': education_relevance,
        'trait_flag': trait_flag,
    }
| # current_time=datetime.now() | |
| # df=resumes_data() | |
| # exp=scoring(df['Resume'][10], job_details) | |
| # print (exp) | |
| # end_time=datetime.now() | |
| # print('total time:', end_time-current_time) | |
def get_scores_optimized(df, job_description):
    """Append the scoring() features of every resume to the DataFrame.

    Parameters:
    - df (pandas.DataFrame): Must contain a 'Resume' column.
    - job_description: Passed unchanged to scoring() for every row.

    Returns:
    - pandas.DataFrame: ``df`` with one extra column per key of the
      dictionaries returned by scoring(), aligned on ``df.index``.
    """
    def _score_resume(resume):
        return scoring(resume, job_description)

    # One pass over the resumes; each element is the dict built by scoring().
    per_resume = df['Resume'].apply(_score_resume)

    # Expand the dicts into columns that share the original index.
    score_columns = pd.DataFrame(per_resume.tolist(), index=df.index)

    return pd.concat([df, score_columns], axis=1)
| # import pandas as pd | |
| # import multiprocessing as mp | |
| # from functools import partial | |
| # import time | |
| # First, ensure all the helper functions are defined at the module level | |
| # These are the functions called by scoring(): extract_skills, match_personality_traits, | |
| # extract_experience, and extract_education | |
| # def get_scores_optimized(df, job_description): | |
| # print('inside scores optimized..............') | |
| # start_time = time.time() | |
| # # Method 1: Use chunking with the original apply method | |
| # chunk_size = 32 | |
| # results = [] | |
| # for i in range(0, len(df), chunk_size): | |
| # chunk = df.iloc[i:i+chunk_size] | |
| # chunk_results = chunk['Resume'].apply(lambda x: scoring(x, job_description)) | |
| # results.extend(chunk_results.tolist()) | |
| # # Convert the list of dictionaries into a DataFrame and join with original | |
| # scores_df = pd.DataFrame(results, index=df.index) | |
| # end_time = time.time() | |
| # print(f"Processing took {end_time - start_time:.2f} seconds") | |
| # # Return the original dataframe with the new columns | |
| # return pd.concat([df, scores_df], axis=1) | |
| # current_time=datetime.now() | |
| # df=resumes_data() | |
| # exp=get_scores_optimized(df, job_details) | |
| # print (exp) | |
| # end_time=datetime.now() | |
| # print('total time:', end_time-current_time) | |