# Page-capture residue (not part of the program): "Spaces: Sleeping"
import re
from datetime import datetime

from pdfminer.high_level import extract_text
class ResumeParser:
    """Extract basic candidate details from the text of a PDF resume.

    The ``extract_*`` helpers and ``parse_date`` depend only on their
    argument, so they are static methods: they can be called either as
    ``self.extract_x(text)`` or as ``ResumeParser.extract_x(text)``.
    When nothing matches, each helper returns a human-readable
    "... not found" placeholder instead of raising.
    """

    def parse(self, resume_file):
        """Extract text from a PDF resume and return the candidate details.

        ``resume_file`` is passed unchanged to pdfminer's ``extract_text``.
        Returns the dict produced by ``extract_candidate_info``.
        """
        text = extract_text(resume_file)
        return self.extract_candidate_info(text)

    def extract_candidate_info(self, text):
        """Return a dict of candidate details extracted from resume text.

        Keys: ``name``, ``email``, ``phone``, ``experience``, ``position``,
        ``location`` (all strings) and ``tech_stack`` (list of strings).
        """
        return {
            "name": self.extract_name(text),
            "email": self.extract_email(text),
            "phone": self.extract_phone(text),
            "experience": self.extract_experience(text),
            "position": self.extract_position(text),
            "location": self.extract_location(text),
            "tech_stack": self.extract_tech_stack(text),
        }

    @staticmethod
    def extract_name(text):
        """Return the first non-empty line if it looks like a full name.

        Only the first non-empty line is inspected. It is accepted when it
        has at least two words, which rejects single-word headings such as
        "Resume"; otherwise "Name not found" is returned.
        """
        first_line = next(
            (line.strip() for line in text.splitlines() if line.strip()), ""
        )
        if len(first_line.split()) >= 2:
            return first_line
        return "Name not found"

    @staticmethod
    def extract_email(text):
        """Return the first email-like token, or "Email not found"."""
        match = re.search(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", text)
        return match.group(0) if match else "Email not found"

    @staticmethod
    def extract_phone(text):
        """Return the first run of 10-13 digits (optionally prefixed by "+").

        Returns "Phone number not found" when there is no such run.
        """
        match = re.search(r"\+?\d{10,13}", text)
        return match.group(0) if match else "Phone number not found"

    @staticmethod
    def extract_position(text):
        """Return the text that follows the first "experience" keyword.

        The rest of the line after "experience:" (or after "experience" plus
        whitespace) is treated as the job title. Returns
        "Position not found" when the keyword is absent.
        """
        match = re.search(r"(?i)experience(?:\:|\s+)([^\n]+)", text)
        return match.group(1).strip() if match else "Position not found"

    @staticmethod
    def extract_location(text):
        """Return the candidate's location, or "Location not found".

        An explicit ``Location: <value>`` label always wins. Only when no
        label exists does the method fall back to the first
        ``<words>, <words>`` phrase on a single line (e.g.
        "Bengaluru, Karnataka").
        """
        # Try the label on its own first so that an earlier unlabeled
        # "X, Y" phrase (e.g. "Jane Doe, Engineer") cannot shadow it.
        labelled = re.search(
            r"\blocation(?:\:|\s+)([^\n]+)", text, re.IGNORECASE
        )
        if labelled:
            return labelled.group(1).strip()

        # The fallback is restricted to spaces/tabs so that it never glues
        # two different lines together.
        city_state = re.search(
            r"\b([A-Za-z][A-Za-z \t]*,[ \t]*[A-Za-z][A-Za-z \t]*)\b", text
        )
        if city_state:
            return city_state.group(1).strip()
        return "Location not found"

    @staticmethod
    def extract_tech_stack(text):
        """Return the unique skills listed on the "Skills" line.

        The remainder of the first "Skills" / "Technical Skills" line is
        split on commas, semicolons and pipes. Entries are stripped, blanks
        dropped, and duplicates removed while keeping first-seen order.
        Returns ``["No tech stack found"]`` when no skills line exists.
        """
        match = re.search(r"(?i)(skills|technical skills)(?:\:|\s+)([^\n]+)", text)
        if not match:
            return ["No tech stack found"]
        pieces = (part.strip() for part in re.split(r"[,\;\|]", match.group(2)))
        # dict.fromkeys de-duplicates while preserving insertion order.
        return list(dict.fromkeys(piece for piece in pieces if piece))

    @staticmethod
    def extract_experience(text):
        """Sum the lengths of all "<Month YYYY> - <Month YYYY|Present>" ranges.

        Month names may be abbreviated or full, matching is case-insensitive,
        and the separator may be a hyphen, en dash or em dash. "Present" (in
        any letter case) means the current date. Returns a string such as
        "1 years, 2 months", or "Experience not found" when the total is zero.
        """
        month_year = (
            r"\b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May"
            r"|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?"
            r"|Nov(?:ember)?|Dec(?:ember)?) \d{4}"
        )
        range_re = re.compile(
            rf"(?P<start>{month_year})\s*[-\u2013\u2014]\s*"
            rf"(?P<end>{month_year}|Present)",
            re.IGNORECASE,
        )

        total_months = 0
        for found in range_re.finditer(text):
            start_date = ResumeParser.parse_date(found.group("start"))
            end_str = found.group("end")
            # The pattern is case-insensitive, so compare case-insensitively
            # too; otherwise "present" would be parsed as a date and dropped.
            if end_str.lower() == "present":
                end_date = datetime.now()
            else:
                end_date = ResumeParser.parse_date(end_str)
            if start_date is None or end_date is None:
                continue
            span = (end_date.year - start_date.year) * 12 + (
                end_date.month - start_date.month
            )
            # A reversed range must not subtract from the total.
            total_months += max(span, 0)

        if total_months <= 0:
            return "Experience not found"
        years, months = divmod(total_months, 12)
        return f"{years} years, {months} months"

    @staticmethod
    def parse_date(date_str):
        """Parse "Jan 2015" or "January 2015" into a ``datetime``.

        Tries the abbreviated month format first, then the full month name.
        Returns ``None`` when neither format matches.
        """
        for fmt in ("%b %Y", "%B %Y"):
            try:
                return datetime.strptime(date_str, fmt)
            except ValueError:
                continue
        return None