Spaces:
Sleeping
Sleeping
| import re | |
class ResumeAnalyzer:
    """Keyword- and regex-based heuristics for scoring resume text.

    The analyzer works on plain text (see ``extract_text_from_pdf`` and
    ``extract_text_from_docx``) and never parses layout.  Section detection
    is substring based: a line "belongs" to a section when it contains one of
    that section's keywords, so results are best-effort.
    """

    # Separators between individual skills inside one skills entry.  A plain
    # hyphen only counts when it is not glued between two non-space
    # characters, so "scikit-learn" survives while "- Python" and
    # "Python - SQL" are still split.
    _SKILL_SEPARATOR_RE = re.compile(r'[,•|/\\·>–―]|(?<!\S)-|-(?!\S)')

    def __init__(self):
        # Indicator keywords per document type, matched as lowercase
        # substrings by detect_document_type().  The 'resume' list doubles as
        # the "another section starts here" marker for the section extractors.
        self.document_types = {
            'resume': [
                'experience', 'education', 'skills', 'work', 'project', 'objective',
                'summary', 'employment', 'qualification', 'achievements'
            ],
            'marksheet': [
                'grade', 'marks', 'score', 'semester', 'cgpa', 'sgpa', 'examination',
                'result', 'academic year', 'percentage'
            ],
            'certificate': [
                'certificate', 'certification', 'awarded', 'completed', 'achievement',
                'training', 'course completion', 'qualified'
            ],
            'id_card': [
                'id card', 'identity', 'student id', 'employee id', 'valid until',
                'date of issue', 'identification'
            ]
        }

    def detect_document_type(self, text):
        """Guess the document type of *text* from indicator keywords.

        Each type is scored as ``0.7 * density + 0.3 * frequency`` where
        density is the share of that type's keywords present in the text and
        frequency is the number of present keywords per word of text.

        Returns:
            The best scoring key of ``self.document_types``, or ``'unknown'``
            when the best score does not exceed 0.15.
        """
        text = text.lower()
        word_count = len(text.split()) + 1  # +1 avoids division by zero
        scores = {}
        for doc_type, keywords in self.document_types.items():
            matches = sum(1 for keyword in keywords if keyword in text)
            density = matches / len(keywords)
            frequency = matches / word_count
            scores[doc_type] = (density * 0.7) + (frequency * 0.3)
        best_type, best_score = max(scores.items(), key=lambda item: item[1])
        return best_type if best_score > 0.15 else 'unknown'

    @staticmethod
    def _skill_pattern(skill):
        """Compile a case-folded, token-bounded pattern for one skill name."""
        needle = skill.lower().strip()
        pattern = re.escape(needle)
        # Only anchor the sides that are word characters, so ".net" still
        # matches inside "asp.net" and "c++" does not need a trailing boundary.
        if needle[:1].isalnum() or needle[:1] == '_':
            pattern = r'(?<!\w)' + pattern
        if needle[-1:].isalnum() or needle[-1:] == '_':
            # '+' and '#' are excluded so that "c" is not found in "c++"/"c#".
            pattern = pattern + r'(?![\w+#])'
        return re.compile(pattern)

    def calculate_keyword_match(self, resume_text, required_skills):
        """Report which required skills are mentioned in the resume text.

        Matching is case-insensitive and token-bounded: a skill is found only
        when it is not embedded in a longer word ("Java" is not found in
        "JavaScript").

        Args:
            resume_text: Full resume text.
            required_skills: Iterable of skill names; ``None`` is treated as
                an empty list.

        Returns:
            Dict with ``score`` (percentage of skills found, 0 when no skills
            are required), ``found_skills`` and ``missing_skills`` (both in
            the order given).
        """
        resume_text = resume_text.lower()
        required_skills = required_skills or []
        found_skills = []
        missing_skills = []
        for skill in required_skills:
            if self._skill_pattern(skill).search(resume_text):
                found_skills.append(skill)
            else:
                missing_skills.append(skill)
        match_score = (len(found_skills) / len(required_skills)) * 100 if required_skills else 0
        return {
            'score': match_score,
            'found_skills': found_skills,
            'missing_skills': missing_skills
        }

    def check_resume_sections(self, text):
        """Score the presence of the four essential sections (0-100).

        Each section contributes up to 25 points, proportional to how many of
        its indicator keywords occur (as substrings) in the lowercased text.
        """
        text = text.lower()
        essential_sections = {
            'contact': ['email', 'phone', 'address', 'linkedin'],
            'education': ['education', 'university', 'college', 'degree', 'academic'],
            'experience': ['experience', 'work', 'employment', 'job', 'internship'],
            'skills': ['skills', 'technologies', 'tools', 'proficiencies', 'expertise']
        }
        total = 0
        for keywords in essential_sections.values():
            found = sum(1 for keyword in keywords if keyword in text)
            total += min(25, (found / len(keywords)) * 25)
        return total

    def check_formatting(self, text):
        """Apply simple formatting checks to the resume text.

        Starts from 100 and subtracts a fixed penalty for each failed check.

        Returns:
            Tuple ``(score, deductions)`` where score is clamped at 0 and
            deductions lists one message per failed check.
        """
        lines = text.split('\n')
        score = 100
        deductions = []

        # Minimum content
        if len(text) < 300:
            score -= 30
            deductions.append("Resume is too short")

        # Section headers: at least one all-uppercase line
        if not any(line.isupper() for line in lines):
            score -= 20
            deductions.append("No clear section headers found")

        # Bullet points
        bullet_markers = ('•', '-', '*', '→')
        if not any(line.strip().startswith(bullet_markers) for line in lines):
            score -= 20
            deductions.append("No bullet points found for listing details")

        # Two consecutive blank lines count as inconsistent spacing
        if any(not line.strip() and not next_line.strip()
               for line, next_line in zip(lines, lines[1:])):
            score -= 15
            deductions.append("Inconsistent spacing between sections")

        # Contact information: any one recognisable pattern is enough
        contact_patterns = [
            r'\b[\w\.-]+@[\w\.-]+\.\w+\b',  # email
            r'\b\d{3}[-.]?\d{3}[-.]?\d{4}\b',  # phone
            r'linkedin\.com/\w+',  # LinkedIn
        ]
        if not any(re.search(pattern, text) for pattern in contact_patterns):
            score -= 15
            deductions.append("Missing or improperly formatted contact information")

        return max(0, score), deductions

    def extract_text_from_pdf(self, file):
        """Extract the text of every page of a PDF.

        Args:
            file: Raw PDF bytes, or a file-like object with ``read()``.  A
                file-like object is rewound afterwards when it supports
                ``seek``.

        Returns:
            Page texts concatenated, each followed by a newline.

        Raises:
            Exception: Wrapping any failure (missing PyPDF2, unreadable
                file); the original error is attached as ``__cause__``.
        """
        try:
            import io
            import PyPDF2

            if hasattr(file, 'read'):
                file_content = file.read()
                if hasattr(file, 'seek'):
                    file.seek(0)  # Reset file pointer for later readers
            else:
                file_content = file
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_content))
            # Guard against a page yielding no text object at all.
            return "".join((page.extract_text() or "") + "\n" for page in pdf_reader.pages)
        except Exception as e:
            raise Exception(f"Error extracting text from PDF: {str(e)}") from e

    def extract_text_from_docx(self, docx_file):
        """Extract the paragraph text of a DOCX file, one paragraph per line.

        Raises:
            Exception: Wrapping any failure; the original error is attached
                as ``__cause__``.
        """
        try:
            from docx import Document

            doc = Document(docx_file)
            return '\n'.join(paragraph.text for paragraph in doc.paragraphs)
        except Exception as e:
            raise Exception(f"Error extracting text from DOCX file: {str(e)}") from e

    def extract_personal_info(self, text):
        """Extract contact details from resume text.

        The name is assumed to be the first non-blank line.  Email, phone,
        LinkedIn and GitHub are the first regex match of each kind, or an
        empty string when nothing matches.

        Returns:
            Dict with keys ``name``, ``email``, ``phone``, ``linkedin``,
            ``github`` and ``portfolio`` (always empty for now).
        """
        email_pattern = r'[\w\.-]+@[\w\.-]+\.\w+'
        phone_pattern = r'(\+\d{1,3}[-.]?)?\s*\(?\d{3}\)?[-.]?\s*\d{3}[-.]?\s*\d{4}'
        linkedin_pattern = r'linkedin\.com/in/[\w-]+'
        github_pattern = r'github\.com/[\w-]+'

        def first_match(pattern):
            match = re.search(pattern, text)
            return match.group(0) if match else ''

        # Extracted text often starts with blank lines; skip them.
        name = next((line.strip() for line in text.split('\n') if line.strip()), 'Unknown')

        return {
            'name': name,
            'email': first_match(email_pattern),
            # The pattern may swallow whitespace in front of the number.
            'phone': first_match(phone_pattern).strip(),
            'linkedin': first_match(linkedin_pattern),
            'github': first_match(github_pattern),
            'portfolio': ''  # Can be enhanced later
        }

    def _collect_section_entries(self, text, section_keywords):
        """Collect the text entries of one resume section.

        A line containing any of *section_keywords* opens (or continues) the
        section; it is kept as content unless it is a bare header, i.e. equal
        to a keyword once a trailing colon is removed.  While inside the
        section, a blank line closes the current entry and a line containing
        a keyword from ``self.document_types['resume']`` ends the section.

        Args:
            text: Full resume text.
            section_keywords: Lowercase keywords identifying the section.

        Returns:
            List of entries, each the space-joined lines of one paragraph.
        """
        entries = []
        current_entry = []
        in_section = False
        other_section_keywords = self.document_types['resume']

        def flush():
            if current_entry:
                entries.append(' '.join(current_entry))
                current_entry.clear()

        for raw_line in text.split('\n'):
            line = raw_line.strip()
            lowered = line.lower()

            if any(keyword in lowered for keyword in section_keywords):
                if lowered.rstrip(' :') not in section_keywords:
                    # The line carries content, it is not just a header.
                    current_entry.append(line)
                in_section = True
                continue

            if not in_section:
                continue
            if not line:
                flush()
            elif any(keyword in lowered for keyword in other_section_keywords):
                # Lines with this section's own keywords were handled above,
                # so this marks the start of a different section.
                in_section = False
                flush()
            else:
                current_entry.append(line)

        flush()
        return entries

    def extract_education(self, text):
        """Extract education entries from resume text as a list of strings."""
        education_keywords = [
            'education', 'academic', 'qualification', 'degree', 'university', 'college',
            'school', 'institute', 'certification', 'diploma', 'bachelor', 'master',
            'phd', 'b.tech', 'm.tech', 'b.e', 'm.e', 'b.sc', 'm.sc', 'bca', 'mca', 'b.com',
            'm.com', 'b.cs-it', 'imca', 'bba', 'mba', 'honors', 'scholarship'
        ]
        return self._collect_section_entries(text, education_keywords)

    def extract_experience(self, text):
        """Extract work experience entries from resume text as a list of strings."""
        experience_keywords = [
            'experience', 'employment', 'work history', 'professional experience',
            'work experience', 'career history', 'professional background',
            'employment history', 'job history', 'positions held', 'experience',
            'job title', 'job responsibilities', 'job description', 'job summary'
        ]
        return self._collect_section_entries(text, experience_keywords)

    def extract_projects(self, text):
        """Extract project entries from resume text as a list of strings."""
        project_keywords = [
            'projects', 'personal projects', 'academic projects', 'key projects',
            'major projects', 'professional projects', 'project experience',
            'relevant projects', 'featured projects', 'latest projects',
            'top projects'
        ]
        return self._collect_section_entries(text, project_keywords)

    def extract_skills(self, text):
        """Extract individual skills from the skills section(s).

        Every entry of the section is split once on all separators
        (``, • | / \\ · > – ―`` and a standalone ``-``).  Entries that contain
        no separator at all contribute nothing.

        Returns:
            List of unique skill strings, in no particular order.
        """
        skills_keywords = [
            'skills', 'technical skills', 'competencies', 'expertise',
            'core competencies', 'professional skills', 'key skills',
            'technical expertise', 'proficiencies', 'qualifications',
            'top skills', 'key skill', 'major skill', 'personal skill',
            'soft skills', 'soft skill', 'soft skillset'
        ]
        skills = set()  # Use set to avoid duplicates
        for entry in self._collect_section_entries(text, skills_keywords):
            parts = self._SKILL_SEPARATOR_RE.split(entry)
            if len(parts) > 1:  # at least one separator was present
                skills.update(part.strip() for part in parts if part.strip())
        return list(skills)

    def extract_summary(self, text):
        """Extract the summary/objective from resume text.

        Two sources are combined, in this order: the first five non-blank
        lines, when they hold more than 10 words, mention no contact terms
        and the very first line is not a summary header; and the entries of
        any explicitly marked summary section.

        Returns:
            The pieces joined by spaces, or ``''`` when nothing was found.
        """
        summary_keywords = [
            'summary', 'professional summary', 'career summary', 'objective',
            'career objective', 'professional objective', 'about me', 'profile',
            'professional profile', 'career profile', 'overview', 'skill summary'
        ]
        summary = []

        # First five non-blank lines as a candidate unlabelled summary.
        first_lines = []
        for raw_line in text.split('\n'):
            stripped = raw_line.strip()
            if stripped:
                first_lines.append(stripped)
                if len(first_lines) >= 5:
                    break

        if first_lines and not any(keyword in first_lines[0].lower() for keyword in summary_keywords):
            potential_summary = ' '.join(first_lines)
            if len(potential_summary.split()) > 10:  # More than 10 words
                if not re.search(r'\b(?:email|phone|address|tel|mobile|linkedin)\b', potential_summary.lower()):
                    summary.append(potential_summary)

        # Explicitly marked summary section
        summary.extend(self._collect_section_entries(text, summary_keywords))

        return ' '.join(summary) if summary else ''

    @staticmethod
    def _experience_suggestions(experience):
        """Return improvement hints for the extracted experience entries."""
        if not experience:
            return ["Add your work experience section"]
        suggestions = []
        has_dates = any(re.search(r'\b(19|20)\d{2}\b', exp) for exp in experience)
        has_bullets = any(re.search(r'[•\-\*]', exp) for exp in experience)
        has_action_verbs = any(
            re.search(r'\b(developed|managed|created|implemented|designed|led|improved)\b', exp.lower())
            for exp in experience
        )
        if not has_dates:
            suggestions.append("Include dates for each work experience")
        if not has_bullets:
            suggestions.append("Use bullet points to list your achievements and responsibilities")
        if not has_action_verbs:
            suggestions.append("Start bullet points with strong action verbs")
        return suggestions

    @staticmethod
    def _education_suggestions(education, require_gpa):
        """Return improvement hints for the extracted education entries."""
        if not education:
            return ["Add your educational background"]
        suggestions = []
        has_dates = any(re.search(r'\b(19|20)\d{2}\b', edu) for edu in education)
        has_degree = any(re.search(r'\b(bachelor|master|phd|b\.|m\.|diploma)\b', edu.lower())
                         for edu in education)
        has_gpa = any(re.search(r'\b(gpa|cgpa|grade|percentage)\b', edu.lower())
                      for edu in education)
        if not has_dates:
            suggestions.append("Include graduation dates")
        if not has_degree:
            suggestions.append("Specify your degree type")
        if not has_gpa and require_gpa:
            suggestions.append("Include your GPA if it's above 3.0")
        return suggestions

    def analyze_resume(self, resume_data, job_requirements):
        """Analyze a resume and return scores and recommendations.

        Args:
            resume_data: Mapping with the resume text under ``'raw_text'``
                (a missing or ``None`` value is treated as empty text).
            job_requirements: Mapping that may hold ``'required_skills'``
                (list of names) and ``'require_gpa'`` (bool); ``None`` is
                treated as empty.

        Returns:
            A result dict.  For a detected resume it contains the personal
            info fields, ``ats_score`` (weighted: contact 10%, summary 10%,
            skills 30%, experience 20%, education 10%, format 20%), the
            extracted sections, per-section suggestions and
            ``section_scores``.  For any other document type, or when an
            error occurs, a reduced dict with zeroed scores is returned; the
            error variant also carries an ``'error'`` key.  This method does
            not raise.
        """
        try:
            text = resume_data.get('raw_text') or ''
            job_requirements = job_requirements or {}

            # Extract personal information
            personal_info = self.extract_personal_info(text)

            # Only resumes are scored
            doc_type = self.detect_document_type(text)
            if doc_type != 'resume':
                return {
                    'ats_score': 0,
                    'document_type': doc_type,
                    'keyword_match': {'score': 0, 'found_skills': [], 'missing_skills': []},
                    'section_score': 0,
                    'format_score': 0,
                    'suggestions': [f"This appears to be a {doc_type} document. Please upload a resume for ATS analysis."]
                }

            # Keyword match against the job requirements
            required_skills = job_requirements.get('required_skills', [])
            keyword_match = self.calculate_keyword_match(text, required_skills)

            # Extract all resume sections
            education = self.extract_education(text)
            experience = self.extract_experience(text)
            projects = self.extract_projects(text)
            skills = self.extract_skills(text)
            summary = self.extract_summary(text)

            section_score = self.check_resume_sections(text)
            format_score, format_deductions = self.check_formatting(text)

            # Section-specific suggestions
            contact_suggestions = []
            if not personal_info.get('email'):
                contact_suggestions.append("Add your email address")
            if not personal_info.get('phone'):
                contact_suggestions.append("Add your phone number")
            if not personal_info.get('linkedin'):
                contact_suggestions.append("Add your LinkedIn profile URL")

            summary_suggestions = []
            summary_words = len(summary.split())
            if not summary:
                summary_suggestions.append("Add a professional summary to highlight your key qualifications")
            elif summary_words < 30:
                summary_suggestions.append("Expand your professional summary to better highlight your experience and goals")
            elif summary_words > 100:
                summary_suggestions.append("Consider making your summary more concise (aim for 50-75 words)")

            skills_suggestions = []
            if not skills:
                skills_suggestions.append("Add a dedicated skills section")
            if len(skills) < 5:
                skills_suggestions.append("List more relevant technical and soft skills")
            if keyword_match['score'] < 70:
                skills_suggestions.append("Add more skills that match the job requirements")

            experience_suggestions = self._experience_suggestions(experience)
            education_suggestions = self._education_suggestions(
                education, job_requirements.get('require_gpa', False)
            )

            format_suggestions = []
            if format_score < 100:
                format_suggestions.extend(format_deductions)

            # Section-specific scores: a fixed penalty per suggestion
            contact_score = 100 - (len(contact_suggestions) * 25)
            summary_score = 100 - (len(summary_suggestions) * 33)
            skills_score = keyword_match['score']
            experience_score = 100 - (len(experience_suggestions) * 25)
            education_score = 100 - (len(education_suggestions) * 25)

            # Overall ATS score; each component is rounded before summing
            weighted_components = (
                (contact_score, 0.1),
                (summary_score, 0.1),
                (skills_score, 0.3),
                (experience_score, 0.2),
                (education_score, 0.1),
                (format_score, 0.2),
            )
            ats_score = sum(int(round(score * weight)) for score, weight in weighted_components)

            # Combine all suggestions into a single list
            suggestions = (
                contact_suggestions
                + summary_suggestions
                + skills_suggestions
                + experience_suggestions
                + education_suggestions
                + format_suggestions
            )
            if not suggestions:
                suggestions.append("Your resume is well-optimized for ATS systems")

            return {
                **personal_info,  # Include extracted personal info
                'ats_score': ats_score,
                'document_type': 'resume',
                'keyword_match': keyword_match,
                'section_score': section_score,
                'format_score': format_score,
                'education': education,
                'experience': experience,
                'projects': projects,
                'skills': skills,
                'summary': summary,
                'suggestions': suggestions,
                'contact_suggestions': contact_suggestions,
                'summary_suggestions': summary_suggestions,
                'skills_suggestions': skills_suggestions,
                'experience_suggestions': experience_suggestions,
                'education_suggestions': education_suggestions,
                'format_suggestions': format_suggestions,
                'section_scores': {
                    'contact': contact_score,
                    'summary': summary_score,
                    'skills': skills_score,
                    'experience': experience_score,
                    'education': education_score,
                    'format': format_score
                }
            }
        except Exception as e:
            import traceback
            print(f"Error analyzing resume: {str(e)}")
            print(traceback.format_exc())
            # Return a default error response instead of propagating
            return {
                'error': f"Resume analysis failed: {str(e)}",
                'ats_score': 0,
                'document_type': 'unknown',
                'keyword_match': {'score': 0, 'found_skills': [], 'missing_skills': []},
                'section_score': 0,
                'format_score': 0,
                'suggestions': [f"Error analyzing resume: {str(e)}. Please check your file and try again."]
            }