Spaces:
Sleeping
Sleeping
| from flask import Flask, request, jsonify, send_from_directory | |
| from flask_cors import CORS | |
| import os | |
| from werkzeug.utils import secure_filename | |
| import PyPDF2 | |
| import docx | |
| import re | |
| import numpy as np | |
| from typing import List, Dict, Any | |
| import uuid | |
| import logging | |
| from logging.handlers import RotatingFileHandler | |
# --- Module-level setup -----------------------------------------------------
# Logging: basic config, module logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Flask app with CORS enabled for all origins.
app = Flask(__name__)
CORS(app)

# Upload configuration: /tmp is the only reliably writable dir on HF Spaces.
UPLOAD_FOLDER = os.path.join("/tmp", "uploads")
ALLOWED_EXTENSIONS = {'txt', 'pdf', 'doc', 'docx'}
MAX_FILE_SIZE = 16 * 1024 * 1024  # 16MB request-body cap
os.environ["HF_HOME"] = "/tmp/hf_home"  # writable in Hugging Face Spaces

app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
app.config['MAX_CONTENT_LENGTH'] = MAX_FILE_SIZE

# Make sure the upload directory exists before any request arrives.
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
# --- Optional AI model bootstrap --------------------------------------------
# Try to load a zero-shot classifier; on any failure the app keeps running
# and calculate_score() falls back to keyword heuristics.
ai_models_loaded = False
classifier = None
try:
    from transformers import pipeline

    classifier = pipeline(
        "zero-shot-classification",
        model="valhalla/distilbart-mnli-12-1",  # ✅ Lighter model than bart-large-mnli
        device=-1,  # Use CPU
        framework="pt",
    )
    ai_models_loaded = True
    logger.info("AI models loaded successfully (using distilbart-mnli-12-1)")
except ImportError:
    logger.warning("Transformers not installed, using fallback methods")
except Exception as e:
    logger.error(f"Error loading AI models: {e}, using fallback")
def allowed_file(filename):
    """Return True when *filename* has an extension in ALLOWED_EXTENSIONS."""
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in ALLOWED_EXTENSIONS
def extract_text_from_file(file_path, filename):
    """Extract plain text from a .pdf, .doc/.docx, or .txt file.

    Args:
        file_path: Path of the saved file on disk.
        filename: Original filename; its extension selects the parser.

    Returns:
        Extracted text with all whitespace runs collapsed to single spaces.

    Raises:
        Exception: If parsing fails or no text could be extracted.
    """
    text = ""
    # BUG FIX: compare extensions case-insensitively. allowed_file() lowercases
    # the extension, so "RESUME.PDF" passed validation but then fell through
    # every branch here and raised "No text could be extracted".
    lowered = filename.lower()
    if lowered.endswith('.pdf'):
        try:
            with open(file_path, 'rb') as f:
                pdf_reader = PyPDF2.PdfReader(f)
                for page in pdf_reader.pages:
                    page_text = page.extract_text()
                    if page_text:
                        text += page_text + "\n"
        except Exception as e:
            logger.error(f"Error reading PDF: {e}")
            raise Exception(f"Failed to extract text from PDF: {e}")
    elif lowered.endswith(('.doc', '.docx')):
        try:
            # NOTE(review): python-docx cannot read legacy .doc binaries --
            # a real .doc upload will land in the except branch. Confirm
            # whether .doc should stay in ALLOWED_EXTENSIONS.
            doc = docx.Document(file_path)
            for paragraph in doc.paragraphs:
                if paragraph.text:
                    text += paragraph.text + "\n"
        except Exception as e:
            logger.error(f"Error reading DOCX: {e}")
            raise Exception(f"Failed to extract text from DOCX: {e}")
    elif lowered.endswith('.txt'):
        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                text = f.read()
        except Exception as e:
            logger.error(f"Error reading TXT: {e}")
            raise Exception(f"Failed to extract text from TXT: {e}")
    if not text.strip():
        raise Exception("No text could be extracted from the file")
    # Collapse newlines/tabs/multiple spaces into single spaces.
    text = re.sub(r'\s+', ' ', text).strip()
    return text
def extract_skills(text):
    """Extract known skills from free text using pattern matching.

    Args:
        text: Raw resume/job-description text.

    Returns:
        De-duplicated list of matched skills, title-cased.
    """
    # Comprehensive skills list with improved matching
    common_skills = [
        'python', 'java', 'javascript', 'typescript', 'react', 'angular', 'vue',
        'node.js', 'express', 'django', 'flask', 'spring', 'laravel', 'ruby',
        'php', 'html', 'css', 'sass', 'less', 'bootstrap', 'tailwind',
        'sql', 'mysql', 'postgresql', 'mongodb', 'redis', 'oracle',
        'aws', 'azure', 'google cloud', 'gcp', 'docker', 'kubernetes',
        'jenkins', 'git', 'github', 'gitlab', 'ci/cd', 'devops',
        'machine learning', 'ml', 'ai', 'deep learning', 'tensorflow',
        'pytorch', 'keras', 'scikit-learn', 'data analysis', 'pandas',
        'numpy', 'r', 'tableau', 'power bi', 'excel',
        'agile', 'scrum', 'kanban', 'project management',
        'rest api', 'graphql', 'microservices', 'api development',
        'c++', 'c#', 'net', 'swift', 'kotlin', 'go', 'rust'
    ]
    found_skills = set()
    text_lower = text.lower()
    for skill in common_skills:
        # BUG FIX: the previous r'\b...\b' pattern never matched skills ending
        # in non-word characters ('c++', 'c#'): \b requires a word char on one
        # side, and there is no boundary between '+' and a following space or
        # end of string. Explicit lookarounds work for every skill while being
        # equivalent to \b for ordinary word-char-bounded skills.
        pattern = r'(?<!\w)' + re.escape(skill) + r'(?!\w)'
        if re.search(pattern, text_lower):
            found_skills.add(skill.title())
    return list(found_skills)
def calculate_score(job_description, candidate_text, skills):
    """Calculate a 0-100 relevance score.

    Uses the zero-shot AI model when it was loaded at import time, otherwise
    (or if AI scoring raises) falls back to keyword heuristics.

    Args:
        job_description: Job description text.
        candidate_text: Candidate resume text.
        skills: Skills already extracted from the candidate text.

    Returns:
        Integer score clamped to [0, 100].
    """
    if classifier and ai_models_loaded:
        try:
            # Truncate so the sequence fits the model's input budget.
            sequence_to_classify = candidate_text[:512]
            candidate_labels = [
                "highly relevant candidate for the job",
                "somewhat relevant candidate",
                "irrelevant candidate for this position"
            ]
            result = classifier(sequence_to_classify, candidate_labels)
            # BUG FIX: the zero-shot pipeline returns 'scores' sorted by
            # confidence, NOT in candidate_labels order, so scores[0] was
            # simply the winning label -- even "irrelevant" -- which made the
            # score meaningless. Map scores back to their labels so the
            # weights really favor the "highly relevant" label.
            score_by_label = dict(zip(result['labels'], result['scores']))
            relevance_score = (score_by_label[candidate_labels[0]] * 0.7 +
                               score_by_label[candidate_labels[1]] * 0.3) * 100
            # Capped bonus for number of matched skills (30 baseline if none).
            if skills:
                skill_match_score = min(100, len(skills) * 5)
            else:
                skill_match_score = 30
            # Weighted blend of model relevance and skills coverage.
            final_score = (relevance_score * 0.7) + (skill_match_score * 0.3)
            return min(100, max(0, int(final_score)))
        except Exception as e:
            logger.error(f"Error in AI scoring: {e}, using fallback")
    # No model available (or AI scoring failed): heuristic fallback.
    return calculate_fallback_score(job_description, candidate_text, skills)
def calculate_fallback_score(job_description, candidate_text, skills):
    """Heuristic 0-100 relevance score used when AI models are unavailable.

    Combines a 40-point base, keyword overlap with the job description,
    a capped skills bonus, and points for experience-related vocabulary.
    """
    stop_words = {'with', 'this', 'that', 'have', 'from', 'they', 'which', 'were', 'their'}
    experience_terms = (
        'experience', 'years', 'worked', 'developed', 'created', 'built',
        'managed', 'led', 'implemented', 'designed',
    )
    word_pattern = r'\b[a-z]{4,}\b'  # meaningful words: 4+ letters

    jd_text = job_description.lower()
    cv_text = candidate_text.lower()
    jd_words = set(re.findall(word_pattern, jd_text)) - stop_words
    cv_words = set(re.findall(word_pattern, cv_text)) - stop_words

    score = 40  # conservative base score
    # Keyword overlap: fraction of job-description words found in the resume.
    if jd_words:
        overlap = len(jd_words & cv_words) / len(jd_words) * 40
        score += min(40, overlap)
    # Skills bonus, capped at 20 points.
    if skills:
        score += min(20, len(skills) * 3)
    # +2 per experience indicator present as a whole word.
    score += 2 * sum(
        1 for term in experience_terms
        if re.search(r'\b' + term + r'\b', cv_text)
    )
    return min(100, max(0, int(score)))
def extract_candidate_info(text, filename):
    """Extract (name, email, phone) from resume text via regex heuristics.

    Args:
        text: Resume text.
        filename: Used (minus extension) as the fallback name.

    Returns:
        Tuple of (name, email, phone); each falls back to a placeholder
        string when no match is found.
    """
    # Candidate-name heuristics: a capitalized multi-word line, a name near
    # the word "Resume", or an explicit "Name:" label.
    # NOTE(review): re.IGNORECASE below weakens the [A-Z][a-z]+ capitalization
    # heuristic (any two lowercase words on a line can match) -- confirm
    # whether case-insensitivity is intended here.
    name_patterns = [
        r'(?:^|\n)[\s]*([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)[\s]*(?:\n|$)',
        r'Resume[\s\S]{0,500}?([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)',
        r'Name[:]?[\s]*([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)'
    ]
    name = filename.split('.')[0]  # Default to filename stem
    for pattern in name_patterns:
        name_match = re.search(pattern, text, re.IGNORECASE)
        if name_match:
            name = name_match.group(1).strip()
            break
    # Extract email.
    # BUG FIX: the TLD class was [A-Z|a-z], which includes a literal '|'
    # (the alternation operator has no meaning inside a character class),
    # so "a@b.c|" could be accepted. Corrected to [A-Za-z].
    email_match = re.search(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', text)
    email = email_match.group(0) if email_match else "No email found"
    # Phone patterns covering common US and international groupings.
    phone_patterns = [
        r'(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}',
        r'(\+?\d{1,3}[-.\s]?)?\(?\d{2}\)?[-.\s]?\d{4}[-.\s]?\d{4}',
        r'(\+?\d{1,3}[-.\s]?)?\(?\d{4}\)?[-.\s]?\d{3}[-.\s]?\d{3}'
    ]
    phone = "No phone found"
    for pattern in phone_patterns:
        phone_match = re.search(pattern, text)
        if phone_match:
            phone = phone_match.group(0)
            break
    return name, email, phone
def analyze_candidate(job_description, candidate_text, filename):
    """Build the scored result record for one candidate resume.

    Never raises: any processing failure yields a zero-score error record
    so one bad resume cannot break the whole batch.
    """
    fallback_name = filename.split('.')[0]
    try:
        skills = extract_skills(candidate_text)
        score = calculate_score(job_description, candidate_text, skills)
        name, email, phone = extract_candidate_info(candidate_text, filename)
        preview = (candidate_text[:200] + '...'
                   if len(candidate_text) > 200 else candidate_text)
        return {
            'id': str(uuid.uuid4()),
            'name': name,
            'email': email,
            'phone': phone,
            'skills': skills,
            'score': score,
            'text_preview': preview,
        }
    except Exception as e:
        logger.error(f"Error analyzing candidate: {e}")
        return {
            'id': str(uuid.uuid4()),
            'name': fallback_name,
            'email': "Error in extraction",
            'phone': "Error in extraction",
            'skills': [],
            'score': 0,
            'text_preview': "Error processing file",
            'error': str(e),
        }
def process_resumes():
    """Process uploaded resumes against a job description.

    Expects multipart form-data with a single 'jobDescription' file and one
    or more 'resumes' files. Returns a JSON payload with candidates ranked
    by score, a job-description preview, and processing metadata.

    NOTE(review): no @app.route decorator is visible in this file -- confirm
    the route is registered elsewhere (e.g. app.add_url_rule) or was lost
    during formatting.
    """
    try:
        # Validate that both parts of the form are present.
        if 'resumes' not in request.files:
            return jsonify({'error': 'Missing resume files'}), 400
        if 'jobDescription' not in request.files:
            return jsonify({'error': 'Missing job description file'}), 400
        job_desc_file = request.files['jobDescription']
        resume_files = request.files.getlist('resumes')
        # Validate job description file.
        if job_desc_file.filename == '':
            return jsonify({'error': 'No job description file selected'}), 400
        if not allowed_file(job_desc_file.filename):
            return jsonify({'error': 'Invalid job description file type'}), 400
        # Keep only resumes with a non-empty, allowed filename.
        valid_resumes = [f for f in resume_files
                         if f.filename != '' and allowed_file(f.filename)]
        if not valid_resumes:
            return jsonify({'error': 'No valid resume files'}), 400
        # BUG FIX: store under a unique name. The upload folder is shared, so
        # two concurrent requests (or two files with the same secure_filename)
        # previously overwrote each other's files on disk.
        job_desc_filename = secure_filename(job_desc_file.filename)
        job_desc_path = os.path.join(
            app.config['UPLOAD_FOLDER'],
            f"{uuid.uuid4().hex}_{job_desc_filename}")
        job_desc_file.save(job_desc_path)
        try:
            # Extension detection uses the original (secured) filename.
            job_description = extract_text_from_file(job_desc_path, job_desc_filename)
        except Exception as e:
            return jsonify({'error': f'Failed to process job description: {str(e)}'}), 400
        # Process each resume independently; failures become error records.
        candidates = []
        for resume_file in valid_resumes:
            resume_filename = secure_filename(resume_file.filename)
            resume_path = os.path.join(
                app.config['UPLOAD_FOLDER'],
                f"{uuid.uuid4().hex}_{resume_filename}")  # unique, as above
            resume_file.save(resume_path)
            try:
                resume_text = extract_text_from_file(resume_path, resume_filename)
                candidates.append(
                    analyze_candidate(job_description, resume_text, resume_filename))
            except Exception as e:
                logger.error(f"Error processing {resume_filename}: {e}")
                candidates.append({
                    'id': str(uuid.uuid4()),
                    'name': resume_filename.split('.')[0],
                    'email': "Processing error",
                    'phone': "Processing error",
                    'skills': [],
                    'score': 0,
                    'text_preview': f"Error: {str(e)}",
                    'error': str(e)
                })
            # Best-effort cleanup; BUG FIX: only swallow filesystem errors
            # instead of a bare except that also hid KeyboardInterrupt etc.
            try:
                os.remove(resume_path)
            except OSError:
                pass
        # Clean up the job description file (best-effort, as above).
        try:
            os.remove(job_desc_path)
        except OSError:
            pass
        # Rank candidates best-first.
        candidates.sort(key=lambda x: x['score'], reverse=True)
        return jsonify({
            'candidates': candidates,
            'job_description': job_description[:500] + '...' if len(job_description) > 500 else job_description,
            'total_processed': len(candidates),
            'ai_used': ai_models_loaded
        })
    except Exception as e:
        logger.error(f"Error processing resumes: {e}")
        return jsonify({'error': 'Internal server error'}), 500
def health_check():
    """Health check: report liveness, AI availability, and upload-dir state.

    NOTE(review): no @app.route decorator is visible in this file -- confirm
    routing is registered elsewhere or was lost during formatting.
    """
    payload = {
        'status': 'healthy',
        'ai_models_loaded': ai_models_loaded,
        'upload_folder_exists': os.path.exists(UPLOAD_FOLDER),
    }
    return jsonify(payload)
def index():
    """Root endpoint: simple liveness message for manual checks."""
    message = {'message': 'Resume Analyzer API is running'}
    return jsonify(message)
if __name__ == "__main__":
    # Hosting platform injects PORT; default to 10000 for local runs.
    serve_port = int(os.environ.get("PORT", 10000))
    app.run(host="0.0.0.0", port=serve_port, debug=False)