TryRaisins committed on
Commit
305c2a1
·
verified ·
1 Parent(s): 9284fb7

Upload 5 files

Browse files
Files changed (5) hide show
  1. Dockerfile +20 -0
  2. Procfile +1 -0
  3. app.py +381 -0
  4. requirements.txt +11 -0
  5. runtime.txt +1 -0
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use lightweight Python base image (slim variant keeps the image small)
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Copy dependency list first so the pip layer is cached until requirements change
COPY requirements.txt .

# Install dependencies (--no-cache-dir keeps pip's wheel cache out of the image)
RUN pip install --no-cache-dir -r requirements.txt

# Copy all project files
COPY . .

# Expose the port
EXPOSE 7860

# Start Flask with Gunicorn on port 7860 (Spaces expects this port)
CMD ["gunicorn", "-b", "0.0.0.0:7860", "app:app"]
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: gunicorn app:app --bind 0.0.0.0:$PORT
app.py ADDED
@@ -0,0 +1,381 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify, send_from_directory
2
+ from flask_cors import CORS
3
+ import os
4
+ from werkzeug.utils import secure_filename
5
+ import PyPDF2
6
+ import docx
7
+ import re
8
+ import numpy as np
9
+ from typing import List, Dict, Any
10
+ import uuid
11
+ import logging
12
+ from logging.handlers import RotatingFileHandler
13
+
14
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)
CORS(app)  # allow cross-origin requests (frontend is presumably hosted separately — confirm)

# Configuration
UPLOAD_FOLDER = 'uploads'
ALLOWED_EXTENSIONS = {'txt', 'pdf', 'doc', 'docx'}
MAX_FILE_SIZE = 16 * 1024 * 1024 # 16MB

app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# Flask rejects requests larger than MAX_CONTENT_LENGTH with a 413 response.
app.config['MAX_CONTENT_LENGTH'] = MAX_FILE_SIZE

# Create upload directory if it doesn't exist
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
31
+
32
+
33
# Try to load AI models (optional). The app degrades gracefully to
# keyword-based scoring when transformers/torch are unavailable.
ai_models_loaded = False
classifier = None

try:
    from transformers import pipeline

    classifier = pipeline(
        "zero-shot-classification",
        model="facebook/bart-large-mnli",
        # model="valhalla/distilbart-mnli-12-1",  # lighter alternative to bart-large-mnli
        device=-1,  # use CPU (no GPU assumed)
        framework="pt"
    )
    ai_models_loaded = True
    # Fixed: the previous log message claimed distilbart-mnli-12-1 was in
    # use, but the pipeline actually loads facebook/bart-large-mnli.
    logger.info("AI models loaded successfully (using facebook/bart-large-mnli)")
except ImportError:
    logger.warning("Transformers not installed, using fallback methods")
except Exception as e:
    logger.error(f"Error loading AI models: {e}, using fallback")
58
+
59
+
60
def allowed_file(filename):
    """Return True if *filename* has an extension listed in ALLOWED_EXTENSIONS."""
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in ALLOWED_EXTENSIONS
63
+
64
def extract_text_from_file(file_path, filename):
    """Extract raw text from a PDF, DOC/DOCX, or TXT file.

    Raises Exception when the file cannot be read or yields no text.
    All runs of whitespace in the result are collapsed to single spaces.
    """
    collected = []

    if filename.endswith('.pdf'):
        try:
            with open(file_path, 'rb') as fh:
                for page in PyPDF2.PdfReader(fh).pages:
                    extracted = page.extract_text()
                    if extracted:
                        collected.append(extracted + "\n")
        except Exception as e:
            logger.error(f"Error reading PDF: {e}")
            raise Exception(f"Failed to extract text from PDF: {e}")
    elif filename.endswith(('.doc', '.docx')):
        # NOTE(review): python-docx handles .docx; legacy binary .doc will
        # likely fail here and surface as the error below — confirm intent.
        try:
            document = docx.Document(file_path)
            collected.extend(p.text + "\n" for p in document.paragraphs if p.text)
        except Exception as e:
            logger.error(f"Error reading DOCX: {e}")
            raise Exception(f"Failed to extract text from DOCX: {e}")
    elif filename.endswith('.txt'):
        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as fh:
                collected.append(fh.read())
        except Exception as e:
            logger.error(f"Error reading TXT: {e}")
            raise Exception(f"Failed to extract text from TXT: {e}")

    text = "".join(collected)
    if not text.strip():
        raise Exception("No text could be extracted from the file")

    # Normalize whitespace before returning.
    return re.sub(r'\s+', ' ', text).strip()
104
+
105
def extract_skills(text):
    """Extract known skills from free text using whole-token matching.

    Returns a list of title-cased skill names found in *text*.
    """
    # Comprehensive skills list with improved matching
    common_skills = [
        'python', 'java', 'javascript', 'typescript', 'react', 'angular', 'vue',
        'node.js', 'express', 'django', 'flask', 'spring', 'laravel', 'ruby',
        'php', 'html', 'css', 'sass', 'less', 'bootstrap', 'tailwind',
        'sql', 'mysql', 'postgresql', 'mongodb', 'redis', 'oracle',
        'aws', 'azure', 'google cloud', 'gcp', 'docker', 'kubernetes',
        'jenkins', 'git', 'github', 'gitlab', 'ci/cd', 'devops',
        'machine learning', 'ml', 'ai', 'deep learning', 'tensorflow',
        'pytorch', 'keras', 'scikit-learn', 'data analysis', 'pandas',
        'numpy', 'r', 'tableau', 'power bi', 'excel',
        'agile', 'scrum', 'kanban', 'project management',
        'rest api', 'graphql', 'microservices', 'api development',
        'c++', 'c#', 'net', 'swift', 'kotlin', 'go', 'rust'
    ]

    found_skills = set()
    text_lower = text.lower()

    for skill in common_skills:
        # Bug fix: r'\b' fails for skills ending in non-word characters
        # ('c++', 'c#') because \b only matches at a word/non-word boundary,
        # so e.g. r'\bc\+\+\b' can never match "c++ " or "c++" at end of
        # text. Explicit lookarounds require that the token not touch an
        # adjacent word character, which behaves like \b for ordinary
        # skills and also works for symbol-terminated ones.
        pattern = r'(?<!\w)' + re.escape(skill) + r'(?!\w)'
        if re.search(pattern, text_lower):
            found_skills.add(skill.title())

    return list(found_skills)
133
+
134
def calculate_score(job_description, candidate_text, skills):
    """Score a candidate against a job description (0-100).

    Uses the zero-shot classifier when available; otherwise (or on any
    model error) defers to the keyword-based fallback scorer.
    """
    if classifier and ai_models_loaded:
        try:
            truncated = candidate_text[:512]  # keep input within the model's limit

            relevance_labels = [
                "highly relevant candidate for the job",
                "somewhat relevant candidate",
                "irrelevant candidate for this position"
            ]

            outcome = classifier(truncated, relevance_labels)
            # Blend the top two label probabilities into a 0-100 relevance score.
            relevance = (outcome['scores'][0] * 0.7 + outcome['scores'][1] * 0.3) * 100

            # Skill component: 5 points per detected skill, capped; 30 when none found.
            skill_component = min(100, len(skills) * 5) if skills else 30

            blended = (relevance * 0.7) + (skill_component * 0.3)
            return min(100, max(0, int(blended)))
        except Exception as e:
            logger.error(f"Error in AI scoring: {e}, using fallback")

    # No model, or model scoring failed: heuristic fallback.
    return calculate_fallback_score(job_description, candidate_text, skills)
168
+
169
def calculate_fallback_score(job_description, candidate_text, skills):
    """Heuristic keyword-overlap score (0-100) used when no AI model is available."""
    stop_words = {'with', 'this', 'that', 'have', 'from', 'they', 'which', 'were', 'their'}

    job_lower = job_description.lower()
    candidate_lower = candidate_text.lower()

    # Meaningful tokens: lowercase words of 4+ letters, minus stop words.
    token_re = r'\b[a-z]{4,}\b'
    job_words = set(re.findall(token_re, job_lower)) - stop_words
    candidate_words = set(re.findall(token_re, candidate_lower)) - stop_words

    score = 40  # base score

    # Keyword overlap contributes up to 40 points.
    if job_words:
        overlap = job_words & candidate_words
        score += min(40, len(overlap) / len(job_words) * 40)

    # Detected skills contribute up to 20 points (3 each).
    if skills:
        score += min(20, len(skills) * 3)

    # Experience-related verbs each add 2 points.
    indicators = [
        'experience', 'years', 'worked', 'developed', 'created', 'built',
        'managed', 'led', 'implemented', 'designed'
    ]
    score += sum(2 for word in indicators
                 if re.search(r'\b' + word + r'\b', candidate_lower))

    return min(100, max(0, int(score)))
205
+
206
def extract_candidate_info(text, filename):
    """Extract (name, email, phone) from resume text using regex heuristics.

    Falls back to the filename stem for the name and to placeholder
    strings ("No email found" / "No phone found") when nothing matches.
    """
    # Candidate name: try a few layouts (standalone line, after "Resume",
    # after a "Name:" label). Note these run with IGNORECASE, so the
    # capitalization classes are effectively case-insensitive.
    name_patterns = [
        r'(?:^|\n)[\s]*([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)[\s]*(?:\n|$)',
        r'Resume[\s\S]{0,500}?([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)',
        r'Name[:]?[\s]*([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)'
    ]

    name = filename.split('.')[0]  # Default to filename

    for pattern in name_patterns:
        name_match = re.search(pattern, text, re.IGNORECASE)
        if name_match:
            name = name_match.group(1).strip()
            break

    # Extract email. Bug fix: the TLD class was '[A-Z|a-z]', which
    # erroneously included a literal '|' inside the character class;
    # corrected to '[A-Za-z]'.
    email_match = re.search(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', text)
    email = email_match.group(0) if email_match else "No email found"

    # Phone: several international groupings (3-3-4, 2-4-4, 4-3-3).
    phone_patterns = [
        r'(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}',
        r'(\+?\d{1,3}[-.\s]?)?\(?\d{2}\)?[-.\s]?\d{4}[-.\s]?\d{4}',
        r'(\+?\d{1,3}[-.\s]?)?\(?\d{4}\)?[-.\s]?\d{3}[-.\s]?\d{3}'
    ]

    phone = "No phone found"
    for pattern in phone_patterns:
        phone_match = re.search(pattern, text)
        if phone_match:
            phone = phone_match.group(0)
            break

    return name, email, phone
242
+
243
def analyze_candidate(job_description, candidate_text, filename):
    """Build a scored candidate record (dict) from extracted resume text.

    On any extraction/scoring failure, returns a placeholder record with a
    zero score and the error message attached.
    """
    try:
        skills = extract_skills(candidate_text)
        score = calculate_score(job_description, candidate_text, skills)
        name, email, phone = extract_candidate_info(candidate_text, filename)

        # Keep only a short preview of the raw text in the response payload.
        if len(candidate_text) > 200:
            preview = candidate_text[:200] + '...'
        else:
            preview = candidate_text

        return {
            'id': str(uuid.uuid4()),
            'name': name,
            'email': email,
            'phone': phone,
            'skills': skills,
            'score': score,
            'text_preview': preview,
        }
    except Exception as e:
        logger.error(f"Error analyzing candidate: {e}")
        return {
            'id': str(uuid.uuid4()),
            'name': filename.split('.')[0],
            'email': "Error in extraction",
            'phone': "Error in extraction",
            'skills': [],
            'score': 0,
            'text_preview': "Error processing file",
            'error': str(e),
        }
271
+
272
@app.route('/api/process-resumes', methods=['POST'])
def process_resumes():
    """Process uploaded resumes against a job description.

    Expects multipart form data with a 'jobDescription' file and one or
    more 'resumes' files. Saves each upload temporarily, extracts its
    text, scores every resume against the job description, and returns
    candidates ranked by score as JSON.
    """
    try:
        # Both form fields must be present.
        if 'resumes' not in request.files:
            return jsonify({'error': 'Missing resume files'}), 400

        if 'jobDescription' not in request.files:
            return jsonify({'error': 'Missing job description file'}), 400

        job_desc_file = request.files['jobDescription']
        resume_files = request.files.getlist('resumes')

        # Validate job description file
        if job_desc_file.filename == '':
            return jsonify({'error': 'No job description file selected'}), 400

        if not allowed_file(job_desc_file.filename):
            return jsonify({'error': 'Invalid job description file type'}), 400

        # Keep only resumes that were actually selected and have an allowed extension.
        valid_resumes = [f for f in resume_files
                         if f.filename != '' and allowed_file(f.filename)]

        if not valid_resumes:
            return jsonify({'error': 'No valid resume files'}), 400

        # Save and process job description
        job_desc_filename = secure_filename(job_desc_file.filename)
        job_desc_path = os.path.join(app.config['UPLOAD_FOLDER'], job_desc_filename)
        job_desc_file.save(job_desc_path)

        try:
            job_description = extract_text_from_file(job_desc_path, job_desc_filename)
        except Exception as e:
            # Fix: the original leaked the saved file on this early-error path.
            try:
                os.remove(job_desc_path)
            except OSError:
                pass
            return jsonify({'error': f'Failed to process job description: {str(e)}'}), 400

        # Process each resume
        candidates = []
        for resume_file in valid_resumes:
            resume_filename = secure_filename(resume_file.filename)
            resume_path = os.path.join(app.config['UPLOAD_FOLDER'], resume_filename)
            resume_file.save(resume_path)

            try:
                resume_text = extract_text_from_file(resume_path, resume_filename)
                candidate = analyze_candidate(job_description, resume_text, resume_filename)
                candidates.append(candidate)
            except Exception as e:
                # Record a placeholder entry so one bad file doesn't fail the batch.
                logger.error(f"Error processing {resume_filename}: {e}")
                candidates.append({
                    'id': str(uuid.uuid4()),
                    'name': resume_filename.split('.')[0],
                    'email': "Processing error",
                    'phone': "Processing error",
                    'skills': [],
                    'score': 0,
                    'text_preview': f"Error: {str(e)}",
                    'error': str(e)
                })
            finally:
                # Best-effort cleanup. Fix: narrowed the original bare
                # `except:` (which also swallowed KeyboardInterrupt/SystemExit)
                # to OSError, the only failure os.remove can raise here.
                try:
                    os.remove(resume_path)
                except OSError:
                    pass

        # Clean up job description file (same bare-except fix as above).
        try:
            os.remove(job_desc_path)
        except OSError:
            pass

        # Highest score first.
        candidates.sort(key=lambda x: x['score'], reverse=True)

        return jsonify({
            'candidates': candidates,
            'job_description': job_description[:500] + '...' if len(job_description) > 500 else job_description,
            'total_processed': len(candidates),
            'ai_used': ai_models_loaded
        })

    except Exception as e:
        logger.error(f"Error processing resumes: {e}")
        return jsonify({'error': 'Internal server error'}), 500
365
+
366
@app.route('/api/health', methods=['GET'])
def health_check():
    """Liveness probe: report model availability and upload-dir presence."""
    payload = {
        'status': 'healthy',
        'ai_models_loaded': ai_models_loaded,
        'upload_folder_exists': os.path.exists(UPLOAD_FOLDER),
    }
    return jsonify(payload)
374
+
375
@app.route('/')
def index():
    """Root endpoint: simple liveness message."""
    message = {'message': 'Resume Analyzer API is running'}
    return jsonify(message)
378
+
379
if __name__ == "__main__":
    # Dev entry point only — production runs under Gunicorn (see Dockerfile/Procfile).
    # Honor the platform-assigned PORT; default to 10000 for local runs.
    port = int(os.environ.get("PORT", 10000))
    app.run(host="0.0.0.0", port=port, debug=False)
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
flask
flask-cors
werkzeug
PyPDF2
# python-docx provides the `docx` module that app.py imports. The legacy
# `docx` PyPI package (Python 2-only) was removed: it installs a conflicting
# `docx` module and breaks installation/import on Python 3.
python-docx
numpy
torch
transformers
sentence-transformers
gunicorn
runtime.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ python-3.9.13