TryRaisins committed on
Commit
305c2a1
·
verified ·
1 Parent(s): 9284fb7

Upload 5 files

Browse files
Files changed (5) hide show
  1. Dockerfile +20 -0
  2. Procfile +1 -0
  3. app.py +381 -0
  4. requirements.txt +11 -0
  5. runtime.txt +1 -0
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use lightweight Python base image (slim variant keeps the image small)
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Copy dependency list first so the pip layer is cached until requirements change
COPY requirements.txt .

# Install dependencies (--no-cache-dir keeps pip's wheel cache out of the image)
RUN pip install --no-cache-dir -r requirements.txt

# Copy all project files
COPY . .

# Expose the port
EXPOSE 7860

# Start Flask with Gunicorn on port 7860 (Spaces expects this port)
CMD ["gunicorn", "-b", "0.0.0.0:7860", "app:app"]
Procfile ADDED
@@ -0,0 +1 @@
 
 
1
+ web: gunicorn app:app --bind 0.0.0.0:$PORT
app.py ADDED
@@ -0,0 +1,381 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify, send_from_directory
2
+ from flask_cors import CORS
3
+ import os
4
+ from werkzeug.utils import secure_filename
5
+ import PyPDF2
6
+ import docx
7
+ import re
8
+ import numpy as np
9
+ from typing import List, Dict, Any
10
+ import uuid
11
+ import logging
12
+ from logging.handlers import RotatingFileHandler
13
+
14
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)
CORS(app)  # allow cross-origin requests (frontend is presumably hosted separately — confirm)

# Configuration
UPLOAD_FOLDER = 'uploads'
ALLOWED_EXTENSIONS = {'txt', 'pdf', 'doc', 'docx'}
MAX_FILE_SIZE = 16 * 1024 * 1024 # 16MB

app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
# Flask rejects requests larger than MAX_CONTENT_LENGTH with a 413 response.
app.config['MAX_CONTENT_LENGTH'] = MAX_FILE_SIZE

# Create upload directory if it doesn't exist
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
31
+
32
+
33
# Try to load AI models (optional). The app degrades gracefully to
# keyword-based scoring when transformers/torch are unavailable.
ai_models_loaded = False
classifier = None

try:
    from transformers import pipeline

    classifier = pipeline(
        "zero-shot-classification",
        model="facebook/bart-large-mnli",
        # model="valhalla/distilbart-mnli-12-1",  # lighter alternative to bart-large-mnli
        device=-1,  # use CPU (no GPU assumed)
        framework="pt"
    )
    ai_models_loaded = True
    # Fixed: the previous log message claimed distilbart-mnli-12-1 was in
    # use, but the pipeline actually loads facebook/bart-large-mnli.
    logger.info("AI models loaded successfully (using facebook/bart-large-mnli)")
except ImportError:
    logger.warning("Transformers not installed, using fallback methods")
except Exception as e:
    logger.error(f"Error loading AI models: {e}, using fallback")
58
+
59
+
60
def allowed_file(filename):
    """Return True if *filename* has an extension listed in ALLOWED_EXTENSIONS."""
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in ALLOWED_EXTENSIONS
63
+
64
def extract_text_from_file(file_path, filename):
    """Extract raw text from a PDF, DOC/DOCX, or TXT file.

    Raises Exception when the file cannot be read or yields no text.
    All runs of whitespace in the result are collapsed to single spaces.
    """
    collected = []

    if filename.endswith('.pdf'):
        try:
            with open(file_path, 'rb') as fh:
                for page in PyPDF2.PdfReader(fh).pages:
                    extracted = page.extract_text()
                    if extracted:
                        collected.append(extracted + "\n")
        except Exception as e:
            logger.error(f"Error reading PDF: {e}")
            raise Exception(f"Failed to extract text from PDF: {e}")
    elif filename.endswith(('.doc', '.docx')):
        # NOTE(review): python-docx handles .docx; legacy binary .doc will
        # likely fail here and surface as the error below — confirm intent.
        try:
            document = docx.Document(file_path)
            collected.extend(p.text + "\n" for p in document.paragraphs if p.text)
        except Exception as e:
            logger.error(f"Error reading DOCX: {e}")
            raise Exception(f"Failed to extract text from DOCX: {e}")
    elif filename.endswith('.txt'):
        try:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as fh:
                collected.append(fh.read())
        except Exception as e:
            logger.error(f"Error reading TXT: {e}")
            raise Exception(f"Failed to extract text from TXT: {e}")

    text = "".join(collected)
    if not text.strip():
        raise Exception("No text could be extracted from the file")

    # Normalize whitespace before returning.
    return re.sub(r'\s+', ' ', text).strip()
104
+
105
def extract_skills(text):
    """Extract known skills from free text using whole-token matching.

    Returns a list of title-cased skill names found in *text*.
    """
    # Comprehensive skills list with improved matching
    common_skills = [
        'python', 'java', 'javascript', 'typescript', 'react', 'angular', 'vue',
        'node.js', 'express', 'django', 'flask', 'spring', 'laravel', 'ruby',
        'php', 'html', 'css', 'sass', 'less', 'bootstrap', 'tailwind',
        'sql', 'mysql', 'postgresql', 'mongodb', 'redis', 'oracle',
        'aws', 'azure', 'google cloud', 'gcp', 'docker', 'kubernetes',
        'jenkins', 'git', 'github', 'gitlab', 'ci/cd', 'devops',
        'machine learning', 'ml', 'ai', 'deep learning', 'tensorflow',
        'pytorch', 'keras', 'scikit-learn', 'data analysis', 'pandas',
        'numpy', 'r', 'tableau', 'power bi', 'excel',
        'agile', 'scrum', 'kanban', 'project management',
        'rest api', 'graphql', 'microservices', 'api development',
        'c++', 'c#', 'net', 'swift', 'kotlin', 'go', 'rust'
    ]

    found_skills = set()
    text_lower = text.lower()

    for skill in common_skills:
        # Bug fix: r'\b' fails for skills ending in non-word characters
        # ('c++', 'c#') because \b only matches at a word/non-word boundary,
        # so e.g. r'\bc\+\+\b' can never match "c++ " or "c++" at end of
        # text. Explicit lookarounds require that the token not touch an
        # adjacent word character, which behaves like \b for ordinary
        # skills and also works for symbol-terminated ones.
        pattern = r'(?<!\w)' + re.escape(skill) + r'(?!\w)'
        if re.search(pattern, text_lower):
            found_skills.add(skill.title())

    return list(found_skills)
133
+
134
def calculate_score(job_description, candidate_text, skills):
    """Score a candidate against a job description (0-100).

    Uses the zero-shot classifier when available; otherwise (or on any
    model error) defers to the keyword-based fallback scorer.
    """
    if classifier and ai_models_loaded:
        try:
            truncated = candidate_text[:512]  # keep input within the model's limit

            relevance_labels = [
                "highly relevant candidate for the job",
                "somewhat relevant candidate",
                "irrelevant candidate for this position"
            ]

            outcome = classifier(truncated, relevance_labels)
            # Blend the top two label probabilities into a 0-100 relevance score.
            relevance = (outcome['scores'][0] * 0.7 + outcome['scores'][1] * 0.3) * 100

            # Skill component: 5 points per detected skill, capped; 30 when none found.
            skill_component = min(100, len(skills) * 5) if skills else 30

            blended = (relevance * 0.7) + (skill_component * 0.3)
            return min(100, max(0, int(blended)))
        except Exception as e:
            logger.error(f"Error in AI scoring: {e}, using fallback")

    # No model, or model scoring failed: heuristic fallback.
    return calculate_fallback_score(job_description, candidate_text, skills)
168
+
169
def calculate_fallback_score(job_description, candidate_text, skills):
    """Heuristic keyword-overlap score (0-100) used when no AI model is available."""
    stop_words = {'with', 'this', 'that', 'have', 'from', 'they', 'which', 'were', 'their'}

    job_lower = job_description.lower()
    candidate_lower = candidate_text.lower()

    # Meaningful tokens: lowercase words of 4+ letters, minus stop words.
    token_re = r'\b[a-z]{4,}\b'
    job_words = set(re.findall(token_re, job_lower)) - stop_words
    candidate_words = set(re.findall(token_re, candidate_lower)) - stop_words

    score = 40  # base score

    # Keyword overlap contributes up to 40 points.
    if job_words:
        overlap = job_words & candidate_words
        score += min(40, len(overlap) / len(job_words) * 40)

    # Detected skills contribute up to 20 points (3 each).
    if skills:
        score += min(20, len(skills) * 3)

    # Experience-related verbs each add 2 points.
    indicators = [
        'experience', 'years', 'worked', 'developed', 'created', 'built',
        'managed', 'led', 'implemented', 'designed'
    ]
    score += sum(2 for word in indicators
                 if re.search(r'\b' + word + r'\b', candidate_lower))

    return min(100, max(0, int(score)))
205
+
206
def extract_candidate_info(text, filename):
    """Extract (name, email, phone) from resume text using regex heuristics.

    Falls back to the filename stem for the name and to placeholder
    strings ("No email found" / "No phone found") when nothing matches.
    """
    # Candidate name: try a few layouts (standalone line, after "Resume",
    # after a "Name:" label). Note these run with IGNORECASE, so the
    # capitalization classes are effectively case-insensitive.
    name_patterns = [
        r'(?:^|\n)[\s]*([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)[\s]*(?:\n|$)',
        r'Resume[\s\S]{0,500}?([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)',
        r'Name[:]?[\s]*([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)'
    ]

    name = filename.split('.')[0]  # Default to filename

    for pattern in name_patterns:
        name_match = re.search(pattern, text, re.IGNORECASE)
        if name_match:
            name = name_match.group(1).strip()
            break

    # Extract email. Bug fix: the TLD class was '[A-Z|a-z]', which
    # erroneously included a literal '|' inside the character class;
    # corrected to '[A-Za-z]'.
    email_match = re.search(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b', text)
    email = email_match.group(0) if email_match else "No email found"

    # Phone: several international groupings (3-3-4, 2-4-4, 4-3-3).
    phone_patterns = [
        r'(\+?\d{1,3}[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}',
        r'(\+?\d{1,3}[-.\s]?)?\(?\d{2}\)?[-.\s]?\d{4}[-.\s]?\d{4}',
        r'(\+?\d{1,3}[-.\s]?)?\(?\d{4}\)?[-.\s]?\d{3}[-.\s]?\d{3}'
    ]

    phone = "No phone found"
    for pattern in phone_patterns:
        phone_match = re.search(pattern, text)
        if phone_match:
            phone = phone_match.group(0)
            break

    return name, email, phone
242
+
243
def analyze_candidate(job_description, candidate_text, filename):
    """Build a scored candidate record (dict) from extracted resume text.

    On any extraction/scoring failure, returns a placeholder record with a
    zero score and the error message attached.
    """
    try:
        skills = extract_skills(candidate_text)
        score = calculate_score(job_description, candidate_text, skills)
        name, email, phone = extract_candidate_info(candidate_text, filename)

        # Keep only a short preview of the raw text in the response payload.
        if len(candidate_text) > 200:
            preview = candidate_text[:200] + '...'
        else:
            preview = candidate_text

        return {
            'id': str(uuid.uuid4()),
            'name': name,
            'email': email,
            'phone': phone,
            'skills': skills,
            'score': score,
            'text_preview': preview,
        }
    except Exception as e:
        logger.error(f"Error analyzing candidate: {e}")
        return {
            'id': str(uuid.uuid4()),
            'name': filename.split('.')[0],
            'email': "Error in extraction",
            'phone': "Error in extraction",
            'skills': [],
            'score': 0,
            'text_preview': "Error processing file",
            'error': str(e),
        }
271
+
272
@app.route('/api/process-resumes', methods=['POST'])
def process_resumes():
    """Process uploaded resumes against a job description.

    Expects multipart form data with a 'jobDescription' file and one or
    more 'resumes' files. Saves each upload temporarily, extracts its
    text, scores every resume against the job description, and returns
    candidates ranked by score as JSON.
    """
    try:
        # Both form fields must be present.
        if 'resumes' not in request.files:
            return jsonify({'error': 'Missing resume files'}), 400

        if 'jobDescription' not in request.files:
            return jsonify({'error': 'Missing job description file'}), 400

        job_desc_file = request.files['jobDescription']
        resume_files = request.files.getlist('resumes')

        # Validate job description file
        if job_desc_file.filename == '':
            return jsonify({'error': 'No job description file selected'}), 400

        if not allowed_file(job_desc_file.filename):
            return jsonify({'error': 'Invalid job description file type'}), 400

        # Keep only resumes that were actually selected and have an allowed extension.
        valid_resumes = [f for f in resume_files
                         if f.filename != '' and allowed_file(f.filename)]

        if not valid_resumes:
            return jsonify({'error': 'No valid resume files'}), 400

        # Save and process job description
        job_desc_filename = secure_filename(job_desc_file.filename)
        job_desc_path = os.path.join(app.config['UPLOAD_FOLDER'], job_desc_filename)
        job_desc_file.save(job_desc_path)

        try:
            job_description = extract_text_from_file(job_desc_path, job_desc_filename)
        except Exception as e:
            # Fix: the original leaked the saved file on this early-error path.
            try:
                os.remove(job_desc_path)
            except OSError:
                pass
            return jsonify({'error': f'Failed to process job description: {str(e)}'}), 400

        # Process each resume
        candidates = []
        for resume_file in valid_resumes:
            resume_filename = secure_filename(resume_file.filename)
            resume_path = os.path.join(app.config['UPLOAD_FOLDER'], resume_filename)
            resume_file.save(resume_path)

            try:
                resume_text = extract_text_from_file(resume_path, resume_filename)
                candidate = analyze_candidate(job_description, resume_text, resume_filename)
                candidates.append(candidate)
            except Exception as e:
                # Record a placeholder entry so one bad file doesn't fail the batch.
                logger.error(f"Error processing {resume_filename}: {e}")
                candidates.append({
                    'id': str(uuid.uuid4()),
                    'name': resume_filename.split('.')[0],
                    'email': "Processing error",
                    'phone': "Processing error",
                    'skills': [],
                    'score': 0,
                    'text_preview': f"Error: {str(e)}",
                    'error': str(e)
                })
            finally:
                # Best-effort cleanup. Fix: narrowed the original bare
                # `except:` (which also swallowed KeyboardInterrupt/SystemExit)
                # to OSError, the only failure os.remove can raise here.
                try:
                    os.remove(resume_path)
                except OSError:
                    pass

        # Clean up job description file (same bare-except fix as above).
        try:
            os.remove(job_desc_path)
        except OSError:
            pass

        # Highest score first.
        candidates.sort(key=lambda x: x['score'], reverse=True)

        return jsonify({
            'candidates': candidates,
            'job_description': job_description[:500] + '...' if len(job_description) > 500 else job_description,
            'total_processed': len(candidates),
            'ai_used': ai_models_loaded
        })

    except Exception as e:
        logger.error(f"Error processing resumes: {e}")
        return jsonify({'error': 'Internal server error'}), 500
365
+
366
@app.route('/api/health', methods=['GET'])
def health_check():
    """Liveness probe: report model availability and upload-dir presence."""
    payload = {
        'status': 'healthy',
        'ai_models_loaded': ai_models_loaded,
        'upload_folder_exists': os.path.exists(UPLOAD_FOLDER),
    }
    return jsonify(payload)
374
+
375
@app.route('/')
def index():
    """Root endpoint: simple liveness message."""
    message = {'message': 'Resume Analyzer API is running'}
    return jsonify(message)
378
+
379
if __name__ == "__main__":
    # Dev entry point only — production runs under Gunicorn (see Dockerfile/Procfile).
    # Honor the platform-assigned PORT; default to 10000 for local runs.
    port = int(os.environ.get("PORT", 10000))
    app.run(host="0.0.0.0", port=port, debug=False)
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
flask
flask-cors
werkzeug
PyPDF2
# python-docx provides the `docx` module that app.py imports. The legacy
# `docx` PyPI package (Python 2-only) was removed: it installs a conflicting
# `docx` module and breaks installation/import on Python 3.
python-docx
numpy
torch
transformers
sentence-transformers
gunicorn
runtime.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ python-3.9.13