riteshkokam committed on
Commit
30d6309
·
verified ·
1 Parent(s): 20669ce

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +356 -293
app.py CHANGED
@@ -1,16 +1,18 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import pipeline, AutoTokenizer, AutoModel
4
  import PyPDF2
5
  import docx
6
  import io
7
  import re
8
  import numpy as np
 
9
  from sklearn.metrics.pairwise import cosine_similarity
10
  import nltk
11
  from collections import Counter
12
  import warnings
13
  import time
 
14
  warnings.filterwarnings("ignore")
15
 
16
  # Download required NLTK data
@@ -35,69 +37,73 @@ except LookupError:
35
  from nltk.corpus import stopwords
36
  from nltk.tokenize import word_tokenize, sent_tokenize
37
 
38
- class ATSResumeAnalyzer:
39
  def __init__(self):
40
- # Initialize models for different analysis tasks
41
  self.progress_callback = None
 
 
42
 
43
- # For semantic analysis - using a more powerful model
44
- self.update_progress("πŸ”„ Loading AI models...", 10)
45
 
46
- # Use a more sophisticated model for better analysis
47
  try:
48
- # BAAI/bge-small-en-v1.5 is excellent for semantic similarity and works on CPU
49
  from sentence_transformers import SentenceTransformer
50
- self.semantic_model = SentenceTransformer('BAAI/bge-small-en-v1.5')
51
- except:
52
- # Fallback to all-MiniLM if BGE is not available
53
- from sentence_transformers import SentenceTransformer
54
- self.semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
55
 
56
- # Initialize text generation pipeline for suggestions (using a small model)
57
  try:
58
- self.suggestion_generator = pipeline(
59
- "text-generation",
60
- model="microsoft/DialoGPT-small",
61
- tokenizer="microsoft/DialoGPT-small",
62
- device=-1 # CPU
63
- )
64
- except:
65
- self.suggestion_generator = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  self.stop_words = set(stopwords.words('english'))
 
68
 
69
- # ATS Keywords categories
70
- self.ats_categories = {
71
- 'technical_skills': ['python', 'javascript', 'java', 'sql', 'aws', 'docker', 'kubernetes', 'react', 'angular', 'node.js', 'machine learning', 'data science', 'tensorflow', 'pytorch', 'git', 'linux', 'windows', 'azure', 'gcp', 'html', 'css', 'mongodb', 'postgresql', 'mysql', 'api', 'rest', 'graphql', 'microservices', 'agile', 'scrum', 'devops', 'ci/cd'],
72
- 'soft_skills': ['leadership', 'communication', 'teamwork', 'problem solving', 'analytical', 'creative', 'adaptable', 'organized', 'detail oriented', 'time management', 'project management', 'collaboration', 'innovation', 'strategic thinking'],
73
- 'experience_indicators': ['managed', 'led', 'developed', 'implemented', 'designed', 'created', 'improved', 'optimized', 'achieved', 'delivered', 'coordinated', 'executed', 'supervised', 'mentored', 'trained', 'built', 'established', 'streamlined'],
74
- 'education_keywords': ['degree', 'bachelor', 'master', 'phd', 'certification', 'course', 'training', 'university', 'college', 'institute', 'graduated'],
75
- 'industry_specific': [] # Will be populated based on job description
76
- }
77
-
78
- self.update_progress("βœ… Models loaded successfully!", 20)
79
 
80
  def set_progress_callback(self, callback):
81
- """Set the progress callback function"""
82
  self.progress_callback = callback
83
 
84
  def update_progress(self, message, progress):
85
- """Update progress if callback is set"""
86
  if self.progress_callback:
87
  self.progress_callback(message, progress)
88
- time.sleep(0.1) # Small delay for better UX
89
 
90
- def extract_text_from_pdf(self, pdf_file):
91
  """Extract text from PDF file"""
92
  try:
93
- if isinstance(pdf_file, str):
94
- with open(pdf_file, 'rb') as file:
95
- pdf_reader = PyPDF2.PdfReader(file)
96
- text = ""
97
- for page in pdf_reader.pages:
98
- text += page.extract_text() + "\n"
99
- else:
100
- pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
101
  text = ""
102
  for page in pdf_reader.pages:
103
  text += page.extract_text() + "\n"
@@ -105,13 +111,10 @@ class ATSResumeAnalyzer:
105
  except Exception as e:
106
  return f"Error reading PDF: {str(e)}"
107
 
108
- def extract_text_from_docx(self, docx_file):
109
  """Extract text from DOCX file"""
110
  try:
111
- if isinstance(docx_file, str):
112
- doc = docx.Document(docx_file)
113
- else:
114
- doc = docx.Document(io.BytesIO(docx_file))
115
  text = ""
116
  for paragraph in doc.paragraphs:
117
  text += paragraph.text + "\n"
@@ -119,194 +122,235 @@ class ATSResumeAnalyzer:
119
  except Exception as e:
120
  return f"Error reading DOCX: {str(e)}"
121
 
122
- def preprocess_text(self, text):
123
- """Clean and preprocess text"""
124
- # Remove extra whitespace and normalize
125
  text = re.sub(r'\s+', ' ', text)
126
  text = re.sub(r'[^\w\s.,()-]', ' ', text)
127
- text = text.strip()
128
- return text
129
 
130
- def extract_ats_keywords(self, text, job_text=""):
131
- """Extract ATS-relevant keywords with weighting"""
132
- text_lower = text.lower()
133
- job_lower = job_text.lower() if job_text else ""
134
-
135
- # Extract keywords by category
136
- found_keywords = {}
137
-
138
- for category, keywords in self.ats_categories.items():
139
- found = []
140
- for keyword in keywords:
141
- if keyword in text_lower:
142
- # Give extra weight if keyword is also in job description
143
- weight = 2 if keyword in job_lower else 1
144
- found.append((keyword, weight))
145
- found_keywords[category] = found
146
-
147
- # Extract custom keywords from job description
148
- if job_text:
149
- job_keywords = self.extract_job_specific_keywords(job_text)
150
- found_keywords['job_specific'] = [(kw, 3) for kw in job_keywords if kw in text_lower]
151
-
152
- return found_keywords
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
- def extract_job_specific_keywords(self, job_text):
155
- """Extract important keywords specific to the job posting"""
156
- # Remove common job posting fluff
157
- job_text = re.sub(r'(we are looking for|ideal candidate|requirements|qualifications|responsibilities)', '', job_text.lower())
158
 
159
- words = word_tokenize(job_text.lower())
160
- words = [word for word in words if word.isalpha() and word not in self.stop_words and len(word) > 3]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
- # Get most frequent words as job-specific keywords
163
- word_freq = Counter(words)
164
- job_keywords = [word for word, freq in word_freq.most_common(15) if freq >= 2]
 
 
 
 
 
 
165
 
166
- return job_keywords
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
 
168
  def analyze_resume_structure(self, resume_text):
169
- """Analyze resume structure and format (ATS-friendly check)"""
170
- structure_score = 100
171
- issues = []
172
 
173
- # Check for common sections
174
  sections = {
175
  'contact': r'(email|phone|@|linkedin|github)',
176
- 'experience': r'(experience|work|employment|career)',
177
- 'education': r'(education|degree|university|college)',
178
- 'skills': r'(skills|technical|technologies|competencies)'
179
  }
180
 
181
- found_sections = 0
182
  for section, pattern in sections.items():
183
  if re.search(pattern, resume_text, re.IGNORECASE):
184
- found_sections += 1
185
- else:
186
- issues.append(f"Missing {section} section")
187
-
188
- section_score = (found_sections / len(sections)) * 100
189
 
190
- # Check for formatting issues
191
- if len(resume_text.split('\n')) < 10:
192
- structure_score -= 20
193
- issues.append("Resume appears to lack proper formatting/structure")
194
 
195
- # Check length
196
  word_count = len(resume_text.split())
197
- if word_count < 200:
198
- structure_score -= 30
199
- issues.append("Resume is too short (less than 200 words)")
200
- elif word_count > 1000:
201
- structure_score -= 10
202
- issues.append("Resume might be too long for ATS systems")
203
-
204
- return max(0, (structure_score + section_score) / 2), issues
205
-
206
- def calculate_ats_score(self, resume_keywords, job_keywords, resume_text, job_text):
207
- """Calculate ATS-style matching score"""
208
- self.update_progress("πŸ€– Calculating ATS compatibility...", 60)
209
-
210
- total_score = 0
211
- max_possible_score = 0
212
- category_scores = {}
213
-
214
- # Weight different categories
215
- category_weights = {
216
- 'technical_skills': 0.35,
217
- 'soft_skills': 0.15,
218
- 'experience_indicators': 0.25,
219
- 'education_keywords': 0.10,
220
- 'job_specific': 0.15
221
- }
222
 
223
- for category, weight in category_weights.items():
224
- max_possible_score += weight * 100
225
-
226
- if category in resume_keywords and category in job_keywords:
227
- resume_kw = dict(resume_keywords[category])
228
- job_kw = dict(job_keywords[category]) if isinstance(job_keywords[category][0], tuple) else {kw: 1 for kw in job_keywords[category]}
229
-
230
- if job_kw: # Only score if there are job keywords in this category
231
- matched_score = 0
232
- for kw, weight_val in resume_kw.items():
233
- if kw in job_kw:
234
- matched_score += weight_val * job_kw[kw]
235
-
236
- category_score = min(100, (matched_score / max(1, sum(job_kw.values()))) * 100)
237
- category_scores[category] = category_score
238
- total_score += weight * category_score
239
- else:
240
- category_scores[category] = 0
241
- else:
242
- category_scores[category] = 0
243
 
244
- # Semantic similarity bonus
245
- semantic_score = self.get_semantic_similarity(resume_text, job_text)
246
- total_score += 0.2 * semantic_score # 20% weight for semantic similarity
247
- max_possible_score += 0.2 * 100
248
 
249
- final_score = min(100, (total_score / max_possible_score) * 100)
250
-
251
- return final_score, category_scores, semantic_score
252
 
253
- def get_semantic_similarity(self, resume_text, job_text):
254
- """Calculate semantic similarity using transformer model"""
255
- try:
256
- # Encode texts
257
- resume_embedding = self.semantic_model.encode(resume_text)
258
- job_embedding = self.semantic_model.encode(job_text)
259
-
260
- # Calculate cosine similarity
261
- similarity = cosine_similarity([resume_embedding], [job_embedding])[0][0]
262
- return max(0, similarity * 100)
263
- except Exception as e:
264
- # Fallback to simple word overlap
265
- resume_words = set(resume_text.lower().split())
266
- job_words = set(job_text.lower().split())
267
- overlap = len(resume_words.intersection(job_words))
268
- return min(100, (overlap / len(job_words)) * 100) if job_words else 0
269
-
270
- def generate_ats_suggestions(self, resume_keywords, job_keywords, category_scores, structure_score, structure_issues):
271
- """Generate ATS-specific improvement suggestions"""
272
  suggestions = []
273
 
274
- # Structure suggestions
275
- if structure_score < 80:
276
- suggestions.append(f"πŸ“‹ **Resume Structure** (Score: {structure_score:.0f}/100): " +
277
- f"Improve resume formatting. Issues found: {', '.join(structure_issues)}")
278
-
279
- # Category-specific suggestions
280
- for category, score in category_scores.items():
281
- if score < 60:
282
- category_name = category.replace('_', ' ').title()
283
- if category == 'technical_skills':
284
- suggestions.append(f"πŸ’» **{category_name}** (Score: {score:.0f}/100): Add more relevant technical skills mentioned in the job description. Consider including specific tools, programming languages, or technologies.")
285
- elif category == 'experience_indicators':
286
- suggestions.append(f"πŸ“ˆ **{category_name}** (Score: {score:.0f}/100): Use more action verbs like 'managed', 'developed', 'implemented', 'led' to describe your achievements.")
287
- elif category == 'job_specific':
288
- suggestions.append(f"🎯 **{category_name}** (Score: {score:.0f}/100): Include more keywords that are specific to this job posting.")
289
- else:
290
- suggestions.append(f"πŸ”§ **{category_name}** (Score: {score:.0f}/100): Enhance this section to better match job requirements.")
291
-
292
- # Overall suggestions based on total score
293
- overall_score = np.mean(list(category_scores.values()))
294
- if overall_score < 40:
295
- suggestions.append("🚨 **Critical**: Your resume needs significant optimization for ATS systems. Consider using more keywords from the job description.")
296
- elif overall_score < 70:
297
- suggestions.append("⚠️ **Moderate**: Your resume has good potential but needs keyword optimization to improve ATS compatibility.")
298
  else:
299
- suggestions.append("βœ… **Good**: Your resume shows strong ATS compatibility. Minor tweaks could make it even better.")
 
 
 
 
300
 
301
- # Add specific actionable suggestions
302
- suggestions.append("πŸ’‘ **ATS Tips**: Use standard section headings, include keywords naturally in context, quantify achievements with numbers, and save as PDF to preserve formatting.")
 
 
 
 
 
 
303
 
304
  return suggestions
305
 
306
  def process_resume_analysis(self, resume_file, job_description, progress=gr.Progress()):
307
- """Main processing function with progress tracking"""
308
  try:
309
- # Set up progress tracking
310
  def update_progress_ui(message, prog):
311
  progress(prog/100, desc=message)
312
 
@@ -314,120 +358,110 @@ class ATSResumeAnalyzer:
314
 
315
  # Validation
316
  if resume_file is None:
317
- return "Please upload a resume file.", "", "", ""
318
 
319
- if not job_description.strip():
320
- return "Please provide a job description.", "", "", ""
321
 
322
- self.update_progress("πŸ“„ Reading resume file...", 30)
323
 
324
- # Extract text from resume
325
- if hasattr(resume_file, 'name'):
326
- filename = resume_file.name.lower()
327
- with open(resume_file.name, 'rb') as f:
328
- file_content = f.read()
329
- else:
330
- filename = str(resume_file).lower()
331
- with open(resume_file, 'rb') as f:
332
- file_content = f.read()
333
 
334
  if filename.endswith('.pdf'):
335
- resume_text = self.extract_text_from_pdf(file_content)
336
  elif filename.endswith('.docx'):
337
- resume_text = self.extract_text_from_docx(file_content)
338
  else:
339
- return f"Unsupported file format: {filename}. Please upload PDF or DOCX files.", "", "", ""
340
 
341
  if "Error reading" in resume_text:
342
  return resume_text, "", "", ""
343
 
344
- self.update_progress("πŸ” Analyzing resume structure...", 40)
345
-
346
- # Preprocess texts
347
- resume_clean = self.preprocess_text(resume_text)
348
- job_clean = self.preprocess_text(job_description)
349
-
350
- if len(resume_clean.split()) < 50:
351
- return "Resume text is too short or couldn't be extracted properly. Please ensure your PDF/DOCX contains readable text.", "", "", ""
352
 
353
- # Structure analysis
354
- structure_score, structure_issues = self.analyze_resume_structure(resume_clean)
355
 
356
- self.update_progress("🎯 Extracting ATS keywords...", 50)
 
357
 
358
- # Extract ATS keywords
359
- resume_keywords = self.extract_ats_keywords(resume_clean, job_clean)
360
- job_keywords = self.extract_ats_keywords(job_clean)
361
-
362
- # Calculate ATS score
363
- ats_score, category_scores, semantic_score = self.calculate_ats_score(
364
- resume_keywords, job_keywords, resume_clean, job_clean
365
- )
366
-
367
- self.update_progress("πŸ’‘ Generating improvement suggestions...", 80)
368
 
369
  # Generate suggestions
370
- suggestions = self.generate_ats_suggestions(
371
- resume_keywords, job_keywords, category_scores, structure_score, structure_issues
372
- )
373
 
374
  self.update_progress("βœ… Analysis complete!", 100)
375
 
376
  # Format results
377
- score_text = f"# 🎯 ATS Compatibility Score: {ats_score:.0f}/100\n\n"
378
-
379
- if ats_score >= 80:
380
- score_text += "🟒 **Excellent ATS Compatibility** - Your resume should pass most ATS systems"
381
- elif ats_score >= 60:
382
- score_text += "🟑 **Good ATS Compatibility** - Some improvements recommended"
383
- elif ats_score >= 40:
384
- score_text += "🟠 **Moderate ATS Compatibility** - Significant improvements needed"
 
 
 
385
  else:
386
- score_text += "πŸ”΄ **Poor ATS Compatibility** - Major optimization required"
 
387
 
388
- details = f"""## πŸ“Š Detailed ATS Analysis
389
-
390
- **Overall Structure Score**: {structure_score:.1f}/100
391
- **Semantic Match**: {semantic_score:.1f}/100
392
 
393
- ### Category Breakdown:
394
- - **Technical Skills**: {category_scores.get('technical_skills', 0):.1f}/100
395
- - **Experience Indicators**: {category_scores.get('experience_indicators', 0):.1f}/100
396
- - **Job-Specific Keywords**: {category_scores.get('job_specific', 0):.1f}/100
397
- - **Soft Skills**: {category_scores.get('soft_skills', 0):.1f}/100
398
- - **Education Keywords**: {category_scores.get('education_keywords', 0):.1f}/100
 
 
 
 
399
  """
400
 
401
- suggestions_text = "## πŸ’‘ ATS Optimization Suggestions\n\n" + "\n\n".join(suggestions)
402
 
403
  # Keywords analysis
404
- resume_tech_kw = [kw for kw, _ in resume_keywords.get('technical_skills', [])]
405
- job_specific_kw = [kw for kw, _ in resume_keywords.get('job_specific', [])]
406
-
407
- keywords_text = f"""## πŸ” Keyword Analysis
408
 
409
- **Technical Skills Found**: {', '.join(resume_tech_kw[:10]) if resume_tech_kw else 'None detected'}
 
 
 
 
 
 
 
410
 
411
- **Job-Specific Keywords Found**: {', '.join(job_specific_kw[:10]) if job_specific_kw else 'None detected'}
412
 
413
- **ATS Tip**: Ensure keywords appear naturally in context, not just in a skills list.
414
  """
 
 
415
 
416
  return score_text, details, suggestions_text, keywords_text
417
 
418
  except Exception as e:
419
- return f"An error occurred during analysis: {str(e)}", "", "", ""
420
 
421
- # Initialize the analyzer
422
- analyzer = ATSResumeAnalyzer()
423
 
424
- # Create Gradio interface
425
  def create_interface():
426
- with gr.Blocks(title="ATS Resume Analyzer", theme=gr.themes.Soft()) as interface:
427
  gr.HTML("""
428
- <div style='text-align: center; padding: 20px;'>
429
- <h1>πŸ€– AI-Powered ATS Resume Analyzer</h1>
430
- <p>Get your resume analyzed like real ATS systems! Upload your resume and job description to receive detailed compatibility scoring and optimization suggestions.</p>
 
431
  </div>
432
  """)
433
 
@@ -443,20 +477,31 @@ def create_interface():
443
  gr.HTML("<h3>πŸ“‹ Job Description</h3>")
444
  job_description = gr.Textbox(
445
  label="Paste Complete Job Description",
446
- placeholder="Paste the full job description including requirements, qualifications, and responsibilities...",
447
- lines=12,
448
- max_lines=20
449
  )
450
 
451
- analyze_btn = gr.Button("πŸš€ Analyze with ATS", variant="primary", size="lg")
 
 
 
 
 
 
 
 
 
 
 
 
452
 
453
  with gr.Column(scale=1):
454
- score_output = gr.Markdown(label="ATS Compatibility Score")
455
- details_output = gr.Markdown(label="Detailed Analysis")
456
- suggestions_output = gr.Markdown(label="Optimization Suggestions")
457
- keywords_output = gr.Markdown(label="Keywords Analysis")
458
 
459
- # Set up the event handler with progress tracking
460
  analyze_btn.click(
461
  fn=analyzer.process_resume_analysis,
462
  inputs=[resume_file, job_description],
@@ -464,16 +509,34 @@ def create_interface():
464
  )
465
 
466
  gr.HTML("""
467
- <div style='text-align: center; padding: 20px; margin-top: 30px; border-top: 1px solid #ddd;'>
468
- <p><strong>🎯 ATS-Powered Analysis:</strong> This tool simulates real ATS (Applicant Tracking System) behavior using advanced AI models for keyword extraction, semantic analysis, and resume structure evaluation.</p>
469
- <p><strong>πŸ“ˆ What makes this different:</strong> Unlike simple keyword matching, this analyzer considers context, semantic meaning, industry-specific terms, and proper resume structure - just like enterprise ATS systems.</p>
470
- <p><em>Supported formats: PDF, DOCX | Optimized for CPU performance</em></p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471
  </div>
472
  """)
473
 
474
  return interface
475
 
476
- # Launch the app
477
  if __name__ == "__main__":
478
  app = create_interface()
479
  app.launch(
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
4
  import PyPDF2
5
  import docx
6
  import io
7
  import re
8
  import numpy as np
9
+ from sklearn.feature_extraction.text import TfidfVectorizer
10
  from sklearn.metrics.pairwise import cosine_similarity
11
  import nltk
12
  from collections import Counter
13
  import warnings
14
  import time
15
+ import json
16
  warnings.filterwarnings("ignore")
17
 
18
  # Download required NLTK data
 
37
  from nltk.corpus import stopwords
38
  from nltk.tokenize import word_tokenize, sent_tokenize
39
 
40
+ class ModernATSAnalyzer:
41
  def __init__(self):
 
42
  self.progress_callback = None
43
+ self.llm_pipeline = None
44
+ self.embedding_model = None
45
 
46
+ self.update_progress("πŸš€ Initializing AI models...", 5)
 
47
 
48
+ # Initialize embedding model for semantic analysis
49
  try:
 
50
  from sentence_transformers import SentenceTransformer
51
+ # Use latest 2025 optimized model for better understanding
52
+ self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
53
+ self.update_progress("βœ… Embedding model loaded", 15)
54
+ except Exception as e:
55
+ self.update_progress(f"❌ Embedding model failed: {str(e)}", 15)
56
 
57
+ # Initialize LLM for intelligent analysis (using 2025 small models)
58
  try:
59
+ # Try to load a small but capable 2025 model
60
+ model_options = [
61
+ "microsoft/DialoGPT-small", # Fallback option
62
+ "HuggingFaceTB/SmolLM2-135M", # 2025 efficient model
63
+ "Qwen/Qwen2.5-0.5B" # 2025 small but powerful
64
+ ]
65
+
66
+ for model_name in model_options:
67
+ try:
68
+ self.llm_pipeline = pipeline(
69
+ "text-generation",
70
+ model=model_name,
71
+ tokenizer=model_name,
72
+ device=-1, # CPU
73
+ max_length=512,
74
+ do_sample=True,
75
+ temperature=0.7,
76
+ pad_token_id=50256
77
+ )
78
+ self.update_progress(f"βœ… LLM loaded: {model_name}", 25)
79
+ break
80
+ except:
81
+ continue
82
+
83
+ if not self.llm_pipeline:
84
+ self.update_progress("⚠️ Using rule-based analysis (LLM unavailable)", 25)
85
+
86
+ except Exception as e:
87
+ self.update_progress(f"⚠️ LLM initialization failed, using backup methods", 25)
88
 
89
  self.stop_words = set(stopwords.words('english'))
90
+ self.tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_features=1000)
91
 
92
+ self.update_progress("🎯 System ready for analysis!", 30)
 
 
 
 
 
 
 
 
 
93
 
94
  def set_progress_callback(self, callback):
 
95
  self.progress_callback = callback
96
 
97
  def update_progress(self, message, progress):
 
98
  if self.progress_callback:
99
  self.progress_callback(message, progress)
100
+ time.sleep(0.05)
101
 
102
+ def extract_text_from_pdf(self, file_path):
103
  """Extract text from PDF file"""
104
  try:
105
+ with open(file_path, 'rb') as file:
106
+ pdf_reader = PyPDF2.PdfReader(file)
 
 
 
 
 
 
107
  text = ""
108
  for page in pdf_reader.pages:
109
  text += page.extract_text() + "\n"
 
111
  except Exception as e:
112
  return f"Error reading PDF: {str(e)}"
113
 
114
+ def extract_text_from_docx(self, file_path):
115
  """Extract text from DOCX file"""
116
  try:
117
+ doc = docx.Document(file_path)
 
 
 
118
  text = ""
119
  for paragraph in doc.paragraphs:
120
  text += paragraph.text + "\n"
 
122
  except Exception as e:
123
  return f"Error reading DOCX: {str(e)}"
124
 
125
def clean_text(self, text):
    """Normalize whitespace and blank out unsupported punctuation.

    Whitespace runs are collapsed to a single space first. Then every
    character that is not a word character, whitespace, or one of
    ``. , ( ) -`` is replaced by a space; spaces introduced by that second
    step are not collapsed again. The result is stripped at both ends.
    """
    collapsed = re.sub(r'\s+', ' ', text)
    filtered = re.sub(r'[^\w\s.,()-]', ' ', collapsed)
    return filtered.strip()
 
130
 
131
def extract_dynamic_keywords(self, text, top_n=30):
    """Extract frequent single words and repeated phrases from ``text``.

    Args:
        text: Raw text to analyze.
        top_n: Budget of keywords; half goes to single words, half to
            phrases (bigrams/trigrams).

    Returns:
        A list of ``(term, frequency, kind)`` tuples where ``kind`` is
        ``'word'`` or ``'phrase'``. Words come first, then phrases that
        occur at least twice.
    """
    stop_words = self.stop_words
    normalized = self.clean_text(text.lower())

    # Single words: alphabetic, longer than two characters, not stop words.
    word_freq = Counter(
        token
        for token in word_tokenize(normalized)
        if token.isalpha() and len(token) > 2 and token not in stop_words
    )

    # Phrases are built per sentence so they never span a sentence boundary.
    phrase_freq = Counter()
    for sentence in sent_tokenize(text):
        tokens = [t for t in word_tokenize(sentence.lower()) if t.isalpha()]
        # (n-gram size, minimum phrase length in characters)
        for size, min_chars in ((2, 6), (3, 10)):
            for start in range(len(tokens) - size + 1):
                gram = tokens[start:start + size]
                # Phrases such as "of the" or "in the" repeat in any English
                # text and would inflate keyword overlap, so n-grams that
                # begin or end with a stop word are skipped.
                if gram[0] in stop_words or gram[-1] in stop_words:
                    continue
                phrase = " ".join(gram)
                if len(phrase) > min_chars:
                    phrase_freq[phrase] += 1

    half = top_n // 2
    keywords = [(word, freq, 'word') for word, freq in word_freq.most_common(half)]
    keywords.extend(
        (phrase, freq, 'phrase')
        for phrase, freq in phrase_freq.most_common(half)
        if freq >= 2  # only phrases that appear multiple times
    )
    return keywords
181
 
182
def analyze_with_llm(self, resume_text, job_text):
    """Score the resume against the job description with the LLM pipeline.

    Falls back to ``fallback_analysis`` when no LLM is loaded, when
    generation raises, or when the reply contains no parsable
    ``Score: <number>`` line. Small models often ignore the requested
    format, and in that case a rule-based score is more honest than an
    invented default.

    Args:
        resume_text: Extracted resume text (only the first 500 characters
            are placed in the prompt).
        job_text: Job description text (first 500 characters are used).

    Returns:
        On success, a dict with ``overall_score`` (clamped to 0-100),
        ``analysis_text`` (the generated continuation) and
        ``method == 'LLM'``; otherwise whatever ``fallback_analysis`` returns.
    """
    if not self.llm_pipeline:
        return self.fallback_analysis(resume_text, job_text)

    prompt = (
        "Analyze this resume against the job description and provide a "
        "compatibility score out of 100.\n"
        "\n"
        "Job Description:\n"
        f"{job_text[:500]}...\n"
        "\n"
        "Resume:\n"
        f"{resume_text[:500]}...\n"
        "\n"
        "Provide analysis in this format:\n"
        "Score: [0-100]\n"
        "Skills Match: [description]\n"
        "Experience Match: [description]\n"
        "Key Gaps: [description]\n"
    )

    try:
        response = self.llm_pipeline(prompt, max_new_tokens=200, num_return_sequences=1)
        # The pipeline echoes the prompt; keep only the generated continuation.
        analysis_text = response[0]['generated_text'].split(prompt)[-1].strip()
    except Exception:
        # Generation is best-effort; any failure degrades to rule-based scoring.
        return self.fallback_analysis(resume_text, job_text)

    score_match = re.search(r'Score:\s*(\d+)', analysis_text)
    if score_match is None:
        return self.fallback_analysis(resume_text, job_text)

    return {
        'overall_score': min(100, max(0, int(score_match.group(1)))),
        'analysis_text': analysis_text,
        'method': 'LLM',
    }
218
+
219
def fallback_analysis(self, resume_text, job_text):
    """Rule-based compatibility analysis used when the LLM path is unavailable.

    Combines four components into a weighted score: keyword overlap (30%),
    TF-IDF cosine similarity (25%), embedding cosine similarity (25%) and
    the resume structure score (20%).

    Args:
        resume_text: Extracted resume text.
        job_text: Job description text.

    Returns:
        A dict with ``overall_score`` (clamped to 0-100), the individual
        component scores, the top ten keywords of each text, up to ten
        shared keywords, and ``method == 'Advanced Rule-based'``.
    """
    def as_percent(similarity):
        # Cosine similarity arrives as a numpy scalar and can be slightly
        # negative for embeddings; expose a plain float in [0, 100].
        return min(100.0, max(0.0, float(similarity) * 100))

    resume_keywords = self.extract_dynamic_keywords(resume_text)
    job_keywords = self.extract_dynamic_keywords(job_text)

    resume_terms = {kw[0] for kw in resume_keywords}
    job_terms = {kw[0] for kw in job_keywords}
    common_terms = resume_terms & job_terms

    # 1. Keyword overlap: share of job terms that also occur in the resume.
    keyword_score = (len(common_terms) / len(job_terms)) * 100 if job_terms else 0

    # 2. TF-IDF similarity (best-effort, e.g. fit fails on an empty vocabulary).
    try:
        tfidf_matrix = self.tfidf_vectorizer.fit_transform([resume_text, job_text])
        tfidf_similarity = as_percent(
            cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
        )
    except Exception:
        tfidf_similarity = 0

    # 3. Semantic similarity on the first 512 characters of each text.
    semantic_score = 0
    if self.embedding_model:
        try:
            resume_embedding = self.embedding_model.encode(resume_text[:512])
            job_embedding = self.embedding_model.encode(job_text[:512])
            semantic_score = as_percent(
                cosine_similarity([resume_embedding], [job_embedding])[0][0]
            )
        except Exception:
            semantic_score = 0

    # 4. Structure and length analysis.
    structure_score = self.analyze_resume_structure(resume_text)

    overall_score = (
        keyword_score * 0.3
        + tfidf_similarity * 0.25
        + semantic_score * 0.25
        + structure_score * 0.2
    )

    return {
        'overall_score': min(100, max(0, overall_score)),
        'keyword_score': keyword_score,
        'tfidf_score': tfidf_similarity,
        'semantic_score': semantic_score,
        'structure_score': structure_score,
        'resume_keywords': resume_keywords[:10],
        'job_keywords': job_keywords[:10],
        # Sorted so the reported subset does not depend on set iteration order.
        'common_keywords': sorted(common_terms)[:10],
        'method': 'Advanced Rule-based',
    }
274
 
275
  def analyze_resume_structure(self, resume_text):
276
+ """Analyze resume structure and formatting"""
277
+ score = 100
 
278
 
279
+ # Check for essential sections
280
  sections = {
281
  'contact': r'(email|phone|@|linkedin|github)',
282
+ 'experience': r'(experience|work|employment|career|job)',
283
+ 'education': r'(education|degree|university|college|school)',
284
+ 'skills': r'(skills|technical|technologies|competencies|tools)'
285
  }
286
 
287
+ sections_found = 0
288
  for section, pattern in sections.items():
289
  if re.search(pattern, resume_text, re.IGNORECASE):
290
+ sections_found += 1
 
 
 
 
291
 
292
+ # Penalize missing sections
293
+ section_penalty = (4 - sections_found) * 15
294
+ score -= section_penalty
 
295
 
296
+ # Check word count
297
  word_count = len(resume_text.split())
298
+ if word_count < 150:
299
+ score -= 30
300
+ elif word_count > 1200:
301
+ score -= 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
302
 
303
+ # Check for bullet points or structure
304
+ if 'β€’' in resume_text or '-' in resume_text or '*' in resume_text:
305
+ score += 5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
 
307
+ # Check for years/dates (experience indicators)
308
+ years_pattern = r'(20\d{2}|19\d{2})'
309
+ if re.search(years_pattern, resume_text):
310
+ score += 10
311
 
312
+ return max(0, min(100, score))
 
 
313
 
314
def generate_intelligent_suggestions(self, analysis_result):
    """Turn an analysis result into a list of Markdown suggestion strings.

    Args:
        analysis_result: Dict with at least ``method`` and ``overall_score``.
            Optional keys ``analysis_text``, ``keyword_score``,
            ``structure_score`` and ``semantic_score`` add targeted advice
            when present.

    Returns:
        Suggestions in order: LLM-reported key gaps (if any), one overall
        verdict for the score band, component-specific advice, and a closing
        general ATS tip.
    """
    suggestions = []

    # Reuse the "Key Gaps:" tail of an LLM reply, skipping it when empty.
    if analysis_result['method'] == 'LLM' and 'analysis_text' in analysis_result:
        llm_text = analysis_result['analysis_text']
        if 'Key Gaps:' in llm_text:
            gaps = llm_text.split('Key Gaps:')[-1].strip()
            if gaps:
                suggestions.append(f"🎯 **Key Areas to Improve**: {gaps}")

    # Overall verdict by score band.
    score = analysis_result['overall_score']
    if score < 40:
        suggestions.append("🚨 **Critical**: Your resume needs major optimization. Consider professional resume writing services.")
    elif score < 60:
        suggestions.append("⚠️ **Moderate Compatibility**: Your resume shows potential but needs significant keyword optimization.")
    elif score < 80:
        suggestions.append("👍 **Good Foundation**: You're on the right track. Focus on fine-tuning keywords and formatting.")
    else:
        suggestions.append("✅ **Excellent**: Your resume shows strong compatibility with this job!")

    # Component advice: (result key, threshold below which advice is shown, text).
    component_advice = (
        ('keyword_score', 40,
         "🔑 **Keywords**: Incorporate more relevant keywords from the job description naturally into your resume content."),
        ('structure_score', 70,
         "📋 **Structure**: Improve resume formatting with clear sections: Contact, Experience, Education, Skills."),
        ('semantic_score', 50,
         "🎨 **Content Alignment**: Rewrite your experience descriptions to better match the job's language and requirements."),
    )
    for key, threshold, advice in component_advice:
        if key in analysis_result and analysis_result[key] < threshold:
            suggestions.append(advice)

    suggestions.append("💡 **ATS Tips**: Use standard fonts, avoid images/graphics, save as PDF, and use keywords in context rather than just listing them.")

    return suggestions
350
 
351
  def process_resume_analysis(self, resume_file, job_description, progress=gr.Progress()):
352
+ """Main analysis function"""
353
  try:
 
354
  def update_progress_ui(message, prog):
355
  progress(prog/100, desc=message)
356
 
 
358
 
359
  # Validation
360
  if resume_file is None:
361
+ return "❌ Please upload a resume file.", "", "", ""
362
 
363
+ if not job_description or len(job_description.strip()) < 50:
364
+ return "❌ Please provide a detailed job description (at least 50 characters).", "", "", ""
365
 
366
+ self.update_progress("πŸ“„ Extracting text from resume...", 35)
367
 
368
+ # Extract resume text
369
+ filename = str(resume_file).lower()
 
 
 
 
 
 
 
370
 
371
  if filename.endswith('.pdf'):
372
+ resume_text = self.extract_text_from_pdf(resume_file)
373
  elif filename.endswith('.docx'):
374
+ resume_text = self.extract_text_from_docx(resume_file)
375
  else:
376
+ return f"❌ Unsupported file format. Please upload PDF or DOCX files.", "", "", ""
377
 
378
  if "Error reading" in resume_text:
379
  return resume_text, "", "", ""
380
 
381
+ if len(resume_text.strip()) < 100:
382
+ return "❌ Resume text is too short or couldn't be extracted. Please ensure your file contains readable text.", "", "", ""
 
 
 
 
 
 
383
 
384
+ self.update_progress("🧠 Analyzing with AI...", 50)
 
385
 
386
+ # Perform AI analysis
387
+ analysis_result = self.analyze_with_llm(resume_text, job_description)
388
 
389
+ self.update_progress("πŸ’‘ Generating suggestions...", 80)
 
 
 
 
 
 
 
 
 
390
 
391
  # Generate suggestions
392
+ suggestions = self.generate_intelligent_suggestions(analysis_result)
 
 
393
 
394
  self.update_progress("βœ… Analysis complete!", 100)
395
 
396
  # Format results
397
+ score = analysis_result['overall_score']
398
+
399
+ if score >= 85:
400
+ emoji = "🟒"
401
+ status = "Excellent Match"
402
+ elif score >= 70:
403
+ emoji = "🟑"
404
+ status = "Good Compatibility"
405
+ elif score >= 50:
406
+ emoji = "🟠"
407
+ status = "Moderate Match"
408
  else:
409
+ emoji = "πŸ”΄"
410
+ status = "Needs Improvement"
411
 
412
+ score_text = f"# 🎯 ATS Compatibility Score: {score:.0f}/100\n\n{emoji} **{status}**"
413
+
414
+ # Detailed breakdown
415
+ details = f"""## πŸ“Š Analysis Breakdown
416
 
417
+ **Analysis Method**: {analysis_result['method']}
418
+ **Overall Score**: {score:.1f}/100
419
+ """
420
+
421
+ if 'keyword_score' in analysis_result:
422
+ details += f"""
423
+ **Keyword Match**: {analysis_result['keyword_score']:.1f}/100
424
+ **Content Similarity**: {analysis_result.get('tfidf_score', 0):.1f}/100
425
+ **Semantic Match**: {analysis_result.get('semantic_score', 0):.1f}/100
426
+ **Structure Quality**: {analysis_result.get('structure_score', 0):.1f}/100
427
  """
428
 
429
+ suggestions_text = "## πŸ’‘ Improvement Recommendations\n\n" + "\n\n".join(suggestions)
430
 
431
  # Keywords analysis
432
+ keywords_text = "## πŸ” Keyword Analysis\n\n"
 
 
 
433
 
434
+ if 'resume_keywords' in analysis_result:
435
+ resume_kw = [kw[0] for kw in analysis_result['resume_keywords']]
436
+ job_kw = [kw[0] for kw in analysis_result['job_keywords']]
437
+ common_kw = analysis_result.get('common_keywords', [])
438
+
439
+ keywords_text += f"""**Resume Keywords**: {', '.join(resume_kw)}
440
+
441
+ **Job Keywords**: {', '.join(job_kw)}
442
 
443
+ **Matching Keywords**: {', '.join(common_kw) if common_kw else 'Limited overlap detected'}
444
 
445
+ **Recommendation**: Focus on incorporating more job-specific keywords naturally into your resume content.
446
  """
447
+ else:
448
+ keywords_text += "**Dynamic keyword extraction completed.** The analysis considered context and semantic meaning rather than simple keyword matching."
449
 
450
  return score_text, details, suggestions_text, keywords_text
451
 
452
  except Exception as e:
453
+ return f"❌ Analysis error: {str(e)}\n\nPlease try again or contact support.", "", "", ""
454
 
455
# Build the analyzer once, at import time, so the UI event handler can be bound
# to a single shared instance.
analyzer = ModernATSAnalyzer()
457
 
 
458
  def create_interface():
459
+ with gr.Blocks(title="Modern ATS Analyzer 2025", theme=gr.themes.Soft()) as interface:
460
  gr.HTML("""
461
+ <div style='text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px; margin-bottom: 20px;'>
462
+ <h1>πŸ€– Modern ATS Resume Analyzer 2025</h1>
463
+ <p style='font-size: 16px; margin: 10px 0;'>Powered by Latest AI Models | Dynamic Keyword Extraction | Intelligent Analysis</p>
464
+ <p style='font-size: 14px; opacity: 0.9;'>No predefined keywords - Real ATS-like analysis using 2025 AI technology</p>
465
  </div>
466
  """)
467
 
 
477
  gr.HTML("<h3>πŸ“‹ Job Description</h3>")
478
  job_description = gr.Textbox(
479
  label="Paste Complete Job Description",
480
+ placeholder="Paste the full job posting including requirements, responsibilities, qualifications, and company information...",
481
+ lines=15,
482
+ max_lines=25
483
  )
484
 
485
+ analyze_btn = gr.Button("πŸš€ Analyze with Modern AI", variant="primary", size="lg")
486
+
487
+ gr.HTML("""
488
+ <div style='margin-top: 15px; padding: 15px; background: #f0f8ff; border-radius: 8px; border-left: 4px solid #4CAF50;'>
489
+ <h4 style='margin: 0 0 10px 0; color: #2E7D32;'>🎯 What makes this different:</h4>
490
+ <ul style='margin: 0; padding-left: 20px; color: #424242;'>
491
+ <li><strong>No predefined keywords</strong> - Dynamically extracts relevant terms</li>
492
+ <li><strong>2025 AI models</strong> - Uses latest language understanding</li>
493
+ <li><strong>Context-aware</strong> - Understands meaning, not just word matching</li>
494
+ <li><strong>Real ATS simulation</strong> - Mimics actual hiring systems</li>
495
+ </ul>
496
+ </div>
497
+ """)
498
 
499
  with gr.Column(scale=1):
500
+ score_output = gr.Markdown(label="🎯 Compatibility Score")
501
+ details_output = gr.Markdown(label="πŸ“Š Detailed Analysis")
502
+ suggestions_output = gr.Markdown(label="πŸ’‘ AI Recommendations")
503
+ keywords_output = gr.Markdown(label="πŸ” Keyword Intelligence")
504
 
 
505
  analyze_btn.click(
506
  fn=analyzer.process_resume_analysis,
507
  inputs=[resume_file, job_description],
 
509
  )
510
 
511
  gr.HTML("""
512
+ <div style='text-align: center; padding: 20px; margin-top: 30px; border-top: 2px solid #e0e0e0; background: #fafafa; border-radius: 8px;'>
513
+ <h4 style='color: #333; margin-bottom: 15px;'>🧠 AI-Powered Analysis Engine</h4>
514
+ <div style='display: flex; justify-content: space-around; flex-wrap: wrap;'>
515
+ <div style='margin: 10px; text-align: center;'>
516
+ <strong style='color: #1976D2;'>🎯 Dynamic Keywords</strong><br>
517
+ <span style='font-size: 12px; color: #666;'>Extracts context-relevant terms</span>
518
+ </div>
519
+ <div style='margin: 10px; text-align: center;'>
520
+ <strong style='color: #388E3C;'>🧠 Semantic Analysis</strong><br>
521
+ <span style='font-size: 12px; color: #666;'>Understands meaning & context</span>
522
+ </div>
523
+ <div style='margin: 10px; text-align: center;'>
524
+ <strong style='color: #F57C00;'>πŸ“Š Multi-metric Scoring</strong><br>
525
+ <span style='font-size: 12px; color: #666;'>Comprehensive compatibility analysis</span>
526
+ </div>
527
+ <div style='margin: 10px; text-align: center;'>
528
+ <strong style='color: #7B1FA2;'>πŸ’‘ AI Suggestions</strong><br>
529
+ <span style='font-size: 12px; color: #666;'>Personalized improvement tips</span>
530
+ </div>
531
+ </div>
532
+ <p style='margin-top: 15px; font-size: 13px; color: #777;'>
533
+ <em>Optimized for CPU inference β€’ 2025 Model Architecture β€’ Enterprise-grade Analysis</em>
534
+ </p>
535
  </div>
536
  """)
537
 
538
  return interface
539
 
 
540
  if __name__ == "__main__":
541
  app = create_interface()
542
  app.launch(