riteshkokam committed on
Commit
327fa58
·
verified ·
1 Parent(s): 9e1414a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +298 -159
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  import torch
3
- from sentence_transformers import SentenceTransformer
4
  import PyPDF2
5
  import docx
6
  import io
@@ -10,6 +10,7 @@ from sklearn.metrics.pairwise import cosine_similarity
10
  import nltk
11
  from collections import Counter
12
  import warnings
 
13
  warnings.filterwarnings("ignore")
14
 
15
  # Download required NLTK data
@@ -34,20 +35,72 @@ except LookupError:
34
  from nltk.corpus import stopwords
35
  from nltk.tokenize import word_tokenize, sent_tokenize
36
 
37
- class ResumeJobMatcher:
38
  def __init__(self):
39
- # Use a lightweight but effective sentence transformer model
40
- # all-MiniLM-L6-v2 is optimized for CPU and works well on limited resources
41
- self.model = SentenceTransformer('all-MiniLM-L6-v2')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  self.stop_words = set(stopwords.words('english'))
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  def extract_text_from_pdf(self, pdf_file):
45
  """Extract text from PDF file"""
46
  try:
47
- pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
48
- text = ""
49
- for page in pdf_reader.pages:
50
- text += page.extract_text() + "\n"
 
 
 
 
 
 
 
51
  return text
52
  except Exception as e:
53
  return f"Error reading PDF: {str(e)}"
@@ -55,7 +108,10 @@ class ResumeJobMatcher:
55
  def extract_text_from_docx(self, docx_file):
56
  """Extract text from DOCX file"""
57
  try:
58
- doc = docx.Document(io.BytesIO(docx_file))
 
 
 
59
  text = ""
60
  for paragraph in doc.paragraphs:
61
  text += paragraph.text + "\n"
@@ -67,228 +123,311 @@ class ResumeJobMatcher:
67
  """Clean and preprocess text"""
68
  # Remove extra whitespace and normalize
69
  text = re.sub(r'\s+', ' ', text)
70
- text = re.sub(r'[^\w\s]', ' ', text)
71
- text = text.lower().strip()
72
  return text
73
 
74
- def extract_keywords(self, text, top_n=20):
75
- """Extract important keywords from text"""
76
- words = word_tokenize(text.lower())
77
- words = [word for word in words if word.isalpha() and word not in self.stop_words and len(word) > 2]
78
 
79
- # Get most common words
80
- word_freq = Counter(words)
81
- keywords = [word for word, freq in word_freq.most_common(top_n)]
82
- return keywords
83
-
84
- def calculate_keyword_match(self, resume_keywords, job_keywords):
85
- """Calculate keyword matching score"""
86
- resume_set = set(resume_keywords)
87
- job_set = set(job_keywords)
88
 
89
- if not job_set:
90
- return 0
 
 
 
 
 
 
91
 
92
- intersection = resume_set.intersection(job_set)
93
- return len(intersection) / len(job_set) * 100
 
 
 
 
94
 
95
- def get_semantic_similarity(self, resume_text, job_text):
96
- """Calculate semantic similarity using sentence transformers"""
97
- # Split texts into sentences for better analysis
98
- resume_sentences = sent_tokenize(resume_text)
99
- job_sentences = sent_tokenize(job_text)
100
-
101
- # Encode texts
102
- resume_embedding = self.model.encode(resume_text)
103
- job_embedding = self.model.encode(job_text)
104
-
105
- # Calculate cosine similarity
106
- similarity = cosine_similarity([resume_embedding], [job_embedding])[0][0]
107
- return similarity * 100
108
 
109
- def analyze_sections(self, resume_text, job_text):
110
- """Analyze different sections of resume vs job requirements"""
111
- # Common resume sections patterns
 
 
 
112
  sections = {
113
- 'experience': r'(experience|work history|employment|career|professional)',
114
- 'skills': r'(skills|competencies|technical|technologies|tools)',
115
- 'education': r'(education|degree|university|college|academic)',
116
- 'projects': r'(projects|portfolio|achievements|accomplishments)'
117
  }
118
 
119
- section_scores = {}
120
  for section, pattern in sections.items():
121
- # Extract relevant text from resume
122
- resume_section = self.extract_section_text(resume_text, pattern)
123
- if resume_section:
124
- score = self.get_semantic_similarity(resume_section, job_text)
125
- section_scores[section] = min(score, 100)
126
  else:
127
- section_scores[section] = 0
128
 
129
- return section_scores
130
-
131
- def extract_section_text(self, text, pattern):
132
- """Extract text from specific sections"""
133
- sentences = sent_tokenize(text)
134
- relevant_sentences = []
135
-
136
- for sentence in sentences:
137
- if re.search(pattern, sentence, re.IGNORECASE):
138
- relevant_sentences.append(sentence)
139
-
140
- # Also include sentences around matches for context
141
- for i, sentence in enumerate(sentences):
142
- if re.search(pattern, sentence, re.IGNORECASE):
143
- if i > 0:
144
- relevant_sentences.append(sentences[i-1])
145
- if i < len(sentences) - 1:
146
- relevant_sentences.append(sentences[i+1])
147
-
148
- return ' '.join(relevant_sentences)
149
 
150
- def generate_suggestions(self, resume_text, job_text, overall_score, section_scores, keyword_match_score):
151
- """Generate improvement suggestions"""
152
- suggestions = []
153
 
154
- if overall_score < 70:
155
- suggestions.append("πŸ“ˆ **Overall Score Enhancement**: Your resume needs significant improvement to match this job. Consider tailoring your resume more specifically to the job requirements.")
 
156
 
157
- if keyword_match_score < 40:
158
- job_keywords = self.extract_keywords(job_text, 15)
159
- resume_keywords = self.extract_keywords(resume_text, 15)
160
- missing_keywords = set(job_keywords) - set(resume_keywords)
161
- if missing_keywords:
162
- suggestions.append(f"πŸ”‘ **Missing Keywords**: Consider incorporating these relevant keywords: {', '.join(list(missing_keywords)[:8])}")
 
 
163
 
164
- # Section-specific suggestions
165
- if section_scores.get('skills', 0) < 50:
166
- suggestions.append("πŸ› οΈ **Skills Section**: Enhance your skills section to better match the job requirements. Include both technical and soft skills mentioned in the job description.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
 
168
- if section_scores.get('experience', 0) < 50:
169
- suggestions.append("πŸ’Ό **Experience Section**: Better highlight your relevant work experience. Use action verbs and quantify your achievements where possible.")
 
 
170
 
171
- if section_scores.get('education', 0) < 30 and 'education' in job_text.lower():
172
- suggestions.append("πŸŽ“ **Education Section**: If you have relevant educational background, make sure it's prominently featured and matches job requirements.")
173
 
174
- if overall_score > 80:
175
- suggestions.append("βœ… **Great Match**: Your resume shows strong alignment with this job! Consider minor tweaks to optimize further.")
176
- elif overall_score > 60:
177
- suggestions.append("πŸ‘ **Good Foundation**: You have a solid foundation. Focus on highlighting the most relevant experiences and skills.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
- # Always add a general suggestion
180
- suggestions.append("πŸ’‘ **Pro Tip**: Customize your resume for each application by emphasizing the most relevant experiences and using similar language to the job description.")
 
 
 
 
 
 
 
 
 
181
 
182
  return suggestions
183
 
184
- def process_files(self, resume_file, job_description):
185
- """Main processing function"""
186
  try:
187
- # Extract text from resume file
 
 
 
 
 
 
188
  if resume_file is None:
189
  return "Please upload a resume file.", "", "", ""
190
 
191
  if not job_description.strip():
192
  return "Please provide a job description.", "", "", ""
193
 
194
- # Handle different ways Gradio might pass the file
 
 
195
  if hasattr(resume_file, 'name'):
196
  filename = resume_file.name.lower()
197
- # Read the file content
198
  with open(resume_file.name, 'rb') as f:
199
  file_content = f.read()
200
  else:
201
- # If resume_file is already the file path (string)
202
  filename = str(resume_file).lower()
203
  with open(resume_file, 'rb') as f:
204
  file_content = f.read()
205
 
206
- # Determine file type and extract text
207
  if filename.endswith('.pdf'):
208
  resume_text = self.extract_text_from_pdf(file_content)
209
  elif filename.endswith('.docx'):
210
  resume_text = self.extract_text_from_docx(file_content)
211
  else:
212
- return f"Unsupported file format for file: {filename}. Please upload PDF or DOCX files.", "", "", ""
213
 
214
  if "Error reading" in resume_text:
215
  return resume_text, "", "", ""
216
 
 
 
217
  # Preprocess texts
218
  resume_clean = self.preprocess_text(resume_text)
219
  job_clean = self.preprocess_text(job_description)
220
 
221
- if len(resume_clean) < 50:
222
- return "Resume text is too short or couldn't be extracted properly.", "", "", ""
223
 
224
- # Calculate different scores
225
- semantic_score = self.get_semantic_similarity(resume_clean, job_clean)
226
 
227
- # Keyword matching
228
- resume_keywords = self.extract_keywords(resume_clean)
229
- job_keywords = self.extract_keywords(job_clean)
230
- keyword_score = self.calculate_keyword_match(resume_keywords, job_keywords)
231
 
232
- # Section analysis
233
- section_scores = self.analyze_sections(resume_clean, job_clean)
 
234
 
235
- # Calculate overall score (weighted average)
236
- overall_score = (
237
- semantic_score * 0.4 + # Semantic similarity (40%)
238
- keyword_score * 0.3 + # Keyword matching (30%)
239
- np.mean(list(section_scores.values())) * 0.3 # Section scores (30%)
240
  )
241
 
242
- overall_score = min(round(overall_score), 100) # Cap at 100
243
 
244
  # Generate suggestions
245
- suggestions = self.generate_suggestions(
246
- resume_clean, job_clean, overall_score, section_scores, keyword_score
247
  )
248
 
 
 
249
  # Format results
250
- score_text = f"# 🎯 Resume Match Score: {overall_score}/100\n\n"
251
 
252
- details = f"""## πŸ“Š Detailed Analysis
 
 
 
 
 
 
 
253
 
254
- **Semantic Similarity**: {semantic_score:.1f}/100
255
- **Keyword Match**: {keyword_score:.1f}/100
 
 
256
 
257
- ### Section Scores:
258
- - **Experience**: {section_scores.get('experience', 0):.1f}/100
259
- - **Skills**: {section_scores.get('skills', 0):.1f}/100
260
- - **Education**: {section_scores.get('education', 0):.1f}/100
261
- - **Projects**: {section_scores.get('projects', 0):.1f}/100
 
262
  """
263
 
264
- suggestions_text = "## πŸ’‘ Improvement Suggestions\n\n" + "\n\n".join(suggestions)
 
 
 
 
265
 
266
- # Keywords comparison
267
- common_keywords = set(resume_keywords[:10]).intersection(set(job_keywords[:10]))
268
  keywords_text = f"""## πŸ” Keyword Analysis
269
 
270
- **Top Resume Keywords**: {', '.join(resume_keywords[:10])}
271
 
272
- **Top Job Keywords**: {', '.join(job_keywords[:10])}
273
 
274
- **Matching Keywords**: {', '.join(common_keywords) if common_keywords else 'None found'}
275
  """
276
 
277
  return score_text, details, suggestions_text, keywords_text
278
 
279
  except Exception as e:
280
- return f"An error occurred: {str(e)}", "", "", ""
281
 
282
- # Initialize the matcher
283
- matcher = ResumeJobMatcher()
284
 
285
  # Create Gradio interface
286
  def create_interface():
287
- with gr.Blocks(title="Resume Job Matcher", theme=gr.themes.Soft()) as interface:
288
  gr.HTML("""
289
  <div style='text-align: center; padding: 20px;'>
290
- <h1>🎯 AI Resume Job Matcher</h1>
291
- <p>Upload your resume and paste the job description to get a compatibility score and improvement suggestions!</p>
292
  </div>
293
  """)
294
 
@@ -303,32 +442,32 @@ def create_interface():
303
 
304
  gr.HTML("<h3>πŸ“‹ Job Description</h3>")
305
  job_description = gr.Textbox(
306
- label="Paste Job Description",
307
- placeholder="Paste the complete job description here...",
308
- lines=10,
309
- max_lines=15
310
  )
311
 
312
- analyze_btn = gr.Button("πŸ” Analyze Match", variant="primary", size="lg")
313
 
314
  with gr.Column(scale=1):
315
- score_output = gr.Markdown(label="Match Score")
316
  details_output = gr.Markdown(label="Detailed Analysis")
317
- suggestions_output = gr.Markdown(label="Suggestions")
318
  keywords_output = gr.Markdown(label="Keywords Analysis")
319
 
320
- # Set up the event handler
321
  analyze_btn.click(
322
- fn=matcher.process_files,
323
  inputs=[resume_file, job_description],
324
  outputs=[score_output, details_output, suggestions_output, keywords_output]
325
  )
326
 
327
  gr.HTML("""
328
  <div style='text-align: center; padding: 20px; margin-top: 30px; border-top: 1px solid #ddd;'>
329
- <p><strong>How it works:</strong> This tool uses advanced AI to analyze semantic similarity between your resume and job description,
330
- performs keyword matching, and provides personalized suggestions for improvement.</p>
331
- <p><em>Supported formats: PDF, DOCX</em></p>
332
  </div>
333
  """)
334
 
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import pipeline, AutoTokenizer, AutoModel
4
  import PyPDF2
5
  import docx
6
  import io
 
10
  import nltk
11
  from collections import Counter
12
  import warnings
13
+ import time
14
  warnings.filterwarnings("ignore")
15
 
16
  # Download required NLTK data
 
35
  from nltk.corpus import stopwords
36
  from nltk.tokenize import word_tokenize, sent_tokenize
37
 
38
+ class ATSResumeAnalyzer:
39
  def __init__(self):
40
+ # Initialize models for different analysis tasks
41
+ self.progress_callback = None
42
+
43
+ # For semantic analysis - using a more powerful model
44
+ self.update_progress("πŸ”„ Loading AI models...", 10)
45
+
46
+ # Use a more sophisticated model for better analysis
47
+ try:
48
+ # BAAI/bge-small-en-v1.5 is excellent for semantic similarity and works on CPU
49
+ from sentence_transformers import SentenceTransformer
50
+ self.semantic_model = SentenceTransformer('BAAI/bge-small-en-v1.5')
51
+ except:
52
+ # Fallback to all-MiniLM if BGE is not available
53
+ from sentence_transformers import SentenceTransformer
54
+ self.semantic_model = SentenceTransformer('all-MiniLM-L6-v2')
55
+
56
+ # Initialize text generation pipeline for suggestions (using a small model)
57
+ try:
58
+ self.suggestion_generator = pipeline(
59
+ "text-generation",
60
+ model="microsoft/DialoGPT-small",
61
+ tokenizer="microsoft/DialoGPT-small",
62
+ device=-1 # CPU
63
+ )
64
+ except:
65
+ self.suggestion_generator = None
66
+
67
  self.stop_words = set(stopwords.words('english'))
68
 
69
+ # ATS Keywords categories
70
+ self.ats_categories = {
71
+ 'technical_skills': ['python', 'javascript', 'java', 'sql', 'aws', 'docker', 'kubernetes', 'react', 'angular', 'node.js', 'machine learning', 'data science', 'tensorflow', 'pytorch', 'git', 'linux', 'windows', 'azure', 'gcp', 'html', 'css', 'mongodb', 'postgresql', 'mysql', 'api', 'rest', 'graphql', 'microservices', 'agile', 'scrum', 'devops', 'ci/cd'],
72
+ 'soft_skills': ['leadership', 'communication', 'teamwork', 'problem solving', 'analytical', 'creative', 'adaptable', 'organized', 'detail oriented', 'time management', 'project management', 'collaboration', 'innovation', 'strategic thinking'],
73
+ 'experience_indicators': ['managed', 'led', 'developed', 'implemented', 'designed', 'created', 'improved', 'optimized', 'achieved', 'delivered', 'coordinated', 'executed', 'supervised', 'mentored', 'trained', 'built', 'established', 'streamlined'],
74
+ 'education_keywords': ['degree', 'bachelor', 'master', 'phd', 'certification', 'course', 'training', 'university', 'college', 'institute', 'graduated'],
75
+ 'industry_specific': [] # Will be populated based on job description
76
+ }
77
+
78
+ self.update_progress("βœ… Models loaded successfully!", 20)
79
+
80
+ def set_progress_callback(self, callback):
81
+ """Set the progress callback function"""
82
+ self.progress_callback = callback
83
+
84
+ def update_progress(self, message, progress):
85
+ """Update progress if callback is set"""
86
+ if self.progress_callback:
87
+ self.progress_callback(message, progress)
88
+ time.sleep(0.1) # Small delay for better UX
89
+
90
  def extract_text_from_pdf(self, pdf_file):
91
  """Extract text from PDF file"""
92
  try:
93
+ if isinstance(pdf_file, str):
94
+ with open(pdf_file, 'rb') as file:
95
+ pdf_reader = PyPDF2.PdfReader(file)
96
+ text = ""
97
+ for page in pdf_reader.pages:
98
+ text += page.extract_text() + "\n"
99
+ else:
100
+ pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
101
+ text = ""
102
+ for page in pdf_reader.pages:
103
+ text += page.extract_text() + "\n"
104
  return text
105
  except Exception as e:
106
  return f"Error reading PDF: {str(e)}"
 
108
  def extract_text_from_docx(self, docx_file):
109
  """Extract text from DOCX file"""
110
  try:
111
+ if isinstance(docx_file, str):
112
+ doc = docx.Document(docx_file)
113
+ else:
114
+ doc = docx.Document(io.BytesIO(docx_file))
115
  text = ""
116
  for paragraph in doc.paragraphs:
117
  text += paragraph.text + "\n"
 
123
  """Clean and preprocess text"""
124
  # Remove extra whitespace and normalize
125
  text = re.sub(r'\s+', ' ', text)
126
+ text = re.sub(r'[^\w\s.,()-]', ' ', text)
127
+ text = text.strip()
128
  return text
129
 
130
+ def extract_ats_keywords(self, text, job_text=""):
131
+ """Extract ATS-relevant keywords with weighting"""
132
+ text_lower = text.lower()
133
+ job_lower = job_text.lower() if job_text else ""
134
 
135
+ # Extract keywords by category
136
+ found_keywords = {}
 
 
 
 
 
 
 
137
 
138
+ for category, keywords in self.ats_categories.items():
139
+ found = []
140
+ for keyword in keywords:
141
+ if keyword in text_lower:
142
+ # Give extra weight if keyword is also in job description
143
+ weight = 2 if keyword in job_lower else 1
144
+ found.append((keyword, weight))
145
+ found_keywords[category] = found
146
 
147
+ # Extract custom keywords from job description
148
+ if job_text:
149
+ job_keywords = self.extract_job_specific_keywords(job_text)
150
+ found_keywords['job_specific'] = [(kw, 3) for kw in job_keywords if kw in text_lower]
151
+
152
+ return found_keywords
153
 
154
+ def extract_job_specific_keywords(self, job_text):
155
+ """Extract important keywords specific to the job posting"""
156
+ # Remove common job posting fluff
157
+ job_text = re.sub(r'(we are looking for|ideal candidate|requirements|qualifications|responsibilities)', '', job_text.lower())
158
+
159
+ words = word_tokenize(job_text.lower())
160
+ words = [word for word in words if word.isalpha() and word not in self.stop_words and len(word) > 3]
161
+
162
+ # Get most frequent words as job-specific keywords
163
+ word_freq = Counter(words)
164
+ job_keywords = [word for word, freq in word_freq.most_common(15) if freq >= 2]
165
+
166
+ return job_keywords
167
 
168
+ def analyze_resume_structure(self, resume_text):
169
+ """Analyze resume structure and format (ATS-friendly check)"""
170
+ structure_score = 100
171
+ issues = []
172
+
173
+ # Check for common sections
174
  sections = {
175
+ 'contact': r'(email|phone|@|linkedin|github)',
176
+ 'experience': r'(experience|work|employment|career)',
177
+ 'education': r'(education|degree|university|college)',
178
+ 'skills': r'(skills|technical|technologies|competencies)'
179
  }
180
 
181
+ found_sections = 0
182
  for section, pattern in sections.items():
183
+ if re.search(pattern, resume_text, re.IGNORECASE):
184
+ found_sections += 1
 
 
 
185
  else:
186
+ issues.append(f"Missing {section} section")
187
 
188
+ section_score = (found_sections / len(sections)) * 100
189
+
190
+ # Check for formatting issues
191
+ if len(resume_text.split('\n')) < 10:
192
+ structure_score -= 20
193
+ issues.append("Resume appears to lack proper formatting/structure")
194
+
195
+ # Check length
196
+ word_count = len(resume_text.split())
197
+ if word_count < 200:
198
+ structure_score -= 30
199
+ issues.append("Resume is too short (less than 200 words)")
200
+ elif word_count > 1000:
201
+ structure_score -= 10
202
+ issues.append("Resume might be too long for ATS systems")
203
+
204
+ return max(0, (structure_score + section_score) / 2), issues
 
 
 
205
 
206
+ def calculate_ats_score(self, resume_keywords, job_keywords, resume_text, job_text):
207
+ """Calculate ATS-style matching score"""
208
+ self.update_progress("πŸ€– Calculating ATS compatibility...", 60)
209
 
210
+ total_score = 0
211
+ max_possible_score = 0
212
+ category_scores = {}
213
 
214
+ # Weight different categories
215
+ category_weights = {
216
+ 'technical_skills': 0.35,
217
+ 'soft_skills': 0.15,
218
+ 'experience_indicators': 0.25,
219
+ 'education_keywords': 0.10,
220
+ 'job_specific': 0.15
221
+ }
222
 
223
+ for category, weight in category_weights.items():
224
+ max_possible_score += weight * 100
225
+
226
+ if category in resume_keywords and category in job_keywords:
227
+ resume_kw = dict(resume_keywords[category])
228
+ job_kw = dict(job_keywords[category]) if isinstance(job_keywords[category][0], tuple) else {kw: 1 for kw in job_keywords[category]}
229
+
230
+ if job_kw: # Only score if there are job keywords in this category
231
+ matched_score = 0
232
+ for kw, weight_val in resume_kw.items():
233
+ if kw in job_kw:
234
+ matched_score += weight_val * job_kw[kw]
235
+
236
+ category_score = min(100, (matched_score / max(1, sum(job_kw.values()))) * 100)
237
+ category_scores[category] = category_score
238
+ total_score += weight * category_score
239
+ else:
240
+ category_scores[category] = 0
241
+ else:
242
+ category_scores[category] = 0
243
 
244
+ # Semantic similarity bonus
245
+ semantic_score = self.get_semantic_similarity(resume_text, job_text)
246
+ total_score += 0.2 * semantic_score # 20% weight for semantic similarity
247
+ max_possible_score += 0.2 * 100
248
 
249
+ final_score = min(100, (total_score / max_possible_score) * 100)
 
250
 
251
+ return final_score, category_scores, semantic_score
252
+
253
+ def get_semantic_similarity(self, resume_text, job_text):
254
+ """Calculate semantic similarity using transformer model"""
255
+ try:
256
+ # Encode texts
257
+ resume_embedding = self.semantic_model.encode(resume_text)
258
+ job_embedding = self.semantic_model.encode(job_text)
259
+
260
+ # Calculate cosine similarity
261
+ similarity = cosine_similarity([resume_embedding], [job_embedding])[0][0]
262
+ return max(0, similarity * 100)
263
+ except Exception as e:
264
+ # Fallback to simple word overlap
265
+ resume_words = set(resume_text.lower().split())
266
+ job_words = set(job_text.lower().split())
267
+ overlap = len(resume_words.intersection(job_words))
268
+ return min(100, (overlap / len(job_words)) * 100) if job_words else 0
269
+
270
+ def generate_ats_suggestions(self, resume_keywords, job_keywords, category_scores, structure_score, structure_issues):
271
+ """Generate ATS-specific improvement suggestions"""
272
+ suggestions = []
273
+
274
+ # Structure suggestions
275
+ if structure_score < 80:
276
+ suggestions.append(f"πŸ“‹ **Resume Structure** (Score: {structure_score:.0f}/100): " +
277
+ f"Improve resume formatting. Issues found: {', '.join(structure_issues)}")
278
+
279
+ # Category-specific suggestions
280
+ for category, score in category_scores.items():
281
+ if score < 60:
282
+ category_name = category.replace('_', ' ').title()
283
+ if category == 'technical_skills':
284
+ suggestions.append(f"πŸ’» **{category_name}** (Score: {score:.0f}/100): Add more relevant technical skills mentioned in the job description. Consider including specific tools, programming languages, or technologies.")
285
+ elif category == 'experience_indicators':
286
+ suggestions.append(f"πŸ“ˆ **{category_name}** (Score: {score:.0f}/100): Use more action verbs like 'managed', 'developed', 'implemented', 'led' to describe your achievements.")
287
+ elif category == 'job_specific':
288
+ suggestions.append(f"🎯 **{category_name}** (Score: {score:.0f}/100): Include more keywords that are specific to this job posting.")
289
+ else:
290
+ suggestions.append(f"πŸ”§ **{category_name}** (Score: {score:.0f}/100): Enhance this section to better match job requirements.")
291
 
292
+ # Overall suggestions based on total score
293
+ overall_score = np.mean(list(category_scores.values()))
294
+ if overall_score < 40:
295
+ suggestions.append("🚨 **Critical**: Your resume needs significant optimization for ATS systems. Consider using more keywords from the job description.")
296
+ elif overall_score < 70:
297
+ suggestions.append("⚠️ **Moderate**: Your resume has good potential but needs keyword optimization to improve ATS compatibility.")
298
+ else:
299
+ suggestions.append("βœ… **Good**: Your resume shows strong ATS compatibility. Minor tweaks could make it even better.")
300
+
301
+ # Add specific actionable suggestions
302
+ suggestions.append("πŸ’‘ **ATS Tips**: Use standard section headings, include keywords naturally in context, quantify achievements with numbers, and save as PDF to preserve formatting.")
303
 
304
  return suggestions
305
 
306
+ def process_resume_analysis(self, resume_file, job_description, progress=gr.Progress()):
307
+ """Main processing function with progress tracking"""
308
  try:
309
+ # Set up progress tracking
310
+ def update_progress_ui(message, prog):
311
+ progress(prog/100, desc=message)
312
+
313
+ self.set_progress_callback(update_progress_ui)
314
+
315
+ # Validation
316
  if resume_file is None:
317
  return "Please upload a resume file.", "", "", ""
318
 
319
  if not job_description.strip():
320
  return "Please provide a job description.", "", "", ""
321
 
322
+ self.update_progress("πŸ“„ Reading resume file...", 30)
323
+
324
+ # Extract text from resume
325
  if hasattr(resume_file, 'name'):
326
  filename = resume_file.name.lower()
 
327
  with open(resume_file.name, 'rb') as f:
328
  file_content = f.read()
329
  else:
 
330
  filename = str(resume_file).lower()
331
  with open(resume_file, 'rb') as f:
332
  file_content = f.read()
333
 
 
334
  if filename.endswith('.pdf'):
335
  resume_text = self.extract_text_from_pdf(file_content)
336
  elif filename.endswith('.docx'):
337
  resume_text = self.extract_text_from_docx(file_content)
338
  else:
339
+ return f"Unsupported file format: {filename}. Please upload PDF or DOCX files.", "", "", ""
340
 
341
  if "Error reading" in resume_text:
342
  return resume_text, "", "", ""
343
 
344
+ self.update_progress("πŸ” Analyzing resume structure...", 40)
345
+
346
  # Preprocess texts
347
  resume_clean = self.preprocess_text(resume_text)
348
  job_clean = self.preprocess_text(job_description)
349
 
350
+ if len(resume_clean.split()) < 50:
351
+ return "Resume text is too short or couldn't be extracted properly. Please ensure your PDF/DOCX contains readable text.", "", "", ""
352
 
353
+ # Structure analysis
354
+ structure_score, structure_issues = self.analyze_resume_structure(resume_clean)
355
 
356
+ self.update_progress("🎯 Extracting ATS keywords...", 50)
 
 
 
357
 
358
+ # Extract ATS keywords
359
+ resume_keywords = self.extract_ats_keywords(resume_clean, job_clean)
360
+ job_keywords = self.extract_ats_keywords(job_clean)
361
 
362
+ # Calculate ATS score
363
+ ats_score, category_scores, semantic_score = self.calculate_ats_score(
364
+ resume_keywords, job_keywords, resume_clean, job_clean
 
 
365
  )
366
 
367
+ self.update_progress("πŸ’‘ Generating improvement suggestions...", 80)
368
 
369
  # Generate suggestions
370
+ suggestions = self.generate_ats_suggestions(
371
+ resume_keywords, job_keywords, category_scores, structure_score, structure_issues
372
  )
373
 
374
+ self.update_progress("βœ… Analysis complete!", 100)
375
+
376
  # Format results
377
+ score_text = f"# 🎯 ATS Compatibility Score: {ats_score:.0f}/100\n\n"
378
 
379
+ if ats_score >= 80:
380
+ score_text += "🟒 **Excellent ATS Compatibility** - Your resume should pass most ATS systems"
381
+ elif ats_score >= 60:
382
+ score_text += "🟑 **Good ATS Compatibility** - Some improvements recommended"
383
+ elif ats_score >= 40:
384
+ score_text += "🟠 **Moderate ATS Compatibility** - Significant improvements needed"
385
+ else:
386
+ score_text += "πŸ”΄ **Poor ATS Compatibility** - Major optimization required"
387
 
388
+ details = f"""## πŸ“Š Detailed ATS Analysis
389
+
390
+ **Overall Structure Score**: {structure_score:.1f}/100
391
+ **Semantic Match**: {semantic_score:.1f}/100
392
 
393
+ ### Category Breakdown:
394
+ - **Technical Skills**: {category_scores.get('technical_skills', 0):.1f}/100
395
+ - **Experience Indicators**: {category_scores.get('experience_indicators', 0):.1f}/100
396
+ - **Job-Specific Keywords**: {category_scores.get('job_specific', 0):.1f}/100
397
+ - **Soft Skills**: {category_scores.get('soft_skills', 0):.1f}/100
398
+ - **Education Keywords**: {category_scores.get('education_keywords', 0):.1f}/100
399
  """
400
 
401
+ suggestions_text = "## πŸ’‘ ATS Optimization Suggestions\n\n" + "\n\n".join(suggestions)
402
+
403
+ # Keywords analysis
404
+ resume_tech_kw = [kw for kw, _ in resume_keywords.get('technical_skills', [])]
405
+ job_specific_kw = [kw for kw, _ in resume_keywords.get('job_specific', [])]
406
 
 
 
407
  keywords_text = f"""## πŸ” Keyword Analysis
408
 
409
+ **Technical Skills Found**: {', '.join(resume_tech_kw[:10]) if resume_tech_kw else 'None detected'}
410
 
411
+ **Job-Specific Keywords Found**: {', '.join(job_specific_kw[:10]) if job_specific_kw else 'None detected'}
412
 
413
+ **ATS Tip**: Ensure keywords appear naturally in context, not just in a skills list.
414
  """
415
 
416
  return score_text, details, suggestions_text, keywords_text
417
 
418
  except Exception as e:
419
+ return f"An error occurred during analysis: {str(e)}", "", "", ""
420
 
421
+ # Initialize the analyzer
422
+ analyzer = ATSResumeAnalyzer()
423
 
424
  # Create Gradio interface
425
  def create_interface():
426
+ with gr.Blocks(title="ATS Resume Analyzer", theme=gr.themes.Soft()) as interface:
427
  gr.HTML("""
428
  <div style='text-align: center; padding: 20px;'>
429
+ <h1>πŸ€– AI-Powered ATS Resume Analyzer</h1>
430
+ <p>Get your resume analyzed like real ATS systems! Upload your resume and job description to receive detailed compatibility scoring and optimization suggestions.</p>
431
  </div>
432
  """)
433
 
 
442
 
443
  gr.HTML("<h3>πŸ“‹ Job Description</h3>")
444
  job_description = gr.Textbox(
445
+ label="Paste Complete Job Description",
446
+ placeholder="Paste the full job description including requirements, qualifications, and responsibilities...",
447
+ lines=12,
448
+ max_lines=20
449
  )
450
 
451
+ analyze_btn = gr.Button("πŸš€ Analyze with ATS", variant="primary", size="lg")
452
 
453
  with gr.Column(scale=1):
454
+ score_output = gr.Markdown(label="ATS Compatibility Score")
455
  details_output = gr.Markdown(label="Detailed Analysis")
456
+ suggestions_output = gr.Markdown(label="Optimization Suggestions")
457
  keywords_output = gr.Markdown(label="Keywords Analysis")
458
 
459
+ # Set up the event handler with progress tracking
460
  analyze_btn.click(
461
+ fn=analyzer.process_resume_analysis,
462
  inputs=[resume_file, job_description],
463
  outputs=[score_output, details_output, suggestions_output, keywords_output]
464
  )
465
 
466
  gr.HTML("""
467
  <div style='text-align: center; padding: 20px; margin-top: 30px; border-top: 1px solid #ddd;'>
468
+ <p><strong>🎯 ATS-Powered Analysis:</strong> This tool simulates real ATS (Applicant Tracking System) behavior using advanced AI models for keyword extraction, semantic analysis, and resume structure evaluation.</p>
469
+ <p><strong>πŸ“ˆ What makes this different:</strong> Unlike simple keyword matching, this analyzer considers context, semantic meaning, industry-specific terms, and proper resume structure - just like enterprise ATS systems.</p>
470
+ <p><em>Supported formats: PDF, DOCX | Optimized for CPU performance</em></p>
471
  </div>
472
  """)
473