loki2910 committed on
Commit
fc2dad3
·
verified ·
1 Parent(s): 0a5c82c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -244
app.py CHANGED
@@ -3,7 +3,7 @@ import os
3
  import re
4
  import tempfile
5
  import traceback
6
- from typing import Tuple, Dict, List
7
 
8
  import fitz # PyMuPDF
9
  import docx # python-docx
@@ -46,27 +46,18 @@ EN_STOPWORDS = {
46
  }
47
 
48
  # --------------------------
49
- # Job Suggestions Database - Updated for better accuracy
50
  # --------------------------
51
  JOB_SUGGESTIONS_DB = {
52
- "Data Scientist": {"python", "sql", "machine", "learning", "tensorflow", "pytorch", "analysis", "pandas", "numpy"},
53
- "Data Analyst": {"sql", "python", "excel", "tableau", "analysis", "statistics", "visualization"},
54
- "Backend Developer": {"python", "java", "sql", "docker", "aws", "api", "git", "rest", "microservices"},
55
- "Frontend Developer": {"react", "javascript", "html", "css", "git", "ui", "ux", "typescript"},
56
- "Full-Stack Developer": {"python", "javascript", "react", "sql", "docker", "git", "nodejs"},
57
- "Machine Learning Engineer": {"python", "tensorflow", "pytorch", "machine", "learning", "docker", "cloud", "aws", "gcp"},
58
- "Project Manager": {"agile", "scrum", "project", "management", "jira", "confluence", "planning"}
59
  }
60
 
61
- # --------------------------
62
- # Enhanced keyword sets for specific job roles
63
- # --------------------------
64
- ML_ENGINEERING_KEYWORDS = {
65
- "technical_skills": {"python", "machine", "learning", "tensorflow", "pytorch", "docker", "aws", "cloud", "sql", "git", "unix", "command", "line"},
66
- "systems": {"ml", "systems", "data", "storage", "database", "api", "integration"},
67
- "methodologies": {"agile", "scrum", "entrepreneurial", "distributed", "team"},
68
- "soft_skills": {"collaboration", "communication", "problem", "solving", "initiative"}
69
- }
70
 
71
  # --------------------------
72
  # Utilities: text extraction
@@ -109,7 +100,7 @@ def extract_text_from_fileobj(file_obj) -> Tuple[str, str]:
109
 
110
 
111
  # --------------------------
112
- # Text preprocessing - Enhanced with better cleaning
113
  # --------------------------
114
  def preprocess_text(text: str, remove_stopwords: bool = True) -> str:
115
  if not text:
@@ -123,85 +114,6 @@ def preprocess_text(text: str, remove_stopwords: bool = True) -> str:
123
  return " ".join(words)
124
 
125
 
126
- # --------------------------
127
- # Enhanced section extraction
128
- # --------------------------
129
- def extract_resume_sections(resume_text: str) -> Dict:
130
- sections = {
131
- "summary": "",
132
- "skills": "",
133
- "experience": "",
134
- "projects": "",
135
- "education": "",
136
- "certifications": ""
137
- }
138
-
139
- lines = resume_text.split('\n')
140
- current_section = None
141
-
142
- for line in lines:
143
- line_lower = line.strip().lower()
144
-
145
- # Identify section headers
146
- if any(keyword in line_lower for keyword in ["summary", "objective"]):
147
- current_section = "summary"
148
- continue
149
- elif any(keyword in line_lower for keyword in ["skills", "technical skills", "programming languages"]):
150
- current_section = "skills"
151
- continue
152
- elif any(keyword in line_lower for keyword in ["experience", "work experience", "employment"]):
153
- current_section = "experience"
154
- continue
155
- elif any(keyword in line_lower for keyword in ["projects", "personal projects", "academic projects"]):
156
- current_section = "projects"
157
- continue
158
- elif any(keyword in line_lower for keyword in ["education", "academic background"]):
159
- current_section = "education"
160
- continue
161
- elif any(keyword in line_lower for keyword in ["certifications", "certification", "licenses"]):
162
- current_section = "certifications"
163
- continue
164
-
165
- # Add line to current section
166
- if current_section and line.strip():
167
- sections[current_section] += line + "\n"
168
-
169
- return sections
170
-
171
-
172
- def extract_job_requirements(job_text: str) -> Dict:
173
- requirements = {
174
- "technical": "",
175
- "experience": "",
176
- "education": "",
177
- "qualifications": ""
178
- }
179
-
180
- lines = job_text.split('\n')
181
- current_section = None
182
-
183
- for line in lines:
184
- line_lower = line.strip().lower()
185
-
186
- if any(keyword in line_lower for keyword in ["requirements", "qualifications", "what we're looking for"]):
187
- current_section = "qualifications"
188
- continue
189
- elif any(keyword in line_lower for keyword in ["technical skills", "skills required", "requirements"]):
190
- current_section = "technical"
191
- continue
192
- elif any(keyword in line_lower for keyword in ["experience", "years of experience"]):
193
- current_section = "experience"
194
- continue
195
- elif any(keyword in line_lower for keyword in ["education", "degree", "qualification"]):
196
- current_section = "education"
197
- continue
198
-
199
- if current_section and line.strip():
200
- requirements[current_section] += line + "\n"
201
-
202
- return requirements
203
-
204
-
205
  # --------------------------
206
  # Embedding helpers
207
  # --------------------------
@@ -226,107 +138,51 @@ def calculate_similarity(resume_text: str, job_text: str, mode: str = "sbert") -
226
 
227
 
228
  # --------------------------
229
- # Enhanced keyword analysis with weighted scoring
230
  # --------------------------
231
- def calculate_technical_match(resume_skills: str, job_requirements: str, weight: float = 0.4) -> float:
232
- if not resume_skills or not job_requirements:
233
- return 0.0
234
-
235
- resume_clean = preprocess_text(resume_skills)
236
- job_clean = preprocess_text(job_requirements)
237
-
238
- resume_words = set(resume_clean.split())
239
- job_words = set(job_clean.split())
240
-
241
- # Use ML_ENGINEERING_KEYWORDS for specific role matching
242
- ml_keywords = ML_ENGINEERING_KEYWORDS["technical_skills"]
243
- matched_keywords = resume_words.intersection(ml_keywords)
244
- total_keywords = len(ml_keywords)
245
-
246
- if total_keywords == 0:
247
- return 0.0
248
-
249
- match_score = (len(matched_keywords) / total_keywords) * 100
250
- return match_score * weight
251
-
252
-
253
- def calculate_experience_match(resume_exp: str, job_exp: str, weight: float = 0.3) -> float:
254
- if not resume_exp or not job_exp:
255
- return 0.0
256
-
257
- sim = calculate_similarity(resume_exp, job_exp)
258
- return sim * weight
259
-
260
-
261
- def calculate_education_match(resume_edu: str, job_edu: str, weight: float = 0.15) -> float:
262
- if not resume_edu or not job_edu:
263
- return 0.0
264
-
265
- sim = calculate_similarity(resume_edu, job_edu)
266
- return sim * weight
267
-
268
-
269
- def calculate_project_match(resume_projects: str, job_projects: str, weight: float = 0.15) -> float:
270
- if not resume_projects or not job_projects:
271
- return 0.0
272
-
273
- sim = calculate_similarity(resume_projects, job_projects)
274
- return sim * weight
275
-
276
-
277
- def analyze_resume_with_context(resume_text: str, job_description: str) -> Dict:
278
- # Extract sections
279
- resume_sections = extract_resume_sections(resume_text)
280
- job_requirements = extract_job_requirements(job_description)
281
-
282
- # Calculate weighted scores
283
- technical_score = calculate_technical_match(
284
- resume_sections["skills"],
285
- job_requirements["technical"]
286
- )
287
-
288
- experience_score = calculate_experience_match(
289
- resume_sections["experience"],
290
- job_requirements["experience"]
291
- )
292
-
293
- education_score = calculate_education_match(
294
- resume_sections["education"],
295
- job_requirements["education"]
296
- )
297
-
298
- project_score = calculate_project_match(
299
- resume_sections["projects"],
300
- job_requirements.get("qualifications", "")
301
- )
302
-
303
- # Calculate overall score
304
- overall_score = technical_score + experience_score + education_score + project_score
305
-
306
- # Generate insights
307
- insights = []
308
- if technical_score < 30:
309
- insights.append("⚠️ Consider adding more technical skills mentioned in the job description")
310
- if experience_score < 20:
311
- insights.append("⚠️ Highlight relevant experience that matches the job requirements")
312
- if project_score < 15:
313
- insights.append("⚠️ Showcase projects that demonstrate required skills")
314
-
315
- if not insights:
316
- insights.append("✅ Your resume shows good alignment with the job requirements")
317
-
318
- return {
319
- "overall_score": overall_score,
320
- "technical_score": technical_score,
321
- "experience_score": experience_score,
322
- "education_score": education_score,
323
- "project_score": project_score,
324
- "insights": "\n".join(insights)
325
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
 
327
 
328
  # --------------------------
329
- # Project Section Analysis - Enhanced
330
  # --------------------------
331
  def extract_projects_section(resume_text: str) -> str:
332
  project_headings = ["projects", "personal projects", "academic projects", "portfolio"]
@@ -337,22 +193,18 @@ def extract_projects_section(resume_text: str) -> str:
337
  lines = resume_text.split('\n')
338
  start_index = -1
339
  end_index = len(lines)
340
-
341
  for i, line in enumerate(lines):
342
  cleaned_line = line.strip().lower()
343
- if any(heading in cleaned_line for heading in project_headings):
344
  start_index = i
345
  break
346
-
347
  if start_index == -1:
348
  return "Could not automatically identify a 'Projects' section in this resume."
349
-
350
  for i in range(start_index + 1, len(lines)):
351
- cleaned_line = lines[i].strip().lower()
352
- if len(cleaned_line.split()) < 4 and any(heading in cleaned_line for heading in end_headings):
353
  end_index = i
354
  break
355
-
356
  project_section_lines = lines[start_index:end_index]
357
  return "\n".join(project_section_lines)
358
 
@@ -423,7 +275,7 @@ def extract_top_keywords(text: str, top_n: int = 15) -> str:
423
 
424
 
425
  # --------------------------
426
- # Main Gradio app logic - Enhanced with context analysis
427
  # --------------------------
428
  def analyze_resume(file, job_description: str, mode: str):
429
  if file is None or not job_description.strip():
@@ -434,55 +286,34 @@ def analyze_resume(file, job_description: str, mode: str):
434
  if resume_text.strip().startswith("[Error"):
435
  raise RuntimeError(resume_text)
436
 
437
- # Enhanced analysis with context
438
- analysis_results = analyze_resume_with_context(resume_text, job_description)
439
- overall_score = analysis_results["overall_score"]
440
-
441
- # Generate verdict based on overall score
442
- if overall_score >= 80:
443
- verdict = f"<h3 style='color:green;'>✅ Excellent Match ({overall_score:.2f}%)</h3>"
444
- elif overall_score >= 60:
445
- verdict = f"<h3 style='color:limegreen;'>👍 Good Match ({overall_score:.2f}%)</h3>"
446
- elif overall_score >= 40:
447
- verdict = f"<h3 style='color:orange;'>⚠️ Fair Match ({overall_score:.2f}%)</h3>"
448
  else:
449
- verdict = f"<h3 style='color:red;'>❌ Low Match ({overall_score:.2f}%)</h3>"
450
-
451
- # Extract sections for display
452
- resume_sections = extract_resume_sections(resume_text)
453
- job_requirements = extract_job_requirements(job_description)
454
-
455
- # Generate suggestions
456
- suggestions = []
457
- if analysis_results["technical_score"] < 30:
458
- suggestions.append("Add more technical skills mentioned in the job description")
459
- if analysis_results["experience_score"] < 20:
460
- suggestions.append("Highlight relevant experience that matches the job requirements")
461
- if analysis_results["project_score"] < 15:
462
- suggestions.append("Showcase projects that demonstrate required skills")
463
-
464
- suggestions_text = "\n".join(f"- {s}" for s in suggestions) if suggestions else "Great job! Your resume shows good alignment with the job requirements."
465
-
466
- # Job suggestions
467
  job_suggestions = suggest_jobs(resume_text)
468
 
469
- # Project analysis
470
  projects_section = extract_projects_section(resume_text)
471
  project_fit_verdict = analyze_projects_fit(projects_section, job_description, mode)
472
 
473
- # Keyword extraction
474
- resume_keywords_text = extract_top_keywords(preprocess_text(resume_text))
475
- jd_keywords_text = extract_top_keywords(preprocess_text(job_description))
476
 
477
  return (
478
- float(overall_score), verdict,
479
- f"### 📊 Detailed Breakdown\n- Technical Skills: {analysis_results['technical_score']:.2f}%\n- Experience: {analysis_results['experience_score']:.2f}%\n- Education: {analysis_results['education_score']:.2f}%\n- Projects: {analysis_results['project_score']:.2f}%",
480
- suggestions_text,
481
- job_suggestions,
482
- projects_section,
483
- project_fit_verdict,
484
- resume_keywords_text,
485
- jd_keywords_text
486
  )
487
 
488
  except Exception as e:
@@ -561,4 +392,4 @@ def build_ui():
561
  if __name__ == "__main__":
562
  demo = build_ui()
563
  demo.launch()
564
- #demo.launch(server_name="0.0.0.0")
 
3
  import re
4
  import tempfile
5
  import traceback
6
+ from typing import Tuple, Dict
7
 
8
  import fitz # PyMuPDF
9
  import docx # python-docx
 
46
  }
47
 
48
  # --------------------------
49
+ # Job Suggestions Database
50
  # --------------------------
51
  JOB_SUGGESTIONS_DB = {
52
+ "Data Scientist": {"python", "sql", "machine", "learning", "tensorflow", "pytorch", "analysis"},
53
+ "Data Analyst": {"sql", "python", "excel", "tableau", "analysis", "statistics"},
54
+ "Backend Developer": {"python", "java", "sql", "docker", "aws", "api", "git"},
55
+ "Frontend Developer": {"react", "javascript", "html", "css", "git", "ui", "ux"},
56
+ "Full-Stack Developer": {"python", "javascript", "react", "sql", "docker", "git"},
57
+ "Machine Learning Engineer": {"python", "tensorflow", "pytorch", "machine", "learning", "docker", "cloud"},
58
+ "Project Manager": {"agile", "scrum", "project", "management", "jira"}
59
  }
60
 
 
 
 
 
 
 
 
 
 
61
 
62
  # --------------------------
63
  # Utilities: text extraction
 
100
 
101
 
102
  # --------------------------
103
+ # Text preprocessing
104
  # --------------------------
105
  def preprocess_text(text: str, remove_stopwords: bool = True) -> str:
106
  if not text:
 
114
  return " ".join(words)
115
 
116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  # --------------------------
118
  # Embedding helpers
119
  # --------------------------
 
138
 
139
 
140
  # --------------------------
141
+ # Keyword analysis
142
  # --------------------------
143
+ DEFAULT_KEYWORDS = {
144
+ "skills": {"python", "nlp", "java", "sql", "tensorflow", "pytorch", "docker", "git", "react", "cloud", "aws",
145
+ "azure"},
146
+ "concepts": {"machine", "learning", "data", "analysis", "nlp", "vision", "agile", "scrum"},
147
+ "roles": {"software", "engineer", "developer", "manager", "scientist", "analyst", "architect"},
148
+ }
149
+
150
+
151
+ def analyze_resume_keywords(resume_text: str, job_description: str):
152
+ clean_resume = preprocess_text(resume_text)
153
+ clean_job = preprocess_text(job_description)
154
+ resume_words = set(clean_resume.split())
155
+ job_words = set(clean_job.split())
156
+ missing = {}
157
+ for cat, kws in DEFAULT_KEYWORDS.items():
158
+ missing_from_cat = [kw for kw in kws if kw in job_words and kw not in resume_words]
159
+ if missing_from_cat:
160
+ missing[cat] = sorted(missing_from_cat)
161
+ low_resume = (resume_text or "").lower()
162
+ sections_present = {
163
+ "skills": "skills" in low_resume,
164
+ "experience": "experience" in low_resume or "employment" in low_resume,
165
+ "summary": "summary" in low_resume or "objective" in low_resume,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  }
167
+ suggestions = []
168
+ if any(missing.values()):
169
+ for cat, kws in missing.items():
170
+ for kw in kws:
171
+ if cat == "skills":
172
+ suggestions.append(f"Add keyword '{kw}' to your Skills section." if sections_present[
173
+ "skills"] else f"Consider creating a Skills section to include '{kw}'.")
174
+ elif cat == "concepts":
175
+ suggestions.append(
176
+ f"Try to demonstrate your knowledge of '{kw}' in your Experience or Projects section.")
177
+ elif cat == "roles":
178
+ suggestions.append(f"Align your Summary/Objective to mention the title '{kw}'.")
179
+ else:
180
+ suggestions.append("Great job! Your resume contains many of the keywords found in the job description.")
181
+ return missing, "\n".join(f"- {s}" for s in suggestions)
182
 
183
 
184
  # --------------------------
185
+ # Project Section Analysis
186
  # --------------------------
187
  def extract_projects_section(resume_text: str) -> str:
188
  project_headings = ["projects", "personal projects", "academic projects", "portfolio"]
 
193
  lines = resume_text.split('\n')
194
  start_index = -1
195
  end_index = len(lines)
 
196
  for i, line in enumerate(lines):
197
  cleaned_line = line.strip().lower()
198
+ if cleaned_line in project_headings:
199
  start_index = i
200
  break
 
201
  if start_index == -1:
202
  return "Could not automatically identify a 'Projects' section in this resume."
 
203
  for i in range(start_index + 1, len(lines)):
204
+ cleaned_line = line.strip().lower()
205
+ if len(cleaned_line.split()) < 4 and cleaned_line in end_headings:
206
  end_index = i
207
  break
 
208
  project_section_lines = lines[start_index:end_index]
209
  return "\n".join(project_section_lines)
210
 
 
275
 
276
 
277
  # --------------------------
278
+ # Main Gradio app logic
279
  # --------------------------
280
  def analyze_resume(file, job_description: str, mode: str):
281
  if file is None or not job_description.strip():
 
286
  if resume_text.strip().startswith("[Error"):
287
  raise RuntimeError(resume_text)
288
 
289
+ cleaned_resume = preprocess_text(resume_text)
290
+ cleaned_job = preprocess_text(job_description)
291
+
292
+ sim_pct = calculate_similarity(cleaned_resume, cleaned_job, mode=mode)
293
+
294
+ if sim_pct >= 80:
295
+ verdict = f"<h3 style='color:green;'>✅ Excellent Match ({sim_pct:.2f}%)</h3>"
296
+ elif sim_pct >= 60:
297
+ verdict = f"<h3 style='color:limegreen;'>👍 Good Match ({sim_pct:.2f}%)</h3>"
298
+ elif sim_pct >= 40:
299
+ verdict = f"<h3 style='color:orange;'>⚠️ Fair Match ({sim_pct:.2f}%)</h3>"
300
  else:
301
+ verdict = f"<h3 style='color:red;'>❌ Low Match ({sim_pct:.2f}%)</h3>"
302
+
303
+ missing_dict, suggestions_text = analyze_resume_keywords(resume_text, job_description)
304
+
305
+ missing_formatted = format_missing_keywords(missing_dict)
 
 
 
 
 
 
 
 
 
 
 
 
 
306
  job_suggestions = suggest_jobs(resume_text)
307
 
 
308
  projects_section = extract_projects_section(resume_text)
309
  project_fit_verdict = analyze_projects_fit(projects_section, job_description, mode)
310
 
311
+ resume_keywords_text = extract_top_keywords(cleaned_resume)
312
+ jd_keywords_text = extract_top_keywords(cleaned_job)
 
313
 
314
  return (
315
+ float(sim_pct), verdict, missing_formatted, suggestions_text,
316
+ job_suggestions, projects_section, project_fit_verdict, resume_keywords_text, jd_keywords_text
 
 
 
 
 
 
317
  )
318
 
319
  except Exception as e:
 
392
  if __name__ == "__main__":
393
  demo = build_ui()
394
  demo.launch()
395
+ #demo.launch(server_name="0.0.0.0")