Mangesh223 committed on
Commit
9c66cce
·
verified ·
1 Parent(s): c9c405f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +125 -66
app.py CHANGED
@@ -14,9 +14,23 @@ login(token=os.getenv("HF_TOKEN"))
14
 
15
  # Precompiled regex patterns
16
  YEAR_PATTERN = re.compile(r'\d{4}\s*[-–]\s*(?:Present|\d{4})')
17
- ACHIEVEMENT_PATTERN = re.compile(r'(increased|reduced|saved|improved)\s+by\s+(\d+%|\$\d+)', re.I)
18
  TYPO_PATTERN = re.compile(r'\b(?:responsibilities|accomplishment|experiance)\b', re.I)
19
- SECTION_PATTERN = re.compile(r'^(experience|skills|education|projects|achievements)\s*:?', re.I | re.M)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  def extract_text_from_pdf(pdf_file):
22
  """Extract text from PDF with detailed error handling"""
@@ -40,7 +54,7 @@ def extract_text_from_pdf(pdf_file):
40
  if not text.strip():
41
  raise ValueError("No text extracted from PDF (possibly image-based or empty)")
42
 
43
- return text[:10000] # Limit to first 10,000 characters
44
  except PyPDF2.errors.PdfReadError as e:
45
  raise Exception(f"PDF read error: {str(e)}")
46
  except Exception as e:
@@ -48,53 +62,74 @@ def extract_text_from_pdf(pdf_file):
48
  finally:
49
  gc.collect()
50
 
51
- def extract_keywords(job_desc):
52
- """Extract key skills, tools, and qualifications from job description"""
53
  if not job_desc:
54
- return set()
55
 
56
  job_lower = job_desc.lower()
57
- # Common skills/tools pattern (customize based on your domain)
58
- skill_pattern = re.compile(r'\b(python|sql|excel|java|project management|communication|teamwork|aws|docker|[a-z]{2,}\d*)\b', re.I)
59
  keywords = set(skill_pattern.findall(job_lower))
60
- # Boost priority for repeated terms
61
- for word in set(re.findall(r'\w+', job_lower)):
62
- if job_lower.count(word) > 2 and len(word) > 3: # Frequent, non-trivial words
63
- keywords.add(word)
64
- return keywords
65
-
66
- def calculate_scores(resume_text, job_desc=None):
67
- """Smart scoring tailored to job description"""
 
 
 
 
 
 
 
 
68
  resume_lower = resume_text.lower()
69
  scores = {
70
- "relevance_to_job": 0,
71
- "experience_quality": 0,
72
- "skills_match": 0,
73
- "education": 0,
74
- "achievements": 0,
75
- "clarity": 10 - min(8, len(TYPO_PATTERN.findall(resume_text))),
76
- "customization": 0
77
  }
78
 
79
- job_keywords = extract_keywords(job_desc) if job_desc else set()
80
  resume_words = set(re.findall(r'\w+', resume_lower))
81
 
82
- # Relevance: Exact matches with job keywords
 
 
 
 
 
 
 
 
 
 
 
83
  if job_keywords:
84
- matches = job_keywords & resume_words
 
 
85
  scores["relevance_to_job"] = min(20, int(20 * len(matches) / max(1, len(job_keywords))))
86
- scores["skills_match"] = min(20, sum(2 for word in matches if len(word) > 3) + sum(1 for word in matches))
87
  else:
88
- # Fallback: Infer skills from resume if no job desc
89
- inferred_skills = set(re.findall(r'\b(python|sql|excel|java|management|teamwork|analysis)\b', resume_lower, re.I))
90
- scores["skills_match"] = min(10, len(inferred_skills) * 2)
91
- scores["relevance_to_job"] = min(10, len(inferred_skills))
92
 
93
- # Experience: Years + context
94
  years = len(YEAR_PATTERN.findall(resume_text))
95
- scores["experience_quality"] = min(10, years * 2)
96
- if "experience" in resume_lower:
97
- scores["experience_quality"] += min(5, len(ACHIEVEMENT_PATTERN.findall(resume_text)) * 2)
 
 
 
 
 
 
 
 
98
 
99
  # Education
100
  if 'phd' in resume_lower or 'doctorate' in resume_lower:
@@ -103,57 +138,80 @@ def calculate_scores(resume_text, job_desc=None):
103
  scores["education"] = 6
104
  elif 'bachelor' in resume_lower or 'bs' in resume_lower or 'ba' in resume_lower:
105
  scores["education"] = 4
106
- elif 'high school' in resume_lower:
107
- scores["education"] = 2
108
 
109
- # Achievements
110
- scores["achievements"] = min(10, len(ACHIEVEMENT_PATTERN.findall(resume_text)) * 3)
 
 
 
 
 
 
 
 
111
 
112
- # Customization: Check if resume mirrors job desc structure
113
- if job_desc and job_keywords:
 
 
 
 
 
 
 
 
114
  scores["customization"] = min(10, int(10 * len(job_keywords & resume_words) / max(1, len(job_keywords))))
115
 
116
- return scores, min(100, sum(scores.values())), job_keywords
117
 
118
- def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
119
- """Analyze resume with smart, job-specific feedback"""
120
  try:
121
  resume_text = extract_text_from_pdf(pdf_file)
122
  except Exception as e:
123
- return (
124
- f"Extraction failed: {str(e)}",
125
- {"error": str(e)}
126
- )
127
 
128
- scores, total_score, job_keywords = calculate_scores(resume_text, job_desc)
129
  resume_words = set(re.findall(r'\w+', resume_text.lower()))
130
 
131
  # Basic analysis
 
 
 
 
132
  basic_analysis = {
133
  "strengths": [
134
- f"Clear formatting (score: {scores['clarity']})" if scores["clarity"] > 7 else "",
135
- f"Strong experience (score: {scores['experience_quality']})" if scores["experience_quality"] > 5 else ""
 
136
  ],
137
  "improvements": [
138
- "Add specific achievements (e.g., 'Increased sales by 20%')" if scores["achievements"] < 5 else "",
139
- f"Include more job-specific keywords (e.g., {list(job_keywords)[:2]})" if scores["relevance_to_job"] < 10 and job_keywords else "",
140
- "Correct typos for better ATS parsing" if scores["clarity"] < 8 else ""
141
  ],
142
- "missing_skills": list(job_keywords - resume_words)[:3] if job_keywords else ["e.g., Python", "e.g., SQL"]
 
143
  }
144
 
145
- # Filter out empty strings
146
  basic_analysis["strengths"] = [s for s in basic_analysis["strengths"] if s]
147
  basic_analysis["improvements"] = [s for s in basic_analysis["improvements"] if s]
148
 
149
- # Enhanced analysis with inference (if available)
150
  if inference_fn:
151
- prompt = f"""[Return valid JSON]: Analyze this resume against the job description: {job_desc or "None"}.
152
- Based on scores: {scores}, resume sample: {resume_text[:200]}, and job keywords: {list(job_keywords)[:5]},
153
- provide:
154
- - "strengths": 2 specific strengths (e.g., 'Lists 3+ years of Python experience'),
155
- - "improvements": 3 actionable improvements (e.g., 'Add "AWS" to skills section'),
156
- - "missing_skills": 3 skills missing from resume but in job desc (or inferred if no job desc).
 
 
 
 
 
 
 
157
  Return valid JSON only."""
158
 
159
  try:
@@ -183,13 +241,14 @@ def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
183
  # --- Gradio Interface --- #
184
  with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
185
  with gr.Sidebar():
186
- gr.Markdown("# Resume Analyzer")
187
- gr.Markdown("Upload your resume in PDF format and optionally provide a job description.")
188
 
189
  with gr.Row():
190
  with gr.Column(scale=1):
191
  pdf_input = gr.File(label="PDF Resume", type="binary")
192
  job_desc_input = gr.Textbox(label="Job Description (Optional)", lines=3)
 
193
  submit_btn = gr.Button("Analyze")
194
 
195
  with gr.Column(scale=2):
@@ -198,7 +257,7 @@ with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
198
 
199
  submit_btn.click(
200
  fn=analyze_resume,
201
- inputs=[pdf_input, job_desc_input],
202
  outputs=[extracted_text, analysis_output]
203
  )
204
 
 
14
 
15
  # Precompiled regex patterns
16
  YEAR_PATTERN = re.compile(r'\d{4}\s*[-–]\s*(?:Present|\d{4})')
17
# An action verb followed (optionally via "by") by a percentage, a dollar
# amount, or a number with a unit, e.g. "Reduced latency by 30%".
ACHIEVEMENT_PATTERN = re.compile(r'(increased|reduced|saved|improved|optimized)\s+.*?(?:\s+by\s+)?(\d+%|\$\d+|\d+\s*[a-z]+)', re.I)
# Common misspellings only. The correctly spelled "responsibilities" and
# "accomplishment" must NOT be listed here, otherwise well-written resumes
# are penalized for typos they do not contain.
TYPO_PATTERN = re.compile(r'\b(?:responsibilites|responsibilties|accomplishement|acomplishment|experiance)\b', re.I)
# A known section heading at the start of a line (re.M), optional colon.
SECTION_PATTERN = re.compile(r'^(experience|skills|education|projects|achievements|github)\s*:?', re.I | re.M)
# A word that appears again later on the same line ('.' does not cross
# newlines, so repetition is only detected within a single line).
DENSITY_PATTERN = re.compile(r'\b(\w+)\b.*\b\1\b', re.I)  # Detect repeated keywords
LEADERSHIP_PATTERN = re.compile(r'(mentor|led|managed|team lead|open source|contributor|tech talk)', re.I)

# Skill equivalence and inference
# Canonical skill -> alternative spellings/technologies credited as that skill.
SKILL_EQUIVALENTS = {
    "node.js": {"nodejs"}, "react": {"preact"}, "mongodb": {"dynamodb"},
    "javascript": {"js"}, "sql": {"mysql", "postgresql"},
}
# Stack name -> the individual skills implied by mentioning that stack.
SKILL_INFERENCES = {
    "mern stack": {"mongodb", "express.js", "react", "node.js"},
    "mean stack": {"mongodb", "express.js", "angular", "node.js"},
}
# Technology names looked up as substrings of the lowercased resume text.
RECENT_TECH = {"next.js", "react 18", "node 20", "python 3.11"}
OUTDATED_TECH = {"jquery", "angularjs", "php 5"}
34
 
35
  def extract_text_from_pdf(pdf_file):
36
  """Extract text from PDF with detailed error handling"""
 
54
  if not text.strip():
55
  raise ValueError("No text extracted from PDF (possibly image-based or empty)")
56
 
57
+ return text[:10000]
58
  except PyPDF2.errors.PdfReadError as e:
59
  raise Exception(f"PDF read error: {str(e)}")
60
  except Exception as e:
 
62
  finally:
63
  gc.collect()
64
 
65
def extract_keywords(job_desc, role_type="general"):
    """Extract keywords from a job description plus a role-specific subset.

    Args:
        job_desc: Raw job-description text. Empty or None yields three
            empty sets.
        role_type: Role label, matched case-insensitively by substring
            ("frontend" / "backend"); anything else is treated as general.
            None or an empty value is treated as "general".

    Returns:
        A tuple ``(keywords, critical_keywords, job_words)`` of sets:
        ``keywords`` are the regex matches in the lowercased description,
        ``critical_keywords`` is the subset relevant to the role (all
        keywords for a general role), and ``job_words`` is every ``\\w+``
        token of the lowercased description.
    """
    if not job_desc:
        return set(), set(), set()

    job_lower = job_desc.lower()
    # NOTE: the trailing `[a-z]{2,}\d*` alternative matches any word of two or
    # more letters, so `keywords` holds nearly every alphabetic token of the
    # description, not only the explicitly named skills.
    skill_pattern = re.compile(r'\b(python|sql|excel|java|react|node\.?js|mongodb|aws|docker|api|ui|ux|devops|[a-z]{2,}\d*)\b', re.I)
    keywords = set(skill_pattern.findall(job_lower))
    frontend_terms = {"react", "vue", "angular", "ui", "ux", "css", "html", "javascript"}
    backend_terms = {"node.js", "python", "sql", "mongodb", "api", "django", "flask", "devops"}

    # Role-specific weighting. A UI dropdown can hand over None when cleared;
    # fall back to the general role instead of crashing on `.lower()`.
    role = (role_type or "general").lower()
    if "frontend" in role:
        critical_keywords = keywords & frontend_terms
    elif "backend" in role:
        critical_keywords = keywords & backend_terms
    else:
        critical_keywords = keywords

    return keywords, critical_keywords, set(re.findall(r'\w+', job_lower))
86
+
87
+ def calculate_scores(resume_text, job_desc=None, role_type="general"):
88
+ """Advanced scoring with semantic matching, seniority, and recency"""
89
  resume_lower = resume_text.lower()
90
  scores = {
91
+ "relevance_to_job": 0, "experience_quality": 0, "skills_match": 0,
92
+ "education": 0, "achievements": 0, "clarity": 10, "customization": 0,
93
+ "seniority": 0, "fresher_potential": 0
 
 
 
 
94
  }
95
 
96
+ job_keywords, critical_keywords, job_words = extract_keywords(job_desc, role_type)
97
  resume_words = set(re.findall(r'\w+', resume_lower))
98
 
99
+ # Semantic Skill Matching & Inference
100
+ effective_skills = set()
101
+ for skill in resume_words:
102
+ effective_skills.add(skill)
103
+ for base_skill, equivalents in SKILL_EQUIVALENTS.items():
104
+ if skill in equivalents:
105
+ effective_skills.add(base_skill)
106
+ for stack, inferred in SKILL_INFERENCES.items():
107
+ if stack in resume_lower:
108
+ effective_skills.update(inferred)
109
+
110
+ # Skills Match & Transfer
111
  if job_keywords:
112
+ matches = job_keywords & effective_skills
113
+ critical_matches = critical_keywords & effective_skills
114
+ scores["skills_match"] = min(20, len(matches) * 2 + len(critical_matches) * 3)
115
  scores["relevance_to_job"] = min(20, int(20 * len(matches) / max(1, len(job_keywords))))
 
116
  else:
117
+ scores["skills_match"] = min(10, len(effective_skills) * 2)
118
+ scores["relevance_to_job"] = min(10, len(effective_skills))
 
 
119
 
120
+ # Experience: Projects = Work
121
  years = len(YEAR_PATTERN.findall(resume_text))
122
+ project_count = len(re.findall(r'(project|github|freelance)', resume_lower, re.I))
123
+ scores["experience_quality"] = min(15, years * 2 + project_count * 1)
124
+
125
+ # Seniority & Leadership
126
+ leadership_signals = len(LEADERSHIP_PATTERN.findall(resume_text))
127
+ scores["seniority"] = min(10, years + leadership_signals) if years > 3 else 0
128
+
129
+ # Fresher Potential
130
+ if years < 2:
131
+ learning_signals = len(re.findall(r'(learned|bootcamp|course|upskill)', resume_lower, re.I))
132
+ scores["fresher_potential"] = min(10, learning_signals * 2)
133
 
134
  # Education
135
  if 'phd' in resume_lower or 'doctorate' in resume_lower:
 
138
  scores["education"] = 6
139
  elif 'bachelor' in resume_lower or 'bs' in resume_lower or 'ba' in resume_lower:
140
  scores["education"] = 4
 
 
141
 
142
+ # Achievements (Mandatory for Mid/Senior)
143
+ achievements = len(ACHIEVEMENT_PATTERN.findall(resume_text))
144
+ scores["achievements"] = min(10, achievements * 3)
145
+ if years > 3 and achievements == 0:
146
+ scores["achievements"] -= 5 # Penalty for missing metrics
147
+
148
+ # Recency Weighting
149
+ recent_bonus = sum(2 for tech in RECENT_TECH if tech in resume_lower)
150
+ outdated_penalty = sum(-1 for tech in OUTDATED_TECH if tech in resume_lower)
151
+ scores["skills_match"] = max(0, scores["skills_match"] + recent_bonus + outdated_penalty)
152
 
153
+ # Clarity & ATS Compliance
154
+ scores["clarity"] -= min(8, len(TYPO_PATTERN.findall(resume_text)))
155
+ if "column" in resume_lower or not resume_text.strip(): # Basic ATS formatting check
156
+ scores["clarity"] -= 5
157
+
158
+ # Keyword Density & Anti-Gaming
159
+ density_count = len(DENSITY_PATTERN.findall(resume_text))
160
+ if density_count > 10: # Excessive repetition
161
+ scores["customization"] -= 5
162
+ elif job_keywords:
163
  scores["customization"] = min(10, int(10 * len(job_keywords & resume_words) / max(1, len(job_keywords))))
164
 
165
+ return scores, min(100, sum(scores.values())), job_keywords, critical_keywords
166
 
167
+ def analyze_resume(pdf_file, job_desc=None, role_type="general", inference_fn=None):
168
+ """Smart ATS analysis with detailed feedback"""
169
  try:
170
  resume_text = extract_text_from_pdf(pdf_file)
171
  except Exception as e:
172
+ return f"Extraction failed: {str(e)}", {"error": str(e)}
 
 
 
173
 
174
+ scores, total_score, job_keywords, critical_keywords = calculate_scores(resume_text, job_desc, role_type)
175
  resume_words = set(re.findall(r'\w+', resume_text.lower()))
176
 
177
  # Basic analysis
178
+ ats_score = scores["relevance_to_job"] + scores["skills_match"] + scores["clarity"]
179
+ human_potential = scores["seniority"] + scores["fresher_potential"] + scores["achievements"]
180
+ flag = "High human potential but low ATS score" if human_potential > 15 and ats_score < 20 else ""
181
+
182
  basic_analysis = {
183
  "strengths": [
184
+ f"Strong {role_type} skills (score: {scores['skills_match']})" if scores["skills_match"] > 10 else "",
185
+ f"Clear seniority signals (score: {scores['seniority']})" if scores["seniority"] > 5 else "",
186
+ f"High fresher potential (score: {scores['fresher_potential']})" if scores["fresher_potential"] > 5 else ""
187
  ],
188
  "improvements": [
189
+ f"Add critical {role_type} keywords (e.g., {list(critical_keywords)[:2]})" if scores["relevance_to_job"] < 10 else "",
190
+ "Include measurable achievements (e.g., 'Reduced latency by 30%')" if scores["achievements"] < 5 else "",
191
+ "Use recent tech (e.g., Next.js) over outdated (e.g., jQuery)" if any(t in resume_text.lower() for t in OUTDATED_TECH) else ""
192
  ],
193
+ "missing_skills": list(critical_keywords - resume_words)[:3] if critical_keywords else ["e.g., Python", "e.g., SQL"],
194
+ "flags": [flag] if flag else []
195
  }
196
 
 
197
  basic_analysis["strengths"] = [s for s in basic_analysis["strengths"] if s]
198
  basic_analysis["improvements"] = [s for s in basic_analysis["improvements"] if s]
199
 
200
+ # Enhanced analysis with inference
201
  if inference_fn:
202
+ prompt = f"""[Return valid JSON]: Analyze this resume against job description: {job_desc or "None"} (role: {role_type}).
203
+ Resume sample: {resume_text[:200]}, scores: {scores}, job keywords: {list(job_keywords)[:5]}, critical keywords: {list(critical_keywords)[:5]}.
204
+ Provide:
205
+ - "strengths": 2 specific strengths (e.g., 'Uses Next.js for modern frontend'),
206
+ - "improvements": 3 actionable improvements (e.g., 'Add MongoDB to skills'),
207
+ - "missing_skills": 3 skills missing from resume but in job desc,
208
+ - "flags": 1-2 flags (e.g., 'High potential but low ATS score', 'Possible keyword stuffing').
209
+ Account for:
210
+ - Semantic skill matches (e.g., Node.js = NodeJS),
211
+ - Contextual inference (e.g., MERN → Express.js),
212
+ - Seniority (require achievements for >3 years exp),
213
+ - Recency (favor Next.js over jQuery),
214
+ - Role-specific focus (e.g., frontend: UI, backend: APIs).
215
  Return valid JSON only."""
216
 
217
  try:
 
241
  # --- Gradio Interface --- #
242
  with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
243
  with gr.Sidebar():
244
+ gr.Markdown("# Smart ATS Resume Analyzer")
245
+ gr.Markdown("Upload a PDF resume and optionally provide a job description and role type.")
246
 
247
  with gr.Row():
248
  with gr.Column(scale=1):
249
  pdf_input = gr.File(label="PDF Resume", type="binary")
250
  job_desc_input = gr.Textbox(label="Job Description (Optional)", lines=3)
251
+ role_type_input = gr.Dropdown(label="Role Type", choices=["General", "Frontend", "Backend"], value="General")
252
  submit_btn = gr.Button("Analyze")
253
 
254
  with gr.Column(scale=2):
 
257
 
258
  submit_btn.click(
259
  fn=analyze_resume,
260
+ inputs=[pdf_input, job_desc_input, role_type_input],
261
  outputs=[extracted_text, analysis_output]
262
  )
263