Mangesh223 committed on
Commit
dfe84e8
·
verified ·
1 Parent(s): d38260d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -203
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import gradio as gr
2
  import PyPDF2
3
  import io
4
- import re
5
  import json
6
  import os
7
  import gc
@@ -12,28 +11,8 @@ from dotenv import load_dotenv
12
  load_dotenv()
13
  login(token=os.getenv("HF_TOKEN"))
14
 
15
- # Precompiled regex patterns
16
- YEAR_PATTERN = re.compile(r'\d{4}\s*[-–]\s*(?:Present|\d{4})')
17
- ACHIEVEMENT_PATTERN = re.compile(r'(increased|reduced|saved|improved|optimized)\s+.*?(?:\s+by\s+)?(\d+%|\$\d+|\d+\s*[a-z]+)', re.I)
18
- TYPO_PATTERN = re.compile(r'\b(?:responsibilities|accomplishment|experiance)\b', re.I)
19
- SECTION_PATTERN = re.compile(r'^(experience|skills|education|projects|achievements|github)\s*:?', re.I | re.M)
20
- DENSITY_PATTERN = re.compile(r'\b(\w+)\b.*\b\1\b', re.I) # Detect repeated keywords
21
- LEADERSHIP_PATTERN = re.compile(r'(mentor|led|managed|team lead|open source|contributor|tech talk)', re.I)
22
-
23
- # Skill equivalence and inference
24
- SKILL_EQUIVALENTS = {
25
- "node.js": {"nodejs"}, "react": {"preact"}, "mongodb": {"dynamodb"},
26
- "javascript": {"js"}, "sql": {"mysql", "postgresql"}
27
- }
28
- SKILL_INFERENCES = {
29
- "mern stack": {"mongodb", "express.js", "react", "node.js"},
30
- "mean stack": {"mongodb", "express.js", "angular", "node.js"}
31
- }
32
- RECENT_TECH = {"next.js", "react 18", "node 20", "python 3.11"}
33
- OUTDATED_TECH = {"jquery", "angularjs", "php 5"}
34
-
35
  def extract_text_from_pdf(pdf_file):
36
- """Extract text from PDF with detailed error handling"""
37
  if pdf_file is None:
38
  raise ValueError("No PDF file uploaded")
39
 
@@ -47,210 +26,74 @@ def extract_text_from_pdf(pdf_file):
47
 
48
  try:
49
  pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
50
- if len(pdf_reader.pages) == 0:
51
- raise ValueError("PDF has no pages")
52
-
53
  text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
54
  if not text.strip():
55
- raise ValueError("No text extracted from PDF (possibly image-based or empty)")
56
-
57
- return text[:10000]
58
- except PyPDF2.errors.PdfReadError as e:
59
- raise Exception(f"PDF read error: {str(e)}")
60
  except Exception as e:
61
  raise Exception(f"Extraction error: {str(e)}")
62
  finally:
63
  gc.collect()
64
 
65
- def extract_keywords(job_desc, role_type="general"):
66
- """Extract job-specific keywords with role-based weighting"""
67
- if not job_desc:
68
- return set(), set(), set()
69
-
70
- job_lower = job_desc.lower()
71
- skill_pattern = re.compile(r'\b(python|sql|excel|java|react|node\.?js|mongodb|aws|docker|api|ui|ux|devops|[a-z]{2,}\d*)\b', re.I)
72
- keywords = set(skill_pattern.findall(job_lower))
73
- frontend_terms = {"react", "vue", "angular", "ui", "ux", "css", "html", "javascript"}
74
- backend_terms = {"node.js", "python", "sql", "mongodb", "api", "django", "flask", "devops"}
75
-
76
- # Role-specific weighting
77
- critical_keywords = set()
78
- if "frontend" in role_type.lower():
79
- critical_keywords = keywords & frontend_terms
80
- elif "backend" in role_type.lower():
81
- critical_keywords = keywords & backend_terms
82
- else:
83
- critical_keywords = keywords
84
-
85
- return keywords, critical_keywords, set(re.findall(r'\w+', job_lower))
86
-
87
- def calculate_scores(resume_text, job_desc=None, role_type="general"):
88
- """Advanced scoring with semantic matching, seniority, and recency"""
89
- resume_lower = resume_text.lower()
90
- scores = {
91
- "relevance_to_job": 0, "experience_quality": 0, "skills_match": 0,
92
- "education": 0, "achievements": 0, "clarity": 10, "customization": 0,
93
- "seniority": 0, "fresher_potential": 0
94
- }
95
-
96
- job_keywords, critical_keywords, job_words = extract_keywords(job_desc, role_type)
97
- resume_words = set(re.findall(r'\w+', resume_lower))
98
-
99
- # Semantic Skill Matching & Inference
100
- effective_skills = set()
101
- for skill in resume_words:
102
- effective_skills.add(skill)
103
- for base_skill, equivalents in SKILL_EQUIVALENTS.items():
104
- if skill in equivalents:
105
- effective_skills.add(base_skill)
106
- for stack, inferred in SKILL_INFERENCES.items():
107
- if stack in resume_lower:
108
- effective_skills.update(inferred)
109
-
110
- # Skills Match & Transfer
111
- if job_keywords:
112
- matches = job_keywords & effective_skills
113
- critical_matches = critical_keywords & effective_skills
114
- scores["skills_match"] = min(20, len(matches) * 2 + len(critical_matches) * 3)
115
- scores["relevance_to_job"] = min(20, int(20 * len(matches) / max(1, len(job_keywords))))
116
- else:
117
- scores["skills_match"] = min(10, len(effective_skills) * 2)
118
- scores["relevance_to_job"] = min(10, len(effective_skills))
119
-
120
- # Experience: Projects = Work
121
- years = len(YEAR_PATTERN.findall(resume_text))
122
- project_count = len(re.findall(r'(project|github|freelance)', resume_lower, re.I))
123
- scores["experience_quality"] = min(15, years * 2 + project_count * 1)
124
-
125
- # Seniority & Leadership
126
- leadership_signals = len(LEADERSHIP_PATTERN.findall(resume_text))
127
- scores["seniority"] = min(10, years + leadership_signals) if years > 3 else 0
128
-
129
- # Fresher Potential
130
- if years < 2:
131
- learning_signals = len(re.findall(r'(learned|bootcamp|course|upskill)', resume_lower, re.I))
132
- scores["fresher_potential"] = min(10, learning_signals * 2)
133
-
134
- # Education
135
- if 'phd' in resume_lower or 'doctorate' in resume_lower:
136
- scores["education"] = 8
137
- elif 'master' in resume_lower or 'msc' in resume_lower or 'mba' in resume_lower:
138
- scores["education"] = 6
139
- elif 'bachelor' in resume_lower or 'bs' in resume_lower or 'ba' in resume_lower:
140
- scores["education"] = 4
141
-
142
- # Achievements (Mandatory for Mid/Senior)
143
- achievements = len(ACHIEVEMENT_PATTERN.findall(resume_text))
144
- scores["achievements"] = min(10, achievements * 3)
145
- if years > 3 and achievements == 0:
146
- scores["achievements"] -= 5 # Penalty for missing metrics
147
-
148
- # Recency Weighting
149
- recent_bonus = sum(2 for tech in RECENT_TECH if tech in resume_lower)
150
- outdated_penalty = sum(-1 for tech in OUTDATED_TECH if tech in resume_lower)
151
- scores["skills_match"] = max(0, scores["skills_match"] + recent_bonus + outdated_penalty)
152
-
153
- # Clarity & ATS Compliance
154
- scores["clarity"] -= min(8, len(TYPO_PATTERN.findall(resume_text)))
155
- if "column" in resume_lower or not resume_text.strip(): # Basic ATS formatting check
156
- scores["clarity"] -= 5
157
-
158
- # Keyword Density & Anti-Gaming
159
- density_count = len(DENSITY_PATTERN.findall(resume_text))
160
- if density_count > 10: # Excessive repetition
161
- scores["customization"] -= 5
162
- elif job_keywords:
163
- scores["customization"] = min(10, int(10 * len(job_keywords & resume_words) / max(1, len(job_keywords))))
164
-
165
- return scores, min(100, sum(scores.values())), job_keywords, critical_keywords
166
-
167
  def analyze_resume(pdf_file, job_desc=None, role_type="general", inference_fn=None):
168
- """Smart ATS analysis with detailed feedback"""
169
  try:
170
  resume_text = extract_text_from_pdf(pdf_file)
171
  except Exception as e:
172
  return f"Extraction failed: {str(e)}", {"error": str(e)}
173
 
174
- scores, total_score, job_keywords, critical_keywords = calculate_scores(resume_text, job_desc, role_type)
175
- resume_words = set(re.findall(r'\w+', resume_text.lower()))
176
-
177
- # Basic analysis
178
- ats_score = scores["relevance_to_job"] + scores["skills_match"] + scores["clarity"]
179
- human_potential = scores["seniority"] + scores["fresher_potential"] + scores["achievements"]
180
- flag = "High human potential but low ATS score" if human_potential > 15 and ats_score < 20 else ""
181
-
182
- basic_analysis = {
183
- "strengths": [
184
- f"Strong {role_type} skills (score: {scores['skills_match']})" if scores["skills_match"] > 10 else "",
185
- f"Clear seniority signals (score: {scores['seniority']})" if scores["seniority"] > 5 else "",
186
- f"High fresher potential (score: {scores['fresher_potential']})" if scores["fresher_potential"] > 5 else ""
187
- ],
188
- "improvements": [
189
- f"Add critical {role_type} keywords (e.g., {list(critical_keywords)[:2]})" if scores["relevance_to_job"] < 10 else "",
190
- "Include measurable achievements (e.g., 'Reduced latency by 30%')" if scores["achievements"] < 5 else "",
191
- "Use recent tech (e.g., Next.js) over outdated (e.g., jQuery)" if any(t in resume_text.lower() for t in OUTDATED_TECH) else ""
192
- ],
193
- "missing_skills": list(critical_keywords - resume_words)[:3] if critical_keywords else ["e.g., Python", "e.g., SQL"],
194
- "flags": [flag] if flag else []
195
- }
196
-
197
- basic_analysis["strengths"] = [s for s in basic_analysis["strengths"] if s]
198
- basic_analysis["improvements"] = [s for s in basic_analysis["improvements"] if s]
199
-
200
- # Enhanced analysis with inference
201
- if inference_fn:
202
- prompt = f"""[Return valid JSON]: Analyze this resume against job description: {job_desc or "None"} (role: {role_type}).
203
- Resume sample: {resume_text[:200]}, scores: {scores}, job keywords: {list(job_keywords)[:5]}, critical keywords: {list(critical_keywords)[:5]}.
204
- Provide:
205
- - "strengths": 2 specific strengths (e.g., 'Uses Next.js for modern frontend'),
206
- - "improvements": 3 actionable improvements (e.g., 'Add MongoDB to skills'),
207
- - "missing_skills": 3 skills missing from resume but in job desc,
208
- - "flags": 1-2 flags (e.g., 'High potential but low ATS score', 'Possible keyword stuffing').
209
- Account for:
210
- - Semantic skill matches (e.g., Node.js = NodeJS),
211
- - Contextual inference (e.g., MERN → Express.js),
212
- - Seniority (require achievements for >3 years exp),
213
- - Recency (favor Next.js over jQuery),
214
- - Role-specific focus (e.g., frontend: UI, backend: APIs).
215
- Return valid JSON only."""
216
-
217
- try:
218
- result = inference_fn(prompt)
219
- if result and result.strip():
220
- enhanced_analysis = json.loads(result)
221
- return (
222
- resume_text[:5000],
223
- {
224
- "score": {"total": total_score, "breakdown": scores},
225
- "analysis": enhanced_analysis,
226
- "raw_text_sample": resume_text[:200]
227
- }
228
- )
229
- except Exception as e:
230
- print(f"Inference error: {str(e)}")
231
 
232
- return (
233
- resume_text[:5000],
234
- {
235
- "score": {"total": total_score, "breakdown": scores},
236
- "analysis": basic_analysis,
 
 
 
 
 
 
 
 
237
  "raw_text_sample": resume_text[:200]
238
  }
239
- )
240
 
241
  # --- Gradio Interface --- #
242
- with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
243
- with gr.Sidebar():
244
- gr.Markdown("# Smart ATS Resume Analyzer")
245
- gr.Markdown("Upload a PDF resume and optionally provide a job description and role type.")
246
-
247
  with gr.Row():
248
  with gr.Column(scale=1):
249
  pdf_input = gr.File(label="PDF Resume", type="binary")
250
  job_desc_input = gr.Textbox(label="Job Description (Optional)", lines=3)
251
- role_type_input = gr.Dropdown(label="Role Type", choices=["General", "Frontend", "Backend"], value="General")
252
  submit_btn = gr.Button("Analyze")
253
-
254
  with gr.Column(scale=2):
255
  extracted_text = gr.Textbox(label="Extracted Text", lines=10, interactive=False)
256
  analysis_output = gr.JSON(label="Analysis Results")
 
1
  import gradio as gr
2
  import PyPDF2
3
  import io
 
4
  import json
5
  import os
6
  import gc
 
11
  load_dotenv()
12
  login(token=os.getenv("HF_TOKEN"))
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  def extract_text_from_pdf(pdf_file):
15
+ """Extract raw text from PDF"""
16
  if pdf_file is None:
17
  raise ValueError("No PDF file uploaded")
18
 
 
26
 
27
  try:
28
  pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
 
 
 
29
  text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
30
  if not text.strip():
31
+ raise ValueError("No text extracted")
32
+ return text[:10000] # Limit to avoid overwhelming AI
 
 
 
33
  except Exception as e:
34
  raise Exception(f"Extraction error: {str(e)}")
35
  finally:
36
  gc.collect()
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  def analyze_resume(pdf_file, job_desc=None, role_type="general", inference_fn=None):
39
+ """Smart ATS relying fully on AI for analysis"""
40
  try:
41
  resume_text = extract_text_from_pdf(pdf_file)
42
  except Exception as e:
43
  return f"Extraction failed: {str(e)}", {"error": str(e)}
44
 
45
+ # Fallback if no inference function (minimal manual analysis)
46
+ if not inference_fn:
47
+ basic_analysis = {
48
+ "score": {"total": 10, "breakdown": {"competency": 10}},
49
+ "analysis": {
50
+ "strengths": ["Resume text extracted"],
51
+ "improvements": ["Provide a job description for detailed analysis" if not job_desc else "Add more details"],
52
+ "missing_skills": [],
53
+ "flags": []
54
+ },
55
+ "raw_text_sample": resume_text[:200]
56
+ }
57
+ return resume_text[:5000], basic_analysis
58
+
59
+ # AI-driven analysis
60
+ prompt = f"""[Return valid JSON]: You are a smart ATS designed to evaluate resumes without rejecting worthy candidates. Analyze this resume: '{resume_text[:2000]}' against job description: '{job_desc or "None"}' (role type: {role_type}).
61
+ Provide:
62
+ - "score": {{total: X (0-100), breakdown: {{competency: X (technical/non-technical skills), experience: X (duration and depth), impact: X (achievements), potential: X (learning ability), leadership: X (influence), adaptability: X (fit to role or general)}}}}
63
+ - "analysis": {{"strengths": [2-3 items, e.g., "Strong React skills"], "improvements": [2-3 items, e.g., "Add teamwork examples"], "missing_skills": [0-3 items, only if job_desc provided], "flags": [0-2 items, e.g., "High potential candidate"]}}
64
+ Rules:
65
+ - Detect skills, experience, achievements, learning signals, and leadership dynamically from the resume text.
66
+ - If no job description, assess general potential across technical and non-technical domains.
67
+ - If job description exists, prioritize role-relevant traits but don’t penalize unrelated strengths.
68
+ - Infer skills (e.g., 'MERN' → 'MongoDB'), normalize variations (e.g., 'React.js' = 'React'), and weigh recent tech (e.g., 'Next.js') over outdated (e.g., 'jQuery').
69
+ - Focus on potential: Highlight capability even if formatting or keywords don’t perfectly match.
70
+ - Avoid rejection: Low scores should still come with positive feedback or flags for human review.
71
+ Return valid JSON only."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
+ try:
74
+ result = inference_fn(prompt)
75
+ if result and result.strip():
76
+ analysis = json.loads(result)
77
+ analysis["raw_text_sample"] = resume_text[:200]
78
+ return resume_text[:5000], analysis
79
+ else:
80
+ raise ValueError("Empty AI response")
81
+ except Exception as e:
82
+ print(f"AI analysis error: {str(e)}")
83
+ return resume_text[:5000], {
84
+ "score": {"total": 10, "breakdown": {"competency": 10}},
85
+ "analysis": {"strengths": ["Text processed"], "improvements": ["Analysis failed, retry"], "missing_skills": [], "flags": []},
86
  "raw_text_sample": resume_text[:200]
87
  }
 
88
 
89
  # --- Gradio Interface --- #
90
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
 
 
 
91
  with gr.Row():
92
  with gr.Column(scale=1):
93
  pdf_input = gr.File(label="PDF Resume", type="binary")
94
  job_desc_input = gr.Textbox(label="Job Description (Optional)", lines=3)
95
+ role_type_input = gr.Dropdown(label="Role Type", choices=["General", "Frontend", "Backend", "Non-Technical"], value="General")
96
  submit_btn = gr.Button("Analyze")
 
97
  with gr.Column(scale=2):
98
  extracted_text = gr.Textbox(label="Extracted Text", lines=10, interactive=False)
99
  analysis_output = gr.JSON(label="Analysis Results")