Mangesh223 committed on
Commit
c9c405f
·
verified ·
1 Parent(s): 9b62178

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -44
app.py CHANGED
@@ -12,23 +12,17 @@ from dotenv import load_dotenv
12
  load_dotenv()
13
  login(token=os.getenv("HF_TOKEN"))
14
 
15
- # Skills set for faster lookups
16
- GENERAL_SKILLS = {
17
- 'communication', 'problem solving', 'project management',
18
- 'python', 'sql', 'excel', 'teamwork'
19
- }
20
-
21
  # Precompiled regex patterns
22
  YEAR_PATTERN = re.compile(r'\d{4}\s*[-–]\s*(?:Present|\d{4})')
23
  ACHIEVEMENT_PATTERN = re.compile(r'(increased|reduced|saved|improved)\s+by\s+(\d+%|\$\d+)', re.I)
24
  TYPO_PATTERN = re.compile(r'\b(?:responsibilities|accomplishment|experiance)\b', re.I)
 
25
 
26
  def extract_text_from_pdf(pdf_file):
27
  """Extract text from PDF with detailed error handling"""
28
  if pdf_file is None:
29
  raise ValueError("No PDF file uploaded")
30
 
31
- # Handle both file path and bytes input
32
  if isinstance(pdf_file, str):
33
  with open(pdf_file, 'rb') as f:
34
  file_bytes = f.read()
@@ -42,8 +36,8 @@ def extract_text_from_pdf(pdf_file):
42
  if len(pdf_reader.pages) == 0:
43
  raise ValueError("PDF has no pages")
44
 
45
- text = "\n".join(page.extract_text() for page in pdf_reader.pages)
46
- if text is None or text.strip() == "":
47
  raise ValueError("No text extracted from PDF (possibly image-based or empty)")
48
 
49
  return text[:10000] # Limit to first 10,000 characters
@@ -54,8 +48,23 @@ def extract_text_from_pdf(pdf_file):
54
  finally:
55
  gc.collect()
56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  def calculate_scores(resume_text, job_desc=None):
58
- """Optimized scoring function"""
59
  resume_lower = resume_text.lower()
60
  scores = {
61
  "relevance_to_job": 0,
@@ -67,71 +76,92 @@ def calculate_scores(resume_text, job_desc=None):
67
  "customization": 0
68
  }
69
 
70
- if job_desc:
71
- job_words = set(re.findall(r'\w+', job_desc.lower()))
72
- resume_words = set(re.findall(r'\w+', resume_lower))
73
- scores["relevance_to_job"] = min(20, int(20 * len(job_words & resume_words) / len(job_words)))
 
 
 
 
74
  else:
75
- scores["relevance_to_job"] = min(10, sum(1 for skill in GENERAL_SKILLS if skill in resume_lower))
 
 
 
76
 
77
- scores["experience_quality"] = min(10, len(YEAR_PATTERN.findall(resume_text)))
78
- scores["experience_quality"] += min(10, len(ACHIEVEMENT_PATTERN.findall(resume_text)) * 2)
 
 
 
79
 
 
80
  if 'phd' in resume_lower or 'doctorate' in resume_lower:
81
  scores["education"] = 8
82
  elif 'master' in resume_lower or 'msc' in resume_lower or 'mba' in resume_lower:
83
  scores["education"] = 6
84
- elif 'bachelor' in resume_lower or ' bs ' in resume_lower or ' ba ' in resume_lower:
85
  scores["education"] = 4
86
  elif 'high school' in resume_lower:
87
  scores["education"] = 2
88
 
89
- return scores, min(100, sum(scores.values()))
 
 
 
 
 
 
 
90
 
91
  def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
92
- """Analyze resume and return extracted text and analysis as separate outputs"""
93
  try:
94
  resume_text = extract_text_from_pdf(pdf_file)
95
  except Exception as e:
96
  return (
97
- f"Extraction failed: {str(e)}", # First output for textbox
98
- {"error": str(e)} # Second output for JSON
99
  )
100
 
101
- scores, total_score = calculate_scores(resume_text, job_desc)
 
102
 
103
- # Basic analysis if inference fails
104
  basic_analysis = {
105
- "score": {
106
- "total": total_score,
107
- "breakdown": scores
108
- },
109
  "strengths": [
110
- "Good clarity score" if scores["clarity"] > 7 else None,
111
- "Relevant skills" if scores["relevance_to_job"] > 5 else None
112
  ],
113
  "improvements": [
114
- "Add more measurable achievements" if scores["achievements"] < 3 else None,
115
- "Include more relevant keywords" if scores["relevance_to_job"] < 5 else None,
116
- "Check for typos" if scores["clarity"] < 9 else None
117
  ],
118
- "missing_skills": list(GENERAL_SKILLS - set(re.findall(r'\w+', resume_text.lower())))[:2]
119
  }
120
 
121
- # Try to get enhanced analysis if inference function is available
 
 
 
 
122
  if inference_fn:
123
- prompt = f"""[Return valid JSON]: Based on these scores: {scores}, provide:
124
- - "strengths": 2 key strengths,
125
- - "improvements": 3 specific improvements,
126
- - "missing_skills": 2 missing skills (use job description if provided: {job_desc or "None"}).
127
- Output a valid JSON string only, no extra text."""
 
 
128
 
129
  try:
130
  result = inference_fn(prompt)
131
  if result and result.strip():
132
  enhanced_analysis = json.loads(result)
133
  return (
134
- resume_text[:5000], # First output for textbox (limited to 5000 chars)
135
  {
136
  "score": {"total": total_score, "breakdown": scores},
137
  "analysis": enhanced_analysis,
@@ -140,10 +170,9 @@ def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
140
  )
141
  except Exception as e:
142
  print(f"Inference error: {str(e)}")
143
- # Fall through to basic analysis
144
 
145
  return (
146
- resume_text[:5000], # First output for textbox
147
  {
148
  "score": {"total": total_score, "breakdown": scores},
149
  "analysis": basic_analysis,
@@ -155,7 +184,7 @@ def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
155
  with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
156
  with gr.Sidebar():
157
  gr.Markdown("# Resume Analyzer")
158
- gr.Markdown("Upload your resume in PDF format for analysis")
159
 
160
  with gr.Row():
161
  with gr.Column(scale=1):
 
12
  load_dotenv()
13
  login(token=os.getenv("HF_TOKEN"))
14
 
 
 
 
 
 
 
15
  # Precompiled regex patterns
16
  YEAR_PATTERN = re.compile(r'\d{4}\s*[-–]\s*(?:Present|\d{4})')
17
  ACHIEVEMENT_PATTERN = re.compile(r'(increased|reduced|saved|improved)\s+by\s+(\d+%|\$\d+)', re.I)
18
  TYPO_PATTERN = re.compile(r'\b(?:responsibilities|accomplishment|experiance)\b', re.I)
19
+ SECTION_PATTERN = re.compile(r'^(experience|skills|education|projects|achievements)\s*:?', re.I | re.M)
20
 
21
  def extract_text_from_pdf(pdf_file):
22
  """Extract text from PDF with detailed error handling"""
23
  if pdf_file is None:
24
  raise ValueError("No PDF file uploaded")
25
 
 
26
  if isinstance(pdf_file, str):
27
  with open(pdf_file, 'rb') as f:
28
  file_bytes = f.read()
 
36
  if len(pdf_reader.pages) == 0:
37
  raise ValueError("PDF has no pages")
38
 
39
+ text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
40
+ if not text.strip():
41
  raise ValueError("No text extracted from PDF (possibly image-based or empty)")
42
 
43
  return text[:10000] # Limit to first 10,000 characters
 
48
  finally:
49
  gc.collect()
50
 
51
def extract_keywords(job_desc):
    """Extract candidate skill/tool keywords from a job description.

    The description is lower-cased and scanned with a pattern that lists a
    few well-known skills first and then falls back to any run of two or
    more ASCII letters optionally followed by digits. In addition, every
    whole token that occurs more than twice and is longer than three
    characters is included, which picks up frequent terms the pattern
    cannot match (for example tokens that start with a single letter
    followed by digits).

    Args:
        job_desc: Job description text; may be ``None`` or empty.

    Returns:
        A set of lower-case keyword strings; empty when ``job_desc`` is
        falsy.
    """
    # Function-scope import keeps this block self-contained.
    from collections import Counter

    if not job_desc:
        return set()

    job_lower = job_desc.lower()
    # Common skills/tools pattern (customize based on your domain).
    # NOTE(review): the multi-word alternative "project management" is
    # returned as a single phrase; callers that intersect the result with a
    # set of single-word tokens will never match it -- confirm intent.
    skill_pattern = re.compile(
        r'\b(python|sql|excel|java|project management|communication|teamwork|aws|docker|[a-z]{2,}\d*)\b',
        re.I,
    )
    keywords = set(skill_pattern.findall(job_lower))

    # Count whole tokens once. Using str.count() here would count substring
    # hits (e.g. "data" inside "database") and rescan the text per word.
    token_counts = Counter(re.findall(r'\w+', job_lower))
    keywords.update(
        word
        for word, occurrences in token_counts.items()
        if occurrences > 2 and len(word) > 3  # Frequent, non-trivial words
    )
    return keywords
65
+
66
  def calculate_scores(resume_text, job_desc=None):
67
+ """Smart scoring tailored to job description"""
68
  resume_lower = resume_text.lower()
69
  scores = {
70
  "relevance_to_job": 0,
 
76
  "customization": 0
77
  }
78
 
79
+ job_keywords = extract_keywords(job_desc) if job_desc else set()
80
+ resume_words = set(re.findall(r'\w+', resume_lower))
81
+
82
+ # Relevance: Exact matches with job keywords
83
+ if job_keywords:
84
+ matches = job_keywords & resume_words
85
+ scores["relevance_to_job"] = min(20, int(20 * len(matches) / max(1, len(job_keywords))))
86
+ scores["skills_match"] = min(20, sum(2 for word in matches if len(word) > 3) + sum(1 for word in matches))
87
  else:
88
+ # Fallback: Infer skills from resume if no job desc
89
+ inferred_skills = set(re.findall(r'\b(python|sql|excel|java|management|teamwork|analysis)\b', resume_lower, re.I))
90
+ scores["skills_match"] = min(10, len(inferred_skills) * 2)
91
+ scores["relevance_to_job"] = min(10, len(inferred_skills))
92
 
93
+ # Experience: Years + context
94
+ years = len(YEAR_PATTERN.findall(resume_text))
95
+ scores["experience_quality"] = min(10, years * 2)
96
+ if "experience" in resume_lower:
97
+ scores["experience_quality"] += min(5, len(ACHIEVEMENT_PATTERN.findall(resume_text)) * 2)
98
 
99
+ # Education
100
  if 'phd' in resume_lower or 'doctorate' in resume_lower:
101
  scores["education"] = 8
102
  elif 'master' in resume_lower or 'msc' in resume_lower or 'mba' in resume_lower:
103
  scores["education"] = 6
104
+ elif 'bachelor' in resume_lower or 'bs' in resume_lower or 'ba' in resume_lower:
105
  scores["education"] = 4
106
  elif 'high school' in resume_lower:
107
  scores["education"] = 2
108
 
109
+ # Achievements
110
+ scores["achievements"] = min(10, len(ACHIEVEMENT_PATTERN.findall(resume_text)) * 3)
111
+
112
+ # Customization: Check if resume mirrors job desc structure
113
+ if job_desc and job_keywords:
114
+ scores["customization"] = min(10, int(10 * len(job_keywords & resume_words) / max(1, len(job_keywords))))
115
+
116
+ return scores, min(100, sum(scores.values())), job_keywords
117
 
118
  def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
119
+ """Analyze resume with smart, job-specific feedback"""
120
  try:
121
  resume_text = extract_text_from_pdf(pdf_file)
122
  except Exception as e:
123
  return (
124
+ f"Extraction failed: {str(e)}",
125
+ {"error": str(e)}
126
  )
127
 
128
+ scores, total_score, job_keywords = calculate_scores(resume_text, job_desc)
129
+ resume_words = set(re.findall(r'\w+', resume_text.lower()))
130
 
131
+ # Basic analysis
132
  basic_analysis = {
 
 
 
 
133
  "strengths": [
134
+ f"Clear formatting (score: {scores['clarity']})" if scores["clarity"] > 7 else "",
135
+ f"Strong experience (score: {scores['experience_quality']})" if scores["experience_quality"] > 5 else ""
136
  ],
137
  "improvements": [
138
+ "Add specific achievements (e.g., 'Increased sales by 20%')" if scores["achievements"] < 5 else "",
139
+ f"Include more job-specific keywords (e.g., {list(job_keywords)[:2]})" if scores["relevance_to_job"] < 10 and job_keywords else "",
140
+ "Correct typos for better ATS parsing" if scores["clarity"] < 8 else ""
141
  ],
142
+ "missing_skills": list(job_keywords - resume_words)[:3] if job_keywords else ["e.g., Python", "e.g., SQL"]
143
  }
144
 
145
+ # Filter out empty strings
146
+ basic_analysis["strengths"] = [s for s in basic_analysis["strengths"] if s]
147
+ basic_analysis["improvements"] = [s for s in basic_analysis["improvements"] if s]
148
+
149
+ # Enhanced analysis with inference (if available)
150
  if inference_fn:
151
+ prompt = f"""[Return valid JSON]: Analyze this resume against the job description: {job_desc or "None"}.
152
+ Based on scores: {scores}, resume sample: {resume_text[:200]}, and job keywords: {list(job_keywords)[:5]},
153
+ provide:
154
+ - "strengths": 2 specific strengths (e.g., 'Lists 3+ years of Python experience'),
155
+ - "improvements": 3 actionable improvements (e.g., 'Add "AWS" to skills section'),
156
+ - "missing_skills": 3 skills missing from resume but in job desc (or inferred if no job desc).
157
+ Return valid JSON only."""
158
 
159
  try:
160
  result = inference_fn(prompt)
161
  if result and result.strip():
162
  enhanced_analysis = json.loads(result)
163
  return (
164
+ resume_text[:5000],
165
  {
166
  "score": {"total": total_score, "breakdown": scores},
167
  "analysis": enhanced_analysis,
 
170
  )
171
  except Exception as e:
172
  print(f"Inference error: {str(e)}")
 
173
 
174
  return (
175
+ resume_text[:5000],
176
  {
177
  "score": {"total": total_score, "breakdown": scores},
178
  "analysis": basic_analysis,
 
184
  with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
185
  with gr.Sidebar():
186
  gr.Markdown("# Resume Analyzer")
187
+ gr.Markdown("Upload your resume in PDF format and optionally provide a job description.")
188
 
189
  with gr.Row():
190
  with gr.Column(scale=1):