Mangesh223 committed on
Commit
3455bab
·
verified ·
1 Parent(s): 94d6cfd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -81
app.py CHANGED
@@ -3,7 +3,7 @@ import PyPDF2
3
  import io
4
  import re
5
  import json
6
- import os # Added missing import
7
  import gc
8
  from huggingface_hub import login
9
  from dotenv import load_dotenv
@@ -12,6 +12,17 @@ from dotenv import load_dotenv
12
  load_dotenv()
13
  login(token=os.getenv("HF_TOKEN"))
14
 
 
 
 
 
 
 
 
 
 
 
 
15
  def extract_text_from_pdf(pdf_file):
16
  """Extract text from PDF with detailed error handling"""
17
  if pdf_file is None:
@@ -43,105 +54,123 @@ def extract_text_from_pdf(pdf_file):
43
  finally:
44
  gc.collect()
45
 
46
- def generate_ai_prompt(resume_text, job_desc=None):
47
- """Generates smart analysis prompt for AI"""
48
- job_desc_section = f"\nCompare against this job description: {job_desc[:2000]}" if job_desc else ""
49
- missing_keywords_section = ',\n "missing_keywords": ["important", "missing", "terms"]' if job_desc else ""
 
 
 
 
 
 
 
 
50
 
51
- return f"""
52
- Analyze this resume comprehensively:
53
- {resume_text[:10000]}
54
- {job_desc_section}
55
- Return JSON with:
56
- {{
57
- "score": 0-100 (overall quality),
58
- "score_breakdown": {{
59
- "skills": 0-25 (variety and relevance),
60
- "experience": 0-20 (duration and roles),
61
- "achievements": 0-20 (quantifiable impact),
62
- "education": 0-15,
63
- "clarity": 0-10 (readability and structure),
64
- "customization": 0-10 (job fit if JD provided)
65
- }},
66
- "detected_skills": ["list", "of", "skills", "with", "variants"],
67
- "strengths": ["list", "of", "2-3", "key", "strengths"],
68
- "improvements": ["3-5", "specific", "actionable", "suggestions"]{missing_keywords_section}
69
- }}
70
- """
71
 
72
  def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
73
- """Main analysis function with AI integration"""
74
  try:
75
  resume_text = extract_text_from_pdf(pdf_file)
76
  except Exception as e:
77
  return (
78
- f"Error: {str(e)}",
79
- {"error": str(e)}
80
  )
81
-
82
- # Generate AI-powered analysis
83
- prompt = generate_ai_prompt(resume_text, job_desc)
84
 
85
- try:
86
- if inference_fn:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  result = inference_fn(prompt)
88
- analysis = json.loads(result)
89
-
90
- # Ensure score calculation
91
- if "score" not in analysis:
92
- analysis["score"] = min(100, sum(analysis["score_breakdown"].values()))
93
-
94
- return (
95
- resume_text[:5000],
96
- {
97
- "analysis": analysis,
98
- "raw_prompt": prompt[:1000] if len(prompt) > 1000 else prompt
99
- }
100
- )
101
- except Exception as e:
102
- print(f"AI analysis error: {str(e)}")
103
 
104
- # Fallback basic analysis
105
  return (
106
- resume_text[:5000],
107
  {
108
- "error": "AI analysis unavailable",
109
- "raw_text": resume_text[:1000]
 
110
  }
111
  )
112
 
113
- # --- Modern Gradio Interface --- #
114
- with gr.Blocks(theme=gr.themes.Soft(), title="AI Resume Analyzer") as demo:
 
 
 
 
115
  with gr.Row():
116
- with gr.Column():
117
- gr.Markdown("## 🚀 Smart Resume Analysis")
118
- with gr.Tab("Upload"):
119
- pdf_input = gr.File(label="Resume (PDF)", type="binary")
120
- job_desc_input = gr.Textbox(label="Job Description (Optional)", lines=5)
121
- analyze_btn = gr.Button("Analyze", variant="primary")
122
-
123
- with gr.Tab("Example"):
124
- gr.Examples(
125
- examples=["sample_resume.pdf"],
126
- inputs=pdf_input,
127
- label="Try with sample resume"
128
- )
129
 
130
- with gr.Column():
131
- with gr.Tab("Results"):
132
- score_gauge = gr.Gauge(label="Overall Score", minimum=0, maximum=100)
133
- gr.Markdown("### 🔍 Analysis Breakdown")
134
- analysis_output = gr.JSON(label="Details")
135
-
136
- with gr.Tab("Text Preview"):
137
- extracted_text = gr.Textbox(label="Extracted Content", lines=15)
138
-
139
- analyze_btn.click(
140
  fn=analyze_resume,
141
  inputs=[pdf_input, job_desc_input],
142
- outputs=[extracted_text, analysis_output],
143
- api_name="analyze"
144
  )
145
 
146
- if __name__ == "__main__":
147
- demo.launch(server_port=7860, share=True)
 
3
  import io
4
  import re
5
  import json
6
+ import os
7
  import gc
8
  from huggingface_hub import login
9
  from dotenv import load_dotenv
 
12
# Load variables from a local .env file (if any) into the environment.
load_dotenv()

# Authenticate only when a token is configured: login(token=None) falls back
# to an interactive prompt, which cannot be answered in a headless deployment.
_hf_token = os.getenv("HF_TOKEN")
if _hf_token:
    login(token=_hf_token)
14
 
15
# Generic skills looked for when no job description is supplied.
# Kept as a set so membership tests and set differences are cheap.
GENERAL_SKILLS = {
    'communication', 'problem solving', 'project management',
    'python', 'sql', 'excel', 'teamwork'
}

# Precompiled regex patterns (compiled once at import, reused per request).

# Date ranges such as "2019 - 2022" or "2020–Present" (hyphen or en dash).
YEAR_PATTERN = re.compile(r'\d{4}\s*[-–]\s*(?:Present|\d{4})')

# Quantified achievements such as "increased by 20%" or "saved by $5000".
# Two capturing groups: findall() yields (verb, amount) tuples.
ACHIEVEMENT_PATTERN = re.compile(r'(increased|reduced|saved|improved)\s+by\s+(\d+%|\$\d+)', re.I)

# Common misspellings of resume vocabulary. Only misspelled forms are listed:
# matching the correct spellings ("responsibilities", "accomplishment") would
# penalise well-written resumes.
TYPO_PATTERN = re.compile(
    r'\b(?:responsibilites|responsabilities|accomplishement|acomplishment|experiance)\b',
    re.I,
)
25
+
26
  def extract_text_from_pdf(pdf_file):
27
  """Extract text from PDF with detailed error handling"""
28
  if pdf_file is None:
 
54
  finally:
55
  gc.collect()
56
 
57
def calculate_scores(resume_text, job_desc=None):
    """Score a resume with keyword and regex heuristics.

    Args:
        resume_text: Plain text of the resume.
        job_desc: Optional job description. When truthy, relevance is the
            share of distinct job-description words that also occur in the
            resume, scaled to 0-20. Otherwise relevance is the number of
            GENERAL_SKILLS entries found in the resume, capped at 10.

    Returns:
        A ``(scores, total)`` tuple: ``scores`` maps each category name to
        an int, and ``total`` is the sum of those values capped at 100.
    """
    resume_lower = resume_text.lower()
    scores = {
        "relevance_to_job": 0,
        "experience_quality": 0,
        # NOTE(review): skills_match, achievements and customization are
        # never updated below, so they always contribute 0 to the total.
        "skills_match": 0,
        "education": 0,
        "achievements": 0,
        # Start from 10 and lose one point per typo hit, never below 2.
        "clarity": 10 - min(8, len(TYPO_PATTERN.findall(resume_text))),
        "customization": 0
    }

    if job_desc:
        job_words = set(re.findall(r'\w+', job_desc.lower()))
        resume_words = set(re.findall(r'\w+', resume_lower))
        # A description made only of punctuation/whitespace yields no words;
        # leave relevance at 0 instead of dividing by zero.
        if job_words:
            overlap = len(job_words & resume_words)
            scores["relevance_to_job"] = min(20, int(20 * overlap / len(job_words)))
    else:
        # Substring match, so multi-word skills such as "problem solving" count.
        scores["relevance_to_job"] = min(
            10, sum(1 for skill in GENERAL_SKILLS if skill in resume_lower)
        )

    # Experience: one point per date range plus two per quantified
    # achievement, each component capped at 10.
    scores["experience_quality"] = min(10, len(YEAR_PATTERN.findall(resume_text)))
    scores["experience_quality"] += min(10, len(ACHIEVEMENT_PATTERN.findall(resume_text)) * 2)

    # Education: the first matching tier wins, checked from highest to lowest.
    if 'phd' in resume_lower or 'doctorate' in resume_lower:
        scores["education"] = 8
    elif 'master' in resume_lower or 'msc' in resume_lower or 'mba' in resume_lower:
        scores["education"] = 6
    elif 'bachelor' in resume_lower or ' bs ' in resume_lower or ' ba ' in resume_lower:
        scores["education"] = 4
    elif 'high school' in resume_lower:
        scores["education"] = 2

    return scores, min(100, sum(scores.values()))
90
 
91
def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
    """Analyze a PDF resume and return the two values shown in the UI.

    Args:
        pdf_file: Uploaded PDF, passed unchanged to extract_text_from_pdf.
        job_desc: Optional job description used for relevance scoring and
            quoted in the inference prompt.
        inference_fn: Optional callable that takes a prompt string and
            returns a JSON string. When it is given and succeeds, its parsed
            output replaces the heuristic analysis.

    Returns:
        A ``(text, result)`` tuple. If extraction fails, ``text`` is an
        error message and ``result`` is ``{"error": <message>}``. Otherwise
        ``text`` is the first 5000 characters of the resume and ``result``
        has the keys ``"score"``, ``"analysis"`` and ``"raw_text_sample"``.
    """
    try:
        resume_text = extract_text_from_pdf(pdf_file)
    except Exception as e:
        return (
            f"Extraction failed: {str(e)}",  # First output for textbox
            {"error": str(e)},  # Second output for JSON
        )

    scores, total_score = calculate_scores(resume_text, job_desc)
    score_summary = {"total": total_score, "breakdown": scores}
    resume_lower = resume_text.lower()

    # Collect only the messages that apply, so the JSON has no null entries.
    strengths = []
    if scores["clarity"] > 7:
        strengths.append("Good clarity score")
    if scores["relevance_to_job"] > 5:
        strengths.append("Relevant skills")

    improvements = []
    if scores["achievements"] < 3:
        improvements.append("Add more measurable achievements")
    if scores["relevance_to_job"] < 5:
        improvements.append("Include more relevant keywords")
    if scores["clarity"] < 9:
        improvements.append("Check for typos")

    # Substring matching keeps multi-word skills ("problem solving") from
    # always being reported missing; sorting makes the top two deterministic.
    missing_skills = [
        skill for skill in sorted(GENERAL_SKILLS) if skill not in resume_lower
    ][:2]

    # Heuristic analysis, used when inference is unavailable or fails.
    analysis = {
        "score": score_summary,
        "strengths": strengths,
        "improvements": improvements,
        "missing_skills": missing_skills,
    }

    # Try to get an enhanced analysis if an inference function is available.
    if inference_fn:
        prompt = (
            f'[Return valid JSON]: Based on these scores: {scores}, provide:\n'
            '- "strengths": 2 key strengths,\n'
            '- "improvements": 3 specific improvements,\n'
            '- "missing_skills": 2 missing skills '
            f'(use job description if provided: {job_desc or "None"}).\n'
            'Output a valid JSON string only, no extra text.'
        )
        try:
            result = inference_fn(prompt)
            if result and result.strip():
                analysis = json.loads(result)
        except Exception as e:
            # Deliberate best-effort: any inference or parsing failure falls
            # back to the heuristic analysis built above.
            print(f"Inference error: {str(e)}")

    return (
        resume_text[:5000],  # First output for textbox (limited to 5000 chars)
        {
            "score": score_summary,
            "analysis": analysis,
            "raw_text_sample": resume_text[:200],
        },
    )
153
 
154
# --- Gradio Interface --- #
with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
    with gr.Sidebar():
        gr.Markdown("# Resume Analyzer")
        gr.Markdown("Upload your resume in PDF format for analysis")

    with gr.Row():
        # Left column: inputs.
        with gr.Column(scale=1):
            pdf_input = gr.File(label="PDF Resume", type="binary")
            job_desc_input = gr.Textbox(label="Job Description (Optional)", lines=3)
            submit_btn = gr.Button("Analyze")

        # Right column: outputs, given twice the width of the input column.
        with gr.Column(scale=2):
            extracted_text = gr.Textbox(label="Extracted Text", lines=10, interactive=False)
            analysis_output = gr.JSON(label="Analysis Results")

    # analyze_resume returns (text, analysis dict), one value per output
    # component. inference_fn is not wired to the UI and keeps its default.
    submit_btn.click(
        fn=analyze_resume,
        inputs=[pdf_input, job_desc_input],
        outputs=[extracted_text, analysis_output],
    )

# Launch only when run as a script, so importing this module (for tests or
# reuse of analyze_resume) does not start a web server.
if __name__ == "__main__":
    demo.launch(share=True)