Mangesh223 committed on
Commit
d742c72
·
verified ·
1 Parent(s): dfa143b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -135
app.py CHANGED
@@ -4,7 +4,6 @@ import io
4
  import re
5
  import json
6
  import os
7
- import gc
8
  from huggingface_hub import login
9
  from dotenv import load_dotenv
10
 
@@ -12,165 +11,119 @@ from dotenv import load_dotenv
12
  load_dotenv()
13
  login(token=os.getenv("HF_TOKEN"))
14
 
# Generic skills looked for when no job description is supplied.
# Kept as a plain set so callers can use membership tests and set difference.
GENERAL_SKILLS = {
    'communication', 'problem solving', 'project management',
    'python', 'sql', 'excel', 'teamwork'
}

# Precompiled regex patterns (compiled once at import time).

# Date ranges such as "2019 - 2022" or "2020–Present" (hyphen or en dash).
YEAR_PATTERN = re.compile(r'\d{4}\s*[-–]\s*(?:Present|\d{4})')

# Quantified achievements such as "increased by 20%" or "saved by $500".
ACHIEVEMENT_PATTERN = re.compile(r'(increased|reduced|saved|improved)\s+by\s+(\d+%|\$\d+)', re.I)

# Common misspellings that lower the clarity score.
# The earlier alternation listed the correctly spelled words "responsibilities"
# and "accomplishment", which penalised resumes for using them correctly; only
# actual misspellings are matched now.
TYPO_PATTERN = re.compile(
    r'\b(?:responsabilities|responsibilites|accomplishement|acomplishment|experiance)\b',
    re.I,
)
25
-
def extract_text_from_pdf(pdf_file):
    """Extract up to 10,000 characters of text from a PDF.

    Args:
        pdf_file: Path to a PDF file (``str``) or the raw PDF content (``bytes``).

    Returns:
        The text of every page joined with newlines, truncated to the first
        10,000 characters.

    Raises:
        ValueError: If ``pdf_file`` is ``None``.
        TypeError: If ``pdf_file`` is neither ``str`` nor ``bytes``.
        Exception: If the PDF cannot be parsed ("PDF read error: ..."), or if it
            has no pages / yields no text / fails otherwise
            ("Extraction error: ..."). The original error is chained as
            ``__cause__``.
    """
    if pdf_file is None:
        raise ValueError("No PDF file uploaded")

    # Handle both file path and bytes input
    if isinstance(pdf_file, str):
        with open(pdf_file, 'rb') as f:
            file_bytes = f.read()
    elif isinstance(pdf_file, bytes):
        file_bytes = pdf_file
    else:
        raise TypeError(f"Expected file path or bytes, got {type(pdf_file)}")

    try:
        pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
        if len(pdf_reader.pages) == 0:
            raise ValueError("PDF has no pages")

        # A page without a text layer may yield None or "" (assumption about
        # PyPDF2 - confirm for the pinned version); coerce to "" so join()
        # cannot raise TypeError on a None item.
        text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
        if not text.strip():
            raise ValueError("No text extracted from PDF (possibly image-based or empty)")

        return text[:10000]  # Limit to first 10,000 characters
    except PyPDF2.errors.PdfReadError as e:
        raise Exception(f"PDF read error: {str(e)}") from e
    except Exception as e:
        # Also wraps the ValueErrors raised above, keeping the historical
        # "Extraction error: ..." message format for callers.
        raise Exception(f"Extraction error: {str(e)}") from e
    finally:
        gc.collect()
56
-
def calculate_scores(resume_text, job_desc=None):
    """Score a resume with simple keyword and regex heuristics.

    Args:
        resume_text: Plain text of the resume.
        job_desc: Optional job description. When it contains at least one word
            token, relevance is the share of its distinct words that also occur
            in the resume (0-20). Otherwise relevance counts how many
            GENERAL_SKILLS appear in the resume (capped at 10).

    Returns:
        A tuple ``(scores, total)`` where ``scores`` maps each category name to
        its points and ``total`` is their sum capped at 100. The
        "skills_match", "achievements" and "customization" categories are
        never populated by this function and stay 0.
    """
    resume_lower = resume_text.lower()
    scores = {
        "relevance_to_job": 0,
        "experience_quality": 0,
        "skills_match": 0,
        "education": 0,
        "achievements": 0,
        # Start from 10 and lose one point per typo hit, at most 8 points.
        "clarity": 10 - min(8, len(TYPO_PATTERN.findall(resume_text))),
        "customization": 0
    }

    # A job description made only of punctuation/whitespace is truthy but has
    # no word tokens; treat it like "no job description" instead of dividing
    # by zero.
    job_words = set(re.findall(r'\w+', job_desc.lower())) if job_desc else set()
    if job_words:
        resume_words = set(re.findall(r'\w+', resume_lower))
        shared = len(job_words & resume_words)
        scores["relevance_to_job"] = min(20, int(20 * shared / len(job_words)))
    else:
        scores["relevance_to_job"] = min(10, sum(1 for skill in GENERAL_SKILLS if skill in resume_lower))

    # One point per date range (max 10) plus two per quantified achievement (max 10).
    scores["experience_quality"] = min(10, len(YEAR_PATTERN.findall(resume_text)))
    scores["experience_quality"] += min(10, len(ACHIEVEMENT_PATTERN.findall(resume_text)) * 2)

    # Highest degree mentioned wins; plain substring checks on the lowercased text.
    if 'phd' in resume_lower or 'doctorate' in resume_lower:
        scores["education"] = 8
    elif 'master' in resume_lower or 'msc' in resume_lower or 'mba' in resume_lower:
        scores["education"] = 6
    elif 'bachelor' in resume_lower or ' bs ' in resume_lower or ' ba ' in resume_lower:
        scores["education"] = 4
    elif 'high school' in resume_lower:
        scores["education"] = 2

    return scores, min(100, sum(scores.values()))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
    """Analyze a resume and return its text and an analysis payload.

    Args:
        pdf_file: PDF input handed to extract_text_from_pdf().
        job_desc: Optional job description used for scoring and in the prompt.
        inference_fn: Optional callable that takes a prompt string and returns
            a JSON string; when it succeeds its parsed output replaces the
            heuristic analysis.

    Returns:
        A 2-tuple for the two UI outputs:
        * on extraction failure: ``("Extraction failed: <msg>", {"error": <msg>})``
        * otherwise: the first 5,000 characters of the resume text and a dict
          with "score", "analysis" and "raw_text_sample".
    """
    try:
        resume_text = extract_text_from_pdf(pdf_file)
    except Exception as e:
        return (
            f"Extraction failed: {str(e)}",  # First output for textbox
            {"error": str(e)}  # Second output for JSON
        )

    scores, total_score = calculate_scores(resume_text, job_desc)
    resume_lower = resume_text.lower()

    # Keep only the messages whose condition holds, so the lists never contain
    # None placeholders.
    strengths = [
        message for applies, message in (
            (scores["clarity"] > 7, "Good clarity score"),
            (scores["relevance_to_job"] > 5, "Relevant skills"),
        ) if applies
    ]
    improvements = [
        message for applies, message in (
            (scores["achievements"] < 3, "Add more measurable achievements"),
            (scores["relevance_to_job"] < 5, "Include more relevant keywords"),
            (scores["clarity"] < 9, "Check for typos"),
        ) if applies
    ]

    # Basic analysis if inference fails
    basic_analysis = {
        "score": {
            "total": total_score,
            "breakdown": scores
        },
        "strengths": strengths,
        "improvements": improvements,
        # Substring match so multi-word skills such as "project management"
        # can be found; sorted so the two reported skills are deterministic.
        "missing_skills": sorted(
            skill for skill in GENERAL_SKILLS if skill not in resume_lower
        )[:2]
    }

    # Try to get enhanced analysis if inference function is available
    if inference_fn:
        prompt = f"""[Return valid JSON]: Based on these scores: {scores}, provide:
- "strengths": 2 key strengths,
- "improvements": 3 specific improvements,
- "missing_skills": 2 missing skills (use job description if provided: {job_desc or "None"}).
Output a valid JSON string only, no extra text."""

        try:
            result = inference_fn(prompt)
            if result and result.strip():
                enhanced_analysis = json.loads(result)
                return (
                    resume_text[:5000],  # First output for textbox (limited to 5000 chars)
                    {
                        "score": {"total": total_score, "breakdown": scores},
                        "analysis": enhanced_analysis,
                        "raw_text_sample": resume_text[:200]
                    }
                )
        except Exception as e:
            # Best effort: log and fall through to the heuristic analysis.
            print(f"Inference error: {str(e)}")

    return (
        resume_text[:5000],  # First output for textbox
        {
            "score": {"total": total_score, "breakdown": scores},
            "analysis": basic_analysis,
            "raw_text_sample": resume_text[:200]
        }
    )
153
 
# --- Gradio Interface --- #
# Sidebar with title and instructions; inputs in the narrow left column,
# results in the wider right column.
with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
    with gr.Sidebar():
        gr.Markdown("# Resume Analyzer")
        gr.Markdown("Upload your resume in PDF format for analysis")

    with gr.Row():
        with gr.Column(scale=1):
            # type="binary" hands the uploaded file to the handler as bytes.
            pdf_input = gr.File(type="binary", label="PDF Resume")
            job_desc_input = gr.Textbox(
                lines=3,
                label="Job Description (Optional)",
            )
            submit_btn = gr.Button("Analyze")

        with gr.Column(scale=2):
            extracted_text = gr.Textbox(
                lines=10,
                label="Extracted Text",
                interactive=False,
            )
            analysis_output = gr.JSON(label="Analysis Results")

    # analyze_resume returns (text, analysis), matching the two outputs in order.
    submit_btn.click(
        analyze_resume,
        [pdf_input, job_desc_input],
        [extracted_text, analysis_output],
    )

demo.launch(share=True)
 
 
4
  import re
5
  import json
6
  import os
 
7
  from huggingface_hub import login
8
  from dotenv import load_dotenv
9
 
 
11
  load_dotenv()
12
  login(token=os.getenv("HF_TOKEN"))
13
 
 
 
 
 
 
 
 
 
 
 
 
def extract_text_from_pdf(pdf_file):
    """Extract up to 15,000 characters of text from an in-memory PDF.

    Args:
        pdf_file: Raw PDF content as ``bytes`` (``bytearray`` and
            ``memoryview`` are accepted as well).

    Returns:
        The non-empty page texts joined with newlines, truncated to the first
        15,000 characters. May be an empty string when no page yields text.

    Raises:
        Exception: On any failure, with a message prefixed by
            "PDF processing error: "; the underlying error is chained as
            ``__cause__``.
    """
    try:
        if pdf_file is None:
            # Distinguish "nothing uploaded" from "wrong kind of input".
            raise ValueError("No PDF file uploaded")
        if not isinstance(pdf_file, (bytes, bytearray, memoryview)):
            raise ValueError("Invalid file format")

        pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
        # Extract each page exactly once, then drop pages that produced no text.
        page_texts = [page.extract_text() for page in pdf_reader.pages]
        text = "\n".join(page_text for page_text in page_texts if page_text)
        return text[:15000]  # Increased character limit
    except Exception as e:
        raise Exception(f"PDF processing error: {str(e)}") from e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
def generate_ai_prompt(resume_text, job_desc=None):
    """Build the analysis prompt sent to the AI model.

    Args:
        resume_text: Resume text; only the first 10,000 characters are embedded.
        job_desc: Optional job description; only the first 2,000 characters are
            embedded. When given, the prompt also asks for "missing_keywords".

    Returns:
        The prompt string describing the JSON structure the model should return.
    """
    # The optional parts are built up front: an f-string replacement field must
    # hold a complete expression, so a bare `{if job_desc else ""}` is a
    # SyntaxError.
    if job_desc:
        jd_section = f"Compare against this job description: {job_desc[:2000]}"
        closing_fields = (
            '    "improvements": ["3-5", "specific", "actionable", "suggestions"],\n'
            '    "missing_keywords": ["important", "missing", "terms"]'
        )
    else:
        jd_section = ""
        closing_fields = '    "improvements": ["3-5", "specific", "actionable", "suggestions"]'

    return f"""
Analyze this resume comprehensively:
{resume_text[:10000]}

{jd_section}

Return JSON with:
{{
    "score": 0-100 (overall quality),
    "score_breakdown": {{
        "skills": 0-25 (variety and relevance),
        "experience": 0-20 (duration and roles),
        "achievements": 0-20 (quantifiable impact),
        "education": 0-15,
        "clarity": 0-10 (readability and structure),
        "customization": 0-10 (job fit if JD provided)
    }},
    "detected_skills": ["list", "of", "skills", "with", "variants"],
    "strengths": ["list", "of", "2-3", "key", "strengths"],
{closing_fields}
}}
"""
53
 
def _parse_model_json(raw):
    """Return the JSON object contained in a model reply, or raise ValueError."""
    if isinstance(raw, (bytes, bytearray)):
        raw = raw.decode("utf-8", errors="replace")
    if not isinstance(raw, str) or not raw.strip():
        raise ValueError("Empty response from inference function")

    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        # Replies may wrap the JSON in code fences or surrounding prose;
        # retry on the outermost {...} span before giving up.
        start, end = raw.find("{"), raw.rfind("}")
        if start == -1 or end <= start:
            raise
        parsed = json.loads(raw[start:end + 1])

    if not isinstance(parsed, dict):
        raise ValueError("Model response is not a JSON object")
    return parsed


def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
    """Extract a resume's text and, when possible, analyze it with an AI model.

    Args:
        pdf_file: PDF input handed to extract_text_from_pdf().
        job_desc: Optional job description forwarded to generate_ai_prompt().
        inference_fn: Optional callable that takes the prompt string and
            returns the model's reply (expected to contain a JSON object).

    Returns:
        A 2-tuple for the two UI outputs:
        * extraction failure: ``("Error: <msg>", {"error": <msg>})``
        * AI analysis succeeded: the first 5,000 characters of the resume text
          and ``{"analysis": <dict>, "raw_prompt": <first 1,000 prompt chars>}``
        * no ``inference_fn`` or the AI step failed: the first 5,000 characters
          and ``{"error": "AI analysis unavailable", "raw_text": <first 1,000 chars>}``
    """
    try:
        resume_text = extract_text_from_pdf(pdf_file)
    except Exception as e:
        return (
            f"Error: {str(e)}",
            {"error": str(e)}
        )

    if inference_fn:
        try:
            # Prompt construction sits inside the try so a malformed job
            # description degrades to the fallback instead of crashing.
            prompt = generate_ai_prompt(resume_text, job_desc)
            analysis = _parse_model_json(inference_fn(prompt))

            # Ensure score calculation: derive the total from the numeric
            # breakdown entries when the model omitted it. A missing or
            # malformed breakdown leaves the analysis without a score rather
            # than discarding the whole analysis.
            if "score" not in analysis:
                breakdown = analysis.get("score_breakdown")
                parts = breakdown.values() if isinstance(breakdown, dict) else ()
                numeric_parts = [
                    part for part in parts
                    if isinstance(part, (int, float)) and not isinstance(part, bool)
                ]
                if numeric_parts:
                    analysis["score"] = min(100, sum(numeric_parts))

            return (
                resume_text[:5000],
                {
                    "analysis": analysis,
                    "raw_prompt": prompt[:1000]
                }
            )
        except Exception as e:
            # Best effort: log and fall through to the fallback payload.
            print(f"AI analysis error: {str(e)}")

    # Fallback basic analysis
    return (
        resume_text[:5000],
        {
            "error": "AI analysis unavailable",
            "raw_text": resume_text[:1000]
        }
    )
94
 
# --- Modern Gradio Interface --- #
def _overall_score(result):
    """Return the 0-100 overall score from an analyze_resume() payload, or None."""
    analysis = result.get("analysis") if isinstance(result, dict) else None
    score = analysis.get("score") if isinstance(analysis, dict) else None
    if isinstance(score, bool) or not isinstance(score, (int, float)):
        return None
    return max(0, min(100, score))


with gr.Blocks(theme=gr.themes.Soft(), title="AI Resume Analyzer") as demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown("## 🚀 Smart Resume Analysis")
            with gr.Tab("Upload"):
                # type="binary" hands the uploaded file to the handler as bytes.
                pdf_input = gr.File(label="Resume (PDF)", type="binary")
                job_desc_input = gr.Textbox(label="Job Description (Optional)", lines=5)
                analyze_btn = gr.Button("Analyze", variant="primary")

            with gr.Tab("Example"):
                gr.Examples(
                    examples=["sample_resume.pdf"],
                    inputs=pdf_input,
                    label="Try with sample resume"
                )

        with gr.Column():
            with gr.Tab("Results"):
                # Gradio ships no `Gauge` component (gr.Gauge raises
                # AttributeError at startup); a read-only Number shows the score.
                score_gauge = gr.Number(label="Overall Score", precision=0, interactive=False)
                gr.Markdown("### 🔍 Analysis Breakdown")
                analysis_output = gr.JSON(label="Details")

            with gr.Tab("Text Preview"):
                extracted_text = gr.Textbox(label="Extracted Content", lines=15)

    # analyze_resume keeps its two-value contract; the score widget is filled
    # by a follow-up step that reads the JSON payload once it is available.
    analyze_btn.click(
        fn=analyze_resume,
        inputs=[pdf_input, job_desc_input],
        outputs=[extracted_text, analysis_output],
        api_name="analyze"
    ).then(
        fn=_overall_score,
        inputs=analysis_output,
        outputs=score_gauge
    )

if __name__ == "__main__":
    demo.launch(server_port=7860, share=True)