Mangesh223 committed on
Commit
98df801
·
verified ·
1 Parent(s): 54e0095

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -74
app.py CHANGED
@@ -28,19 +28,25 @@ def extract_text_from_pdf(pdf_file):
28
  if pdf_file is None:
29
  raise ValueError("No PDF file uploaded")
30
 
31
- if not isinstance(pdf_file, bytes):
32
- raise TypeError(f"Expected bytes, got {type(pdf_file)}")
 
 
 
 
 
 
33
 
34
  try:
35
- pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
36
  if len(pdf_reader.pages) == 0:
37
  raise ValueError("PDF has no pages")
38
 
39
- text = pdf_reader.pages[0].extract_text()
40
  if text is None or text.strip() == "":
41
  raise ValueError("No text extracted from PDF (possibly image-based or empty)")
42
 
43
- return text[:10000]
44
  except PyPDF2.errors.PdfReadError as e:
45
  raise Exception(f"PDF read error: {str(e)}")
46
  except Exception as e:
@@ -64,7 +70,7 @@ def calculate_scores(resume_text, job_desc=None):
64
  if job_desc:
65
  job_words = set(re.findall(r'\w+', job_desc.lower()))
66
  resume_words = set(re.findall(r'\w+', resume_lower))
67
- scores["relevance_to_job"] = min(20, int(20 * len(job_words & resume_words) / len(job_words)))
68
  else:
69
  scores["relevance_to_job"] = min(10, sum(1 for skill in GENERAL_SKILLS if skill in resume_lower))
70
 
@@ -83,92 +89,88 @@ def calculate_scores(resume_text, job_desc=None):
83
  return scores, min(100, sum(scores.values()))
84
 
85
  def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
86
- """Analyze resume and return extracted text and analysis"""
87
  try:
88
- print(f"Received pdf_file type: {type(pdf_file)}") # Debug: Log input type
89
  resume_text = extract_text_from_pdf(pdf_file)
90
- print(f"Extracted text: {resume_text[:100]}") # Debug: Log first 100 chars
91
  except Exception as e:
92
- return {
93
- "extracted_text": f"Extraction failed: {str(e)}",
94
- "analysis": {"error": f"Text extraction error: {str(e)}", "raw_prompt": "Not generated", "raw_result": "Not applicable"}
95
- }
96
 
97
  scores, total_score = calculate_scores(resume_text, job_desc)
98
 
99
- prompt = f"""[Return valid JSON]: Based on these scores: {scores}, provide:
100
- - "strengths": 2 key strengths (e.g., "High experience quality" if score is high),
101
- - "improvements": 3 specific improvements,
102
- - "missing_skills": 2 missing skills (use job description if provided: {job_desc or "None"}).
103
- Output a valid JSON string only, no extra text."""
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
- try:
106
- if inference_fn is None:
107
- return {
108
- "extracted_text": resume_text,
109
- "analysis": {"error": "Inference function not provided", "raw_prompt": prompt, "raw_result": "Not available"}
110
- }
 
111
 
112
- print(f"Prompt sent to Together AI: {prompt}") # Debug: Log prompt
113
- result = inference_fn(prompt)
114
- print(f"Raw result from Together AI: {result}") # Debug: Log response
115
-
116
- if not result or result.strip() == "":
117
- return {
118
- "extracted_text": resume_text,
119
- "analysis": {"error": "Empty response from Together AI", "raw_prompt": prompt, "raw_result": result}
120
- }
121
-
122
- parsed_result = json.loads(result)
123
- return {
124
- "extracted_text": resume_text,
125
- "analysis": {
126
- "score": {"total": total_score, "breakdown": scores},
127
- "analysis": parsed_result,
128
- "raw_text": resume_text[:500],
129
- "raw_prompt": prompt,
130
- "raw_result": result
131
- }
132
- }
133
- except json.JSONDecodeError as e:
134
- return {
135
- "extracted_text": resume_text,
136
- "analysis": {"error": f"Failed to parse JSON: {str(e)}", "raw_prompt": prompt, "raw_result": result}
137
- }
138
- except Exception as e:
139
- return {
140
- "extracted_text": resume_text,
141
- "analysis": {"error": f"Unexpected inference error: {str(e)}", "raw_prompt": prompt, "raw_result": result if 'result' in locals() else "Not available"}
142
  }
 
143
 
144
  # --- Gradio Interface --- #
145
  with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
146
  with gr.Sidebar():
147
- gr.Markdown("# Resume Analyzer with Mistral-7B")
148
- gr.Markdown("Powered by mistralai/Mistral-7B-Instruct-v0.3 via Together AI API. Sign in to use.")
149
- button = gr.LoginButton("Sign in")
150
-
151
- inference = gr.load(
152
- "models/mistralai/Mistral-7B-Instruct-v0.3",
153
- accept_token=button,
154
- provider="together"
155
- )
156
 
157
  with gr.Row():
158
  with gr.Column(scale=1):
159
- inputs = [
160
- gr.File(label="PDF Resume", type="binary"),
161
- gr.Textbox(label="Job Description (Optional)", lines=3)
162
- ]
163
  with gr.Column(scale=2):
164
- extracted_text_output = gr.Textbox(label="Extracted Text", lines=10, interactive=False)
165
- analysis_output = gr.JSON(label="Analysis")
166
 
167
- inputs[0].upload(
168
- fn=lambda pdf, job_desc: analyze_resume(pdf, job_desc, inference),
169
- inputs=inputs,
170
- outputs=[extracted_text_output, analysis_output],
171
- queue=True
172
  )
173
 
174
  demo.launch(share=True)
 
28
  if pdf_file is None:
29
  raise ValueError("No PDF file uploaded")
30
 
31
+ # Handle both file path and bytes input
32
+ if isinstance(pdf_file, str):
33
+ with open(pdf_file, 'rb') as f:
34
+ file_bytes = f.read()
35
+ elif isinstance(pdf_file, bytes):
36
+ file_bytes = pdf_file
37
+ else:
38
+ raise TypeError(f"Expected file path or bytes, got {type(pdf_file)}")
39
 
40
  try:
41
+ pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
42
  if len(pdf_reader.pages) == 0:
43
  raise ValueError("PDF has no pages")
44
 
45
+ text = "\n".join(page.extract_text() for page in pdf_reader.pages)
46
  if text is None or text.strip() == "":
47
  raise ValueError("No text extracted from PDF (possibly image-based or empty)")
48
 
49
+ return text[:10000] # Limit to first 10,000 characters
50
  except PyPDF2.errors.PdfReadError as e:
51
  raise Exception(f"PDF read error: {str(e)}")
52
  except Exception as e:
 
70
  if job_desc:
71
  job_words = set(re.findall(r'\w+', job_desc.lower()))
72
  resume_words = set(re.findall(r'\w+', resume_lower))
73
+ scores["relevance_to_job"] = min(20, int(20 * len(job_words & resume_words) / len(job_words))
74
  else:
75
  scores["relevance_to_job"] = min(10, sum(1 for skill in GENERAL_SKILLS if skill in resume_lower))
76
 
 
89
  return scores, min(100, sum(scores.values()))
90
 
91
  def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
92
+ """Analyze resume and return extracted text and analysis as separate outputs"""
93
  try:
 
94
  resume_text = extract_text_from_pdf(pdf_file)
 
95
  except Exception as e:
96
+ return (
97
+ f"Extraction failed: {str(e)}", # First output for textbox
98
+ {"error": str(e)} # Second output for JSON
99
+ )
100
 
101
  scores, total_score = calculate_scores(resume_text, job_desc)
102
 
103
+ # Basic analysis if inference fails
104
+ basic_analysis = {
105
+ "score": {
106
+ "total": total_score,
107
+ "breakdown": scores
108
+ },
109
+ "strengths": [
110
+ "Good clarity score" if scores["clarity"] > 7 else None,
111
+ "Relevant skills" if scores["relevance_to_job"] > 5 else None
112
+ ],
113
+ "improvements": [
114
+ "Add more measurable achievements" if scores["achievements"] < 3 else None,
115
+ "Include more relevant keywords" if scores["relevance_to_job"] < 5 else None,
116
+ "Check for typos" if scores["clarity"] < 9 else None
117
+ ],
118
+ "missing_skills": list(GENERAL_SKILLS - set(re.findall(r'\w+', resume_text.lower())))[:2]
119
+ }
120
 
121
+ # Try to get enhanced analysis if inference function is available
122
+ if inference_fn:
123
+ prompt = f"""[Return valid JSON]: Based on these scores: {scores}, provide:
124
+ - "strengths": 2 key strengths,
125
+ - "improvements": 3 specific improvements,
126
+ - "missing_skills": 2 missing skills (use job description if provided: {job_desc or "None"}).
127
+ Output a valid JSON string only, no extra text."""
128
 
129
+ try:
130
+ result = inference_fn(prompt)
131
+ if result and result.strip():
132
+ enhanced_analysis = json.loads(result)
133
+ return (
134
+ resume_text[:5000], # First output for textbox (limited to 5000 chars)
135
+ {
136
+ "score": {"total": total_score, "breakdown": scores},
137
+ "analysis": enhanced_analysis,
138
+ "raw_text_sample": resume_text[:200]
139
+ }
140
+ )
141
+ except Exception as e:
142
+ print(f"Inference error: {str(e)}")
143
+ # Fall through to basic analysis
144
+
145
+ return (
146
+ resume_text[:5000], # First output for textbox
147
+ {
148
+ "score": {"total": total_score, "breakdown": scores},
149
+ "analysis": basic_analysis,
150
+ "raw_text_sample": resume_text[:200]
 
 
 
 
 
 
 
 
151
  }
152
+ )
153
 
154
  # --- Gradio Interface --- #
155
  with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
156
  with gr.Sidebar():
157
+ gr.Markdown("# Resume Analyzer")
158
+ gr.Markdown("Upload your resume in PDF format for analysis")
 
 
 
 
 
 
 
159
 
160
  with gr.Row():
161
  with gr.Column(scale=1):
162
+ pdf_input = gr.File(label="PDF Resume", type="binary")
163
+ job_desc_input = gr.Textbox(label="Job Description (Optional)", lines=3)
164
+ submit_btn = gr.Button("Analyze")
165
+
166
  with gr.Column(scale=2):
167
+ extracted_text = gr.Textbox(label="Extracted Text", lines=10, interactive=False)
168
+ analysis_output = gr.JSON(label="Analysis Results")
169
 
170
+ submit_btn.click(
171
+ fn=analyze_resume,
172
+ inputs=[pdf_input, job_desc_input],
173
+ outputs=[extracted_text, analysis_output]
 
174
  )
175
 
176
  demo.launch(share=True)