Mangesh223 committed on
Commit
54e0095
·
verified ·
1 Parent(s): bf711eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -27
app.py CHANGED
@@ -24,28 +24,27 @@ ACHIEVEMENT_PATTERN = re.compile(r'(increased|reduced|saved|improved)\s+by\s+(\d
24
  TYPO_PATTERN = re.compile(r'\b(?:responsibilities|accomplishment|experiance)\b', re.I)
25
 
26
  def extract_text_from_pdf(pdf_file):
27
- """Extract text from PDF with robust error handling"""
28
  if pdf_file is None:
29
  raise ValueError("No PDF file uploaded")
30
 
31
- # Check if pdf_file is bytes (binary data from Gradio)
32
  if not isinstance(pdf_file, bytes):
33
- raise TypeError(f"Expected binary data (bytes), got {type(pdf_file)}")
34
 
35
  try:
36
- # Read binary data into PdfReader
37
  pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
38
  if len(pdf_reader.pages) == 0:
39
  raise ValueError("PDF has no pages")
40
 
41
- # Extract text from first page
42
  text = pdf_reader.pages[0].extract_text()
43
  if text is None or text.strip() == "":
44
- raise ValueError("No text extracted from PDF (possibly image-based)")
45
 
46
- return text[:10000] # Limit to first 10k chars
 
 
47
  except Exception as e:
48
- raise Exception(f"PDF extraction failed: {str(e)}")
49
  finally:
50
  gc.collect()
51
 
@@ -62,7 +61,6 @@ def calculate_scores(resume_text, job_desc=None):
62
  "customization": 0
63
  }
64
 
65
- # Relevance calculation
66
  if job_desc:
67
  job_words = set(re.findall(r'\w+', job_desc.lower()))
68
  resume_words = set(re.findall(r'\w+', resume_lower))
@@ -70,11 +68,9 @@ def calculate_scores(resume_text, job_desc=None):
70
  else:
71
  scores["relevance_to_job"] = min(10, sum(1 for skill in GENERAL_SKILLS if skill in resume_lower))
72
 
73
- # Experience calculation
74
  scores["experience_quality"] = min(10, len(YEAR_PATTERN.findall(resume_text)))
75
  scores["experience_quality"] += min(10, len(ACHIEVEMENT_PATTERN.findall(resume_text)) * 2)
76
 
77
- # Education detection
78
  if 'phd' in resume_lower or 'doctorate' in resume_lower:
79
  scores["education"] = 8
80
  elif 'master' in resume_lower or 'msc' in resume_lower or 'mba' in resume_lower:
@@ -87,12 +83,16 @@ def calculate_scores(resume_text, job_desc=None):
87
  return scores, min(100, sum(scores.values()))
88
 
89
  def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
90
- """Analyze resume using Together AI inference"""
91
  try:
92
- # Extract text from the uploaded PDF
93
  resume_text = extract_text_from_pdf(pdf_file)
 
94
  except Exception as e:
95
- return {"error": f"Text extraction error: {str(e)}", "raw_result": "Not applicable"}
 
 
 
96
 
97
  scores, total_score = calculate_scores(resume_text, job_desc)
98
 
@@ -104,25 +104,42 @@ def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
104
 
105
  try:
106
  if inference_fn is None:
107
- return {"error": "Inference function not provided", "raw_result": "Not available"}
 
 
 
108
 
109
- # Send prompt to Together AI (no file upload, just text)
110
  result = inference_fn(prompt)
 
 
111
  if not result or result.strip() == "":
112
- return {"error": "Empty response from Together AI", "raw_result": result}
 
 
 
113
 
114
- # Parse the response as JSON
115
  parsed_result = json.loads(result)
116
  return {
117
- "score": {"total": total_score, "breakdown": scores},
118
- "analysis": parsed_result,
119
- "raw_text": resume_text[:500],
120
- "raw_result": result # Debug: Show raw response
 
 
 
 
121
  }
122
  except json.JSONDecodeError as e:
123
- return {"error": f"Failed to parse JSON: {str(e)}", "raw_result": result}
 
 
 
124
  except Exception as e:
125
- return {"error": f"Unexpected error: {str(e)}", "raw_result": result if 'result' in locals() else "Not available"}
 
 
 
126
 
127
  # --- Gradio Interface --- #
128
  with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
@@ -131,7 +148,6 @@ with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
131
  gr.Markdown("Powered by mistralai/Mistral-7B-Instruct-v0.3 via Together AI API. Sign in to use.")
132
  button = gr.LoginButton("Sign in")
133
 
134
- # Load Mistral-7B from Together AI
135
  inference = gr.load(
136
  "models/mistralai/Mistral-7B-Instruct-v0.3",
137
  accept_token=button,
@@ -145,12 +161,13 @@ with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
145
  gr.Textbox(label="Job Description (Optional)", lines=3)
146
  ]
147
  with gr.Column(scale=2):
148
- output = gr.JSON(label="Analysis")
 
149
 
150
  inputs[0].upload(
151
  fn=lambda pdf, job_desc: analyze_resume(pdf, job_desc, inference),
152
  inputs=inputs,
153
- outputs=output,
154
  queue=True
155
  )
156
 
 
24
  TYPO_PATTERN = re.compile(r'\b(?:responsibilities|accomplishment|experiance)\b', re.I)
25
 
26
  def extract_text_from_pdf(pdf_file):
27
+ """Extract text from PDF with detailed error handling"""
28
  if pdf_file is None:
29
  raise ValueError("No PDF file uploaded")
30
 
 
31
  if not isinstance(pdf_file, bytes):
32
+ raise TypeError(f"Expected bytes, got {type(pdf_file)}")
33
 
34
  try:
 
35
  pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
36
  if len(pdf_reader.pages) == 0:
37
  raise ValueError("PDF has no pages")
38
 
 
39
  text = pdf_reader.pages[0].extract_text()
40
  if text is None or text.strip() == "":
41
+ raise ValueError("No text extracted from PDF (possibly image-based or empty)")
42
 
43
+ return text[:10000]
44
+ except PyPDF2.errors.PdfReadError as e:
45
+ raise Exception(f"PDF read error: {str(e)}")
46
  except Exception as e:
47
+ raise Exception(f"Extraction error: {str(e)}")
48
  finally:
49
  gc.collect()
50
 
 
61
  "customization": 0
62
  }
63
 
 
64
  if job_desc:
65
  job_words = set(re.findall(r'\w+', job_desc.lower()))
66
  resume_words = set(re.findall(r'\w+', resume_lower))
 
68
  else:
69
  scores["relevance_to_job"] = min(10, sum(1 for skill in GENERAL_SKILLS if skill in resume_lower))
70
 
 
71
  scores["experience_quality"] = min(10, len(YEAR_PATTERN.findall(resume_text)))
72
  scores["experience_quality"] += min(10, len(ACHIEVEMENT_PATTERN.findall(resume_text)) * 2)
73
 
 
74
  if 'phd' in resume_lower or 'doctorate' in resume_lower:
75
  scores["education"] = 8
76
  elif 'master' in resume_lower or 'msc' in resume_lower or 'mba' in resume_lower:
 
83
  return scores, min(100, sum(scores.values()))
84
 
85
  def analyze_resume(pdf_file, job_desc=None, inference_fn=None):
86
+ """Analyze resume and return extracted text and analysis"""
87
  try:
88
+ print(f"Received pdf_file type: {type(pdf_file)}") # Debug: Log input type
89
  resume_text = extract_text_from_pdf(pdf_file)
90
+ print(f"Extracted text: {resume_text[:100]}") # Debug: Log first 100 chars
91
  except Exception as e:
92
+ return {
93
+ "extracted_text": f"Extraction failed: {str(e)}",
94
+ "analysis": {"error": f"Text extraction error: {str(e)}", "raw_prompt": "Not generated", "raw_result": "Not applicable"}
95
+ }
96
 
97
  scores, total_score = calculate_scores(resume_text, job_desc)
98
 
 
104
 
105
  try:
106
  if inference_fn is None:
107
+ return {
108
+ "extracted_text": resume_text,
109
+ "analysis": {"error": "Inference function not provided", "raw_prompt": prompt, "raw_result": "Not available"}
110
+ }
111
 
112
+ print(f"Prompt sent to Together AI: {prompt}") # Debug: Log prompt
113
  result = inference_fn(prompt)
114
+ print(f"Raw result from Together AI: {result}") # Debug: Log response
115
+
116
  if not result or result.strip() == "":
117
+ return {
118
+ "extracted_text": resume_text,
119
+ "analysis": {"error": "Empty response from Together AI", "raw_prompt": prompt, "raw_result": result}
120
+ }
121
 
 
122
  parsed_result = json.loads(result)
123
  return {
124
+ "extracted_text": resume_text,
125
+ "analysis": {
126
+ "score": {"total": total_score, "breakdown": scores},
127
+ "analysis": parsed_result,
128
+ "raw_text": resume_text[:500],
129
+ "raw_prompt": prompt,
130
+ "raw_result": result
131
+ }
132
  }
133
  except json.JSONDecodeError as e:
134
+ return {
135
+ "extracted_text": resume_text,
136
+ "analysis": {"error": f"Failed to parse JSON: {str(e)}", "raw_prompt": prompt, "raw_result": result}
137
+ }
138
  except Exception as e:
139
+ return {
140
+ "extracted_text": resume_text,
141
+ "analysis": {"error": f"Unexpected inference error: {str(e)}", "raw_prompt": prompt, "raw_result": result if 'result' in locals() else "Not available"}
142
+ }
143
 
144
  # --- Gradio Interface --- #
145
  with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
 
148
  gr.Markdown("Powered by mistralai/Mistral-7B-Instruct-v0.3 via Together AI API. Sign in to use.")
149
  button = gr.LoginButton("Sign in")
150
 
 
151
  inference = gr.load(
152
  "models/mistralai/Mistral-7B-Instruct-v0.3",
153
  accept_token=button,
 
161
  gr.Textbox(label="Job Description (Optional)", lines=3)
162
  ]
163
  with gr.Column(scale=2):
164
+ extracted_text_output = gr.Textbox(label="Extracted Text", lines=10, interactive=False)
165
+ analysis_output = gr.JSON(label="Analysis")
166
 
167
  inputs[0].upload(
168
  fn=lambda pdf, job_desc: analyze_resume(pdf, job_desc, inference),
169
  inputs=inputs,
170
+ outputs=[extracted_text_output, analysis_output],
171
  queue=True
172
  )
173