Mangesh223 committed on
Commit
94d6cfd
·
verified ·
1 Parent(s): 98d0df5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -10
app.py CHANGED
@@ -1,27 +1,47 @@
1
  import gradio as gr
2
  import PyPDF2
3
  import io
 
4
  import json
5
- from dotenv import load_dotenv
 
6
  from huggingface_hub import login
 
7
 
8
  # --- Configuration --- #
9
  load_dotenv()
10
  login(token=os.getenv("HF_TOKEN"))
11
 
12
  def extract_text_from_pdf(pdf_file):
13
- """Improved PDF text extraction with error handling"""
 
 
 
 
 
 
 
 
 
 
 
 
14
  try:
15
- if isinstance(pdf_file, bytes):
16
- file_bytes = pdf_file
17
- else:
18
- raise ValueError("Invalid file format")
19
-
20
  pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
21
- text = "\n".join(page.extract_text() for page in pdf_reader.pages if page.extract_text())
22
- return text[:15000] # Increased character limit
 
 
 
 
 
 
 
 
23
  except Exception as e:
24
- raise Exception(f"PDF processing error: {str(e)}")
 
 
25
 
26
  def generate_ai_prompt(resume_text, job_desc=None):
27
  """Generates smart analysis prompt for AI"""
 
1
  import gradio as gr
2
  import PyPDF2
3
  import io
4
+ import re
5
  import json
6
+ import os # Added missing import
7
+ import gc
8
  from huggingface_hub import login
9
+ from dotenv import load_dotenv
10
 
11
  # --- Configuration --- #
12
  load_dotenv()
13
  login(token=os.getenv("HF_TOKEN"))
14
 
15
  def extract_text_from_pdf(pdf_file):
16
+ """Extract text from PDF with detailed error handling"""
17
+ if pdf_file is None:
18
+ raise ValueError("No PDF file uploaded")
19
+
20
+ # Handle both file path and bytes input
21
+ if isinstance(pdf_file, str):
22
+ with open(pdf_file, 'rb') as f:
23
+ file_bytes = f.read()
24
+ elif isinstance(pdf_file, bytes):
25
+ file_bytes = pdf_file
26
+ else:
27
+ raise TypeError(f"Expected file path or bytes, got {type(pdf_file)}")
28
+
29
  try:
 
 
 
 
 
30
  pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_bytes))
31
+ if len(pdf_reader.pages) == 0:
32
+ raise ValueError("PDF has no pages")
33
+
34
+ text = "\n".join(page.extract_text() for page in pdf_reader.pages)
35
+ if text is None or text.strip() == "":
36
+ raise ValueError("No text extracted from PDF (possibly image-based or empty)")
37
+
38
+ return text[:10000] # Limit to first 10,000 characters
39
+ except PyPDF2.errors.PdfReadError as e:
40
+ raise Exception(f"PDF read error: {str(e)}")
41
  except Exception as e:
42
+ raise Exception(f"Extraction error: {str(e)}")
43
+ finally:
44
+ gc.collect()
45
 
46
  def generate_ai_prompt(resume_text, job_desc=None):
47
  """Generates smart analysis prompt for AI"""