shaheerawan3 committed on
Commit
01dcc2d
·
verified ·
1 Parent(s): 7588311

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -6
app.py CHANGED
@@ -12,12 +12,14 @@ qa_generator = pipeline("text2text-generation", model="google/flan-t5-base", dev
12
  def extract_text_from_pdf(pdf_file):
13
  """Extract text from uploaded PDF"""
14
  try:
15
- pdf_reader = PyPDF2.PdfReader(BytesIO(pdf_file))
16
- text = ""
17
- # Limit to first 10 pages for CPU performance
18
- max_pages = min(10, len(pdf_reader.pages))
19
- for page_num in range(max_pages):
20
- text += pdf_reader.pages[page_num].extract_text()
 
 
21
  return text[:15000] # Limit tokens
22
  except Exception as e:
23
  return f"Error reading PDF: {str(e)}"
 
12
  def extract_text_from_pdf(pdf_file):
13
  """Extract text from uploaded PDF"""
14
  try:
15
+ # Open the file path directly
16
+ with open(pdf_file.name, 'rb') as f:
17
+ pdf_reader = PyPDF2.PdfReader(f)
18
+ text = ""
19
+ # Limit to first 10 pages for CPU performance
20
+ max_pages = min(10, len(pdf_reader.pages))
21
+ for page_num in range(max_pages):
22
+ text += pdf_reader.pages[page_num].extract_text()
23
  return text[:15000] # Limit tokens
24
  except Exception as e:
25
  return f"Error reading PDF: {str(e)}"