Spaces:

VyasAI
/

PDF_to_TXT

Sleeping

pvyas96 committed on Aug 30, 2024

Commit

e588742

verified ·

1 Parent(s): 3d354e4

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,13 +6,12 @@ from pdf2image import convert_from_path
 def pdf_to_text(pdf_file):
     # Open the PDF file
-    pdf = PyPDF2.PdfFileReader(pdf_file)
     # Extract the text from each page
     text = ''
-    for page in range(pdf.numPages):
-        page_obj = pdf.getPage(page)
-        text += page_obj.extractText()
     # If the text is empty, use OCR to extract the text
     if not text:

 def pdf_to_text(pdf_file):
     # Open the PDF file
+    pdf = PyPDF2.PdfReader(pdf_file)
     # Extract the text from each page
     text = ''
+    for page in pdf.pages:
+        text += page.extract_text()
     # If the text is empty, use OCR to extract the text
     if not text: