PDF-Editor

Build error

AzizWazir committed on Dec 12, 2024

Commit

972cb11

verified ·

1 Parent(s): 526c1dd

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,11 +2,9 @@ import streamlit as st
 from PyPDF2 import PdfReader
 from docx import Document
 from io import BytesIO
-from pdf2image import convert_from_bytes
-import pytesseract
 def pdf_to_word(pdf_file, password=None):
-    """Convert a PDF file to a Word file with optional decryption and OCR support."""
     reader = PdfReader(pdf_file)
     # Decrypt the PDF if it's encrypted
@@ -20,19 +18,12 @@ def pdf_to_word(pdf_file, password=None):
             raise ValueError("The PDF is encrypted. Please provide a password.")
     document = Document()
-    # Extract text from each page
     for page in reader.pages:
-        if page.extract_text():  # Use PyPDF2 for text extraction
             text = page.extract_text()
             document.add_paragraph(text)
         else:
-            # Convert the page to an image and use OCR
-            pdf_bytes = pdf_file.read()
-            images = convert_from_bytes(pdf_bytes)
-            for image in images:
-                text = pytesseract.image_to_string(image)
-                document.add_paragraph(text)
     word_file = BytesIO()
     document.save(word_file)
@@ -62,4 +53,6 @@ if uploaded_file is not None:
                 mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
             )
         except ValueError as ve:
-            st.error(str(ve

 from PyPDF2 import PdfReader
 from docx import Document
 from io import BytesIO
 def pdf_to_word(pdf_file, password=None):
+    """Convert a PDF file to a Word file with optional decryption."""
     reader = PdfReader(pdf_file)
     # Decrypt the PDF if it's encrypted
             raise ValueError("The PDF is encrypted. Please provide a password.")
     document = Document()
     for page in reader.pages:
+        if page.extract_text():  # Ensure text is extracted
             text = page.extract_text()
             document.add_paragraph(text)
         else:
+            document.add_paragraph("[This page contains non-extractable content or images]")
     word_file = BytesIO()
     document.save(word_file)
                 mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
             )
         except ValueError as ve:
+            st.error(str(ve))
+        except Exception as e:
+            st.error(f"An error occurred: {str(e)}")