Spaces:

AzizWazir
/

PDF-Convertor

Sleeping

AzizWazir committed on Jan 5, 2025

Commit

b1cf141

verified ·

1 Parent(s): 83f9467

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,14 +1,20 @@
 import os
 import streamlit as st
 from pdf2image import convert_from_path
-import pytesseract
-from PIL import Image
-import pandas as pd
-from docx import Document
-# Set paths for poppler and tesseract (for local testing or adjust as per your environment)
-POPPLER_PATH = "/usr/bin"
-pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"
 # Function to extract text from an image-based PDF
 def extract_text_from_image_pdf(pdf_path):

 import os
 import streamlit as st
 from pdf2image import convert_from_path
+# Path to your PDF file
+pdf_path = "path_to_your_pdf.pdf"
+# Path to Poppler binary (optional if already in PATH)
+poppler_path = r"C:\path\to\poppler\bin"  # Update this path as needed
+try:
+    # Convert PDF to images
+    images = convert_from_path(pdf_path, poppler_path=poppler_path)
+    print(f"Converted {len(images)} pages to images successfully!")
+except Exception as e:
+    print(f"An error occurred: {e}")
 # Function to extract text from an image-based PDF
 def extract_text_from_image_pdf(pdf_path):