Spaces:

AzizWazir
/

PDF-Convertor

Sleeping

App Files Files Community

AzizWazir committed on Dec 29, 2024

Commit

5fa7f28

verified ·

1 Parent(s): 03dbb03

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -53

app.py DELETED Viewed

@@ -1,53 +0,0 @@
-import os
-import tempfile
-import docx
-import pdf2image
-import pytesseract
-import streamlit as st
-from PIL import Image
-# Point pytesseract at the default Windows install only when that binary
-# actually exists. On any other host (e.g. a Linux container) leave
-# pytesseract's default command untouched so Tesseract is looked up on PATH
-# instead of at a path that is guaranteed to be missing.
-_WINDOWS_TESSERACT_CMD = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
-if os.path.isfile(_WINDOWS_TESSERACT_CMD):
-  pytesseract.pytesseract.tesseract_cmd = _WINDOWS_TESSERACT_CMD
-def extract_text_from_image_pdf(pdf_file):
-  """Extract text from a PDF by rasterizing its pages and running OCR on each.
-
-  Args:
-    pdf_file: The PDF to read. May be a filesystem path, raw PDF bytes, or a
-      binary file-like object exposing ``read()`` (such as an in-memory
-      upload).
-
-  Returns:
-    The OCR text of every rendered image, in order, with a newline appended
-    after each one. An empty string if no images are produced.
-  """
-  if isinstance(pdf_file, (bytes, bytearray)):
-    pdf_bytes = bytes(pdf_file)
-  elif hasattr(pdf_file, 'read'):
-    pdf_bytes = pdf_file.read()
-  else:
-    # Anything else is treated as a path, exactly as before.
-    with open(pdf_file, 'rb') as f:
-      pdf_bytes = f.read()
-  # Convert the PDF into images; OCR operates on pixels, not PDF objects.
-  images = pdf2image.convert_from_bytes(pdf_bytes)
-  # Join once instead of growing a string with += for every image.
-  return ''.join(pytesseract.image_to_string(image) + '\n' for image in images)
-def main():
-  """Run the Streamlit UI: upload a PDF, OCR it, display and offer the text."""
-  # Title and description
-  st.title("PDF to Text Converter")
-  st.subheader("Convert your PDF images to editable text documents.")
-  # Upload PDF file
-  uploaded_file = st.file_uploader("Choose a PDF file to convert:", type="pdf")
-  if uploaded_file is None:
-    return
-  # The upload is held in memory; its .name is only the client-side filename,
-  # not a path on this machine, so opening it would fail. Spool the bytes to
-  # a temporary file so the extractor receives a real path.
-  with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
-    tmp.write(uploaded_file.getvalue())
-    tmp_path = tmp.name
-  try:
-    extracted_text = extract_text_from_image_pdf(tmp_path)
-  finally:
-    # delete=False (needed so the file can be reopened by path on Windows)
-    # means cleanup is our responsibility.
-    os.remove(tmp_path)
-  # Display extracted text
-  st.success("Text extracted from PDF:")
-  st.write(extracted_text)
-  # Send the text to the browser; writing a file on the server would never
-  # reach the user's machine.
-  st.download_button(
-    "Download text as .txt file",
-    data=extracted_text,
-    file_name="extracted_text.txt",
-    mime="text/plain",
-  )
-if __name__ == "__main__":  # start the app only when run as a script, not on import
-  main()