Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,9 +6,6 @@ import pytesseract
|
|
| 6 |
from docx import Document
|
| 7 |
import tempfile
|
| 8 |
|
| 9 |
-
# Folder path for PDFs
|
| 10 |
-
pdf_folder_path = "D:/General"
|
| 11 |
-
|
| 12 |
# Function to convert PDF to image
|
| 13 |
def pdf_to_image(pdf_path):
|
| 14 |
try:
|
|
@@ -37,18 +34,21 @@ def save_to_word(text, file_name):
|
|
| 37 |
|
| 38 |
# Streamlit UI
|
| 39 |
st.title("PDF to Word Converter")
|
| 40 |
-
st.write("
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
-
if pdf_files:
|
| 46 |
-
for pdf_file in pdf_files:
|
| 47 |
-
pdf_path = os.path.join(pdf_folder_path, pdf_file)
|
| 48 |
-
|
| 49 |
# Convert PDF to images
|
| 50 |
-
images = pdf_to_image(pdf_path)
|
| 51 |
-
|
| 52 |
if images:
|
| 53 |
# Extract text from images
|
| 54 |
extracted_text = ""
|
|
@@ -59,8 +59,8 @@ if pdf_files:
|
|
| 59 |
|
| 60 |
# Save the extracted text to Word
|
| 61 |
if extracted_text:
|
| 62 |
-
word_file = save_to_word(extracted_text,
|
| 63 |
-
st.success(f"Conversion of {
|
| 64 |
-
st.download_button(f"Download {
|
| 65 |
else:
|
| 66 |
-
st.write("
|
|
|
|
| 6 |
from docx import Document
|
| 7 |
import tempfile
|
| 8 |
|
|
|
|
|
|
|
|
|
|
| 9 |
# Function to convert PDF to image
|
| 10 |
def pdf_to_image(pdf_path):
|
| 11 |
try:
|
|
|
|
| 34 |
|
| 35 |
# Streamlit UI
|
| 36 |
st.title("PDF to Word Converter")
|
| 37 |
+
st.write("Upload a PDF to convert it to a Word document")
|
| 38 |
+
|
| 39 |
+
# File upload feature
|
| 40 |
+
uploaded_files = st.file_uploader("Choose PDF files", type="pdf", accept_multiple_files=True)
|
| 41 |
|
| 42 |
+
if uploaded_files:
|
| 43 |
+
for uploaded_file in uploaded_files:
|
| 44 |
+
# Save the uploaded PDF to a temporary file
|
| 45 |
+
temp_pdf_path = tempfile.mktemp(suffix=".pdf")
|
| 46 |
+
with open(temp_pdf_path, "wb") as temp_pdf:
|
| 47 |
+
temp_pdf.write(uploaded_file.getbuffer())
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
# Convert PDF to images
|
| 50 |
+
images = pdf_to_image(temp_pdf_path)
|
| 51 |
+
|
| 52 |
if images:
|
| 53 |
# Extract text from images
|
| 54 |
extracted_text = ""
|
|
|
|
| 59 |
|
| 60 |
# Save the extracted text to Word
|
| 61 |
if extracted_text:
|
| 62 |
+
word_file = save_to_word(extracted_text, uploaded_file.name)
|
| 63 |
+
st.success(f"Conversion of {uploaded_file.name} complete! Download the Word file below.")
|
| 64 |
+
st.download_button(f"Download {uploaded_file.name} as Word", word_file, file_name=f"{uploaded_file.name}.docx")
|
| 65 |
else:
|
| 66 |
+
st.write("Please upload PDF files to convert.")
|