# pdf_convert/src/streamlit_app.py
# arunram-spglobal's picture
# PDF to Doc
# 2eee1ce verified
import streamlit as st
from docx import Document
import pdfplumber
import pytesseract
from PIL import Image
import fitz # PyMuPDF
import io
# Streamlit app: every uploaded PDF or image is turned into a Word (.docx)
# document that the user can download. PDFs go through pdfplumber's text
# extraction; images go through Tesseract OCR.

# Control characters that XML (and therefore .docx) cannot store. Tab (9),
# newline (10) and carriage return (13) are legal and are kept.
_XML_ILLEGAL_CONTROLS = dict.fromkeys(
    code for code in range(32) if code not in (9, 10, 13)
)

_DOCX_MIME = (
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
)


def _extract_pdf_text(pdf_file):
    """Return the text of every page in *pdf_file*, one page per line break."""
    # The context manager closes the PDF even when extraction raises.
    with pdfplumber.open(pdf_file) as pdf:
        # `or ""` keeps the join safe when a page yields no text; the newline
        # stops the last word of one page fusing with the first of the next.
        return "\n".join(page.extract_text() or "" for page in pdf.pages)


def _extract_image_text(image_file):
    """Return the text that Tesseract OCR recognises in *image_file*."""
    with Image.open(image_file) as img:
        return pytesseract.image_to_string(img)


def _docx_name(file_name):
    """Return *file_name* with only its final extension replaced by ``.docx``.

    A name without any dot simply gets ``.docx`` appended.
    """
    stem, dot, _ = file_name.rpartition(".")
    return f"{stem if dot else file_name}.docx"


def _build_docx(source_name, text):
    """Return the bytes of a Word document holding a heading and *text*."""
    doc = Document()
    doc.add_heading(f"Converted from {source_name}", level=1)
    # Extracted/OCR text may carry control characters (e.g. form feeds) that
    # cannot be written into the document's XML, so drop them first.
    doc.add_paragraph(text.translate(_XML_ILLEGAL_CONTROLS))

    # Save to an in-memory buffer so nothing is written to disk.
    buffer = io.BytesIO()
    doc.save(buffer)
    return buffer.getvalue()


st.title("📄 Image & PDF → Word Converter")

uploaded_files = st.file_uploader(
    "Upload PDF or Image files",
    type=["pdf", "jpg", "jpeg", "png"],
    accept_multiple_files=True,
)

# Nothing is rendered below the uploader until at least one file is uploaded.
for index, uploaded in enumerate(uploaded_files or []):
    file_name = uploaded.name
    # Lower-cased text after the last dot decides which extractor runs.
    ext = file_name.rpartition(".")[2].lower()
    st.write(f"### Processing: {file_name}")

    # ----------------- PDF / Images -----------------
    if ext == "pdf":
        text = _extract_pdf_text(uploaded)
    else:
        text = _extract_image_text(uploaded)

    # ----------------- Create Word -----------------
    st.download_button(
        label=f"Download Word file for {file_name}",
        data=_build_docx(file_name, text),
        file_name=_docx_name(file_name),
        mime=_DOCX_MIME,
        # Explicit key keeps the buttons distinct even if two uploads share
        # the same file name.
        key=f"download-{index}-{file_name}",
    )