"""Streamlit app: convert uploaded PDFs and images into Word (.docx) files.

PDF text is extracted with pdfplumber; images are passed through Tesseract
OCR via pytesseract. Every uploaded file gets its own generated document and
its own download button.
"""

import io
import re
from pathlib import Path

import fitz  # PyMuPDF
import pdfplumber
import pytesseract
import streamlit as st
from docx import Document
from PIL import Image

DOCX_MIME = (
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
)

# Control characters that XML 1.0 does not allow. OCR output commonly
# contains a form feed (\x0c), which makes python-docx/lxml raise ValueError
# when the text is written into the document.
_XML_ILLEGAL_CHARS = re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f]")


def _docx_name(file_name):
    """Return *file_name* with its final extension swapped for ``.docx``.

    Only the trailing suffix is replaced, so names that contain the extension
    text elsewhere ("pdf_notes.pdf") or use upper case ("SCAN.PDF") are
    handled correctly.
    """
    return f"{Path(file_name).stem}.docx"


def _sanitize_for_docx(text):
    """Strip characters that cannot be stored in a Word (XML) document."""
    return _XML_ILLEGAL_CHARS.sub("", text)


def _extract_pdf_text(pdf_file):
    """Return the text of every page in *pdf_file*, one page per line block.

    Pages for which pdfplumber finds no text contribute an empty string.
    """
    # The context manager closes the PDF even if a page fails to parse.
    with pdfplumber.open(pdf_file) as pdf:
        return "\n".join(page.extract_text() or "" for page in pdf.pages)


def _extract_image_text(image_file):
    """Return the text Tesseract OCR recognises in *image_file*."""
    with Image.open(image_file) as img:
        return pytesseract.image_to_string(img)


def _build_docx(source_name, text):
    """Return the bytes of a .docx holding a heading and *text*."""
    doc = Document()
    doc.add_heading(f"Converted from {source_name}", level=1)
    doc.add_paragraph(_sanitize_for_docx(text))

    # Serialise to memory so nothing is written to the server's disk.
    buffer = io.BytesIO()
    doc.save(buffer)
    return buffer.getvalue()


st.title("📄 Image & PDF → Word Converter")

uploaded_files = st.file_uploader(
    "Upload PDF or Image files",
    type=["pdf", "jpg", "jpeg", "png"],
    accept_multiple_files=True,
)

if uploaded_files:
    for index, uploaded in enumerate(uploaded_files):
        file_name = uploaded.name
        st.write(f"### Processing: {file_name}")

        # Compare the suffix case-insensitively so "REPORT.PDF" is still
        # routed to the PDF branch; everything else is treated as an image.
        if Path(file_name).suffix.lower() == ".pdf":
            text = _extract_pdf_text(uploaded)
        else:
            text = _extract_image_text(uploaded)

        st.download_button(
            label=f"Download Word file for {file_name}",
            data=_build_docx(file_name, text),
            file_name=_docx_name(file_name),
            mime=DOCX_MIME,
            # An explicit key keeps the widgets distinct when two uploads
            # share the same name.
            key=f"download-{index}-{file_name}",
        )