Spaces:
Sleeping
Sleeping
import io
from pathlib import Path

import fitz  # PyMuPDF
import pdfplumber
import pytesseract
import streamlit as st
from docx import Document
from PIL import Image
# Streamlit app: convert uploaded PDFs / images to Word (.docx) documents.
#
# For each uploaded file the script extracts text (the PDF text layer via
# pdfplumber, or OCR via pytesseract for images), writes it into a new Word
# document held in memory, and offers that document as a download.

DOCX_MIME = (
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
)

# Control characters below U+0020 other than tab, line feed and carriage
# return are not allowed in XML 1.0 text, so they cannot be stored in a .docx
# paragraph. Mapping them to None makes ``str.translate`` delete them.
_XML_UNSAFE_CHARS = dict.fromkeys(
    code for code in range(0x20) if code not in (0x09, 0x0A, 0x0D)
)


def _extract_pdf_text(pdf_file):
    """Return the extracted text of every page in *pdf_file*, newline-joined.

    Pages for which ``extract_text()`` yields nothing contribute an empty
    string. The PDF is closed even if extraction raises.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # Join with a newline so the last word of one page does not fuse with
        # the first word of the next.
        return "\n".join(page.extract_text() or "" for page in pdf.pages)


def _extract_image_text(image_file):
    """Return the text pytesseract recognises in *image_file*."""
    with Image.open(image_file) as img:
        return pytesseract.image_to_string(img)


def _docx_file_name(file_name):
    """Return *file_name* with its final extension replaced by ``.docx``.

    Only the trailing suffix is swapped, so ``pdf_report.pdf`` becomes
    ``pdf_report.docx`` and ``SCAN.PDF`` becomes ``SCAN.docx``.
    """
    return Path(file_name).with_suffix(".docx").name


def _build_docx(source_name, text):
    """Return the bytes of a Word document containing *text*.

    The document gets a level-1 heading naming *source_name* followed by one
    paragraph holding *text* with XML-unsafe control characters removed.
    """
    doc = Document()
    doc.add_heading(f"Converted from {source_name}", level=1)
    # OCR output can contain control characters (e.g. a trailing form feed)
    # that would make the paragraph text invalid XML; drop them first.
    doc.add_paragraph(text.translate(_XML_UNSAFE_CHARS))

    buffer = io.BytesIO()
    doc.save(buffer)
    return buffer.getvalue()


# NOTE(review): the two non-ASCII glyphs in the title look like mis-encoded
# characters (possibly an emoji and an arrow) — confirm the intended text.
st.title("π Image & PDF β Word Converter")

uploaded_files = st.file_uploader(
    "Upload PDF or Image files",
    type=["pdf", "jpg", "jpeg", "png"],
    accept_multiple_files=True,
)

# ``or []`` keeps the loop a no-op while nothing has been uploaded.
for index, uploaded in enumerate(uploaded_files or []):
    file_name = uploaded.name
    st.write(f"### Processing: {file_name}")

    # Anything that is not a PDF is treated as an image and sent through OCR.
    if Path(file_name).suffix.lower() == ".pdf":
        text = _extract_pdf_text(uploaded)
    else:
        text = _extract_image_text(uploaded)

    if not text.strip():
        # Tell the user instead of silently producing an empty document.
        st.warning(f"No text could be extracted from {file_name}.")

    st.download_button(
        label=f"Download Word file for {file_name}",
        data=_build_docx(file_name, text),
        file_name=_docx_file_name(file_name),
        mime=DOCX_MIME,
        # Explicit per-file key so two uploads sharing a name cannot produce
        # identical widgets.
        key=f"download_{index}",
    )