Spaces:
Sleeping
Sleeping
File size: 1,960 Bytes
47b61ab d73dc88 5043be1 47b61ab d73dc88 1560fe7 5043be1 47b61ab 5043be1 47b61ab d73dc88 47b61ab 47e620b 47b61ab 5043be1 47b61ab 5043be1 47e620b 47b61ab 5043be1 47b61ab d73dc88 47b61ab d73dc88 47b61ab d73dc88 47b61ab d73dc88 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 | import streamlit as st
import io
import pytesseract
from PIL import Image
from docx import Document
from transformers import pipeline
from reportlab.pdfgen import canvas
# Set up OCR pipelines
# Streamlit re-executes this script on every user interaction, so the model is
# loaded through st.cache_resource to build the pipeline once and reuse it
# instead of re-creating it on each rerun.
@st.cache_resource
def _load_handwritten_ocr():
    """Build the TrOCR image-to-text pipeline used for handwriting."""
    return pipeline("image-to-text", model="microsoft/trocr-base-handwritten")


handwritten_ocr = _load_handwritten_ocr()
def extract_text(image):
    """Extract text from an image with both handwriting and printed-text OCR.

    The image is passed to the module-level ``handwritten_ocr`` pipeline and
    to ``pytesseract.image_to_string``. Both results are stripped and joined
    with a newline, handwriting result first.

    Args:
        image: The image to read. The caller in this file passes a
            ``PIL.Image`` already converted to RGB.

    Returns:
        The combined text, or ``"No text detected."`` when neither engine
        produced anything other than whitespace.

    Note:
        A failure in the handwriting pipeline is logged and treated as
        "no handwriting found" so the Tesseract result can still be returned.
        Errors raised by ``pytesseract`` are not caught here.
    """
    import logging  # local import keeps this block self-contained

    try:
        # Try Hugging Face model for handwriting
        handwritten_text = handwritten_ocr(image)[0]["generated_text"]
    except Exception:
        # Best-effort: keep going with Tesseract, but leave a trace so a
        # broken model is not mistaken for an image without handwriting.
        logging.getLogger(__name__).warning(
            "Handwriting OCR failed; falling back to Tesseract only.",
            exc_info=True,
        )
        handwritten_text = ""

    # Use Tesseract for printed text
    printed_text = pytesseract.image_to_string(image)

    # Combine both results, skipping whichever engine returned nothing
    parts = (handwritten_text.strip(), printed_text.strip())
    return "\n".join(part for part in parts if part) or "No text detected."
# Streamlit UI: upload an image, run OCR on it, display the text, and offer
# the result for download as DOCX and PDF.
st.title("📝 Handwritten & Printed Text Extractor")

# File uploader
uploaded_file = st.file_uploader("📤 Upload an image", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # Open and display the uploaded image
    image = Image.open(uploaded_file)
    st.image(image, caption="📷 Uploaded Image", use_container_width=True)

    # Convert to RGB before handing the image to the OCR engines
    image = image.convert("RGB")

    # Extract text
    extracted_text = extract_text(image)

    # Display extracted text
    st.subheader("📜 Extracted Text")
    st.write(extracted_text)

    # Save as DOCX
    doc = Document()
    doc.add_paragraph(extracted_text)
    docx_buffer = io.BytesIO()
    doc.save(docx_buffer)
    docx_buffer.seek(0)

    # Save as PDF
    # drawString renders a single line and does not break on "\n", so the
    # text is drawn line by line, starting a new page when the current one
    # is full. Lines wider than the page are not wrapped.
    pdf_buffer = io.BytesIO()
    pdf = canvas.Canvas(pdf_buffer)
    left_x, top_y, bottom_y, line_height = 100, 750, 50, 14
    cursor_y = top_y
    for text_line in extracted_text.splitlines():
        if cursor_y < bottom_y:
            pdf.showPage()
            cursor_y = top_y
        pdf.drawString(left_x, cursor_y, text_line)
        cursor_y -= line_height
    pdf.save()
    pdf_buffer.seek(0)

    # Download buttons
    st.download_button(
        "⬇️ Download as DOCX",
        data=docx_buffer,
        file_name="extracted_text.docx",
        mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
    )
    st.download_button(
        "⬇️ Download as PDF",
        data=pdf_buffer,
        file_name="extracted_text.pdf",
        mime="application/pdf",
    )
|