Rachit2011's picture
Update app.py
c1c7a26 verified
# app.py
"""
Handwritten -> Text Gradio app for Hugging Face Spaces.
Primary OCR: Microsoft TrOCR (handwritten). Fallback: EasyOCR (if installed).
Supports upload and webcam captures.
"""
from PIL import Image, ImageOps
import io
import torch
import traceback
import gradio as gr
# Import TrOCR (transformers). This import is unconditional: if transformers or
# torch is not available, the Space build will fail and you'll see the error in
# the logs — that's expected.
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
# Checkpoint used for recognition; the small variant keeps builds faster.
MODEL_NAME = "microsoft/trocr-small-handwritten"

# Run on the GPU when one is visible to torch, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the TrOCR processor and model (this may download the model on first
# build), then move the model onto the selected device.
processor = TrOCRProcessor.from_pretrained(MODEL_NAME)
model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
model = model.to(device)
# Optional fallback OCR engine (may increase build time). Two module-level
# names are always defined afterwards:
#   EASYOCR_AVAILABLE -- True only when a reader was created successfully
#   easyocr_reader    -- the easyocr.Reader instance, or None
try:
    import easyocr

    # Instantiate the reader with common languages; add more codes if needed.
    easyocr_reader = easyocr.Reader(["en", "hi"], gpu=torch.cuda.is_available())
    EASYOCR_AVAILABLE = True
except ImportError:
    # EasyOCR is optional: not being installed is the expected, quiet case.
    EASYOCR_AVAILABLE = False
    easyocr_reader = None
except Exception:
    # EasyOCR is installed but could not be initialised. Keep the app running
    # without the fallback, but leave the reason in the logs instead of
    # swallowing it silently.
    print("EasyOCR fallback disabled: reader initialisation failed.")
    traceback.print_exc()
    EASYOCR_AVAILABLE = False
    easyocr_reader = None
def preprocess_image(pil_image: Image.Image) -> Image.Image:
    """Standardise an image before OCR.

    Applies the EXIF orientation, converts the image to RGB and, when the
    longest side exceeds 1600 pixels, downscales it proportionally so that the
    longest side is about 1600 pixels.

    Args:
        pil_image: The image to normalise, or ``None``.

    Returns:
        The normalised image, or ``None`` when ``pil_image`` is ``None``.
    """
    if pil_image is None:
        return None

    # Apply the EXIF orientation before any mode conversion, so the rotation
    # is resolved on the image object exactly as it was handed to us.
    pil_image = ImageOps.exif_transpose(pil_image)
    if pil_image.mode != "RGB":
        pil_image = pil_image.convert("RGB")

    # Downscale very large images to save memory/time.
    max_dim = 1600
    width, height = pil_image.size
    longest_side = max(width, height)
    if longest_side > max_dim:
        scale = max_dim / longest_side
        # Clamp each side to at least 1 px: for a very elongated image the
        # short side would otherwise truncate to 0 and make resize() fail.
        new_size = (max(1, int(width * scale)), max(1, int(height * scale)))
        pil_image = pil_image.resize(new_size, Image.LANCZOS)
    return pil_image
def trotocr_recognize(pil_image: Image.Image) -> str:
    """Transcribe one image with the module-level TrOCR model.

    Args:
        pil_image: The image to recognise.

    Returns:
        The decoded text of the first generated sequence, with surrounding
        whitespace stripped.
    """
    encoded = processor(images=pil_image, return_tensors="pt")
    pixel_values = encoded.pixel_values.to(device)
    # Generation parameters can be tuned; max_length caps the output length.
    token_ids = model.generate(pixel_values, max_length=512)
    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
    return decoded[0].strip()
def easyocr_recognize(pil_image: Image.Image) -> str:
    """Transcribe an image with the EasyOCR fallback reader.

    Args:
        pil_image: The image to recognise.

    Returns:
        The recognised text fragments joined with newlines and stripped, or
        an empty string when EasyOCR is not available.
    """
    if EASYOCR_AVAILABLE:
        # EasyOCR is given a numpy array rather than a PIL image.
        import numpy as np

        detections = easyocr_reader.readtext(np.array(pil_image))
        # Each detection is indexed as (bbox, text, confidence); keep the text.
        fragments = []
        for detection in detections:
            fragments.append(detection[1])
        return "\n".join(fragments).strip()
    return ""
def transcribe(image: Image.Image) -> str:
    """Recognise the handwriting in ``image`` and return it as text.

    The image is preprocessed and passed to TrOCR. When TrOCR yields fewer
    than three characters and EasyOCR is available, EasyOCR is tried and its
    result is returned if it is non-empty.

    Args:
        image: The image to transcribe, or ``None``.

    Returns:
        The recognised text, a hint message when nothing was recognised or no
        image was given, or an error report (message plus traceback) when
        recognition raised an exception.
    """
    if image is None:
        return "No image provided."

    try:
        prepared = preprocess_image(image)

        # Primary engine: TrOCR.
        primary = trotocr_recognize(prepared)

        # A short or empty result triggers the EasyOCR fallback, if present.
        if EASYOCR_AVAILABLE and (not primary or len(primary) < 3):
            secondary = easyocr_recognize(prepared)
            if secondary:
                return secondary

        if primary:
            return primary
        return "No text recognised. Try a clearer photo or crop the writing."
    except Exception as exc:
        # In Spaces it is useful to show a friendly error plus the traceback.
        details = traceback.format_exc()
        return f"Error during recognition:\n{exc}\n\nTraceback:\n{details}"
# Page heading and introduction rendered at the top of the UI.
# The heading previously contained mis-decoded bytes; the arrow and the dash
# are restored here.
title = "Handwritten → Text (TrOCR) — Upload or take a photo"
description = """
Upload a photo of handwritten notes or click the camera icon to take a picture.
This app uses Microsoft TrOCR (handwritten model). For some scripts EasyOCR is used as a fallback.
Tip: crop tightly around the writing for better results.
"""

# Build the Gradio UI: an image input next to a text output, with buttons to
# run the transcription and to reset both widgets.
with gr.Blocks(css=".footer {display:none !important;}") as demo:
    gr.Markdown(f"# {title}\n\n{description}")

    with gr.Row():
        # NOTE(review): source="upload" selects the upload widget only, while
        # the label and description also promise webcam capture. The
        # `source`/`tool` keywords also depend on the installed Gradio
        # version; confirm against the version pinned for this Space.
        img = gr.Image(source="upload", type="pil", tool="editor", label="Upload or use webcam (choose from dropdown)")
        out = gr.Textbox(label="Recognised text", lines=12)

    with gr.Row():
        btn = gr.Button("Transcribe")
        clear = gr.Button("Clear")

    # Status line telling the user whether the EasyOCR fallback is active.
    info = gr.Markdown("Model: microsoft/trocr-small-handwritten. EasyOCR fallback: "
                       f"{'enabled' if EASYOCR_AVAILABLE else 'not installed'}.")

    # "Transcribe" runs OCR on the current image; "Clear" empties the image
    # and the text box.
    btn.click(fn=transcribe, inputs=img, outputs=out)
    clear.click(fn=lambda: (None, ""), inputs=None, outputs=[img, out])

    gr.Markdown(
        "### Notes\n"
        "- For multi-line pages, crop to a single column of writing when possible.\n"
        "- If your language is not recognised well, consider fine-tuning or using EasyOCR with extra languages.\n"
        "- This Space may be slow on the free tier (CPU only). Consider a smaller model or a paid GPU space."
    )

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()