Spaces:

Rachit2011
/

handwriting_detector_AI

Sleeping

App Files Files Community

handwriting_detector_AI / app.py

Rachit2011

Update app.py

c1c7a26 verified 7 months ago

raw

history blame contribute delete

4.98 kB

	# app.py
	"""
	Handwritten -> Text Gradio app for Hugging Face Spaces.
	Primary OCR: Microsoft TrOCR (handwritten). Fallback: EasyOCR (if installed).
	Supports upload and webcam captures.
	"""

	from PIL import Image, ImageOps
	import io
	import torch
	import traceback

	import gradio as gr

	# Import TrOCR from transformers. This is a hard (unguarded) dependency: if
	# transformers or torch is not available, the Space build will fail and you'll
	# see the error in the logs — that's expected.
	from transformers import TrOCRProcessor, VisionEncoderDecoderModel

	# Checkpoint identifier handed to from_pretrained(); the "small" variant keeps builds fast.
	MODEL_NAME = "microsoft/trocr-small-handwritten"

	# Run on the GPU when torch can see one, otherwise stay on the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# Load the TrOCR processor and model (the first build may download them),
	# then move the model onto the selected device.
	processor = TrOCRProcessor.from_pretrained(MODEL_NAME)
	model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
	model = model.to(device)


	# EasyOCR is an optional fallback engine (it may increase build time).
	# Start from the "unavailable" state and only flip the flag once both the
	# import and the reader construction have succeeded.
	EASYOCR_AVAILABLE = False
	easyocr_reader = None
	try:
	    import easyocr
	    # Reader for common languages; add more language codes if you need them.
	    easyocr_reader = easyocr.Reader(["en", "hi"], gpu=torch.cuda.is_available())
	except Exception:
	    # Best-effort: any failure simply leaves the fallback disabled.
	    easyocr_reader = None
	else:
	    EASYOCR_AVAILABLE = True


	def preprocess_image(pil_image: Image.Image, *, max_dim: int = 1600) -> Image.Image:
	    """Standardise an image before OCR: RGB mode, EXIF orientation, bounded size.

	    Args:
	        pil_image: Image to normalise. ``None`` is passed straight through.
	        max_dim: Largest allowed side length in pixels. Images whose longest
	            side exceeds it are downscaled with the aspect ratio preserved.

	    Returns:
	        The normalised image, or ``None`` when ``pil_image`` is ``None``.
	    """
	    if pil_image is None:
	        return None
	    if pil_image.mode != "RGB":
	        pil_image = pil_image.convert("RGB")
	    pil_image = ImageOps.exif_transpose(pil_image)

	    # Downscale very large images to save memory/time.
	    longest_side = max(pil_image.size)
	    if longest_side > max_dim:
	        scale = max_dim / longest_side
	        # int() truncation can send the short side of a very elongated image
	        # (e.g. 10000x1) to 0, which resize() rejects; keep at least 1 pixel.
	        new_size = tuple(max(1, int(side * scale)) for side in pil_image.size)
	        pil_image = pil_image.resize(new_size, Image.LANCZOS)
	    return pil_image


	def trotocr_recognize(pil_image: Image.Image) -> str:
	    """Transcribe a single image with the module-level TrOCR processor and model.

	    Returns the first decoded sequence with special tokens removed and
	    surrounding whitespace stripped.
	    """
	    encoded = processor(images=pil_image, return_tensors="pt")
	    pixel_values = encoded.pixel_values.to(device)
	    # Generation parameters can be tuned here.
	    token_ids = model.generate(pixel_values, max_length=512)
	    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
	    return decoded[0].strip()


	def easyocr_recognize(pil_image: Image.Image) -> str:
	    """Transcribe an image with the EasyOCR fallback reader.

	    Returns an empty string when EasyOCR is unavailable; otherwise the
	    recognised text fragments joined by newlines and stripped.
	    """
	    if EASYOCR_AVAILABLE:
	        # EasyOCR works on numpy arrays rather than PIL images.
	        import numpy as np

	        pixels = np.array(pil_image)
	        detections = easyocr_reader.readtext(pixels)
	        # Each detection is (bbox, text, confidence); keep only the text.
	        joined = "\n".join(item[1] for item in detections)
	        return joined.strip()
	    return ""


	def transcribe(image: Image.Image) -> str:
	    """Entry point for the UI: preprocess, run TrOCR, fall back to EasyOCR.

	    Returns the recognised text, a hint message when nothing was recognised,
	    or an error report (message plus traceback) if anything raised.
	    """
	    if image is None:
	        return "No image provided."

	    try:
	        prepared = preprocess_image(image)
	        # Primary engine: TrOCR.
	        primary = trotocr_recognize(prepared)
	        # An empty or very short result triggers the fallback when it is available.
	        too_short = not primary or len(primary) < 3
	        if too_short and EASYOCR_AVAILABLE:
	            secondary = easyocr_recognize(prepared)
	            if secondary:
	                return secondary
	        if primary:
	            return primary
	        return "No text recognised. Try a clearer photo or crop the writing."
	    except Exception as e:
	        # In Spaces it is useful to show a friendly error plus the traceback.
	        tb = traceback.format_exc()
	        return f"Error during recognition:\n{e}\n\nTraceback:\n{tb}"


	# Page heading; interpolated into the top-level Markdown header of the UI.
	title = "Handwritten → Text (TrOCR) — Upload or take a photo"
	# Introductory text rendered directly below the heading in the same Markdown block.
	description = """
	Upload a photo of handwritten notes or click the camera icon to take a picture.
	This app uses Microsoft TrOCR (handwritten model). For some scripts EasyOCR is used as a fallback.
	Tip: crop tightly around the writing for better results.
	"""

	# Build the UI: heading, image input + text output, action buttons, and notes.
	with gr.Blocks(css=".footer {display:none !important;}") as demo:
	    gr.Markdown(f"# {title}\n\n{description}")

	    # Gradio 4 replaced gr.Image's `source=` string with a `sources=` list and
	    # removed `tool=`, so passing the old keywords raises TypeError there.
	    # Choose the arguments by major version; on Gradio 4+ also enable the
	    # webcam so the component matches its label.
	    image_kwargs = {
	        "type": "pil",
	        "label": "Upload or use webcam (choose from dropdown)",
	    }
	    if int(gr.__version__.split(".")[0]) >= 4:
	        image_kwargs["sources"] = ["upload", "webcam"]
	    else:
	        image_kwargs.update(source="upload", tool="editor")

	    with gr.Row():
	        img = gr.Image(**image_kwargs)
	        out = gr.Textbox(label="Recognised text", lines=12)
	    with gr.Row():
	        btn = gr.Button("Transcribe")
	        clear = gr.Button("Clear")
	    info = gr.Markdown("Model: microsoft/trocr-small-handwritten. EasyOCR fallback: "
	                       f"{'enabled' if EASYOCR_AVAILABLE else 'not installed'}.")

	    # "Transcribe" runs OCR on the current image; "Clear" resets both widgets.
	    btn.click(fn=transcribe, inputs=img, outputs=out)
	    clear.click(fn=lambda: (None, ""), inputs=None, outputs=[img, out])

	    gr.Markdown(
	        "### Notes\n"
	        "- For multi-line pages, crop to a single column of writing when possible.\n"
	        "- If your language is not recognised well, consider fine-tuning or using EasyOCR with extra languages.\n"
	        "- This Space may be slow on the free tier (CPU only). Consider a smaller model or a paid GPU space."
	    )

	# Start the Gradio server only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	demo.launch()