Spaces:
Sleeping
Sleeping
| # app.py | |
| """ | |
| Handwritten -> Text Gradio app for Hugging Face Spaces. | |
| Primary OCR: Microsoft TrOCR (handwritten). Fallback: EasyOCR (if installed). | |
| Supports upload and webcam captures. | |
| """ | |
| from PIL import Image, ImageOps | |
| import io | |
| import torch | |
| import traceback | |
| import gradio as gr | |
| # Try to import TrOCR (transformers). If transformers or torch not available, | |
| # the Space build will fail and you'll see logs — that's normal. | |
| from transformers import TrOCRProcessor, VisionEncoderDecoderModel | |
# Checkpoint to load; the small handwritten variant keeps builds faster.
MODEL_NAME = "microsoft/trocr-small-handwritten"

# Run on the GPU when torch reports one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Processor and model are loaded once at import time
# (the first build may download the checkpoint).
processor = TrOCRProcessor.from_pretrained(MODEL_NAME)
model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
model = model.to(device)
# Optional fallback engine: EasyOCR (may increase build time).
# This stays best-effort: any failure simply disables the fallback, but the
# reason is printed so the logs show *why* the fallback is off (package
# missing vs. the reader failing to initialise).
try:
    import easyocr

    # instantiate reader with common languages; add more codes if you need them
    easyocr_reader = easyocr.Reader(["en", "hi"], gpu=torch.cuda.is_available())
    EASYOCR_AVAILABLE = True
except Exception as exc:
    print(f"EasyOCR fallback disabled: {exc!r}")
    EASYOCR_AVAILABLE = False
    easyocr_reader = None
def preprocess_image(pil_image: Image.Image) -> Image.Image:
    """Standardise an image before OCR.

    Steps: apply the EXIF orientation, convert to RGB, and downscale so the
    longest side is at most 1600 pixels.

    Args:
        pil_image: Image to normalise, or ``None``.

    Returns:
        The normalised image, or ``None`` when ``pil_image`` is ``None``.
    """
    if pil_image is None:
        return None

    # Orientation is applied first so the tag is read from the image exactly
    # as it was supplied, before any mode conversion creates a new image.
    pil_image = ImageOps.exif_transpose(pil_image)
    if pil_image.mode != "RGB":
        pil_image = pil_image.convert("RGB")

    # Optional: downscale very large images to save memory/time
    max_dim = 1600
    longest = max(pil_image.size)
    if longest > max_dim:
        scale = max_dim / longest
        # Clamp each side to at least 1 pixel: for extreme aspect ratios
        # int() would otherwise truncate the short side to 0 and resize()
        # would raise.
        new_size = (
            max(1, int(pil_image.size[0] * scale)),
            max(1, int(pil_image.size[1] * scale)),
        )
        pil_image = pil_image.resize(new_size, Image.LANCZOS)
    return pil_image
def trotocr_recognize(pil_image: Image.Image) -> str:
    """Transcribe a single image with the TrOCR model.

    The image goes through the module-level ``processor``, the resulting
    pixel values are moved to ``device``, and ``model.generate`` produces the
    token ids that are decoded back to text.

    Returns:
        The first decoded sequence with surrounding whitespace stripped.
    """
    encoded = processor(images=pil_image, return_tensors="pt")
    pixel_values = encoded.pixel_values.to(device)
    # generation parameters can be tuned
    token_ids = model.generate(pixel_values, max_length=512)
    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
    return decoded[0].strip()
def easyocr_recognize(pil_image: Image.Image) -> str:
    """Transcribe an image with the EasyOCR fallback reader.

    Returns:
        The recognised text fragments joined by newlines and stripped, or an
        empty string when ``EASYOCR_AVAILABLE`` is false.
    """
    if EASYOCR_AVAILABLE:
        import numpy as np

        # The reader is fed a numpy array built from the PIL image.
        detections = easyocr_reader.readtext(np.array(pil_image))
        # Each detection is indexed at position 1 for its text.
        return "\n".join(item[1] for item in detections).strip()
    return ""
def transcribe(image: Image.Image) -> str:
    """Turn an image into text: preprocess, run TrOCR, optionally fall back.

    EasyOCR is consulted only when it is available and the TrOCR result is
    empty or shorter than three characters; a non-empty fallback result then
    wins. Any exception is converted into a message containing the error and
    its traceback instead of propagating.

    Returns:
        The recognised text, a hint message when nothing was recognised,
        ``"No image provided."`` for a missing image, or an error report.
    """
    if image is None:
        return "No image provided."

    try:
        prepared = preprocess_image(image)
        primary = trotocr_recognize(prepared)

        too_short = not primary or len(primary) < 3
        if EASYOCR_AVAILABLE and too_short:
            secondary = easyocr_recognize(prepared)
            if secondary:
                return secondary

        if primary:
            return primary
        return "No text recognised. Try a clearer photo or crop the writing."
    except Exception as exc:
        # In Spaces it's useful to show a friendly error + a short traceback
        trace_text = traceback.format_exc()
        return f"Error during recognition:\n{exc}\n\nTraceback:\n{trace_text}"
# Heading and introduction shown at the top of the page.
# The title previously contained mis-decoded characters in place of the arrow
# and the dash; they are restored here.
title = "Handwritten → Text (TrOCR) — Upload or take a photo"
description = """
Upload a photo of handwritten notes or click the camera icon to take a picture.
This app uses Microsoft TrOCR (handwritten model). For some scripts EasyOCR is used as a fallback.
Tip: crop tightly around the writing for better results.
"""
def _reset_fields():
    """Return the values that clear the image input and the text output."""
    return None, ""


# Page layout: heading, image input next to the text output, a row of action
# buttons, a status line, and usage notes.
with gr.Blocks(css=".footer {display:none !important;}") as demo:
    gr.Markdown(f"# {title}\n\n{description}")

    with gr.Row():
        # NOTE(review): source="upload" while the label mentions a webcam --
        # confirm against the installed Gradio version whether webcam capture
        # is actually offered.
        img = gr.Image(
            source="upload",
            type="pil",
            tool="editor",
            label="Upload or use webcam (choose from dropdown)",
        )
        out = gr.Textbox(label="Recognised text", lines=12)

    with gr.Row():
        btn = gr.Button("Transcribe")
        clear = gr.Button("Clear")

    fallback_state = "enabled" if EASYOCR_AVAILABLE else "not installed"
    info = gr.Markdown(
        "Model: microsoft/trocr-small-handwritten. EasyOCR fallback: "
        f"{fallback_state}."
    )

    # Wire the buttons: one runs recognition, the other empties both fields.
    btn.click(fn=transcribe, inputs=img, outputs=out)
    clear.click(fn=_reset_fields, inputs=None, outputs=[img, out])

    notes = [
        "### Notes",
        "- For multi-line pages, crop to a single column of writing when possible.",
        "- If your language is not recognised well, consider fine-tuning or using EasyOCR with extra languages.",
        "- This Space may be slow on the free tier (CPU only). Consider a smaller model or a paid GPU space.",
    ]
    gr.Markdown("\n".join(notes))

if __name__ == "__main__":
    demo.launch()