Spaces:

PrabhatGupta786
/

Hand_written_text_recognition

Sleeping

App Files Files Community

PrabhatGupta786 committed on Apr 9

Commit

a80d44c

verified ·

1 Parent(s): de5c1ea

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -8

app.py CHANGED Viewed

@@ -1,17 +1,64 @@
 def full_pipeline(input_img):
     if input_img is None:
         return "Please upload an image."
     lines = get_lines_from_image(input_img)
     if not lines:
-        return "No text lines detected. Try a clearer image."
     final_transcript = []
-    # Process one line at a time to avoid CPU/RAM OOM (Out of Memory)
     for line_img in lines:
         try:
-            # Resize line to 384px height (standard for TrOCR) to save processing time
             w, h = line_img.size
             new_h = 384
             new_w = int((new_h / h) * w)
@@ -26,9 +73,21 @@ def full_pipeline(input_img):
             if text.strip():
                 final_transcript.append(text.strip())
-        except Exception as e:
-            print(f"Error processing line: {e}")
-            continue
-    return " ".join(final_transcript)

+import torch
+import cv2
+import numpy as np
+import gradio as gr
+from PIL import Image
+from transformers import TrOCRProcessor, VisionEncoderDecoderModel
+# 1. Setup - Using 'base' instead of 'large' to prevent RAM crashes on Hugging Face
+# This model is ~1GB smaller and significantly faster on CPUs.
+# These statements run once at import time; `processor` and `model` are
+# module-level globals, and the model is placed on the CPU device.
+device = "cpu"
+model_id = 'microsoft/trocr-base-handwritten'
+print(f"Loading model {model_id}...")
+processor = TrOCRProcessor.from_pretrained(model_id)
+model = VisionEncoderDecoderModel.from_pretrained(model_id).to(device)
+def get_lines_from_image(img_array):
+    """Split a page image into per-line crops, ordered top to bottom.
+
+    Args:
+        img_array: RGB image as a NumPy array indexed (row, column, channel).
+
+    Returns:
+        A list of RGB PIL images, one per detected text line. The list is
+        empty when no region exceeds the 20x20 px noise threshold.
+    """
+    # Convert to grayscale
+    gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
+    # Adaptive thresholding handles uneven lighting better than global thresholding
+    binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+                                   cv2.THRESH_BINARY_INV, 11, 2)
+    # Dilate horizontally to join characters into lines
+    kernel = np.ones((5, 80), np.uint8)
+    dilation = cv2.dilate(binary, kernel, iterations=1)
+    # Find contours for line segmentation
+    contours, _ = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    # Compute each bounding box once, then order the boxes by their top edge
+    # so the transcript follows reading order.
+    boxes = sorted((cv2.boundingRect(ctr) for ctr in contours), key=lambda box: box[1])
+    img_h, img_w = img_array.shape[0], img_array.shape[1]
+    line_images = []
+    for x, y, w, h in boxes:
+        # Filter out tiny noise
+        if h > 20 and w > 20:
+            # Add small padding, clamped to the image bounds
+            y_s, y_e = max(0, y-5), min(img_h, y+h+5)
+            x_s, x_e = max(0, x-5), min(img_w, x+w+5)
+            # Slice to the padded right edge x_e; using x_s+w would shift the
+            # window left and cut off the end of the line.
+            roi = img_array[y_s:y_e, x_s:x_e]
+            # Ensure RGB for PIL
+            line_images.append(Image.fromarray(roi).convert("RGB"))
+    return line_images
 def full_pipeline(input_img):
     if input_img is None:
         return "Please upload an image."
     lines = get_lines_from_image(input_img)
     if not lines:
+        return "No text lines detected. Please ensure your image is clear and not too dark."
     final_transcript = []
+    # Process sequentially to keep memory usage low and stable
     for line_img in lines:
         try:
+            # Resizing to 384 height helps TrOCR's internal attention mechanism
             w, h = line_img.size
             new_h = 384
             new_w = int((new_h / h) * w)
             if text.strip():
                 final_transcript.append(text.strip())
+        except Exception:
+            continue # Skip lines that fail to avoid crashing the whole process
+    return "\n".join(final_transcript)
+# Gradio Interface: one image input handled by full_pipeline, one text output.
+demo = gr.Interface(
+    fn=full_pipeline,
+    # type="numpy" requests the upload as a NumPy array, the form that
+    # get_lines_from_image indexes and passes to OpenCV.
+    inputs=gr.Image(label="Upload Handwriting", type="numpy"),
+    outputs=gr.Textbox(label="Typed Text", show_copy_button=True),
+    title="Handwritten Paragraph to Typed Text",
+    description="Optimized for CPU. Upload a clear image of handwritten text. Tip: Crop the image to just the text area for best results.",
+    allow_flagging="never"
+)
+# Launch the app only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()