Spaces:

PrabhatGupta786
/

Hand_written_text_recognition

Sleeping

App Files Files Community

PrabhatGupta786 committed on Apr 9

Commit

cbafc2a

verified ·

1 Parent(s): 1b92121

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -36

app.py CHANGED Viewed

@@ -3,75 +3,86 @@ import cv2
 import numpy as np
 from PIL import Image
 from transformers import pipeline
-# Load the stable small model
-print("Loading OCR Pipeline...")
-pipe = pipeline("image-to-text", model="microsoft/trocr-small-handwritten")
 def get_lines(img_array):
-    # 1. Convert to grayscale
     gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
-    # 2. IMPROVED: Adaptive Thresholding
-    # Instead of a fixed '180', this adjusts to the lighting of the photo
     binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                    cv2.THRESH_BINARY_INV, 21, 10)
-    # 3. IMPROVED: Stronger Dilation
-    # We use a wider kernel to ensure separate words on the same line get joined
     kernel = np.ones((5, 100), np.uint8)
     dilation = cv2.dilate(binary, kernel, iterations=1)
     contours, _ = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-    # Sort from top to bottom
     contours = sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[1])
     line_images = []
     for ctr in contours:
         x, y, w, h = cv2.boundingRect(ctr)
-        # Only keep boxes that look like text (width > height and not tiny)
-        if w > 30 and h > 10:
-            roi = img_array[max(0, y-10):y+h+10, max(0, x-10):x+w+10]
             if roi.size > 0:
                 line_images.append(Image.fromarray(roi).convert("RGB"))
     return line_images
-def process_handwriting(input_img):
     if input_img is None:
-        return "Error: No image received."
     lines = get_lines(input_img)
-    # DEBUG FALLBACK: If segmentation fails, try the whole image once
     if not lines:
-        print("No lines detected, trying full image...")
         try:
-            full_img_pil = Image.fromarray(input_img).convert("RGB")
-            prediction = pipe(full_img_pil)
-            return f"Note: No lines detected. Full image result: {prediction[0]['generated_text']}"
-        except Exception as e:
-            return f"Error: Could not process image. Details: {str(e)}"
-    results = []
-    for i, line in enumerate(lines):
         try:
-            prediction = pipe(line)
-            text = prediction[0]['generated_text']
             if text.strip():
-                results.append(text.strip())
-        except Exception as e:
             continue
-    return "\n".join(results) if results else "The model couldn't read any text. Try a clearer image."
 demo = gr.Interface(
-    fn=process_handwriting,
-    inputs=gr.Image(type="numpy"),
-    outputs="text",
-    title="Handwritten to Typed Text",
-    description="If output is empty, try cropping the image closer to the text."
 )
 if __name__ == "__main__":

 import numpy as np
 from PIL import Image
 from transformers import pipeline
+import gc  # Essential for cleaning up RAM
+import torch
# Use the "small" TrOCR checkpoint to stay under the 16GB RAM limit.
MODEL_NAME = "microsoft/trocr-small-handwritten"
print(f"Loading {MODEL_NAME}...")
try:
    # device=-1 forces CPU inference.
    pipe = pipeline("image-to-text", model=MODEL_NAME, device=-1)
except Exception as e:
    # Best-effort startup: report the failure but still bind `pipe`, so later
    # code sees an explicit None instead of hitting a NameError.
    print(f"Load Error: {e}")
    pipe = None
 def get_lines(img_array, min_width=30, min_height=15, padding=5):
     """Split a page image into per-line crops, ordered top to bottom.

     Args:
         img_array: Image as a NumPy array; it is converted to grayscale with
             ``cv2.COLOR_RGB2GRAY``.
         min_width: A box is kept only if it is strictly wider than this.
         min_height: A box is kept only if it is strictly taller than this.
         padding: Extra pixels kept around each box when cropping (clamped at
             the top/left image edge).

     Returns:
         A list of RGB ``PIL.Image`` crops, one per box that passed the filter.
     """
     gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
     # Adaptive thresholding copes with uneven lighting; the inverted mode
     # turns pixels darker than their neighbourhood into foreground.
     binary = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                    cv2.THRESH_BINARY_INV, 21, 10)
     # A wide, short kernel smears the foreground horizontally so the letters
     # of one line merge into a single block.
     kernel = np.ones((5, 100), np.uint8)
     dilation = cv2.dilate(binary, kernel, iterations=1)
     contours, _ = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
     # Compute each bounding box once, then order the boxes by their top edge.
     boxes = sorted((cv2.boundingRect(ctr) for ctr in contours), key=lambda box: box[1])
     line_images = []
     for x, y, w, h in boxes:
         if w > min_width and h > min_height:
             roi = img_array[max(0, y - padding):y + h + padding,
                             max(0, x - padding):x + w + padding]
             if roi.size > 0:
                 line_images.append(Image.fromarray(roi).convert("RGB"))
     # Drop the intermediate arrays before returning to keep peak memory down.
     del gray, binary, dilation
     gc.collect()
     return line_images
def process_image(input_img):
    """Run handwriting OCR on an uploaded image and return the recognized text.

    The image is split into line crops by ``get_lines`` and each crop is passed
    to the module-level ``pipe``. If no lines are found, the whole image
    (downscaled to fit within 800x800) is tried once instead.

    Args:
        input_img: Image as a NumPy array, or None when nothing was uploaded.

    Returns:
        The recognized text, one output line per readable crop, or a short
        status message when there is no input or nothing could be read.
    """
    if input_img is None:
        return "Please upload an image."

    lines = get_lines(input_img)

    if not lines:
        # Segmentation found nothing: fall back to the whole image.
        try:
            full_img = Image.fromarray(input_img).convert("RGB")
            full_img.thumbnail((800, 800))  # downsize in place for safety
            res = pipe(full_img)
            return f"[Single Line Mode]: {res[0]['generated_text']}"
        except Exception as e:
            # `except Exception` rather than a bare except, so that
            # KeyboardInterrupt/SystemExit still propagate; report the cause.
            print(f"Full-image OCR failed: {e}")
            return "No text detected."

    final_results = []
    for line in lines:
        try:
            out = pipe(line)
            text = out[0]['generated_text'].strip()
            if text:
                final_results.append(text)
        except Exception as e:
            # Best effort: skip a crop that fails, but leave a trace in the log.
            print(f"Skipping line: {e}")
        finally:
            # Force a collection after every line to keep memory use flat.
            gc.collect()

    if not final_results:
        # Same message as the fallback branch instead of an empty result box.
        return "No text detected."
    return "\n".join(final_results)
 # Gradio UI: one image input (delivered to the handler as a NumPy array)
 # and one text box showing the OCR result.
 _image_input = gr.Image(type="numpy", label="Upload Handwriting")
 _text_output = gr.Textbox(label="Result")
 demo = gr.Interface(
     fn=process_image,
     inputs=_image_input,
     outputs=_text_output,
     title="Stable Handwritten OCR (v3)",
     description="Optimized for HF Free Tier. Uses TrOCR-Small and aggressive RAM management.",
 )
 if __name__ == "__main__":