Spaces:

iammraat
/

ocr

Sleeping

App Files Files Community

iammraat committed 20 days ago

Commit

294cb1b

verified ·

1 Parent(s): 39b580c

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -17

app.py CHANGED Viewed

@@ -369,8 +369,6 @@
 import gradio as gr
 import torch
 import numpy as np
@@ -392,17 +390,73 @@ detector = PaddleOCR(use_angle_cls=True, lang='en', show_log=False,
                      det_limit_side_len=2500, det_db_thresh=0.1, det_db_box_thresh=0.3)
 # ==========================================
-# 🧠 LOGIC FIX: MERGE WORDS INTO LINES
 # ==========================================
-def merge_boxes_into_lines(raw_boxes, y_thresh=30):
     """
-    Takes scattered word boxes and glues them into clean line boxes.
     """
-    # 🔴 FIX: Check length explicitly to avoid NumPy Ambiguity Error
     if raw_boxes is None or len(raw_boxes) == 0:
         return []
-    # 1. Convert all polygons to Axis-Aligned Rectangles [x1, y1, x2, y2]
     rects = []
     for box in raw_boxes:
         box = np.array(box).astype(np.float32)
@@ -412,20 +466,21 @@ def merge_boxes_into_lines(raw_boxes, y_thresh=30):
         y2 = np.max(box[:, 1])
         rects.append([x1, y1, x2, y2])
-    # 2. Sort by Y center to handle lines top-to-bottom
     rects.sort(key=lambda r: (r[1] + r[3]) / 2)
     merged_lines = []
     while rects:
-        # Start a new line with the first box
         current_line = [rects.pop(0)]
         line_y_center = (current_line[0][1] + current_line[0][3]) / 2
-        # Find all other boxes that belong to this vertical line
         remaining = []
         for r in rects:
             r_y_center = (r[1] + r[3]) / 2
-            # If Y-center is close enough (within 30px), it's the same line
             if abs(r_y_center - line_y_center) < y_thresh:
                 current_line.append(r)
             else:
@@ -433,7 +488,7 @@ def merge_boxes_into_lines(raw_boxes, y_thresh=30):
         rects = remaining
-        # 3. Create a single bounding box for this entire line
         lx1 = min(r[0] for r in current_line)
         ly1 = min(r[1] for r in current_line)
         lx2 = max(r[2] for r in current_line)
@@ -441,7 +496,7 @@ def merge_boxes_into_lines(raw_boxes, y_thresh=30):
         merged_lines.append([lx1, ly1, lx2, ly2])
-    # 4. Final Sort by Y position
     merged_lines.sort(key=lambda r: r[1])
     return merged_lines
@@ -451,16 +506,14 @@ def process_image(image):
     # DETECT
     try:
-        # We bypass the .ocr() wrapper to avoid 'if not boxes' bug inside library
         dt_boxes, _ = detector.text_detector(image_np)
     except Exception as e:
         return image, [], f"Detection Error: {str(e)}"
-    # Check explicitly (Fixes the crash you just saw)
     if dt_boxes is None or len(dt_boxes) == 0:
         return image, [], "No text detected."
-    # MERGE (Word -> Line Level)
     line_boxes = merge_boxes_into_lines(dt_boxes)
     annotated_img = image_np.copy()
@@ -510,7 +563,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             btn = gr.Button("Transcribe", variant="primary")
         with gr.Column(scale=1):
-            output_img = gr.Image(label="Detected Lines (Green Boxes)")
             output_txt = gr.Textbox(label="Extracted Text", lines=15, show_copy_button=True)
     with gr.Row():

 import gradio as gr
 import torch
 import numpy as np
                      det_limit_side_len=2500, det_db_thresh=0.1, det_db_box_thresh=0.3)
 # ==========================================
+# 🧠 LOGIC FIX 1: CONSOLIDATE OVERLAPS
 # ==========================================
def calculate_iou(box1, box2):
    """Return the Intersection over Union (IoU) of two axis-aligned boxes.

    Args:
        box1: Box as ``[x1, y1, x2, y2]``; assumed to satisfy ``x1 <= x2``
            and ``y1 <= y2``.
        box2: Second box in the same format.

    Returns:
        The IoU ratio. ``0.0`` is returned when the boxes do not intersect,
        and also when their union has no area (e.g. two coincident
        zero-width or zero-height boxes), so degenerate detections never
        cause a division by zero.
    """
    # Corners of the candidate intersection rectangle.
    ix1 = max(box1[0], box2[0])
    iy1 = max(box1[1], box2[1])
    ix2 = min(box1[2], box2[2])
    iy2 = min(box1[3], box2[3])

    # No intersection
    if ix2 < ix1 or iy2 < iy1:
        return 0.0

    intersection = (ix2 - ix1) * (iy2 - iy1)
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area1 + area2 - intersection

    # Degenerate boxes can touch (passing the check above) while the union
    # still has zero area; treat that as "no overlap" instead of dividing by 0.
    if union <= 0:
        return 0.0
    return intersection / float(union)
def consolidate_boxes(boxes, iou_threshold=0.1):
    """Iteratively merge boxes whose IoU exceeds ``iou_threshold``.

    Args:
        boxes: Iterable of ``[x1, y1, x2, y2]`` boxes (list, tuple or NumPy
            array). ``None`` and empty inputs yield an empty list.
        iou_threshold: Two boxes are merged when ``calculate_iou`` returns a
            value strictly greater than this.

    Returns:
        A list of ``[x1, y1, x2, y2]`` float lists. Each merged box is the
        bounding rectangle (min of mins, max of maxes) of the boxes it
        absorbed.
    """
    if boxes is None:
        return []

    # Normalise to plain float lists first. Testing emptiness on the
    # resulting list (rather than on ``boxes``) avoids NumPy's ambiguous
    # truth-value error when an array is passed in.
    active_boxes = [list(map(float, b)) for b in boxes]
    if not active_boxes:
        return []

    changed = True
    while changed:
        changed = False
        new_boxes = []
        for current in active_boxes:
            # Compare against the boxes already kept in this pass.
            for i, other in enumerate(new_boxes):
                if calculate_iou(current, other) > iou_threshold:
                    # Replace the kept box with the union rectangle.
                    new_boxes[i] = [
                        min(current[0], other[0]),
                        min(current[1], other[1]),
                        max(current[2], other[2]),
                        max(current[3], other[3]),
                    ]
                    # A grown box may now overlap others: run another pass.
                    changed = True
                    break
            else:
                # No overlap with any kept box: keep it as-is.
                new_boxes.append(current)
        active_boxes = new_boxes
    return active_boxes
+# ==========================================
+# 🧠 LOGIC FIX 2: MERGE WORDS INTO LINES
+# ==========================================
+def merge_boxes_into_lines(raw_boxes, y_thresh=30):
     if raw_boxes is None or len(raw_boxes) == 0:
         return []
+    # 1. Convert raw polygons to Axis-Aligned Rectangles
     rects = []
     for box in raw_boxes:
         box = np.array(box).astype(np.float32)
         y2 = np.max(box[:, 1])
         rects.append([x1, y1, x2, y2])
+    # 🔴 NEW STEP: Remove overlapping duplicates before line merging
+    # This prevents "double-reading" the same word
+    rects = consolidate_boxes(rects, iou_threshold=0.2)
+    # 2. Sort by Y center
     rects.sort(key=lambda r: (r[1] + r[3]) / 2)
     merged_lines = []
     while rects:
         current_line = [rects.pop(0)]
         line_y_center = (current_line[0][1] + current_line[0][3]) / 2
         remaining = []
         for r in rects:
             r_y_center = (r[1] + r[3]) / 2
             if abs(r_y_center - line_y_center) < y_thresh:
                 current_line.append(r)
             else:
         rects = remaining
+        # 3. Create Line Box
         lx1 = min(r[0] for r in current_line)
         ly1 = min(r[1] for r in current_line)
         lx2 = max(r[2] for r in current_line)
         merged_lines.append([lx1, ly1, lx2, ly2])
+    # 4. Sort by Y
     merged_lines.sort(key=lambda r: r[1])
     return merged_lines
     # DETECT
     try:
         dt_boxes, _ = detector.text_detector(image_np)
     except Exception as e:
         return image, [], f"Detection Error: {str(e)}"
     if dt_boxes is None or len(dt_boxes) == 0:
         return image, [], "No text detected."
+    # PROCESS (Consolidate -> Merge Lines)
     line_boxes = merge_boxes_into_lines(dt_boxes)
     annotated_img = image_np.copy()
             btn = gr.Button("Transcribe", variant="primary")
         with gr.Column(scale=1):
+            output_img = gr.Image(label="Detected Lines (Merged & Consolidated)")
             output_txt = gr.Textbox(label="Extracted Text", lines=15, show_copy_button=True)
     with gr.Row():