Spaces:

iammraat
/

ocr

Sleeping

App Files Files Community

iammraat committed on Jan 24

Commit

75a0625

verified ·

1 Parent(s): 294cb1b

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -56

app.py CHANGED Viewed

@@ -368,7 +368,6 @@
 import gradio as gr
 import torch
 import numpy as np
@@ -385,69 +384,62 @@ model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwrit
 # --- 2. SETUP PADDLEOCR ---
 print("Loading PaddleOCR...")
-# High resolution settings to detect faint text
 detector = PaddleOCR(use_angle_cls=True, lang='en', show_log=False,
                      det_limit_side_len=2500, det_db_thresh=0.1, det_db_box_thresh=0.3)
 # ==========================================
-# 🧠 LOGIC FIX 1: CONSOLIDATE OVERLAPS
 # ==========================================
def calculate_iou(box1, box2):
    """Calculate Intersection over Union (IoU) of two [x1, y1, x2, y2] boxes.

    Args:
        box1: First box; items 0..3 are read as x1, y1, x2, y2.
        box2: Second box, same layout.

    Returns:
        The intersection area divided by the union area, or 0.0 when the
        boxes do not intersect or the union area is not positive (e.g. two
        zero-area boxes), which would otherwise divide by zero.
    """
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])

    # No intersection
    if x2 < x1 or y2 < y1:
        return 0.0

    intersection = (x2 - x1) * (y2 - y1)
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area1 + area2 - intersection

    # Degenerate boxes can touch or coincide while covering no area at all;
    # there is nothing to compare, so report "no overlap" instead of raising.
    if union <= 0:
        return 0.0
    return intersection / float(union)
def consolidate_boxes(boxes, iou_threshold=0.1):
    """
    Iteratively merge boxes whose IoU exceeds ``iou_threshold``.

    Each pass walks the boxes in order and folds a box into the first
    already-collected box it overlaps with (by taking the bounding box of
    the pair). Passes repeat until one completes without any merge.

    Args:
        boxes: List of [x1, y1, x2, y2] boxes.
        iou_threshold: Two boxes are merged when ``calculate_iou`` returns a
            value strictly greater than this.

    Returns:
        A new list of [x1, y1, x2, y2] boxes with float coordinates; the
        input boxes are not modified. An empty input yields ``[]``.
    """
    if not boxes:
        return []

    # Convert all to float for calc (also copies, so the caller's boxes stay untouched)
    active_boxes = [list(map(float, b)) for b in boxes]

    changed = True
    while changed:
        changed = False
        new_boxes = []

        # Plain iteration instead of repeated pop(0), which shifts the whole
        # list on every call.
        for current in active_boxes:
            # Check current box against all boxes collected so far in this pass
            for i, other in enumerate(new_boxes):
                if calculate_iou(current, other) > iou_threshold:
                    # Merge them: take min of mins and max of maxes, replacing
                    # the existing box with the merged one
                    new_boxes[i] = [
                        min(current[0], other[0]),
                        min(current[1], other[1]),
                        max(current[2], other[2]),
                        max(current[3], other[3]),
                    ]
                    changed = True  # A merge may enable new overlaps: run another pass
                    break
            else:
                # No overlap with anything collected so far
                new_boxes.append(current)

        active_boxes = new_boxes

    return active_boxes
 # ==========================================
 # 🧠 LOGIC FIX 2: MERGE WORDS INTO LINES
@@ -466,11 +458,10 @@ def merge_boxes_into_lines(raw_boxes, y_thresh=30):
         y2 = np.max(box[:, 1])
         rects.append([x1, y1, x2, y2])
-    # 🔴 NEW STEP: Remove overlapping duplicates before line merging
-    # This prevents "double-reading" the same word
-    rects = consolidate_boxes(rects, iou_threshold=0.2)
-    # 2. Sort by Y center
     rects.sort(key=lambda r: (r[1] + r[3]) / 2)
     merged_lines = []
@@ -481,6 +472,7 @@ def merge_boxes_into_lines(raw_boxes, y_thresh=30):
         remaining = []
         for r in rects:
             r_y_center = (r[1] + r[3]) / 2
             if abs(r_y_center - line_y_center) < y_thresh:
                 current_line.append(r)
             else:
@@ -488,7 +480,7 @@ def merge_boxes_into_lines(raw_boxes, y_thresh=30):
         rects = remaining
-        # 3. Create Line Box
         lx1 = min(r[0] for r in current_line)
         ly1 = min(r[1] for r in current_line)
         lx2 = max(r[2] for r in current_line)
@@ -496,10 +488,11 @@ def merge_boxes_into_lines(raw_boxes, y_thresh=30):
         merged_lines.append([lx1, ly1, lx2, ly2])
-    # 4. Sort by Y
     merged_lines.sort(key=lambda r: r[1])
     return merged_lines
 def process_image(image):
     if image is None: return None, [], "Please upload an image."
     image_np = np.array(image.convert("RGB"))
@@ -513,7 +506,7 @@ def process_image(image):
     if dt_boxes is None or len(dt_boxes) == 0:
         return image, [], "No text detected."
-    # PROCESS (Consolidate -> Merge Lines)
     line_boxes = merge_boxes_into_lines(dt_boxes)
     annotated_img = image_np.copy()
@@ -527,7 +520,7 @@ def process_image(image):
         if (x2 - x1) < 20 or (y2 - y1) < 15:
             continue
-        # Draw Straight Rectangle (Green)
         cv2.rectangle(annotated_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
         # PADDING
@@ -555,7 +548,7 @@ def process_image(image):
 # --- UI ---
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# ⚡ Smart Line-Level OCR")
     with gr.Row():
         with gr.Column(scale=1):
@@ -563,11 +556,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             btn = gr.Button("Transcribe", variant="primary")
         with gr.Column(scale=1):
-            output_img = gr.Image(label="Detected Lines (Merged & Consolidated)")
             output_txt = gr.Textbox(label="Extracted Text", lines=15, show_copy_button=True)
     with gr.Row():
-        gallery = gr.Gallery(label="Line Crops", columns=4, height=200)
     btn.click(process_image, input_img, [output_img, gallery, output_txt])

 import gradio as gr
 import torch
 import numpy as np
 # --- 2. SETUP PADDLEOCR ---
 print("Loading PaddleOCR...")
+# High resolution to catch faint text
 detector = PaddleOCR(use_angle_cls=True, lang='en', show_log=False,
                      det_limit_side_len=2500, det_db_thresh=0.1, det_db_box_thresh=0.3)
 # ==========================================
+# 🧠 LOGIC FIX 1: REMOVE NESTED BOXES
 # ==========================================
def calculate_overlap_area(box1, box2):
    """Return the area of the region shared by two boxes.

    Items 0..3 of each box are read as x1, y1, x2, y2. When the boxes do
    not intersect the result is 0.0.
    """
    left, top = max(box1[0], box2[0]), max(box1[1], box2[1])
    right, bottom = min(box1[2], box2[2]), min(box1[3], box2[3])

    # The shared rectangle is inverted on at least one axis when the boxes are apart
    no_overlap = right < left or bottom < top
    return 0.0 if no_overlap else (right - left) * (bottom - top)
def filter_nested_boxes(boxes, containment_thresh=0.80):
    """
    Remove boxes that are mostly contained within other larger boxes.

    Boxes are visited from largest to smallest area. A box is dropped when
    more than ``containment_thresh`` of its own area is covered by a single
    box that has already been kept; otherwise it is kept.

    Args:
        boxes: List of [x1, y1, x2, y2] boxes.
        containment_thresh: Fraction of a box's own area that must be
            covered by one kept box for it to count as nested.

    Returns:
        The kept boxes as 4-item lists, ordered from largest to smallest
        area. Boxes whose area is not positive are kept as-is, because a
        coverage ratio cannot be computed for them.
    """
    if not boxes:
        return []

    # Pair every box with its area and sort largest to smallest - crucial:
    # the big 'parent' box must be accepted before the small 'child' box
    # inside it is examined.
    by_area = sorted(
        ((list(b)[:4], (b[2] - b[0]) * (b[3] - b[1])) for b in boxes),
        key=lambda item: item[1],
        reverse=True,
    )

    final_boxes = []
    for coords, area in by_area:
        # Only boxes already accepted (bigger or same size) can be a parent.
        # The area check guards the division below against zero-area boxes.
        is_nested = area > 0 and any(
            calculate_overlap_area(coords, kept) / area > containment_thresh
            for kept in final_boxes
        )
        if not is_nested:
            final_boxes.append(coords)

    return final_boxes
 # ==========================================
 # 🧠 LOGIC FIX 2: MERGE WORDS INTO LINES
         y2 = np.max(box[:, 1])
         rects.append([x1, y1, x2, y2])
+    # 🔴 STEP 2: Filter Nested Boxes (Remove the 'child' boxes)
+    rects = filter_nested_boxes(rects)
+    # 3. Sort by Y center
     rects.sort(key=lambda r: (r[1] + r[3]) / 2)
     merged_lines = []
         remaining = []
         for r in rects:
             r_y_center = (r[1] + r[3]) / 2
+            # If Y-center is close (same horizontal line)
             if abs(r_y_center - line_y_center) < y_thresh:
                 current_line.append(r)
             else:
         rects = remaining
+        # 4. Create Line Box
         lx1 = min(r[0] for r in current_line)
         ly1 = min(r[1] for r in current_line)
         lx2 = max(r[2] for r in current_line)
         merged_lines.append([lx1, ly1, lx2, ly2])
+    # Final Sort by Y
     merged_lines.sort(key=lambda r: r[1])
     return merged_lines
 def process_image(image):
     if image is None: return None, [], "Please upload an image."
     image_np = np.array(image.convert("RGB"))
     if dt_boxes is None or len(dt_boxes) == 0:
         return image, [], "No text detected."
+    # PROCESS (Filter Nested -> Merge Lines)
     line_boxes = merge_boxes_into_lines(dt_boxes)
     annotated_img = image_np.copy()
         if (x2 - x1) < 20 or (y2 - y1) < 15:
             continue
+        # Draw (Green)
         cv2.rectangle(annotated_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
         # PADDING
 # --- UI ---
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# ⚡ Smart Line-Level OCR (Cleaned)")
     with gr.Row():
         with gr.Column(scale=1):
             btn = gr.Button("Transcribe", variant="primary")
         with gr.Column(scale=1):
+            output_img = gr.Image(label="Cleaned Lines (Green Boxes)")
             output_txt = gr.Textbox(label="Extracted Text", lines=15, show_copy_button=True)
     with gr.Row():
+        gallery = gr.Gallery(label="Final Line Crops", columns=4, height=200)
     btn.click(process_image, input_img, [output_img, gallery, output_txt])