iammraat committed on
Commit
294cb1b
·
verified ·
1 Parent(s): 39b580c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -17
app.py CHANGED
@@ -369,8 +369,6 @@
369
 
370
 
371
 
372
-
373
-
374
  import gradio as gr
375
  import torch
376
  import numpy as np
@@ -392,17 +390,73 @@ detector = PaddleOCR(use_angle_cls=True, lang='en', show_log=False,
392
  det_limit_side_len=2500, det_db_thresh=0.1, det_db_box_thresh=0.3)
393
 
394
  # ==========================================
395
- # 🧠 LOGIC FIX: MERGE WORDS INTO LINES
396
  # ==========================================
397
- def merge_boxes_into_lines(raw_boxes, y_thresh=30):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
398
  """
399
- Takes scattered word boxes and glues them into clean line boxes.
 
400
  """
401
- # 🔴 FIX: Check length explicitly to avoid NumPy Ambiguity Error
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
402
  if raw_boxes is None or len(raw_boxes) == 0:
403
  return []
404
 
405
- # 1. Convert all polygons to Axis-Aligned Rectangles [x1, y1, x2, y2]
406
  rects = []
407
  for box in raw_boxes:
408
  box = np.array(box).astype(np.float32)
@@ -412,20 +466,21 @@ def merge_boxes_into_lines(raw_boxes, y_thresh=30):
412
  y2 = np.max(box[:, 1])
413
  rects.append([x1, y1, x2, y2])
414
 
415
- # 2. Sort by Y center to handle lines top-to-bottom
 
 
 
 
416
  rects.sort(key=lambda r: (r[1] + r[3]) / 2)
417
 
418
  merged_lines = []
419
  while rects:
420
- # Start a new line with the first box
421
  current_line = [rects.pop(0)]
422
  line_y_center = (current_line[0][1] + current_line[0][3]) / 2
423
 
424
- # Find all other boxes that belong to this vertical line
425
  remaining = []
426
  for r in rects:
427
  r_y_center = (r[1] + r[3]) / 2
428
- # If Y-center is close enough (within 30px), it's the same line
429
  if abs(r_y_center - line_y_center) < y_thresh:
430
  current_line.append(r)
431
  else:
@@ -433,7 +488,7 @@ def merge_boxes_into_lines(raw_boxes, y_thresh=30):
433
 
434
  rects = remaining
435
 
436
- # 3. Create a single bounding box for this entire line
437
  lx1 = min(r[0] for r in current_line)
438
  ly1 = min(r[1] for r in current_line)
439
  lx2 = max(r[2] for r in current_line)
@@ -441,7 +496,7 @@ def merge_boxes_into_lines(raw_boxes, y_thresh=30):
441
 
442
  merged_lines.append([lx1, ly1, lx2, ly2])
443
 
444
- # 4. Final Sort by Y position
445
  merged_lines.sort(key=lambda r: r[1])
446
  return merged_lines
447
 
@@ -451,16 +506,14 @@ def process_image(image):
451
 
452
  # DETECT
453
  try:
454
- # We bypass the .ocr() wrapper to avoid 'if not boxes' bug inside library
455
  dt_boxes, _ = detector.text_detector(image_np)
456
  except Exception as e:
457
  return image, [], f"Detection Error: {str(e)}"
458
 
459
- # Check explicitly (Fixes the crash you just saw)
460
  if dt_boxes is None or len(dt_boxes) == 0:
461
  return image, [], "No text detected."
462
 
463
- # MERGE (Word -> Line Level)
464
  line_boxes = merge_boxes_into_lines(dt_boxes)
465
 
466
  annotated_img = image_np.copy()
@@ -510,7 +563,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
510
  btn = gr.Button("Transcribe", variant="primary")
511
 
512
  with gr.Column(scale=1):
513
- output_img = gr.Image(label="Detected Lines (Green Boxes)")
514
  output_txt = gr.Textbox(label="Extracted Text", lines=15, show_copy_button=True)
515
 
516
  with gr.Row():
 
369
 
370
 
371
 
 
 
372
  import gradio as gr
373
  import torch
374
  import numpy as np
 
390
  det_limit_side_len=2500, det_db_thresh=0.1, det_db_box_thresh=0.3)
391
 
392
  # ==========================================
393
+ # 🧠 LOGIC FIX 1: CONSOLIDATE OVERLAPS
394
  # ==========================================
395
def calculate_iou(box1, box2):
    """Return the Intersection over Union (IoU) of two axis-aligned boxes.

    Args:
        box1: Sequence ``[x1, y1, x2, y2]``.
        box2: Second box in the same format.

    Returns:
        The ratio of the intersection area to the union area as a float.
        Returns ``0.0`` when the boxes do not intersect, or when the union
        has no area (e.g. both boxes are degenerate points or lines).

    Note:
        Assumes each box satisfies ``x1 <= x2`` and ``y1 <= y2``; inverted
        boxes are not normalised here.
    """
    # Corners of the candidate intersection rectangle.
    ix1 = max(box1[0], box2[0])
    iy1 = max(box1[1], box2[1])
    ix2 = min(box1[2], box2[2])
    iy2 = min(box1[3], box2[3])

    # No intersection.
    if ix2 < ix1 or iy2 < iy1:
        return 0.0

    intersection = (ix2 - ix1) * (iy2 - iy1)
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union = area1 + area2 - intersection

    # Zero-area boxes can touch without the early return firing; guard the
    # division so they are reported as "no overlap" instead of raising.
    if union <= 0:
        return 0.0

    return intersection / float(union)
411
+
412
def consolidate_boxes(boxes, iou_threshold=0.1):
    """Iteratively merge boxes whose IoU exceeds ``iou_threshold``.

    Args:
        boxes: Iterable of ``[x1, y1, x2, y2]`` boxes (lists, tuples or a
            NumPy array of rows). ``None`` is treated as empty.
        iou_threshold: Two boxes are merged when their IoU is strictly
            greater than this value.

    Returns:
        A new list of ``[x1, y1, x2, y2]`` lists of floats. Each merged box
        is the bounding rectangle of the boxes that were combined.
    """
    # Explicit None check: ``not boxes`` raises ValueError on NumPy arrays
    # with more than one element.
    if boxes is None:
        return []

    # Convert all coordinates to float for the calculations.
    active_boxes = [[float(v) for v in b] for b in boxes]

    changed = True
    while changed:
        changed = False
        new_boxes = []
        for current in active_boxes:
            # Compare against the boxes already kept in this pass.
            for i, other in enumerate(new_boxes):
                if calculate_iou(current, other) > iou_threshold:
                    # Merge: take min of mins and max of maxes, replacing
                    # the kept box with the enlarged one.
                    new_boxes[i] = [
                        min(current[0], other[0]),
                        min(current[1], other[1]),
                        max(current[2], other[2]),
                        max(current[3], other[3]),
                    ]
                    # A grown box may now overlap others: run another pass.
                    changed = True
                    break
            else:
                new_boxes.append(current)

        active_boxes = new_boxes

    return active_boxes
451
+
452
+ # ==========================================
453
+ # 🧠 LOGIC FIX 2: MERGE WORDS INTO LINES
454
+ # ==========================================
455
+ def merge_boxes_into_lines(raw_boxes, y_thresh=30):
456
  if raw_boxes is None or len(raw_boxes) == 0:
457
  return []
458
 
459
+ # 1. Convert raw polygons to Axis-Aligned Rectangles
460
  rects = []
461
  for box in raw_boxes:
462
  box = np.array(box).astype(np.float32)
 
466
  y2 = np.max(box[:, 1])
467
  rects.append([x1, y1, x2, y2])
468
 
469
+ # 🔴 NEW STEP: Remove overlapping duplicates before line merging
470
+ # This prevents "double-reading" the same word
471
+ rects = consolidate_boxes(rects, iou_threshold=0.2)
472
+
473
+ # 2. Sort by Y center
474
  rects.sort(key=lambda r: (r[1] + r[3]) / 2)
475
 
476
  merged_lines = []
477
  while rects:
 
478
  current_line = [rects.pop(0)]
479
  line_y_center = (current_line[0][1] + current_line[0][3]) / 2
480
 
 
481
  remaining = []
482
  for r in rects:
483
  r_y_center = (r[1] + r[3]) / 2
 
484
  if abs(r_y_center - line_y_center) < y_thresh:
485
  current_line.append(r)
486
  else:
 
488
 
489
  rects = remaining
490
 
491
+ # 3. Create Line Box
492
  lx1 = min(r[0] for r in current_line)
493
  ly1 = min(r[1] for r in current_line)
494
  lx2 = max(r[2] for r in current_line)
 
496
 
497
  merged_lines.append([lx1, ly1, lx2, ly2])
498
 
499
+ # 4. Sort by Y
500
  merged_lines.sort(key=lambda r: r[1])
501
  return merged_lines
502
 
 
506
 
507
  # DETECT
508
  try:
 
509
  dt_boxes, _ = detector.text_detector(image_np)
510
  except Exception as e:
511
  return image, [], f"Detection Error: {str(e)}"
512
 
 
513
  if dt_boxes is None or len(dt_boxes) == 0:
514
  return image, [], "No text detected."
515
 
516
+ # PROCESS (Consolidate -> Merge Lines)
517
  line_boxes = merge_boxes_into_lines(dt_boxes)
518
 
519
  annotated_img = image_np.copy()
 
563
  btn = gr.Button("Transcribe", variant="primary")
564
 
565
  with gr.Column(scale=1):
566
+ output_img = gr.Image(label="Detected Lines (Merged & Consolidated)")
567
  output_txt = gr.Textbox(label="Extracted Text", lines=15, show_copy_button=True)
568
 
569
  with gr.Row():