iammraat committed on
Commit
75a0625
·
verified ·
1 Parent(s): 294cb1b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -56
app.py CHANGED
@@ -368,7 +368,6 @@
368
 
369
 
370
 
371
-
372
  import gradio as gr
373
  import torch
374
  import numpy as np
@@ -385,69 +384,62 @@ model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwrit
385
 
386
  # --- 2. SETUP PADDLEOCR ---
387
  print("Loading PaddleOCR...")
388
- # High resolution settings to detect faint text
389
  detector = PaddleOCR(use_angle_cls=True, lang='en', show_log=False,
390
  det_limit_side_len=2500, det_db_thresh=0.1, det_db_box_thresh=0.3)
391
 
 
392
  # ==========================================
393
- # 🧠 LOGIC FIX 1: CONSOLIDATE OVERLAPS
394
  # ==========================================
395
def calculate_iou(box1, box2):
    """Return the Intersection over Union (IoU) of two [x1, y1, x2, y2] boxes.

    Boxes are axis-aligned rectangles with (x1, y1) the top-left corner and
    (x2, y2) the bottom-right corner. The result is a float in [0.0, 1.0].
    """
    # Corners of the intersection rectangle.
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])

    # No intersection
    if x2 < x1 or y2 < y1:
        return 0.0

    intersection = (x2 - x1) * (y2 - y1)
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])

    union = float(area1 + area2 - intersection)
    # Guard: two degenerate (zero-area) boxes yield a zero union; the
    # original code raised ZeroDivisionError here. Report no overlap instead.
    if union == 0.0:
        return 0.0
    return intersection / union
411
-
412
def consolidate_boxes(boxes, iou_threshold=0.1):
    """Merge overlapping [x1, y1, x2, y2] boxes until none overlap significantly.

    Each box is folded into the first already-kept box whose IoU with it
    exceeds ``iou_threshold`` (the merged box is the union of both extents).
    Passes repeat until a full pass performs no merge. Returns float boxes.
    """
    if not boxes:
        return []

    def _iou(a, b):
        # Intersection over Union of two [x1, y1, x2, y2] boxes.
        ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
        ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
        if ix2 < ix1 or iy2 < iy1:
            return 0.0
        inter = (ix2 - ix1) * (iy2 - iy1)
        area_a = (a[2] - a[0]) * (a[3] - a[1])
        area_b = (b[2] - b[0]) * (b[3] - b[1])
        return inter / float(area_a + area_b - inter)

    # Work on float copies so coordinate arithmetic is uniform.
    pending = [list(map(float, b)) for b in boxes]

    made_merge = True
    while made_merge:
        made_merge = False
        kept = []
        while pending:
            candidate = pending.pop(0)
            absorbed = False
            # Compare against every box already accepted this pass.
            for idx, existing in enumerate(kept):
                if _iou(candidate, existing) > iou_threshold:
                    # Replace the kept box with the union of both extents.
                    kept[idx] = [
                        min(candidate[0], existing[0]),
                        min(candidate[1], existing[1]),
                        max(candidate[2], existing[2]),
                        max(candidate[3], existing[3]),
                    ]
                    absorbed = True
                    made_merge = True  # A merge may enable further merges.
                    break
            if not absorbed:
                kept.append(candidate)
        pending = kept

    return pending
451
 
452
  # ==========================================
453
  # 🧠 LOGIC FIX 2: MERGE WORDS INTO LINES
@@ -466,11 +458,10 @@ def merge_boxes_into_lines(raw_boxes, y_thresh=30):
466
  y2 = np.max(box[:, 1])
467
  rects.append([x1, y1, x2, y2])
468
 
469
- # 🔴 NEW STEP: Remove overlapping duplicates before line merging
470
- # This prevents "double-reading" the same word
471
- rects = consolidate_boxes(rects, iou_threshold=0.2)
472
 
473
- # 2. Sort by Y center
474
  rects.sort(key=lambda r: (r[1] + r[3]) / 2)
475
 
476
  merged_lines = []
@@ -481,6 +472,7 @@ def merge_boxes_into_lines(raw_boxes, y_thresh=30):
481
  remaining = []
482
  for r in rects:
483
  r_y_center = (r[1] + r[3]) / 2
 
484
  if abs(r_y_center - line_y_center) < y_thresh:
485
  current_line.append(r)
486
  else:
@@ -488,7 +480,7 @@ def merge_boxes_into_lines(raw_boxes, y_thresh=30):
488
 
489
  rects = remaining
490
 
491
- # 3. Create Line Box
492
  lx1 = min(r[0] for r in current_line)
493
  ly1 = min(r[1] for r in current_line)
494
  lx2 = max(r[2] for r in current_line)
@@ -496,10 +488,11 @@ def merge_boxes_into_lines(raw_boxes, y_thresh=30):
496
 
497
  merged_lines.append([lx1, ly1, lx2, ly2])
498
 
499
- # 4. Sort by Y
500
  merged_lines.sort(key=lambda r: r[1])
501
  return merged_lines
502
 
 
503
  def process_image(image):
504
  if image is None: return None, [], "Please upload an image."
505
  image_np = np.array(image.convert("RGB"))
@@ -513,7 +506,7 @@ def process_image(image):
513
  if dt_boxes is None or len(dt_boxes) == 0:
514
  return image, [], "No text detected."
515
 
516
- # PROCESS (Consolidate -> Merge Lines)
517
  line_boxes = merge_boxes_into_lines(dt_boxes)
518
 
519
  annotated_img = image_np.copy()
@@ -527,7 +520,7 @@ def process_image(image):
527
  if (x2 - x1) < 20 or (y2 - y1) < 15:
528
  continue
529
 
530
- # Draw Straight Rectangle (Green)
531
  cv2.rectangle(annotated_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
532
 
533
  # PADDING
@@ -555,7 +548,7 @@ def process_image(image):
555
 
556
  # --- UI ---
557
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
558
- gr.Markdown("# ⚡ Smart Line-Level OCR")
559
 
560
  with gr.Row():
561
  with gr.Column(scale=1):
@@ -563,11 +556,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
563
  btn = gr.Button("Transcribe", variant="primary")
564
 
565
  with gr.Column(scale=1):
566
- output_img = gr.Image(label="Detected Lines (Merged & Consolidated)")
567
  output_txt = gr.Textbox(label="Extracted Text", lines=15, show_copy_button=True)
568
 
569
  with gr.Row():
570
- gallery = gr.Gallery(label="Line Crops", columns=4, height=200)
571
 
572
  btn.click(process_image, input_img, [output_img, gallery, output_txt])
573
 
 
368
 
369
 
370
 
 
371
  import gradio as gr
372
  import torch
373
  import numpy as np
 
384
 
385
  # --- 2. SETUP PADDLEOCR ---
386
  print("Loading PaddleOCR...")
387
+ # High resolution to catch faint text
388
  detector = PaddleOCR(use_angle_cls=True, lang='en', show_log=False,
389
  det_limit_side_len=2500, det_db_thresh=0.1, det_db_box_thresh=0.3)
390
 
391
+
392
  # ==========================================
393
+ # 🧠 LOGIC FIX 1: REMOVE NESTED BOXES
394
  # ==========================================
395
def calculate_overlap_area(box1, box2):
    """Return the area of the intersection of two [x1, y1, x2, y2] boxes.

    Gives 0.0 when the boxes do not overlap.
    """
    # Width and height of the overlap rectangle (negative when disjoint).
    overlap_w = min(box1[2], box2[2]) - max(box1[0], box2[0])
    overlap_h = min(box1[3], box2[3]) - max(box1[1], box2[1])
    if overlap_w < 0 or overlap_h < 0:
        return 0.0
    return overlap_w * overlap_h
405
 
406
def filter_nested_boxes(boxes, containment_thresh=0.80):
    """Remove boxes that are mostly contained within other, larger boxes.

    Args:
        boxes: list of [x1, y1, x2, y2] rectangles.
        containment_thresh: fraction of a box's own area that must be
            covered by an already-kept (larger) box for it to be dropped.

    Returns:
        List of [x1, y1, x2, y2] boxes with nested duplicates removed,
        ordered largest-area first.
    """
    if not boxes:
        return []

    def _intersection_area(a, b):
        # Overlap area of two [x1, y1, x2, y2] boxes; 0.0 when disjoint.
        w = min(a[2], b[2]) - max(a[0], b[0])
        h = min(a[3], b[3]) - max(a[1], b[1])
        if w < 0 or h < 0:
            return 0.0
        return w * h

    # Pair each box with its area, then sort largest-first - crucial: the
    # big 'parent' box must be examined before any 'child' nested inside it.
    sized = [(list(b[:4]), (b[2] - b[0]) * (b[3] - b[1])) for b in boxes]
    sized.sort(key=lambda item: item[1], reverse=True)

    final_boxes = []
    for coords, area in sized:
        # Degenerate (zero-area) boxes carry no content and would divide by
        # zero below; drop them outright (the original code crashed here).
        if area <= 0:
            continue

        # If >containment_thresh of this box is covered by a kept (bigger)
        # box, it is a duplicate/nested detection - discard it.
        is_nested = any(
            _intersection_area(coords, kept) / area > containment_thresh
            for kept in final_boxes
        )
        if not is_nested:
            final_boxes.append(coords)

    return final_boxes
442
+
 
 
 
 
443
 
444
  # ==========================================
445
  # 🧠 LOGIC FIX 2: MERGE WORDS INTO LINES
 
458
  y2 = np.max(box[:, 1])
459
  rects.append([x1, y1, x2, y2])
460
 
461
+ # 🔴 STEP 2: Filter Nested Boxes (Remove the 'child' boxes)
462
+ rects = filter_nested_boxes(rects)
 
463
 
464
+ # 3. Sort by Y center
465
  rects.sort(key=lambda r: (r[1] + r[3]) / 2)
466
 
467
  merged_lines = []
 
472
  remaining = []
473
  for r in rects:
474
  r_y_center = (r[1] + r[3]) / 2
475
+ # If Y-center is close (same horizontal line)
476
  if abs(r_y_center - line_y_center) < y_thresh:
477
  current_line.append(r)
478
  else:
 
480
 
481
  rects = remaining
482
 
483
+ # 4. Create Line Box
484
  lx1 = min(r[0] for r in current_line)
485
  ly1 = min(r[1] for r in current_line)
486
  lx2 = max(r[2] for r in current_line)
 
488
 
489
  merged_lines.append([lx1, ly1, lx2, ly2])
490
 
491
+ # Final Sort by Y
492
  merged_lines.sort(key=lambda r: r[1])
493
  return merged_lines
494
 
495
+
496
  def process_image(image):
497
  if image is None: return None, [], "Please upload an image."
498
  image_np = np.array(image.convert("RGB"))
 
506
  if dt_boxes is None or len(dt_boxes) == 0:
507
  return image, [], "No text detected."
508
 
509
+ # PROCESS (Filter Nested -> Merge Lines)
510
  line_boxes = merge_boxes_into_lines(dt_boxes)
511
 
512
  annotated_img = image_np.copy()
 
520
  if (x2 - x1) < 20 or (y2 - y1) < 15:
521
  continue
522
 
523
+ # Draw (Green)
524
  cv2.rectangle(annotated_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
525
 
526
  # PADDING
 
548
 
549
  # --- UI ---
550
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
551
+ gr.Markdown("# ⚡ Smart Line-Level OCR (Cleaned)")
552
 
553
  with gr.Row():
554
  with gr.Column(scale=1):
 
556
  btn = gr.Button("Transcribe", variant="primary")
557
 
558
  with gr.Column(scale=1):
559
+ output_img = gr.Image(label="Cleaned Lines (Green Boxes)")
560
  output_txt = gr.Textbox(label="Extracted Text", lines=15, show_copy_button=True)
561
 
562
  with gr.Row():
563
+ gallery = gr.Gallery(label="Final Line Crops", columns=4, height=200)
564
 
565
  btn.click(process_image, input_img, [output_img, gallery, output_txt])
566