iammraat committed on
Commit
81dee09
·
verified ·
1 Parent(s): 75a0625

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +341 -102
app.py CHANGED
@@ -368,6 +368,229 @@
368
 
369
 
370
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
371
  import gradio as gr
372
  import torch
373
  import numpy as np
@@ -376,24 +599,18 @@ from PIL import Image
376
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
377
  from paddleocr import PaddleOCR
378
 
379
- # --- 1. SETUP TR-OCR ---
380
  device = "cuda" if torch.cuda.is_available() else "cpu"
381
  print(f"Loading TrOCR on {device}...")
382
  processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
383
  model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten').to(device).eval()
384
 
385
- # --- 2. SETUP PADDLEOCR ---
386
  print("Loading PaddleOCR...")
387
- # High resolution to catch faint text
388
  detector = PaddleOCR(use_angle_cls=True, lang='en', show_log=False,
389
  det_limit_side_len=2500, det_db_thresh=0.1, det_db_box_thresh=0.3)
390
 
391
-
392
- # ==========================================
393
- # 🧠 LOGIC FIX 1: REMOVE NESTED BOXES
394
- # ==========================================
395
- def calculate_overlap_area(box1, box2):
396
- """Calculates the intersection area between two boxes."""
397
  x1 = max(box1[0], box2[0])
398
  y1 = max(box1[1], box2[1])
399
  x2 = min(box1[2], box2[2])
@@ -401,112 +618,139 @@ def calculate_overlap_area(box1, box2):
401
 
402
  if x2 < x1 or y2 < y1:
403
  return 0.0
404
- return (x2 - x1) * (y2 - y1)
 
 
 
 
 
405
 
406
- def filter_nested_boxes(boxes, containment_thresh=0.80):
407
- """
408
- Removes boxes that are mostly contained within other larger boxes.
409
- """
410
- if not boxes: return []
411
 
412
- # Convert all to [x1, y1, x2, y2, area]
413
- active = []
414
  for b in boxes:
415
  area = (b[2] - b[0]) * (b[3] - b[1])
416
- active.append(list(b) + [area])
417
-
418
- # Sort by area (Largest to Smallest) - Crucial!
419
- # We want to keep the big 'parent' box and delete the small 'child' box.
420
- active.sort(key=lambda x: x[4], reverse=True)
421
 
422
- final_boxes = []
 
423
 
424
- for i, current in enumerate(active):
425
- is_nested = False
426
- curr_area = current[4]
427
-
428
- # Check against all boxes we've already accepted (which are bigger/same size)
429
- for kept in final_boxes:
430
- overlap = calculate_overlap_area(current, kept)
431
-
432
- # Check if 'current' is inside 'kept'
433
- # If >80% of current box is covered by kept box, it's a duplicate/nested box
434
- if (overlap / curr_area) > containment_thresh:
435
- is_nested = True
436
  break
437
 
438
- if not is_nested:
439
- final_boxes.append(current[:4]) # Store only coord, drop area
440
-
441
- return final_boxes
442
-
443
 
444
- # ==========================================
445
- # 🧠 LOGIC FIX 2: MERGE WORDS INTO LINES
446
- # ==========================================
447
- def merge_boxes_into_lines(raw_boxes, y_thresh=30):
448
- if raw_boxes is None or len(raw_boxes) == 0:
449
  return []
450
-
451
- # 1. Convert raw polygons to Axis-Aligned Rectangles
452
  rects = []
453
  for box in raw_boxes:
454
  box = np.array(box).astype(np.float32)
455
- x1 = np.min(box[:, 0])
456
- y1 = np.min(box[:, 1])
457
- x2 = np.max(box[:, 0])
458
- y2 = np.max(box[:, 1])
459
  rects.append([x1, y1, x2, y2])
460
-
461
- # 🔴 STEP 2: Filter Nested Boxes (Remove the 'child' boxes)
462
- rects = filter_nested_boxes(rects)
463
-
464
- # 3. Sort by Y center
465
- rects.sort(key=lambda r: (r[1] + r[3]) / 2)
466
-
467
- merged_lines = []
468
- while rects:
469
- current_line = [rects.pop(0)]
470
- line_y_center = (current_line[0][1] + current_line[0][3]) / 2
471
-
472
- remaining = []
473
- for r in rects:
474
- r_y_center = (r[1] + r[3]) / 2
475
- # If Y-center is close (same horizontal line)
476
- if abs(r_y_center - line_y_center) < y_thresh:
477
- current_line.append(r)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
  else:
479
- remaining.append(r)
480
-
481
- rects = remaining
482
-
483
- # 4. Create Line Box
484
- lx1 = min(r[0] for r in current_line)
485
- ly1 = min(r[1] for r in current_line)
486
- lx2 = max(r[2] for r in current_line)
487
- ly2 = max(r[3] for r in current_line)
488
-
489
- merged_lines.append([lx1, ly1, lx2, ly2])
490
-
491
- # Final Sort by Y
492
- merged_lines.sort(key=lambda r: r[1])
493
- return merged_lines
494
-
495
 
496
  def process_image(image):
497
- if image is None: return None, [], "Please upload an image."
 
 
498
  image_np = np.array(image.convert("RGB"))
499
-
500
- # DETECT
501
  try:
502
  dt_boxes, _ = detector.text_detector(image_np)
503
  except Exception as e:
504
  return image, [], f"Detection Error: {str(e)}"
505
-
506
  if dt_boxes is None or len(dt_boxes) == 0:
507
  return image, [], "No text detected."
508
 
509
- # PROCESS (Filter Nested -> Merge Lines)
510
  line_boxes = merge_boxes_into_lines(dt_boxes)
511
 
512
  annotated_img = image_np.copy()
@@ -516,14 +760,11 @@ def process_image(image):
516
  for box in line_boxes:
517
  x1, y1, x2, y2 = map(int, box)
518
 
519
- # Filter Noise
520
  if (x2 - x1) < 20 or (y2 - y1) < 15:
521
  continue
522
-
523
- # Draw (Green)
524
  cv2.rectangle(annotated_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
525
 
526
- # PADDING
527
  PAD = 10
528
  h, w, _ = image_np.shape
529
  x1 = max(0, x1 - PAD)
@@ -535,20 +776,18 @@ def process_image(image):
535
  pil_crop = Image.fromarray(crop)
536
  debug_crops.append(pil_crop)
537
 
538
- # RECOGNIZE
539
  with torch.no_grad():
540
  pixel_values = processor(images=pil_crop, return_tensors="pt").pixel_values.to(device)
541
  generated_ids = model.generate(pixel_values)
542
  text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
543
  if text.strip():
544
  results.append(text)
545
-
546
  full_text = "\n".join(results)
547
  return Image.fromarray(annotated_img), debug_crops, full_text
548
 
549
- # --- UI ---
550
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
551
- gr.Markdown("# ⚡ Smart Line-Level OCR (Cleaned)")
552
 
553
  with gr.Row():
554
  with gr.Column(scale=1):
@@ -556,12 +795,12 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
556
  btn = gr.Button("Transcribe", variant="primary")
557
 
558
  with gr.Column(scale=1):
559
- output_img = gr.Image(label="Cleaned Lines (Green Boxes)")
560
  output_txt = gr.Textbox(label="Extracted Text", lines=15, show_copy_button=True)
561
-
562
  with gr.Row():
563
- gallery = gr.Gallery(label="Final Line Crops", columns=4, height=200)
564
-
565
  btn.click(process_image, input_img, [output_img, gallery, output_txt])
566
 
567
  if __name__ == "__main__":
 
368
 
369
 
370
 
371
+
372
+
373
+
374
+
375
+
376
+
377
+
378
+
379
+ # import gradio as gr
380
+ # import torch
381
+ # import numpy as np
382
+ # import cv2
383
+ # from PIL import Image
384
+ # from transformers import TrOCRProcessor, VisionEncoderDecoderModel
385
+ # from paddleocr import PaddleOCR
386
+
387
+ # # --- 1. SETUP TR-OCR ---
388
+ # device = "cuda" if torch.cuda.is_available() else "cpu"
389
+ # print(f"Loading TrOCR on {device}...")
390
+ # processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
391
+ # model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten').to(device).eval()
392
+
393
+ # # --- 2. SETUP PADDLEOCR ---
394
+ # print("Loading PaddleOCR...")
395
+ # # High resolution to catch faint text
396
+ # detector = PaddleOCR(use_angle_cls=True, lang='en', show_log=False,
397
+ # det_limit_side_len=2500, det_db_thresh=0.1, det_db_box_thresh=0.3)
398
+
399
+
400
+ # # ==========================================
401
+ # # 🧠 LOGIC FIX 1: REMOVE NESTED BOXES
402
+ # # ==========================================
403
+ # def calculate_overlap_area(box1, box2):
404
+ # """Calculates the intersection area between two boxes."""
405
+ # x1 = max(box1[0], box2[0])
406
+ # y1 = max(box1[1], box2[1])
407
+ # x2 = min(box1[2], box2[2])
408
+ # y2 = min(box1[3], box2[3])
409
+
410
+ # if x2 < x1 or y2 < y1:
411
+ # return 0.0
412
+ # return (x2 - x1) * (y2 - y1)
413
+
414
+ # def filter_nested_boxes(boxes, containment_thresh=0.80):
415
+ # """
416
+ # Removes boxes that are mostly contained within other larger boxes.
417
+ # """
418
+ # if not boxes: return []
419
+
420
+ # # Convert all to [x1, y1, x2, y2, area]
421
+ # active = []
422
+ # for b in boxes:
423
+ # area = (b[2] - b[0]) * (b[3] - b[1])
424
+ # active.append(list(b) + [area])
425
+
426
+ # # Sort by area (Largest to Smallest) - Crucial!
427
+ # # We want to keep the big 'parent' box and delete the small 'child' box.
428
+ # active.sort(key=lambda x: x[4], reverse=True)
429
+
430
+ # final_boxes = []
431
+
432
+ # for i, current in enumerate(active):
433
+ # is_nested = False
434
+ # curr_area = current[4]
435
+
436
+ # # Check against all boxes we've already accepted (which are bigger/same size)
437
+ # for kept in final_boxes:
438
+ # overlap = calculate_overlap_area(current, kept)
439
+
440
+ # # Check if 'current' is inside 'kept'
441
+ # # If >80% of current box is covered by kept box, it's a duplicate/nested box
442
+ # if (overlap / curr_area) > containment_thresh:
443
+ # is_nested = True
444
+ # break
445
+
446
+ # if not is_nested:
447
+ # final_boxes.append(current[:4]) # Store only coord, drop area
448
+
449
+ # return final_boxes
450
+
451
+
452
+ # # ==========================================
453
+ # # 🧠 LOGIC FIX 2: MERGE WORDS INTO LINES
454
+ # # ==========================================
455
+ # def merge_boxes_into_lines(raw_boxes, y_thresh=30):
456
+ # if raw_boxes is None or len(raw_boxes) == 0:
457
+ # return []
458
+
459
+ # # 1. Convert raw polygons to Axis-Aligned Rectangles
460
+ # rects = []
461
+ # for box in raw_boxes:
462
+ # box = np.array(box).astype(np.float32)
463
+ # x1 = np.min(box[:, 0])
464
+ # y1 = np.min(box[:, 1])
465
+ # x2 = np.max(box[:, 0])
466
+ # y2 = np.max(box[:, 1])
467
+ # rects.append([x1, y1, x2, y2])
468
+
469
+ # # 🔴 STEP 2: Filter Nested Boxes (Remove the 'child' boxes)
470
+ # rects = filter_nested_boxes(rects)
471
+
472
+ # # 3. Sort by Y center
473
+ # rects.sort(key=lambda r: (r[1] + r[3]) / 2)
474
+
475
+ # merged_lines = []
476
+ # while rects:
477
+ # current_line = [rects.pop(0)]
478
+ # line_y_center = (current_line[0][1] + current_line[0][3]) / 2
479
+
480
+ # remaining = []
481
+ # for r in rects:
482
+ # r_y_center = (r[1] + r[3]) / 2
483
+ # # If Y-center is close (same horizontal line)
484
+ # if abs(r_y_center - line_y_center) < y_thresh:
485
+ # current_line.append(r)
486
+ # else:
487
+ # remaining.append(r)
488
+
489
+ # rects = remaining
490
+
491
+ # # 4. Create Line Box
492
+ # lx1 = min(r[0] for r in current_line)
493
+ # ly1 = min(r[1] for r in current_line)
494
+ # lx2 = max(r[2] for r in current_line)
495
+ # ly2 = max(r[3] for r in current_line)
496
+
497
+ # merged_lines.append([lx1, ly1, lx2, ly2])
498
+
499
+ # # Final Sort by Y
500
+ # merged_lines.sort(key=lambda r: r[1])
501
+ # return merged_lines
502
+
503
+
504
+ # def process_image(image):
505
+ # if image is None: return None, [], "Please upload an image."
506
+ # image_np = np.array(image.convert("RGB"))
507
+
508
+ # # DETECT
509
+ # try:
510
+ # dt_boxes, _ = detector.text_detector(image_np)
511
+ # except Exception as e:
512
+ # return image, [], f"Detection Error: {str(e)}"
513
+
514
+ # if dt_boxes is None or len(dt_boxes) == 0:
515
+ # return image, [], "No text detected."
516
+
517
+ # # PROCESS (Filter Nested -> Merge Lines)
518
+ # line_boxes = merge_boxes_into_lines(dt_boxes)
519
+
520
+ # annotated_img = image_np.copy()
521
+ # results = []
522
+ # debug_crops = []
523
+
524
+ # for box in line_boxes:
525
+ # x1, y1, x2, y2 = map(int, box)
526
+
527
+ # # Filter Noise
528
+ # if (x2 - x1) < 20 or (y2 - y1) < 15:
529
+ # continue
530
+
531
+ # # Draw (Green)
532
+ # cv2.rectangle(annotated_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
533
+
534
+ # # PADDING
535
+ # PAD = 10
536
+ # h, w, _ = image_np.shape
537
+ # x1 = max(0, x1 - PAD)
538
+ # y1 = max(0, y1 - PAD)
539
+ # x2 = min(w, x2 + PAD)
540
+ # y2 = min(h, y2 + PAD)
541
+
542
+ # crop = image_np[y1:y2, x1:x2]
543
+ # pil_crop = Image.fromarray(crop)
544
+ # debug_crops.append(pil_crop)
545
+
546
+ # # RECOGNIZE
547
+ # with torch.no_grad():
548
+ # pixel_values = processor(images=pil_crop, return_tensors="pt").pixel_values.to(device)
549
+ # generated_ids = model.generate(pixel_values)
550
+ # text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
551
+ # if text.strip():
552
+ # results.append(text)
553
+
554
+ # full_text = "\n".join(results)
555
+ # return Image.fromarray(annotated_img), debug_crops, full_text
556
+
557
+ # # --- UI ---
558
+ # with gr.Blocks(theme=gr.themes.Soft()) as demo:
559
+ # gr.Markdown("# ⚡ Smart Line-Level OCR (Cleaned)")
560
+
561
+ # with gr.Row():
562
+ # with gr.Column(scale=1):
563
+ # input_img = gr.Image(type="pil", label="Upload Image")
564
+ # btn = gr.Button("Transcribe", variant="primary")
565
+
566
+ # with gr.Column(scale=1):
567
+ # output_img = gr.Image(label="Cleaned Lines (Green Boxes)")
568
+ # output_txt = gr.Textbox(label="Extracted Text", lines=15, show_copy_button=True)
569
+
570
+ # with gr.Row():
571
+ # gallery = gr.Gallery(label="Final Line Crops", columns=4, height=200)
572
+
573
+ # btn.click(process_image, input_img, [output_img, gallery, output_txt])
574
+
575
+ # if __name__ == "__main__":
576
+ # demo.launch()
577
+
578
+
579
+
580
+
581
+
582
+
583
+
584
+
585
+
586
+
587
+
588
+
589
+
590
+
591
+
592
+
593
+
594
import gradio as gr
import torch
import numpy as np
import cv2
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from paddleocr import PaddleOCR

# Setup
# TrOCR (recognition): transformer encoder-decoder fine-tuned on handwriting.
# Runs on GPU when available; .eval() disables dropout for inference.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Loading TrOCR on {device}...")
processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten').to(device).eval()

# PaddleOCR (detection only — recognition is handled by TrOCR above).
# det_limit_side_len=2500 keeps high resolution; the low det_db_thresh=0.1
# and det_db_box_thresh=0.3 make detection sensitive to faint text.
print("Loading PaddleOCR...")
detector = PaddleOCR(use_angle_cls=True, lang='en', show_log=False,
                     det_limit_side_len=2500, det_db_thresh=0.1, det_db_box_thresh=0.3)
611
 
612
def calculate_iou(box1, box2):
    """Return the overlap ratio between two axis-aligned boxes.

    Despite the name, this computes intersection over the *smaller* box's
    area (the overlap coefficient), not true IoU: a small box fully inside
    a large one scores 1.0, which is exactly what the nested-box filter
    in remove_nested_boxes needs.

    Args:
        box1: [x1, y1, x2, y2] rectangle.
        box2: [x1, y1, x2, y2] rectangle.

    Returns:
        Overlap fraction in [0, 1]; 0.0 for disjoint or degenerate boxes.
    """
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])

    if x2 < x1 or y2 < y1:
        return 0.0

    intersection = (x2 - x1) * (y2 - y1)
    area1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area2 = (box2[2] - box2[0]) * (box2[3] - box2[1])

    smaller = min(area1, area2)
    # Guard degenerate (zero-area) boxes — previously a ZeroDivisionError.
    if smaller <= 0:
        return 0.0
    return intersection / smaller
 
628
def remove_nested_boxes(boxes, iou_thresh=0.7):
    """Drop boxes that substantially overlap a larger, already-kept box.

    Boxes are processed largest-first, so when two boxes overlap beyond
    `iou_thresh` (intersection over the smaller area — see calculate_iou),
    the larger "parent" survives and the nested "child" is discarded.

    Args:
        boxes: iterable of [x1, y1, x2, y2] rectangles.
        iou_thresh: overlap fraction above which the smaller box is dropped.

    Returns:
        List of kept boxes as (x1, y1, x2, y2) tuples, largest-area first.
    """
    if not boxes:
        return []

    # Pair each box with its area so we can process largest-first.
    boxes_with_area = [(*b, (b[2] - b[0]) * (b[3] - b[1])) for b in boxes]

    # Sort by area descending: larger boxes are accepted first, so any
    # smaller box that overlaps one of them is treated as nested.
    boxes_with_area.sort(key=lambda x: x[4], reverse=True)

    keep = []
    for current in boxes_with_area:
        curr_box = current[:4]
        # Discard if it heavily overlaps any (larger) box we already kept.
        if not any(calculate_iou(curr_box, kept) > iou_thresh for kept in keep):
            keep.append(curr_box)

    return keep
 
657
 
658
def merge_boxes_into_lines(raw_boxes, y_overlap_thresh=0.5, x_gap_thresh=100):
    """Group detected word/fragment boxes into full text-line boxes.

    Args:
        raw_boxes: iterable of 4-point polygons, e.g. PaddleOCR detection
            output (typically a numpy array of shape (N, 4, 2)).
        y_overlap_thresh: minimum vertical-overlap fraction (relative to the
            shorter of the two heights) for a box to join the current line.
        x_gap_thresh: maximum horizontal gap (pixels) between boxes that are
            merged within a line.

    Returns:
        List of [x1, y1, x2, y2] line boxes, sorted top to bottom.
    """
    # BUG FIX: `raw_boxes` is usually a numpy array, and `not raw_boxes`
    # raises "truth value of an array is ambiguous" — test explicitly.
    if raw_boxes is None or len(raw_boxes) == 0:
        return []

    # Convert each detected polygon to its axis-aligned bounding rectangle.
    rects = []
    for box in raw_boxes:
        box = np.array(box).astype(np.float32)
        x1, y1 = np.min(box[:, 0]), np.min(box[:, 1])
        x2, y2 = np.max(box[:, 0]), np.max(box[:, 1])
        rects.append([x1, y1, x2, y2])

    # Drop duplicate detections that sit inside a larger box.
    rects = remove_nested_boxes(rects)
    if not rects:
        return []

    # Sort top-to-bottom, then group boxes whose vertical bands overlap.
    rects.sort(key=lambda r: r[1])

    lines = []
    current_line = [rects[0]]
    for rect in rects[1:]:
        # Vertical band covered by the line accumulated so far.
        line_y1 = min(r[1] for r in current_line)
        line_y2 = max(r[3] for r in current_line)
        line_height = line_y2 - line_y1

        rect_y1, rect_y2 = rect[1], rect[3]
        rect_height = rect_y2 - rect_y1

        # Vertical overlap between the line band and this box.
        overlap_y1 = max(line_y1, rect_y1)
        overlap_y2 = min(line_y2, rect_y2)
        overlap = max(0, overlap_y2 - overlap_y1)

        # Significant overlap relative to the shorter height => same line.
        if overlap > y_overlap_thresh * min(line_height, rect_height):
            current_line.append(rect)
        else:
            lines.append(current_line)
            current_line = [rect]
    lines.append(current_line)

    # Within each line: merge horizontally close boxes, then take the union
    # of whatever remains as the final line box.
    merged = []
    for line in lines:
        line.sort(key=lambda r: r[0])  # left to right

        merged_line = [line[0]]
        for rect in line[1:]:
            last = merged_line[-1]
            if rect[0] - last[2] < x_gap_thresh:
                merged_line[-1] = [
                    min(last[0], rect[0]),
                    min(last[1], rect[1]),
                    max(last[2], rect[2]),
                    max(last[3], rect[3]),
                ]
            else:
                merged_line.append(rect)

        x1 = min(r[0] for r in merged_line)
        y1 = min(r[1] for r in merged_line)
        x2 = max(r[2] for r in merged_line)
        y2 = max(r[3] for r in merged_line)
        merged.append([x1, y1, x2, y2])

    # Final reading order: top to bottom.
    merged.sort(key=lambda r: r[1])
    return merged
 
 
 
 
739
 
740
def process_image(image):
    """Detect text lines with PaddleOCR and transcribe each with TrOCR.

    Args:
        image: PIL.Image uploaded by the user, or None.

    Returns:
        Tuple of (annotated PIL image with green line boxes,
                  list of PIL crops for the debug gallery,
                  transcribed text, one line per detected text line).
        On error/empty input the first element is the input image (or None)
        and the text slot carries a human-readable message.
    """
    if image is None:
        return None, [], "Please upload an image."

    image_np = np.array(image.convert("RGB"))

    try:
        dt_boxes, _ = detector.text_detector(image_np)
    except Exception as e:
        return image, [], f"Detection Error: {str(e)}"

    if dt_boxes is None or len(dt_boxes) == 0:
        return image, [], "No text detected."

    # Word-level detections -> de-duplicated, merged line boxes.
    line_boxes = merge_boxes_into_lines(dt_boxes)

    annotated_img = image_np.copy()
    results = []
    debug_crops = []

    # Loop invariants hoisted out of the per-box loop.
    PAD = 10  # pixels of context kept around each line crop
    h, w, _ = image_np.shape

    for box in line_boxes:
        x1, y1, x2, y2 = map(int, box)

        # Skip noise: boxes too small to contain legible text.
        if (x2 - x1) < 20 or (y2 - y1) < 15:
            continue

        # Draw the detected line (green) on the annotated preview.
        cv2.rectangle(annotated_img, (x1, y1), (x2, y2), (0, 255, 0), 2)

        # Pad the crop, clamped to image bounds, so strokes aren't clipped.
        x1 = max(0, x1 - PAD)
        y1 = max(0, y1 - PAD)
        x2 = min(w, x2 + PAD)
        y2 = min(h, y2 + PAD)

        crop = image_np[y1:y2, x1:x2]
        pil_crop = Image.fromarray(crop)
        debug_crops.append(pil_crop)

        # Recognize the line crop with TrOCR.
        with torch.no_grad():
            pixel_values = processor(images=pil_crop, return_tensors="pt").pixel_values.to(device)
            generated_ids = model.generate(pixel_values)
            text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
            if text.strip():
                results.append(text)

    full_text = "\n".join(results)
    return Image.fromarray(annotated_img), debug_crops, full_text
788
 
 
789
# --- Gradio UI ---
# Left column: image upload + button; right column: annotated preview and
# transcription; bottom row: per-line debug crops.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ⚡ Smart Line-Level OCR (Fixed)")

    with gr.Row():
        with gr.Column(scale=1):
            input_img = gr.Image(type="pil", label="Upload Image")
            btn = gr.Button("Transcribe", variant="primary")

        with gr.Column(scale=1):
            output_img = gr.Image(label="Detected Lines")
            output_txt = gr.Textbox(label="Extracted Text", lines=15, show_copy_button=True)

    with gr.Row():
        gallery = gr.Gallery(label="Line Crops", columns=4, height=200)

    # Wire the button to the full detect -> merge -> recognize pipeline.
    btn.click(process_image, input_img, [output_img, gallery, output_txt])
805
 
806
  if __name__ == "__main__":