heerjtdev committed on
Commit
b0b67f8
·
verified ·
1 Parent(s): 06eecf1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -15
app.py CHANGED
@@ -578,6 +578,13 @@
578
  # )
579
 
580
 
 
 
 
 
 
 
 
581
  import base64
582
  from PIL import Image
583
  import re
@@ -635,7 +642,7 @@ IOU_MERGE_THRESHOLD = 0.4
635
  IOA_SUPPRESSION_THRESHOLD = 0.7
636
 
637
  # ============================================================================
638
- # --- BOX COMBINATION LOGIC (Retained) ---
639
  # ============================================================================
640
 
641
  def calculate_iou(box1, box2):
@@ -680,9 +687,11 @@ def filter_nested_boxes(detections, ioa_threshold=0.80):
680
 
681
  def merge_overlapping_boxes(detections, iou_threshold):
682
  if not detections: return []
 
683
  detections.sort(key=lambda d: d['conf'], reverse=True)
684
  merged_detections = []
685
  is_merged = [False] * len(detections)
 
686
  for i in range(len(detections)):
687
  if is_merged[i]: continue
688
  current_box = detections[i]['coords']
@@ -700,14 +709,21 @@ def merge_overlapping_boxes(detections, iou_threshold):
700
  is_merged[j] = True
701
  merged_detections.append({
702
  'coords': (merged_x1, merged_y1, merged_x2, merged_y2),
703
- 'y1': merged_y1, 'class': current_class, 'conf': detections[i]['conf']
 
 
 
704
  })
705
- # This step ensures top-to-bottom reading order for sequential numbering (EQUATION1, EQUATION2, etc.)
706
- merged_detections.sort(key=lambda d: d['y1'])
 
 
 
 
707
  return merged_detections
708
 
709
  # ============================================================================
710
- # --- UTILITY FUNCTIONS ---
711
  # ============================================================================
712
 
713
  def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
@@ -804,6 +820,8 @@ def run_yolo_detection_and_count(
804
  merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
805
  final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
806
 
 
 
807
  for det in final_detections:
808
  bbox = det["coords"]
809
  crop_pil = crop_and_convert_to_pil(image, bbox)
@@ -829,10 +847,9 @@ def run_yolo_detection_and_count(
829
 
830
 
831
  # ============================================================================
832
- # --- MAIN DOCUMENT PROCESSING FUNCTION (MODIFIED OUTPUT) ---
833
  # ============================================================================
834
 
835
- # The return type is updated to reflect the new structured output dictionary
836
  def run_single_pdf_preprocessing(
837
  pdf_path: str
838
  ) -> Tuple[int, int, int, str, float, Dict[str, Union[int, str]], List[Tuple[Image.Image, str]]]:
@@ -852,7 +869,6 @@ def run_single_pdf_preprocessing(
852
 
853
 
854
  # 1. Validation and Model Loading (YOLO)
855
- # ... (Model loading logic retained)
856
  t0 = time.time()
857
  if not os.path.exists(pdf_path):
858
  report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
@@ -944,7 +960,7 @@ def run_single_pdf_preprocessing(
944
 
945
  # 4. Final Report Generation and Gallery Formatting
946
 
947
- # --- NEW: Create the structured JSON output as requested by the user ---
948
  structured_latex_output = {
949
  "Total Pages": total_pages,
950
  "Total Equations": total_equation_count,
@@ -988,7 +1004,7 @@ def run_single_pdf_preprocessing(
988
 
989
 
990
  # ============================================================================
991
- # --- GRADIO INTERFACE FUNCTION & DEFINITION (MODIFIED OUTPUT) ---
992
  # ============================================================================
993
 
994
  def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, Dict[str, Union[int, str]], List[Tuple[Image.Image, str]]]:
@@ -1005,7 +1021,7 @@ def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, Dict[str, Union[in
1005
  num_figures,
1006
  report,
1007
  total_time,
1008
- structured_latex_output, # Variable name changed to match the new output
1009
  gallery_items
1010
  ) = run_single_pdf_preprocessing(pdf_path)
1011
 
@@ -1033,7 +1049,6 @@ if __name__ == "__main__":
1033
  output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
1034
  output_report = gr.Markdown(label="Processing Summary and Full Log")
1035
 
1036
- # This JSON component now displays the structured output requested by the user
1037
  output_structured_latex = gr.JSON(label="Structured LaTeX Output (EQUATIONx : <latex code>)")
1038
 
1039
  output_gallery = gr.Gallery(
@@ -1052,12 +1067,12 @@ if __name__ == "__main__":
1052
  output_equations,
1053
  output_figures,
1054
  output_report,
1055
- output_structured_latex, # Updated component
1056
  output_gallery
1057
  ],
1058
- title="πŸ“Š YOLO Detection & Math OCR Pipeline (Structured Output)",
1059
  description=(
1060
- "Upload a PDF. YOLO detects equations/figures, and OCR converts equations to LaTeX. See the Structured LaTeX Output panel for the requested format."
1061
  ),
1062
  )
1063
 
 
578
  # )
579
 
580
 
581
+
582
+
583
+
584
+
585
+
586
+
587
+
588
  import base64
589
  from PIL import Image
590
  import re
 
642
  IOA_SUPPRESSION_THRESHOLD = 0.7
643
 
644
  # ============================================================================
645
+ # --- BOX COMBINATION LOGIC (FIXED) ---
646
  # ============================================================================
647
 
648
  def calculate_iou(box1, box2):
 
687
 
688
  def merge_overlapping_boxes(detections, iou_threshold):
689
  if not detections: return []
690
+ # 1. Sort by confidence (YOLO standard)
691
  detections.sort(key=lambda d: d['conf'], reverse=True)
692
  merged_detections = []
693
  is_merged = [False] * len(detections)
694
+
695
  for i in range(len(detections)):
696
  if is_merged[i]: continue
697
  current_box = detections[i]['coords']
 
709
  is_merged[j] = True
710
  merged_detections.append({
711
  'coords': (merged_x1, merged_y1, merged_x2, merged_y2),
712
+ # 'y1' is retained for clarity, though 'coords' contains it
713
+ 'y1': merged_y1,
714
+ 'class': current_class,
715
+ 'conf': detections[i]['conf']
716
  })
717
+
718
+ # --- FIX IMPLEMENTATION: READING ORDER SORT ---
719
+ # Sort primarily by y1 (vertical position), secondarily by x1 (horizontal position).
720
+ # This correctly handles two-column layouts like Q.10 options (A), (B), (C), (D)
721
+ merged_detections.sort(key=lambda d: (d['coords'][1], d['coords'][0]))
722
+
723
  return merged_detections
724
 
725
  # ============================================================================
726
+ # --- UTILITY FUNCTIONS (Retained) ---
727
  # ============================================================================
728
 
729
  def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
 
820
  merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
821
  final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
822
 
823
+ # Note: final_detections is now sorted by (y1, x1) in reading order.
824
+
825
  for det in final_detections:
826
  bbox = det["coords"]
827
  crop_pil = crop_and_convert_to_pil(image, bbox)
 
847
 
848
 
849
  # ============================================================================
850
+ # --- MAIN DOCUMENT PROCESSING FUNCTION (Retained Logic) ---
851
  # ============================================================================
852
 
 
853
  def run_single_pdf_preprocessing(
854
  pdf_path: str
855
  ) -> Tuple[int, int, int, str, float, Dict[str, Union[int, str]], List[Tuple[Image.Image, str]]]:
 
869
 
870
 
871
  # 1. Validation and Model Loading (YOLO)
 
872
  t0 = time.time()
873
  if not os.path.exists(pdf_path):
874
  report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
 
960
 
961
  # 4. Final Report Generation and Gallery Formatting
962
 
963
+ # Create the structured JSON output as requested by the user
964
  structured_latex_output = {
965
  "Total Pages": total_pages,
966
  "Total Equations": total_equation_count,
 
1004
 
1005
 
1006
  # ============================================================================
1007
+ # --- GRADIO INTERFACE FUNCTION & DEFINITION (Retained) ---
1008
  # ============================================================================
1009
 
1010
  def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, Dict[str, Union[int, str]], List[Tuple[Image.Image, str]]]:
 
1021
  num_figures,
1022
  report,
1023
  total_time,
1024
+ structured_latex_output,
1025
  gallery_items
1026
  ) = run_single_pdf_preprocessing(pdf_path)
1027
 
 
1049
  output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
1050
  output_report = gr.Markdown(label="Processing Summary and Full Log")
1051
 
 
1052
  output_structured_latex = gr.JSON(label="Structured LaTeX Output (EQUATIONx : <latex code>)")
1053
 
1054
  output_gallery = gr.Gallery(
 
1067
  output_equations,
1068
  output_figures,
1069
  output_report,
1070
+ output_structured_latex,
1071
  output_gallery
1072
  ],
1073
+ title="πŸ“Š YOLO Detection & Math OCR Pipeline (Reading Order Fix)",
1074
  description=(
1075
+ "Upload a PDF. YOLO detects equations/figures, and OCR converts equations to LaTeX. Now includes a fix for two-column reading order."
1076
  ),
1077
  )
1078