Update app.py
Browse files
app.py
CHANGED
|
@@ -578,6 +578,13 @@
|
|
| 578 |
# )
|
| 579 |
|
| 580 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 581 |
import base64
|
| 582 |
from PIL import Image
|
| 583 |
import re
|
|
@@ -635,7 +642,7 @@ IOU_MERGE_THRESHOLD = 0.4
|
|
| 635 |
IOA_SUPPRESSION_THRESHOLD = 0.7
|
| 636 |
|
| 637 |
# ============================================================================
|
| 638 |
-
# --- BOX COMBINATION LOGIC (
|
| 639 |
# ============================================================================
|
| 640 |
|
| 641 |
def calculate_iou(box1, box2):
|
|
@@ -680,9 +687,11 @@ def filter_nested_boxes(detections, ioa_threshold=0.80):
|
|
| 680 |
|
| 681 |
def merge_overlapping_boxes(detections, iou_threshold):
|
| 682 |
if not detections: return []
|
|
|
|
| 683 |
detections.sort(key=lambda d: d['conf'], reverse=True)
|
| 684 |
merged_detections = []
|
| 685 |
is_merged = [False] * len(detections)
|
|
|
|
| 686 |
for i in range(len(detections)):
|
| 687 |
if is_merged[i]: continue
|
| 688 |
current_box = detections[i]['coords']
|
|
@@ -700,14 +709,21 @@ def merge_overlapping_boxes(detections, iou_threshold):
|
|
| 700 |
is_merged[j] = True
|
| 701 |
merged_detections.append({
|
| 702 |
'coords': (merged_x1, merged_y1, merged_x2, merged_y2),
|
| 703 |
-
'y1'
|
|
|
|
|
|
|
|
|
|
| 704 |
})
|
| 705 |
-
|
| 706 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 707 |
return merged_detections
|
| 708 |
|
| 709 |
# ============================================================================
|
| 710 |
-
# --- UTILITY FUNCTIONS ---
|
| 711 |
# ============================================================================
|
| 712 |
|
| 713 |
def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
|
|
@@ -804,6 +820,8 @@ def run_yolo_detection_and_count(
|
|
| 804 |
merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
|
| 805 |
final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
|
| 806 |
|
|
|
|
|
|
|
| 807 |
for det in final_detections:
|
| 808 |
bbox = det["coords"]
|
| 809 |
crop_pil = crop_and_convert_to_pil(image, bbox)
|
|
@@ -829,10 +847,9 @@ def run_yolo_detection_and_count(
|
|
| 829 |
|
| 830 |
|
| 831 |
# ============================================================================
|
| 832 |
-
# --- MAIN DOCUMENT PROCESSING FUNCTION (
|
| 833 |
# ============================================================================
|
| 834 |
|
| 835 |
-
# The return type is updated to reflect the new structured output dictionary
|
| 836 |
def run_single_pdf_preprocessing(
|
| 837 |
pdf_path: str
|
| 838 |
) -> Tuple[int, int, int, str, float, Dict[str, Union[int, str]], List[Tuple[Image.Image, str]]]:
|
|
@@ -852,7 +869,6 @@ def run_single_pdf_preprocessing(
|
|
| 852 |
|
| 853 |
|
| 854 |
# 1. Validation and Model Loading (YOLO)
|
| 855 |
-
# ... (Model loading logic retained)
|
| 856 |
t0 = time.time()
|
| 857 |
if not os.path.exists(pdf_path):
|
| 858 |
report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
|
|
@@ -944,7 +960,7 @@ def run_single_pdf_preprocessing(
|
|
| 944 |
|
| 945 |
# 4. Final Report Generation and Gallery Formatting
|
| 946 |
|
| 947 |
-
#
|
| 948 |
structured_latex_output = {
|
| 949 |
"Total Pages": total_pages,
|
| 950 |
"Total Equations": total_equation_count,
|
|
@@ -988,7 +1004,7 @@ def run_single_pdf_preprocessing(
|
|
| 988 |
|
| 989 |
|
| 990 |
# ============================================================================
|
| 991 |
-
# --- GRADIO INTERFACE FUNCTION & DEFINITION (
|
| 992 |
# ============================================================================
|
| 993 |
|
| 994 |
def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, Dict[str, Union[int, str]], List[Tuple[Image.Image, str]]]:
|
|
@@ -1005,7 +1021,7 @@ def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, Dict[str, Union[in
|
|
| 1005 |
num_figures,
|
| 1006 |
report,
|
| 1007 |
total_time,
|
| 1008 |
-
structured_latex_output,
|
| 1009 |
gallery_items
|
| 1010 |
) = run_single_pdf_preprocessing(pdf_path)
|
| 1011 |
|
|
@@ -1033,7 +1049,6 @@ if __name__ == "__main__":
|
|
| 1033 |
output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
|
| 1034 |
output_report = gr.Markdown(label="Processing Summary and Full Log")
|
| 1035 |
|
| 1036 |
-
# This JSON component now displays the structured output requested by the user
|
| 1037 |
output_structured_latex = gr.JSON(label="Structured LaTeX Output (EQUATIONx : <latex code>)")
|
| 1038 |
|
| 1039 |
output_gallery = gr.Gallery(
|
|
@@ -1052,12 +1067,12 @@ if __name__ == "__main__":
|
|
| 1052 |
output_equations,
|
| 1053 |
output_figures,
|
| 1054 |
output_report,
|
| 1055 |
-
output_structured_latex,
|
| 1056 |
output_gallery
|
| 1057 |
],
|
| 1058 |
-
title="YOLO Detection & Math OCR Pipeline (
|
| 1059 |
description=(
|
| 1060 |
-
"Upload a PDF. YOLO detects equations/figures, and OCR converts equations to LaTeX.
|
| 1061 |
),
|
| 1062 |
)
|
| 1063 |
|
|
|
|
| 578 |
# )
|
| 579 |
|
| 580 |
|
| 581 |
+
|
| 582 |
+
|
| 583 |
+
|
| 584 |
+
|
| 585 |
+
|
| 586 |
+
|
| 587 |
+
|
| 588 |
import base64
|
| 589 |
from PIL import Image
|
| 590 |
import re
|
|
|
|
| 642 |
IOA_SUPPRESSION_THRESHOLD = 0.7
|
| 643 |
|
| 644 |
# ============================================================================
|
| 645 |
+
# --- BOX COMBINATION LOGIC (FIXED) ---
|
| 646 |
# ============================================================================
|
| 647 |
|
| 648 |
def calculate_iou(box1, box2):
|
|
|
|
| 687 |
|
| 688 |
def merge_overlapping_boxes(detections, iou_threshold):
|
| 689 |
if not detections: return []
|
| 690 |
+
# 1. Sort by confidence (YOLO standard)
|
| 691 |
detections.sort(key=lambda d: d['conf'], reverse=True)
|
| 692 |
merged_detections = []
|
| 693 |
is_merged = [False] * len(detections)
|
| 694 |
+
|
| 695 |
for i in range(len(detections)):
|
| 696 |
if is_merged[i]: continue
|
| 697 |
current_box = detections[i]['coords']
|
|
|
|
| 709 |
is_merged[j] = True
|
| 710 |
merged_detections.append({
|
| 711 |
'coords': (merged_x1, merged_y1, merged_x2, merged_y2),
|
| 712 |
+
# 'y1' is retained for clarity, though 'coords' contains it
|
| 713 |
+
'y1': merged_y1,
|
| 714 |
+
'class': current_class,
|
| 715 |
+
'conf': detections[i]['conf']
|
| 716 |
})
|
| 717 |
+
|
| 718 |
+
# --- FIX IMPLEMENTATION: READING ORDER SORT ---
|
| 719 |
+
# Sort primarily by y1 (vertical position), secondarily by x1 (horizontal position).
|
| 720 |
+
# This correctly handles two-column layouts like Q.10 options (A), (B), (C), (D)
|
| 721 |
+
merged_detections.sort(key=lambda d: (d['coords'][1], d['coords'][0]))
|
| 722 |
+
|
| 723 |
return merged_detections
|
| 724 |
|
| 725 |
# ============================================================================
|
| 726 |
+
# --- UTILITY FUNCTIONS (Retained) ---
|
| 727 |
# ============================================================================
|
| 728 |
|
| 729 |
def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
|
|
|
|
| 820 |
merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
|
| 821 |
final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
|
| 822 |
|
| 823 |
+
# Note: final_detections is now sorted by (y1, x1) in reading order.
|
| 824 |
+
|
| 825 |
for det in final_detections:
|
| 826 |
bbox = det["coords"]
|
| 827 |
crop_pil = crop_and_convert_to_pil(image, bbox)
|
|
|
|
| 847 |
|
| 848 |
|
| 849 |
# ============================================================================
|
| 850 |
+
# --- MAIN DOCUMENT PROCESSING FUNCTION (Retained Logic) ---
|
| 851 |
# ============================================================================
|
| 852 |
|
|
|
|
| 853 |
def run_single_pdf_preprocessing(
|
| 854 |
pdf_path: str
|
| 855 |
) -> Tuple[int, int, int, str, float, Dict[str, Union[int, str]], List[Tuple[Image.Image, str]]]:
|
|
|
|
| 869 |
|
| 870 |
|
| 871 |
# 1. Validation and Model Loading (YOLO)
|
|
|
|
| 872 |
t0 = time.time()
|
| 873 |
if not os.path.exists(pdf_path):
|
| 874 |
report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
|
|
|
|
| 960 |
|
| 961 |
# 4. Final Report Generation and Gallery Formatting
|
| 962 |
|
| 963 |
+
# Create the structured JSON output as requested by the user
|
| 964 |
structured_latex_output = {
|
| 965 |
"Total Pages": total_pages,
|
| 966 |
"Total Equations": total_equation_count,
|
|
|
|
| 1004 |
|
| 1005 |
|
| 1006 |
# ============================================================================
|
| 1007 |
+
# --- GRADIO INTERFACE FUNCTION & DEFINITION (Retained) ---
|
| 1008 |
# ============================================================================
|
| 1009 |
|
| 1010 |
def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, Dict[str, Union[int, str]], List[Tuple[Image.Image, str]]]:
|
|
|
|
| 1021 |
num_figures,
|
| 1022 |
report,
|
| 1023 |
total_time,
|
| 1024 |
+
structured_latex_output,
|
| 1025 |
gallery_items
|
| 1026 |
) = run_single_pdf_preprocessing(pdf_path)
|
| 1027 |
|
|
|
|
| 1049 |
output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
|
| 1050 |
output_report = gr.Markdown(label="Processing Summary and Full Log")
|
| 1051 |
|
|
|
|
| 1052 |
output_structured_latex = gr.JSON(label="Structured LaTeX Output (EQUATIONx : <latex code>)")
|
| 1053 |
|
| 1054 |
output_gallery = gr.Gallery(
|
|
|
|
| 1067 |
output_equations,
|
| 1068 |
output_figures,
|
| 1069 |
output_report,
|
| 1070 |
+
output_structured_latex,
|
| 1071 |
output_gallery
|
| 1072 |
],
|
| 1073 |
+
title="YOLO Detection & Math OCR Pipeline (Reading Order Fix)",
|
| 1074 |
description=(
|
| 1075 |
+
"Upload a PDF. YOLO detects equations/figures, and OCR converts equations to LaTeX. Now includes a fix for two-column reading order."
|
| 1076 |
),
|
| 1077 |
)
|
| 1078 |
|