heerjtdev committed on
Commit
40b8823
·
verified ·
1 Parent(s): 12f4426

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +394 -38
app.py CHANGED
@@ -1,16 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import fitz # PyMuPDF
2
  import numpy as np
3
  import cv2
4
  import torch
5
  import torch.serialization
6
  import os
7
- from typing import Optional, Tuple
8
  from ultralytics import YOLO
9
  import logging
10
  import gradio as gr
11
  import shutil
12
  import tempfile
13
- import json # Still needed for simple JSON logging
 
14
 
15
  # ============================================================================
16
  # --- Global Patches and Setup ---
@@ -23,6 +330,7 @@ def patched_torch_load(*args, **kwargs):
23
  return _original_torch_load(*args, **kwargs)
24
  torch.load = patched_torch_load
25
 
 
26
  logging.basicConfig(level=logging.WARNING)
27
 
28
  # ============================================================================
@@ -30,8 +338,9 @@ logging.basicConfig(level=logging.WARNING)
30
  # ============================================================================
31
 
32
  WEIGHTS_PATH = 'best.pt'
 
33
 
34
- # Detection parameters (Required for your box combination logic)
35
  CONF_THRESHOLD = 0.2
36
  TARGET_CLASSES = ['figure', 'equation']
37
  IOU_MERGE_THRESHOLD = 0.4
@@ -42,9 +351,11 @@ GLOBAL_FIGURE_COUNT = 0
42
  GLOBAL_EQUATION_COUNT = 0
43
 
44
  # ============================================================================
45
- # --- BOX COMBINATION LOGIC (Retained from your original script) ---
46
  # ============================================================================
47
 
 
 
48
  def calculate_iou(box1, box2):
49
  x1_a, y1_a, x2_a, y2_a = box1
50
  x1_b, y1_b, x2_b, y2_b = box2
@@ -112,7 +423,7 @@ def merge_overlapping_boxes(detections, iou_threshold):
112
  return merged_detections
113
 
114
  # ============================================================================
115
- # --- UTILITY FUNCTIONS (Minimally Required) ---
116
  # ============================================================================
117
 
118
  def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
@@ -129,10 +440,10 @@ def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
129
 
130
  def run_yolo_detection_and_count(
131
  image: np.ndarray, model: YOLO, page_num: int
132
- ) -> Tuple[int, int]:
133
  """
134
  Runs YOLO inference, applies NMS/filtering, and updates global counters.
135
- Returns the counts for the current page.
136
  """
137
  global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
138
 
@@ -142,6 +453,7 @@ def run_yolo_detection_and_count(
142
 
143
  try:
144
  # Run prediction
 
145
  results = model.predict(image, conf=CONF_THRESHOLD, verbose=False)
146
 
147
  if results and results[0].boxes:
@@ -157,61 +469,71 @@ def run_yolo_detection_and_count(
157
  })
158
  except Exception as e:
159
  logging.error(f"YOLO inference failed on page {page_num}: {e}")
160
- return 0, 0
161
 
162
  # Apply NMS/Merging/Filtering based on your provided logic
163
  merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
164
  final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
165
 
 
 
166
  # Update Global Counters
167
  for det in final_detections:
 
 
 
 
 
 
 
168
  if det['class'] == 'figure':
169
  GLOBAL_FIGURE_COUNT += 1
170
  page_figures += 1
171
  elif det['class'] == 'equation':
172
  GLOBAL_EQUATION_COUNT += 1
173
  page_equations += 1
 
 
 
 
174
 
175
  logging.warning(f" -> Page {page_num}: EQs={page_equations}, Figs={page_figures}")
176
- return page_equations, page_figures
177
 
178
 
179
  # ============================================================================
180
- # --- MAIN DOCUMENT PROCESSING FUNCTION (Modified for Minimal Output) ---
181
  # ============================================================================
182
 
183
- def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str]:
184
  """
185
- Runs the pipeline and returns just the counts and a report.
186
- No intermediate JSON saving or complex output structure.
187
  """
188
  global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
189
 
190
- # Reset globals
191
  GLOBAL_FIGURE_COUNT = 0
192
  GLOBAL_EQUATION_COUNT = 0
193
 
194
  if not os.path.exists(pdf_path):
195
  report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
196
- return 0, 0, 0, report
197
 
198
- # Model Loading (CRITICAL: Requires best.pt)
199
  try:
200
  model = YOLO(WEIGHTS_PATH)
201
- logging.warning(f"✅ Loaded YOLO model from: {WEIGHTS_PATH}")
202
  except Exception as e:
203
  report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
204
- return 0, 0, 0, report
205
 
206
  try:
207
  doc = fitz.open(pdf_path)
208
  total_pages = doc.page_count
209
- logging.warning(f"✅ Opened PDF: {doc.page_count} pages")
210
  except Exception as e:
211
  report = f"❌ ERROR loading PDF file: {e}"
212
- return 0, 0, 0, report
213
 
214
- mat = fitz.Matrix(2.0, 2.0)
 
215
 
216
  for page_num_0_based in range(doc.page_count):
217
  fitz_page = doc.load_page(page_num_0_based)
@@ -224,9 +546,33 @@ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str]:
224
  logging.error(f"Error converting page {page_num} to image: {e}. Skipping.")
225
  continue
226
 
227
- # Core Detection and Counting
228
- run_yolo_detection_and_count(original_img, model, page_num)
229
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  doc.close()
231
 
232
  # Final Report Generation
@@ -237,32 +583,34 @@ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str]:
237
  f"**3) Total Figures Detected:** **{GLOBAL_FIGURE_COUNT}**"
238
  )
239
 
240
- return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report
 
241
 
242
 
243
  # ============================================================================
244
- # --- GRADIO INTERFACE FUNCTION (Modified for minimal output) ---
245
  # ============================================================================
246
 
247
- def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str]:
248
  """
249
- Gradio wrapper function to handle file upload and return all results as strings.
250
  """
251
  if pdf_file is None:
252
- return "N/A", "N/A", "N/A", "Please upload a PDF file."
253
 
254
  pdf_path = pdf_file.name
255
 
256
  try:
257
  # Run the core logic
258
- num_pages, num_equations, num_figures, report = run_single_pdf_preprocessing(pdf_path)
259
 
260
- # Return results as formatted strings
261
- return str(num_pages), str(num_equations), str(num_figures), report
262
 
263
  except Exception as e:
264
  error_msg = f"An unexpected error occurred: {e}"
265
- return "Error", "Error", "Error", error_msg
 
266
 
267
 
268
  # ============================================================================
@@ -276,20 +624,28 @@ if __name__ == "__main__":
276
 
277
  input_file = gr.File(label="Upload PDF Document", type="filepath", file_types=[".pdf"])
278
 
279
- # Outputs are now discrete number fields and a final markdown report
280
  output_pages = gr.Textbox(label="Total Pages in PDF", interactive=False)
281
  output_equations = gr.Textbox(label="Total Equations Detected", interactive=False)
282
  output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
283
  output_report = gr.Markdown(label="Processing Summary")
 
 
 
 
 
 
 
 
284
 
285
  interface = gr.Interface(
286
  fn=gradio_process_pdf,
287
  inputs=input_file,
288
- outputs=[output_pages, output_equations, output_figures, output_report],
289
- title="🎯 Minimalist YOLO Counting for PDF Elements",
290
  description=(
291
- "Upload a PDF to instantly run YOLO detection using your **`best.pt`** model "
292
- "and get the total counts for pages, equations, and figures."
293
  ),
294
  )
295
 
 
1
+ # import fitz # PyMuPDF
2
+ # import numpy as np
3
+ # import cv2
4
+ # import torch
5
+ # import torch.serialization
6
+ # import os
7
+ # from typing import Optional, Tuple
8
+ # from ultralytics import YOLO
9
+ # import logging
10
+ # import gradio as gr
11
+ # import shutil
12
+ # import tempfile
13
+ # import json # Still needed for simple JSON logging
14
+
15
+ # # ============================================================================
16
+ # # --- Global Patches and Setup ---
17
+ # # ============================================================================
18
+
19
+ # # Patch torch.load to prevent weights_only error with older models
20
+ # _original_torch_load = torch.load
21
+ # def patched_torch_load(*args, **kwargs):
22
+ # kwargs["weights_only"] = False
23
+ # return _original_torch_load(*args, **kwargs)
24
+ # torch.load = patched_torch_load
25
+
26
+ # logging.basicConfig(level=logging.WARNING)
27
+
28
+ # # ============================================================================
29
+ # # --- CONFIGURATION AND CONSTANTS ---
30
+ # # ============================================================================
31
+
32
+ # WEIGHTS_PATH = 'best.pt'
33
+
34
+ # # Detection parameters (Required for your box combination logic)
35
+ # CONF_THRESHOLD = 0.2
36
+ # TARGET_CLASSES = ['figure', 'equation']
37
+ # IOU_MERGE_THRESHOLD = 0.4
38
+ # IOA_SUPPRESSION_THRESHOLD = 0.7
39
+
40
+ # # Global counters (Reset per run)
41
+ # GLOBAL_FIGURE_COUNT = 0
42
+ # GLOBAL_EQUATION_COUNT = 0
43
+
44
+ # # ============================================================================
45
+ # # --- BOX COMBINATION LOGIC (Retained from your original script) ---
46
+ # # ============================================================================
47
+
48
+ # def calculate_iou(box1, box2):
49
+ # x1_a, y1_a, x2_a, y2_a = box1
50
+ # x1_b, y1_b, x2_b, y2_b = box2
51
+ # x_left = max(x1_a, x1_b)
52
+ # y_top = max(y1_a, y1_b)
53
+ # x_right = min(x2_a, x2_b)
54
+ # y_bottom = min(y2_a, y2_b)
55
+ # intersection_area = max(0, x_right - x_left) * max(0, y_bottom - y_top)
56
+ # box_a_area = (x2_a - x1_a) * (y2_a - y1_a)
57
+ # box_b_area = (x2_b - x1_b) * (y2_b - y1_b)
58
+ # union_area = float(box_a_area + box_b_area - intersection_area)
59
+ # return intersection_area / union_area if union_area > 0 else 0
60
+
61
+
62
+ # def filter_nested_boxes(detections, ioa_threshold=0.80):
63
+ # if not detections: return []
64
+ # for d in detections:
65
+ # x1, y1, x2, y2 = d['coords']
66
+ # d['area'] = (x2 - x1) * (y2 - y1)
67
+ # detections.sort(key=lambda x: x['area'], reverse=True)
68
+ # keep_indices = []
69
+ # is_suppressed = [False] * len(detections)
70
+ # for i in range(len(detections)):
71
+ # if is_suppressed[i]: continue
72
+ # keep_indices.append(i)
73
+ # box_a = detections[i]['coords']
74
+ # for j in range(i + 1, len(detections)):
75
+ # if is_suppressed[j]: continue
76
+ # box_b = detections[j]['coords']
77
+ # x_left = max(box_a[0], box_b[0])
78
+ # y_top = max(box_a[1], box_b[1])
79
+ # x_right = min(box_a[2], box_b[2])
80
+ # y_bottom = min(box_a[3], box_b[3])
81
+ # intersection = max(0, x_right - x_left) * max(0, y_bottom - y_top)
82
+ # area_b = detections[j]['area']
83
+ # if area_b > 0 and intersection / area_b > ioa_threshold:
84
+ # is_suppressed[j] = True
85
+ # return [detections[i] for i in keep_indices]
86
+
87
+
88
+ # def merge_overlapping_boxes(detections, iou_threshold):
89
+ # if not detections: return []
90
+ # detections.sort(key=lambda d: d['conf'], reverse=True)
91
+ # merged_detections = []
92
+ # is_merged = [False] * len(detections)
93
+ # for i in range(len(detections)):
94
+ # if is_merged[i]: continue
95
+ # current_box = detections[i]['coords']
96
+ # current_class = detections[i]['class']
97
+ # merged_x1, merged_y1, merged_x2, merged_y2 = current_box
98
+ # for j in range(i + 1, len(detections)):
99
+ # if is_merged[j] or detections[j]['class'] != current_class: continue
100
+ # other_box = detections[j]['coords']
101
+ # iou = calculate_iou(current_box, other_box)
102
+ # if iou > iou_threshold:
103
+ # merged_x1 = min(merged_x1, other_box[0])
104
+ # merged_y1 = min(merged_y1, other_box[1])
105
+ # merged_x2 = max(merged_x2, other_box[2])
106
+ # merged_y2 = max(merged_y2, other_box[3])
107
+ # is_merged[j] = True
108
+ # merged_detections.append({
109
+ # 'coords': (merged_x1, merged_y1, merged_x2, merged_y2),
110
+ # 'y1': merged_y1, 'class': current_class, 'conf': detections[i]['conf']
111
+ # })
112
+ # return merged_detections
113
+
114
+ # # ============================================================================
115
+ # # --- UTILITY FUNCTIONS (Minimally Required) ---
116
+ # # ============================================================================
117
+
118
+ # def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
119
+ # """Converts a PyMuPDF Pixmap to a NumPy array for OpenCV/YOLO."""
120
+ # img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
121
+ # (pix.h, pix.w, pix.n)
122
+ # )
123
+ # if pix.n == 4:
124
+ # img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
125
+ # elif pix.n == 1:
126
+ # img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
127
+ # return img
128
+
129
+
130
+ # def run_yolo_detection_and_count(
131
+ # image: np.ndarray, model: YOLO, page_num: int
132
+ # ) -> Tuple[int, int]:
133
+ # """
134
+ # Runs YOLO inference, applies NMS/filtering, and updates global counters.
135
+ # Returns the counts for the current page.
136
+ # """
137
+ # global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
138
+
139
+ # yolo_detections = []
140
+ # page_equations = 0
141
+ # page_figures = 0
142
+
143
+ # try:
144
+ # # Run prediction
145
+ # results = model.predict(image, conf=CONF_THRESHOLD, verbose=False)
146
+
147
+ # if results and results[0].boxes:
148
+ # for box in results[0].boxes.data.tolist():
149
+ # x1, y1, x2, y2, conf, cls_id = box
150
+ # cls_name = model.names[int(cls_id)]
151
+
152
+ # if cls_name in TARGET_CLASSES:
153
+ # yolo_detections.append({
154
+ # 'coords': (x1, y1, x2, y2),
155
+ # 'class': cls_name,
156
+ # 'conf': conf
157
+ # })
158
+ # except Exception as e:
159
+ # logging.error(f"YOLO inference failed on page {page_num}: {e}")
160
+ # return 0, 0
161
+
162
+ # # Apply NMS/Merging/Filtering based on your provided logic
163
+ # merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
164
+ # final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
165
+
166
+ # # Update Global Counters
167
+ # for det in final_detections:
168
+ # if det['class'] == 'figure':
169
+ # GLOBAL_FIGURE_COUNT += 1
170
+ # page_figures += 1
171
+ # elif det['class'] == 'equation':
172
+ # GLOBAL_EQUATION_COUNT += 1
173
+ # page_equations += 1
174
+
175
+ # logging.warning(f" -> Page {page_num}: EQs={page_equations}, Figs={page_figures}")
176
+ # return page_equations, page_figures
177
+
178
+
179
+ # # ============================================================================
180
+ # # --- MAIN DOCUMENT PROCESSING FUNCTION (Modified for Minimal Output) ---
181
+ # # ============================================================================
182
+
183
+ # def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str]:
184
+ # """
185
+ # Runs the pipeline and returns just the counts and a report.
186
+ # No intermediate JSON saving or complex output structure.
187
+ # """
188
+ # global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
189
+
190
+ # # Reset globals
191
+ # GLOBAL_FIGURE_COUNT = 0
192
+ # GLOBAL_EQUATION_COUNT = 0
193
+
194
+ # if not os.path.exists(pdf_path):
195
+ # report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
196
+ # return 0, 0, 0, report
197
+
198
+ # # Model Loading (CRITICAL: Requires best.pt)
199
+ # try:
200
+ # model = YOLO(WEIGHTS_PATH)
201
+ # logging.warning(f"✅ Loaded YOLO model from: {WEIGHTS_PATH}")
202
+ # except Exception as e:
203
+ # report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
204
+ # return 0, 0, 0, report
205
+
206
+ # try:
207
+ # doc = fitz.open(pdf_path)
208
+ # total_pages = doc.page_count
209
+ # logging.warning(f"✅ Opened PDF: {doc.page_count} pages")
210
+ # except Exception as e:
211
+ # report = f"❌ ERROR loading PDF file: {e}"
212
+ # return 0, 0, 0, report
213
+
214
+ # mat = fitz.Matrix(2.0, 2.0)
215
+
216
+ # for page_num_0_based in range(doc.page_count):
217
+ # fitz_page = doc.load_page(page_num_0_based)
218
+ # page_num = page_num_0_based + 1
219
+
220
+ # try:
221
+ # pix = fitz_page.get_pixmap(matrix=mat)
222
+ # original_img = pixmap_to_numpy(pix)
223
+ # except Exception as e:
224
+ # logging.error(f"Error converting page {page_num} to image: {e}. Skipping.")
225
+ # continue
226
+
227
+ # # Core Detection and Counting
228
+ # run_yolo_detection_and_count(original_img, model, page_num)
229
+
230
+ # doc.close()
231
+
232
+ # # Final Report Generation
233
+ # report = (
234
+ # f"✅ **YOLO Counting Complete!**\n\n"
235
+ # f"**1) Total Pages Detected in PDF:** **{total_pages}**\n"
236
+ # f"**2) Total Equations Detected:** **{GLOBAL_EQUATION_COUNT}**\n"
237
+ # f"**3) Total Figures Detected:** **{GLOBAL_FIGURE_COUNT}**"
238
+ # )
239
+
240
+ # return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report
241
+
242
+
243
+ # # ============================================================================
244
+ # # --- GRADIO INTERFACE FUNCTION (Modified for minimal output) ---
245
+ # # ============================================================================
246
+
247
+ # def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str]:
248
+ # """
249
+ # Gradio wrapper function to handle file upload and return all results as strings.
250
+ # """
251
+ # if pdf_file is None:
252
+ # return "N/A", "N/A", "N/A", "Please upload a PDF file."
253
+
254
+ # pdf_path = pdf_file.name
255
+
256
+ # try:
257
+ # # Run the core logic
258
+ # num_pages, num_equations, num_figures, report = run_single_pdf_preprocessing(pdf_path)
259
+
260
+ # # Return results as formatted strings
261
+ # return str(num_pages), str(num_equations), str(num_figures), report
262
+
263
+ # except Exception as e:
264
+ # error_msg = f"An unexpected error occurred: {e}"
265
+ # return "Error", "Error", "Error", error_msg
266
+
267
+
268
+ # # ============================================================================
269
+ # # --- GRADIO INTERFACE DEFINITION ---
270
+ # # ============================================================================
271
+
272
+ # if __name__ == "__main__":
273
+
274
+ # if not os.path.exists(WEIGHTS_PATH):
275
+ # logging.error(f"❌ FATAL ERROR: YOLO weight file '{WEIGHTS_PATH}' not found. Cannot run live inference.")
276
+
277
+ # input_file = gr.File(label="Upload PDF Document", type="filepath", file_types=[".pdf"])
278
+
279
+ # # Outputs are now discrete number fields and a final markdown report
280
+ # output_pages = gr.Textbox(label="Total Pages in PDF", interactive=False)
281
+ # output_equations = gr.Textbox(label="Total Equations Detected", interactive=False)
282
+ # output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
283
+ # output_report = gr.Markdown(label="Processing Summary")
284
+
285
+ # interface = gr.Interface(
286
+ # fn=gradio_process_pdf,
287
+ # inputs=input_file,
288
+ # outputs=[output_pages, output_equations, output_figures, output_report],
289
+ # title="🎯 Minimalist YOLO Counting for PDF Elements",
290
+ # description=(
291
+ # "Upload a PDF to instantly run YOLO detection using your **`best.pt`** model "
292
+ # "and get the total counts for pages, equations, and figures."
293
+ # ),
294
+ # )
295
+
296
+ # print("\nStarting Gradio application...")
297
+ # interface.launch(inbrowser=True)
298
+
299
+
300
+
301
+
302
+
303
+
304
+
305
+
306
+
307
  import fitz # PyMuPDF
308
  import numpy as np
309
  import cv2
310
  import torch
311
  import torch.serialization
312
  import os
313
+ from typing import Optional, Tuple, List, Dict, Any
314
  from ultralytics import YOLO
315
  import logging
316
  import gradio as gr
317
  import shutil
318
  import tempfile
319
+ from PIL import Image
320
+ import io
321
 
322
  # ============================================================================
323
  # --- Global Patches and Setup ---
 
330
  return _original_torch_load(*args, **kwargs)
331
  torch.load = patched_torch_load
332
 
333
+ # Suppress warnings during normal operation
334
  logging.basicConfig(level=logging.WARNING)
335
 
336
  # ============================================================================
 
338
  # ============================================================================
339
 
340
  WEIGHTS_PATH = 'best.pt'
341
+ SCALE_FACTOR = 2.0 # Used for page rendering and coordinate scaling
342
 
343
+ # Detection parameters
344
  CONF_THRESHOLD = 0.2
345
  TARGET_CLASSES = ['figure', 'equation']
346
  IOU_MERGE_THRESHOLD = 0.4
 
351
  GLOBAL_EQUATION_COUNT = 0
352
 
353
  # ============================================================================
354
+ # --- BOX COMBINATION LOGIC (Retained) ---
355
  # ============================================================================
356
 
357
+ # (calculate_iou, filter_nested_boxes, merge_overlapping_boxes functions remain unchanged)
358
+
359
  def calculate_iou(box1, box2):
360
  x1_a, y1_a, x2_a, y2_a = box1
361
  x1_b, y1_b, x2_b, y2_b = box2
 
423
  return merged_detections
424
 
425
  # ============================================================================
426
+ # --- UTILITY FUNCTIONS (Modified to capture coordinates) ---
427
  # ============================================================================
428
 
429
  def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
 
440
 
441
  def run_yolo_detection_and_count(
442
  image: np.ndarray, model: YOLO, page_num: int
443
+ ) -> Tuple[int, int, List[Dict[str, Any]]]:
444
  """
445
  Runs YOLO inference, applies NMS/filtering, and updates global counters.
446
+ Returns counts AND a list of equation detection results.
447
  """
448
  global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
449
 
 
453
 
454
  try:
455
  # Run prediction
456
+ # Setting device to 'cpu' is a safety measure if CUDA isn't available
457
  results = model.predict(image, conf=CONF_THRESHOLD, verbose=False)
458
 
459
  if results and results[0].boxes:
 
469
  })
470
  except Exception as e:
471
  logging.error(f"YOLO inference failed on page {page_num}: {e}")
472
+ return 0, 0, []
473
 
474
  # Apply NMS/Merging/Filtering based on your provided logic
475
  merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
476
  final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
477
 
478
+ equation_results = []
479
+
480
  # Update Global Counters
481
  for det in final_detections:
482
+ # Scale coordinates back to the original PDF space (points)
483
+ x1_pix, y1_pix, x2_pix, y2_pix = det['coords']
484
+ x1_pdf = x1_pix / SCALE_FACTOR
485
+ y1_pdf = y1_pix / SCALE_FACTOR
486
+ x2_pdf = x2_pix / SCALE_FACTOR
487
+ y2_pdf = y2_pix / SCALE_FACTOR
488
+
489
  if det['class'] == 'figure':
490
  GLOBAL_FIGURE_COUNT += 1
491
  page_figures += 1
492
  elif det['class'] == 'equation':
493
  GLOBAL_EQUATION_COUNT += 1
494
  page_equations += 1
495
+ equation_results.append({
496
+ 'page': page_num,
497
+ 'bbox_pdf': (x1_pdf, y1_pdf, x2_pdf, y2_pdf) # Coordinates in PDF space
498
+ })
499
 
500
  logging.warning(f" -> Page {page_num}: EQs={page_equations}, Figs={page_figures}")
501
+ return page_equations, page_figures, equation_results
502
 
503
 
504
  # ============================================================================
505
+ # --- MAIN DOCUMENT PROCESSING FUNCTION (Modified to handle image cropping) ---
506
  # ============================================================================
507
 
508
+ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, List[str]]:
509
  """
510
+ Runs the pipeline, returns counts, report, and a list of paths/bytes for
511
+ the cropped equation images.
512
  """
513
  global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
514
 
 
515
  GLOBAL_FIGURE_COUNT = 0
516
  GLOBAL_EQUATION_COUNT = 0
517
 
518
  if not os.path.exists(pdf_path):
519
  report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
520
+ return 0, 0, 0, report, []
521
 
 
522
  try:
523
  model = YOLO(WEIGHTS_PATH)
 
524
  except Exception as e:
525
  report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
526
+ return 0, 0, 0, report, []
527
 
528
  try:
529
  doc = fitz.open(pdf_path)
530
  total_pages = doc.page_count
 
531
  except Exception as e:
532
  report = f"❌ ERROR loading PDF file: {e}"
533
+ return 0, 0, 0, report, []
534
 
535
+ mat = fitz.Matrix(SCALE_FACTOR, SCALE_FACTOR)
536
+ all_equation_images = [] # List to store cropped image data (base64 or bytes)
537
 
538
  for page_num_0_based in range(doc.page_count):
539
  fitz_page = doc.load_page(page_num_0_based)
 
546
  logging.error(f"Error converting page {page_num} to image: {e}. Skipping.")
547
  continue
548
 
549
+ # Core Detection, Counting, and Equation Result Collection
550
+ _, _, equation_results_page = run_yolo_detection_and_count(
551
+ original_img, model, page_num
552
+ )
553
+
554
+ # --- Image Cropping and Saving for Debugging ---
555
+ for eq in equation_results_page:
556
+ # bbox_pdf is in PyMuPDF's Rect format (x0, y0, x1, y1)
557
+ bbox = eq['bbox_pdf']
558
+
559
+ try:
560
+ # Crop the equation using the bounding box on the fitz page
561
+ # We use a slight border (e.g., 5 points) for better visualization
562
+ rect = fitz.Rect(bbox).prerotate(fitz_page.rotation)
563
+ clip_rect = rect + (0, 0, 5, 5) # Add small padding
564
+
565
+ # Get the pixmap for the cropped area
566
+ eq_pix = fitz_page.get_pixmap(matrix=fitz.Matrix(3.0, 3.0), clip=clip_rect)
567
+
568
+ # Convert the pixmap to a format Gradio can display (PNG bytes)
569
+ img_bytes = eq_pix.tobytes("png")
570
+
571
+ all_equation_images.append(img_bytes)
572
+
573
+ except Exception as e:
574
+ logging.error(f"Error cropping equation on page {page_num}: {e}")
575
+
576
  doc.close()
577
 
578
  # Final Report Generation
 
583
  f"**3) Total Figures Detected:** **{GLOBAL_FIGURE_COUNT}**"
584
  )
585
 
586
+ # Note the return type change to include the list of image bytes
587
+ return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report, all_equation_images
588
 
589
 
590
  # ============================================================================
591
+ # --- GRADIO INTERFACE FUNCTION (Modified for image output) ---
592
  # ============================================================================
593
 
594
+ def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, List[str]]:
595
  """
596
+ Gradio wrapper function to handle file upload and return all results + images.
597
  """
598
  if pdf_file is None:
599
+ return "N/A", "N/A", "N/A", "Please upload a PDF file.", []
600
 
601
  pdf_path = pdf_file.name
602
 
603
  try:
604
  # Run the core logic
605
+ num_pages, num_equations, num_figures, report, equation_images = run_single_pdf_preprocessing(pdf_path)
606
 
607
+ # Return results as formatted strings and the list of image bytes
608
+ return str(num_pages), str(num_equations), str(num_figures), report, equation_images
609
 
610
  except Exception as e:
611
  error_msg = f"An unexpected error occurred: {e}"
612
+ logging.error(error_msg, exc_info=True)
613
+ return "Error", "Error", "Error", error_msg, []
614
 
615
 
616
  # ============================================================================
 
624
 
625
  input_file = gr.File(label="Upload PDF Document", type="filepath", file_types=[".pdf"])
626
 
627
+ # Outputs
628
  output_pages = gr.Textbox(label="Total Pages in PDF", interactive=False)
629
  output_equations = gr.Textbox(label="Total Equations Detected", interactive=False)
630
  output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
631
  output_report = gr.Markdown(label="Processing Summary")
632
+ # NEW: Gradio Gallery to display the list of cropped images
633
+ output_gallery = gr.Gallery(
634
+ label="Detected Equations for Debugging",
635
+ columns=5,
636
+ height="auto",
637
+ object_fit="contain",
638
+ allow_preview=True
639
+ )
640
 
641
  interface = gr.Interface(
642
  fn=gradio_process_pdf,
643
  inputs=input_file,
644
+ outputs=[output_pages, output_equations, output_figures, output_report, output_gallery],
645
+ title="🎯 Minimalist YOLO Counting & Equation Debugger",
646
  description=(
647
+ "Upload a PDF to run YOLO detection. The counts are displayed, and a gallery "
648
+ "of **all detected equation images** is shown below for debugging the detection accuracy."
649
  ),
650
  )
651