Update app.py
Browse files
app.py
CHANGED
|
@@ -303,7 +303,6 @@
|
|
| 303 |
|
| 304 |
|
| 305 |
|
| 306 |
-
|
| 307 |
import fitz # PyMuPDF
|
| 308 |
import numpy as np
|
| 309 |
import cv2
|
|
@@ -316,7 +315,6 @@ import logging
|
|
| 316 |
import gradio as gr
|
| 317 |
import shutil
|
| 318 |
import tempfile
|
| 319 |
-
from PIL import Image
|
| 320 |
import io
|
| 321 |
|
| 322 |
# ============================================================================
|
|
@@ -351,11 +349,9 @@ GLOBAL_FIGURE_COUNT = 0
|
|
| 351 |
GLOBAL_EQUATION_COUNT = 0
|
| 352 |
|
| 353 |
# ============================================================================
|
| 354 |
-
# --- BOX COMBINATION LOGIC
|
| 355 |
# ============================================================================
|
| 356 |
|
| 357 |
-
# (calculate_iou, filter_nested_boxes, merge_overlapping_boxes functions remain unchanged)
|
| 358 |
-
|
| 359 |
def calculate_iou(box1, box2):
|
| 360 |
x1_a, y1_a, x2_a, y2_a = box1
|
| 361 |
x1_b, y1_b, x2_b, y2_b = box2
|
|
@@ -423,7 +419,7 @@ def merge_overlapping_boxes(detections, iou_threshold):
|
|
| 423 |
return merged_detections
|
| 424 |
|
| 425 |
# ============================================================================
|
| 426 |
-
# --- UTILITY FUNCTIONS
|
| 427 |
# ============================================================================
|
| 428 |
|
| 429 |
def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
|
|
@@ -443,7 +439,7 @@ def run_yolo_detection_and_count(
|
|
| 443 |
) -> Tuple[int, int, List[Dict[str, Any]]]:
|
| 444 |
"""
|
| 445 |
Runs YOLO inference, applies NMS/filtering, and updates global counters.
|
| 446 |
-
Returns counts AND a list of equation detection results.
|
| 447 |
"""
|
| 448 |
global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
|
| 449 |
|
|
@@ -452,8 +448,6 @@ def run_yolo_detection_and_count(
|
|
| 452 |
page_figures = 0
|
| 453 |
|
| 454 |
try:
|
| 455 |
-
# Run prediction
|
| 456 |
-
# Setting device to 'cpu' is a safety measure if CUDA isn't available
|
| 457 |
results = model.predict(image, conf=CONF_THRESHOLD, verbose=False)
|
| 458 |
|
| 459 |
if results and results[0].boxes:
|
|
@@ -471,7 +465,7 @@ def run_yolo_detection_and_count(
|
|
| 471 |
logging.error(f"YOLO inference failed on page {page_num}: {e}")
|
| 472 |
return 0, 0, []
|
| 473 |
|
| 474 |
-
# Apply NMS/Merging/Filtering
|
| 475 |
merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
|
| 476 |
final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
|
| 477 |
|
|
@@ -502,16 +496,16 @@ def run_yolo_detection_and_count(
|
|
| 502 |
|
| 503 |
|
| 504 |
# ============================================================================
|
| 505 |
-
# --- MAIN DOCUMENT PROCESSING FUNCTION
|
| 506 |
# ============================================================================
|
| 507 |
|
| 508 |
-
def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, List[
|
| 509 |
"""
|
| 510 |
-
Runs the pipeline, returns counts, report, and a list of
|
| 511 |
-
the cropped equation images.
|
| 512 |
"""
|
| 513 |
global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
|
| 514 |
|
|
|
|
| 515 |
GLOBAL_FIGURE_COUNT = 0
|
| 516 |
GLOBAL_EQUATION_COUNT = 0
|
| 517 |
|
|
@@ -519,21 +513,25 @@ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, Lis
|
|
| 519 |
report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
|
| 520 |
return 0, 0, 0, report, []
|
| 521 |
|
|
|
|
| 522 |
try:
|
| 523 |
model = YOLO(WEIGHTS_PATH)
|
|
|
|
| 524 |
except Exception as e:
|
| 525 |
report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
|
| 526 |
return 0, 0, 0, report, []
|
| 527 |
|
|
|
|
| 528 |
try:
|
| 529 |
doc = fitz.open(pdf_path)
|
| 530 |
total_pages = doc.page_count
|
|
|
|
| 531 |
except Exception as e:
|
| 532 |
report = f"❌ ERROR loading PDF file: {e}"
|
| 533 |
return 0, 0, 0, report, []
|
| 534 |
|
| 535 |
mat = fitz.Matrix(SCALE_FACTOR, SCALE_FACTOR)
|
| 536 |
-
all_equation_images = [] #
|
| 537 |
|
| 538 |
for page_num_0_based in range(doc.page_count):
|
| 539 |
fitz_page = doc.load_page(page_num_0_based)
|
|
@@ -553,25 +551,24 @@ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, Lis
|
|
| 553 |
|
| 554 |
# --- Image Cropping and Saving for Debugging ---
|
| 555 |
for eq in equation_results_page:
|
| 556 |
-
# bbox_pdf is in PyMuPDF's Rect format (x0, y0, x1, y1)
|
| 557 |
bbox = eq['bbox_pdf']
|
| 558 |
|
| 559 |
try:
|
| 560 |
-
#
|
| 561 |
-
|
| 562 |
-
rect = fitz.Rect(bbox).prerotate(fitz_page.rotation)
|
| 563 |
clip_rect = rect + (0, 0, 5, 5) # Add small padding
|
| 564 |
|
| 565 |
-
# Get the pixmap for the cropped area
|
| 566 |
eq_pix = fitz_page.get_pixmap(matrix=fitz.Matrix(3.0, 3.0), clip=clip_rect)
|
| 567 |
|
| 568 |
-
# Convert the pixmap to
|
| 569 |
img_bytes = eq_pix.tobytes("png")
|
| 570 |
|
| 571 |
all_equation_images.append(img_bytes)
|
| 572 |
|
| 573 |
except Exception as e:
|
| 574 |
-
|
|
|
|
| 575 |
|
| 576 |
doc.close()
|
| 577 |
|
|
@@ -583,15 +580,14 @@ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, Lis
|
|
| 583 |
f"**3) Total Figures Detected:** **{GLOBAL_FIGURE_COUNT}**"
|
| 584 |
)
|
| 585 |
|
| 586 |
-
# Note the return type change to include the list of image bytes
|
| 587 |
return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report, all_equation_images
|
| 588 |
|
| 589 |
|
| 590 |
# ============================================================================
|
| 591 |
-
# --- GRADIO INTERFACE FUNCTION
|
| 592 |
# ============================================================================
|
| 593 |
|
| 594 |
-
def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, List[
|
| 595 |
"""
|
| 596 |
Gradio wrapper function to handle file upload and return all results + images.
|
| 597 |
"""
|
|
@@ -629,7 +625,8 @@ if __name__ == "__main__":
|
|
| 629 |
output_equations = gr.Textbox(label="Total Equations Detected", interactive=False)
|
| 630 |
output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
|
| 631 |
output_report = gr.Markdown(label="Processing Summary")
|
| 632 |
-
|
|
|
|
| 633 |
output_gallery = gr.Gallery(
|
| 634 |
label="Detected Equations for Debugging",
|
| 635 |
columns=5,
|
|
@@ -644,8 +641,8 @@ if __name__ == "__main__":
|
|
| 644 |
outputs=[output_pages, output_equations, output_figures, output_report, output_gallery],
|
| 645 |
title="🎯 Minimalist YOLO Counting & Equation Debugger",
|
| 646 |
description=(
|
| 647 |
-
"Upload a PDF to run YOLO detection
|
| 648 |
-
"of **all detected equation images** is shown
|
| 649 |
),
|
| 650 |
)
|
| 651 |
|
|
|
|
| 303 |
|
| 304 |
|
| 305 |
|
|
|
|
| 306 |
import fitz # PyMuPDF
|
| 307 |
import numpy as np
|
| 308 |
import cv2
|
|
|
|
| 315 |
import gradio as gr
|
| 316 |
import shutil
|
| 317 |
import tempfile
|
|
|
|
| 318 |
import io
|
| 319 |
|
| 320 |
# ============================================================================
|
|
|
|
| 349 |
GLOBAL_EQUATION_COUNT = 0
|
| 350 |
|
| 351 |
# ============================================================================
|
| 352 |
+
# --- BOX COMBINATION LOGIC ---
|
| 353 |
# ============================================================================
|
| 354 |
|
|
|
|
|
|
|
| 355 |
def calculate_iou(box1, box2):
|
| 356 |
x1_a, y1_a, x2_a, y2_a = box1
|
| 357 |
x1_b, y1_b, x2_b, y2_b = box2
|
|
|
|
| 419 |
return merged_detections
|
| 420 |
|
| 421 |
# ============================================================================
|
| 422 |
+
# --- UTILITY FUNCTIONS ---
|
| 423 |
# ============================================================================
|
| 424 |
|
| 425 |
def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
|
|
|
|
| 439 |
) -> Tuple[int, int, List[Dict[str, Any]]]:
|
| 440 |
"""
|
| 441 |
Runs YOLO inference, applies NMS/filtering, and updates global counters.
|
| 442 |
+
Returns counts AND a list of equation detection results (PDF coordinates).
|
| 443 |
"""
|
| 444 |
global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
|
| 445 |
|
|
|
|
| 448 |
page_figures = 0
|
| 449 |
|
| 450 |
try:
|
|
|
|
|
|
|
| 451 |
results = model.predict(image, conf=CONF_THRESHOLD, verbose=False)
|
| 452 |
|
| 453 |
if results and results[0].boxes:
|
|
|
|
| 465 |
logging.error(f"YOLO inference failed on page {page_num}: {e}")
|
| 466 |
return 0, 0, []
|
| 467 |
|
| 468 |
+
# Apply NMS/Merging/Filtering
|
| 469 |
merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
|
| 470 |
final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
|
| 471 |
|
|
|
|
| 496 |
|
| 497 |
|
| 498 |
# ============================================================================
|
| 499 |
+
# --- MAIN DOCUMENT PROCESSING FUNCTION ---
|
| 500 |
# ============================================================================
|
| 501 |
|
| 502 |
+
def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, List[bytes]]:
|
| 503 |
"""
|
| 504 |
+
Runs the pipeline, returns counts, report, and a list of cropped equation images (as bytes).
|
|
|
|
| 505 |
"""
|
| 506 |
global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
|
| 507 |
|
| 508 |
+
# Reset globals
|
| 509 |
GLOBAL_FIGURE_COUNT = 0
|
| 510 |
GLOBAL_EQUATION_COUNT = 0
|
| 511 |
|
|
|
|
| 513 |
report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
|
| 514 |
return 0, 0, 0, report, []
|
| 515 |
|
| 516 |
+
# Model Loading
|
| 517 |
try:
|
| 518 |
model = YOLO(WEIGHTS_PATH)
|
| 519 |
+
logging.warning(f"✅ Loaded YOLO model from: {WEIGHTS_PATH}")
|
| 520 |
except Exception as e:
|
| 521 |
report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
|
| 522 |
return 0, 0, 0, report, []
|
| 523 |
|
| 524 |
+
# PDF Loading
|
| 525 |
try:
|
| 526 |
doc = fitz.open(pdf_path)
|
| 527 |
total_pages = doc.page_count
|
| 528 |
+
logging.warning(f"✅ Opened PDF with {doc.page_count} pages")
|
| 529 |
except Exception as e:
|
| 530 |
report = f"❌ ERROR loading PDF file: {e}"
|
| 531 |
return 0, 0, 0, report, []
|
| 532 |
|
| 533 |
mat = fitz.Matrix(SCALE_FACTOR, SCALE_FACTOR)
|
| 534 |
+
all_equation_images = [] # Stores PNG image bytes for Gradio gallery
|
| 535 |
|
| 536 |
for page_num_0_based in range(doc.page_count):
|
| 537 |
fitz_page = doc.load_page(page_num_0_based)
|
|
|
|
| 551 |
|
| 552 |
# --- Image Cropping and Saving for Debugging ---
|
| 553 |
for eq in equation_results_page:
|
|
|
|
| 554 |
bbox = eq['bbox_pdf']
|
| 555 |
|
| 556 |
try:
|
| 557 |
+
# FIX APPLIED: Removed .prerotate() call on Rect object
|
| 558 |
+
rect = fitz.Rect(bbox)
|
|
|
|
| 559 |
clip_rect = rect + (0, 0, 5, 5) # Add small padding
|
| 560 |
|
| 561 |
+
# Get the pixmap for the cropped area (high-res render)
|
| 562 |
eq_pix = fitz_page.get_pixmap(matrix=fitz.Matrix(3.0, 3.0), clip=clip_rect)
|
| 563 |
|
| 564 |
+
# Convert the pixmap to PNG bytes
|
| 565 |
img_bytes = eq_pix.tobytes("png")
|
| 566 |
|
| 567 |
all_equation_images.append(img_bytes)
|
| 568 |
|
| 569 |
except Exception as e:
|
| 570 |
+
# This error means the image generation failed for a specific box.
|
| 571 |
+
logging.error(f"Error cropping equation on page {page_num} with bbox {bbox}: {e}")
|
| 572 |
|
| 573 |
doc.close()
|
| 574 |
|
|
|
|
| 580 |
f"**3) Total Figures Detected:** **{GLOBAL_FIGURE_COUNT}**"
|
| 581 |
)
|
| 582 |
|
|
|
|
| 583 |
return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report, all_equation_images
|
| 584 |
|
| 585 |
|
| 586 |
# ============================================================================
|
| 587 |
+
# --- GRADIO INTERFACE FUNCTION ---
|
| 588 |
# ============================================================================
|
| 589 |
|
| 590 |
+
def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, List[bytes]]:
|
| 591 |
"""
|
| 592 |
Gradio wrapper function to handle file upload and return all results + images.
|
| 593 |
"""
|
|
|
|
| 625 |
output_equations = gr.Textbox(label="Total Equations Detected", interactive=False)
|
| 626 |
output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
|
| 627 |
output_report = gr.Markdown(label="Processing Summary")
|
| 628 |
+
|
| 629 |
+
# Gradio Gallery to display the list of cropped images
|
| 630 |
output_gallery = gr.Gallery(
|
| 631 |
label="Detected Equations for Debugging",
|
| 632 |
columns=5,
|
|
|
|
| 641 |
outputs=[output_pages, output_equations, output_figures, output_report, output_gallery],
|
| 642 |
title="🎯 Minimalist YOLO Counting & Equation Debugger",
|
| 643 |
description=(
|
| 644 |
+
"Upload a PDF to run YOLO detection using your **`best.pt`** model. "
|
| 645 |
+
"The counts are displayed, and a gallery of **all detected equation images** is shown for debugging."
|
| 646 |
),
|
| 647 |
)
|
| 648 |
|