heerjtdev committed on
Commit
d52b60e
·
verified ·
1 Parent(s): 40b8823

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -28
app.py CHANGED
@@ -303,7 +303,6 @@
303
 
304
 
305
 
306
-
307
  import fitz # PyMuPDF
308
  import numpy as np
309
  import cv2
@@ -316,7 +315,6 @@ import logging
316
  import gradio as gr
317
  import shutil
318
  import tempfile
319
- from PIL import Image
320
  import io
321
 
322
  # ============================================================================
@@ -351,11 +349,9 @@ GLOBAL_FIGURE_COUNT = 0
351
  GLOBAL_EQUATION_COUNT = 0
352
 
353
  # ============================================================================
354
- # --- BOX COMBINATION LOGIC (Retained) ---
355
  # ============================================================================
356
 
357
- # (calculate_iou, filter_nested_boxes, merge_overlapping_boxes functions remain unchanged)
358
-
359
  def calculate_iou(box1, box2):
360
  x1_a, y1_a, x2_a, y2_a = box1
361
  x1_b, y1_b, x2_b, y2_b = box2
@@ -423,7 +419,7 @@ def merge_overlapping_boxes(detections, iou_threshold):
423
  return merged_detections
424
 
425
  # ============================================================================
426
- # --- UTILITY FUNCTIONS (Modified to capture coordinates) ---
427
  # ============================================================================
428
 
429
  def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
@@ -443,7 +439,7 @@ def run_yolo_detection_and_count(
443
  ) -> Tuple[int, int, List[Dict[str, Any]]]:
444
  """
445
  Runs YOLO inference, applies NMS/filtering, and updates global counters.
446
- Returns counts AND a list of equation detection results.
447
  """
448
  global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
449
 
@@ -452,8 +448,6 @@ def run_yolo_detection_and_count(
452
  page_figures = 0
453
 
454
  try:
455
- # Run prediction
456
- # Setting device to 'cpu' is a safety measure if CUDA isn't available
457
  results = model.predict(image, conf=CONF_THRESHOLD, verbose=False)
458
 
459
  if results and results[0].boxes:
@@ -471,7 +465,7 @@ def run_yolo_detection_and_count(
471
  logging.error(f"YOLO inference failed on page {page_num}: {e}")
472
  return 0, 0, []
473
 
474
- # Apply NMS/Merging/Filtering based on your provided logic
475
  merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
476
  final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
477
 
@@ -502,16 +496,16 @@ def run_yolo_detection_and_count(
502
 
503
 
504
  # ============================================================================
505
- # --- MAIN DOCUMENT PROCESSING FUNCTION (Modified to handle image cropping) ---
506
  # ============================================================================
507
 
508
- def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, List[str]]:
509
  """
510
- Runs the pipeline, returns counts, report, and a list of paths/bytes for
511
- the cropped equation images.
512
  """
513
  global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
514
 
 
515
  GLOBAL_FIGURE_COUNT = 0
516
  GLOBAL_EQUATION_COUNT = 0
517
 
@@ -519,21 +513,25 @@ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, Lis
519
  report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
520
  return 0, 0, 0, report, []
521
 
 
522
  try:
523
  model = YOLO(WEIGHTS_PATH)
 
524
  except Exception as e:
525
  report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
526
  return 0, 0, 0, report, []
527
 
 
528
  try:
529
  doc = fitz.open(pdf_path)
530
  total_pages = doc.page_count
 
531
  except Exception as e:
532
  report = f"❌ ERROR loading PDF file: {e}"
533
  return 0, 0, 0, report, []
534
 
535
  mat = fitz.Matrix(SCALE_FACTOR, SCALE_FACTOR)
536
- all_equation_images = [] # List to store cropped image data (base64 or bytes)
537
 
538
  for page_num_0_based in range(doc.page_count):
539
  fitz_page = doc.load_page(page_num_0_based)
@@ -553,25 +551,24 @@ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, Lis
553
 
554
  # --- Image Cropping and Saving for Debugging ---
555
  for eq in equation_results_page:
556
- # bbox_pdf is in PyMuPDF's Rect format (x0, y0, x1, y1)
557
  bbox = eq['bbox_pdf']
558
 
559
  try:
560
- # Crop the equation using the bounding box on the fitz page
561
- # We use a slight border (e.g., 5 points) for better visualization
562
- rect = fitz.Rect(bbox).prerotate(fitz_page.rotation)
563
  clip_rect = rect + (0, 0, 5, 5) # Add small padding
564
 
565
- # Get the pixmap for the cropped area
566
  eq_pix = fitz_page.get_pixmap(matrix=fitz.Matrix(3.0, 3.0), clip=clip_rect)
567
 
568
- # Convert the pixmap to a format Gradio can display (PNG bytes)
569
  img_bytes = eq_pix.tobytes("png")
570
 
571
  all_equation_images.append(img_bytes)
572
 
573
  except Exception as e:
574
- logging.error(f"Error cropping equation on page {page_num}: {e}")
 
575
 
576
  doc.close()
577
 
@@ -583,15 +580,14 @@ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, Lis
583
  f"**3) Total Figures Detected:** **{GLOBAL_FIGURE_COUNT}**"
584
  )
585
 
586
- # Note the return type change to include the list of image bytes
587
  return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report, all_equation_images
588
 
589
 
590
  # ============================================================================
591
- # --- GRADIO INTERFACE FUNCTION (Modified for image output) ---
592
  # ============================================================================
593
 
594
- def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, List[str]]:
595
  """
596
  Gradio wrapper function to handle file upload and return all results + images.
597
  """
@@ -629,7 +625,8 @@ if __name__ == "__main__":
629
  output_equations = gr.Textbox(label="Total Equations Detected", interactive=False)
630
  output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
631
  output_report = gr.Markdown(label="Processing Summary")
632
- # NEW: Gradio Gallery to display the list of cropped images
 
633
  output_gallery = gr.Gallery(
634
  label="Detected Equations for Debugging",
635
  columns=5,
@@ -644,8 +641,8 @@ if __name__ == "__main__":
644
  outputs=[output_pages, output_equations, output_figures, output_report, output_gallery],
645
  title="🎯 Minimalist YOLO Counting & Equation Debugger",
646
  description=(
647
- "Upload a PDF to run YOLO detection. The counts are displayed, and a gallery "
648
- "of **all detected equation images** is shown below for debugging the detection accuracy."
649
  ),
650
  )
651
 
 
303
 
304
 
305
 
 
306
  import fitz # PyMuPDF
307
  import numpy as np
308
  import cv2
 
315
  import gradio as gr
316
  import shutil
317
  import tempfile
 
318
  import io
319
 
320
  # ============================================================================
 
349
  GLOBAL_EQUATION_COUNT = 0
350
 
351
  # ============================================================================
352
+ # --- BOX COMBINATION LOGIC ---
353
  # ============================================================================
354
 
 
 
355
  def calculate_iou(box1, box2):
356
  x1_a, y1_a, x2_a, y2_a = box1
357
  x1_b, y1_b, x2_b, y2_b = box2
 
419
  return merged_detections
420
 
421
  # ============================================================================
422
+ # --- UTILITY FUNCTIONS ---
423
  # ============================================================================
424
 
425
  def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
 
439
  ) -> Tuple[int, int, List[Dict[str, Any]]]:
440
  """
441
  Runs YOLO inference, applies NMS/filtering, and updates global counters.
442
+ Returns counts AND a list of equation detection results (PDF coordinates).
443
  """
444
  global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
445
 
 
448
  page_figures = 0
449
 
450
  try:
 
 
451
  results = model.predict(image, conf=CONF_THRESHOLD, verbose=False)
452
 
453
  if results and results[0].boxes:
 
465
  logging.error(f"YOLO inference failed on page {page_num}: {e}")
466
  return 0, 0, []
467
 
468
+ # Apply NMS/Merging/Filtering
469
  merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
470
  final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
471
 
 
496
 
497
 
498
  # ============================================================================
499
+ # --- MAIN DOCUMENT PROCESSING FUNCTION ---
500
  # ============================================================================
501
 
502
+ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, List[bytes]]:
503
  """
504
+ Runs the pipeline, returns counts, report, and a list of cropped equation images (as bytes).
 
505
  """
506
  global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
507
 
508
+ # Reset globals
509
  GLOBAL_FIGURE_COUNT = 0
510
  GLOBAL_EQUATION_COUNT = 0
511
 
 
513
  report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
514
  return 0, 0, 0, report, []
515
 
516
+ # Model Loading
517
  try:
518
  model = YOLO(WEIGHTS_PATH)
519
+ logging.warning(f"✅ Loaded YOLO model from: {WEIGHTS_PATH}")
520
  except Exception as e:
521
  report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
522
  return 0, 0, 0, report, []
523
 
524
+ # PDF Loading
525
  try:
526
  doc = fitz.open(pdf_path)
527
  total_pages = doc.page_count
528
+ logging.warning(f"✅ Opened PDF with {doc.page_count} pages")
529
  except Exception as e:
530
  report = f"❌ ERROR loading PDF file: {e}"
531
  return 0, 0, 0, report, []
532
 
533
  mat = fitz.Matrix(SCALE_FACTOR, SCALE_FACTOR)
534
+ all_equation_images = [] # Stores PNG image bytes for Gradio gallery
535
 
536
  for page_num_0_based in range(doc.page_count):
537
  fitz_page = doc.load_page(page_num_0_based)
 
551
 
552
  # --- Image Cropping and Saving for Debugging ---
553
  for eq in equation_results_page:
 
554
  bbox = eq['bbox_pdf']
555
 
556
  try:
557
+ # FIX APPLIED: Removed .prerotate() call on Rect object
558
+ rect = fitz.Rect(bbox)
 
559
  clip_rect = rect + (0, 0, 5, 5) # Add small padding
560
 
561
+ # Get the pixmap for the cropped area (high-res render)
562
  eq_pix = fitz_page.get_pixmap(matrix=fitz.Matrix(3.0, 3.0), clip=clip_rect)
563
 
564
+ # Convert the pixmap to PNG bytes
565
  img_bytes = eq_pix.tobytes("png")
566
 
567
  all_equation_images.append(img_bytes)
568
 
569
  except Exception as e:
570
+ # This error means the image generation failed for a specific box.
571
+ logging.error(f"Error cropping equation on page {page_num} with bbox {bbox}: {e}")
572
 
573
  doc.close()
574
 
 
580
  f"**3) Total Figures Detected:** **{GLOBAL_FIGURE_COUNT}**"
581
  )
582
 
 
583
  return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report, all_equation_images
584
 
585
 
586
  # ============================================================================
587
+ # --- GRADIO INTERFACE FUNCTION ---
588
  # ============================================================================
589
 
590
+ def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, List[bytes]]:
591
  """
592
  Gradio wrapper function to handle file upload and return all results + images.
593
  """
 
625
  output_equations = gr.Textbox(label="Total Equations Detected", interactive=False)
626
  output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
627
  output_report = gr.Markdown(label="Processing Summary")
628
+
629
+ # Gradio Gallery to display the list of cropped images
630
  output_gallery = gr.Gallery(
631
  label="Detected Equations for Debugging",
632
  columns=5,
 
641
  outputs=[output_pages, output_equations, output_figures, output_report, output_gallery],
642
  title="🎯 Minimalist YOLO Counting & Equation Debugger",
643
  description=(
644
+ "Upload a PDF to run YOLO detection using your **`best.pt`** model. "
645
+ "The counts are displayed, and a gallery of **all detected equation images** is shown for debugging."
646
  ),
647
  )
648