heerjtdev committed on
Commit
46d219d
·
verified ·
1 Parent(s): bfff4aa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -416
app.py CHANGED
@@ -1,14 +1,12 @@
1
 
2
 
3
-
4
-
5
  import fitz # PyMuPDF
6
  import numpy as np
7
  import cv2
8
  import torch
9
  import torch.serialization
10
  import os
11
- import time # Import for timing
12
  from typing import Optional, Tuple, List, Dict, Any
13
  from ultralytics import YOLO
14
  import logging
@@ -35,7 +33,7 @@ logging.basicConfig(level=logging.WARNING)
35
  # ============================================================================
36
 
37
  WEIGHTS_PATH = 'best.pt'
38
- SCALE_FACTOR = 2.0 # Used for page rendering and coordinate scaling
39
 
40
  # Detection parameters
41
  CONF_THRESHOLD = 0.2
@@ -48,7 +46,7 @@ GLOBAL_FIGURE_COUNT = 0
48
  GLOBAL_EQUATION_COUNT = 0
49
 
50
  # ============================================================================
51
- # --- BOX COMBINATION LOGIC ---
52
  # ============================================================================
53
 
54
  def calculate_iou(box1, box2):
@@ -123,7 +121,6 @@ def merge_overlapping_boxes(detections, iou_threshold):
123
 
124
  def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
125
  """Converts a PyMuPDF Pixmap to a NumPy array for OpenCV/YOLO."""
126
- # This function is retained as it's required to convert PDF page to image for YOLO input.
127
  img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
128
  (pix.h, pix.w, pix.n)
129
  )
@@ -136,7 +133,7 @@ def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
136
 
137
  def run_yolo_detection_and_count(
138
  image: np.ndarray, model: YOLO, page_num: int
139
- ) -> Tuple[int, int]: # Removed equation_results list from return
140
  """
141
  Runs YOLO inference, applies NMS/filtering, and updates global counters.
142
  Returns page counts only.
@@ -183,17 +180,20 @@ def run_yolo_detection_and_count(
183
 
184
 
185
  # ============================================================================
186
- # --- MAIN DOCUMENT PROCESSING FUNCTION (Optimized) ---
187
  # ============================================================================
188
 
189
- def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, float, List[str]]:
 
190
  """
191
- Runs the pipeline, returns counts, report, total time, and an empty list
192
- (maintaining the expected return signature for Gradio but with None for gallery).
193
  """
194
  global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
195
  start_time = time.time()
196
  log_messages = []
 
 
 
197
 
198
  # Reset globals
199
  GLOBAL_FIGURE_COUNT = 0
@@ -203,14 +203,14 @@ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, flo
203
  t0 = time.time()
204
  if not os.path.exists(pdf_path):
205
  report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
206
- return 0, 0, 0, report, time.time() - start_time, []
207
 
208
  try:
209
  model = YOLO(WEIGHTS_PATH)
210
  logging.warning(f"✅ Loaded YOLO model from: {WEIGHTS_PATH}")
211
  except Exception as e:
212
  report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
213
- return 0, 0, 0, report, time.time() - start_time, []
214
  t1 = time.time()
215
  log_messages.append(f"Model Loading Time: {t1-t0:.4f}s")
216
 
@@ -222,7 +222,7 @@ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, flo
222
  logging.warning(f"✅ Opened PDF with {doc.page_count} pages")
223
  except Exception as e:
224
  report = f"❌ ERROR loading PDF file: {e}"
225
- return 0, 0, 0, report, time.time() - start_time, []
226
  t3 = time.time()
227
  log_messages.append(f"PDF Initialization Time: {t3-t2:.4f}s")
228
 
@@ -247,9 +247,12 @@ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, flo
247
 
248
  # Core Detection
249
  detect_start = time.time()
250
- run_yolo_detection_and_count(original_img, model, page_num)
251
  detect_time = time.time() - detect_start
252
 
 
 
 
253
  page_total_time = time.time() - page_start_time
254
  log_messages.append(f"Page {page_num} Time: Total={page_total_time:.4f}s (Render={pix_time:.4f}s, Detect={detect_time:.4f}s)")
255
 
@@ -258,6 +261,11 @@ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, flo
258
  detection_loop_time = t5 - t4
259
  log_messages.append(f"Total Detection Loop Time ({total_pages} pages): {detection_loop_time:.4f}s")
260
 
 
 
 
 
 
261
  # 4. Final Report Generation
262
  total_execution_time = t5 - start_time
263
 
@@ -274,38 +282,38 @@ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, flo
274
  f"\n```"
275
  )
276
 
277
- # Return total_execution_time and an empty list for the gallery output
278
- return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report, total_execution_time, []
279
 
280
 
281
  # ============================================================================
282
  # --- GRADIO INTERFACE FUNCTION (Updated) ---
283
  # ============================================================================
284
 
285
- # NOTE: The return signature has changed. We removed 'temp_output_dir' as it's no longer used.
286
- def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, List[str]]:
287
  """
288
- Gradio wrapper function to handle file upload and return results (no image handling).
289
  """
290
  if pdf_file is None:
291
- return "N/A", "N/A", "N/A", "Please upload a PDF file.", []
 
292
 
293
  pdf_path = pdf_file.name
294
 
295
  try:
296
- # Run the core logic
297
- # Note the change: temp_output_dir is removed, and total_time is returned
298
- num_pages, num_equations, num_figures, report, total_time, _ = run_single_pdf_preprocessing(
299
  pdf_path
300
  )
301
 
302
- # Return results (the last item is an empty list for the now-empty gallery)
303
- return str(num_pages), str(num_equations), str(num_figures), report, []
304
 
305
  except Exception as e:
306
  error_msg = f"An unexpected error occurred: {e}"
307
  logging.error(error_msg, exc_info=True)
308
- return "Error", "Error", "Error", error_msg, []
 
309
 
310
 
311
  # ============================================================================
@@ -325,410 +333,37 @@ if __name__ == "__main__":
325
  output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
326
  output_report = gr.Markdown(label="Processing Summary and Timing")
327
 
 
 
 
328
  # Gradio Gallery is retained but will receive an empty list []
329
  output_gallery = gr.Gallery(
330
  label="Detected Equations (Disabled for Speed)",
331
  columns=5,
332
  height="auto",
333
  object_fit="contain",
334
- allow_preview=False # Disable preview since it's empty
335
  )
336
 
337
  interface = gr.Interface(
338
  fn=gradio_process_pdf,
339
  inputs=input_file,
340
- # The number of outputs remains 5 (3 textboxes, 1 markdown, 1 gallery)
341
- outputs=[output_pages, output_equations, output_figures, output_report, output_gallery],
342
- title="🚀 Optimized YOLO Counting with Timing",
 
 
 
 
 
 
 
343
  description=(
344
- "Upload a PDF to run YOLO detection. Image cropping is disabled for maximum speed. "
345
- "Timing for each step is included in the summary report."
346
  ),
347
  )
348
 
349
  print("\nStarting Gradio application...")
350
  interface.launch(inbrowser=True)
351
 
352
-
353
-
354
-
355
-
356
-
357
-
358
-
359
-
360
-
361
-
362
-
363
-
364
-
365
-
366
-
367
-
368
- # import fitz # PyMuPDF
369
- # import numpy as np
370
- # import cv2
371
- # import torch
372
- # import torch.serialization
373
- # import os
374
- # import time
375
- # from typing import Optional, Tuple, List, Dict, Any
376
- # from ultralytics import YOLO
377
- # import logging
378
- # import gradio as gr
379
- # import shutil
380
- # import tempfile
381
- # import io
382
-
383
- # # ============================================================================
384
- # # --- Global Patches and Setup ---
385
- # # ============================================================================
386
-
387
- # # Patch torch.load to prevent weights_only error with older models
388
- # _original_torch_load = torch.load
389
- # def patched_torch_load(*args, **kwargs):
390
- # kwargs["weights_only"] = False
391
- # return _original_torch_load(*args, **kwargs)
392
- # torch.load = patched_torch_load
393
-
394
- # logging.basicConfig(level=logging.WARNING)
395
-
396
- # # ============================================================================
397
- # # --- CONFIGURATION AND CONSTANTS ---
398
- # # ============================================================================
399
-
400
- # WEIGHTS_PATH = 'best.pt'
401
- # SCALE_FACTOR = 2.0
402
-
403
- # # Detection parameters
404
- # CONF_THRESHOLD = 0.2
405
- # TARGET_CLASSES = ['figure', 'equation']
406
- # IOU_MERGE_THRESHOLD = 0.4
407
- # IOA_SUPPRESSION_THRESHOLD = 0.7
408
-
409
- # # Global counters (Reset per run)
410
- # GLOBAL_FIGURE_COUNT = 0
411
- # GLOBAL_EQUATION_COUNT = 0
412
-
413
- # # ============================================================================
414
- # # --- BOX COMBINATION LOGIC (Retained for detection accuracy) ---
415
- # # ============================================================================
416
-
417
- # def calculate_iou(box1, box2):
418
- # x1_a, y1_a, x2_a, y2_a = box1
419
- # x1_b, y1_b, x2_b, y2_b = box2
420
- # x_left = max(x1_a, x1_b)
421
- # y_top = max(y1_a, y1_b)
422
- # x_right = min(x2_a, x2_b)
423
- # y_bottom = min(y2_a, y2_b)
424
- # intersection_area = max(0, x_right - x_left) * max(0, y_bottom - y_top)
425
- # box_a_area = (x2_a - x1_a) * (y2_a - y1_a)
426
- # box_b_area = (x2_b - x1_b) * (y2_b - y1_b)
427
- # union_area = float(box_a_area + box_b_area - intersection_area)
428
- # return intersection_area / union_area if union_area > 0 else 0
429
-
430
-
431
- # def filter_nested_boxes(detections, ioa_threshold=0.80):
432
- # if not detections: return []
433
- # for d in detections:
434
- # x1, y1, x2, y2 = d['coords']
435
- # d['area'] = (x2 - x1) * (y2 - y1)
436
- # detections.sort(key=lambda x: x['area'], reverse=True)
437
- # keep_indices = []
438
- # is_suppressed = [False] * len(detections)
439
- # for i in range(len(detections)):
440
- # if is_suppressed[i]: continue
441
- # keep_indices.append(i)
442
- # box_a = detections[i]['coords']
443
- # for j in range(i + 1, len(detections)):
444
- # if is_suppressed[j]: continue
445
- # box_b = detections[j]['coords']
446
- # x_left = max(box_a[0], box_b[0])
447
- # y_top = max(box_a[1], box_b[1])
448
- # x_right = min(box_a[2], box_b[2])
449
- # y_bottom = min(box_a[3], box_b[3])
450
- # intersection = max(0, x_right - x_left) * max(0, y_bottom - y_top)
451
- # area_b = detections[j]['area']
452
- # if area_b > 0 and intersection / area_b > ioa_threshold:
453
- # is_suppressed[j] = True
454
- # return [detections[i] for i in keep_indices]
455
-
456
-
457
- # def merge_overlapping_boxes(detections, iou_threshold):
458
- # if not detections: return []
459
- # detections.sort(key=lambda d: d['conf'], reverse=True)
460
- # merged_detections = []
461
- # is_merged = [False] * len(detections)
462
- # for i in range(len(detections)):
463
- # if is_merged[i]: continue
464
- # current_box = detections[i]['coords']
465
- # current_class = detections[i]['class']
466
- # merged_x1, merged_y1, merged_x2, merged_y2 = current_box
467
- # for j in range(i + 1, len(detections)):
468
- # if is_merged[j] or detections[j]['class'] != current_class: continue
469
- # other_box = detections[j]['coords']
470
- # iou = calculate_iou(current_box, other_box)
471
- # if iou > iou_threshold:
472
- # merged_x1 = min(merged_x1, other_box[0])
473
- # merged_y1 = min(merged_y1, other_box[1])
474
- # merged_x2 = max(merged_x2, other_box[2])
475
- # merged_y2 = max(merged_y2, other_box[3])
476
- # is_merged[j] = True
477
- # merged_detections.append({
478
- # 'coords': (merged_x1, merged_y1, merged_x2, merged_y2),
479
- # 'y1': merged_y1, 'class': current_class, 'conf': detections[i]['conf']
480
- # })
481
- # return merged_detections
482
-
483
- # # ============================================================================
484
- # # --- UTILITY FUNCTIONS ---
485
- # # ============================================================================
486
-
487
- # def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
488
- # """Converts a PyMuPDF Pixmap to a NumPy array for OpenCV/YOLO."""
489
- # img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
490
- # (pix.h, pix.w, pix.n)
491
- # )
492
- # if pix.n == 4:
493
- # img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
494
- # elif pix.n == 1:
495
- # img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
496
- # return img
497
-
498
-
499
- # def run_yolo_detection_and_count(
500
- # image: np.ndarray, model: YOLO, page_num: int
501
- # ) -> Tuple[int, int]:
502
- # """
503
- # Runs YOLO inference, applies NMS/filtering, and updates global counters.
504
- # Returns page counts only.
505
- # """
506
- # global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
507
-
508
- # yolo_detections = []
509
- # page_equations = 0
510
- # page_figures = 0
511
-
512
- # try:
513
- # results = model.predict(image, conf=CONF_THRESHOLD, verbose=False)
514
-
515
- # if results and results[0].boxes:
516
- # for box in results[0].boxes.data.tolist():
517
- # x1, y1, x2, y2, conf, cls_id = box
518
- # cls_name = model.names[int(cls_id)]
519
-
520
- # if cls_name in TARGET_CLASSES:
521
- # yolo_detections.append({
522
- # 'coords': (x1, y1, x2, y2),
523
- # 'class': cls_name,
524
- # 'conf': conf
525
- # })
526
- # except Exception as e:
527
- # logging.error(f"YOLO inference failed on page {page_num}: {e}")
528
- # return 0, 0
529
-
530
- # # Apply NMS/Merging/Filtering
531
- # merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
532
- # final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
533
-
534
- # # Update Global Counters
535
- # for det in final_detections:
536
- # if det['class'] == 'figure':
537
- # GLOBAL_FIGURE_COUNT += 1
538
- # page_figures += 1
539
- # elif det['class'] == 'equation':
540
- # GLOBAL_EQUATION_COUNT += 1
541
- # page_equations += 1
542
-
543
- # logging.warning(f" -> Page {page_num}: EQs={page_equations}, Figs={page_figures}")
544
- # return page_equations, page_figures
545
-
546
-
547
- # # ============================================================================
548
- # # --- MAIN DOCUMENT PROCESSING FUNCTION (Fixed for JSON serialization) ---
549
- # # ============================================================================
550
-
551
- # # NOTE: The return signature now uses Dict[str, int] for the equation counts
552
- # def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, float, Dict[str, int], List[str]]:
553
- # """
554
- # Runs the pipeline, returns counts, report, total time, page counts dict (str keys), and empty list.
555
- # """
556
- # global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
557
- # start_time = time.time()
558
- # log_messages = []
559
-
560
- # # Dictionary to store {page_number (int): equation_count (int)}
561
- # equation_counts_per_page: Dict[int, int] = {}
562
-
563
- # # Reset globals
564
- # GLOBAL_FIGURE_COUNT = 0
565
- # GLOBAL_EQUATION_COUNT = 0
566
-
567
- # # 1. Validation and Model Loading
568
- # t0 = time.time()
569
- # if not os.path.exists(pdf_path):
570
- # report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
571
- # return 0, 0, 0, report, time.time() - start_time, {}, []
572
-
573
- # try:
574
- # model = YOLO(WEIGHTS_PATH)
575
- # logging.warning(f"✅ Loaded YOLO model from: {WEIGHTS_PATH}")
576
- # except Exception as e:
577
- # report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
578
- # return 0, 0, 0, report, time.time() - start_time, {}, []
579
- # t1 = time.time()
580
- # log_messages.append(f"Model Loading Time: {t1-t0:.4f}s")
581
-
582
- # # 2. PDF Loading
583
- # t2 = time.time()
584
- # try:
585
- # doc = fitz.open(pdf_path)
586
- # total_pages = doc.page_count
587
- # logging.warning(f"✅ Opened PDF with {doc.page_count} pages")
588
- # except Exception as e:
589
- # report = f"❌ ERROR loading PDF file: {e}"
590
- # return 0, 0, 0, report, time.time() - start_time, {}, []
591
- # t3 = time.time()
592
- # log_messages.append(f"PDF Initialization Time: {t3-t2:.4f}s")
593
-
594
- # mat = fitz.Matrix(SCALE_FACTOR, SCALE_FACTOR)
595
-
596
- # # 3. Page Processing and Detection Loop
597
- # t4 = time.time()
598
- # for page_num_0_based in range(doc.page_count):
599
- # page_start_time = time.time()
600
- # fitz_page = doc.load_page(page_num_0_based)
601
- # page_num = page_num_0_based + 1
602
-
603
- # # Render page to image for YOLO
604
- # try:
605
- # pix_start = time.time()
606
- # pix = fitz_page.get_pixmap(matrix=mat)
607
- # original_img = pixmap_to_numpy(pix)
608
- # pix_time = time.time() - pix_start
609
- # except Exception as e:
610
- # logging.error(f"Error converting page {page_num} to image: {e}. Skipping.")
611
- # continue
612
-
613
- # # Core Detection
614
- # detect_start = time.time()
615
- # page_equations, _ = run_yolo_detection_and_count(original_img, model, page_num)
616
- # detect_time = time.time() - detect_start
617
-
618
- # # Store the count in the dictionary (INT keys)
619
- # equation_counts_per_page[page_num] = page_equations
620
-
621
- # page_total_time = time.time() - page_start_time
622
- # log_messages.append(f"Page {page_num} Time: Total={page_total_time:.4f}s (Render={pix_time:.4f}s, Detect={detect_time:.4f}s)")
623
-
624
- # doc.close()
625
- # t5 = time.time()
626
- # detection_loop_time = t5 - t4
627
- # log_messages.append(f"Total Detection Loop Time ({total_pages} pages): {detection_loop_time:.4f}s")
628
-
629
- # # FIX APPLIED HERE: Convert integer keys to string keys for JSON serialization
630
- # equation_counts_per_page_str_keys: Dict[str, int] = {
631
- # str(k): v for k, v in equation_counts_per_page.items()
632
- # }
633
-
634
- # # 4. Final Report Generation
635
- # total_execution_time = t5 - start_time
636
-
637
- # report = (
638
- # f"✅ **YOLO Counting Complete!**\n\n"
639
- # f"**1) Total Pages Detected in PDF:** **{total_pages}**\n"
640
- # f"**2) Total Equations Detected:** **{GLOBAL_EQUATION_COUNT}**\n"
641
- # f"**3) Total Figures Detected:** **{GLOBAL_FIGURE_COUNT}**\n"
642
- # f"---\n"
643
- # f"**4) Total Execution Time:** **{total_execution_time:.4f}s**\n"
644
- # f"### Detailed Step Timing\n"
645
- # f"```\n"
646
- # + "\n".join(log_messages) +
647
- # f"\n```"
648
- # )
649
-
650
- # # Return the dictionary with string keys
651
- # return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report, total_execution_time, equation_counts_per_page_str_keys, []
652
-
653
-
654
- # # ============================================================================
655
- # # --- GRADIO INTERFACE FUNCTION (Updated) ---
656
- # # ============================================================================
657
-
658
- # def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, Dict[str, int], List[str]]:
659
- # """
660
- # Gradio wrapper function to handle file upload and return results.
661
- # """
662
- # if pdf_file is None:
663
- # # Return an empty dict with string keys
664
- # return "N/A", "N/A", "N/A", "Please upload a PDF file.", {}, []
665
-
666
- # pdf_path = pdf_file.name
667
-
668
- # try:
669
- # # Unpack the new return value: equation_counts_per_page (with string keys)
670
- # num_pages, num_equations, num_figures, report, total_time, equation_counts_per_page, _ = run_single_pdf_preprocessing(
671
- # pdf_path
672
- # )
673
-
674
- # # Return results (6 items now)
675
- # return str(num_pages), str(num_equations), str(num_figures), report, equation_counts_per_page, []
676
-
677
- # except Exception as e:
678
- # error_msg = f"An unexpected error occurred: {e}"
679
- # logging.error(error_msg, exc_info=True)
680
- # # Return an empty dict on error
681
- # return "Error", "Error", "Error", error_msg, {}, []
682
-
683
-
684
- # # ============================================================================
685
- # # --- GRADIO INTERFACE DEFINITION (Updated) ---
686
- # # ============================================================================
687
-
688
- # if __name__ == "__main__":
689
-
690
- # if not os.path.exists(WEIGHTS_PATH):
691
- # logging.error(f"❌ FATAL ERROR: YOLO weight file '{WEIGHTS_PATH}' not found. Cannot run live inference.")
692
-
693
- # input_file = gr.File(label="Upload PDF Document", type="filepath", file_types=[".pdf"])
694
-
695
- # # Outputs
696
- # output_pages = gr.Textbox(label="Total Pages in PDF", interactive=False)
697
- # output_equations = gr.Textbox(label="Total Equations Detected", interactive=False)
698
- # output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
699
- # output_report = gr.Markdown(label="Processing Summary and Timing")
700
-
701
- # # NEW OUTPUT: JSON component for structured data
702
- # output_page_counts = gr.JSON(label="Equation Count Per Page (Dictionary)")
703
-
704
- # # Gradio Gallery is retained but will receive an empty list []
705
- # output_gallery = gr.Gallery(
706
- # label="Detected Equations (Disabled for Speed)",
707
- # columns=5,
708
- # height="auto",
709
- # object_fit="contain",
710
- # allow_preview=False
711
- # )
712
-
713
- # interface = gr.Interface(
714
- # fn=gradio_process_pdf,
715
- # inputs=input_file,
716
- # # Outputs list remains the same, but the JSON component now receives string keys.
717
- # outputs=[
718
- # output_pages,
719
- # output_equations,
720
- # output_figures,
721
- # output_report,
722
- # output_page_counts,
723
- # output_gallery
724
- # ],
725
- # title="📊 YOLO Counting with Per-Page Data & Timing",
726
- # description=(
727
- # "Upload a PDF to run YOLO detection. The results include total counts, a breakdown of "
728
- # "equation counts per page (in JSON format), and detailed timing."
729
- # ),
730
- # )
731
-
732
- # print("\nStarting Gradio application...")
733
- # interface.launch(inbrowser=True)
734
-
 
1
 
2
 
 
 
3
  import fitz # PyMuPDF
4
  import numpy as np
5
  import cv2
6
  import torch
7
  import torch.serialization
8
  import os
9
+ import time
10
  from typing import Optional, Tuple, List, Dict, Any
11
  from ultralytics import YOLO
12
  import logging
 
33
  # ============================================================================
34
 
35
  WEIGHTS_PATH = 'best.pt'
36
+ SCALE_FACTOR = 2.0
37
 
38
  # Detection parameters
39
  CONF_THRESHOLD = 0.2
 
46
  GLOBAL_EQUATION_COUNT = 0
47
 
48
  # ============================================================================
49
+ # --- BOX COMBINATION LOGIC (Retained for detection accuracy) ---
50
  # ============================================================================
51
 
52
  def calculate_iou(box1, box2):
 
121
 
122
  def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
123
  """Converts a PyMuPDF Pixmap to a NumPy array for OpenCV/YOLO."""
 
124
  img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
125
  (pix.h, pix.w, pix.n)
126
  )
 
133
 
134
  def run_yolo_detection_and_count(
135
  image: np.ndarray, model: YOLO, page_num: int
136
+ ) -> Tuple[int, int]:
137
  """
138
  Runs YOLO inference, applies NMS/filtering, and updates global counters.
139
  Returns page counts only.
 
180
 
181
 
182
  # ============================================================================
183
+ # --- MAIN DOCUMENT PROCESSING FUNCTION (Fixed for JSON serialization) ---
184
  # ============================================================================
185
 
186
+ # NOTE: The return signature now uses Dict[str, int] for the equation counts
187
+ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, float, Dict[str, int], List[str]]:
188
  """
189
+ Runs the pipeline, returns counts, report, total time, page counts dict (str keys), and empty list.
 
190
  """
191
  global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
192
  start_time = time.time()
193
  log_messages = []
194
+
195
+ # Dictionary to store {page_number (int): equation_count (int)}
196
+ equation_counts_per_page: Dict[int, int] = {}
197
 
198
  # Reset globals
199
  GLOBAL_FIGURE_COUNT = 0
 
203
  t0 = time.time()
204
  if not os.path.exists(pdf_path):
205
  report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
206
+ return 0, 0, 0, report, time.time() - start_time, {}, []
207
 
208
  try:
209
  model = YOLO(WEIGHTS_PATH)
210
  logging.warning(f"✅ Loaded YOLO model from: {WEIGHTS_PATH}")
211
  except Exception as e:
212
  report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
213
+ return 0, 0, 0, report, time.time() - start_time, {}, []
214
  t1 = time.time()
215
  log_messages.append(f"Model Loading Time: {t1-t0:.4f}s")
216
 
 
222
  logging.warning(f"✅ Opened PDF with {doc.page_count} pages")
223
  except Exception as e:
224
  report = f"❌ ERROR loading PDF file: {e}"
225
+ return 0, 0, 0, report, time.time() - start_time, {}, []
226
  t3 = time.time()
227
  log_messages.append(f"PDF Initialization Time: {t3-t2:.4f}s")
228
 
 
247
 
248
  # Core Detection
249
  detect_start = time.time()
250
+ page_equations, _ = run_yolo_detection_and_count(original_img, model, page_num)
251
  detect_time = time.time() - detect_start
252
 
253
+ # Store the count in the dictionary (INT keys)
254
+ equation_counts_per_page[page_num] = page_equations
255
+
256
  page_total_time = time.time() - page_start_time
257
  log_messages.append(f"Page {page_num} Time: Total={page_total_time:.4f}s (Render={pix_time:.4f}s, Detect={detect_time:.4f}s)")
258
 
 
261
  detection_loop_time = t5 - t4
262
  log_messages.append(f"Total Detection Loop Time ({total_pages} pages): {detection_loop_time:.4f}s")
263
 
264
+ # FIX APPLIED HERE: Convert integer keys to string keys for JSON serialization
265
+ equation_counts_per_page_str_keys: Dict[str, int] = {
266
+ str(k): v for k, v in equation_counts_per_page.items()
267
+ }
268
+
269
  # 4. Final Report Generation
270
  total_execution_time = t5 - start_time
271
 
 
282
  f"\n```"
283
  )
284
 
285
+ # Return the dictionary with string keys
286
+ return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report, total_execution_time, equation_counts_per_page_str_keys, []
287
 
288
 
289
  # ============================================================================
290
  # --- GRADIO INTERFACE FUNCTION (Updated) ---
291
  # ============================================================================
292
 
293
+ def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, Dict[str, int], List[str]]:
 
294
  """
295
+ Gradio wrapper function to handle file upload and return results.
296
  """
297
  if pdf_file is None:
298
+ # Return an empty dict with string keys
299
+ return "N/A", "N/A", "N/A", "Please upload a PDF file.", {}, []
300
 
301
  pdf_path = pdf_file.name
302
 
303
  try:
304
+ # Unpack the new return value: equation_counts_per_page (with string keys)
305
+ num_pages, num_equations, num_figures, report, total_time, equation_counts_per_page, _ = run_single_pdf_preprocessing(
 
306
  pdf_path
307
  )
308
 
309
+ # Return results (6 items now)
310
+ return str(num_pages), str(num_equations), str(num_figures), report, equation_counts_per_page, []
311
 
312
  except Exception as e:
313
  error_msg = f"An unexpected error occurred: {e}"
314
  logging.error(error_msg, exc_info=True)
315
+ # Return an empty dict on error
316
+ return "Error", "Error", "Error", error_msg, {}, []
317
 
318
 
319
  # ============================================================================
 
333
  output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
334
  output_report = gr.Markdown(label="Processing Summary and Timing")
335
 
336
+ # NEW OUTPUT: JSON component for structured data
337
+ output_page_counts = gr.JSON(label="Equation Count Per Page (Dictionary)")
338
+
339
  # Gradio Gallery is retained but will receive an empty list []
340
  output_gallery = gr.Gallery(
341
  label="Detected Equations (Disabled for Speed)",
342
  columns=5,
343
  height="auto",
344
  object_fit="contain",
345
+ allow_preview=False
346
  )
347
 
348
  interface = gr.Interface(
349
  fn=gradio_process_pdf,
350
  inputs=input_file,
351
+ # Outputs list remains the same, but the JSON component now receives string keys.
352
+ outputs=[
353
+ output_pages,
354
+ output_equations,
355
+ output_figures,
356
+ output_report,
357
+ output_page_counts,
358
+ output_gallery
359
+ ],
360
+ title="📊 YOLO Counting with Per-Page Data & Timing",
361
  description=(
362
+ "Upload a PDF to run YOLO detection. The results include total counts, a breakdown of "
363
+ "equation counts per page (in JSON format), and detailed timing."
364
  ),
365
  )
366
 
367
  print("\nStarting Gradio application...")
368
  interface.launch(inbrowser=True)
369