heerjtdev committed on
Commit
4a1e588
·
verified ·
1 Parent(s): d126c17

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +137 -367
app.py CHANGED
@@ -1,365 +1,6 @@
1
 
2
-
3
-
4
-
5
- # import fitz # PyMuPDF
6
- # import numpy as np
7
- # import cv2
8
- # import torch
9
- # import torch.serialization
10
- # import os
11
- # import time # Import for timing
12
- # from typing import Optional, Tuple, List, Dict, Any
13
- # from ultralytics import YOLO
14
- # import logging
15
- # import gradio as gr
16
- # import shutil
17
- # import tempfile
18
- # import io
19
-
20
- # # ============================================================================
21
- # # --- Global Patches and Setup ---
22
- # # ============================================================================
23
-
24
- # # Patch torch.load to prevent weights_only error with older models
25
- # _original_torch_load = torch.load
26
- # def patched_torch_load(*args, **kwargs):
27
- # kwargs["weights_only"] = False
28
- # return _original_torch_load(*args, **kwargs)
29
- # torch.load = patched_torch_load
30
-
31
- # logging.basicConfig(level=logging.WARNING)
32
-
33
- # # ============================================================================
34
- # # --- CONFIGURATION AND CONSTANTS ---
35
- # # ============================================================================
36
-
37
- # WEIGHTS_PATH = 'best.pt'
38
- # SCALE_FACTOR = 2.0 # Used for page rendering and coordinate scaling
39
-
40
- # # Detection parameters
41
- # CONF_THRESHOLD = 0.2
42
- # TARGET_CLASSES = ['figure', 'equation']
43
- # IOU_MERGE_THRESHOLD = 0.4
44
- # IOA_SUPPRESSION_THRESHOLD = 0.7
45
-
46
- # # Global counters (Reset per run)
47
- # GLOBAL_FIGURE_COUNT = 0
48
- # GLOBAL_EQUATION_COUNT = 0
49
-
50
- # # ============================================================================
51
- # # --- BOX COMBINATION LOGIC ---
52
- # # ============================================================================
53
-
54
- # def calculate_iou(box1, box2):
55
- # x1_a, y1_a, x2_a, y2_a = box1
56
- # x1_b, y1_b, x2_b, y2_b = box2
57
- # x_left = max(x1_a, x1_b)
58
- # y_top = max(y1_a, y1_b)
59
- # x_right = min(x2_a, x2_b)
60
- # y_bottom = min(y2_a, y2_b)
61
- # intersection_area = max(0, x_right - x_left) * max(0, y_bottom - y_top)
62
- # box_a_area = (x2_a - x1_a) * (y2_a - y1_a)
63
- # box_b_area = (x2_b - x1_b) * (y2_b - y1_b)
64
- # union_area = float(box_a_area + box_b_area - intersection_area)
65
- # return intersection_area / union_area if union_area > 0 else 0
66
-
67
-
68
- # def filter_nested_boxes(detections, ioa_threshold=0.80):
69
- # if not detections: return []
70
- # for d in detections:
71
- # x1, y1, x2, y2 = d['coords']
72
- # d['area'] = (x2 - x1) * (y2 - y1)
73
- # detections.sort(key=lambda x: x['area'], reverse=True)
74
- # keep_indices = []
75
- # is_suppressed = [False] * len(detections)
76
- # for i in range(len(detections)):
77
- # if is_suppressed[i]: continue
78
- # keep_indices.append(i)
79
- # box_a = detections[i]['coords']
80
- # for j in range(i + 1, len(detections)):
81
- # if is_suppressed[j]: continue
82
- # box_b = detections[j]['coords']
83
- # x_left = max(box_a[0], box_b[0])
84
- # y_top = max(box_a[1], box_b[1])
85
- # x_right = min(box_a[2], box_b[2])
86
- # y_bottom = min(box_a[3], box_b[3])
87
- # intersection = max(0, x_right - x_left) * max(0, y_bottom - y_top)
88
- # area_b = detections[j]['area']
89
- # if area_b > 0 and intersection / area_b > ioa_threshold:
90
- # is_suppressed[j] = True
91
- # return [detections[i] for i in keep_indices]
92
-
93
-
94
- # def merge_overlapping_boxes(detections, iou_threshold):
95
- # if not detections: return []
96
- # detections.sort(key=lambda d: d['conf'], reverse=True)
97
- # merged_detections = []
98
- # is_merged = [False] * len(detections)
99
- # for i in range(len(detections)):
100
- # if is_merged[i]: continue
101
- # current_box = detections[i]['coords']
102
- # current_class = detections[i]['class']
103
- # merged_x1, merged_y1, merged_x2, merged_y2 = current_box
104
- # for j in range(i + 1, len(detections)):
105
- # if is_merged[j] or detections[j]['class'] != current_class: continue
106
- # other_box = detections[j]['coords']
107
- # iou = calculate_iou(current_box, other_box)
108
- # if iou > iou_threshold:
109
- # merged_x1 = min(merged_x1, other_box[0])
110
- # merged_y1 = min(merged_y1, other_box[1])
111
- # merged_x2 = max(merged_x2, other_box[2])
112
- # merged_y2 = max(merged_y2, other_box[3])
113
- # is_merged[j] = True
114
- # merged_detections.append({
115
- # 'coords': (merged_x1, merged_y1, merged_x2, merged_y2),
116
- # 'y1': merged_y1, 'class': current_class, 'conf': detections[i]['conf']
117
- # })
118
- # return merged_detections
119
-
120
- # # ============================================================================
121
- # # --- UTILITY FUNCTIONS ---
122
- # # ============================================================================
123
-
124
- # def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
125
- # """Converts a PyMuPDF Pixmap to a NumPy array for OpenCV/YOLO."""
126
- # # This function is retained as it's required to convert PDF page to image for YOLO input.
127
- # img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
128
- # (pix.h, pix.w, pix.n)
129
- # )
130
- # if pix.n == 4:
131
- # img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
132
- # elif pix.n == 1:
133
- # img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
134
- # return img
135
-
136
-
137
- # def run_yolo_detection_and_count(
138
- # image: np.ndarray, model: YOLO, page_num: int
139
- # ) -> Tuple[int, int]: # Removed equation_results list from return
140
- # """
141
- # Runs YOLO inference, applies NMS/filtering, and updates global counters.
142
- # Returns page counts only.
143
- # """
144
- # global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
145
-
146
- # yolo_detections = []
147
- # page_equations = 0
148
- # page_figures = 0
149
-
150
- # try:
151
- # results = model.predict(image, conf=CONF_THRESHOLD, verbose=False)
152
-
153
- # if results and results[0].boxes:
154
- # for box in results[0].boxes.data.tolist():
155
- # x1, y1, x2, y2, conf, cls_id = box
156
- # cls_name = model.names[int(cls_id)]
157
-
158
- # if cls_name in TARGET_CLASSES:
159
- # yolo_detections.append({
160
- # 'coords': (x1, y1, x2, y2),
161
- # 'class': cls_name,
162
- # 'conf': conf
163
- # })
164
- # except Exception as e:
165
- # logging.error(f"YOLO inference failed on page {page_num}: {e}")
166
- # return 0, 0
167
-
168
- # # Apply NMS/Merging/Filtering
169
- # merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
170
- # final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
171
-
172
- # # Update Global Counters
173
- # for det in final_detections:
174
- # if det['class'] == 'figure':
175
- # GLOBAL_FIGURE_COUNT += 1
176
- # page_figures += 1
177
- # elif det['class'] == 'equation':
178
- # GLOBAL_EQUATION_COUNT += 1
179
- # page_equations += 1
180
-
181
- # logging.warning(f" -> Page {page_num}: EQs={page_equations}, Figs={page_figures}")
182
- # return page_equations, page_figures
183
-
184
-
185
- # # ============================================================================
186
- # # --- MAIN DOCUMENT PROCESSING FUNCTION (Optimized) ---
187
- # # ============================================================================
188
-
189
- # def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, float, List[str]]:
190
- # """
191
- # Runs the pipeline, returns counts, report, total time, and an empty list
192
- # (maintaining the expected return signature for Gradio but with None for gallery).
193
- # """
194
- # global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
195
- # start_time = time.time()
196
- # log_messages = []
197
-
198
- # # Reset globals
199
- # GLOBAL_FIGURE_COUNT = 0
200
- # GLOBAL_EQUATION_COUNT = 0
201
-
202
- # # 1. Validation and Model Loading
203
- # t0 = time.time()
204
- # if not os.path.exists(pdf_path):
205
- # report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
206
- # return 0, 0, 0, report, time.time() - start_time, []
207
-
208
- # try:
209
- # model = YOLO(WEIGHTS_PATH)
210
- # logging.warning(f"✅ Loaded YOLO model from: {WEIGHTS_PATH}")
211
- # except Exception as e:
212
- # report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
213
- # return 0, 0, 0, report, time.time() - start_time, []
214
- # t1 = time.time()
215
- # log_messages.append(f"Model Loading Time: {t1-t0:.4f}s")
216
-
217
- # # 2. PDF Loading
218
- # t2 = time.time()
219
- # try:
220
- # doc = fitz.open(pdf_path)
221
- # total_pages = doc.page_count
222
- # logging.warning(f"✅ Opened PDF with {doc.page_count} pages")
223
- # except Exception as e:
224
- # report = f"❌ ERROR loading PDF file: {e}"
225
- # return 0, 0, 0, report, time.time() - start_time, []
226
- # t3 = time.time()
227
- # log_messages.append(f"PDF Initialization Time: {t3-t2:.4f}s")
228
-
229
- # mat = fitz.Matrix(SCALE_FACTOR, SCALE_FACTOR)
230
-
231
- # # 3. Page Processing and Detection Loop
232
- # t4 = time.time()
233
- # for page_num_0_based in range(doc.page_count):
234
- # page_start_time = time.time()
235
- # fitz_page = doc.load_page(page_num_0_based)
236
- # page_num = page_num_0_based + 1
237
-
238
- # # Render page to image for YOLO
239
- # try:
240
- # pix_start = time.time()
241
- # pix = fitz_page.get_pixmap(matrix=mat)
242
- # original_img = pixmap_to_numpy(pix)
243
- # pix_time = time.time() - pix_start
244
- # except Exception as e:
245
- # logging.error(f"Error converting page {page_num} to image: {e}. Skipping.")
246
- # continue
247
-
248
- # # Core Detection
249
- # detect_start = time.time()
250
- # run_yolo_detection_and_count(original_img, model, page_num)
251
- # detect_time = time.time() - detect_start
252
-
253
- # page_total_time = time.time() - page_start_time
254
- # log_messages.append(f"Page {page_num} Time: Total={page_total_time:.4f}s (Render={pix_time:.4f}s, Detect={detect_time:.4f}s)")
255
-
256
- # doc.close()
257
- # t5 = time.time()
258
- # detection_loop_time = t5 - t4
259
- # log_messages.append(f"Total Detection Loop Time ({total_pages} pages): {detection_loop_time:.4f}s")
260
-
261
- # # 4. Final Report Generation
262
- # total_execution_time = t5 - start_time
263
-
264
- # report = (
265
- # f"✅ **YOLO Counting Complete!**\n\n"
266
- # f"**1) Total Pages Detected in PDF:** **{total_pages}**\n"
267
- # f"**2) Total Equations Detected:** **{GLOBAL_EQUATION_COUNT}**\n"
268
- # f"**3) Total Figures Detected:** **{GLOBAL_FIGURE_COUNT}**\n"
269
- # f"---\n"
270
- # f"**4) Total Execution Time:** **{total_execution_time:.4f}s**\n"
271
- # f"### Detailed Step Timing\n"
272
- # f"```\n"
273
- # + "\n".join(log_messages) +
274
- # f"\n```"
275
- # )
276
-
277
- # # Return total_execution_time and an empty list for the gallery output
278
- # return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report, total_execution_time, []
279
-
280
-
281
- # # ============================================================================
282
- # # --- GRADIO INTERFACE FUNCTION (Updated) ---
283
- # # ============================================================================
284
-
285
- # # NOTE: The return signature has changed. We removed 'temp_output_dir' as it's no longer used.
286
- # def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, List[str]]:
287
- # """
288
- # Gradio wrapper function to handle file upload and return results (no image handling).
289
- # """
290
- # if pdf_file is None:
291
- # return "N/A", "N/A", "N/A", "Please upload a PDF file.", []
292
-
293
- # pdf_path = pdf_file.name
294
-
295
- # try:
296
- # # Run the core logic
297
- # # Note the change: temp_output_dir is removed, and total_time is returned
298
- # num_pages, num_equations, num_figures, report, total_time, _ = run_single_pdf_preprocessing(
299
- # pdf_path
300
- # )
301
-
302
- # # Return results (the last item is an empty list for the now-empty gallery)
303
- # return str(num_pages), str(num_equations), str(num_figures), report, []
304
-
305
- # except Exception as e:
306
- # error_msg = f"An unexpected error occurred: {e}"
307
- # logging.error(error_msg, exc_info=True)
308
- # return "Error", "Error", "Error", error_msg, []
309
-
310
-
311
- # # ============================================================================
312
- # # --- GRADIO INTERFACE DEFINITION (Updated) ---
313
- # # ============================================================================
314
-
315
- # if __name__ == "__main__":
316
-
317
- # if not os.path.exists(WEIGHTS_PATH):
318
- # logging.error(f"❌ FATAL ERROR: YOLO weight file '{WEIGHTS_PATH}' not found. Cannot run live inference.")
319
-
320
- # input_file = gr.File(label="Upload PDF Document", type="filepath", file_types=[".pdf"])
321
-
322
- # # Outputs
323
- # output_pages = gr.Textbox(label="Total Pages in PDF", interactive=False)
324
- # output_equations = gr.Textbox(label="Total Equations Detected", interactive=False)
325
- # output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
326
- # output_report = gr.Markdown(label="Processing Summary and Timing")
327
-
328
- # # Gradio Gallery is retained but will receive an empty list []
329
- # output_gallery = gr.Gallery(
330
- # label="Detected Equations (Disabled for Speed)",
331
- # columns=5,
332
- # height="auto",
333
- # object_fit="contain",
334
- # allow_preview=False # Disable preview since it's empty
335
- # )
336
-
337
- # interface = gr.Interface(
338
- # fn=gradio_process_pdf,
339
- # inputs=input_file,
340
- # # The number of outputs remains 5 (3 textboxes, 1 markdown, 1 gallery)
341
- # outputs=[output_pages, output_equations, output_figures, output_report, output_gallery],
342
- # title="🚀 Optimized YOLO Counting with Timing",
343
- # description=(
344
- # "Upload a PDF to run YOLO detection. Image cropping is disabled for maximum speed. "
345
- # "Timing for each step is included in the summary report."
346
- # ),
347
- # )
348
-
349
- # print("\nStarting Gradio application...")
350
- # interface.launch(inbrowser=True)
351
-
352
-
353
-
354
-
355
-
356
-
357
-
358
-
359
-
360
-
361
-
362
-
363
 
364
 
365
 
@@ -544,6 +185,124 @@ def run_yolo_detection_and_count(
544
  return page_equations, page_figures
545
 
546
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
547
  # ============================================================================
548
  # --- MAIN DOCUMENT PROCESSING FUNCTION (Fixed for JSON serialization) ---
549
  # ============================================================================
@@ -553,9 +312,12 @@ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, flo
553
  """
554
  Runs the pipeline, returns counts, report, total time, page counts dict (str keys), and empty list.
555
  """
 
 
556
  global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
557
  start_time = time.time()
558
  log_messages = []
 
559
 
560
  # Dictionary to store {page_number (int): equation_count (int)}
561
  equation_counts_per_page: Dict[int, int] = {}
@@ -612,7 +374,10 @@ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, flo
612
 
613
  # Core Detection
614
  detect_start = time.time()
615
- page_equations, _ = run_yolo_detection_and_count(original_img, model, page_num)
 
 
 
616
  detect_time = time.time() - detect_start
617
 
618
  # Store the count in the dictionary (INT keys)
@@ -648,7 +413,8 @@ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, flo
648
  )
649
 
650
  # Return the dictionary with string keys
651
- return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report, total_execution_time, equation_counts_per_page_str_keys, []
 
652
 
653
 
654
  # ============================================================================
@@ -667,12 +433,16 @@ def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, Dict[str, int], Li
667
 
668
  try:
669
  # Unpack the new return value: equation_counts_per_page (with string keys)
670
- num_pages, num_equations, num_figures, report, total_time, equation_counts_per_page, _ = run_single_pdf_preprocessing(
671
- pdf_path
672
- )
 
 
673
 
674
  # Return results (6 items now)
675
- return str(num_pages), str(num_equations), str(num_figures), report, equation_counts_per_page, []
 
 
676
 
677
  except Exception as e:
678
  error_msg = f"An unexpected error occurred: {e}"
 
1
 
2
+ import base64
3
+ from PIL import Image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
 
6
 
 
185
  return page_equations, page_figures
186
 
187
 
188
+
189
+
190
+
191
+
192
+
193
+
194
+
195
+
196
+
197
+
198
+
199
+
200
+
201
+
202
+
203
+
204
+
205
+
206
+
207
+
208
def crop_and_convert_to_base64(image: np.ndarray, bbox: Tuple[float, float, float, float]) -> str:
    """
    Crop a bounding box out of an image and return it as a PNG data URI.

    Args:
        image: Image array indexed as ``image[y, x]``. Both 2-D arrays and
            3-D arrays with a trailing channel axis are accepted.
        bbox: ``(x1, y1, x2, y2)`` box corners in pixel coordinates of ``image``.

    Returns:
        A ``data:image/png;base64,...`` string holding the PNG-encoded crop.

    Raises:
        ValueError: If the box does not overlap the image after clamping, or
            if OpenCV reports that PNG encoding failed.
    """
    # Only the first two axes are needed, so 2-D (grayscale) input works too.
    h, w = image.shape[:2]
    x1, y1, x2, y2 = bbox

    # Round outwards (floor the top-left, ceil the bottom-right) so a box
    # thinner than one pixel still yields a non-empty crop, then clamp to the
    # image bounds.
    left = max(0, int(np.floor(x1)))
    top = max(0, int(np.floor(y1)))
    right = min(w, int(np.ceil(x2)))
    bottom = min(h, int(np.ceil(y2)))

    # Reject degenerate boxes explicitly: an empty array makes cv2.imencode
    # fail, and a negative right/bottom would be read by NumPy as a
    # "from the end" slice index and silently crop the wrong region.
    if right <= left or bottom <= top:
        raise ValueError(
            f"Bounding box {tuple(bbox)} does not overlap the {w}x{h} image"
        )

    crop = image[top:bottom, left:right]

    # NOTE(review): cv2.imencode interprets 3-channel input as BGR. If the
    # caller passes an RGB array the red/blue channels of the PNG will be
    # swapped - confirm the channel order used by the caller.
    ok, buffer = cv2.imencode(".png", crop)
    if not ok:
        raise ValueError("PNG encoding of the cropped region failed")

    b64 = base64.b64encode(buffer).decode("utf-8")
    return f"data:image/png;base64,{b64}"
228
+
229
+
230
+
231
+
232
+
233
+
234
+
235
+
236
def run_yolo_detection_and_count(
    image: np.ndarray, model: YOLO, page_num: int
) -> Tuple[int, int, List[str]]:
    """
    Run YOLO on one page image, count figures/equations, and crop the equations.

    Detections whose class name is in TARGET_CLASSES are merged with
    merge_overlapping_boxes and filtered with filter_nested_boxes before they
    are counted. Each counted detection also increments the matching
    module-level counter (GLOBAL_FIGURE_COUNT or GLOBAL_EQUATION_COUNT).

    Args:
        image: Page image passed to ``model.predict`` and used for cropping.
        model: YOLO model; ``model.names`` is used to map class ids to names.
        page_num: Page number, used only in log messages.

    Returns:
        ``(page_equations, page_figures, cropped_images_base64)``. The list
        holds one base64 data URI per equation that could be cropped. If
        inference raises, ``(0, 0, [])`` is returned and the global counters
        are left untouched.
    """
    global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT

    yolo_detections = []
    page_equations = 0
    page_figures = 0
    cropped_images_base64 = []

    try:
        results = model.predict(image, conf=CONF_THRESHOLD, verbose=False)

        if results and results[0].boxes:
            for box in results[0].boxes.data.tolist():
                x1, y1, x2, y2, conf, cls_id = box
                cls_name = model.names[int(cls_id)]

                if cls_name in TARGET_CLASSES:
                    yolo_detections.append({
                        'coords': (x1, y1, x2, y2),
                        'class': cls_name,
                        'conf': conf
                    })
    except Exception as e:
        logging.error(f"YOLO inference failed on page {page_num}: {e}")
        return 0, 0, []

    merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
    final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)

    for det in final_detections:
        bbox = det['coords']

        if det['class'] == 'figure':
            GLOBAL_FIGURE_COUNT += 1
            page_figures += 1

        elif det['class'] == 'equation':
            GLOBAL_EQUATION_COUNT += 1
            page_equations += 1

            # Only equations are cropped. A box that cannot be cropped or
            # encoded is logged and skipped so that one bad detection does
            # not abort the rest of the page (the equation is still counted).
            try:
                b64_img = crop_and_convert_to_base64(image, bbox)
            except (ValueError, cv2.error) as e:
                logging.error(f"Could not crop equation on page {page_num}: {e}")
                continue
            cropped_images_base64.append(b64_img)

    logging.warning(f" -> Page {page_num}: EQs={page_equations}, Figs={page_figures}")
    return page_equations, page_figures, cropped_images_base64
288
+
289
+
290
+
291
+
292
+
293
+
294
+
295
+
296
+
297
+
298
+
299
+
300
+
301
+
302
+
303
+
304
+
305
+
306
  # ============================================================================
307
  # --- MAIN DOCUMENT PROCESSING FUNCTION (Fixed for JSON serialization) ---
308
  # ============================================================================
 
312
  """
313
  Runs the pipeline, returns counts, report, total time, page counts dict (str keys), and empty list.
314
  """
315
+
316
+
317
  global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
318
  start_time = time.time()
319
  log_messages = []
320
+ all_base64_images: List[str] = []
321
 
322
  # Dictionary to store {page_number (int): equation_count (int)}
323
  equation_counts_per_page: Dict[int, int] = {}
 
374
 
375
  # Core Detection
376
  detect_start = time.time()
377
+ # page_equations, _ = run_yolo_detection_and_count(original_img, model, page_num)
378
+ page_equations, _, page_images = run_yolo_detection_and_count(original_img, model, page_num)
379
+ all_base64_images.extend(page_images)
380
+
381
  detect_time = time.time() - detect_start
382
 
383
  # Store the count in the dictionary (INT keys)
 
413
  )
414
 
415
  # Return the dictionary with string keys
416
+ # return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report, total_execution_time, equation_counts_per_page_str_keys, []
417
+ return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report, total_execution_time, equation_counts_per_page_str_keys, all_base64_images
418
 
419
 
420
  # ============================================================================
 
433
 
434
  try:
435
  # Unpack the new return value: equation_counts_per_page (with string keys)
436
+ # num_pages, num_equations, num_figures, report, total_time, equation_counts_per_page, _ = run_single_pdf_preprocessing(
437
+ # pdf_path
438
+ # )
439
+ num_pages, num_equations, num_figures, report, total_time, equation_counts_per_page, images = run_single_pdf_preprocessing(pdf_path)
440
+
441
 
442
  # Return results (6 items now)
443
+ # return str(num_pages), str(num_equations), str(num_figures), report, equation_counts_per_page, []
444
+ return str(num_pages), str(num_equations), str(num_figures), report, equation_counts_per_page, images
445
+
446
 
447
  except Exception as e:
448
  error_msg = f"An unexpected error occurred: {e}"