heerjtdev committed on
Commit
e1d2112
·
verified ·
1 Parent(s): fff6c9e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +400 -31
app.py CHANGED
@@ -2,13 +2,363 @@
2
 
3
 
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import fitz # PyMuPDF
6
  import numpy as np
7
  import cv2
8
  import torch
9
  import torch.serialization
10
  import os
11
- import time # Import for timing
12
  from typing import Optional, Tuple, List, Dict, Any
13
  from ultralytics import YOLO
14
  import logging
@@ -35,7 +385,7 @@ logging.basicConfig(level=logging.WARNING)
35
  # ============================================================================
36
 
37
  WEIGHTS_PATH = 'best.pt'
38
- SCALE_FACTOR = 2.0 # Used for page rendering and coordinate scaling
39
 
40
  # Detection parameters
41
  CONF_THRESHOLD = 0.2
@@ -48,7 +398,7 @@ GLOBAL_FIGURE_COUNT = 0
48
  GLOBAL_EQUATION_COUNT = 0
49
 
50
  # ============================================================================
51
- # --- BOX COMBINATION LOGIC ---
52
  # ============================================================================
53
 
54
  def calculate_iou(box1, box2):
@@ -123,7 +473,6 @@ def merge_overlapping_boxes(detections, iou_threshold):
123
 
124
  def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
125
  """Converts a PyMuPDF Pixmap to a NumPy array for OpenCV/YOLO."""
126
- # This function is retained as it's required to convert PDF page to image for YOLO input.
127
  img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
128
  (pix.h, pix.w, pix.n)
129
  )
@@ -136,7 +485,7 @@ def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
136
 
137
  def run_yolo_detection_and_count(
138
  image: np.ndarray, model: YOLO, page_num: int
139
- ) -> Tuple[int, int]: # Removed equation_results list from return
140
  """
141
  Runs YOLO inference, applies NMS/filtering, and updates global counters.
142
  Returns page counts only.
@@ -183,17 +532,20 @@ def run_yolo_detection_and_count(
183
 
184
 
185
  # ============================================================================
186
- # --- MAIN DOCUMENT PROCESSING FUNCTION (Optimized) ---
187
  # ============================================================================
188
 
189
- def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, float, List[str]]:
 
190
  """
191
- Runs the pipeline, returns counts, report, total time, and an empty list
192
- (maintaining the expected return signature for Gradio but with None for gallery).
193
  """
194
  global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
195
  start_time = time.time()
196
  log_messages = []
 
 
 
197
 
198
  # Reset globals
199
  GLOBAL_FIGURE_COUNT = 0
@@ -203,14 +555,16 @@ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, flo
203
  t0 = time.time()
204
  if not os.path.exists(pdf_path):
205
  report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
206
- return 0, 0, 0, report, time.time() - start_time, []
 
207
 
208
  try:
209
  model = YOLO(WEIGHTS_PATH)
210
  logging.warning(f"βœ… Loaded YOLO model from: {WEIGHTS_PATH}")
211
  except Exception as e:
212
  report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
213
- return 0, 0, 0, report, time.time() - start_time, []
 
214
  t1 = time.time()
215
  log_messages.append(f"Model Loading Time: {t1-t0:.4f}s")
216
 
@@ -222,7 +576,8 @@ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, flo
222
  logging.warning(f"βœ… Opened PDF with {doc.page_count} pages")
223
  except Exception as e:
224
  report = f"❌ ERROR loading PDF file: {e}"
225
- return 0, 0, 0, report, time.time() - start_time, []
 
226
  t3 = time.time()
227
  log_messages.append(f"PDF Initialization Time: {t3-t2:.4f}s")
228
 
@@ -247,9 +602,12 @@ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, flo
247
 
248
  # Core Detection
249
  detect_start = time.time()
250
- run_yolo_detection_and_count(original_img, model, page_num)
251
  detect_time = time.time() - detect_start
252
 
 
 
 
253
  page_total_time = time.time() - page_start_time
254
  log_messages.append(f"Page {page_num} Time: Total={page_total_time:.4f}s (Render={pix_time:.4f}s, Detect={detect_time:.4f}s)")
255
 
@@ -274,38 +632,39 @@ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, flo
274
  f"\n```"
275
  )
276
 
277
- # Return total_execution_time and an empty list for the gallery output
278
- return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report, total_execution_time, []
279
 
280
 
281
  # ============================================================================
282
  # --- GRADIO INTERFACE FUNCTION (Updated) ---
283
  # ============================================================================
284
 
285
- # NOTE: The return signature has changed. We removed 'temp_output_dir' as it's no longer used.
286
- def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, List[str]]:
287
  """
288
- Gradio wrapper function to handle file upload and return results (no image handling).
289
  """
290
  if pdf_file is None:
291
- return "N/A", "N/A", "N/A", "Please upload a PDF file.", []
 
292
 
293
  pdf_path = pdf_file.name
294
 
295
  try:
296
- # Run the core logic
297
- # Note the change: temp_output_dir is removed, and total_time is returned
298
- num_pages, num_equations, num_figures, report, total_time, _ = run_single_pdf_preprocessing(
299
  pdf_path
300
  )
301
 
302
- # Return results (the last item is an empty list for the now-empty gallery)
303
- return str(num_pages), str(num_equations), str(num_figures), report, []
304
 
305
  except Exception as e:
306
  error_msg = f"An unexpected error occurred: {e}"
307
  logging.error(error_msg, exc_info=True)
308
- return "Error", "Error", "Error", error_msg, []
 
309
 
310
 
311
  # ============================================================================
@@ -325,24 +684,34 @@ if __name__ == "__main__":
325
  output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
326
  output_report = gr.Markdown(label="Processing Summary and Timing")
327
 
 
 
 
328
  # Gradio Gallery is retained but will receive an empty list []
329
  output_gallery = gr.Gallery(
330
  label="Detected Equations (Disabled for Speed)",
331
  columns=5,
332
  height="auto",
333
  object_fit="contain",
334
- allow_preview=False # Disable preview since it's empty
335
  )
336
 
337
  interface = gr.Interface(
338
  fn=gradio_process_pdf,
339
  inputs=input_file,
340
- # The number of outputs remains 5 (3 textboxes, 1 markdown, 1 gallery)
341
- outputs=[output_pages, output_equations, output_figures, output_report, output_gallery],
342
- title="πŸš€ Optimized YOLO Counting with Timing",
 
 
 
 
 
 
 
343
  description=(
344
- "Upload a PDF to run YOLO detection. Image cropping is disabled for maximum speed. "
345
- "Timing for each step is included in the summary report."
346
  ),
347
  )
348
 
 
2
 
3
 
4
 
5
+ # import fitz # PyMuPDF
6
+ # import numpy as np
7
+ # import cv2
8
+ # import torch
9
+ # import torch.serialization
10
+ # import os
11
+ # import time # Import for timing
12
+ # from typing import Optional, Tuple, List, Dict, Any
13
+ # from ultralytics import YOLO
14
+ # import logging
15
+ # import gradio as gr
16
+ # import shutil
17
+ # import tempfile
18
+ # import io
19
+
20
+ # # ============================================================================
21
+ # # --- Global Patches and Setup ---
22
+ # # ============================================================================
23
+
24
+ # # Patch torch.load to prevent weights_only error with older models
25
+ # _original_torch_load = torch.load
26
+ # def patched_torch_load(*args, **kwargs):
27
+ # kwargs["weights_only"] = False
28
+ # return _original_torch_load(*args, **kwargs)
29
+ # torch.load = patched_torch_load
30
+
31
+ # logging.basicConfig(level=logging.WARNING)
32
+
33
+ # # ============================================================================
34
+ # # --- CONFIGURATION AND CONSTANTS ---
35
+ # # ============================================================================
36
+
37
+ # WEIGHTS_PATH = 'best.pt'
38
+ # SCALE_FACTOR = 2.0 # Used for page rendering and coordinate scaling
39
+
40
+ # # Detection parameters
41
+ # CONF_THRESHOLD = 0.2
42
+ # TARGET_CLASSES = ['figure', 'equation']
43
+ # IOU_MERGE_THRESHOLD = 0.4
44
+ # IOA_SUPPRESSION_THRESHOLD = 0.7
45
+
46
+ # # Global counters (Reset per run)
47
+ # GLOBAL_FIGURE_COUNT = 0
48
+ # GLOBAL_EQUATION_COUNT = 0
49
+
50
+ # # ============================================================================
51
+ # # --- BOX COMBINATION LOGIC ---
52
+ # # ============================================================================
53
+
54
+ # def calculate_iou(box1, box2):
55
+ # x1_a, y1_a, x2_a, y2_a = box1
56
+ # x1_b, y1_b, x2_b, y2_b = box2
57
+ # x_left = max(x1_a, x1_b)
58
+ # y_top = max(y1_a, y1_b)
59
+ # x_right = min(x2_a, x2_b)
60
+ # y_bottom = min(y2_a, y2_b)
61
+ # intersection_area = max(0, x_right - x_left) * max(0, y_bottom - y_top)
62
+ # box_a_area = (x2_a - x1_a) * (y2_a - y1_a)
63
+ # box_b_area = (x2_b - x1_b) * (y2_b - y1_b)
64
+ # union_area = float(box_a_area + box_b_area - intersection_area)
65
+ # return intersection_area / union_area if union_area > 0 else 0
66
+
67
+
68
+ # def filter_nested_boxes(detections, ioa_threshold=0.80):
69
+ # if not detections: return []
70
+ # for d in detections:
71
+ # x1, y1, x2, y2 = d['coords']
72
+ # d['area'] = (x2 - x1) * (y2 - y1)
73
+ # detections.sort(key=lambda x: x['area'], reverse=True)
74
+ # keep_indices = []
75
+ # is_suppressed = [False] * len(detections)
76
+ # for i in range(len(detections)):
77
+ # if is_suppressed[i]: continue
78
+ # keep_indices.append(i)
79
+ # box_a = detections[i]['coords']
80
+ # for j in range(i + 1, len(detections)):
81
+ # if is_suppressed[j]: continue
82
+ # box_b = detections[j]['coords']
83
+ # x_left = max(box_a[0], box_b[0])
84
+ # y_top = max(box_a[1], box_b[1])
85
+ # x_right = min(box_a[2], box_b[2])
86
+ # y_bottom = min(box_a[3], box_b[3])
87
+ # intersection = max(0, x_right - x_left) * max(0, y_bottom - y_top)
88
+ # area_b = detections[j]['area']
89
+ # if area_b > 0 and intersection / area_b > ioa_threshold:
90
+ # is_suppressed[j] = True
91
+ # return [detections[i] for i in keep_indices]
92
+
93
+
94
+ # def merge_overlapping_boxes(detections, iou_threshold):
95
+ # if not detections: return []
96
+ # detections.sort(key=lambda d: d['conf'], reverse=True)
97
+ # merged_detections = []
98
+ # is_merged = [False] * len(detections)
99
+ # for i in range(len(detections)):
100
+ # if is_merged[i]: continue
101
+ # current_box = detections[i]['coords']
102
+ # current_class = detections[i]['class']
103
+ # merged_x1, merged_y1, merged_x2, merged_y2 = current_box
104
+ # for j in range(i + 1, len(detections)):
105
+ # if is_merged[j] or detections[j]['class'] != current_class: continue
106
+ # other_box = detections[j]['coords']
107
+ # iou = calculate_iou(current_box, other_box)
108
+ # if iou > iou_threshold:
109
+ # merged_x1 = min(merged_x1, other_box[0])
110
+ # merged_y1 = min(merged_y1, other_box[1])
111
+ # merged_x2 = max(merged_x2, other_box[2])
112
+ # merged_y2 = max(merged_y2, other_box[3])
113
+ # is_merged[j] = True
114
+ # merged_detections.append({
115
+ # 'coords': (merged_x1, merged_y1, merged_x2, merged_y2),
116
+ # 'y1': merged_y1, 'class': current_class, 'conf': detections[i]['conf']
117
+ # })
118
+ # return merged_detections
119
+
120
+ # # ============================================================================
121
+ # # --- UTILITY FUNCTIONS ---
122
+ # # ============================================================================
123
+
124
+ # def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
125
+ # """Converts a PyMuPDF Pixmap to a NumPy array for OpenCV/YOLO."""
126
+ # # This function is retained as it's required to convert PDF page to image for YOLO input.
127
+ # img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
128
+ # (pix.h, pix.w, pix.n)
129
+ # )
130
+ # if pix.n == 4:
131
+ # img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
132
+ # elif pix.n == 1:
133
+ # img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
134
+ # return img
135
+
136
+
137
+ # def run_yolo_detection_and_count(
138
+ # image: np.ndarray, model: YOLO, page_num: int
139
+ # ) -> Tuple[int, int]: # Removed equation_results list from return
140
+ # """
141
+ # Runs YOLO inference, applies NMS/filtering, and updates global counters.
142
+ # Returns page counts only.
143
+ # """
144
+ # global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
145
+
146
+ # yolo_detections = []
147
+ # page_equations = 0
148
+ # page_figures = 0
149
+
150
+ # try:
151
+ # results = model.predict(image, conf=CONF_THRESHOLD, verbose=False)
152
+
153
+ # if results and results[0].boxes:
154
+ # for box in results[0].boxes.data.tolist():
155
+ # x1, y1, x2, y2, conf, cls_id = box
156
+ # cls_name = model.names[int(cls_id)]
157
+
158
+ # if cls_name in TARGET_CLASSES:
159
+ # yolo_detections.append({
160
+ # 'coords': (x1, y1, x2, y2),
161
+ # 'class': cls_name,
162
+ # 'conf': conf
163
+ # })
164
+ # except Exception as e:
165
+ # logging.error(f"YOLO inference failed on page {page_num}: {e}")
166
+ # return 0, 0
167
+
168
+ # # Apply NMS/Merging/Filtering
169
+ # merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
170
+ # final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
171
+
172
+ # # Update Global Counters
173
+ # for det in final_detections:
174
+ # if det['class'] == 'figure':
175
+ # GLOBAL_FIGURE_COUNT += 1
176
+ # page_figures += 1
177
+ # elif det['class'] == 'equation':
178
+ # GLOBAL_EQUATION_COUNT += 1
179
+ # page_equations += 1
180
+
181
+ # logging.warning(f" -> Page {page_num}: EQs={page_equations}, Figs={page_figures}")
182
+ # return page_equations, page_figures
183
+
184
+
185
+ # # ============================================================================
186
+ # # --- MAIN DOCUMENT PROCESSING FUNCTION (Optimized) ---
187
+ # # ============================================================================
188
+
189
+ # def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, float, List[str]]:
190
+ # """
191
+ # Runs the pipeline, returns counts, report, total time, and an empty list
192
+ # (maintaining the expected return signature for Gradio but with None for gallery).
193
+ # """
194
+ # global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
195
+ # start_time = time.time()
196
+ # log_messages = []
197
+
198
+ # # Reset globals
199
+ # GLOBAL_FIGURE_COUNT = 0
200
+ # GLOBAL_EQUATION_COUNT = 0
201
+
202
+ # # 1. Validation and Model Loading
203
+ # t0 = time.time()
204
+ # if not os.path.exists(pdf_path):
205
+ # report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
206
+ # return 0, 0, 0, report, time.time() - start_time, []
207
+
208
+ # try:
209
+ # model = YOLO(WEIGHTS_PATH)
210
+ # logging.warning(f"βœ… Loaded YOLO model from: {WEIGHTS_PATH}")
211
+ # except Exception as e:
212
+ # report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
213
+ # return 0, 0, 0, report, time.time() - start_time, []
214
+ # t1 = time.time()
215
+ # log_messages.append(f"Model Loading Time: {t1-t0:.4f}s")
216
+
217
+ # # 2. PDF Loading
218
+ # t2 = time.time()
219
+ # try:
220
+ # doc = fitz.open(pdf_path)
221
+ # total_pages = doc.page_count
222
+ # logging.warning(f"βœ… Opened PDF with {doc.page_count} pages")
223
+ # except Exception as e:
224
+ # report = f"❌ ERROR loading PDF file: {e}"
225
+ # return 0, 0, 0, report, time.time() - start_time, []
226
+ # t3 = time.time()
227
+ # log_messages.append(f"PDF Initialization Time: {t3-t2:.4f}s")
228
+
229
+ # mat = fitz.Matrix(SCALE_FACTOR, SCALE_FACTOR)
230
+
231
+ # # 3. Page Processing and Detection Loop
232
+ # t4 = time.time()
233
+ # for page_num_0_based in range(doc.page_count):
234
+ # page_start_time = time.time()
235
+ # fitz_page = doc.load_page(page_num_0_based)
236
+ # page_num = page_num_0_based + 1
237
+
238
+ # # Render page to image for YOLO
239
+ # try:
240
+ # pix_start = time.time()
241
+ # pix = fitz_page.get_pixmap(matrix=mat)
242
+ # original_img = pixmap_to_numpy(pix)
243
+ # pix_time = time.time() - pix_start
244
+ # except Exception as e:
245
+ # logging.error(f"Error converting page {page_num} to image: {e}. Skipping.")
246
+ # continue
247
+
248
+ # # Core Detection
249
+ # detect_start = time.time()
250
+ # run_yolo_detection_and_count(original_img, model, page_num)
251
+ # detect_time = time.time() - detect_start
252
+
253
+ # page_total_time = time.time() - page_start_time
254
+ # log_messages.append(f"Page {page_num} Time: Total={page_total_time:.4f}s (Render={pix_time:.4f}s, Detect={detect_time:.4f}s)")
255
+
256
+ # doc.close()
257
+ # t5 = time.time()
258
+ # detection_loop_time = t5 - t4
259
+ # log_messages.append(f"Total Detection Loop Time ({total_pages} pages): {detection_loop_time:.4f}s")
260
+
261
+ # # 4. Final Report Generation
262
+ # total_execution_time = t5 - start_time
263
+
264
+ # report = (
265
+ # f"βœ… **YOLO Counting Complete!**\n\n"
266
+ # f"**1) Total Pages Detected in PDF:** **{total_pages}**\n"
267
+ # f"**2) Total Equations Detected:** **{GLOBAL_EQUATION_COUNT}**\n"
268
+ # f"**3) Total Figures Detected:** **{GLOBAL_FIGURE_COUNT}**\n"
269
+ # f"---\n"
270
+ # f"**4) Total Execution Time:** **{total_execution_time:.4f}s**\n"
271
+ # f"### Detailed Step Timing\n"
272
+ # f"```\n"
273
+ # + "\n".join(log_messages) +
274
+ # f"\n```"
275
+ # )
276
+
277
+ # # Return total_execution_time and an empty list for the gallery output
278
+ # return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report, total_execution_time, []
279
+
280
+
281
+ # # ============================================================================
282
+ # # --- GRADIO INTERFACE FUNCTION (Updated) ---
283
+ # # ============================================================================
284
+
285
+ # # NOTE: The return signature has changed. We removed 'temp_output_dir' as it's no longer used.
286
+ # def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, List[str]]:
287
+ # """
288
+ # Gradio wrapper function to handle file upload and return results (no image handling).
289
+ # """
290
+ # if pdf_file is None:
291
+ # return "N/A", "N/A", "N/A", "Please upload a PDF file.", []
292
+
293
+ # pdf_path = pdf_file.name
294
+
295
+ # try:
296
+ # # Run the core logic
297
+ # # Note the change: temp_output_dir is removed, and total_time is returned
298
+ # num_pages, num_equations, num_figures, report, total_time, _ = run_single_pdf_preprocessing(
299
+ # pdf_path
300
+ # )
301
+
302
+ # # Return results (the last item is an empty list for the now-empty gallery)
303
+ # return str(num_pages), str(num_equations), str(num_figures), report, []
304
+
305
+ # except Exception as e:
306
+ # error_msg = f"An unexpected error occurred: {e}"
307
+ # logging.error(error_msg, exc_info=True)
308
+ # return "Error", "Error", "Error", error_msg, []
309
+
310
+
311
+ # # ============================================================================
312
+ # # --- GRADIO INTERFACE DEFINITION (Updated) ---
313
+ # # ============================================================================
314
+
315
+ # if __name__ == "__main__":
316
+
317
+ # if not os.path.exists(WEIGHTS_PATH):
318
+ # logging.error(f"❌ FATAL ERROR: YOLO weight file '{WEIGHTS_PATH}' not found. Cannot run live inference.")
319
+
320
+ # input_file = gr.File(label="Upload PDF Document", type="filepath", file_types=[".pdf"])
321
+
322
+ # # Outputs
323
+ # output_pages = gr.Textbox(label="Total Pages in PDF", interactive=False)
324
+ # output_equations = gr.Textbox(label="Total Equations Detected", interactive=False)
325
+ # output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
326
+ # output_report = gr.Markdown(label="Processing Summary and Timing")
327
+
328
+ # # Gradio Gallery is retained but will receive an empty list []
329
+ # output_gallery = gr.Gallery(
330
+ # label="Detected Equations (Disabled for Speed)",
331
+ # columns=5,
332
+ # height="auto",
333
+ # object_fit="contain",
334
+ # allow_preview=False # Disable preview since it's empty
335
+ # )
336
+
337
+ # interface = gr.Interface(
338
+ # fn=gradio_process_pdf,
339
+ # inputs=input_file,
340
+ # # The number of outputs remains 5 (3 textboxes, 1 markdown, 1 gallery)
341
+ # outputs=[output_pages, output_equations, output_figures, output_report, output_gallery],
342
+ # title="πŸš€ Optimized YOLO Counting with Timing",
343
+ # description=(
344
+ # "Upload a PDF to run YOLO detection. Image cropping is disabled for maximum speed. "
345
+ # "Timing for each step is included in the summary report."
346
+ # ),
347
+ # )
348
+
349
+ # print("\nStarting Gradio application...")
350
+ # interface.launch(inbrowser=True)
351
+
352
+
353
+
354
+
355
  import fitz # PyMuPDF
356
  import numpy as np
357
  import cv2
358
  import torch
359
  import torch.serialization
360
  import os
361
+ import time
362
  from typing import Optional, Tuple, List, Dict, Any
363
  from ultralytics import YOLO
364
  import logging
 
385
  # ============================================================================
386
 
387
  WEIGHTS_PATH = 'best.pt'
388
+ SCALE_FACTOR = 2.0
389
 
390
  # Detection parameters
391
  CONF_THRESHOLD = 0.2
 
398
  GLOBAL_EQUATION_COUNT = 0
399
 
400
  # ============================================================================
401
+ # --- BOX COMBINATION LOGIC (Retained for detection accuracy) ---
402
  # ============================================================================
403
 
404
  def calculate_iou(box1, box2):
 
473
 
474
  def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
475
  """Converts a PyMuPDF Pixmap to a NumPy array for OpenCV/YOLO."""
 
476
  img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
477
  (pix.h, pix.w, pix.n)
478
  )
 
485
 
486
  def run_yolo_detection_and_count(
487
  image: np.ndarray, model: YOLO, page_num: int
488
+ ) -> Tuple[int, int]:
489
  """
490
  Runs YOLO inference, applies NMS/filtering, and updates global counters.
491
  Returns page counts only.
 
532
 
533
 
534
  # ============================================================================
535
+ # --- MAIN DOCUMENT PROCESSING FUNCTION (Revised for Dict Return) ---
536
  # ============================================================================
537
 
538
+ # NOTE: The return signature now includes the equation_counts_per_page dictionary
539
+ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, float, Dict[int, int], List[str]]:
540
  """
541
+ Runs the pipeline, returns counts, report, total time, page counts dict, and empty list.
 
542
  """
543
  global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
544
  start_time = time.time()
545
  log_messages = []
546
+
547
+ # NEW: Dictionary to store {page_number: equation_count}
548
+ equation_counts_per_page: Dict[int, int] = {}
549
 
550
  # Reset globals
551
  GLOBAL_FIGURE_COUNT = 0
 
555
  t0 = time.time()
556
  if not os.path.exists(pdf_path):
557
  report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
558
+ # Add the new return value (empty dict)
559
+ return 0, 0, 0, report, time.time() - start_time, {}, []
560
 
561
  try:
562
  model = YOLO(WEIGHTS_PATH)
563
  logging.warning(f"βœ… Loaded YOLO model from: {WEIGHTS_PATH}")
564
  except Exception as e:
565
  report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
566
+ # Add the new return value (empty dict)
567
+ return 0, 0, 0, report, time.time() - start_time, {}, []
568
  t1 = time.time()
569
  log_messages.append(f"Model Loading Time: {t1-t0:.4f}s")
570
 
 
576
  logging.warning(f"βœ… Opened PDF with {doc.page_count} pages")
577
  except Exception as e:
578
  report = f"❌ ERROR loading PDF file: {e}"
579
+ # Add the new return value (empty dict)
580
+ return 0, 0, 0, report, time.time() - start_time, {}, []
581
  t3 = time.time()
582
  log_messages.append(f"PDF Initialization Time: {t3-t2:.4f}s")
583
 
 
602
 
603
  # Core Detection
604
  detect_start = time.time()
605
+ page_equations, _ = run_yolo_detection_and_count(original_img, model, page_num)
606
  detect_time = time.time() - detect_start
607
 
608
+ # NEW: Store the count in the dictionary
609
+ equation_counts_per_page[page_num] = page_equations
610
+
611
  page_total_time = time.time() - page_start_time
612
  log_messages.append(f"Page {page_num} Time: Total={page_total_time:.4f}s (Render={pix_time:.4f}s, Detect={detect_time:.4f}s)")
613
 
 
632
  f"\n```"
633
  )
634
 
635
+ # Return the new dictionary as the sixth element, and an empty list as the seventh
636
+ return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report, total_execution_time, equation_counts_per_page, []
637
 
638
 
639
  # ============================================================================
640
  # --- GRADIO INTERFACE FUNCTION (Updated) ---
641
  # ============================================================================
642
 
643
+ # NOTE: The return signature now includes the equation_counts_per_page dictionary (Dict[int, int])
644
+ def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, Dict[int, int], List[str]]:
645
  """
646
+ Gradio wrapper function to handle file upload and return results.
647
  """
648
  if pdf_file is None:
649
+ # Return an empty dict for the new JSON output
650
+ return "N/A", "N/A", "N/A", "Please upload a PDF file.", {}, []
651
 
652
  pdf_path = pdf_file.name
653
 
654
  try:
655
+ # Unpack the new return value: equation_counts_per_page
656
+ num_pages, num_equations, num_figures, report, total_time, equation_counts_per_page, _ = run_single_pdf_preprocessing(
 
657
  pdf_path
658
  )
659
 
660
+ # Return results (5 items now: 3 textboxes, 1 markdown, 1 JSON, 1 empty list for gallery)
661
+ return str(num_pages), str(num_equations), str(num_figures), report, equation_counts_per_page, []
662
 
663
  except Exception as e:
664
  error_msg = f"An unexpected error occurred: {e}"
665
  logging.error(error_msg, exc_info=True)
666
+ # Return an empty dict for the new JSON output on error
667
+ return "Error", "Error", "Error", error_msg, {}, []
668
 
669
 
670
  # ============================================================================
 
684
  output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
685
  output_report = gr.Markdown(label="Processing Summary and Timing")
686
 
687
+ # NEW OUTPUT: JSON component for structured data
688
+ output_page_counts = gr.JSON(label="Equation Count Per Page (Dictionary)")
689
+
690
  # Gradio Gallery is retained but will receive an empty list []
691
  output_gallery = gr.Gallery(
692
  label="Detected Equations (Disabled for Speed)",
693
  columns=5,
694
  height="auto",
695
  object_fit="contain",
696
+ allow_preview=False
697
  )
698
 
699
  interface = gr.Interface(
700
  fn=gradio_process_pdf,
701
  inputs=input_file,
702
+ # Outputs now include the JSON component
703
+ outputs=[
704
+ output_pages,
705
+ output_equations,
706
+ output_figures,
707
+ output_report,
708
+ output_page_counts, # New
709
+ output_gallery
710
+ ],
711
+ title="πŸ“Š YOLO Counting with Per-Page Data & Timing",
712
  description=(
713
+ "Upload a PDF to run YOLO detection. The results include total counts, a breakdown of "
714
+ "equation counts per page (in JSON format), and detailed timing."
715
  ),
716
  )
717