heerjtdev committed on
Commit
8d96f17
·
verified ·
1 Parent(s): 4c1e812

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +442 -89
app.py CHANGED
@@ -1,9 +1,381 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import fitz # PyMuPDF
2
  import numpy as np
3
  import cv2
4
  import torch
5
  import torch.serialization
6
  import os
 
7
  from typing import Optional, Tuple, List, Dict, Any
8
  from ultralytics import YOLO
9
  import logging
@@ -118,6 +490,7 @@ def merge_overlapping_boxes(detections, iou_threshold):
118
 
119
  def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
120
  """Converts a PyMuPDF Pixmap to a NumPy array for OpenCV/YOLO."""
 
121
  img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
122
  (pix.h, pix.w, pix.n)
123
  )
@@ -130,10 +503,10 @@ def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
130
 
131
  def run_yolo_detection_and_count(
132
  image: np.ndarray, model: YOLO, page_num: int
133
- ) -> Tuple[int, int, List[Dict[str, Any]]]:
134
  """
135
  Runs YOLO inference, applies NMS/filtering, and updates global counters.
136
- Returns counts AND a list of equation detection results (PDF coordinates).
137
  """
138
  global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
139
 
@@ -157,174 +530,153 @@ def run_yolo_detection_and_count(
157
  })
158
  except Exception as e:
159
  logging.error(f"YOLO inference failed on page {page_num}: {e}")
160
- return 0, 0, []
161
 
162
  # Apply NMS/Merging/Filtering
163
  merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
164
  final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
165
 
166
- equation_results = []
167
-
168
  # Update Global Counters
169
  for det in final_detections:
170
- # Scale coordinates back to the original PDF space (points)
171
- x1_pix, y1_pix, x2_pix, y2_pix = det['coords']
172
- x1_pdf = x1_pix / SCALE_FACTOR
173
- y1_pdf = y1_pix / SCALE_FACTOR
174
- x2_pdf = x2_pix / SCALE_FACTOR
175
- y2_pdf = y2_pix / SCALE_FACTOR
176
-
177
  if det['class'] == 'figure':
178
  GLOBAL_FIGURE_COUNT += 1
179
  page_figures += 1
180
  elif det['class'] == 'equation':
181
  GLOBAL_EQUATION_COUNT += 1
182
  page_equations += 1
183
- equation_results.append({
184
- 'page': page_num,
185
- 'bbox_pdf': (x1_pdf, y1_pdf, x2_pdf, y2_pdf) # Coordinates in PDF space
186
- })
187
 
188
  logging.warning(f" -> Page {page_num}: EQs={page_equations}, Figs={page_figures}")
189
- return page_equations, page_figures, equation_results
190
 
191
 
192
  # ============================================================================
193
- # --- MAIN DOCUMENT PROCESSING FUNCTION (Fixed for unique filenames) ---
194
  # ============================================================================
195
 
196
- def run_single_pdf_preprocessing(pdf_path: str, temp_output_dir: str) -> Tuple[int, int, int, str, List[str]]:
197
  """
198
- Runs the pipeline, returns counts, report, and a list of paths to cropped equation images.
 
199
  """
200
  global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
 
 
201
 
202
  # Reset globals
203
  GLOBAL_FIGURE_COUNT = 0
204
  GLOBAL_EQUATION_COUNT = 0
205
 
 
 
206
  if not os.path.exists(pdf_path):
207
  report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
208
- return 0, 0, 0, report, []
209
 
210
- # Model Loading
211
  try:
212
  model = YOLO(WEIGHTS_PATH)
213
  logging.warning(f"βœ… Loaded YOLO model from: {WEIGHTS_PATH}")
214
  except Exception as e:
215
  report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
216
- return 0, 0, 0, report, []
217
-
218
- # PDF Loading
 
 
 
219
  try:
220
  doc = fitz.open(pdf_path)
221
  total_pages = doc.page_count
222
  logging.warning(f"βœ… Opened PDF with {doc.page_count} pages")
223
  except Exception as e:
224
  report = f"❌ ERROR loading PDF file: {e}"
225
- return 0, 0, 0, report, []
 
 
226
 
227
  mat = fitz.Matrix(SCALE_FACTOR, SCALE_FACTOR)
228
- all_equation_images = [] # Stores file paths (strings) for Gradio gallery
229
-
230
- # NEW LOCAL COUNTER: Tracks total equations processed for unique filename creation
231
- equation_save_count = 0
232
 
 
 
233
  for page_num_0_based in range(doc.page_count):
 
234
  fitz_page = doc.load_page(page_num_0_based)
235
  page_num = page_num_0_based + 1
236
 
 
237
  try:
 
238
  pix = fitz_page.get_pixmap(matrix=mat)
239
  original_img = pixmap_to_numpy(pix)
 
240
  except Exception as e:
241
  logging.error(f"Error converting page {page_num} to image: {e}. Skipping.")
242
  continue
243
-
244
- # Core Detection, Counting, and Equation Result Collection
245
- _, _, equation_results_page = run_yolo_detection_and_count(
246
- original_img, model, page_num
247
- )
248
 
249
- # --- Image Cropping and Saving for Debugging ---
250
- for eq in equation_results_page:
251
- bbox = eq['bbox_pdf']
252
-
253
- try:
254
- # Fixed Rect object creation
255
- rect = fitz.Rect(bbox)
256
- clip_rect = rect + (0, 0, 5, 5) # Add small padding
257
-
258
- # Get the pixmap for the cropped area (high-res render)
259
- eq_pix = fitz_page.get_pixmap(matrix=fitz.Matrix(3.0, 3.0), clip=clip_rect)
260
-
261
- # Save to a temporary file path
262
- img_bytes = eq_pix.tobytes("png")
263
-
264
- # FIX APPLIED: Increment and use local counter for unique filename
265
- equation_save_count += 1
266
- filename = f"eq_{equation_save_count}_p{page_num}.png"
267
-
268
- output_path = os.path.join(temp_output_dir, filename)
269
-
270
- with open(output_path, 'wb') as f:
271
- f.write(img_bytes)
272
-
273
- all_equation_images.append(output_path)
274
-
275
- except Exception as e:
276
- logging.error(f"Error cropping equation on page {page_num} with bbox {bbox}: {e}")
277
-
278
  doc.close()
 
 
 
279
 
280
- # Final Report Generation (GLOBAL_EQUATION_COUNT is correct here)
 
 
281
  report = (
282
  f"βœ… **YOLO Counting Complete!**\n\n"
283
  f"**1) Total Pages Detected in PDF:** **{total_pages}**\n"
284
  f"**2) Total Equations Detected:** **{GLOBAL_EQUATION_COUNT}**\n"
285
- f"**3) Total Figures Detected:** **{GLOBAL_FIGURE_COUNT}**"
 
 
 
 
 
 
286
  )
287
 
288
- # Return the list of file paths (strings)
289
- return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report, all_equation_images
290
 
291
 
292
  # ============================================================================
293
- # --- GRADIO INTERFACE FUNCTION ---
294
  # ============================================================================
295
 
 
296
  def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, List[str]]:
297
  """
298
- Gradio wrapper function to handle file upload, manage temporary directory, and return file paths.
299
  """
300
  if pdf_file is None:
301
  return "N/A", "N/A", "N/A", "Please upload a PDF file.", []
302
 
303
  pdf_path = pdf_file.name
304
- temp_output_dir = tempfile.mkdtemp() # Create temp directory
305
 
306
  try:
307
- # Run the core logic, passing the temp directory
308
- num_pages, num_equations, num_figures, report, equation_images = run_single_pdf_preprocessing(
309
- pdf_path, temp_output_dir
 
310
  )
311
 
312
- # Return results and the list of image file paths
313
- return str(num_pages), str(num_equations), str(num_figures), report, equation_images
314
 
315
  except Exception as e:
316
  error_msg = f"An unexpected error occurred: {e}"
317
  logging.error(error_msg, exc_info=True)
318
- # Still clean up in case of a hard error
319
- shutil.rmtree(temp_output_dir, ignore_errors=True)
320
  return "Error", "Error", "Error", error_msg, []
321
-
322
- # NOTE: The final cleanup block for success case is intentionally removed
323
- # to prevent files from being deleted before Gradio can serve them.
324
 
325
 
326
  # ============================================================================
327
- # --- GRADIO INTERFACE DEFINITION ---
328
  # ============================================================================
329
 
330
  if __name__ == "__main__":
@@ -338,25 +690,26 @@ if __name__ == "__main__":
338
  output_pages = gr.Textbox(label="Total Pages in PDF", interactive=False)
339
  output_equations = gr.Textbox(label="Total Equations Detected", interactive=False)
340
  output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
341
- output_report = gr.Markdown(label="Processing Summary")
342
 
343
- # Gradio Gallery expects a list of file paths (strings)
344
  output_gallery = gr.Gallery(
345
- label="Detected Equations for Debugging",
346
  columns=5,
347
  height="auto",
348
  object_fit="contain",
349
- allow_preview=True
350
  )
351
 
352
  interface = gr.Interface(
353
  fn=gradio_process_pdf,
354
  inputs=input_file,
 
355
  outputs=[output_pages, output_equations, output_figures, output_report, output_gallery],
356
- title="🎯 Minimalist YOLO Counting & Equation Debugger",
357
  description=(
358
- "Upload a PDF to run YOLO detection using your **`best.pt`** model. "
359
- "The counts are displayed, and a gallery of **all detected equation images** is shown for debugging."
360
  ),
361
  )
362
 
 
1
+ # import fitz # PyMuPDF
2
+ # import numpy as np
3
+ # import cv2
4
+ # import torch
5
+ # import torch.serialization
6
+ # import os
7
+ # from typing import Optional, Tuple, List, Dict, Any
8
+ # from ultralytics import YOLO
9
+ # import logging
10
+ # import gradio as gr
11
+ # import shutil
12
+ # import tempfile
13
+ # import io
14
+
15
+ # # ============================================================================
16
+ # # --- Global Patches and Setup ---
17
+ # # ============================================================================
18
+
19
+ # # Patch torch.load to prevent weights_only error with older models
20
+ # _original_torch_load = torch.load
21
+ # def patched_torch_load(*args, **kwargs):
22
+ # kwargs["weights_only"] = False
23
+ # return _original_torch_load(*args, **kwargs)
24
+ # torch.load = patched_torch_load
25
+
26
+ # logging.basicConfig(level=logging.WARNING)
27
+
28
+ # # ============================================================================
29
+ # # --- CONFIGURATION AND CONSTANTS ---
30
+ # # ============================================================================
31
+
32
+ # WEIGHTS_PATH = 'best.pt'
33
+ # SCALE_FACTOR = 2.0 # Used for page rendering and coordinate scaling
34
+
35
+ # # Detection parameters
36
+ # CONF_THRESHOLD = 0.2
37
+ # TARGET_CLASSES = ['figure', 'equation']
38
+ # IOU_MERGE_THRESHOLD = 0.4
39
+ # IOA_SUPPRESSION_THRESHOLD = 0.7
40
+
41
+ # # Global counters (Reset per run)
42
+ # GLOBAL_FIGURE_COUNT = 0
43
+ # GLOBAL_EQUATION_COUNT = 0
44
+
45
+ # # ============================================================================
46
+ # # --- BOX COMBINATION LOGIC ---
47
+ # # ============================================================================
48
+
49
+ # def calculate_iou(box1, box2):
50
+ # x1_a, y1_a, x2_a, y2_a = box1
51
+ # x1_b, y1_b, x2_b, y2_b = box2
52
+ # x_left = max(x1_a, x1_b)
53
+ # y_top = max(y1_a, y1_b)
54
+ # x_right = min(x2_a, x2_b)
55
+ # y_bottom = min(y2_a, y2_b)
56
+ # intersection_area = max(0, x_right - x_left) * max(0, y_bottom - y_top)
57
+ # box_a_area = (x2_a - x1_a) * (y2_a - y1_a)
58
+ # box_b_area = (x2_b - x1_b) * (y2_b - y1_b)
59
+ # union_area = float(box_a_area + box_b_area - intersection_area)
60
+ # return intersection_area / union_area if union_area > 0 else 0
61
+
62
+
63
+ # def filter_nested_boxes(detections, ioa_threshold=0.80):
64
+ # if not detections: return []
65
+ # for d in detections:
66
+ # x1, y1, x2, y2 = d['coords']
67
+ # d['area'] = (x2 - x1) * (y2 - y1)
68
+ # detections.sort(key=lambda x: x['area'], reverse=True)
69
+ # keep_indices = []
70
+ # is_suppressed = [False] * len(detections)
71
+ # for i in range(len(detections)):
72
+ # if is_suppressed[i]: continue
73
+ # keep_indices.append(i)
74
+ # box_a = detections[i]['coords']
75
+ # for j in range(i + 1, len(detections)):
76
+ # if is_suppressed[j]: continue
77
+ # box_b = detections[j]['coords']
78
+ # x_left = max(box_a[0], box_b[0])
79
+ # y_top = max(box_a[1], box_b[1])
80
+ # x_right = min(box_a[2], box_b[2])
81
+ # y_bottom = min(box_a[3], box_b[3])
82
+ # intersection = max(0, x_right - x_left) * max(0, y_bottom - y_top)
83
+ # area_b = detections[j]['area']
84
+ # if area_b > 0 and intersection / area_b > ioa_threshold:
85
+ # is_suppressed[j] = True
86
+ # return [detections[i] for i in keep_indices]
87
+
88
+
89
+ # def merge_overlapping_boxes(detections, iou_threshold):
90
+ # if not detections: return []
91
+ # detections.sort(key=lambda d: d['conf'], reverse=True)
92
+ # merged_detections = []
93
+ # is_merged = [False] * len(detections)
94
+ # for i in range(len(detections)):
95
+ # if is_merged[i]: continue
96
+ # current_box = detections[i]['coords']
97
+ # current_class = detections[i]['class']
98
+ # merged_x1, merged_y1, merged_x2, merged_y2 = current_box
99
+ # for j in range(i + 1, len(detections)):
100
+ # if is_merged[j] or detections[j]['class'] != current_class: continue
101
+ # other_box = detections[j]['coords']
102
+ # iou = calculate_iou(current_box, other_box)
103
+ # if iou > iou_threshold:
104
+ # merged_x1 = min(merged_x1, other_box[0])
105
+ # merged_y1 = min(merged_y1, other_box[1])
106
+ # merged_x2 = max(merged_x2, other_box[2])
107
+ # merged_y2 = max(merged_y2, other_box[3])
108
+ # is_merged[j] = True
109
+ # merged_detections.append({
110
+ # 'coords': (merged_x1, merged_y1, merged_x2, merged_y2),
111
+ # 'y1': merged_y1, 'class': current_class, 'conf': detections[i]['conf']
112
+ # })
113
+ # return merged_detections
114
+
115
+ # # ============================================================================
116
+ # # --- UTILITY FUNCTIONS ---
117
+ # # ============================================================================
118
+
119
+ # def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
120
+ # """Converts a PyMuPDF Pixmap to a NumPy array for OpenCV/YOLO."""
121
+ # img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
122
+ # (pix.h, pix.w, pix.n)
123
+ # )
124
+ # if pix.n == 4:
125
+ # img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
126
+ # elif pix.n == 1:
127
+ # img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
128
+ # return img
129
+
130
+
131
+ # def run_yolo_detection_and_count(
132
+ # image: np.ndarray, model: YOLO, page_num: int
133
+ # ) -> Tuple[int, int, List[Dict[str, Any]]]:
134
+ # """
135
+ # Runs YOLO inference, applies NMS/filtering, and updates global counters.
136
+ # Returns counts AND a list of equation detection results (PDF coordinates).
137
+ # """
138
+ # global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
139
+
140
+ # yolo_detections = []
141
+ # page_equations = 0
142
+ # page_figures = 0
143
+
144
+ # try:
145
+ # results = model.predict(image, conf=CONF_THRESHOLD, verbose=False)
146
+
147
+ # if results and results[0].boxes:
148
+ # for box in results[0].boxes.data.tolist():
149
+ # x1, y1, x2, y2, conf, cls_id = box
150
+ # cls_name = model.names[int(cls_id)]
151
+
152
+ # if cls_name in TARGET_CLASSES:
153
+ # yolo_detections.append({
154
+ # 'coords': (x1, y1, x2, y2),
155
+ # 'class': cls_name,
156
+ # 'conf': conf
157
+ # })
158
+ # except Exception as e:
159
+ # logging.error(f"YOLO inference failed on page {page_num}: {e}")
160
+ # return 0, 0, []
161
+
162
+ # # Apply NMS/Merging/Filtering
163
+ # merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
164
+ # final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
165
+
166
+ # equation_results = []
167
+
168
+ # # Update Global Counters
169
+ # for det in final_detections:
170
+ # # Scale coordinates back to the original PDF space (points)
171
+ # x1_pix, y1_pix, x2_pix, y2_pix = det['coords']
172
+ # x1_pdf = x1_pix / SCALE_FACTOR
173
+ # y1_pdf = y1_pix / SCALE_FACTOR
174
+ # x2_pdf = x2_pix / SCALE_FACTOR
175
+ # y2_pdf = y2_pix / SCALE_FACTOR
176
+
177
+ # if det['class'] == 'figure':
178
+ # GLOBAL_FIGURE_COUNT += 1
179
+ # page_figures += 1
180
+ # elif det['class'] == 'equation':
181
+ # GLOBAL_EQUATION_COUNT += 1
182
+ # page_equations += 1
183
+ # equation_results.append({
184
+ # 'page': page_num,
185
+ # 'bbox_pdf': (x1_pdf, y1_pdf, x2_pdf, y2_pdf) # Coordinates in PDF space
186
+ # })
187
+
188
+ # logging.warning(f" -> Page {page_num}: EQs={page_equations}, Figs={page_figures}")
189
+ # return page_equations, page_figures, equation_results
190
+
191
+
192
+ # # ============================================================================
193
+ # # --- MAIN DOCUMENT PROCESSING FUNCTION (Fixed for unique filenames) ---
194
+ # # ============================================================================
195
+
196
+ # def run_single_pdf_preprocessing(pdf_path: str, temp_output_dir: str) -> Tuple[int, int, int, str, List[str]]:
197
+ # """
198
+ # Runs the pipeline, returns counts, report, and a list of paths to cropped equation images.
199
+ # """
200
+ # global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
201
+
202
+ # # Reset globals
203
+ # GLOBAL_FIGURE_COUNT = 0
204
+ # GLOBAL_EQUATION_COUNT = 0
205
+
206
+ # if not os.path.exists(pdf_path):
207
+ # report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
208
+ # return 0, 0, 0, report, []
209
+
210
+ # # Model Loading
211
+ # try:
212
+ # model = YOLO(WEIGHTS_PATH)
213
+ # logging.warning(f"βœ… Loaded YOLO model from: {WEIGHTS_PATH}")
214
+ # except Exception as e:
215
+ # report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
216
+ # return 0, 0, 0, report, []
217
+
218
+ # # PDF Loading
219
+ # try:
220
+ # doc = fitz.open(pdf_path)
221
+ # total_pages = doc.page_count
222
+ # logging.warning(f"βœ… Opened PDF with {doc.page_count} pages")
223
+ # except Exception as e:
224
+ # report = f"❌ ERROR loading PDF file: {e}"
225
+ # return 0, 0, 0, report, []
226
+
227
+ # mat = fitz.Matrix(SCALE_FACTOR, SCALE_FACTOR)
228
+ # all_equation_images = [] # Stores file paths (strings) for Gradio gallery
229
+
230
+ # # NEW LOCAL COUNTER: Tracks total equations processed for unique filename creation
231
+ # equation_save_count = 0
232
+
233
+ # for page_num_0_based in range(doc.page_count):
234
+ # fitz_page = doc.load_page(page_num_0_based)
235
+ # page_num = page_num_0_based + 1
236
+
237
+ # try:
238
+ # pix = fitz_page.get_pixmap(matrix=mat)
239
+ # original_img = pixmap_to_numpy(pix)
240
+ # except Exception as e:
241
+ # logging.error(f"Error converting page {page_num} to image: {e}. Skipping.")
242
+ # continue
243
+
244
+ # # Core Detection, Counting, and Equation Result Collection
245
+ # _, _, equation_results_page = run_yolo_detection_and_count(
246
+ # original_img, model, page_num
247
+ # )
248
+
249
+ # # --- Image Cropping and Saving for Debugging ---
250
+ # for eq in equation_results_page:
251
+ # bbox = eq['bbox_pdf']
252
+
253
+ # try:
254
+ # # Fixed Rect object creation
255
+ # rect = fitz.Rect(bbox)
256
+ # clip_rect = rect + (0, 0, 5, 5) # Add small padding
257
+
258
+ # # Get the pixmap for the cropped area (high-res render)
259
+ # eq_pix = fitz_page.get_pixmap(matrix=fitz.Matrix(3.0, 3.0), clip=clip_rect)
260
+
261
+ # # Save to a temporary file path
262
+ # img_bytes = eq_pix.tobytes("png")
263
+
264
+ # # FIX APPLIED: Increment and use local counter for unique filename
265
+ # equation_save_count += 1
266
+ # filename = f"eq_{equation_save_count}_p{page_num}.png"
267
+
268
+ # output_path = os.path.join(temp_output_dir, filename)
269
+
270
+ # with open(output_path, 'wb') as f:
271
+ # f.write(img_bytes)
272
+
273
+ # all_equation_images.append(output_path)
274
+
275
+ # except Exception as e:
276
+ # logging.error(f"Error cropping equation on page {page_num} with bbox {bbox}: {e}")
277
+
278
+ # doc.close()
279
+
280
+ # # Final Report Generation (GLOBAL_EQUATION_COUNT is correct here)
281
+ # report = (
282
+ # f"βœ… **YOLO Counting Complete!**\n\n"
283
+ # f"**1) Total Pages Detected in PDF:** **{total_pages}**\n"
284
+ # f"**2) Total Equations Detected:** **{GLOBAL_EQUATION_COUNT}**\n"
285
+ # f"**3) Total Figures Detected:** **{GLOBAL_FIGURE_COUNT}**"
286
+ # )
287
+
288
+ # # Return the list of file paths (strings)
289
+ # return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report, all_equation_images
290
+
291
+
292
+ # # ============================================================================
293
+ # # --- GRADIO INTERFACE FUNCTION ---
294
+ # # ============================================================================
295
+
296
+ # def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, List[str]]:
297
+ # """
298
+ # Gradio wrapper function to handle file upload, manage temporary directory, and return file paths.
299
+ # """
300
+ # if pdf_file is None:
301
+ # return "N/A", "N/A", "N/A", "Please upload a PDF file.", []
302
+
303
+ # pdf_path = pdf_file.name
304
+ # temp_output_dir = tempfile.mkdtemp() # Create temp directory
305
+
306
+ # try:
307
+ # # Run the core logic, passing the temp directory
308
+ # num_pages, num_equations, num_figures, report, equation_images = run_single_pdf_preprocessing(
309
+ # pdf_path, temp_output_dir
310
+ # )
311
+
312
+ # # Return results and the list of image file paths
313
+ # return str(num_pages), str(num_equations), str(num_figures), report, equation_images
314
+
315
+ # except Exception as e:
316
+ # error_msg = f"An unexpected error occurred: {e}"
317
+ # logging.error(error_msg, exc_info=True)
318
+ # # Still clean up in case of a hard error
319
+ # shutil.rmtree(temp_output_dir, ignore_errors=True)
320
+ # return "Error", "Error", "Error", error_msg, []
321
+
322
+ # # NOTE: The final cleanup block for success case is intentionally removed
323
+ # # to prevent files from being deleted before Gradio can serve them.
324
+
325
+
326
+ # # ============================================================================
327
+ # # --- GRADIO INTERFACE DEFINITION ---
328
+ # # ============================================================================
329
+
330
+ # if __name__ == "__main__":
331
+
332
+ # if not os.path.exists(WEIGHTS_PATH):
333
+ # logging.error(f"❌ FATAL ERROR: YOLO weight file '{WEIGHTS_PATH}' not found. Cannot run live inference.")
334
+
335
+ # input_file = gr.File(label="Upload PDF Document", type="filepath", file_types=[".pdf"])
336
+
337
+ # # Outputs
338
+ # output_pages = gr.Textbox(label="Total Pages in PDF", interactive=False)
339
+ # output_equations = gr.Textbox(label="Total Equations Detected", interactive=False)
340
+ # output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
341
+ # output_report = gr.Markdown(label="Processing Summary")
342
+
343
+ # # Gradio Gallery expects a list of file paths (strings)
344
+ # output_gallery = gr.Gallery(
345
+ # label="Detected Equations for Debugging",
346
+ # columns=5,
347
+ # height="auto",
348
+ # object_fit="contain",
349
+ # allow_preview=True
350
+ # )
351
+
352
+ # interface = gr.Interface(
353
+ # fn=gradio_process_pdf,
354
+ # inputs=input_file,
355
+ # outputs=[output_pages, output_equations, output_figures, output_report, output_gallery],
356
+ # title="🎯 Minimalist YOLO Counting & Equation Debugger",
357
+ # description=(
358
+ # "Upload a PDF to run YOLO detection using your **`best.pt`** model. "
359
+ # "The counts are displayed, and a gallery of **all detected equation images** is shown for debugging."
360
+ # ),
361
+ # )
362
+
363
+ # print("\nStarting Gradio application...")
364
+ # interface.launch(inbrowser=True)
365
+
366
+
367
+
368
+
369
+
370
+
371
+
372
  import fitz # PyMuPDF
373
  import numpy as np
374
  import cv2
375
  import torch
376
  import torch.serialization
377
  import os
378
+ import time # Import for timing
379
  from typing import Optional, Tuple, List, Dict, Any
380
  from ultralytics import YOLO
381
  import logging
 
490
 
491
  def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
492
  """Converts a PyMuPDF Pixmap to a NumPy array for OpenCV/YOLO."""
493
+ # This function is retained as it's required to convert PDF page to image for YOLO input.
494
  img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
495
  (pix.h, pix.w, pix.n)
496
  )
 
503
 
504
  def run_yolo_detection_and_count(
505
  image: np.ndarray, model: YOLO, page_num: int
506
+ ) -> Tuple[int, int]: # Removed equation_results list from return
507
  """
508
  Runs YOLO inference, applies NMS/filtering, and updates global counters.
509
+ Returns page counts only.
510
  """
511
  global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
512
 
 
530
  })
531
  except Exception as e:
532
  logging.error(f"YOLO inference failed on page {page_num}: {e}")
533
+ return 0, 0
534
 
535
  # Apply NMS/Merging/Filtering
536
  merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
537
  final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
538
 
 
 
539
  # Update Global Counters
540
  for det in final_detections:
 
 
 
 
 
 
 
541
  if det['class'] == 'figure':
542
  GLOBAL_FIGURE_COUNT += 1
543
  page_figures += 1
544
  elif det['class'] == 'equation':
545
  GLOBAL_EQUATION_COUNT += 1
546
  page_equations += 1
 
 
 
 
547
 
548
  logging.warning(f" -> Page {page_num}: EQs={page_equations}, Figs={page_figures}")
549
+ return page_equations, page_figures
550
 
551
 
552
  # ============================================================================
553
+ # --- MAIN DOCUMENT PROCESSING FUNCTION (Optimized) ---
554
  # ============================================================================
555
 
556
+ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, float, List[str]]:
557
  """
558
+ Runs the pipeline, returns counts, report, total time, and an empty list
559
+ (maintaining the expected return signature for Gradio but with None for gallery).
560
  """
561
  global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
562
+ start_time = time.time()
563
+ log_messages = []
564
 
565
  # Reset globals
566
  GLOBAL_FIGURE_COUNT = 0
567
  GLOBAL_EQUATION_COUNT = 0
568
 
569
+ # 1. Validation and Model Loading
570
+ t0 = time.time()
571
  if not os.path.exists(pdf_path):
572
  report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
573
+ return 0, 0, 0, report, time.time() - start_time, []
574
 
 
575
  try:
576
  model = YOLO(WEIGHTS_PATH)
577
  logging.warning(f"βœ… Loaded YOLO model from: {WEIGHTS_PATH}")
578
  except Exception as e:
579
  report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
580
+ return 0, 0, 0, report, time.time() - start_time, []
581
+ t1 = time.time()
582
+ log_messages.append(f"Model Loading Time: {t1-t0:.4f}s")
583
+
584
+ # 2. PDF Loading
585
+ t2 = time.time()
586
  try:
587
  doc = fitz.open(pdf_path)
588
  total_pages = doc.page_count
589
  logging.warning(f"βœ… Opened PDF with {doc.page_count} pages")
590
  except Exception as e:
591
  report = f"❌ ERROR loading PDF file: {e}"
592
+ return 0, 0, 0, report, time.time() - start_time, []
593
+ t3 = time.time()
594
+ log_messages.append(f"PDF Initialization Time: {t3-t2:.4f}s")
595
 
596
  mat = fitz.Matrix(SCALE_FACTOR, SCALE_FACTOR)
 
 
 
 
597
 
598
+ # 3. Page Processing and Detection Loop
599
+ t4 = time.time()
600
  for page_num_0_based in range(doc.page_count):
601
+ page_start_time = time.time()
602
  fitz_page = doc.load_page(page_num_0_based)
603
  page_num = page_num_0_based + 1
604
 
605
+ # Render page to image for YOLO
606
  try:
607
+ pix_start = time.time()
608
  pix = fitz_page.get_pixmap(matrix=mat)
609
  original_img = pixmap_to_numpy(pix)
610
+ pix_time = time.time() - pix_start
611
  except Exception as e:
612
  logging.error(f"Error converting page {page_num} to image: {e}. Skipping.")
613
  continue
 
 
 
 
 
614
 
615
+ # Core Detection
616
+ detect_start = time.time()
617
+ run_yolo_detection_and_count(original_img, model, page_num)
618
+ detect_time = time.time() - detect_start
619
+
620
+ page_total_time = time.time() - page_start_time
621
+ log_messages.append(f"Page {page_num} Time: Total={page_total_time:.4f}s (Render={pix_time:.4f}s, Detect={detect_time:.4f}s)")
622
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
623
  doc.close()
624
+ t5 = time.time()
625
+ detection_loop_time = t5 - t4
626
+ log_messages.append(f"Total Detection Loop Time ({total_pages} pages): {detection_loop_time:.4f}s")
627
 
628
+ # 4. Final Report Generation
629
+ total_execution_time = t5 - start_time
630
+
631
  report = (
632
  f"βœ… **YOLO Counting Complete!**\n\n"
633
  f"**1) Total Pages Detected in PDF:** **{total_pages}**\n"
634
  f"**2) Total Equations Detected:** **{GLOBAL_EQUATION_COUNT}**\n"
635
+ f"**3) Total Figures Detected:** **{GLOBAL_FIGURE_COUNT}**\n"
636
+ f"---\n"
637
+ f"**4) Total Execution Time:** **{total_execution_time:.4f}s**\n"
638
+ f"### Detailed Step Timing\n"
639
+ f"```\n"
640
+ + "\n".join(log_messages) +
641
+ f"\n```"
642
  )
643
 
644
+ # Return total_execution_time and an empty list for the gallery output
645
+ return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report, total_execution_time, []
646
 
647
 
648
  # ============================================================================
649
+ # --- GRADIO INTERFACE FUNCTION (Updated) ---
650
  # ============================================================================
651
 
652
# NOTE: run_single_pdf_preprocessing is called with the PDF path only (no
# temporary output directory); the gallery slot always receives an empty list.
def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, List[str]]:
    """
    Gradio callback: run the counting pipeline on an uploaded PDF.

    Args:
        pdf_file: The upload handed over by the file input, or None when
            nothing was uploaded. Either a path string or an object exposing
            the path as ``.name`` is accepted.

    Returns:
        ``(pages, equations, figures, report, gallery)`` where the first three
        are display strings, ``report`` is the Markdown summary and ``gallery``
        is always an empty list. On an unexpected failure the three counters
        read "Error" and ``report`` holds the error message.
    """
    if pdf_file is None:
        return "N/A", "N/A", "N/A", "Please upload a PDF file.", []

    # NOTE(review): the input component is defined outside this block; a file
    # input configured with type="filepath" passes a plain str, which has no
    # .name attribute. Fall back to the value itself in that case.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    try:
        # Run the core logic; the timing value and gallery list it returns are
        # not displayed separately (timings are already part of the report).
        num_pages, num_equations, num_figures, report, _total_time, _ = run_single_pdf_preprocessing(
            pdf_path
        )

        # Return results (the last item is an empty list for the now-empty gallery)
        return str(num_pages), str(num_equations), str(num_figures), report, []

    except Exception as e:
        error_msg = f"An unexpected error occurred: {e}"
        logging.error(error_msg, exc_info=True)
        return "Error", "Error", "Error", error_msg, []
 
 
 
676
 
677
 
678
  # ============================================================================
679
+ # --- GRADIO INTERFACE DEFINITION (Updated) ---
680
  # ============================================================================
681
 
682
  if __name__ == "__main__":
 
690
  output_pages = gr.Textbox(label="Total Pages in PDF", interactive=False)
691
  output_equations = gr.Textbox(label="Total Equations Detected", interactive=False)
692
  output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
693
+ output_report = gr.Markdown(label="Processing Summary and Timing")
694
 
695
+ # Gradio Gallery is retained but will receive an empty list []
696
  output_gallery = gr.Gallery(
697
+ label="Detected Equations (Disabled for Speed)",
698
  columns=5,
699
  height="auto",
700
  object_fit="contain",
701
+ allow_preview=False # Disable preview since it's empty
702
  )
703
 
704
  interface = gr.Interface(
705
  fn=gradio_process_pdf,
706
  inputs=input_file,
707
+ # The number of outputs remains 5 (3 textboxes, 1 markdown, 1 gallery)
708
  outputs=[output_pages, output_equations, output_figures, output_report, output_gallery],
709
+ title="πŸš€ Optimized YOLO Counting with Timing",
710
  description=(
711
+ "Upload a PDF to run YOLO detection. Image cropping is disabled for maximum speed. "
712
+ "Timing for each step is included in the summary report."
713
  ),
714
  )
715