heerjtdev committed on
Commit
40b8823
·
verified ·
1 Parent(s): 12f4426

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +394 -38
app.py CHANGED
@@ -1,16 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import fitz # PyMuPDF
2
  import numpy as np
3
  import cv2
4
  import torch
5
  import torch.serialization
6
  import os
7
- from typing import Optional, Tuple
8
  from ultralytics import YOLO
9
  import logging
10
  import gradio as gr
11
  import shutil
12
  import tempfile
13
- import json # Still needed for simple JSON logging
 
14
 
15
  # ============================================================================
16
  # --- Global Patches and Setup ---
@@ -23,6 +330,7 @@ def patched_torch_load(*args, **kwargs):
23
  return _original_torch_load(*args, **kwargs)
24
  torch.load = patched_torch_load
25
 
 
26
  logging.basicConfig(level=logging.WARNING)
27
 
28
  # ============================================================================
@@ -30,8 +338,9 @@ logging.basicConfig(level=logging.WARNING)
30
  # ============================================================================
31
 
32
  WEIGHTS_PATH = 'best.pt'
 
33
 
34
- # Detection parameters (Required for your box combination logic)
35
  CONF_THRESHOLD = 0.2
36
  TARGET_CLASSES = ['figure', 'equation']
37
  IOU_MERGE_THRESHOLD = 0.4
@@ -42,9 +351,11 @@ GLOBAL_FIGURE_COUNT = 0
42
  GLOBAL_EQUATION_COUNT = 0
43
 
44
  # ============================================================================
45
- # --- BOX COMBINATION LOGIC (Retained from your original script) ---
46
  # ============================================================================
47
 
 
 
48
  def calculate_iou(box1, box2):
49
  x1_a, y1_a, x2_a, y2_a = box1
50
  x1_b, y1_b, x2_b, y2_b = box2
@@ -112,7 +423,7 @@ def merge_overlapping_boxes(detections, iou_threshold):
112
  return merged_detections
113
 
114
  # ============================================================================
115
- # --- UTILITY FUNCTIONS (Minimally Required) ---
116
  # ============================================================================
117
 
118
  def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
@@ -129,10 +440,10 @@ def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
129
 
130
  def run_yolo_detection_and_count(
131
  image: np.ndarray, model: YOLO, page_num: int
132
- ) -> Tuple[int, int]:
133
  """
134
  Runs YOLO inference, applies NMS/filtering, and updates global counters.
135
- Returns the counts for the current page.
136
  """
137
  global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
138
 
@@ -142,6 +453,7 @@ def run_yolo_detection_and_count(
142
 
143
  try:
144
  # Run prediction
 
145
  results = model.predict(image, conf=CONF_THRESHOLD, verbose=False)
146
 
147
  if results and results[0].boxes:
@@ -157,61 +469,71 @@ def run_yolo_detection_and_count(
157
  })
158
  except Exception as e:
159
  logging.error(f"YOLO inference failed on page {page_num}: {e}")
160
- return 0, 0
161
 
162
  # Apply NMS/Merging/Filtering based on your provided logic
163
  merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
164
  final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
165
 
 
 
166
  # Update Global Counters
167
  for det in final_detections:
 
 
 
 
 
 
 
168
  if det['class'] == 'figure':
169
  GLOBAL_FIGURE_COUNT += 1
170
  page_figures += 1
171
  elif det['class'] == 'equation':
172
  GLOBAL_EQUATION_COUNT += 1
173
  page_equations += 1
 
 
 
 
174
 
175
  logging.warning(f" -> Page {page_num}: EQs={page_equations}, Figs={page_figures}")
176
- return page_equations, page_figures
177
 
178
 
179
  # ============================================================================
180
- # --- MAIN DOCUMENT PROCESSING FUNCTION (Modified for Minimal Output) ---
181
  # ============================================================================
182
 
183
- def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str]:
184
  """
185
- Runs the pipeline and returns just the counts and a report.
186
- No intermediate JSON saving or complex output structure.
187
  """
188
  global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
189
 
190
- # Reset globals
191
  GLOBAL_FIGURE_COUNT = 0
192
  GLOBAL_EQUATION_COUNT = 0
193
 
194
  if not os.path.exists(pdf_path):
195
  report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
196
- return 0, 0, 0, report
197
 
198
- # Model Loading (CRITICAL: Requires best.pt)
199
  try:
200
  model = YOLO(WEIGHTS_PATH)
201
- logging.warning(f"✅ Loaded YOLO model from: {WEIGHTS_PATH}")
202
  except Exception as e:
203
  report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
204
- return 0, 0, 0, report
205
 
206
  try:
207
  doc = fitz.open(pdf_path)
208
  total_pages = doc.page_count
209
- logging.warning(f"✅ Opened PDF: {doc.page_count} pages")
210
  except Exception as e:
211
  report = f"❌ ERROR loading PDF file: {e}"
212
- return 0, 0, 0, report
213
 
214
- mat = fitz.Matrix(2.0, 2.0)
 
215
 
216
  for page_num_0_based in range(doc.page_count):
217
  fitz_page = doc.load_page(page_num_0_based)
@@ -224,9 +546,33 @@ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str]:
224
  logging.error(f"Error converting page {page_num} to image: {e}. Skipping.")
225
  continue
226
 
227
- # Core Detection and Counting
228
- run_yolo_detection_and_count(original_img, model, page_num)
229
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  doc.close()
231
 
232
  # Final Report Generation
@@ -237,32 +583,34 @@ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str]:
237
  f"**3) Total Figures Detected:** **{GLOBAL_FIGURE_COUNT}**"
238
  )
239
 
240
- return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report
 
241
 
242
 
243
  # ============================================================================
244
- # --- GRADIO INTERFACE FUNCTION (Modified for minimal output) ---
245
  # ============================================================================
246
 
247
- def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str]:
248
  """
249
- Gradio wrapper function to handle file upload and return all results as strings.
250
  """
251
  if pdf_file is None:
252
- return "N/A", "N/A", "N/A", "Please upload a PDF file."
253
 
254
  pdf_path = pdf_file.name
255
 
256
  try:
257
  # Run the core logic
258
- num_pages, num_equations, num_figures, report = run_single_pdf_preprocessing(pdf_path)
259
 
260
- # Return results as formatted strings
261
- return str(num_pages), str(num_equations), str(num_figures), report
262
 
263
  except Exception as e:
264
  error_msg = f"An unexpected error occurred: {e}"
265
- return "Error", "Error", "Error", error_msg
 
266
 
267
 
268
  # ============================================================================
@@ -276,20 +624,28 @@ if __name__ == "__main__":
276
 
277
  input_file = gr.File(label="Upload PDF Document", type="filepath", file_types=[".pdf"])
278
 
279
- # Outputs are now discrete number fields and a final markdown report
280
  output_pages = gr.Textbox(label="Total Pages in PDF", interactive=False)
281
  output_equations = gr.Textbox(label="Total Equations Detected", interactive=False)
282
  output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
283
  output_report = gr.Markdown(label="Processing Summary")
 
 
 
 
 
 
 
 
284
 
285
  interface = gr.Interface(
286
  fn=gradio_process_pdf,
287
  inputs=input_file,
288
- outputs=[output_pages, output_equations, output_figures, output_report],
289
- title="🎯 Minimalist YOLO Counting for PDF Elements",
290
  description=(
291
- "Upload a PDF to instantly run YOLO detection using your **`best.pt`** model "
292
- "and get the total counts for pages, equations, and figures."
293
  ),
294
  )
295
 
 
1
+ # import fitz # PyMuPDF
2
+ # import numpy as np
3
+ # import cv2
4
+ # import torch
5
+ # import torch.serialization
6
+ # import os
7
+ # from typing import Optional, Tuple
8
+ # from ultralytics import YOLO
9
+ # import logging
10
+ # import gradio as gr
11
+ # import shutil
12
+ # import tempfile
13
+ # import json # Still needed for simple JSON logging
14
+
15
+ # # ============================================================================
16
+ # # --- Global Patches and Setup ---
17
+ # # ============================================================================
18
+
19
+ # # Patch torch.load to prevent weights_only error with older models
20
+ # _original_torch_load = torch.load
21
+ # def patched_torch_load(*args, **kwargs):
22
+ # kwargs["weights_only"] = False
23
+ # return _original_torch_load(*args, **kwargs)
24
+ # torch.load = patched_torch_load
25
+
26
+ # logging.basicConfig(level=logging.WARNING)
27
+
28
+ # # ============================================================================
29
+ # # --- CONFIGURATION AND CONSTANTS ---
30
+ # # ============================================================================
31
+
32
+ # WEIGHTS_PATH = 'best.pt'
33
+
34
+ # # Detection parameters (Required for your box combination logic)
35
+ # CONF_THRESHOLD = 0.2
36
+ # TARGET_CLASSES = ['figure', 'equation']
37
+ # IOU_MERGE_THRESHOLD = 0.4
38
+ # IOA_SUPPRESSION_THRESHOLD = 0.7
39
+
40
+ # # Global counters (Reset per run)
41
+ # GLOBAL_FIGURE_COUNT = 0
42
+ # GLOBAL_EQUATION_COUNT = 0
43
+
44
+ # # ============================================================================
45
+ # # --- BOX COMBINATION LOGIC (Retained from your original script) ---
46
+ # # ============================================================================
47
+
48
+ # def calculate_iou(box1, box2):
49
+ # x1_a, y1_a, x2_a, y2_a = box1
50
+ # x1_b, y1_b, x2_b, y2_b = box2
51
+ # x_left = max(x1_a, x1_b)
52
+ # y_top = max(y1_a, y1_b)
53
+ # x_right = min(x2_a, x2_b)
54
+ # y_bottom = min(y2_a, y2_b)
55
+ # intersection_area = max(0, x_right - x_left) * max(0, y_bottom - y_top)
56
+ # box_a_area = (x2_a - x1_a) * (y2_a - y1_a)
57
+ # box_b_area = (x2_b - x1_b) * (y2_b - y1_b)
58
+ # union_area = float(box_a_area + box_b_area - intersection_area)
59
+ # return intersection_area / union_area if union_area > 0 else 0
60
+
61
+
62
+ # def filter_nested_boxes(detections, ioa_threshold=0.80):
63
+ # if not detections: return []
64
+ # for d in detections:
65
+ # x1, y1, x2, y2 = d['coords']
66
+ # d['area'] = (x2 - x1) * (y2 - y1)
67
+ # detections.sort(key=lambda x: x['area'], reverse=True)
68
+ # keep_indices = []
69
+ # is_suppressed = [False] * len(detections)
70
+ # for i in range(len(detections)):
71
+ # if is_suppressed[i]: continue
72
+ # keep_indices.append(i)
73
+ # box_a = detections[i]['coords']
74
+ # for j in range(i + 1, len(detections)):
75
+ # if is_suppressed[j]: continue
76
+ # box_b = detections[j]['coords']
77
+ # x_left = max(box_a[0], box_b[0])
78
+ # y_top = max(box_a[1], box_b[1])
79
+ # x_right = min(box_a[2], box_b[2])
80
+ # y_bottom = min(box_a[3], box_b[3])
81
+ # intersection = max(0, x_right - x_left) * max(0, y_bottom - y_top)
82
+ # area_b = detections[j]['area']
83
+ # if area_b > 0 and intersection / area_b > ioa_threshold:
84
+ # is_suppressed[j] = True
85
+ # return [detections[i] for i in keep_indices]
86
+
87
+
88
+ # def merge_overlapping_boxes(detections, iou_threshold):
89
+ # if not detections: return []
90
+ # detections.sort(key=lambda d: d['conf'], reverse=True)
91
+ # merged_detections = []
92
+ # is_merged = [False] * len(detections)
93
+ # for i in range(len(detections)):
94
+ # if is_merged[i]: continue
95
+ # current_box = detections[i]['coords']
96
+ # current_class = detections[i]['class']
97
+ # merged_x1, merged_y1, merged_x2, merged_y2 = current_box
98
+ # for j in range(i + 1, len(detections)):
99
+ # if is_merged[j] or detections[j]['class'] != current_class: continue
100
+ # other_box = detections[j]['coords']
101
+ # iou = calculate_iou(current_box, other_box)
102
+ # if iou > iou_threshold:
103
+ # merged_x1 = min(merged_x1, other_box[0])
104
+ # merged_y1 = min(merged_y1, other_box[1])
105
+ # merged_x2 = max(merged_x2, other_box[2])
106
+ # merged_y2 = max(merged_y2, other_box[3])
107
+ # is_merged[j] = True
108
+ # merged_detections.append({
109
+ # 'coords': (merged_x1, merged_y1, merged_x2, merged_y2),
110
+ # 'y1': merged_y1, 'class': current_class, 'conf': detections[i]['conf']
111
+ # })
112
+ # return merged_detections
113
+
114
+ # # ============================================================================
115
+ # # --- UTILITY FUNCTIONS (Minimally Required) ---
116
+ # # ============================================================================
117
+
118
+ # def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
119
+ # """Converts a PyMuPDF Pixmap to a NumPy array for OpenCV/YOLO."""
120
+ # img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
121
+ # (pix.h, pix.w, pix.n)
122
+ # )
123
+ # if pix.n == 4:
124
+ # img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
125
+ # elif pix.n == 1:
126
+ # img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
127
+ # return img
128
+
129
+
130
+ # def run_yolo_detection_and_count(
131
+ # image: np.ndarray, model: YOLO, page_num: int
132
+ # ) -> Tuple[int, int]:
133
+ # """
134
+ # Runs YOLO inference, applies NMS/filtering, and updates global counters.
135
+ # Returns the counts for the current page.
136
+ # """
137
+ # global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
138
+
139
+ # yolo_detections = []
140
+ # page_equations = 0
141
+ # page_figures = 0
142
+
143
+ # try:
144
+ # # Run prediction
145
+ # results = model.predict(image, conf=CONF_THRESHOLD, verbose=False)
146
+
147
+ # if results and results[0].boxes:
148
+ # for box in results[0].boxes.data.tolist():
149
+ # x1, y1, x2, y2, conf, cls_id = box
150
+ # cls_name = model.names[int(cls_id)]
151
+
152
+ # if cls_name in TARGET_CLASSES:
153
+ # yolo_detections.append({
154
+ # 'coords': (x1, y1, x2, y2),
155
+ # 'class': cls_name,
156
+ # 'conf': conf
157
+ # })
158
+ # except Exception as e:
159
+ # logging.error(f"YOLO inference failed on page {page_num}: {e}")
160
+ # return 0, 0
161
+
162
+ # # Apply NMS/Merging/Filtering based on your provided logic
163
+ # merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
164
+ # final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
165
+
166
+ # # Update Global Counters
167
+ # for det in final_detections:
168
+ # if det['class'] == 'figure':
169
+ # GLOBAL_FIGURE_COUNT += 1
170
+ # page_figures += 1
171
+ # elif det['class'] == 'equation':
172
+ # GLOBAL_EQUATION_COUNT += 1
173
+ # page_equations += 1
174
+
175
+ # logging.warning(f" -> Page {page_num}: EQs={page_equations}, Figs={page_figures}")
176
+ # return page_equations, page_figures
177
+
178
+
179
+ # # ============================================================================
180
+ # # --- MAIN DOCUMENT PROCESSING FUNCTION (Modified for Minimal Output) ---
181
+ # # ============================================================================
182
+
183
+ # def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str]:
184
+ # """
185
+ # Runs the pipeline and returns just the counts and a report.
186
+ # No intermediate JSON saving or complex output structure.
187
+ # """
188
+ # global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
189
+
190
+ # # Reset globals
191
+ # GLOBAL_FIGURE_COUNT = 0
192
+ # GLOBAL_EQUATION_COUNT = 0
193
+
194
+ # if not os.path.exists(pdf_path):
195
+ # report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
196
+ # return 0, 0, 0, report
197
+
198
+ # # Model Loading (CRITICAL: Requires best.pt)
199
+ # try:
200
+ # model = YOLO(WEIGHTS_PATH)
201
+ # logging.warning(f"✅ Loaded YOLO model from: {WEIGHTS_PATH}")
202
+ # except Exception as e:
203
+ # report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
204
+ # return 0, 0, 0, report
205
+
206
+ # try:
207
+ # doc = fitz.open(pdf_path)
208
+ # total_pages = doc.page_count
209
+ # logging.warning(f"✅ Opened PDF: {doc.page_count} pages")
210
+ # except Exception as e:
211
+ # report = f"❌ ERROR loading PDF file: {e}"
212
+ # return 0, 0, 0, report
213
+
214
+ # mat = fitz.Matrix(2.0, 2.0)
215
+
216
+ # for page_num_0_based in range(doc.page_count):
217
+ # fitz_page = doc.load_page(page_num_0_based)
218
+ # page_num = page_num_0_based + 1
219
+
220
+ # try:
221
+ # pix = fitz_page.get_pixmap(matrix=mat)
222
+ # original_img = pixmap_to_numpy(pix)
223
+ # except Exception as e:
224
+ # logging.error(f"Error converting page {page_num} to image: {e}. Skipping.")
225
+ # continue
226
+
227
+ # # Core Detection and Counting
228
+ # run_yolo_detection_and_count(original_img, model, page_num)
229
+
230
+ # doc.close()
231
+
232
+ # # Final Report Generation
233
+ # report = (
234
+ # f"✅ **YOLO Counting Complete!**\n\n"
235
+ # f"**1) Total Pages Detected in PDF:** **{total_pages}**\n"
236
+ # f"**2) Total Equations Detected:** **{GLOBAL_EQUATION_COUNT}**\n"
237
+ # f"**3) Total Figures Detected:** **{GLOBAL_FIGURE_COUNT}**"
238
+ # )
239
+
240
+ # return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report
241
+
242
+
243
+ # # ============================================================================
244
+ # # --- GRADIO INTERFACE FUNCTION (Modified for minimal output) ---
245
+ # # ============================================================================
246
+
247
+ # def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str]:
248
+ # """
249
+ # Gradio wrapper function to handle file upload and return all results as strings.
250
+ # """
251
+ # if pdf_file is None:
252
+ # return "N/A", "N/A", "N/A", "Please upload a PDF file."
253
+
254
+ # pdf_path = pdf_file.name
255
+
256
+ # try:
257
+ # # Run the core logic
258
+ # num_pages, num_equations, num_figures, report = run_single_pdf_preprocessing(pdf_path)
259
+
260
+ # # Return results as formatted strings
261
+ # return str(num_pages), str(num_equations), str(num_figures), report
262
+
263
+ # except Exception as e:
264
+ # error_msg = f"An unexpected error occurred: {e}"
265
+ # return "Error", "Error", "Error", error_msg
266
+
267
+
268
+ # # ============================================================================
269
+ # # --- GRADIO INTERFACE DEFINITION ---
270
+ # # ============================================================================
271
+
272
+ # if __name__ == "__main__":
273
+
274
+ # if not os.path.exists(WEIGHTS_PATH):
275
+ # logging.error(f"❌ FATAL ERROR: YOLO weight file '{WEIGHTS_PATH}' not found. Cannot run live inference.")
276
+
277
+ # input_file = gr.File(label="Upload PDF Document", type="filepath", file_types=[".pdf"])
278
+
279
+ # # Outputs are now discrete number fields and a final markdown report
280
+ # output_pages = gr.Textbox(label="Total Pages in PDF", interactive=False)
281
+ # output_equations = gr.Textbox(label="Total Equations Detected", interactive=False)
282
+ # output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
283
+ # output_report = gr.Markdown(label="Processing Summary")
284
+
285
+ # interface = gr.Interface(
286
+ # fn=gradio_process_pdf,
287
+ # inputs=input_file,
288
+ # outputs=[output_pages, output_equations, output_figures, output_report],
289
+ # title="🎯 Minimalist YOLO Counting for PDF Elements",
290
+ # description=(
291
+ # "Upload a PDF to instantly run YOLO detection using your **`best.pt`** model "
292
+ # "and get the total counts for pages, equations, and figures."
293
+ # ),
294
+ # )
295
+
296
+ # print("\nStarting Gradio application...")
297
+ # interface.launch(inbrowser=True)
298
+
299
+
300
+
301
+
302
+
303
+
304
+
305
+
306
+
307
  import fitz # PyMuPDF
308
  import numpy as np
309
  import cv2
310
  import torch
311
  import torch.serialization
312
  import os
313
+ from typing import Optional, Tuple, List, Dict, Any
314
  from ultralytics import YOLO
315
  import logging
316
  import gradio as gr
317
  import shutil
318
  import tempfile
319
+ from PIL import Image
320
+ import io
321
 
322
  # ============================================================================
323
  # --- Global Patches and Setup ---
 
330
  return _original_torch_load(*args, **kwargs)
331
  torch.load = patched_torch_load
332
 
333
+ # Suppress warnings during normal operation
334
  logging.basicConfig(level=logging.WARNING)
335
 
336
  # ============================================================================
 
338
  # ============================================================================
339
 
340
  WEIGHTS_PATH = 'best.pt'
341
+ SCALE_FACTOR = 2.0 # Used for page rendering and coordinate scaling
342
 
343
+ # Detection parameters
344
  CONF_THRESHOLD = 0.2
345
  TARGET_CLASSES = ['figure', 'equation']
346
  IOU_MERGE_THRESHOLD = 0.4
 
351
  GLOBAL_EQUATION_COUNT = 0
352
 
353
  # ============================================================================
354
+ # --- BOX COMBINATION LOGIC (Retained) ---
355
  # ============================================================================
356
 
357
+ # (calculate_iou, filter_nested_boxes, merge_overlapping_boxes functions remain unchanged)
358
+
359
  def calculate_iou(box1, box2):
360
  x1_a, y1_a, x2_a, y2_a = box1
361
  x1_b, y1_b, x2_b, y2_b = box2
 
423
  return merged_detections
424
 
425
  # ============================================================================
426
+ # --- UTILITY FUNCTIONS (Modified to capture coordinates) ---
427
  # ============================================================================
428
 
429
  def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
 
440
 
441
  def run_yolo_detection_and_count(
442
  image: np.ndarray, model: YOLO, page_num: int
443
+ ) -> Tuple[int, int, List[Dict[str, Any]]]:
444
  """
445
  Runs YOLO inference, applies NMS/filtering, and updates global counters.
446
+ Returns counts AND a list of equation detection results.
447
  """
448
  global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
449
 
 
453
 
454
  try:
455
  # Run prediction
456
+ # Setting device to 'cpu' is a safety measure if CUDA isn't available
457
  results = model.predict(image, conf=CONF_THRESHOLD, verbose=False)
458
 
459
  if results and results[0].boxes:
 
469
  })
470
  except Exception as e:
471
  logging.error(f"YOLO inference failed on page {page_num}: {e}")
472
+ return 0, 0, []
473
 
474
  # Apply NMS/Merging/Filtering based on your provided logic
475
  merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
476
  final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
477
 
478
+ equation_results = []
479
+
480
  # Update Global Counters
481
  for det in final_detections:
482
+ # Scale coordinates back to the original PDF space (points)
483
+ x1_pix, y1_pix, x2_pix, y2_pix = det['coords']
484
+ x1_pdf = x1_pix / SCALE_FACTOR
485
+ y1_pdf = y1_pix / SCALE_FACTOR
486
+ x2_pdf = x2_pix / SCALE_FACTOR
487
+ y2_pdf = y2_pix / SCALE_FACTOR
488
+
489
  if det['class'] == 'figure':
490
  GLOBAL_FIGURE_COUNT += 1
491
  page_figures += 1
492
  elif det['class'] == 'equation':
493
  GLOBAL_EQUATION_COUNT += 1
494
  page_equations += 1
495
+ equation_results.append({
496
+ 'page': page_num,
497
+ 'bbox_pdf': (x1_pdf, y1_pdf, x2_pdf, y2_pdf) # Coordinates in PDF space
498
+ })
499
 
500
  logging.warning(f" -> Page {page_num}: EQs={page_equations}, Figs={page_figures}")
501
+ return page_equations, page_figures, equation_results
502
 
503
 
504
  # ============================================================================
505
+ # --- MAIN DOCUMENT PROCESSING FUNCTION (Modified to handle image cropping) ---
506
  # ============================================================================
507
 
508
+ def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, List[str]]:
509
  """
510
+ Runs the pipeline, returns counts, report, and a list of paths/bytes for
511
+ the cropped equation images.
512
  """
513
  global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
514
 
 
515
  GLOBAL_FIGURE_COUNT = 0
516
  GLOBAL_EQUATION_COUNT = 0
517
 
518
  if not os.path.exists(pdf_path):
519
  report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
520
+ return 0, 0, 0, report, []
521
 
 
522
  try:
523
  model = YOLO(WEIGHTS_PATH)
 
524
  except Exception as e:
525
  report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
526
+ return 0, 0, 0, report, []
527
 
528
  try:
529
  doc = fitz.open(pdf_path)
530
  total_pages = doc.page_count
 
531
  except Exception as e:
532
  report = f"❌ ERROR loading PDF file: {e}"
533
+ return 0, 0, 0, report, []
534
 
535
+ mat = fitz.Matrix(SCALE_FACTOR, SCALE_FACTOR)
536
+ all_equation_images = [] # List to store cropped image data (base64 or bytes)
537
 
538
  for page_num_0_based in range(doc.page_count):
539
  fitz_page = doc.load_page(page_num_0_based)
 
546
  logging.error(f"Error converting page {page_num} to image: {e}. Skipping.")
547
  continue
548
 
549
+ # Core Detection, Counting, and Equation Result Collection
550
+ _, _, equation_results_page = run_yolo_detection_and_count(
551
+ original_img, model, page_num
552
+ )
553
+
554
+ # --- Image Cropping and Saving for Debugging ---
555
+ for eq in equation_results_page:
556
+ # bbox_pdf is in PyMuPDF's Rect format (x0, y0, x1, y1)
557
+ bbox = eq['bbox_pdf']
558
+
559
+ try:
560
+ # Crop the equation using the bounding box on the fitz page
561
+ # We use a slight border (e.g., 5 points) for better visualization
562
+ rect = fitz.Rect(bbox).prerotate(fitz_page.rotation)
563
+ clip_rect = rect + (0, 0, 5, 5) # Add small padding
564
+
565
+ # Get the pixmap for the cropped area
566
+ eq_pix = fitz_page.get_pixmap(matrix=fitz.Matrix(3.0, 3.0), clip=clip_rect)
567
+
568
+ # Convert the pixmap to a format Gradio can display (PNG bytes)
569
+ img_bytes = eq_pix.tobytes("png")
570
+
571
+ all_equation_images.append(img_bytes)
572
+
573
+ except Exception as e:
574
+ logging.error(f"Error cropping equation on page {page_num}: {e}")
575
+
576
  doc.close()
577
 
578
  # Final Report Generation
 
583
  f"**3) Total Figures Detected:** **{GLOBAL_FIGURE_COUNT}**"
584
  )
585
 
586
+ # Note the return type change to include the list of image bytes
587
+ return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report, all_equation_images
588
 
589
 
590
  # ============================================================================
591
+ # --- GRADIO INTERFACE FUNCTION (Modified for image output) ---
592
  # ============================================================================
593
 
594
+ def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, List[str]]:
595
  """
596
+ Gradio wrapper function to handle file upload and return all results + images.
597
  """
598
  if pdf_file is None:
599
+ return "N/A", "N/A", "N/A", "Please upload a PDF file.", []
600
 
601
  pdf_path = pdf_file.name
602
 
603
  try:
604
  # Run the core logic
605
+ num_pages, num_equations, num_figures, report, equation_images = run_single_pdf_preprocessing(pdf_path)
606
 
607
+ # Return results as formatted strings and the list of image bytes
608
+ return str(num_pages), str(num_equations), str(num_figures), report, equation_images
609
 
610
  except Exception as e:
611
  error_msg = f"An unexpected error occurred: {e}"
612
+ logging.error(error_msg, exc_info=True)
613
+ return "Error", "Error", "Error", error_msg, []
614
 
615
 
616
  # ============================================================================
 
624
 
625
  input_file = gr.File(label="Upload PDF Document", type="filepath", file_types=[".pdf"])
626
 
627
+ # Outputs
628
  output_pages = gr.Textbox(label="Total Pages in PDF", interactive=False)
629
  output_equations = gr.Textbox(label="Total Equations Detected", interactive=False)
630
  output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
631
  output_report = gr.Markdown(label="Processing Summary")
632
+ # NEW: Gradio Gallery to display the list of cropped images
633
+ output_gallery = gr.Gallery(
634
+ label="Detected Equations for Debugging",
635
+ columns=5,
636
+ height="auto",
637
+ object_fit="contain",
638
+ allow_preview=True
639
+ )
640
 
641
  interface = gr.Interface(
642
  fn=gradio_process_pdf,
643
  inputs=input_file,
644
+ outputs=[output_pages, output_equations, output_figures, output_report, output_gallery],
645
+ title="🎯 Minimalist YOLO Counting & Equation Debugger",
646
  description=(
647
+ "Upload a PDF to run YOLO detection. The counts are displayed, and a gallery "
648
+ "of **all detected equation images** is shown below for debugging the detection accuracy."
649
  ),
650
  )
651