heerjtdev committed on
Commit
bfff4aa
·
verified ·
1 Parent(s): 7fbfa32

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +322 -322
app.py CHANGED
@@ -2,352 +2,352 @@
2
 
3
 
4
 
5
- # import fitz # PyMuPDF
6
- # import numpy as np
7
- # import cv2
8
- # import torch
9
- # import torch.serialization
10
- # import os
11
- # import time # Import for timing
12
- # from typing import Optional, Tuple, List, Dict, Any
13
- # from ultralytics import YOLO
14
- # import logging
15
- # import gradio as gr
16
- # import shutil
17
- # import tempfile
18
- # import io
19
-
20
- # # ============================================================================
21
- # # --- Global Patches and Setup ---
22
- # # ============================================================================
23
-
24
- # # Patch torch.load to prevent weights_only error with older models
25
- # _original_torch_load = torch.load
26
- # def patched_torch_load(*args, **kwargs):
27
- # kwargs["weights_only"] = False
28
- # return _original_torch_load(*args, **kwargs)
29
- # torch.load = patched_torch_load
30
-
31
- # logging.basicConfig(level=logging.WARNING)
32
-
33
- # # ============================================================================
34
- # # --- CONFIGURATION AND CONSTANTS ---
35
- # # ============================================================================
36
-
37
- # WEIGHTS_PATH = 'best.pt'
38
- # SCALE_FACTOR = 2.0 # Used for page rendering and coordinate scaling
39
-
40
- # # Detection parameters
41
- # CONF_THRESHOLD = 0.2
42
- # TARGET_CLASSES = ['figure', 'equation']
43
- # IOU_MERGE_THRESHOLD = 0.4
44
- # IOA_SUPPRESSION_THRESHOLD = 0.7
45
-
46
- # # Global counters (Reset per run)
47
- # GLOBAL_FIGURE_COUNT = 0
48
- # GLOBAL_EQUATION_COUNT = 0
49
-
50
- # # ============================================================================
51
- # # --- BOX COMBINATION LOGIC ---
52
- # # ============================================================================
53
-
54
- # def calculate_iou(box1, box2):
55
- # x1_a, y1_a, x2_a, y2_a = box1
56
- # x1_b, y1_b, x2_b, y2_b = box2
57
- # x_left = max(x1_a, x1_b)
58
- # y_top = max(y1_a, y1_b)
59
- # x_right = min(x2_a, x2_b)
60
- # y_bottom = min(y2_a, y2_b)
61
- # intersection_area = max(0, x_right - x_left) * max(0, y_bottom - y_top)
62
- # box_a_area = (x2_a - x1_a) * (y2_a - y1_a)
63
- # box_b_area = (x2_b - x1_b) * (y2_b - y1_b)
64
- # union_area = float(box_a_area + box_b_area - intersection_area)
65
- # return intersection_area / union_area if union_area > 0 else 0
66
-
67
-
68
- # def filter_nested_boxes(detections, ioa_threshold=0.80):
69
- # if not detections: return []
70
- # for d in detections:
71
- # x1, y1, x2, y2 = d['coords']
72
- # d['area'] = (x2 - x1) * (y2 - y1)
73
- # detections.sort(key=lambda x: x['area'], reverse=True)
74
- # keep_indices = []
75
- # is_suppressed = [False] * len(detections)
76
- # for i in range(len(detections)):
77
- # if is_suppressed[i]: continue
78
- # keep_indices.append(i)
79
- # box_a = detections[i]['coords']
80
- # for j in range(i + 1, len(detections)):
81
- # if is_suppressed[j]: continue
82
- # box_b = detections[j]['coords']
83
- # x_left = max(box_a[0], box_b[0])
84
- # y_top = max(box_a[1], box_b[1])
85
- # x_right = min(box_a[2], box_b[2])
86
- # y_bottom = min(box_a[3], box_b[3])
87
- # intersection = max(0, x_right - x_left) * max(0, y_bottom - y_top)
88
- # area_b = detections[j]['area']
89
- # if area_b > 0 and intersection / area_b > ioa_threshold:
90
- # is_suppressed[j] = True
91
- # return [detections[i] for i in keep_indices]
92
-
93
-
94
- # def merge_overlapping_boxes(detections, iou_threshold):
95
- # if not detections: return []
96
- # detections.sort(key=lambda d: d['conf'], reverse=True)
97
- # merged_detections = []
98
- # is_merged = [False] * len(detections)
99
- # for i in range(len(detections)):
100
- # if is_merged[i]: continue
101
- # current_box = detections[i]['coords']
102
- # current_class = detections[i]['class']
103
- # merged_x1, merged_y1, merged_x2, merged_y2 = current_box
104
- # for j in range(i + 1, len(detections)):
105
- # if is_merged[j] or detections[j]['class'] != current_class: continue
106
- # other_box = detections[j]['coords']
107
- # iou = calculate_iou(current_box, other_box)
108
- # if iou > iou_threshold:
109
- # merged_x1 = min(merged_x1, other_box[0])
110
- # merged_y1 = min(merged_y1, other_box[1])
111
- # merged_x2 = max(merged_x2, other_box[2])
112
- # merged_y2 = max(merged_y2, other_box[3])
113
- # is_merged[j] = True
114
- # merged_detections.append({
115
- # 'coords': (merged_x1, merged_y1, merged_x2, merged_y2),
116
- # 'y1': merged_y1, 'class': current_class, 'conf': detections[i]['conf']
117
- # })
118
- # return merged_detections
119
-
120
- # # ============================================================================
121
- # # --- UTILITY FUNCTIONS ---
122
- # # ============================================================================
123
-
124
- # def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
125
- # """Converts a PyMuPDF Pixmap to a NumPy array for OpenCV/YOLO."""
126
- # # This function is retained as it's required to convert PDF page to image for YOLO input.
127
- # img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
128
- # (pix.h, pix.w, pix.n)
129
- # )
130
- # if pix.n == 4:
131
- # img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
132
- # elif pix.n == 1:
133
- # img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
134
- # return img
135
-
136
-
137
- # def run_yolo_detection_and_count(
138
- # image: np.ndarray, model: YOLO, page_num: int
139
- # ) -> Tuple[int, int]: # Removed equation_results list from return
140
- # """
141
- # Runs YOLO inference, applies NMS/filtering, and updates global counters.
142
- # Returns page counts only.
143
- # """
144
- # global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
145
 
146
- # yolo_detections = []
147
- # page_equations = 0
148
- # page_figures = 0
149
 
150
- # try:
151
- # results = model.predict(image, conf=CONF_THRESHOLD, verbose=False)
152
 
153
- # if results and results[0].boxes:
154
- # for box in results[0].boxes.data.tolist():
155
- # x1, y1, x2, y2, conf, cls_id = box
156
- # cls_name = model.names[int(cls_id)]
157
 
158
- # if cls_name in TARGET_CLASSES:
159
- # yolo_detections.append({
160
- # 'coords': (x1, y1, x2, y2),
161
- # 'class': cls_name,
162
- # 'conf': conf
163
- # })
164
- # except Exception as e:
165
- # logging.error(f"YOLO inference failed on page {page_num}: {e}")
166
- # return 0, 0
167
-
168
- # # Apply NMS/Merging/Filtering
169
- # merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
170
- # final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
171
-
172
- # # Update Global Counters
173
- # for det in final_detections:
174
- # if det['class'] == 'figure':
175
- # GLOBAL_FIGURE_COUNT += 1
176
- # page_figures += 1
177
- # elif det['class'] == 'equation':
178
- # GLOBAL_EQUATION_COUNT += 1
179
- # page_equations += 1
180
 
181
- # logging.warning(f" -> Page {page_num}: EQs={page_equations}, Figs={page_figures}")
182
- # return page_equations, page_figures
183
-
184
-
185
- # # ============================================================================
186
- # # --- MAIN DOCUMENT PROCESSING FUNCTION (Optimized) ---
187
- # # ============================================================================
188
-
189
- # def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, float, List[str]]:
190
- # """
191
- # Runs the pipeline, returns counts, report, total time, and an empty list
192
- # (maintaining the expected return signature for Gradio but with None for gallery).
193
- # """
194
- # global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
195
- # start_time = time.time()
196
- # log_messages = []
197
-
198
- # # Reset globals
199
- # GLOBAL_FIGURE_COUNT = 0
200
- # GLOBAL_EQUATION_COUNT = 0
201
-
202
- # # 1. Validation and Model Loading
203
- # t0 = time.time()
204
- # if not os.path.exists(pdf_path):
205
- # report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
206
- # return 0, 0, 0, report, time.time() - start_time, []
207
 
208
- # try:
209
- # model = YOLO(WEIGHTS_PATH)
210
- # logging.warning(f"✅ Loaded YOLO model from: {WEIGHTS_PATH}")
211
- # except Exception as e:
212
- # report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
213
- # return 0, 0, 0, report, time.time() - start_time, []
214
- # t1 = time.time()
215
- # log_messages.append(f"Model Loading Time: {t1-t0:.4f}s")
216
 
217
- # # 2. PDF Loading
218
- # t2 = time.time()
219
- # try:
220
- # doc = fitz.open(pdf_path)
221
- # total_pages = doc.page_count
222
- # logging.warning(f"✅ Opened PDF with {doc.page_count} pages")
223
- # except Exception as e:
224
- # report = f"❌ ERROR loading PDF file: {e}"
225
- # return 0, 0, 0, report, time.time() - start_time, []
226
- # t3 = time.time()
227
- # log_messages.append(f"PDF Initialization Time: {t3-t2:.4f}s")
228
-
229
- # mat = fitz.Matrix(SCALE_FACTOR, SCALE_FACTOR)
230
 
231
- # # 3. Page Processing and Detection Loop
232
- # t4 = time.time()
233
- # for page_num_0_based in range(doc.page_count):
234
- # page_start_time = time.time()
235
- # fitz_page = doc.load_page(page_num_0_based)
236
- # page_num = page_num_0_based + 1
237
-
238
- # # Render page to image for YOLO
239
- # try:
240
- # pix_start = time.time()
241
- # pix = fitz_page.get_pixmap(matrix=mat)
242
- # original_img = pixmap_to_numpy(pix)
243
- # pix_time = time.time() - pix_start
244
- # except Exception as e:
245
- # logging.error(f"Error converting page {page_num} to image: {e}. Skipping.")
246
- # continue
247
 
248
- # # Core Detection
249
- # detect_start = time.time()
250
- # run_yolo_detection_and_count(original_img, model, page_num)
251
- # detect_time = time.time() - detect_start
252
 
253
- # page_total_time = time.time() - page_start_time
254
- # log_messages.append(f"Page {page_num} Time: Total={page_total_time:.4f}s (Render={pix_time:.4f}s, Detect={detect_time:.4f}s)")
255
 
256
- # doc.close()
257
- # t5 = time.time()
258
- # detection_loop_time = t5 - t4
259
- # log_messages.append(f"Total Detection Loop Time ({total_pages} pages): {detection_loop_time:.4f}s")
260
 
261
- # # 4. Final Report Generation
262
- # total_execution_time = t5 - start_time
263
 
264
- # report = (
265
- # f"✅ **YOLO Counting Complete!**\n\n"
266
- # f"**1) Total Pages Detected in PDF:** **{total_pages}**\n"
267
- # f"**2) Total Equations Detected:** **{GLOBAL_EQUATION_COUNT}**\n"
268
- # f"**3) Total Figures Detected:** **{GLOBAL_FIGURE_COUNT}**\n"
269
- # f"---\n"
270
- # f"**4) Total Execution Time:** **{total_execution_time:.4f}s**\n"
271
- # f"### Detailed Step Timing\n"
272
- # f"```\n"
273
- # + "\n".join(log_messages) +
274
- # f"\n```"
275
- # )
276
-
277
- # # Return total_execution_time and an empty list for the gallery output
278
- # return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report, total_execution_time, []
279
-
280
-
281
- # # ============================================================================
282
- # # --- GRADIO INTERFACE FUNCTION (Updated) ---
283
- # # ============================================================================
284
-
285
- # # NOTE: The return signature has changed. We removed 'temp_output_dir' as it's no longer used.
286
- # def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, List[str]]:
287
- # """
288
- # Gradio wrapper function to handle file upload and return results (no image handling).
289
- # """
290
- # if pdf_file is None:
291
- # return "N/A", "N/A", "N/A", "Please upload a PDF file.", []
292
 
293
- # pdf_path = pdf_file.name
294
-
295
- # try:
296
- # # Run the core logic
297
- # # Note the change: temp_output_dir is removed, and total_time is returned
298
- # num_pages, num_equations, num_figures, report, total_time, _ = run_single_pdf_preprocessing(
299
- # pdf_path
300
- # )
301
 
302
- # # Return results (the last item is an empty list for the now-empty gallery)
303
- # return str(num_pages), str(num_equations), str(num_figures), report, []
304
 
305
- # except Exception as e:
306
- # error_msg = f"An unexpected error occurred: {e}"
307
- # logging.error(error_msg, exc_info=True)
308
- # return "Error", "Error", "Error", error_msg, []
309
 
310
 
311
- # # ============================================================================
312
- # # --- GRADIO INTERFACE DEFINITION (Updated) ---
313
- # # ============================================================================
314
 
315
- # if __name__ == "__main__":
316
 
317
- # if not os.path.exists(WEIGHTS_PATH):
318
- # logging.error(f"❌ FATAL ERROR: YOLO weight file '{WEIGHTS_PATH}' not found. Cannot run live inference.")
319
 
320
- # input_file = gr.File(label="Upload PDF Document", type="filepath", file_types=[".pdf"])
321
 
322
- # # Outputs
323
- # output_pages = gr.Textbox(label="Total Pages in PDF", interactive=False)
324
- # output_equations = gr.Textbox(label="Total Equations Detected", interactive=False)
325
- # output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
326
- # output_report = gr.Markdown(label="Processing Summary and Timing")
327
 
328
- # # Gradio Gallery is retained but will receive an empty list []
329
- # output_gallery = gr.Gallery(
330
- # label="Detected Equations (Disabled for Speed)",
331
- # columns=5,
332
- # height="auto",
333
- # object_fit="contain",
334
- # allow_preview=False # Disable preview since it's empty
335
- # )
336
 
337
- # interface = gr.Interface(
338
- # fn=gradio_process_pdf,
339
- # inputs=input_file,
340
- # # The number of outputs remains 5 (3 textboxes, 1 markdown, 1 gallery)
341
- # outputs=[output_pages, output_equations, output_figures, output_report, output_gallery],
342
- # title="🚀 Optimized YOLO Counting with Timing",
343
- # description=(
344
- # "Upload a PDF to run YOLO detection. Image cropping is disabled for maximum speed. "
345
- # "Timing for each step is included in the summary report."
346
- # ),
347
- # )
348
-
349
- # print("\nStarting Gradio application...")
350
- # interface.launch(inbrowser=True)
351
 
352
 
353
 
 
2
 
3
 
4
 
5
import functools
import io
import logging
import os
import shutil
import tempfile
import time  # Import for timing
from typing import Optional, Tuple, List, Dict, Any

import cv2
import fitz  # PyMuPDF
import gradio as gr
import numpy as np
import torch
import torch.serialization
from ultralytics import YOLO
19
+
20
+ # ============================================================================
21
+ # --- Global Patches and Setup ---
22
+ # ============================================================================
23
+
24
# Patch torch.load to prevent weights_only errors when loading older models.
# SECURITY NOTE: weights_only=False lets torch unpickle arbitrary objects, so
# only weight files from a trusted source should ever be loaded by this app.
_original_torch_load = torch.load


@functools.wraps(_original_torch_load)
def patched_torch_load(*args, **kwargs):
    """Call the original ``torch.load`` with ``weights_only`` defaulting to False.

    A ``weights_only`` value passed explicitly by the caller is respected, so
    code that deliberately asks for the restricted loader still gets it; only
    calls that leave the argument out are switched to ``weights_only=False``.
    """
    kwargs.setdefault("weights_only", False)
    return _original_torch_load(*args, **kwargs)


torch.load = patched_torch_load

logging.basicConfig(level=logging.WARNING)

# ============================================================================
# --- CONFIGURATION AND CONSTANTS ---
# ============================================================================

WEIGHTS_PATH = 'best.pt'
SCALE_FACTOR = 2.0  # Used for page rendering and coordinate scaling

# Detection parameters
CONF_THRESHOLD = 0.2  # Confidence threshold passed to model.predict
TARGET_CLASSES = ['figure', 'equation']  # Only these class names are counted
IOU_MERGE_THRESHOLD = 0.4  # Same-class boxes above this IoU are merged
IOA_SUPPRESSION_THRESHOLD = 0.7  # Boxes covered above this fraction are dropped

# Global counters (Reset per run)
GLOBAL_FIGURE_COUNT = 0
GLOBAL_EQUATION_COUNT = 0
49
+
50
+ # ============================================================================
51
+ # --- BOX COMBINATION LOGIC ---
52
+ # ============================================================================
53
+
54
def calculate_iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        box1: ``(x1, y1, x2, y2)`` corner coordinates of the first box.
        box2: ``(x1, y1, x2, y2)`` corner coordinates of the second box.

    Returns:
        The intersection area divided by the union area, or ``0.0`` when the
        union area is not positive (e.g. two zero-sized boxes).
    """
    x1_a, y1_a, x2_a, y2_a = box1
    x1_b, y1_b, x2_b, y2_b = box2

    # Width/height of the overlap rectangle, clamped at 0 for disjoint boxes.
    overlap_w = max(0, min(x2_a, x2_b) - max(x1_a, x1_b))
    overlap_h = max(0, min(y2_a, y2_b) - max(y1_a, y1_b))
    intersection_area = overlap_w * overlap_h

    box_a_area = (x2_a - x1_a) * (y2_a - y1_a)
    box_b_area = (x2_b - x1_b) * (y2_b - y1_b)
    union_area = float(box_a_area + box_b_area - intersection_area)

    # Always return a float so callers see one consistent type.
    return intersection_area / union_area if union_area > 0 else 0.0
66
+
67
+
68
def filter_nested_boxes(detections, ioa_threshold=0.80):
    """Drop detections that lie mostly inside a larger detection that is kept.

    Boxes are visited from largest to smallest area. A box is discarded when
    the share of its own area covered by any already-kept box exceeds
    ``ioa_threshold``; boxes with non-positive area are never discarded.

    Args:
        detections: List of dicts, each with a ``'coords'`` entry holding
            ``(x1, y1, x2, y2)``. The list and its dicts are left unmodified.
        ioa_threshold: Covered-area fraction above which a box is suppressed.

    Returns:
        A new list of kept detections, largest area first. Each entry is a
        copy of the input dict with an added ``'area'`` key.
    """
    if not detections:
        return []

    def _area(coords):
        x1, y1, x2, y2 = coords
        return (x2 - x1) * (y2 - y1)

    def _overlap(a, b):
        width = max(0, min(a[2], b[2]) - max(a[0], b[0]))
        height = max(0, min(a[3], b[3]) - max(a[1], b[1]))
        return width * height

    # Annotated copies: neither the caller's list order nor its dicts change.
    by_area = sorted(
        ({**det, 'area': _area(det['coords'])} for det in detections),
        key=lambda det: det['area'],
        reverse=True,
    )

    kept = []
    for candidate in by_area:
        area = candidate['area']
        nested = area > 0 and any(
            _overlap(keeper['coords'], candidate['coords']) / area > ioa_threshold
            for keeper in kept
        )
        if not nested:
            kept.append(candidate)
    return kept
92
+
93
+
94
def merge_overlapping_boxes(detections, iou_threshold):
    """Merge same-class detections whose IoU with a stronger box is high.

    Detections are visited in descending confidence. Each unmerged detection
    becomes an anchor; every later unmerged detection of the same class whose
    IoU with the anchor's original box exceeds ``iou_threshold`` is absorbed,
    growing the merged box to the bounding rectangle of all absorbed boxes.

    Args:
        detections: List of dicts with ``'coords'`` (``(x1, y1, x2, y2)``),
            ``'class'`` and ``'conf'`` entries. The list is not reordered.
        iou_threshold: IoU above which two same-class boxes are merged.

    Returns:
        A new list of dicts with ``'coords'``, ``'y1'``, ``'class'`` and
        ``'conf'`` (the anchor's confidence), ordered by anchor confidence.
    """
    if not detections:
        return []

    # Sorted copy so the caller's list keeps its original order.
    ordered = sorted(detections, key=lambda d: d['conf'], reverse=True)
    merged_detections = []
    is_merged = [False] * len(ordered)

    for i, anchor in enumerate(ordered):
        if is_merged[i]:
            continue
        anchor_box = anchor['coords']
        anchor_class = anchor['class']
        merged_x1, merged_y1, merged_x2, merged_y2 = anchor_box

        for j in range(i + 1, len(ordered)):
            other = ordered[j]
            if is_merged[j] or other['class'] != anchor_class:
                continue
            other_box = other['coords']
            # IoU is measured against the anchor's own box, not the grown one.
            if calculate_iou(anchor_box, other_box) > iou_threshold:
                merged_x1 = min(merged_x1, other_box[0])
                merged_y1 = min(merged_y1, other_box[1])
                merged_x2 = max(merged_x2, other_box[2])
                merged_y2 = max(merged_y2, other_box[3])
                is_merged[j] = True

        merged_detections.append({
            'coords': (merged_x1, merged_y1, merged_x2, merged_y2),
            'y1': merged_y1, 'class': anchor_class, 'conf': anchor['conf']
        })
    return merged_detections
119
+
120
+ # ============================================================================
121
+ # --- UTILITY FUNCTIONS ---
122
+ # ============================================================================
123
+
124
def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
    """Turn a PyMuPDF Pixmap into a NumPy image array for OpenCV/YOLO.

    The sample buffer is viewed as ``uint8`` and reshaped to
    ``(pix.h, pix.w, pix.n)``. Four-channel data is converted with
    ``COLOR_RGBA2RGB`` and single-channel data with ``COLOR_GRAY2RGB``; any
    other channel count is returned as-is.
    """
    # Kept because the rendered PDF page must become an image before YOLO sees it.
    shape = (pix.h, pix.w, pix.n)
    img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(shape)

    # Channel count -> OpenCV conversion needed to end up with 3-channel RGB.
    to_rgb = {4: cv2.COLOR_RGBA2RGB, 1: cv2.COLOR_GRAY2RGB}
    conversion = to_rgb.get(pix.n)
    if conversion is not None:
        img = cv2.cvtColor(img, conversion)
    return img
135
+
136
+
137
def run_yolo_detection_and_count(
    image: np.ndarray, model: YOLO, page_num: int
) -> Tuple[int, int]:
    """Detect figures and equations on one page image and count them.

    Runs ``model.predict`` on ``image``, keeps detections whose class name is
    in ``TARGET_CLASSES``, merges overlapping boxes, drops nested boxes, and
    adds the surviving counts to the module-level global counters.

    Args:
        image: Page image passed to the model.
        model: Loaded YOLO model.
        page_num: Page number, used only in log messages.

    Returns:
        ``(page_equations, page_figures)`` for this page; ``(0, 0)`` if
        inference raised.
    """
    global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT

    yolo_detections = []
    try:
        results = model.predict(image, conf=CONF_THRESHOLD, verbose=False)
        has_boxes = results and results[0].boxes
        raw_boxes = results[0].boxes.data.tolist() if has_boxes else []

        for x1, y1, x2, y2, conf, cls_id in raw_boxes:
            cls_name = model.names[int(cls_id)]
            if cls_name not in TARGET_CLASSES:
                continue
            yolo_detections.append({
                'coords': (x1, y1, x2, y2),
                'class': cls_name,
                'conf': conf
            })
    except Exception as e:
        logging.error(f"YOLO inference failed on page {page_num}: {e}")
        return 0, 0

    # Merge overlapping same-class boxes, then discard boxes nested in larger ones.
    final_detections = filter_nested_boxes(
        merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD),
        IOA_SUPPRESSION_THRESHOLD,
    )

    # Tally per class for this page, then fold the tallies into the globals.
    detected_classes = [det['class'] for det in final_detections]
    page_figures = detected_classes.count('figure')
    page_equations = detected_classes.count('equation')
    GLOBAL_FIGURE_COUNT += page_figures
    GLOBAL_EQUATION_COUNT += page_equations

    logging.warning(f" -> Page {page_num}: EQs={page_equations}, Figs={page_figures}")
    return page_equations, page_figures
183
+
184
+
185
+ # ============================================================================
186
+ # --- MAIN DOCUMENT PROCESSING FUNCTION (Optimized) ---
187
+ # ============================================================================
188
+
189
def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str, float, List[str]]:
    """Count equations and figures on every page of a PDF with YOLO.

    Loads the YOLO weights from ``WEIGHTS_PATH``, renders each page at
    ``SCALE_FACTOR``, runs detection page by page, and builds a Markdown
    report that includes per-step timings.

    Args:
        pdf_path: Filesystem path of the PDF to process.

    Returns:
        ``(total_pages, total_equations, total_figures, report,
        total_execution_time, gallery)``. ``gallery`` is always an empty list
        (kept so the tuple shape stays the same for the Gradio wrapper). On a
        missing file, a model-loading failure, or a PDF-opening failure the
        three counts are 0 and ``report`` carries the error message.
    """
    global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
    start_time = time.time()
    log_messages = []

    # Reset globals (still updated by run_yolo_detection_and_count).
    GLOBAL_FIGURE_COUNT = 0
    GLOBAL_EQUATION_COUNT = 0

    # 1. Validation and Model Loading
    t0 = time.time()
    if not os.path.exists(pdf_path):
        report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
        return 0, 0, 0, report, time.time() - start_time, []

    try:
        model = YOLO(WEIGHTS_PATH)
        logging.warning(f"✅ Loaded YOLO model from: {WEIGHTS_PATH}")
    except Exception as e:
        report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
        return 0, 0, 0, report, time.time() - start_time, []
    t1 = time.time()
    log_messages.append(f"Model Loading Time: {t1-t0:.4f}s")

    # 2. PDF Loading
    t2 = time.time()
    try:
        doc = fitz.open(pdf_path)
        total_pages = doc.page_count
        logging.warning(f"✅ Opened PDF with {doc.page_count} pages")
    except Exception as e:
        report = f"❌ ERROR loading PDF file: {e}"
        return 0, 0, 0, report, time.time() - start_time, []
    t3 = time.time()
    log_messages.append(f"PDF Initialization Time: {t3-t2:.4f}s")

    # Totals are accumulated from the per-page return values rather than read
    # back from the module-level counters, so this call's result does not
    # depend on shared mutable state.
    total_equations = 0
    total_figures = 0

    # 3. Page Processing and Detection Loop
    # try/finally guarantees the document is closed even if a page raises.
    try:
        mat = fitz.Matrix(SCALE_FACTOR, SCALE_FACTOR)
        t4 = time.time()
        for page_num_0_based in range(total_pages):
            page_start_time = time.time()
            page_num = page_num_0_based + 1

            # Load and render the page to an image for YOLO; a failure in
            # either step skips just this page.
            try:
                fitz_page = doc.load_page(page_num_0_based)
                pix_start = time.time()
                pix = fitz_page.get_pixmap(matrix=mat)
                original_img = pixmap_to_numpy(pix)
                pix_time = time.time() - pix_start
            except Exception as e:
                logging.error(f"Error converting page {page_num} to image: {e}. Skipping.")
                continue

            # Core Detection
            detect_start = time.time()
            page_equations, page_figures = run_yolo_detection_and_count(
                original_img, model, page_num
            )
            detect_time = time.time() - detect_start
            total_equations += page_equations
            total_figures += page_figures

            page_total_time = time.time() - page_start_time
            log_messages.append(f"Page {page_num} Time: Total={page_total_time:.4f}s (Render={pix_time:.4f}s, Detect={detect_time:.4f}s)")
    finally:
        doc.close()

    t5 = time.time()
    detection_loop_time = t5 - t4
    log_messages.append(f"Total Detection Loop Time ({total_pages} pages): {detection_loop_time:.4f}s")

    # 4. Final Report Generation
    total_execution_time = t5 - start_time

    report = (
        f"✅ **YOLO Counting Complete!**\n\n"
        f"**1) Total Pages Detected in PDF:** **{total_pages}**\n"
        f"**2) Total Equations Detected:** **{total_equations}**\n"
        f"**3) Total Figures Detected:** **{total_figures}**\n"
        f"---\n"
        f"**4) Total Execution Time:** **{total_execution_time:.4f}s**\n"
        f"### Detailed Step Timing\n"
        f"```\n"
        + "\n".join(log_messages) +
        f"\n```"
    )

    # Return total_execution_time and an empty list for the gallery output
    return total_pages, total_equations, total_figures, report, total_execution_time, []
279
+
280
+
281
+ # ============================================================================
282
+ # --- GRADIO INTERFACE FUNCTION (Updated) ---
283
+ # ============================================================================
284
+
285
+ # NOTE: The return signature has changed. We removed 'temp_output_dir' as it's no longer used.
286
def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, List[str]]:
    """Gradio callback: process the uploaded PDF and return display values.

    Args:
        pdf_file: The uploaded file as delivered by Gradio. This is either a
            path (``str`` / ``os.PathLike``), which is what a ``gr.File``
            created with ``type="filepath"`` provides, or an object exposing
            the path as ``.name``. ``None`` means nothing was uploaded.

    Returns:
        ``(pages, equations, figures, report, gallery)``: the three counts as
        strings, the Markdown report (or an error message), and an empty list
        for the gallery output.
    """
    if pdf_file is None:
        return "N/A", "N/A", "N/A", "Please upload a PDF file.", []

    try:
        # Accept both a plain path and a file-like wrapper with ``.name``;
        # resolved inside the try so a bad input is reported, not raised.
        if isinstance(pdf_file, (str, os.PathLike)):
            pdf_path = os.fspath(pdf_file)
        else:
            pdf_path = pdf_file.name

        # Run the core logic; the timing value and gallery list are not shown
        # separately (the timing is already part of the report).
        num_pages, num_equations, num_figures, report, _total_time, _ = run_single_pdf_preprocessing(
            pdf_path
        )

        # Return results (the last item is an empty list for the now-empty gallery)
        return str(num_pages), str(num_equations), str(num_figures), report, []

    except Exception as e:
        error_msg = f"An unexpected error occurred: {e}"
        logging.error(error_msg, exc_info=True)
        return "Error", "Error", "Error", error_msg, []
309
 
310
 
311
+ # ============================================================================
312
+ # --- GRADIO INTERFACE DEFINITION (Updated) ---
313
+ # ============================================================================
314
 
315
if __name__ == "__main__":

    # Log early if the weights are missing. The UI is still launched; loading
    # the model is attempted again on each request by the processing function.
    if not os.path.exists(WEIGHTS_PATH):
        logging.error(f"❌ FATAL ERROR: YOLO weight file '{WEIGHTS_PATH}' not found. Cannot run live inference.")

    input_file = gr.File(label="Upload PDF Document", type="filepath", file_types=[".pdf"])

    # Outputs
    output_pages = gr.Textbox(label="Total Pages in PDF", interactive=False)
    output_equations = gr.Textbox(label="Total Equations Detected", interactive=False)
    output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
    output_report = gr.Markdown(label="Processing Summary and Timing")

    # Gradio Gallery is retained but will receive an empty list []
    output_gallery = gr.Gallery(
        label="Detected Equations (Disabled for Speed)",
        columns=5,
        height="auto",
        object_fit="contain",
        allow_preview=False  # Disable preview since it's empty
    )

    interface = gr.Interface(
        fn=gradio_process_pdf,
        inputs=input_file,
        # The number of outputs remains 5 (3 textboxes, 1 markdown, 1 gallery)
        outputs=[output_pages, output_equations, output_figures, output_report, output_gallery],
        title="🚀 Optimized YOLO Counting with Timing",
        description=(
            "Upload a PDF to run YOLO detection. Image cropping is disabled for maximum speed. "
            "Timing for each step is included in the summary report."
        ),
    )

    print("\nStarting Gradio application...")
    interface.launch(inbrowser=True)
351
 
352
 
353