heerjtdev committed on
Commit
12f4426
·
verified ·
1 Parent(s): 0f353ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -220
app.py CHANGED
@@ -3,24 +3,22 @@ import numpy as np
3
  import cv2
4
  import torch
5
  import torch.serialization
6
- import json
7
  import os
8
- import re
9
- from typing import List, Dict, Any, Optional, Union, Tuple
10
  from ultralytics import YOLO
11
  import logging
12
  import gradio as gr
13
  import shutil
14
  import tempfile
15
- import time
16
 
17
  # ============================================================================
18
- # --- Global Patches (Kept from original script) ---
19
  # ============================================================================
20
 
 
21
  _original_torch_load = torch.load
22
def patched_torch_load(*args, **kwargs):
    """Delegate to the saved original ``torch.load`` with ``weights_only=False``.

    Any ``weights_only`` value supplied by the caller is overridden; all other
    positional and keyword arguments are forwarded unchanged.

    NOTE(review): with ``weights_only=False`` torch unpickles arbitrary
    objects, so only trusted checkpoint files should be loaded through this.
    """
    forced_kwargs = dict(kwargs, weights_only=False)
    return _original_torch_load(*args, **forced_kwargs)
26
  torch.load = patched_torch_load
@@ -31,49 +29,20 @@ logging.basicConfig(level=logging.WARNING)
31
  # --- CONFIGURATION AND CONSTANTS ---
32
  # ============================================================================
33
 
34
- # NOTE: Update these paths to match your environment before running!
35
- # Gradio runs in the current working directory, so relative paths are fine.
36
  WEIGHTS_PATH = 'best.pt'
37
 
38
- # DIRECTORY CONFIGURATION - Now managed by tempfile or local folders
39
- # NOTE: For Gradio, we'll use a temporary directory for output files
40
- # to prevent cluttering the execution environment.
41
-
42
- # Detection parameters
43
  CONF_THRESHOLD = 0.2
44
  TARGET_CLASSES = ['figure', 'equation']
45
  IOU_MERGE_THRESHOLD = 0.4
46
  IOA_SUPPRESSION_THRESHOLD = 0.7
47
- LINE_TOLERANCE = 15
48
 
49
- # Global counters for sequential numbering across the entire PDF
50
  GLOBAL_FIGURE_COUNT = 0
51
  GLOBAL_EQUATION_COUNT = 0
52
 
53
  # ============================================================================
54
- # --- PERFORMANCE OPTIMIZATION: OCR CACHE ---
55
- # Using the original OCRCache class definition
56
- # ============================================================================
57
-
58
class OCRCache:
    """In-memory store of per-page OCR results.

    Entries are keyed by the PDF path together with the page number, so a
    page's OCR output can be looked up again instead of being recomputed.
    """

    def __init__(self):
        # Maps "<pdf_path>:<page_num>" to the OCR data stored for that page.
        self.cache = dict()

    def get_key(self, pdf_path: str, page_num: int) -> str:
        """Return the cache key for one page of one PDF."""
        return "{}:{}".format(pdf_path, page_num)

    def has_ocr(self, pdf_path: str, page_num: int) -> bool:
        """Return True when OCR data has been stored for this page."""
        key = self.get_key(pdf_path, page_num)
        return key in self.cache

    def get_ocr(self, pdf_path: str, page_num: int) -> Optional[list]:
        """Return the stored OCR data for this page, or None if absent."""
        key = self.get_key(pdf_path, page_num)
        return self.cache.get(key)

    def set_ocr(self, pdf_path: str, page_num: int, ocr_data: list):
        """Store (or overwrite) the OCR data for this page."""
        key = self.get_key(pdf_path, page_num)
        self.cache[key] = ocr_data

    def clear(self):
        """Drop every cached entry."""
        self.cache.clear()
72
-
73
- _ocr_cache = OCRCache()
74
-
75
- # ============================================================================
76
- # --- PHASE 1: YOLO/OCR PREPROCESSING FUNCTIONS (Kept from original script) ---
77
  # ============================================================================
78
 
79
  def calculate_iou(box1, box2):
@@ -90,21 +59,8 @@ def calculate_iou(box1, box2):
90
  return intersection_area / union_area if union_area > 0 else 0
91
 
92
 
93
def calculate_ioa(box1, box2):
    """Return the intersection of the two boxes divided by the area of box1.

    Both boxes are ``(x1, y1, x2, y2)`` sequences. The result is 0 when box1
    has no positive area or when the boxes do not overlap.
    """
    ax1, ay1, ax2, ay2 = box1
    bx1, by1, bx2, by2 = box2

    # Width/height of the overlap rectangle, clamped at zero when disjoint.
    overlap_w = max(0, min(ax2, bx2) - max(ax1, bx1))
    overlap_h = max(0, min(ay2, by2) - max(ay1, by1))
    overlap_area = overlap_w * overlap_h

    area_a = (ax2 - ax1) * (ay2 - ay1)
    if area_a > 0:
        return overlap_area / area_a
    return 0
103
-
104
-
105
  def filter_nested_boxes(detections, ioa_threshold=0.80):
106
- if not detections:
107
- return []
108
  for d in detections:
109
  x1, y1, x2, y2 = d['coords']
110
  d['area'] = (x2 - x1) * (y2 - y1)
@@ -155,38 +111,12 @@ def merge_overlapping_boxes(detections, iou_threshold):
155
  })
156
  return merged_detections
157
 
158
-
159
def merge_yolo_into_word_data(raw_word_data: list, yolo_detections: list, scale_factor: float) -> list:
    """Replace words covered by detections with one placeholder per detection.

    Detection coordinates are divided by ``scale_factor`` before comparison.
    A word entry ``(text, x1, y1, x2, y2)`` is dropped when its centre point
    lies inside any scaled detection box. One ``("BLOCK_<i>", x1, y1, x2, y2)``
    entry per detection is appended after the surviving words.

    When ``yolo_detections`` is empty the input list is returned unchanged
    (the same object, not a copy).
    """
    if not yolo_detections:
        return raw_word_data

    def _scaled(coords):
        left, top, right, bottom = coords
        return (left / scale_factor, top / scale_factor,
                right / scale_factor, bottom / scale_factor)

    regions = [_scaled(det['coords']) for det in yolo_detections]

    def _covered(word):
        # A word counts as covered when its centre falls within any region.
        centre_x = (word[1] + word[3]) / 2
        centre_y = (word[2] + word[4]) / 2
        return any(
            left <= centre_x <= right and top <= centre_y <= bottom
            for left, top, right, bottom in regions
        )

    merged = [word for word in raw_word_data if not _covered(word)]
    merged.extend((f"BLOCK_{idx}", *region) for idx, region in enumerate(regions))
    return merged
183
-
184
-
185
  # ============================================================================
186
- # --- MISSING HELPER FUNCTIONS (Placeholders) ---
187
  # ============================================================================
188
 
189
  def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
 
190
  img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
191
  (pix.h, pix.w, pix.n)
192
  )
@@ -196,198 +126,143 @@ def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
196
  img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
197
  return img
198
 
199
def find_column_separator_x(raw_word_data: list, page_width: float) -> Optional[float]:
    """Placeholder column detector: always reports no separator.

    Both arguments are currently ignored and ``None`` is returned
    unconditionally.
    """
    separator_x: Optional[float] = None
    return separator_x
202
 
203
def preprocess_and_ocr_page(
    image: np.ndarray, model: YOLO, pdf_path: str, page_num: int,
    fitz_page: fitz.Page, pdf_name: str
) -> Tuple[Optional[list], Optional[float]]:
    """Produce placeholder page output and bump the global element counters.

    This version does not run the model: ``image``, ``model``, ``pdf_path``
    and ``pdf_name`` are unused, and a fixed list of three hard-coded
    detections stands in for YOLO results. Those detections are passed
    through ``merge_overlapping_boxes`` and ``filter_nested_boxes``, and each
    surviving 'figure' / 'equation' increments the matching global counter.

    Returns:
        A tuple ``(blocks, separator_x)``: a fixed three-entry list of mock
        blocks (carrying ``page_num`` and the current global counts) and the
        value returned by ``find_column_separator_x``.
    """
    global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT

    render_scale = 2.0

    # Hard-coded stand-ins for real detections.
    placeholder_boxes = [
        {'coords': (100, 100, 400, 200), 'class': 'equation', 'conf': 0.95},
        {'coords': (100, 300, 400, 400), 'class': 'figure', 'conf': 0.90},
        {'coords': (100, 500, 400, 600), 'class': 'equation', 'conf': 0.85},
    ]

    kept = filter_nested_boxes(
        merge_overlapping_boxes(placeholder_boxes, IOU_MERGE_THRESHOLD),
        IOA_SUPPRESSION_THRESHOLD,
    )

    for det in kept:
        label = det['class']
        if label == 'figure':
            GLOBAL_FIGURE_COUNT += 1
        elif label == 'equation':
            GLOBAL_EQUATION_COUNT += 1

    # Single mock word in place of text extracted from the page.
    placeholder_words = [("Word", 50.0, 50.0, 80.0, 60.0)]
    words = merge_yolo_into_word_data(placeholder_words, kept, render_scale)

    separator_x = find_column_separator_x(words, fitz_page.rect.width)

    blocks = [
        {"type": "text", "text": "Mock Text Block 1"},
        {"type": "yolo_block", "class": "figure", "page_num": page_num, "global_id": GLOBAL_FIGURE_COUNT},
        {"type": "yolo_block", "class": "equation", "page_num": page_num, "global_id": GLOBAL_EQUATION_COUNT},
    ]
    return blocks, separator_x
255
 
256
  # ============================================================================
257
- # --- MAIN DOCUMENT PROCESSING FUNCTION (Modified for Gradio) ---
258
  # ============================================================================
259
 
260
def run_single_pdf_preprocessing(pdf_path: str, output_dir: str) -> Tuple[Optional[str], int, int, int, str]:
    """Run the preprocessing pipeline over one PDF and write a combined JSON.

    The global figure/equation counters and the OCR cache are reset first.
    Each page is rendered at 2x scale, converted to a NumPy image and passed
    to ``preprocess_and_ocr_page``; pages whose rendering fails are logged
    and skipped.

    Args:
        pdf_path: Path of the PDF to process.
        output_dir: Directory that receives ``<pdf name>_preprocessed.json``
            and a ``figure_extraction`` sub-directory.

    Returns:
        ``(json_path, total_pages, equation_count, figure_count, report)``.
        ``json_path`` is ``None`` when the input is missing, the model or PDF
        cannot be loaded, no page data was produced, or saving failed; the
        reason is given in ``report``.
    """
    global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT

    GLOBAL_FIGURE_COUNT = 0
    GLOBAL_EQUATION_COUNT = 0
    _ocr_cache.clear()

    if not os.path.exists(pdf_path):
        report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
        return None, 0, 0, 0, report

    pdf_name = os.path.splitext(os.path.basename(pdf_path))[0]
    preprocessed_json_path = os.path.join(output_dir, f"{pdf_name}_preprocessed.json")

    # Created up front even though nothing in this function writes into it.
    figure_output_dir = os.path.join(output_dir, 'figure_extraction')
    os.makedirs(figure_output_dir, exist_ok=True)

    try:
        model = YOLO(WEIGHTS_PATH)
    except Exception as e:
        report = f"❌ ERROR loading YOLO model from {WEIGHTS_PATH}: {e}\n(Please ensure 'best.pt' is in the current directory and Ultralytics is installed.)"
        return None, 0, 0, 0, report

    try:
        doc = fitz.open(pdf_path)
        total_pages = doc.page_count
    except Exception as e:
        report = f"❌ ERROR loading PDF file: {e}"
        return None, 0, 0, 0, report

    all_pages_data = []
    total_pages_processed = 0
    mat = fitz.Matrix(2.0, 2.0)

    # try/finally guarantees the document is closed even when page loading or
    # per-page processing raises.
    try:
        for page_num_0_based in range(doc.page_count):
            fitz_page = doc.load_page(page_num_0_based)

            try:
                pix = fitz_page.get_pixmap(matrix=mat)
                original_img = pixmap_to_numpy(pix)
            except Exception as e:
                logging.error(f"Error converting page {page_num_0_based + 1} to image: {e}")
                continue

            final_output, page_separator_x = preprocess_and_ocr_page(
                original_img, model, pdf_path, page_num_0_based + 1, fitz_page, pdf_name
            )

            if final_output is not None:
                page_data = {
                    "page_number": page_num_0_based + 1,
                    "data": final_output,
                    "column_separator_x": page_separator_x
                }
                all_pages_data.append(page_data)
                total_pages_processed += 1
    finally:
        doc.close()

    if all_pages_data:
        try:
            with open(preprocessed_json_path, 'w') as f:
                json.dump(all_pages_data, f, indent=4)
            json_path_out = preprocessed_json_path

            report = (
                f"✅ **Processing Complete!**\n"
                f"--- {total_pages_processed} pages processed ---\n"
                f"**1) Total Pages Detected:** {total_pages}\n"
                f"**2) Elements Extracted:**\n"
                f" - Equations: {GLOBAL_EQUATION_COUNT}\n"
                f" - Figures: {GLOBAL_FIGURE_COUNT}\n"
                f"\nDetailed JSON output saved to: `{os.path.basename(json_path_out)}`"
            )
        except Exception as e:
            json_path_out = None
            report = f"❌ ERROR saving combined JSON output: {e}"
    else:
        json_path_out = None
        report = f"❌ WARNING: No page data generated. Halting pipeline. Total pages in PDF: {total_pages}"

    return json_path_out, total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report
347
 
348
 
349
  # ============================================================================
350
- # --- GRADIO INTERFACE FUNCTION ---
351
  # ============================================================================
352
 
353
def gradio_process_pdf(pdf_file) -> Tuple[str, Optional[str]]:
    """Gradio callback: process an uploaded PDF and return report + JSON path.

    Args:
        pdf_file: The uploaded file as delivered by the ``gr.File`` input.
            Either a plain path string or an object exposing the path as
            ``.name`` is accepted; ``None`` means nothing was uploaded.

    Returns:
        ``(report, json_path)``. ``json_path`` is ``None`` when nothing was
        uploaded, processing failed, or no JSON file was produced.
    """
    if pdf_file is None:
        return "Please upload a PDF file.", None

    # Accept both a bare path string and a file-like wrapper with `.name`.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # Outputs go to a fresh temporary directory. It must outlive this call
    # when a JSON file is returned, because the download reads from it.
    temp_output_dir = tempfile.mkdtemp()
    keep_outputs = False

    try:
        json_path, num_pages, num_equations, num_figures, report = run_single_pdf_preprocessing(
            pdf_path, temp_output_dir
        )

        if json_path and os.path.exists(json_path):
            keep_outputs = True
            return report, json_path
        return report, None

    except Exception as e:
        return f"An unexpected error occurred during processing: {e}", None
    finally:
        # Nothing is offered for download on the failure paths, so the
        # directory would only be leaked; remove it in that case.
        if not keep_outputs:
            shutil.rmtree(temp_output_dir, ignore_errors=True)
391
 
392
 
393
  # ============================================================================
@@ -397,31 +272,26 @@ def gradio_process_pdf(pdf_file) -> Tuple[str, Optional[str]]:
397
if __name__ == "__main__":

    # Warn when the weights file is absent; startup continues regardless.
    if not os.path.exists(WEIGHTS_PATH):
        for notice in (
            "⚠️ WARNING: YOLO weight file 'best.pt' not found.",
            "The script will run, but the element counting uses placeholder values.",
        ):
            print(notice)

    # Input: a single PDF upload. Outputs: a Markdown summary and a JSON file.
    pdf_upload = gr.File(label="Upload PDF Document", type="filepath", file_types=[".pdf"])
    summary_view = gr.Markdown(label="Extraction Summary")
    json_download = gr.File(label="Download Preprocessed JSON", type="filepath", visible=True)

    app_description = (
        "Upload a research paper PDF to run the YOLO/OCR pre-processing pipeline.\n"
        "It detects pages, figures, and equations, and returns a summary of the counts "
        "along with the structured JSON output file."
    )

    interface = gr.Interface(
        fn=gradio_process_pdf,
        inputs=pdf_upload,
        outputs=[summary_view, json_download],
        title="🔬 PDF Element Extractor (YOLO/OCR Pipeline)",
        description=app_description,
    )

    print("\nStarting Gradio application...")
    interface.launch(inbrowser=True)
 
3
  import cv2
4
  import torch
5
  import torch.serialization
 
6
  import os
7
+ from typing import Optional, Tuple
 
8
  from ultralytics import YOLO
9
  import logging
10
  import gradio as gr
11
  import shutil
12
  import tempfile
13
+ import json # Still needed for simple JSON logging
14
 
15
  # ============================================================================
16
+ # --- Global Patches and Setup ---
17
  # ============================================================================
18
 
19
+ # Patch torch.load to prevent weights_only error with older models
20
  _original_torch_load = torch.load
21
def patched_torch_load(*args, **kwargs):
    """Call the saved original ``torch.load`` with ``weights_only`` forced off.

    A caller-supplied ``weights_only`` is overridden with ``False``; every
    other argument is passed through untouched.

    NOTE(review): ``weights_only=False`` allows arbitrary objects to be
    unpickled, so this should only ever see trusted checkpoint files.
    """
    forced_kwargs = dict(kwargs, weights_only=False)
    return _original_torch_load(*args, **forced_kwargs)
24
  torch.load = patched_torch_load
 
29
  # --- CONFIGURATION AND CONSTANTS ---
30
  # ============================================================================
31
 
 
 
32
  WEIGHTS_PATH = 'best.pt'
33
 
34
+ # Detection parameters (Required for your box combination logic)
 
 
 
 
35
  CONF_THRESHOLD = 0.2
36
  TARGET_CLASSES = ['figure', 'equation']
37
  IOU_MERGE_THRESHOLD = 0.4
38
  IOA_SUPPRESSION_THRESHOLD = 0.7
 
39
 
40
+ # Global counters (Reset per run)
41
  GLOBAL_FIGURE_COUNT = 0
42
  GLOBAL_EQUATION_COUNT = 0
43
 
44
  # ============================================================================
45
+ # --- BOX COMBINATION LOGIC (Retained from your original script) ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  # ============================================================================
47
 
48
  def calculate_iou(box1, box2):
 
59
  return intersection_area / union_area if union_area > 0 else 0
60
 
61
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  def filter_nested_boxes(detections, ioa_threshold=0.80):
63
+ if not detections: return []
 
64
  for d in detections:
65
  x1, y1, x2, y2 = d['coords']
66
  d['area'] = (x2 - x1) * (y2 - y1)
 
111
  })
112
  return merged_detections
113
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  # ============================================================================
115
+ # --- UTILITY FUNCTIONS (Minimally Required) ---
116
  # ============================================================================
117
 
118
  def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
119
+ """Converts a PyMuPDF Pixmap to a NumPy array for OpenCV/YOLO."""
120
  img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
121
  (pix.h, pix.w, pix.n)
122
  )
 
126
  img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
127
  return img
128
 
 
 
 
129
 
130
def run_yolo_detection_and_count(
    image: np.ndarray, model: YOLO, page_num: int
) -> Tuple[int, int]:
    """Detect figures and equations on one page image and count them.

    Runs ``model.predict`` at ``CONF_THRESHOLD``, keeps boxes whose class
    name is in ``TARGET_CLASSES``, then passes them through
    ``merge_overlapping_boxes`` and ``filter_nested_boxes``. Every surviving
    detection increments both the page-local tally and the matching global
    counter.

    Args:
        image: Page image handed to the model.
        model: Loaded YOLO model.
        page_num: Page number, used only in log messages.

    Returns:
        ``(page_equations, page_figures)`` for this page, or ``(0, 0)`` if
        inference raised (the error is logged and the globals are untouched).
    """
    global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT

    candidates = []
    try:
        # NOTE(review): Ultralytics documents NumPy inputs as BGR; confirm the
        # channel order of the images produced upstream.
        results = model.predict(image, conf=CONF_THRESHOLD, verbose=False)

        if results and results[0].boxes:
            for left, top, right, bottom, score, class_index in results[0].boxes.data.tolist():
                label = model.names[int(class_index)]
                if label not in TARGET_CLASSES:
                    continue
                candidates.append({
                    'coords': (left, top, right, bottom),
                    'class': label,
                    'conf': score
                })
    except Exception as e:
        logging.error(f"YOLO inference failed on page {page_num}: {e}")
        return 0, 0

    # Merge overlapping boxes first, then drop nested ones.
    kept = filter_nested_boxes(
        merge_overlapping_boxes(candidates, IOU_MERGE_THRESHOLD),
        IOA_SUPPRESSION_THRESHOLD,
    )

    page_equations = 0
    page_figures = 0
    for det in kept:
        label = det['class']
        if label == 'figure':
            GLOBAL_FIGURE_COUNT += 1
            page_figures += 1
        elif label == 'equation':
            GLOBAL_EQUATION_COUNT += 1
            page_equations += 1

    # Logged at WARNING so it shows under the module's WARNING-level config.
    logging.warning(f" -> Page {page_num}: EQs={page_equations}, Figs={page_figures}")
    return page_equations, page_figures
177
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
  # ============================================================================
180
+ # --- MAIN DOCUMENT PROCESSING FUNCTION (Modified for Minimal Output) ---
181
  # ============================================================================
182
 
183
def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str]:
    """Count figures and equations across every page of one PDF.

    The global counters are reset, the YOLO model is loaded from
    ``WEIGHTS_PATH``, and each page is rendered at 2x scale and passed to
    ``run_yolo_detection_and_count``. Pages whose rendering fails are logged
    and skipped.

    Args:
        pdf_path: Path of the PDF to process.

    Returns:
        ``(total_pages, equation_count, figure_count, report)``. On a missing
        input file, a model-load failure or a PDF-open failure the three
        counts are 0 and ``report`` carries the error message.
    """
    global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT

    # Counters are per-run; start from zero.
    GLOBAL_FIGURE_COUNT = 0
    GLOBAL_EQUATION_COUNT = 0

    if not os.path.exists(pdf_path):
        report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
        return 0, 0, 0, report

    try:
        model = YOLO(WEIGHTS_PATH)
        logging.warning(f"✅ Loaded YOLO model from: {WEIGHTS_PATH}")
    except Exception as e:
        report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
        return 0, 0, 0, report

    try:
        doc = fitz.open(pdf_path)
        total_pages = doc.page_count
        logging.warning(f"✅ Opened PDF: {doc.page_count} pages")
    except Exception as e:
        report = f"❌ ERROR loading PDF file: {e}"
        return 0, 0, 0, report

    mat = fitz.Matrix(2.0, 2.0)

    # try/finally guarantees the document is closed even when page loading or
    # detection raises part-way through.
    try:
        for page_num_0_based in range(doc.page_count):
            fitz_page = doc.load_page(page_num_0_based)
            page_num = page_num_0_based + 1

            try:
                pix = fitz_page.get_pixmap(matrix=mat)
                original_img = pixmap_to_numpy(pix)
            except Exception as e:
                logging.error(f"Error converting page {page_num} to image: {e}. Skipping.")
                continue

            # Updates the global counters as a side effect.
            run_yolo_detection_and_count(original_img, model, page_num)
    finally:
        doc.close()

    report = (
        f"✅ **YOLO Counting Complete!**\n\n"
        f"**1) Total Pages Detected in PDF:** **{total_pages}**\n"
        f"**2) Total Equations Detected:** **{GLOBAL_EQUATION_COUNT}**\n"
        f"**3) Total Figures Detected:** **{GLOBAL_FIGURE_COUNT}**"
    )

    return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report
241
 
242
 
243
  # ============================================================================
244
+ # --- GRADIO INTERFACE FUNCTION (Modified for minimal output) ---
245
  # ============================================================================
246
 
247
def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str]:
    """Gradio callback: count pages, equations and figures in an uploaded PDF.

    Args:
        pdf_file: The uploaded file as delivered by the ``gr.File`` input.
            Either a plain path string or an object exposing the path as
            ``.name`` is accepted; ``None`` means nothing was uploaded.

    Returns:
        ``(pages, equations, figures, report)``, all strings. The three
        counts are ``"N/A"`` when nothing was uploaded and ``"Error"`` when
        processing raised; ``report`` then carries the message.
    """
    if pdf_file is None:
        return "N/A", "N/A", "N/A", "Please upload a PDF file."

    # Accept both a bare path string and a file-like wrapper with `.name`.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    try:
        num_pages, num_equations, num_figures, report = run_single_pdf_preprocessing(pdf_path)

        # The output widgets are text boxes, so the counts go out as strings.
        return str(num_pages), str(num_equations), str(num_figures), report

    except Exception as e:
        error_msg = f"An unexpected error occurred: {e}"
        return "Error", "Error", "Error", error_msg
 
 
 
 
266
 
267
 
268
  # ============================================================================
 
272
if __name__ == "__main__":

    # Report a missing weights file; the UI is still launched afterwards.
    weights_present = os.path.exists(WEIGHTS_PATH)
    if not weights_present:
        logging.error(f" FATAL ERROR: YOLO weight file '{WEIGHTS_PATH}' not found. Cannot run live inference.")

    pdf_upload = gr.File(label="Upload PDF Document", type="filepath", file_types=[".pdf"])

    # Three read-only text boxes for the counts, then a Markdown report.
    count_boxes = [
        gr.Textbox(label=box_label, interactive=False)
        for box_label in (
            "Total Pages in PDF",
            "Total Equations Detected",
            "Total Figures Detected",
        )
    ]
    report_view = gr.Markdown(label="Processing Summary")

    app_description = (
        "Upload a PDF to instantly run YOLO detection using your **`best.pt`** model "
        "and get the total counts for pages, equations, and figures."
    )

    interface = gr.Interface(
        fn=gradio_process_pdf,
        inputs=pdf_upload,
        outputs=[*count_boxes, report_view],
        title="🎯 Minimalist YOLO Counting for PDF Elements",
        description=app_description,
    )

    print("\nStarting Gradio application...")
    interface.launch(inbrowser=True)