heerjtdev committed on
Commit
2631762
·
verified ·
1 Parent(s): cd7b6c7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -311
app.py CHANGED
@@ -1,306 +1,3 @@
1
- # import fitz # PyMuPDF
2
- # import numpy as np
3
- # import cv2
4
- # import torch
5
- # import torch.serialization
6
- # import os
7
- # from typing import Optional, Tuple
8
- # from ultralytics import YOLO
9
- # import logging
10
- # import gradio as gr
11
- # import shutil
12
- # import tempfile
13
- # import json # Still needed for simple JSON logging
14
-
15
- # # ============================================================================
16
- # # --- Global Patches and Setup ---
17
- # # ============================================================================
18
-
19
- # # Patch torch.load to prevent weights_only error with older models
20
- # _original_torch_load = torch.load
21
- # def patched_torch_load(*args, **kwargs):
22
- # kwargs["weights_only"] = False
23
- # return _original_torch_load(*args, **kwargs)
24
- # torch.load = patched_torch_load
25
-
26
- # logging.basicConfig(level=logging.WARNING)
27
-
28
- # # ============================================================================
29
- # # --- CONFIGURATION AND CONSTANTS ---
30
- # # ============================================================================
31
-
32
- # WEIGHTS_PATH = 'best.pt'
33
-
34
- # # Detection parameters (Required for your box combination logic)
35
- # CONF_THRESHOLD = 0.2
36
- # TARGET_CLASSES = ['figure', 'equation']
37
- # IOU_MERGE_THRESHOLD = 0.4
38
- # IOA_SUPPRESSION_THRESHOLD = 0.7
39
-
40
- # # Global counters (Reset per run)
41
- # GLOBAL_FIGURE_COUNT = 0
42
- # GLOBAL_EQUATION_COUNT = 0
43
-
44
- # # ============================================================================
45
- # # --- BOX COMBINATION LOGIC (Retained from your original script) ---
46
- # # ============================================================================
47
-
48
- # def calculate_iou(box1, box2):
49
- # x1_a, y1_a, x2_a, y2_a = box1
50
- # x1_b, y1_b, x2_b, y2_b = box2
51
- # x_left = max(x1_a, x1_b)
52
- # y_top = max(y1_a, y1_b)
53
- # x_right = min(x2_a, x2_b)
54
- # y_bottom = min(y2_a, y2_b)
55
- # intersection_area = max(0, x_right - x_left) * max(0, y_bottom - y_top)
56
- # box_a_area = (x2_a - x1_a) * (y2_a - y1_a)
57
- # box_b_area = (x2_b - x1_b) * (y2_b - y1_b)
58
- # union_area = float(box_a_area + box_b_area - intersection_area)
59
- # return intersection_area / union_area if union_area > 0 else 0
60
-
61
-
62
- # def filter_nested_boxes(detections, ioa_threshold=0.80):
63
- # if not detections: return []
64
- # for d in detections:
65
- # x1, y1, x2, y2 = d['coords']
66
- # d['area'] = (x2 - x1) * (y2 - y1)
67
- # detections.sort(key=lambda x: x['area'], reverse=True)
68
- # keep_indices = []
69
- # is_suppressed = [False] * len(detections)
70
- # for i in range(len(detections)):
71
- # if is_suppressed[i]: continue
72
- # keep_indices.append(i)
73
- # box_a = detections[i]['coords']
74
- # for j in range(i + 1, len(detections)):
75
- # if is_suppressed[j]: continue
76
- # box_b = detections[j]['coords']
77
- # x_left = max(box_a[0], box_b[0])
78
- # y_top = max(box_a[1], box_b[1])
79
- # x_right = min(box_a[2], box_b[2])
80
- # y_bottom = min(box_a[3], box_b[3])
81
- # intersection = max(0, x_right - x_left) * max(0, y_bottom - y_top)
82
- # area_b = detections[j]['area']
83
- # if area_b > 0 and intersection / area_b > ioa_threshold:
84
- # is_suppressed[j] = True
85
- # return [detections[i] for i in keep_indices]
86
-
87
-
88
- # def merge_overlapping_boxes(detections, iou_threshold):
89
- # if not detections: return []
90
- # detections.sort(key=lambda d: d['conf'], reverse=True)
91
- # merged_detections = []
92
- # is_merged = [False] * len(detections)
93
- # for i in range(len(detections)):
94
- # if is_merged[i]: continue
95
- # current_box = detections[i]['coords']
96
- # current_class = detections[i]['class']
97
- # merged_x1, merged_y1, merged_x2, merged_y2 = current_box
98
- # for j in range(i + 1, len(detections)):
99
- # if is_merged[j] or detections[j]['class'] != current_class: continue
100
- # other_box = detections[j]['coords']
101
- # iou = calculate_iou(current_box, other_box)
102
- # if iou > iou_threshold:
103
- # merged_x1 = min(merged_x1, other_box[0])
104
- # merged_y1 = min(merged_y1, other_box[1])
105
- # merged_x2 = max(merged_x2, other_box[2])
106
- # merged_y2 = max(merged_y2, other_box[3])
107
- # is_merged[j] = True
108
- # merged_detections.append({
109
- # 'coords': (merged_x1, merged_y1, merged_x2, merged_y2),
110
- # 'y1': merged_y1, 'class': current_class, 'conf': detections[i]['conf']
111
- # })
112
- # return merged_detections
113
-
114
- # # ============================================================================
115
- # # --- UTILITY FUNCTIONS (Minimally Required) ---
116
- # # ============================================================================
117
-
118
- # def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
119
- # """Converts a PyMuPDF Pixmap to a NumPy array for OpenCV/YOLO."""
120
- # img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
121
- # (pix.h, pix.w, pix.n)
122
- # )
123
- # if pix.n == 4:
124
- # img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
125
- # elif pix.n == 1:
126
- # img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
127
- # return img
128
-
129
-
130
- # def run_yolo_detection_and_count(
131
- # image: np.ndarray, model: YOLO, page_num: int
132
- # ) -> Tuple[int, int]:
133
- # """
134
- # Runs YOLO inference, applies NMS/filtering, and updates global counters.
135
- # Returns the counts for the current page.
136
- # """
137
- # global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
138
-
139
- # yolo_detections = []
140
- # page_equations = 0
141
- # page_figures = 0
142
-
143
- # try:
144
- # # Run prediction
145
- # results = model.predict(image, conf=CONF_THRESHOLD, verbose=False)
146
-
147
- # if results and results[0].boxes:
148
- # for box in results[0].boxes.data.tolist():
149
- # x1, y1, x2, y2, conf, cls_id = box
150
- # cls_name = model.names[int(cls_id)]
151
-
152
- # if cls_name in TARGET_CLASSES:
153
- # yolo_detections.append({
154
- # 'coords': (x1, y1, x2, y2),
155
- # 'class': cls_name,
156
- # 'conf': conf
157
- # })
158
- # except Exception as e:
159
- # logging.error(f"YOLO inference failed on page {page_num}: {e}")
160
- # return 0, 0
161
-
162
- # # Apply NMS/Merging/Filtering based on your provided logic
163
- # merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
164
- # final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
165
-
166
- # # Update Global Counters
167
- # for det in final_detections:
168
- # if det['class'] == 'figure':
169
- # GLOBAL_FIGURE_COUNT += 1
170
- # page_figures += 1
171
- # elif det['class'] == 'equation':
172
- # GLOBAL_EQUATION_COUNT += 1
173
- # page_equations += 1
174
-
175
- # logging.warning(f" -> Page {page_num}: EQs={page_equations}, Figs={page_figures}")
176
- # return page_equations, page_figures
177
-
178
-
179
- # # ============================================================================
180
- # # --- MAIN DOCUMENT PROCESSING FUNCTION (Modified for Minimal Output) ---
181
- # # ============================================================================
182
-
183
- # def run_single_pdf_preprocessing(pdf_path: str) -> Tuple[int, int, int, str]:
184
- # """
185
- # Runs the pipeline and returns just the counts and a report.
186
- # No intermediate JSON saving or complex output structure.
187
- # """
188
- # global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
189
-
190
- # # Reset globals
191
- # GLOBAL_FIGURE_COUNT = 0
192
- # GLOBAL_EQUATION_COUNT = 0
193
-
194
- # if not os.path.exists(pdf_path):
195
- # report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
196
- # return 0, 0, 0, report
197
-
198
- # # Model Loading (CRITICAL: Requires best.pt)
199
- # try:
200
- # model = YOLO(WEIGHTS_PATH)
201
- # logging.warning(f"✅ Loaded YOLO model from: {WEIGHTS_PATH}")
202
- # except Exception as e:
203
- # report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
204
- # return 0, 0, 0, report
205
-
206
- # try:
207
- # doc = fitz.open(pdf_path)
208
- # total_pages = doc.page_count
209
- # logging.warning(f"✅ Opened PDF: {doc.page_count} pages")
210
- # except Exception as e:
211
- # report = f"❌ ERROR loading PDF file: {e}"
212
- # return 0, 0, 0, report
213
-
214
- # mat = fitz.Matrix(2.0, 2.0)
215
-
216
- # for page_num_0_based in range(doc.page_count):
217
- # fitz_page = doc.load_page(page_num_0_based)
218
- # page_num = page_num_0_based + 1
219
-
220
- # try:
221
- # pix = fitz_page.get_pixmap(matrix=mat)
222
- # original_img = pixmap_to_numpy(pix)
223
- # except Exception as e:
224
- # logging.error(f"Error converting page {page_num} to image: {e}. Skipping.")
225
- # continue
226
-
227
- # # Core Detection and Counting
228
- # run_yolo_detection_and_count(original_img, model, page_num)
229
-
230
- # doc.close()
231
-
232
- # # Final Report Generation
233
- # report = (
234
- # f"✅ **YOLO Counting Complete!**\n\n"
235
- # f"**1) Total Pages Detected in PDF:** **{total_pages}**\n"
236
- # f"**2) Total Equations Detected:** **{GLOBAL_EQUATION_COUNT}**\n"
237
- # f"**3) Total Figures Detected:** **{GLOBAL_FIGURE_COUNT}**"
238
- # )
239
-
240
- # return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report
241
-
242
-
243
- # # ============================================================================
244
- # # --- GRADIO INTERFACE FUNCTION (Modified for minimal output) ---
245
- # # ============================================================================
246
-
247
- # def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str]:
248
- # """
249
- # Gradio wrapper function to handle file upload and return all results as strings.
250
- # """
251
- # if pdf_file is None:
252
- # return "N/A", "N/A", "N/A", "Please upload a PDF file."
253
-
254
- # pdf_path = pdf_file.name
255
-
256
- # try:
257
- # # Run the core logic
258
- # num_pages, num_equations, num_figures, report = run_single_pdf_preprocessing(pdf_path)
259
-
260
- # # Return results as formatted strings
261
- # return str(num_pages), str(num_equations), str(num_figures), report
262
-
263
- # except Exception as e:
264
- # error_msg = f"An unexpected error occurred: {e}"
265
- # return "Error", "Error", "Error", error_msg
266
-
267
-
268
- # # ============================================================================
269
- # # --- GRADIO INTERFACE DEFINITION ---
270
- # # ============================================================================
271
-
272
- # if __name__ == "__main__":
273
-
274
- # if not os.path.exists(WEIGHTS_PATH):
275
- # logging.error(f"❌ FATAL ERROR: YOLO weight file '{WEIGHTS_PATH}' not found. Cannot run live inference.")
276
-
277
- # input_file = gr.File(label="Upload PDF Document", type="filepath", file_types=[".pdf"])
278
-
279
- # # Outputs are now discrete number fields and a final markdown report
280
- # output_pages = gr.Textbox(label="Total Pages in PDF", interactive=False)
281
- # output_equations = gr.Textbox(label="Total Equations Detected", interactive=False)
282
- # output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
283
- # output_report = gr.Markdown(label="Processing Summary")
284
-
285
- # interface = gr.Interface(
286
- # fn=gradio_process_pdf,
287
- # inputs=input_file,
288
- # outputs=[output_pages, output_equations, output_figures, output_report],
289
- # title="🎯 Minimalist YOLO Counting for PDF Elements",
290
- # description=(
291
- # "Upload a PDF to instantly run YOLO detection using your **`best.pt`** model "
292
- # "and get the total counts for pages, equations, and figures."
293
- # ),
294
- # )
295
-
296
- # print("\nStarting Gradio application...")
297
- # interface.launch(inbrowser=True)
298
-
299
-
300
-
301
-
302
-
303
-
304
  import fitz # PyMuPDF
305
  import numpy as np
306
  import cv2
@@ -493,7 +190,7 @@ def run_yolo_detection_and_count(
493
 
494
 
495
  # ============================================================================
496
- # --- MAIN DOCUMENT PROCESSING FUNCTION (Fixed for Gradio Gallery) ---
497
  # ============================================================================
498
 
499
  def run_single_pdf_preprocessing(pdf_path: str, temp_output_dir: str) -> Tuple[int, int, int, str, List[str]]:
@@ -551,14 +248,14 @@ def run_single_pdf_preprocessing(pdf_path: str, temp_output_dir: str) -> Tuple[i
551
  bbox = eq['bbox_pdf']
552
 
553
  try:
554
- # FIX 1 (PyMuPDF Rect): Only instantiate Rect, no .prerotate()
555
  rect = fitz.Rect(bbox)
556
  clip_rect = rect + (0, 0, 5, 5) # Add small padding
557
 
558
  # Get the pixmap for the cropped area (high-res render)
559
  eq_pix = fitz_page.get_pixmap(matrix=fitz.Matrix(3.0, 3.0), clip=clip_rect)
560
 
561
- # FIX 2 (Gradio Gallery): Save to a temporary file path
562
  img_bytes = eq_pix.tobytes("png")
563
  filename = f"eq_{GLOBAL_EQUATION_COUNT}_p{page_num}.png"
564
  output_path = os.path.join(temp_output_dir, filename)
@@ -586,18 +283,22 @@ def run_single_pdf_preprocessing(pdf_path: str, temp_output_dir: str) -> Tuple[i
586
 
587
 
588
  # ============================================================================
589
- # --- GRADIO INTERFACE FUNCTION (Fixed for temp dir management) ---
590
  # ============================================================================
591
 
592
  def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, List[str]]:
593
  """
594
  Gradio wrapper function to handle file upload, manage temporary directory, and return file paths.
 
 
 
595
  """
596
  if pdf_file is None:
597
  return "N/A", "N/A", "N/A", "Please upload a PDF file.", []
598
 
599
  pdf_path = pdf_file.name
600
- temp_output_dir = tempfile.mkdtemp() # Create temp directory
 
601
 
602
  try:
603
  # Run the core logic, passing the temp directory
@@ -611,10 +312,12 @@ def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, List[str]]:
611
  except Exception as e:
612
  error_msg = f"An unexpected error occurred: {e}"
613
  logging.error(error_msg, exc_info=True)
614
- return "Error", "Error", "Error", error_msg, []
615
- finally:
616
- # CRUCIAL: Clean up the temporary directory containing the image files
617
  shutil.rmtree(temp_output_dir, ignore_errors=True)
 
 
 
 
618
 
619
 
620
  # ============================================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import fitz # PyMuPDF
2
  import numpy as np
3
  import cv2
 
190
 
191
 
192
  # ============================================================================
193
+ # --- MAIN DOCUMENT PROCESSING FUNCTION ---
194
  # ============================================================================
195
 
196
  def run_single_pdf_preprocessing(pdf_path: str, temp_output_dir: str) -> Tuple[int, int, int, str, List[str]]:
 
248
  bbox = eq['bbox_pdf']
249
 
250
  try:
251
+ # Fixed Rect object creation
252
  rect = fitz.Rect(bbox)
253
  clip_rect = rect + (0, 0, 5, 5) # Add small padding
254
 
255
  # Get the pixmap for the cropped area (high-res render)
256
  eq_pix = fitz_page.get_pixmap(matrix=fitz.Matrix(3.0, 3.0), clip=clip_rect)
257
 
258
+ # Save to a temporary file path
259
  img_bytes = eq_pix.tobytes("png")
260
  filename = f"eq_{GLOBAL_EQUATION_COUNT}_p{page_num}.png"
261
  output_path = os.path.join(temp_output_dir, filename)
 
283
 
284
 
285
  # ============================================================================
286
+ # --- GRADIO INTERFACE FUNCTION (Fixed for temp dir cleanup) ---
287
  # ============================================================================
288
 
289
  def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, List[str]]:
290
  """
291
  Gradio wrapper function to handle file upload, manage temporary directory, and return file paths.
292
+
293
+ The cleanup block is REMOVED to allow Gradio's front-end to access the images
294
+ before the files are deleted. Gradio will eventually handle the cleanup.
295
  """
296
  if pdf_file is None:
297
  return "N/A", "N/A", "N/A", "Please upload a PDF file.", []
298
 
299
  pdf_path = pdf_file.name
300
+ # Create temp directory
301
+ temp_output_dir = tempfile.mkdtemp()
302
 
303
  try:
304
  # Run the core logic, passing the temp directory
 
312
  except Exception as e:
313
  error_msg = f"An unexpected error occurred: {e}"
314
  logging.error(error_msg, exc_info=True)
315
+ # Still clean up in case of a hard error
 
 
316
  shutil.rmtree(temp_output_dir, ignore_errors=True)
317
+ return "Error", "Error", "Error", error_msg, []
318
+
319
+ # NOTE: The final cleanup block for success case is intentionally removed
320
+ # to prevent files from being deleted before Gradio can serve them.
321
 
322
 
323
  # ============================================================================