heerjtdev committed on
Commit
fff6c9e
·
verified ·
1 Parent(s): 8d96f17

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -367
app.py CHANGED
@@ -1,370 +1,3 @@
1
- # import fitz # PyMuPDF
2
- # import numpy as np
3
- # import cv2
4
- # import torch
5
- # import torch.serialization
6
- # import os
7
- # from typing import Optional, Tuple, List, Dict, Any
8
- # from ultralytics import YOLO
9
- # import logging
10
- # import gradio as gr
11
- # import shutil
12
- # import tempfile
13
- # import io
14
-
15
- # # ============================================================================
16
- # # --- Global Patches and Setup ---
17
- # # ============================================================================
18
-
19
- # # Patch torch.load to prevent weights_only error with older models
20
- # _original_torch_load = torch.load
21
- # def patched_torch_load(*args, **kwargs):
22
- # kwargs["weights_only"] = False
23
- # return _original_torch_load(*args, **kwargs)
24
- # torch.load = patched_torch_load
25
-
26
- # logging.basicConfig(level=logging.WARNING)
27
-
28
- # # ============================================================================
29
- # # --- CONFIGURATION AND CONSTANTS ---
30
- # # ============================================================================
31
-
32
- # WEIGHTS_PATH = 'best.pt'
33
- # SCALE_FACTOR = 2.0 # Used for page rendering and coordinate scaling
34
-
35
- # # Detection parameters
36
- # CONF_THRESHOLD = 0.2
37
- # TARGET_CLASSES = ['figure', 'equation']
38
- # IOU_MERGE_THRESHOLD = 0.4
39
- # IOA_SUPPRESSION_THRESHOLD = 0.7
40
-
41
- # # Global counters (Reset per run)
42
- # GLOBAL_FIGURE_COUNT = 0
43
- # GLOBAL_EQUATION_COUNT = 0
44
-
45
- # # ============================================================================
46
- # # --- BOX COMBINATION LOGIC ---
47
- # # ============================================================================
48
-
49
- # def calculate_iou(box1, box2):
50
- # x1_a, y1_a, x2_a, y2_a = box1
51
- # x1_b, y1_b, x2_b, y2_b = box2
52
- # x_left = max(x1_a, x1_b)
53
- # y_top = max(y1_a, y1_b)
54
- # x_right = min(x2_a, x2_b)
55
- # y_bottom = min(y2_a, y2_b)
56
- # intersection_area = max(0, x_right - x_left) * max(0, y_bottom - y_top)
57
- # box_a_area = (x2_a - x1_a) * (y2_a - y1_a)
58
- # box_b_area = (x2_b - x1_b) * (y2_b - y1_b)
59
- # union_area = float(box_a_area + box_b_area - intersection_area)
60
- # return intersection_area / union_area if union_area > 0 else 0
61
-
62
-
63
- # def filter_nested_boxes(detections, ioa_threshold=0.80):
64
- # if not detections: return []
65
- # for d in detections:
66
- # x1, y1, x2, y2 = d['coords']
67
- # d['area'] = (x2 - x1) * (y2 - y1)
68
- # detections.sort(key=lambda x: x['area'], reverse=True)
69
- # keep_indices = []
70
- # is_suppressed = [False] * len(detections)
71
- # for i in range(len(detections)):
72
- # if is_suppressed[i]: continue
73
- # keep_indices.append(i)
74
- # box_a = detections[i]['coords']
75
- # for j in range(i + 1, len(detections)):
76
- # if is_suppressed[j]: continue
77
- # box_b = detections[j]['coords']
78
- # x_left = max(box_a[0], box_b[0])
79
- # y_top = max(box_a[1], box_b[1])
80
- # x_right = min(box_a[2], box_b[2])
81
- # y_bottom = min(box_a[3], box_b[3])
82
- # intersection = max(0, x_right - x_left) * max(0, y_bottom - y_top)
83
- # area_b = detections[j]['area']
84
- # if area_b > 0 and intersection / area_b > ioa_threshold:
85
- # is_suppressed[j] = True
86
- # return [detections[i] for i in keep_indices]
87
-
88
-
89
- # def merge_overlapping_boxes(detections, iou_threshold):
90
- # if not detections: return []
91
- # detections.sort(key=lambda d: d['conf'], reverse=True)
92
- # merged_detections = []
93
- # is_merged = [False] * len(detections)
94
- # for i in range(len(detections)):
95
- # if is_merged[i]: continue
96
- # current_box = detections[i]['coords']
97
- # current_class = detections[i]['class']
98
- # merged_x1, merged_y1, merged_x2, merged_y2 = current_box
99
- # for j in range(i + 1, len(detections)):
100
- # if is_merged[j] or detections[j]['class'] != current_class: continue
101
- # other_box = detections[j]['coords']
102
- # iou = calculate_iou(current_box, other_box)
103
- # if iou > iou_threshold:
104
- # merged_x1 = min(merged_x1, other_box[0])
105
- # merged_y1 = min(merged_y1, other_box[1])
106
- # merged_x2 = max(merged_x2, other_box[2])
107
- # merged_y2 = max(merged_y2, other_box[3])
108
- # is_merged[j] = True
109
- # merged_detections.append({
110
- # 'coords': (merged_x1, merged_y1, merged_x2, merged_y2),
111
- # 'y1': merged_y1, 'class': current_class, 'conf': detections[i]['conf']
112
- # })
113
- # return merged_detections
114
-
115
- # # ============================================================================
116
- # # --- UTILITY FUNCTIONS ---
117
- # # ============================================================================
118
-
119
- # def pixmap_to_numpy(pix: fitz.Pixmap) -> np.ndarray:
120
- # """Converts a PyMuPDF Pixmap to a NumPy array for OpenCV/YOLO."""
121
- # img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(
122
- # (pix.h, pix.w, pix.n)
123
- # )
124
- # if pix.n == 4:
125
- # img = cv2.cvtColor(img, cv2.COLOR_RGBA2RGB)
126
- # elif pix.n == 1:
127
- # img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
128
- # return img
129
-
130
-
131
- # def run_yolo_detection_and_count(
132
- # image: np.ndarray, model: YOLO, page_num: int
133
- # ) -> Tuple[int, int, List[Dict[str, Any]]]:
134
- # """
135
- # Runs YOLO inference, applies NMS/filtering, and updates global counters.
136
- # Returns counts AND a list of equation detection results (PDF coordinates).
137
- # """
138
- # global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
139
-
140
- # yolo_detections = []
141
- # page_equations = 0
142
- # page_figures = 0
143
-
144
- # try:
145
- # results = model.predict(image, conf=CONF_THRESHOLD, verbose=False)
146
-
147
- # if results and results[0].boxes:
148
- # for box in results[0].boxes.data.tolist():
149
- # x1, y1, x2, y2, conf, cls_id = box
150
- # cls_name = model.names[int(cls_id)]
151
-
152
- # if cls_name in TARGET_CLASSES:
153
- # yolo_detections.append({
154
- # 'coords': (x1, y1, x2, y2),
155
- # 'class': cls_name,
156
- # 'conf': conf
157
- # })
158
- # except Exception as e:
159
- # logging.error(f"YOLO inference failed on page {page_num}: {e}")
160
- # return 0, 0, []
161
-
162
- # # Apply NMS/Merging/Filtering
163
- # merged_detections = merge_overlapping_boxes(yolo_detections, IOU_MERGE_THRESHOLD)
164
- # final_detections = filter_nested_boxes(merged_detections, IOA_SUPPRESSION_THRESHOLD)
165
-
166
- # equation_results = []
167
-
168
- # # Update Global Counters
169
- # for det in final_detections:
170
- # # Scale coordinates back to the original PDF space (points)
171
- # x1_pix, y1_pix, x2_pix, y2_pix = det['coords']
172
- # x1_pdf = x1_pix / SCALE_FACTOR
173
- # y1_pdf = y1_pix / SCALE_FACTOR
174
- # x2_pdf = x2_pix / SCALE_FACTOR
175
- # y2_pdf = y2_pix / SCALE_FACTOR
176
-
177
- # if det['class'] == 'figure':
178
- # GLOBAL_FIGURE_COUNT += 1
179
- # page_figures += 1
180
- # elif det['class'] == 'equation':
181
- # GLOBAL_EQUATION_COUNT += 1
182
- # page_equations += 1
183
- # equation_results.append({
184
- # 'page': page_num,
185
- # 'bbox_pdf': (x1_pdf, y1_pdf, x2_pdf, y2_pdf) # Coordinates in PDF space
186
- # })
187
-
188
- # logging.warning(f" -> Page {page_num}: EQs={page_equations}, Figs={page_figures}")
189
- # return page_equations, page_figures, equation_results
190
-
191
-
192
- # # ============================================================================
193
- # # --- MAIN DOCUMENT PROCESSING FUNCTION (Fixed for unique filenames) ---
194
- # # ============================================================================
195
-
196
- # def run_single_pdf_preprocessing(pdf_path: str, temp_output_dir: str) -> Tuple[int, int, int, str, List[str]]:
197
- # """
198
- # Runs the pipeline, returns counts, report, and a list of paths to cropped equation images.
199
- # """
200
- # global GLOBAL_FIGURE_COUNT, GLOBAL_EQUATION_COUNT
201
-
202
- # # Reset globals
203
- # GLOBAL_FIGURE_COUNT = 0
204
- # GLOBAL_EQUATION_COUNT = 0
205
-
206
- # if not os.path.exists(pdf_path):
207
- # report = f"❌ FATAL ERROR: Input PDF not found at {pdf_path}."
208
- # return 0, 0, 0, report, []
209
-
210
- # # Model Loading
211
- # try:
212
- # model = YOLO(WEIGHTS_PATH)
213
- # logging.warning(f"✅ Loaded YOLO model from: {WEIGHTS_PATH}")
214
- # except Exception as e:
215
- # report = f"❌ ERROR loading YOLO model: {e}\n(Ensure 'best.pt' is available and valid.)"
216
- # return 0, 0, 0, report, []
217
-
218
- # # PDF Loading
219
- # try:
220
- # doc = fitz.open(pdf_path)
221
- # total_pages = doc.page_count
222
- # logging.warning(f"✅ Opened PDF with {doc.page_count} pages")
223
- # except Exception as e:
224
- # report = f"❌ ERROR loading PDF file: {e}"
225
- # return 0, 0, 0, report, []
226
-
227
- # mat = fitz.Matrix(SCALE_FACTOR, SCALE_FACTOR)
228
- # all_equation_images = [] # Stores file paths (strings) for Gradio gallery
229
-
230
- # # NEW LOCAL COUNTER: Tracks total equations processed for unique filename creation
231
- # equation_save_count = 0
232
-
233
- # for page_num_0_based in range(doc.page_count):
234
- # fitz_page = doc.load_page(page_num_0_based)
235
- # page_num = page_num_0_based + 1
236
-
237
- # try:
238
- # pix = fitz_page.get_pixmap(matrix=mat)
239
- # original_img = pixmap_to_numpy(pix)
240
- # except Exception as e:
241
- # logging.error(f"Error converting page {page_num} to image: {e}. Skipping.")
242
- # continue
243
-
244
- # # Core Detection, Counting, and Equation Result Collection
245
- # _, _, equation_results_page = run_yolo_detection_and_count(
246
- # original_img, model, page_num
247
- # )
248
-
249
- # # --- Image Cropping and Saving for Debugging ---
250
- # for eq in equation_results_page:
251
- # bbox = eq['bbox_pdf']
252
-
253
- # try:
254
- # # Fixed Rect object creation
255
- # rect = fitz.Rect(bbox)
256
- # clip_rect = rect + (0, 0, 5, 5) # Add small padding
257
-
258
- # # Get the pixmap for the cropped area (high-res render)
259
- # eq_pix = fitz_page.get_pixmap(matrix=fitz.Matrix(3.0, 3.0), clip=clip_rect)
260
-
261
- # # Save to a temporary file path
262
- # img_bytes = eq_pix.tobytes("png")
263
-
264
- # # FIX APPLIED: Increment and use local counter for unique filename
265
- # equation_save_count += 1
266
- # filename = f"eq_{equation_save_count}_p{page_num}.png"
267
-
268
- # output_path = os.path.join(temp_output_dir, filename)
269
-
270
- # with open(output_path, 'wb') as f:
271
- # f.write(img_bytes)
272
-
273
- # all_equation_images.append(output_path)
274
-
275
- # except Exception as e:
276
- # logging.error(f"Error cropping equation on page {page_num} with bbox {bbox}: {e}")
277
-
278
- # doc.close()
279
-
280
- # # Final Report Generation (GLOBAL_EQUATION_COUNT is correct here)
281
- # report = (
282
- # f"✅ **YOLO Counting Complete!**\n\n"
283
- # f"**1) Total Pages Detected in PDF:** **{total_pages}**\n"
284
- # f"**2) Total Equations Detected:** **{GLOBAL_EQUATION_COUNT}**\n"
285
- # f"**3) Total Figures Detected:** **{GLOBAL_FIGURE_COUNT}**"
286
- # )
287
-
288
- # # Return the list of file paths (strings)
289
- # return total_pages, GLOBAL_EQUATION_COUNT, GLOBAL_FIGURE_COUNT, report, all_equation_images
290
-
291
-
292
- # # ============================================================================
293
- # # --- GRADIO INTERFACE FUNCTION ---
294
- # # ============================================================================
295
-
296
- # def gradio_process_pdf(pdf_file) -> Tuple[str, str, str, str, List[str]]:
297
- # """
298
- # Gradio wrapper function to handle file upload, manage temporary directory, and return file paths.
299
- # """
300
- # if pdf_file is None:
301
- # return "N/A", "N/A", "N/A", "Please upload a PDF file.", []
302
-
303
- # pdf_path = pdf_file.name
304
- # temp_output_dir = tempfile.mkdtemp() # Create temp directory
305
-
306
- # try:
307
- # # Run the core logic, passing the temp directory
308
- # num_pages, num_equations, num_figures, report, equation_images = run_single_pdf_preprocessing(
309
- # pdf_path, temp_output_dir
310
- # )
311
-
312
- # # Return results and the list of image file paths
313
- # return str(num_pages), str(num_equations), str(num_figures), report, equation_images
314
-
315
- # except Exception as e:
316
- # error_msg = f"An unexpected error occurred: {e}"
317
- # logging.error(error_msg, exc_info=True)
318
- # # Still clean up in case of a hard error
319
- # shutil.rmtree(temp_output_dir, ignore_errors=True)
320
- # return "Error", "Error", "Error", error_msg, []
321
-
322
- # # NOTE: The final cleanup block for success case is intentionally removed
323
- # # to prevent files from being deleted before Gradio can serve them.
324
-
325
-
326
- # # ============================================================================
327
- # # --- GRADIO INTERFACE DEFINITION ---
328
- # # ============================================================================
329
-
330
- # if __name__ == "__main__":
331
-
332
- # if not os.path.exists(WEIGHTS_PATH):
333
- # logging.error(f"❌ FATAL ERROR: YOLO weight file '{WEIGHTS_PATH}' not found. Cannot run live inference.")
334
-
335
- # input_file = gr.File(label="Upload PDF Document", type="filepath", file_types=[".pdf"])
336
-
337
- # # Outputs
338
- # output_pages = gr.Textbox(label="Total Pages in PDF", interactive=False)
339
- # output_equations = gr.Textbox(label="Total Equations Detected", interactive=False)
340
- # output_figures = gr.Textbox(label="Total Figures Detected", interactive=False)
341
- # output_report = gr.Markdown(label="Processing Summary")
342
-
343
- # # Gradio Gallery expects a list of file paths (strings)
344
- # output_gallery = gr.Gallery(
345
- # label="Detected Equations for Debugging",
346
- # columns=5,
347
- # height="auto",
348
- # object_fit="contain",
349
- # allow_preview=True
350
- # )
351
-
352
- # interface = gr.Interface(
353
- # fn=gradio_process_pdf,
354
- # inputs=input_file,
355
- # outputs=[output_pages, output_equations, output_figures, output_report, output_gallery],
356
- # title="🎯 Minimalist YOLO Counting & Equation Debugger",
357
- # description=(
358
- # "Upload a PDF to run YOLO detection using your **`best.pt`** model. "
359
- # "The counts are displayed, and a gallery of **all detected equation images** is shown for debugging."
360
- # ),
361
- # )
362
-
363
- # print("\nStarting Gradio application...")
364
- # interface.launch(inbrowser=True)
365
-
366
-
367
-
368
 
369
 
370
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 
2
 
3