heerjtdev committed on
Commit
eeaa348
·
verified ·
1 Parent(s): 5c74425

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +436 -255
app.py CHANGED
@@ -1,22 +1,34 @@
1
 
2
-
3
-
4
-
5
  # import gradio as gr
6
  # import json
7
  # import os
8
  # import tempfile
9
  # import img2pdf
 
 
10
  # from img2pdf import Rotation
11
  # from pathlib import Path
12
 
 
 
 
 
 
 
 
13
  # # ==============================
14
  # # PIPELINE IMPORT
15
  # # ==============================
 
 
 
 
16
  # try:
17
  # from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
18
- # except ImportError:
19
- # print("Warning: 'working_yolo_pipeline.py' not found. Using dummy paths.")
 
 
20
  # def run_document_pipeline(*args):
21
  # return {"error": "Placeholder pipeline function called."}
22
  # DEFAULT_LAYOUTLMV3_MODEL_PATH = "./models/layoutlmv3_model"
@@ -25,13 +37,11 @@
25
  # def process_file(uploaded_files, layoutlmv3_model_path=None):
26
  # """
27
  # Robust handler for multiple or single file uploads.
 
28
  # """
29
  # if uploaded_files is None:
30
  # return "❌ Error: No files uploaded.", None
31
 
32
- # # --- THE ROBUST FIX ---
33
- # # Gradio sometimes sends a single dict even when set to multiple.
34
- # # We force everything into a list so the rest of the logic doesn't break.
35
  # if not isinstance(uploaded_files, list):
36
  # file_list = [uploaded_files]
37
  # else:
@@ -39,7 +49,6 @@
39
 
40
  # if len(file_list) == 0:
41
  # return "❌ Error: Empty file list.", None
42
- # # ----------------------
43
 
44
  # # 1. Resolve all file paths safely
45
  # resolved_paths = []
@@ -62,17 +71,13 @@
62
  # is_image = first_file.suffix.lower() in ['.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff']
63
 
64
  # try:
65
- # # If it's multiple files or just one image, wrap it in a PDF
66
  # if len(resolved_paths) > 1 or is_image:
67
  # print(f"📦 Converting {len(resolved_paths)} image(s) to a single PDF...")
68
  # temp_pdf = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
69
  # with open(temp_pdf.name, "wb") as f_out:
70
- # # f_out.write(img2pdf.convert(resolved_paths))
71
  # f_out.write(img2pdf.convert(resolved_paths, rotation=Rotation.ifvalid))
72
-
73
  # processing_path = temp_pdf.name
74
  # else:
75
- # # It's a single PDF
76
  # processing_path = resolved_paths[0]
77
 
78
  # # 3. Standard Pipeline Checks
@@ -84,267 +89,277 @@
84
  # print(f"🚀 Starting pipeline for: {processing_path}")
85
  # result = run_document_pipeline(processing_path, final_model_path)
86
 
87
- # if result is None:
88
- # return "❌ Error: Pipeline returned None.", None
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
- # # 5. Prepare output
91
- # temp_output = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', prefix='analysis_')
92
- # with open(temp_output.name, 'w', encoding='utf-8') as f:
93
- # json.dump(result, f, indent=2, ensure_ascii=False)
 
 
 
 
 
 
 
94
 
95
- # return json.dumps(result, indent=2, ensure_ascii=False), temp_output.name
96
 
97
  # except Exception as e:
98
  # import traceback
99
  # traceback.print_exc()
100
  # return f"❌ Error: {str(e)}", None
101
 
102
- # # ==============================
103
- # # GRADIO INTERFACE
104
- # # ==============================
105
- # with gr.Blocks(title="Document Analysis Pipeline") as demo:
106
-
107
- # gr.Markdown("# 📄 Document & Image Analysis Pipeline")
108
-
109
- # with gr.Row():
110
- # with gr.Column(scale=1):
111
- # file_input = gr.File(
112
- # label="Upload PDFs or Images",
113
- # file_types=[".pdf", ".jpg", ".jpeg", ".png", ".bmp", ".webp", ".tiff"],
114
- # file_count="multiple", # Keep this
115
- # type="filepath" # Keep this
116
- # )
117
-
118
- # model_path_input = gr.Textbox(
119
- # label="Model Path",
120
- # value=DEFAULT_LAYOUTLMV3_MODEL_PATH
121
- # )
122
 
123
- # process_btn = gr.Button("🚀 Process Files", variant="primary")
124
 
125
- # with gr.Column(scale=2):
126
- # json_output = gr.Code(label="JSON Output", language="json", lines=20)
127
- # download_output = gr.File(label="Download JSON")
128
 
129
- # process_btn.click(
130
- # fn=process_file,
131
- # inputs=[file_input, model_path_input],
132
- # outputs=[json_output, download_output]
133
- # )
134
 
135
- # if __name__ == "__main__":
136
- # demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
 
 
 
 
 
139
 
 
 
 
140
 
 
 
 
 
 
 
 
 
141
 
142
- import gradio as gr
143
- import json
144
- import os
145
- import tempfile
146
- import img2pdf
147
- import glob
148
- import shutil
149
- from img2pdf import Rotation
150
- from pathlib import Path
151
 
 
152
 
 
 
 
 
153
 
154
- print("--- DEBUG: Current Working Directory ---")
155
- print(os.getcwd())
156
- print("--- DEBUG: Files in Root ---")
157
- print(os.listdir('.'))
 
158
 
159
- # ==============================
160
- # PIPELINE IMPORT
161
- # ==============================
162
- # try:
163
- # from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
164
- # except ImportError:
165
- # print("Warning: 'working_yolo_pipeline.py' not found. Using dummy paths.")
166
- try:
167
- from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
168
- except Exception as e: # Catch ALL exceptions
169
- print(f"Warning: Failed to import pipeline: {e}")
170
- import traceback
171
- traceback.print_exc() # Show the actual error
172
- def run_document_pipeline(*args):
173
- return {"error": "Placeholder pipeline function called."}
174
- DEFAULT_LAYOUTLMV3_MODEL_PATH = "./models/layoutlmv3_model"
175
- WEIGHTS_PATH = "./weights/yolo_weights.pt"
176
 
177
- def process_file(uploaded_files, layoutlmv3_model_path=None):
178
- """
179
- Robust handler for multiple or single file uploads.
180
- Returns the final JSON and a LIST of all intermediate JSON files (OCR, Predictions, BIO).
181
- """
182
- if uploaded_files is None:
183
- return "❌ Error: No files uploaded.", None
184
 
185
- if not isinstance(uploaded_files, list):
186
- file_list = [uploaded_files]
187
- else:
188
- file_list = uploaded_files
189
 
190
- if len(file_list) == 0:
191
- return "❌ Error: Empty file list.", None
192
 
193
- # 1. Resolve all file paths safely
194
- resolved_paths = []
195
- for f in file_list:
196
- try:
197
- if isinstance(f, dict) and "path" in f:
198
- resolved_paths.append(f["path"])
199
- elif hasattr(f, 'path'):
200
- resolved_paths.append(f.path)
201
- else:
202
- resolved_paths.append(str(f))
203
- except Exception as e:
204
- print(f"Error resolving path for {f}: {e}")
205
 
206
- if not resolved_paths:
207
- return "❌ Error: Could not resolve file paths.", None
208
 
209
- # 2. Determine if we should merge into a single PDF
210
- first_file = Path(resolved_paths[0])
211
- is_image = first_file.suffix.lower() in ['.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff']
 
 
 
 
212
 
213
- try:
214
- if len(resolved_paths) > 1 or is_image:
215
- print(f"📦 Converting {len(resolved_paths)} image(s) to a single PDF...")
216
- temp_pdf = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
217
- with open(temp_pdf.name, "wb") as f_out:
218
- f_out.write(img2pdf.convert(resolved_paths, rotation=Rotation.ifvalid))
219
- processing_path = temp_pdf.name
220
- else:
221
- processing_path = resolved_paths[0]
222
-
223
- # 3. Standard Pipeline Checks
224
- final_model_path = layoutlmv3_model_path or DEFAULT_LAYOUTLMV3_MODEL_PATH
225
- if not os.path.exists(final_model_path):
226
- return f"❌ Error: Model not found at {final_model_path}", None
227
 
228
- # 4. Call the pipeline
229
- print(f"🚀 Starting pipeline for: {processing_path}")
230
- result = run_document_pipeline(processing_path, final_model_path)
231
-
232
- # 5. SCRAPE FOR INTERMEDIATE FILES
233
- # We look for all .json files in /tmp/ created during this run
234
- base_name = Path(processing_path).stem
235
- # This matches common patterns like /tmp/pipeline_run_... or filenames in /tmp/
236
- search_patterns = [
237
- f"/tmp/pipeline_run_{base_name}*/*.json",
238
- f"/tmp/*{base_name}*.json"
239
- ]
240
 
241
- all_intermediate_jsons = []
242
- for pattern in search_patterns:
243
- all_intermediate_jsons.extend(glob.glob(pattern))
 
 
 
 
 
 
 
 
 
 
 
 
244
 
245
- # Remove duplicates while preserving order
246
- all_intermediate_jsons = list(dict.fromkeys(all_intermediate_jsons))
247
-
248
- # 6. Prepare Final Output for Display
249
- if result is None or (isinstance(result, list) and len(result) == 0):
250
- display_text = "⚠️ Pipeline failed at Step 3 (BIO Decoding).\nDownload the intermediate JSONs below to inspect OCR and Model Predictions."
251
- else:
252
- display_text = json.dumps(result, indent=2, ensure_ascii=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
 
254
- # If the final result succeeded, save it to a temp file so it can be downloaded too
255
- temp_final = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', prefix='final_result_')
256
- json.dump(result, temp_final, indent=2, ensure_ascii=False)
257
- temp_final.close()
258
- all_intermediate_jsons.append(temp_final.name)
259
-
260
- return display_text, all_intermediate_jsons
261
-
262
- except Exception as e:
263
- import traceback
264
- traceback.print_exc()
265
- return f"❌ Error: {str(e)}", None
266
-
267
-
268
-
269
-
270
-
271
- # def visualize_detections(uploaded_files):
272
- # """Shows the first uploaded image with YOLO bounding boxes"""
273
- # if not uploaded_files:
274
- # return None
275
-
276
- # # Get first file path
277
- # file_path = uploaded_files[0] if isinstance(uploaded_files, list) else uploaded_files
278
- # if isinstance(file_path, dict):
279
- # file_path = file_path["path"]
280
-
281
- # import cv2
282
- # from ultralytics import YOLO
283
-
284
- # # Load image
285
- # img = cv2.imread(str(file_path))
286
- # if img is None:
287
- # return None
288
-
289
- # # Run YOLO
290
- # model = YOLO(WEIGHTS_PATH)
291
- # results = model.predict(source=img, conf=0.2, imgsz=640, verbose=False)
292
-
293
- # # Draw boxes
294
- # for box in results[0].boxes:
295
- # class_id = int(box.cls[0])
296
- # class_name = model.names[class_id]
297
- # if class_name in ['figure', 'equation']:
298
- # x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
299
- # color = (0, 255, 0) if class_name == 'figure' else (255, 0, 0)
300
- # cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
301
- # cv2.putText(img, f"{class_name} {box.conf[0]:.2f}",
302
- # (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
303
-
304
- # # Save and return
305
- # temp_path = tempfile.NamedTemporaryFile(delete=False, suffix=".png").name
306
- # cv2.imwrite(temp_path, img)
307
- # return temp_path
308
-
309
 
310
- # # ==============================
311
- # # GRADIO INTERFACE
312
- # # ==============================
313
- # with gr.Blocks(title="Document Analysis Pipeline") as demo:
314
-
315
- # gr.Markdown("# 📄 Full Pipeline Analysis")
316
- # gr.Markdown("### πŸ” Intermediate File Recovery Active")
317
- # gr.Markdown("The **Download** box will contain: \n1. OCR JSON (Step 1)\n2. Raw LayoutLMv3 Prediction JSON (Step 2)\n3. Final BIO JSON (Step 3)")
318
-
319
- # with gr.Row():
320
- # with gr.Column(scale=1):
321
- # file_input = gr.File(
322
- # label="Upload PDFs or Images",
323
- # file_types=[".pdf", ".jpg", ".jpeg", ".png", ".bmp", ".webp", ".tiff"],
324
- # file_count="multiple",
325
- # type="filepath"
326
- # )
327
 
328
- # model_path_input = gr.Textbox(
329
- # label="Model Path",
330
- # value=DEFAULT_LAYOUTLMV3_MODEL_PATH
331
- # )
 
 
332
 
333
- # process_btn = gr.Button("🚀 Run Pipeline", variant="primary")
 
 
 
 
 
334
 
335
- # with gr.Column(scale=2):
336
- # json_output = gr.Code(label="Final Structured Output", language="json", lines=20)
337
- # # IMPORTANT: file_count="multiple" allows returning the list of all stage files
338
- # download_output = gr.File(label="Download All Pipeline Stages (JSON)", file_count="multiple")
339
 
340
- # process_btn.click(
341
- # fn=process_file,
342
- # inputs=[file_input, model_path_input],
343
- # outputs=[json_output, download_output]
344
- # )
345
 
346
- # if __name__ == "__main__":
347
- # demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
348
 
349
 
350
 
@@ -354,12 +369,18 @@ def process_file(uploaded_files, layoutlmv3_model_path=None):
354
  # # ==============================
355
  # # VISUAL DEBUG FUNCTION
356
  # # ==============================
357
- # def visualize_detections(uploaded_files):
358
- # """Shows the first uploaded image with YOLO bounding boxes"""
359
  # if not uploaded_files:
360
  # return None
361
 
362
  # try:
 
 
 
 
 
 
363
  # # Get first file path
364
  # file_path = uploaded_files[0] if isinstance(uploaded_files, list) else uploaded_files
365
  # if isinstance(file_path, dict):
@@ -367,16 +388,11 @@ def process_file(uploaded_files, layoutlmv3_model_path=None):
367
  # elif hasattr(file_path, 'path'):
368
  # file_path = file_path.path
369
 
370
- # import cv2
371
- # import numpy as np
372
-
373
- # from ultralytics import YOLO
374
- # import fitz
375
-
376
  # # Handle PDF conversion to image
377
  # if str(file_path).lower().endswith('.pdf'):
378
  # doc = fitz.open(file_path)
379
- # page_idx = int(page_num) - 1
 
380
  # page = doc.load_page(page_idx)
381
 
382
  # pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
@@ -418,8 +434,8 @@ def process_file(uploaded_files, layoutlmv3_model_path=None):
418
  # detection_count[class_name] += 1
419
 
420
  # # Add summary text at top
421
- # summary = f"Detected: {detection_count['figure']} Figures (GREEN), {detection_count['equation']} Equations (RED)"
422
- # cv2.rectangle(img, (10, 10), (10 + len(summary) * 10, 40), (0, 0, 0), -1)
423
  # cv2.putText(img, summary, (15, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
424
 
425
  # # Save to temp file
@@ -433,7 +449,6 @@ def process_file(uploaded_files, layoutlmv3_model_path=None):
433
  # traceback.print_exc()
434
  # return None
435
 
436
-
437
  # # ==============================
438
  # # GRADIO INTERFACE
439
  # # ==============================
@@ -467,7 +482,7 @@ def process_file(uploaded_files, layoutlmv3_model_path=None):
467
  # )
468
 
469
  # # Debug button for visual inspection
470
- # debug_btn = gr.Button("πŸ” Show YOLO Detections (First Page)", variant="secondary")
471
 
472
  # # Main processing button
473
  # process_btn = gr.Button("🚀 Run Full Pipeline", variant="primary")
@@ -496,8 +511,6 @@ def process_file(uploaded_files, layoutlmv3_model_path=None):
496
  # outputs=[json_output, download_output]
497
  # )
498
 
499
-
500
-
501
  # if __name__ == "__main__":
502
  # demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
503
 
@@ -506,6 +519,172 @@ def process_file(uploaded_files, layoutlmv3_model_path=None):
506
 
507
 
508
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
509
  # ==============================
510
  # VISUAL DEBUG FUNCTION
511
  # ==============================
@@ -631,8 +810,8 @@ with gr.Blocks(title="Document Analysis Pipeline") as demo:
631
  # Visual debug output
632
  detection_preview = gr.Image(label="YOLO Detection Preview (Green=Figure, Red=Equation)", type="filepath")
633
 
634
- # Final JSON output
635
- json_output = gr.Code(label="Final Structured Output", language="json", lines=20)
636
 
637
  # Download all intermediate files
638
  download_output = gr.File(label="Download All Pipeline Stages (JSON)", file_count="multiple")
@@ -648,9 +827,11 @@ with gr.Blocks(title="Document Analysis Pipeline") as demo:
648
  process_btn.click(
649
  fn=process_file,
650
  inputs=[file_input, model_path_input],
651
- outputs=[json_output, download_output]
 
652
  )
653
 
654
  if __name__ == "__main__":
655
- demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
 
656
 
 
1
 
 
 
 
2
  # import gradio as gr
3
  # import json
4
  # import os
5
  # import tempfile
6
  # import img2pdf
7
+ # import glob
8
+ # import shutil
9
  # from img2pdf import Rotation
10
  # from pathlib import Path
11
 
12
+
13
+
14
+ # print("--- DEBUG: Current Working Directory ---")
15
+ # print(os.getcwd())
16
+ # print("--- DEBUG: Files in Root ---")
17
+ # print(os.listdir('.'))
18
+
19
  # # ==============================
20
  # # PIPELINE IMPORT
21
  # # ==============================
22
+ # # try:
23
+ # # from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
24
+ # # except ImportError:
25
+ # # print("Warning: 'working_yolo_pipeline.py' not found. Using dummy paths.")
26
  # try:
27
  # from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
28
+ # except Exception as e: # Catch ALL exceptions
29
+ # print(f"Warning: Failed to import pipeline: {e}")
30
+ # import traceback
31
+ # traceback.print_exc() # Show the actual error
32
  # def run_document_pipeline(*args):
33
  # return {"error": "Placeholder pipeline function called."}
34
  # DEFAULT_LAYOUTLMV3_MODEL_PATH = "./models/layoutlmv3_model"
 
37
  # def process_file(uploaded_files, layoutlmv3_model_path=None):
38
  # """
39
  # Robust handler for multiple or single file uploads.
40
+ # Returns the final JSON and a LIST of all intermediate JSON files (OCR, Predictions, BIO).
41
  # """
42
  # if uploaded_files is None:
43
  # return "❌ Error: No files uploaded.", None
44
 
 
 
 
45
  # if not isinstance(uploaded_files, list):
46
  # file_list = [uploaded_files]
47
  # else:
 
49
 
50
  # if len(file_list) == 0:
51
  # return "❌ Error: Empty file list.", None
 
52
 
53
  # # 1. Resolve all file paths safely
54
  # resolved_paths = []
 
71
  # is_image = first_file.suffix.lower() in ['.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff']
72
 
73
  # try:
 
74
  # if len(resolved_paths) > 1 or is_image:
75
  # print(f"📦 Converting {len(resolved_paths)} image(s) to a single PDF...")
76
  # temp_pdf = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
77
  # with open(temp_pdf.name, "wb") as f_out:
 
78
  # f_out.write(img2pdf.convert(resolved_paths, rotation=Rotation.ifvalid))
 
79
  # processing_path = temp_pdf.name
80
  # else:
 
81
  # processing_path = resolved_paths[0]
82
 
83
  # # 3. Standard Pipeline Checks
 
89
  # print(f"🚀 Starting pipeline for: {processing_path}")
90
  # result = run_document_pipeline(processing_path, final_model_path)
91
 
92
+ # # 5. SCRAPE FOR INTERMEDIATE FILES
93
+ # # We look for all .json files in /tmp/ created during this run
94
+ # base_name = Path(processing_path).stem
95
+ # # This matches common patterns like /tmp/pipeline_run_... or filenames in /tmp/
96
+ # search_patterns = [
97
+ # f"/tmp/pipeline_run_{base_name}*/*.json",
98
+ # f"/tmp/*{base_name}*.json"
99
+ # ]
100
+
101
+ # all_intermediate_jsons = []
102
+ # for pattern in search_patterns:
103
+ # all_intermediate_jsons.extend(glob.glob(pattern))
104
+
105
+ # # Remove duplicates while preserving order
106
+ # all_intermediate_jsons = list(dict.fromkeys(all_intermediate_jsons))
107
 
108
+ # # 6. Prepare Final Output for Display
109
+ # if result is None or (isinstance(result, list) and len(result) == 0):
110
+ # display_text = "⚠️ Pipeline failed at Step 3 (BIO Decoding).\nDownload the intermediate JSONs below to inspect OCR and Model Predictions."
111
+ # else:
112
+ # display_text = json.dumps(result, indent=2, ensure_ascii=False)
113
+
114
+ # # If the final result succeeded, save it to a temp file so it can be downloaded too
115
+ # temp_final = tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json', prefix='final_result_')
116
+ # json.dump(result, temp_final, indent=2, ensure_ascii=False)
117
+ # temp_final.close()
118
+ # all_intermediate_jsons.append(temp_final.name)
119
 
120
+ # return display_text, all_intermediate_jsons
121
 
122
  # except Exception as e:
123
  # import traceback
124
  # traceback.print_exc()
125
  # return f"❌ Error: {str(e)}", None
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
 
128
 
 
 
 
129
 
 
 
 
 
 
130
 
131
+ # # def visualize_detections(uploaded_files):
132
+ # # """Shows the first uploaded image with YOLO bounding boxes"""
133
+ # # if not uploaded_files:
134
+ # # return None
135
+
136
+ # # # Get first file path
137
+ # # file_path = uploaded_files[0] if isinstance(uploaded_files, list) else uploaded_files
138
+ # # if isinstance(file_path, dict):
139
+ # # file_path = file_path["path"]
140
+
141
+ # # import cv2
142
+ # # from ultralytics import YOLO
143
+
144
+ # # # Load image
145
+ # # img = cv2.imread(str(file_path))
146
+ # # if img is None:
147
+ # # return None
148
+
149
+ # # # Run YOLO
150
+ # # model = YOLO(WEIGHTS_PATH)
151
+ # # results = model.predict(source=img, conf=0.2, imgsz=640, verbose=False)
152
+
153
+ # # # Draw boxes
154
+ # # for box in results[0].boxes:
155
+ # # class_id = int(box.cls[0])
156
+ # # class_name = model.names[class_id]
157
+ # # if class_name in ['figure', 'equation']:
158
+ # # x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
159
+ # # color = (0, 255, 0) if class_name == 'figure' else (255, 0, 0)
160
+ # # cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
161
+ # # cv2.putText(img, f"{class_name} {box.conf[0]:.2f}",
162
+ # # (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
163
+
164
+ # # # Save and return
165
+ # # temp_path = tempfile.NamedTemporaryFile(delete=False, suffix=".png").name
166
+ # # cv2.imwrite(temp_path, img)
167
+ # # return temp_path
168
 
169
 
170
+ # # # ==============================
171
+ # # # GRADIO INTERFACE
172
+ # # # ==============================
173
+ # # with gr.Blocks(title="Document Analysis Pipeline") as demo:
174
 
175
+ # # gr.Markdown("# 📄 Full Pipeline Analysis")
176
+ # # gr.Markdown("### πŸ” Intermediate File Recovery Active")
177
+ # # gr.Markdown("The **Download** box will contain: \n1. OCR JSON (Step 1)\n2. Raw LayoutLMv3 Prediction JSON (Step 2)\n3. Final BIO JSON (Step 3)")
178
 
179
+ # # with gr.Row():
180
+ # # with gr.Column(scale=1):
181
+ # # file_input = gr.File(
182
+ # # label="Upload PDFs or Images",
183
+ # # file_types=[".pdf", ".jpg", ".jpeg", ".png", ".bmp", ".webp", ".tiff"],
184
+ # # file_count="multiple",
185
+ # # type="filepath"
186
+ # # )
187
 
188
+ # # model_path_input = gr.Textbox(
189
+ # # label="Model Path",
190
+ # # value=DEFAULT_LAYOUTLMV3_MODEL_PATH
191
+ # # )
 
 
 
 
 
192
 
193
+ # # process_btn = gr.Button("🚀 Run Pipeline", variant="primary")
194
 
195
+ # # with gr.Column(scale=2):
196
+ # # json_output = gr.Code(label="Final Structured Output", language="json", lines=20)
197
+ # # # IMPORTANT: file_count="multiple" allows returning the list of all stage files
198
+ # # download_output = gr.File(label="Download All Pipeline Stages (JSON)", file_count="multiple")
199
 
200
+ # # process_btn.click(
201
+ # # fn=process_file,
202
+ # # inputs=[file_input, model_path_input],
203
+ # # outputs=[json_output, download_output]
204
+ # # )
205
 
206
+ # # if __name__ == "__main__":
207
+ # # demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
 
 
 
 
 
 
 
 
209
 
 
 
 
 
210
 
 
 
211
 
 
 
 
 
 
 
 
 
 
 
 
 
212
 
 
 
213
 
214
+ # # # ==============================
215
+ # # # VISUAL DEBUG FUNCTION
216
+ # # # ==============================
217
+ # # def visualize_detections(uploaded_files):
218
+ # # """Shows the first uploaded image with YOLO bounding boxes"""
219
+ # # if not uploaded_files:
220
+ # # return None
221
 
222
+ # # try:
223
+ # # # Get first file path
224
+ # # file_path = uploaded_files[0] if isinstance(uploaded_files, list) else uploaded_files
225
+ # # if isinstance(file_path, dict):
226
+ # # file_path = file_path["path"]
227
+ # # elif hasattr(file_path, 'path'):
228
+ # # file_path = file_path.path
229
+
230
+ # # import cv2
231
+ # # import numpy as np
 
 
 
 
232
 
233
+ # # from ultralytics import YOLO
234
+ # # import fitz
 
 
 
 
 
 
 
 
 
 
235
 
236
+ # # # Handle PDF conversion to image
237
+ # # if str(file_path).lower().endswith('.pdf'):
238
+ # # doc = fitz.open(file_path)
239
+ # # page_idx = int(page_num) - 1
240
+ # # page = doc.load_page(page_idx)
241
+
242
+ # # pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
243
+ # # img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
244
+ # # if pix.n == 3:
245
+ # # img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
246
+ # # elif pix.n == 4:
247
+ # # img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
248
+ # # doc.close()
249
+ # # else:
250
+ # # img = cv2.imread(str(file_path))
251
 
252
+ # # if img is None:
253
+ # # return None
254
+
255
+ # # # Run YOLO detection
256
+ # # model = YOLO(WEIGHTS_PATH)
257
+ # # results = model.predict(source=img, conf=0.2, imgsz=640, verbose=False)
258
+
259
+ # # # Draw bounding boxes
260
+ # # detection_count = {'figure': 0, 'equation': 0}
261
+ # # for box in results[0].boxes:
262
+ # # class_id = int(box.cls[0])
263
+ # # class_name = model.names[class_id]
264
+ # # if class_name in ['figure', 'equation']:
265
+ # # x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
266
+ # # conf = float(box.conf[0])
267
+
268
+ # # # Green for figures, Red for equations
269
+ # # color = (0, 255, 0) if class_name == 'figure' else (0, 0, 255)
270
+ # # cv2.rectangle(img, (x1, y1), (x2, y2), color, 3)
271
+
272
+ # # # Add label with confidence
273
+ # # label = f"{class_name.upper()} {conf:.2f}"
274
+ # # (text_width, text_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
275
+ # # cv2.rectangle(img, (x1, y1 - text_height - 10), (x1 + text_width, y1), color, -1)
276
+ # # cv2.putText(img, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
277
+
278
+ # # detection_count[class_name] += 1
279
+
280
+ # # # Add summary text at top
281
+ # # summary = f"Detected: {detection_count['figure']} Figures (GREEN), {detection_count['equation']} Equations (RED)"
282
+ # # cv2.rectangle(img, (10, 10), (10 + len(summary) * 10, 40), (0, 0, 0), -1)
283
+ # # cv2.putText(img, summary, (15, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
284
+
285
+ # # # Save to temp file
286
+ # # temp_path = tempfile.NamedTemporaryFile(delete=False, suffix=".png").name
287
+ # # cv2.imwrite(temp_path, img)
288
+ # # return temp_path
289
+
290
+ # # except Exception as e:
291
+ # # print(f"Error in visualize_detections: {e}")
292
+ # # import traceback
293
+ # # traceback.print_exc()
294
+ # # return None
295
+
296
+
297
+ # # # ==============================
298
+ # # # GRADIO INTERFACE
299
+ # # # ==============================
300
+ # # with gr.Blocks(title="Document Analysis Pipeline") as demo:
301
+
302
+ # # gr.Markdown("# 📄 Full Pipeline Analysis")
303
+ # # gr.Markdown("### πŸ” Intermediate File Recovery Active")
304
+ # # gr.Markdown("The **Download** box will contain: \n1. OCR JSON (Step 1)\n2. Raw LayoutLMv3 Prediction JSON (Step 2)\n3. Final BIO JSON (Step 3)")
305
+
306
+ # # with gr.Row():
307
+ # # with gr.Column(scale=1):
308
+ # # file_input = gr.File(
309
+ # # label="Upload PDFs or Images",
310
+ # # file_types=[".pdf", ".jpg", ".jpeg", ".png", ".bmp", ".webp", ".tiff"],
311
+ # # file_count="multiple",
312
+ # # type="filepath"
313
+ # # )
314
+
315
+ # # page_selector = gr.Slider(
316
+ # # minimum=1,
317
+ # # maximum=100,
318
+ # # value=1,
319
+ # # step=1,
320
+ # # label="PDF Page Number (for preview)",
321
+ # # visible=True
322
+ # # )
323
+
324
+ # # model_path_input = gr.Textbox(
325
+ # # label="Model Path",
326
+ # # value=DEFAULT_LAYOUTLMV3_MODEL_PATH
327
+ # # )
328
+
329
+ # # # Debug button for visual inspection
330
+ # # debug_btn = gr.Button("πŸ” Show YOLO Detections (First Page)", variant="secondary")
331
 
332
+ # # # Main processing button
333
+ # # process_btn = gr.Button("🚀 Run Full Pipeline", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
 
335
+ # # with gr.Column(scale=2):
336
+ # # # Visual debug output
337
+ # # detection_preview = gr.Image(label="YOLO Detection Preview (Green=Figure, Red=Equation)", type="filepath")
338
+
339
+ # # # Final JSON output
340
+ # # json_output = gr.Code(label="Final Structured Output", language="json", lines=20)
341
+
342
+ # # # Download all intermediate files
343
+ # # download_output = gr.File(label="Download All Pipeline Stages (JSON)", file_count="multiple")
 
 
 
 
 
 
 
 
344
 
345
+ # # # Wire up the debug button
346
+ # # debug_btn.click(
347
+ # # fn=visualize_detections,
348
+ # # inputs=[file_input, page_selector],
349
+ # # outputs=[detection_preview]
350
+ # # )
351
 
352
+ # # # Wire up the main processing button
353
+ # # process_btn.click(
354
+ # # fn=process_file,
355
+ # # inputs=[file_input, model_path_input],
356
+ # # outputs=[json_output, download_output]
357
+ # # )
358
 
 
 
 
 
359
 
 
 
 
 
 
360
 
361
+ # # if __name__ == "__main__":
362
+ # # demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
363
 
364
 
365
 
 
369
  # # ==============================
370
  # # VISUAL DEBUG FUNCTION
371
  # # ==============================
372
+ # def visualize_detections(uploaded_files, page_num):
373
+ # """Shows the selected PDF page or image with YOLO bounding boxes"""
374
  # if not uploaded_files:
375
  # return None
376
 
377
  # try:
378
+ # import cv2
379
+ # import numpy as np
380
+ # import tempfile
381
+ # from ultralytics import YOLO
382
+ # import fitz
383
+
384
  # # Get first file path
385
  # file_path = uploaded_files[0] if isinstance(uploaded_files, list) else uploaded_files
386
  # if isinstance(file_path, dict):
 
388
  # elif hasattr(file_path, 'path'):
389
  # file_path = file_path.path
390
 
 
 
 
 
 
 
391
  # # Handle PDF conversion to image
392
  # if str(file_path).lower().endswith('.pdf'):
393
  # doc = fitz.open(file_path)
394
+ # # Ensure the selected page exists in the document
395
+ # page_idx = min(max(int(page_num) - 1, 0), len(doc) - 1)
396
  # page = doc.load_page(page_idx)
397
 
398
  # pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
 
434
  # detection_count[class_name] += 1
435
 
436
  # # Add summary text at top
437
+ # summary = f"Page {page_num} | Detected: {detection_count['figure']} Figures, {detection_count['equation']} Equations"
438
+ # cv2.rectangle(img, (10, 10), (10 + len(summary) * 11, 40), (0, 0, 0), -1)
439
  # cv2.putText(img, summary, (15, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
440
 
441
  # # Save to temp file
 
449
  # traceback.print_exc()
450
  # return None
451
 
 
452
  # # ==============================
453
  # # GRADIO INTERFACE
454
  # # ==============================
 
482
  # )
483
 
484
  # # Debug button for visual inspection
485
+ # debug_btn = gr.Button("πŸ” Show YOLO Detections", variant="secondary")
486
 
487
  # # Main processing button
488
  # process_btn = gr.Button("🚀 Run Full Pipeline", variant="primary")
 
511
  # outputs=[json_output, download_output]
512
  # )
513
 
 
 
514
  # if __name__ == "__main__":
515
  # demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
516
 
 
519
 
520
 
521
 
522
+
523
+
524
+
525
+
526
+
527
+
528
+
529
+
530
+
531
+
532
+
533
+
534
+
535
+
536
+
537
+
538
+
539
+
540
+
541
+
542
+
543
+
544
+
545
+
546
+
547
+
548
+ import gradio as gr
549
+ import json
550
+ import os
551
+ import tempfile
552
+ import img2pdf
553
+ import glob
554
+ import shutil
555
+ from img2pdf import Rotation
556
+ from pathlib import Path
557
+
558
# Startup diagnostics: log where the app is running and which files sit next
# to it, so a failed pipeline import below can be diagnosed from the logs.
print("--- DEBUG: Current Working Directory ---")
print(os.getcwd())
print("--- DEBUG: Files in Root ---")
print(os.listdir('.'))

# ==============================
# PIPELINE IMPORT
# ==============================
try:
    from working_yolo_pipeline import run_document_pipeline, DEFAULT_LAYOUTLMV3_MODEL_PATH, WEIGHTS_PATH
except Exception as e:  # Deliberately broad: any import-time failure must not stop the UI from starting.
    print(f"Warning: Failed to import pipeline: {e}")
    import traceback
    traceback.print_exc()  # Show the actual error

    def run_document_pipeline(*args, **kwargs):
        """Fallback used when the real pipeline cannot be imported.

        Accepts any positional or keyword arguments so that every call shape
        reaches the error update instead of failing with ``TypeError``.

        Yields:
            A single update dict with ``status == "error"``.
        """
        yield {"status": "error", "message": "Placeholder pipeline function called."}

    # Placeholder locations used only when the pipeline module is unavailable.
    DEFAULT_LAYOUTLMV3_MODEL_PATH = "./models/layoutlmv3_model"
    WEIGHTS_PATH = "./weights/yolo_weights.pt"
576
+
577
+
578
+ # ==============================
579
+ # MAIN PROCESSING GENERATOR
580
+ # ==============================
581
# File suffixes that are treated as images and wrapped into a PDF first.
_IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png', '.bmp', '.webp', '.tiff')


def _resolve_upload_path(upload):
    """Return the filesystem path carried by one upload entry.

    Handles a dict with a ``"path"`` key, an object with a ``path``
    attribute, and falls back to ``str(upload)`` for anything else.
    """
    if isinstance(upload, dict) and "path" in upload:
        return upload["path"]
    if hasattr(upload, 'path'):
        return upload.path
    return str(upload)


def _collect_intermediate_jsons(base_name):
    """Return de-duplicated JSON files under /tmp whose path contains *base_name*."""
    # Escape the stem so characters such as "[" or "*" in a file name are
    # matched literally instead of being interpreted as glob syntax.
    stem = glob.escape(base_name)
    search_patterns = (
        f"/tmp/pipeline_run_{stem}*/*.json",
        f"/tmp/*{stem}*.json",
    )
    matches = []
    for pattern in search_patterns:
        matches.extend(glob.glob(pattern))
    # dict.fromkeys drops duplicates while keeping first-seen order.
    return list(dict.fromkeys(matches))


def _write_final_result(final_result):
    """Dump *final_result* to a persistent temp JSON file and return its path."""
    # UTF-8 is required because ensure_ascii=False writes non-ASCII characters
    # verbatim; the locale default encoding may not be able to represent them.
    with tempfile.NamedTemporaryFile(
        mode='w', encoding='utf-8', delete=False, suffix='.json', prefix='final_result_'
    ) as handle:
        json.dump(final_result, handle, indent=2, ensure_ascii=False)
    return handle.name


def process_file(uploaded_files, layoutlmv3_model_path=None):
    """
    Robust handler for multiple or single file uploads.
    Streams the estimation first, then yields the final JSON and intermediate files.

    Args:
        uploaded_files: A single upload or a list of uploads. Each entry may be
            a dict with a ``"path"`` key, an object with a ``path`` attribute,
            or anything convertible with ``str()``.
        layoutlmv3_model_path: Optional model location; when falsy,
            ``DEFAULT_LAYOUTLMV3_MODEL_PATH`` is used.

    Yields:
        ``(display_text, files)`` tuples. ``files`` is ``None`` for progress
        and error updates, and a list of JSON file paths for the final
        ``"complete"`` update.
    """
    if uploaded_files is None:
        yield "❌ Error: No files uploaded.", None
        return

    # A single upload may arrive un-wrapped; normalize to a list.
    file_list = uploaded_files if isinstance(uploaded_files, list) else [uploaded_files]
    if not file_list:
        yield "❌ Error: Empty file list.", None
        return

    # 1. Resolve all file paths safely
    resolved_paths = []
    for upload in file_list:
        try:
            resolved_paths.append(_resolve_upload_path(upload))
        except Exception as e:
            # Best effort: skip the entry that cannot be resolved, keep the rest.
            print(f"Error resolving path for {upload}: {e}")

    if not resolved_paths:
        yield "❌ Error: Could not resolve file paths.", None
        return

    merged_pdf_path = None  # Set only when this call creates a temporary PDF.
    try:
        # 2. Validate the model path before doing any conversion work, so a
        #    missing model neither wastes time nor leaves a temp PDF behind.
        final_model_path = layoutlmv3_model_path or DEFAULT_LAYOUTLMV3_MODEL_PATH
        if not os.path.exists(final_model_path):
            yield f"❌ Error: Model not found at {final_model_path}", None
            return

        # 3. Determine if we should merge into a single PDF
        is_image = Path(resolved_paths[0]).suffix.lower() in _IMAGE_SUFFIXES
        if len(resolved_paths) > 1 or is_image:
            print(f"📦 Converting {len(resolved_paths)} image(s) to a single PDF...")
            # Convert first: if this raises, no empty temp file is created.
            pdf_bytes = img2pdf.convert(resolved_paths, rotation=Rotation.ifvalid)
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
                temp_pdf.write(pdf_bytes)
            merged_pdf_path = temp_pdf.name
            processing_path = merged_pdf_path
        else:
            processing_path = resolved_paths[0]

        # 4. Call the pipeline generator
        print(f"🚀 Starting pipeline for: {processing_path}")

        # Iterate through the yields from run_document_pipeline
        for pipeline_update in run_document_pipeline(processing_path, final_model_path):
            status = pipeline_update.get("status")

            # --- Handle Estimation Yield ---
            if status == "estimating":
                display_text = "⏱️ ESTIMATING PROCESSING TIME...\n\n" + json.dumps(pipeline_update, indent=2)
                yield display_text, None

            # --- Handle Final Complete Yield ---
            elif status == "complete":
                final_result = pipeline_update.get("result")

                # SCRAPE FOR INTERMEDIATE FILES
                all_intermediate_jsons = _collect_intermediate_jsons(Path(processing_path).stem)

                # Prepare Final Output for Display
                if final_result is None or (isinstance(final_result, list) and len(final_result) == 0):
                    display_text = "⚠️ Pipeline failed at Step 3 (BIO Decoding).\nDownload the intermediate JSONs below to inspect OCR and Model Predictions."
                else:
                    display_text = json.dumps(final_result, indent=2, ensure_ascii=False)
                    # Save it to a temp file so it can be downloaded too
                    all_intermediate_jsons.append(_write_final_result(final_result))

                yield display_text, all_intermediate_jsons

            # --- Handle Error Yield ---
            elif status == "error":
                yield f"❌ Error: {pipeline_update.get('message')}", None

    except Exception as e:
        import traceback
        traceback.print_exc()
        yield f"❌ Error: {str(e)}", None
    finally:
        # Remove the merged PDF created above; uploads supplied by the caller
        # are never deleted here.
        if merged_pdf_path is not None:
            try:
                os.remove(merged_pdf_path)
            except OSError:
                pass  # Best-effort cleanup; a leftover temp file is harmless.
686
+
687
+
688
  # ==============================
689
  # VISUAL DEBUG FUNCTION
690
  # ==============================
 
810
  # Visual debug output
811
  detection_preview = gr.Image(label="YOLO Detection Preview (Green=Figure, Red=Equation)", type="filepath")
812
 
813
+ # Final JSON output (Will update with estimation, then final result)
814
+ json_output = gr.Code(label="Pipeline Output", language="json", lines=20)
815
 
816
  # Download all intermediate files
817
  download_output = gr.File(label="Download All Pipeline Stages (JSON)", file_count="multiple")
 
827
  process_btn.click(
828
  fn=process_file,
829
  inputs=[file_input, model_path_input],
830
+ outputs=[json_output, download_output],
831
+ api_name="process" # This enables the streaming endpoint /api/process
832
  )
833
 
834
if __name__ == "__main__":
    # Streaming (generator) handlers only work when the queue is enabled,
    # so turn it on before launching the server.
    queued_app = demo.queue()
    queued_app.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
    )
837