Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -351,15 +351,176 @@ def process_file(uploaded_files, layoutlmv3_model_path=None):
|
|
| 351 |
|
| 352 |
|
| 353 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
# ==============================
|
| 355 |
# VISUAL DEBUG FUNCTION
|
| 356 |
# ==============================
|
| 357 |
-
def visualize_detections(uploaded_files):
|
| 358 |
-
"""Shows the
|
| 359 |
if not uploaded_files:
|
| 360 |
return None
|
| 361 |
|
| 362 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 363 |
# Get first file path
|
| 364 |
file_path = uploaded_files[0] if isinstance(uploaded_files, list) else uploaded_files
|
| 365 |
if isinstance(file_path, dict):
|
|
@@ -367,16 +528,11 @@ def visualize_detections(uploaded_files):
|
|
| 367 |
elif hasattr(file_path, 'path'):
|
| 368 |
file_path = file_path.path
|
| 369 |
|
| 370 |
-
import cv2
|
| 371 |
-
import numpy as np
|
| 372 |
-
|
| 373 |
-
from ultralytics import YOLO
|
| 374 |
-
import fitz
|
| 375 |
-
|
| 376 |
# Handle PDF conversion to image
|
| 377 |
if str(file_path).lower().endswith('.pdf'):
|
| 378 |
doc = fitz.open(file_path)
|
| 379 |
-
|
|
|
|
| 380 |
page = doc.load_page(page_idx)
|
| 381 |
|
| 382 |
pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
|
|
@@ -418,8 +574,8 @@ def visualize_detections(uploaded_files):
|
|
| 418 |
detection_count[class_name] += 1
|
| 419 |
|
| 420 |
# Add summary text at top
|
| 421 |
-
summary = f"Detected: {detection_count['figure']} Figures
|
| 422 |
-
cv2.rectangle(img, (10, 10), (10 + len(summary) *
|
| 423 |
cv2.putText(img, summary, (15, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
|
| 424 |
|
| 425 |
# Save to temp file
|
|
@@ -433,7 +589,6 @@ def visualize_detections(uploaded_files):
|
|
| 433 |
traceback.print_exc()
|
| 434 |
return None
|
| 435 |
|
| 436 |
-
|
| 437 |
# ==============================
|
| 438 |
# GRADIO INTERFACE
|
| 439 |
# ==============================
|
|
@@ -467,7 +622,7 @@ with gr.Blocks(title="Document Analysis Pipeline") as demo:
|
|
| 467 |
)
|
| 468 |
|
| 469 |
# Debug button for visual inspection
|
| 470 |
-
debug_btn = gr.Button("π Show YOLO Detections
|
| 471 |
|
| 472 |
# Main processing button
|
| 473 |
process_btn = gr.Button("π Run Full Pipeline", variant="primary")
|
|
@@ -496,8 +651,6 @@ with gr.Blocks(title="Document Analysis Pipeline") as demo:
|
|
| 496 |
outputs=[json_output, download_output]
|
| 497 |
)
|
| 498 |
|
| 499 |
-
|
| 500 |
-
|
| 501 |
if __name__ == "__main__":
|
| 502 |
demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
|
| 503 |
|
|
|
|
| 351 |
|
| 352 |
|
| 353 |
|
| 354 |
+
# # ==============================
|
| 355 |
+
# # VISUAL DEBUG FUNCTION
|
| 356 |
+
# # ==============================
|
| 357 |
+
# def visualize_detections(uploaded_files):
|
| 358 |
+
# """Shows the first uploaded image with YOLO bounding boxes"""
|
| 359 |
+
# if not uploaded_files:
|
| 360 |
+
# return None
|
| 361 |
+
|
| 362 |
+
# try:
|
| 363 |
+
# # Get first file path
|
| 364 |
+
# file_path = uploaded_files[0] if isinstance(uploaded_files, list) else uploaded_files
|
| 365 |
+
# if isinstance(file_path, dict):
|
| 366 |
+
# file_path = file_path["path"]
|
| 367 |
+
# elif hasattr(file_path, 'path'):
|
| 368 |
+
# file_path = file_path.path
|
| 369 |
+
|
| 370 |
+
# import cv2
|
| 371 |
+
# import numpy as np
|
| 372 |
+
|
| 373 |
+
# from ultralytics import YOLO
|
| 374 |
+
# import fitz
|
| 375 |
+
|
| 376 |
+
# # Handle PDF conversion to image
|
| 377 |
+
# if str(file_path).lower().endswith('.pdf'):
|
| 378 |
+
# doc = fitz.open(file_path)
|
| 379 |
+
# page_idx = int(page_num) - 1
|
| 380 |
+
# page = doc.load_page(page_idx)
|
| 381 |
+
|
| 382 |
+
# pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
|
| 383 |
+
# img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
|
| 384 |
+
# if pix.n == 3:
|
| 385 |
+
# img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
|
| 386 |
+
# elif pix.n == 4:
|
| 387 |
+
# img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
|
| 388 |
+
# doc.close()
|
| 389 |
+
# else:
|
| 390 |
+
# img = cv2.imread(str(file_path))
|
| 391 |
+
|
| 392 |
+
# if img is None:
|
| 393 |
+
# return None
|
| 394 |
+
|
| 395 |
+
# # Run YOLO detection
|
| 396 |
+
# model = YOLO(WEIGHTS_PATH)
|
| 397 |
+
# results = model.predict(source=img, conf=0.2, imgsz=640, verbose=False)
|
| 398 |
+
|
| 399 |
+
# # Draw bounding boxes
|
| 400 |
+
# detection_count = {'figure': 0, 'equation': 0}
|
| 401 |
+
# for box in results[0].boxes:
|
| 402 |
+
# class_id = int(box.cls[0])
|
| 403 |
+
# class_name = model.names[class_id]
|
| 404 |
+
# if class_name in ['figure', 'equation']:
|
| 405 |
+
# x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
|
| 406 |
+
# conf = float(box.conf[0])
|
| 407 |
+
|
| 408 |
+
# # Green for figures, Red for equations
|
| 409 |
+
# color = (0, 255, 0) if class_name == 'figure' else (0, 0, 255)
|
| 410 |
+
# cv2.rectangle(img, (x1, y1), (x2, y2), color, 3)
|
| 411 |
+
|
| 412 |
+
# # Add label with confidence
|
| 413 |
+
# label = f"{class_name.upper()} {conf:.2f}"
|
| 414 |
+
# (text_width, text_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
|
| 415 |
+
# cv2.rectangle(img, (x1, y1 - text_height - 10), (x1 + text_width, y1), color, -1)
|
| 416 |
+
# cv2.putText(img, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
|
| 417 |
+
|
| 418 |
+
# detection_count[class_name] += 1
|
| 419 |
+
|
| 420 |
+
# # Add summary text at top
|
| 421 |
+
# summary = f"Detected: {detection_count['figure']} Figures (GREEN), {detection_count['equation']} Equations (RED)"
|
| 422 |
+
# cv2.rectangle(img, (10, 10), (10 + len(summary) * 10, 40), (0, 0, 0), -1)
|
| 423 |
+
# cv2.putText(img, summary, (15, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
|
| 424 |
+
|
| 425 |
+
# # Save to temp file
|
| 426 |
+
# temp_path = tempfile.NamedTemporaryFile(delete=False, suffix=".png").name
|
| 427 |
+
# cv2.imwrite(temp_path, img)
|
| 428 |
+
# return temp_path
|
| 429 |
+
|
| 430 |
+
# except Exception as e:
|
| 431 |
+
# print(f"Error in visualize_detections: {e}")
|
| 432 |
+
# import traceback
|
| 433 |
+
# traceback.print_exc()
|
| 434 |
+
# return None
|
| 435 |
+
|
| 436 |
+
|
| 437 |
+
# # ==============================
|
| 438 |
+
# # GRADIO INTERFACE
|
| 439 |
+
# # ==============================
|
| 440 |
+
# with gr.Blocks(title="Document Analysis Pipeline") as demo:
|
| 441 |
+
|
| 442 |
+
# gr.Markdown("# π Full Pipeline Analysis")
|
| 443 |
+
# gr.Markdown("### π Intermediate File Recovery Active")
|
| 444 |
+
# gr.Markdown("The **Download** box will contain: \n1. OCR JSON (Step 1)\n2. Raw LayoutLMv3 Prediction JSON (Step 2)\n3. Final BIO JSON (Step 3)")
|
| 445 |
+
|
| 446 |
+
# with gr.Row():
|
| 447 |
+
# with gr.Column(scale=1):
|
| 448 |
+
# file_input = gr.File(
|
| 449 |
+
# label="Upload PDFs or Images",
|
| 450 |
+
# file_types=[".pdf", ".jpg", ".jpeg", ".png", ".bmp", ".webp", ".tiff"],
|
| 451 |
+
# file_count="multiple",
|
| 452 |
+
# type="filepath"
|
| 453 |
+
# )
|
| 454 |
+
|
| 455 |
+
# page_selector = gr.Slider(
|
| 456 |
+
# minimum=1,
|
| 457 |
+
# maximum=100,
|
| 458 |
+
# value=1,
|
| 459 |
+
# step=1,
|
| 460 |
+
# label="PDF Page Number (for preview)",
|
| 461 |
+
# visible=True
|
| 462 |
+
# )
|
| 463 |
+
|
| 464 |
+
# model_path_input = gr.Textbox(
|
| 465 |
+
# label="Model Path",
|
| 466 |
+
# value=DEFAULT_LAYOUTLMV3_MODEL_PATH
|
| 467 |
+
# )
|
| 468 |
+
|
| 469 |
+
# # Debug button for visual inspection
|
| 470 |
+
# debug_btn = gr.Button("π Show YOLO Detections (First Page)", variant="secondary")
|
| 471 |
+
|
| 472 |
+
# # Main processing button
|
| 473 |
+
# process_btn = gr.Button("π Run Full Pipeline", variant="primary")
|
| 474 |
+
|
| 475 |
+
# with gr.Column(scale=2):
|
| 476 |
+
# # Visual debug output
|
| 477 |
+
# detection_preview = gr.Image(label="YOLO Detection Preview (Green=Figure, Red=Equation)", type="filepath")
|
| 478 |
+
|
| 479 |
+
# # Final JSON output
|
| 480 |
+
# json_output = gr.Code(label="Final Structured Output", language="json", lines=20)
|
| 481 |
+
|
| 482 |
+
# # Download all intermediate files
|
| 483 |
+
# download_output = gr.File(label="Download All Pipeline Stages (JSON)", file_count="multiple")
|
| 484 |
+
|
| 485 |
+
# # Wire up the debug button
|
| 486 |
+
# debug_btn.click(
|
| 487 |
+
# fn=visualize_detections,
|
| 488 |
+
# inputs=[file_input, page_selector],
|
| 489 |
+
# outputs=[detection_preview]
|
| 490 |
+
# )
|
| 491 |
+
|
| 492 |
+
# # Wire up the main processing button
|
| 493 |
+
# process_btn.click(
|
| 494 |
+
# fn=process_file,
|
| 495 |
+
# inputs=[file_input, model_path_input],
|
| 496 |
+
# outputs=[json_output, download_output]
|
| 497 |
+
# )
|
| 498 |
+
|
| 499 |
+
|
| 500 |
+
|
| 501 |
+
# if __name__ == "__main__":
|
| 502 |
+
# demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
|
| 503 |
+
|
| 504 |
+
|
| 505 |
+
|
| 506 |
+
|
| 507 |
+
|
| 508 |
+
|
| 509 |
# ==============================
|
| 510 |
# VISUAL DEBUG FUNCTION
|
| 511 |
# ==============================
|
| 512 |
+
def visualize_detections(uploaded_files, page_num):
|
| 513 |
+
"""Shows the selected PDF page or image with YOLO bounding boxes"""
|
| 514 |
if not uploaded_files:
|
| 515 |
return None
|
| 516 |
|
| 517 |
try:
|
| 518 |
+
import cv2
|
| 519 |
+
import numpy as np
|
| 520 |
+
import tempfile
|
| 521 |
+
from ultralytics import YOLO
|
| 522 |
+
import fitz
|
| 523 |
+
|
| 524 |
# Get first file path
|
| 525 |
file_path = uploaded_files[0] if isinstance(uploaded_files, list) else uploaded_files
|
| 526 |
if isinstance(file_path, dict):
|
|
|
|
| 528 |
elif hasattr(file_path, 'path'):
|
| 529 |
file_path = file_path.path
|
| 530 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 531 |
# Handle PDF conversion to image
|
| 532 |
if str(file_path).lower().endswith('.pdf'):
|
| 533 |
doc = fitz.open(file_path)
|
| 534 |
+
# Ensure the selected page exists in the document
|
| 535 |
+
page_idx = min(max(int(page_num) - 1, 0), len(doc) - 1)
|
| 536 |
page = doc.load_page(page_idx)
|
| 537 |
|
| 538 |
pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
|
|
|
|
| 574 |
detection_count[class_name] += 1
|
| 575 |
|
| 576 |
# Add summary text at top
|
| 577 |
+
summary = f"Page {page_num} | Detected: {detection_count['figure']} Figures, {detection_count['equation']} Equations"
|
| 578 |
+
cv2.rectangle(img, (10, 10), (10 + len(summary) * 11, 40), (0, 0, 0), -1)
|
| 579 |
cv2.putText(img, summary, (15, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
|
| 580 |
|
| 581 |
# Save to temp file
|
|
|
|
| 589 |
traceback.print_exc()
|
| 590 |
return None
|
| 591 |
|
|
|
|
| 592 |
# ==============================
|
| 593 |
# GRADIO INTERFACE
|
| 594 |
# ==============================
|
|
|
|
| 622 |
)
|
| 623 |
|
| 624 |
# Debug button for visual inspection
|
| 625 |
+
debug_btn = gr.Button("π Show YOLO Detections", variant="secondary")
|
| 626 |
|
| 627 |
# Main processing button
|
| 628 |
process_btn = gr.Button("π Run Full Pipeline", variant="primary")
|
|
|
|
| 651 |
outputs=[json_output, download_output]
|
| 652 |
)
|
| 653 |
|
|
|
|
|
|
|
| 654 |
if __name__ == "__main__":
|
| 655 |
demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
|
| 656 |
|