heerjtdev committed on
Commit
467022b
·
verified ·
1 Parent(s): 03ae0ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +168 -15
app.py CHANGED
@@ -351,15 +351,176 @@ def process_file(uploaded_files, layoutlmv3_model_path=None):
351
 
352
 
353
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
354
  # ==============================
355
  # VISUAL DEBUG FUNCTION
356
  # ==============================
357
- def visualize_detections(uploaded_files):
358
- """Shows the first uploaded image with YOLO bounding boxes"""
359
  if not uploaded_files:
360
  return None
361
 
362
  try:
 
 
 
 
 
 
363
  # Get first file path
364
  file_path = uploaded_files[0] if isinstance(uploaded_files, list) else uploaded_files
365
  if isinstance(file_path, dict):
@@ -367,16 +528,11 @@ def visualize_detections(uploaded_files):
367
  elif hasattr(file_path, 'path'):
368
  file_path = file_path.path
369
 
370
- import cv2
371
- import numpy as np
372
-
373
- from ultralytics import YOLO
374
- import fitz
375
-
376
  # Handle PDF conversion to image
377
  if str(file_path).lower().endswith('.pdf'):
378
  doc = fitz.open(file_path)
379
- page_idx = int(page_num) - 1
 
380
  page = doc.load_page(page_idx)
381
 
382
  pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
@@ -418,8 +574,8 @@ def visualize_detections(uploaded_files):
418
  detection_count[class_name] += 1
419
 
420
  # Add summary text at top
421
- summary = f"Detected: {detection_count['figure']} Figures (GREEN), {detection_count['equation']} Equations (RED)"
422
- cv2.rectangle(img, (10, 10), (10 + len(summary) * 10, 40), (0, 0, 0), -1)
423
  cv2.putText(img, summary, (15, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
424
 
425
  # Save to temp file
@@ -433,7 +589,6 @@ def visualize_detections(uploaded_files):
433
  traceback.print_exc()
434
  return None
435
 
436
-
437
  # ==============================
438
  # GRADIO INTERFACE
439
  # ==============================
@@ -467,7 +622,7 @@ with gr.Blocks(title="Document Analysis Pipeline") as demo:
467
  )
468
 
469
  # Debug button for visual inspection
470
- debug_btn = gr.Button("🔍 Show YOLO Detections (First Page)", variant="secondary")
471
 
472
  # Main processing button
473
  process_btn = gr.Button("🚀 Run Full Pipeline", variant="primary")
@@ -496,8 +651,6 @@ with gr.Blocks(title="Document Analysis Pipeline") as demo:
496
  outputs=[json_output, download_output]
497
  )
498
 
499
-
500
-
501
  if __name__ == "__main__":
502
  demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
503
 
 
351
 
352
 
353
 
354
+ # # ==============================
355
+ # # VISUAL DEBUG FUNCTION
356
+ # # ==============================
357
+ # def visualize_detections(uploaded_files):
358
+ # """Shows the first uploaded image with YOLO bounding boxes"""
359
+ # if not uploaded_files:
360
+ # return None
361
+
362
+ # try:
363
+ # # Get first file path
364
+ # file_path = uploaded_files[0] if isinstance(uploaded_files, list) else uploaded_files
365
+ # if isinstance(file_path, dict):
366
+ # file_path = file_path["path"]
367
+ # elif hasattr(file_path, 'path'):
368
+ # file_path = file_path.path
369
+
370
+ # import cv2
371
+ # import numpy as np
372
+
373
+ # from ultralytics import YOLO
374
+ # import fitz
375
+
376
+ # # Handle PDF conversion to image
377
+ # if str(file_path).lower().endswith('.pdf'):
378
+ # doc = fitz.open(file_path)
379
+ # page_idx = int(page_num) - 1
380
+ # page = doc.load_page(page_idx)
381
+
382
+ # pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
383
+ # img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n)
384
+ # if pix.n == 3:
385
+ # img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
386
+ # elif pix.n == 4:
387
+ # img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
388
+ # doc.close()
389
+ # else:
390
+ # img = cv2.imread(str(file_path))
391
+
392
+ # if img is None:
393
+ # return None
394
+
395
+ # # Run YOLO detection
396
+ # model = YOLO(WEIGHTS_PATH)
397
+ # results = model.predict(source=img, conf=0.2, imgsz=640, verbose=False)
398
+
399
+ # # Draw bounding boxes
400
+ # detection_count = {'figure': 0, 'equation': 0}
401
+ # for box in results[0].boxes:
402
+ # class_id = int(box.cls[0])
403
+ # class_name = model.names[class_id]
404
+ # if class_name in ['figure', 'equation']:
405
+ # x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
406
+ # conf = float(box.conf[0])
407
+
408
+ # # Green for figures, Red for equations
409
+ # color = (0, 255, 0) if class_name == 'figure' else (0, 0, 255)
410
+ # cv2.rectangle(img, (x1, y1), (x2, y2), color, 3)
411
+
412
+ # # Add label with confidence
413
+ # label = f"{class_name.upper()} {conf:.2f}"
414
+ # (text_width, text_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
415
+ # cv2.rectangle(img, (x1, y1 - text_height - 10), (x1 + text_width, y1), color, -1)
416
+ # cv2.putText(img, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
417
+
418
+ # detection_count[class_name] += 1
419
+
420
+ # # Add summary text at top
421
+ # summary = f"Detected: {detection_count['figure']} Figures (GREEN), {detection_count['equation']} Equations (RED)"
422
+ # cv2.rectangle(img, (10, 10), (10 + len(summary) * 10, 40), (0, 0, 0), -1)
423
+ # cv2.putText(img, summary, (15, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
424
+
425
+ # # Save to temp file
426
+ # temp_path = tempfile.NamedTemporaryFile(delete=False, suffix=".png").name
427
+ # cv2.imwrite(temp_path, img)
428
+ # return temp_path
429
+
430
+ # except Exception as e:
431
+ # print(f"Error in visualize_detections: {e}")
432
+ # import traceback
433
+ # traceback.print_exc()
434
+ # return None
435
+
436
+
437
+ # # ==============================
438
+ # # GRADIO INTERFACE
439
+ # # ==============================
440
+ # with gr.Blocks(title="Document Analysis Pipeline") as demo:
441
+
442
+ # gr.Markdown("# 📄 Full Pipeline Analysis")
443
+ # gr.Markdown("### 🔍 Intermediate File Recovery Active")
444
+ # gr.Markdown("The **Download** box will contain: \n1. OCR JSON (Step 1)\n2. Raw LayoutLMv3 Prediction JSON (Step 2)\n3. Final BIO JSON (Step 3)")
445
+
446
+ # with gr.Row():
447
+ # with gr.Column(scale=1):
448
+ # file_input = gr.File(
449
+ # label="Upload PDFs or Images",
450
+ # file_types=[".pdf", ".jpg", ".jpeg", ".png", ".bmp", ".webp", ".tiff"],
451
+ # file_count="multiple",
452
+ # type="filepath"
453
+ # )
454
+
455
+ # page_selector = gr.Slider(
456
+ # minimum=1,
457
+ # maximum=100,
458
+ # value=1,
459
+ # step=1,
460
+ # label="PDF Page Number (for preview)",
461
+ # visible=True
462
+ # )
463
+
464
+ # model_path_input = gr.Textbox(
465
+ # label="Model Path",
466
+ # value=DEFAULT_LAYOUTLMV3_MODEL_PATH
467
+ # )
468
+
469
+ # # Debug button for visual inspection
470
+ # debug_btn = gr.Button("🔍 Show YOLO Detections (First Page)", variant="secondary")
471
+
472
+ # # Main processing button
473
+ # process_btn = gr.Button("🚀 Run Full Pipeline", variant="primary")
474
+
475
+ # with gr.Column(scale=2):
476
+ # # Visual debug output
477
+ # detection_preview = gr.Image(label="YOLO Detection Preview (Green=Figure, Red=Equation)", type="filepath")
478
+
479
+ # # Final JSON output
480
+ # json_output = gr.Code(label="Final Structured Output", language="json", lines=20)
481
+
482
+ # # Download all intermediate files
483
+ # download_output = gr.File(label="Download All Pipeline Stages (JSON)", file_count="multiple")
484
+
485
+ # # Wire up the debug button
486
+ # debug_btn.click(
487
+ # fn=visualize_detections,
488
+ # inputs=[file_input, page_selector],
489
+ # outputs=[detection_preview]
490
+ # )
491
+
492
+ # # Wire up the main processing button
493
+ # process_btn.click(
494
+ # fn=process_file,
495
+ # inputs=[file_input, model_path_input],
496
+ # outputs=[json_output, download_output]
497
+ # )
498
+
499
+
500
+
501
+ # if __name__ == "__main__":
502
+ # demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
503
+
504
+
505
+
506
+
507
+
508
+
509
  # ==============================
510
  # VISUAL DEBUG FUNCTION
511
  # ==============================
512
+ def visualize_detections(uploaded_files, page_num):
513
+ """Shows the selected PDF page or image with YOLO bounding boxes"""
514
  if not uploaded_files:
515
  return None
516
 
517
  try:
518
+ import cv2
519
+ import numpy as np
520
+ import tempfile
521
+ from ultralytics import YOLO
522
+ import fitz
523
+
524
  # Get first file path
525
  file_path = uploaded_files[0] if isinstance(uploaded_files, list) else uploaded_files
526
  if isinstance(file_path, dict):
 
528
  elif hasattr(file_path, 'path'):
529
  file_path = file_path.path
530
 
 
 
 
 
 
 
531
  # Handle PDF conversion to image
532
  if str(file_path).lower().endswith('.pdf'):
533
  doc = fitz.open(file_path)
534
+ # Ensure the selected page exists in the document
535
+ page_idx = min(max(int(page_num) - 1, 0), len(doc) - 1)
536
  page = doc.load_page(page_idx)
537
 
538
  pix = page.get_pixmap(matrix=fitz.Matrix(2.0, 2.0))
 
574
  detection_count[class_name] += 1
575
 
576
  # Add summary text at top
577
+ summary = f"Page {page_num} | Detected: {detection_count['figure']} Figures, {detection_count['equation']} Equations"
578
+ cv2.rectangle(img, (10, 10), (10 + len(summary) * 11, 40), (0, 0, 0), -1)
579
  cv2.putText(img, summary, (15, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
580
 
581
  # Save to temp file
 
589
  traceback.print_exc()
590
  return None
591
 
 
592
  # ==============================
593
  # GRADIO INTERFACE
594
  # ==============================
 
622
  )
623
 
624
  # Debug button for visual inspection
625
+ debug_btn = gr.Button("🔍 Show YOLO Detections", variant="secondary")
626
 
627
  # Main processing button
628
  process_btn = gr.Button("🚀 Run Full Pipeline", variant="primary")
 
651
  outputs=[json_output, download_output]
652
  )
653
 
 
 
654
  if __name__ == "__main__":
655
  demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)
656