mmrech committed on
Commit
df56589
·
1 Parent(s): a8862db

Add automatic subject/patient ID detection from filenames and folders for grouping slices

Browse files
Files changed (1) hide show
  1. app.py +171 -39
app.py CHANGED
@@ -412,29 +412,122 @@ def process_sequence(image_files, prompt_text, modality, window_type):
412
  # Store processed results for interactive viewer
413
  processed_results_cache = {}
414
 
415
- def process_slices_for_viewer(image_files, prompt_text, modality, window_type):
416
- """Process all slices and cache results for interactive viewing."""
417
- if model is None or processor is None:
418
- return None, 0, "❌ Error: Model not loaded.", "No slices loaded"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
419
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
  if not image_files:
421
- return None, 0, "⚠️ Please upload medical image files.", "No slices loaded"
422
 
423
- # Handle single file or list of files
424
  if isinstance(image_files, str):
425
  image_files = [image_files]
426
 
427
  # Filter out None files
428
  image_files = [f for f in image_files if f is not None]
429
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
430
  if not image_files:
431
- return None, 0, "⚠️ No valid files uploaded.", "No slices loaded"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
432
 
433
  results = []
434
  status_messages = []
435
 
436
- for idx, image_file in enumerate(image_files):
437
- status_msg = f"Processing slice {idx + 1}/{len(image_files)}..."
438
  status_messages.append(status_msg)
439
 
440
  result = process_medical_image(image_file, prompt_text, modality, window_type)
@@ -446,47 +539,55 @@ def process_slices_for_viewer(image_files, prompt_text, modality, window_type):
446
  status_messages.append(f"❌ Failed to process slice {idx + 1}")
447
 
448
  if results:
449
- # Cache results with a unique key
450
- cache_key = f"{len(image_files)}_{prompt_text}_{modality}"
451
  processed_results_cache[cache_key] = results
452
 
453
  max_slices = len(results) - 1
454
- status = f"βœ… Processed {len(results)}/{len(image_files)} slices!\nUse slider or buttons to navigate."
455
- slice_info = f"Slice 1/{len(results)}"
 
 
 
 
 
 
456
 
457
- return results[0], max_slices, status, slice_info
458
  else:
459
- return None, 0, "❌ No slices were processed successfully.", "No slices loaded"
460
 
461
- def navigate_slice(slice_idx, image_files, prompt_text, modality, window_type):
462
  """Navigate to a specific slice in the sequence."""
463
  if not image_files:
464
  return None, "No slices loaded"
465
 
466
- # Handle single file or list of files
467
- if isinstance(image_files, str):
468
- image_files = [image_files]
469
 
470
- # Filter out None files
471
- image_files = [f for f in image_files if f is not None]
 
 
472
 
473
- if not image_files:
474
  return None, "No slices loaded"
475
 
 
476
  slice_idx = int(slice_idx)
477
- cache_key = f"{len(image_files)}_{prompt_text}_{modality}"
478
 
479
  if cache_key in processed_results_cache:
480
  results = processed_results_cache[cache_key]
481
  if 0 <= slice_idx < len(results):
482
- slice_info = f"Slice {slice_idx + 1}/{len(results)}"
483
  return results[slice_idx], slice_info
484
 
485
  # If not cached, process on the fly (fallback)
486
- if 0 <= slice_idx < len(image_files):
487
- result = process_medical_image(image_files[slice_idx], prompt_text, modality, window_type)
488
  if result:
489
- slice_info = f"Slice {slice_idx + 1}/{len(image_files)}"
490
  return result, slice_info
491
 
492
  return None, f"Invalid slice index: {slice_idx}"
@@ -572,6 +673,14 @@ with gr.Blocks() as demo:
572
 
573
  with gr.Tab("Interactive Slice Viewer"):
574
  gr.Markdown("**Scroll through multiple slices/images from the same subject interactively**")
 
 
 
 
 
 
 
 
575
  with gr.Row():
576
  with gr.Column():
577
  files_input = gr.File(
@@ -579,7 +688,15 @@ with gr.Blocks() as demo:
579
  file_types=[".dcm", ".png", ".jpg", ".jpeg"],
580
  file_count="multiple",
581
  type="filepath",
582
- info="Upload multiple slices from the same subject (e.g., axial MRI slices)"
 
 
 
 
 
 
 
 
583
  )
584
 
585
  text_input_batch = gr.Textbox(
@@ -603,6 +720,7 @@ with gr.Blocks() as demo:
603
  info="CT windowing preset (ignored for MRI)"
604
  )
605
 
 
606
  submit_batch_btn = gr.Button("Process All Slices", variant="primary", size="lg")
607
 
608
  gr.Markdown("---")
@@ -636,6 +754,13 @@ with gr.Blocks() as demo:
636
  interactive=False
637
  )
638
 
 
 
 
 
 
 
 
639
  gr.Markdown("### Status")
640
  status_batch_text = gr.Textbox(
641
  label="Processing Status",
@@ -776,46 +901,53 @@ with gr.Blocks() as demo:
776
  outputs=[image_output, status_text]
777
  )
778
 
 
 
 
 
 
 
 
779
  # Interactive slice viewer
780
  submit_batch_btn.click(
781
  fn=process_slices_for_viewer,
782
- inputs=[files_input, text_input_batch, modality_dropdown_batch, window_dropdown_batch],
783
- outputs=[current_slice_output, slice_slider, status_batch_text, slice_info_text]
784
  ).then(
785
  lambda max_val: gr.Slider(maximum=max_val, interactive=True),
786
  inputs=[slice_slider],
787
  outputs=[slice_slider]
788
  )
789
 
790
- def update_slice(slice_num, files, prompt, mod, window):
791
- result, info = navigate_slice(int(slice_num), files, prompt, mod, window)
792
  return result, info
793
 
794
  slice_slider.change(
795
  fn=update_slice,
796
- inputs=[slice_slider, files_input, text_input_batch, modality_dropdown_batch, window_dropdown_batch],
797
  outputs=[current_slice_output, slice_info_text]
798
  )
799
 
800
- def prev_slice(current, files, prompt, mod, window):
801
  new_val = max(0, current - 1)
802
- result, info = navigate_slice(new_val, files, prompt, mod, window)
803
  return new_val, result, info
804
 
805
- def next_slice(current, max_val, files, prompt, mod, window):
806
  new_val = min(max_val, current + 1)
807
- result, info = navigate_slice(new_val, files, prompt, mod, window)
808
  return new_val, result, info
809
 
810
  prev_btn.click(
811
  fn=prev_slice,
812
- inputs=[slice_slider, files_input, text_input_batch, modality_dropdown_batch, window_dropdown_batch],
813
  outputs=[slice_slider, current_slice_output, slice_info_text]
814
  )
815
 
816
  next_btn.click(
817
  fn=next_slice,
818
- inputs=[slice_slider, slice_slider, files_input, text_input_batch, modality_dropdown_batch, window_dropdown_batch],
819
  outputs=[slice_slider, current_slice_output, slice_info_text]
820
  )
821
 
 
412
  # Store processed results for interactive viewer
413
  processed_results_cache = {}
414
 
415
def extract_subject_id(file_path):
    """Derive a subject/patient identifier for one image file.

    Sources are tried from most to least specific:

    1. Parent folder name that starts with a subject keyword
       (``/subject_001/image.png`` -> ``subject_001``).
    2. Subject keyword followed by a number in the filename
       (``patient-123_slice_5.dcm`` -> ``patient_123``).
    3. DICOM metadata for ``.dcm`` files: ``PatientID``, then the tail of
       ``StudyInstanceUID``.
    4. Generic filename heuristics: a run of 3+ digits, then letters
       followed by digits (``BR001``).
    5. The filename without its extension.

    Args:
        file_path: Path to the image; converted with ``str()``.

    Returns:
        The detected identifier, or ``"unknown"`` when nothing usable is found.
    """
    import re

    file_path = str(file_path)
    filename = os.path.basename(file_path)
    dir_path = os.path.dirname(file_path)

    keyword = r'(subject|patient|sub|pat|case|id)'

    # Folder names are common subject markers in medical datasets.
    folder_name = os.path.basename(dir_path.rstrip('/'))
    if folder_name not in ('', '.', '..'):
        if re.match(keyword + r'[_-]?\d+', folder_name, re.I):
            return folder_name

    # Explicit keyword in the filename. The look-behind stops the short
    # keywords from matching inside a longer word (e.g. "id" in "covid19").
    match = re.search(r'(?<![A-Za-z])' + keyword + r'[_-]?(\d+)', filename, re.I)
    if match:
        return f"{match.group(1)}_{match.group(2)}"

    # DICOM metadata is consulted before the generic digit heuristics below;
    # otherwise names such as "IM0001.dcm" would always be claimed by the
    # digit pattern and every slice would become its own subject.
    if file_path.lower().endswith('.dcm'):
        try:
            ds = pydicom.dcmread(file_path, stop_before_pixels=True)
            patient_id = getattr(ds, 'PatientID', None)
            if patient_id:
                return f"patient_{patient_id}"
            study_uid = getattr(ds, 'StudyInstanceUID', None)
            if study_uid:
                # Use last part of UID as identifier
                return f"study_{study_uid.split('.')[-1][:8]}"
        except Exception:
            # Best effort: an unreadable file falls through to the filename
            # heuristics. KeyboardInterrupt/SystemExit are not swallowed.
            pass

    generic_patterns = (
        r'(\d{3,})',         # just numbers (3+ digits) - might be a subject ID
        r'([A-Z]{2,}\d+)',   # BR001, MR123, etc.
    )
    for pattern in generic_patterns:
        match = re.search(pattern, filename, re.I)
        if match:
            return match.group(1)

    # Fallback: filename without extension.
    base_name = os.path.splitext(filename)[0]
    return base_name or "unknown"
473
+
474
def _slice_sort_key(path):
    """Sort key ordering paths by filename with embedded numbers compared numerically."""
    import re

    text = str(path)
    name = os.path.basename(text)
    # re.split with a capturing group alternates text / digit runs, so the
    # token types line up position by position and the lists stay comparable.
    tokens = [
        int(tok) if tok.isdecimal() else tok.lower()
        for tok in re.split(r'(\d+)', name)
    ]
    # Full path as tie-breaker keeps the order deterministic.
    return tokens, text


def group_images_by_subject(image_files):
    """Group image files by subject/patient ID.

    Args:
        image_files: A single path string, or an iterable of paths; ``None``
            entries are ignored.

    Returns:
        Dict mapping each ID returned by ``extract_subject_id`` to the list of
        its files, ordered by filename with numeric parts compared as numbers
        (``slice_2`` before ``slice_10``). Empty dict when nothing was given.
    """
    if not image_files:
        return {}

    if isinstance(image_files, str):
        image_files = [image_files]

    subject_groups = {}
    for file_path in image_files:
        if file_path is None:
            continue
        subject_groups.setdefault(extract_subject_id(file_path), []).append(file_path)

    # Sort by filename rather than full path: the directory part would
    # otherwise dominate the order, and a plain string sort puts
    # "slice_10" ahead of "slice_2".
    for files in subject_groups.values():
        files.sort(key=_slice_sort_key)

    return subject_groups
498
+
499
+ def process_slices_for_viewer(image_files, selected_subject, prompt_text, modality, window_type):
500
+ """Process all slices for selected subject and cache results for interactive viewing."""
501
+ if model is None or processor is None:
502
+ return None, 0, "❌ Error: Model not loaded.", "No slices loaded", gr.Dropdown(choices=[], value=None), ""
503
+
504
  if not image_files:
505
+ return None, 0, "⚠️ Please upload medical image files.", "No slices loaded", gr.Dropdown(choices=[], value=None), ""
506
+
507
+ # Group by subject
508
+ subject_groups = group_images_by_subject(image_files)
509
+
510
+ if not subject_groups:
511
+ return None, 0, "⚠️ Could not detect subjects in uploaded files.", "No slices loaded", gr.Dropdown(choices=[], value=None), ""
512
+
513
+ # Extract subject ID from selection (format: "subject_id (N slices)")
514
+ if selected_subject:
515
+ subject_id = selected_subject.split(" (")[0]
516
+ else:
517
+ # Use first subject if none selected
518
+ subject_id = list(subject_groups.keys())[0]
519
+
520
+ if subject_id not in subject_groups:
521
+ return None, 0, f"⚠️ Subject '{subject_id}' not found.", "No slices loaded", gr.Dropdown(choices=[], value=None), ""
522
+
523
+ # Get files for selected subject
524
+ subject_files = subject_groups[subject_id]
525
 
526
  results = []
527
  status_messages = []
528
 
529
+ for idx, image_file in enumerate(subject_files):
530
+ status_msg = f"Processing slice {idx + 1}/{len(subject_files)}..."
531
  status_messages.append(status_msg)
532
 
533
  result = process_medical_image(image_file, prompt_text, modality, window_type)
 
539
  status_messages.append(f"❌ Failed to process slice {idx + 1}")
540
 
541
  if results:
542
+ # Cache results with a unique key including subject ID
543
+ cache_key = f"{subject_id}_{len(subject_files)}_{prompt_text}_{modality}"
544
  processed_results_cache[cache_key] = results
545
 
546
  max_slices = len(results) - 1
547
+ status = f"βœ… Processed {len(results)}/{len(subject_files)} slices for {subject_id}!\nUse slider or buttons to navigate."
548
+ slice_info = f"Slice 1/{len(results)} ({subject_id})"
549
+
550
+ # Update subject dropdown choices
551
+ choices = []
552
+ for sid, files in sorted(subject_groups.items()):
553
+ marker = "βœ“" if sid == subject_id else ""
554
+ choices.append(f"{marker} {sid} ({len(files)} slices)")
555
 
556
+ return results[0], max_slices, status, slice_info, gr.Dropdown(choices=choices, value=choices[0] if choices else None), f"Viewing: {subject_id}"
557
  else:
558
+ return None, 0, "❌ No slices were processed successfully.", "No slices loaded", gr.Dropdown(choices=[], value=None), ""
559
 
560
def _resolve_subject_id(selected_subject, subject_groups):
    """Map a dropdown selection back to a key of ``subject_groups`` (or None)."""
    if not selected_subject:
        # Nothing selected: default to the first detected subject.
        return next(iter(subject_groups), None)

    text = selected_subject.strip()
    if text in subject_groups:
        return text

    # Labels look like "<id> (N slices)"; drop the trailing count.
    label = text.rsplit(" (", 1)[0].strip()
    if label in subject_groups:
        return label

    # Labels may also carry a leading marker token ("<marker> <id> (...)").
    # Take the longest known ID that the label ends with.
    suffix_hits = [sid for sid in subject_groups if label.endswith(f" {sid}")]
    return max(suffix_hits, key=len, default=None)


def navigate_slice(slice_idx, image_files, selected_subject, prompt_text, modality, window_type):
    """Navigate to a specific slice in the sequence.

    Args:
        slice_idx: Zero-based slice position (anything ``int()`` accepts).
        image_files: Uploaded file path(s), grouped via ``group_images_by_subject``.
        selected_subject: Dropdown selection, either a bare subject ID or a
            label of the form ``"<id> (N slices)"`` with an optional leading
            marker; falsy selects the first detected subject.
        prompt_text, modality, window_type: Forwarded to
            ``process_medical_image`` when the slice is not cached.

    Returns:
        ``(result, info)``: the slice result and a caption, or ``None`` with
        an explanatory message when the slice cannot be shown.
    """
    if not image_files:
        return None, "No slices loaded"

    # Group by subject and get selected subject's files
    subject_groups = group_images_by_subject(image_files)
    subject_id = _resolve_subject_id(selected_subject, subject_groups)

    if not subject_id or subject_id not in subject_groups:
        return None, "No slices loaded"

    subject_files = subject_groups[subject_id]

    try:
        slice_idx = int(slice_idx)
    except (TypeError, ValueError):
        return None, f"Invalid slice index: {slice_idx}"

    # NOTE: this key format must match the one used when results are cached.
    cache_key = f"{subject_id}_{len(subject_files)}_{prompt_text}_{modality}"

    results = processed_results_cache.get(cache_key)
    if results is not None and 0 <= slice_idx < len(results):
        slice_info = f"Slice {slice_idx + 1}/{len(results)} ({subject_id})"
        return results[slice_idx], slice_info

    # If not cached, process on the fly (fallback)
    if 0 <= slice_idx < len(subject_files):
        result = process_medical_image(subject_files[slice_idx], prompt_text, modality, window_type)
        if result:
            slice_info = f"Slice {slice_idx + 1}/{len(subject_files)} ({subject_id})"
            return result, slice_info

    return None, f"Invalid slice index: {slice_idx}"
 
673
 
674
  with gr.Tab("Interactive Slice Viewer"):
675
  gr.Markdown("**Scroll through multiple slices/images from the same subject interactively**")
676
+ gr.Markdown("""
677
+ **πŸ“‹ Subject Detection:** The app automatically detects subject/patient IDs from:
678
+ - Folder names (e.g., `subject_001/`, `patient_123/`)
679
+ - Filenames (e.g., `subject_001_slice_01.png`, `patient_123.dcm`)
680
+ - DICOM metadata (PatientID, StudyInstanceUID)
681
+
682
+ **πŸ’‘ Tip:** Upload images organized by subject folders for best results!
683
+ """)
684
  with gr.Row():
685
  with gr.Column():
686
  files_input = gr.File(
 
688
  file_types=[".dcm", ".png", ".jpg", ".jpeg"],
689
  file_count="multiple",
690
  type="filepath",
691
+ info="Upload multiple slices from the same subject (e.g., axial MRI slices). Images are automatically grouped by subject ID."
692
+ )
693
+
694
+ subject_dropdown = gr.Dropdown(
695
+ label="Select Subject/Patient",
696
+ choices=[],
697
+ value=None,
698
+ interactive=True,
699
+ info="Select which subject's slices to view (auto-detected from filenames/folders)"
700
  )
701
 
702
  text_input_batch = gr.Textbox(
 
720
  info="CT windowing preset (ignored for MRI)"
721
  )
722
 
723
+ detect_subjects_btn = gr.Button("πŸ” Detect Subjects", variant="secondary", size="sm")
724
  submit_batch_btn = gr.Button("Process All Slices", variant="primary", size="lg")
725
 
726
  gr.Markdown("---")
 
754
  interactive=False
755
  )
756
 
757
+ subject_info_text = gr.Textbox(
758
+ label="Subject Info",
759
+ value="",
760
+ interactive=False,
761
+ visible=False
762
+ )
763
+
764
  gr.Markdown("### Status")
765
  status_batch_text = gr.Textbox(
766
  label="Processing Status",
 
901
  outputs=[image_output, status_text]
902
  )
903
 
904
+ # Detect subjects when files are uploaded
905
+ detect_subjects_btn.click(
906
+ fn=detect_subjects,
907
+ inputs=[files_input],
908
+ outputs=[subject_dropdown, status_batch_text]
909
+ )
910
+
911
  # Interactive slice viewer
912
  submit_batch_btn.click(
913
  fn=process_slices_for_viewer,
914
+ inputs=[files_input, subject_dropdown, text_input_batch, modality_dropdown_batch, window_dropdown_batch],
915
+ outputs=[current_slice_output, slice_slider, status_batch_text, slice_info_text, subject_dropdown, subject_info_text]
916
  ).then(
917
  lambda max_val: gr.Slider(maximum=max_val, interactive=True),
918
  inputs=[slice_slider],
919
  outputs=[slice_slider]
920
  )
921
 
922
def update_slice(slice_num, files, selected_subject, prompt, mod, window):
    """Slider callback: fetch the slice at the slider's current position."""
    position = int(slice_num)
    image, caption = navigate_slice(position, files, selected_subject, prompt, mod, window)
    return image, caption
925
 
926
  slice_slider.change(
927
  fn=update_slice,
928
+ inputs=[slice_slider, files_input, subject_dropdown, text_input_batch, modality_dropdown_batch, window_dropdown_batch],
929
  outputs=[current_slice_output, slice_info_text]
930
  )
931
 
932
def prev_slice(current, files, selected_subject, prompt, mod, window):
    """Step one slice back, never going below index 0.

    Returns the new index together with the slice result and caption from
    ``navigate_slice``.
    """
    stepped = current - 1
    target = stepped if stepped > 0 else 0
    image, caption = navigate_slice(target, files, selected_subject, prompt, mod, window)
    return target, image, caption
936
 
937
def next_slice(current, max_val, files, selected_subject, prompt, mod, window):
    """Step one slice forward, staying put when there is no next slice.

    Args:
        current: Current slice index.
        max_val: Upper bound for the index. It is only honoured when it is
            greater than ``current``; a caller that passes the current slider
            value here would otherwise pin the viewer to the current slice.
        files, selected_subject, prompt, mod, window: Forwarded to
            ``navigate_slice``.

    Returns:
        ``(index, result, info)`` for the slice now shown.
    """
    target = current + 1
    if max_val is not None and max_val > current:
        target = min(max_val, target)

    result, info = navigate_slice(target, files, selected_subject, prompt, mod, window)
    if result is None:
        # Stepped past the last available slice: keep showing the current one.
        result, info = navigate_slice(current, files, selected_subject, prompt, mod, window)
        return current, result, info
    return target, result, info
941
 
942
  prev_btn.click(
943
  fn=prev_slice,
944
+ inputs=[slice_slider, files_input, subject_dropdown, text_input_batch, modality_dropdown_batch, window_dropdown_batch],
945
  outputs=[slice_slider, current_slice_output, slice_info_text]
946
  )
947
 
948
  next_btn.click(
949
  fn=next_slice,
950
+ inputs=[slice_slider, slice_slider, files_input, subject_dropdown, text_input_batch, modality_dropdown_batch, window_dropdown_batch],
951
  outputs=[slice_slider, current_slice_output, slice_info_text]
952
  )
953