Spaces:

raidium
/

curia

Sleeping

App Files Files Community

cdancette committed on Nov 14

Commit

a8db175

1 Parent(s): 467b7ba

simplify dataset management

Browse files

Files changed (2) hide show

app.py +92 -77
inference.py +2 -3

app.py CHANGED Viewed

@@ -73,22 +73,31 @@ HEADS_REQUIRING_MASK: set[str] = {
 }
-DATASET_OPTIONS: Dict[str, Dict[str, Any]] = {
-    "anatomy-ct": {"label": "Anatomy CT (test)", "head": "anatomy-ct"},
-    "anatomy-ct-hard": {"label": "Anatomy CT Hard (test)", "head": "anatomy-ct"},
-    "anatomy-mri": {"label": "Anatomy MRI (test)", "head": "anatomy-mri"},
-    "covidx-ct": {"label": "COVIDx CT (test)", "head": "covidx-ct"},
-    "deep-lesion-site": {"label": "Deep Lesion Site (test)", "head": "deep-lesion-site"},
-    "emidec-classification-mask": {
-        "label": "EMIDEC Classification Mask (test)",
-        "head": "emidec-classification-mask",
-    },
-    "ixi": {"label": "IXI (test)", "head": "ixi"},
-    "kits": {"label": "KiTS (test)", "head": "kits"},
-    "kneeMRI": {"label": "Knee MRI (test)", "head": "kneeMRI"},
-    "luna16": {"label": "LUNA16 (test)", "head": "luna16-3D"},
-    "luna16-3D": {"label": "LUNA16 3D (test)", "head": "luna16-3D"},
-    "oasis": {"label": "OASIS (test)", "head": "oasis"},
 }
@@ -118,7 +127,7 @@ DEFAULT_WINDOWINGS: Dict[str, Optional[Dict[str, int]]] = {
 # ---------------------------------------------------------------------------
-def apply_windowing(image: np.ndarray, subset: str) -> np.ndarray:
     """Apply CT windowing based on the dataset.
     For CT images, applies window level and width transformation.
@@ -131,7 +140,7 @@ def apply_windowing(image: np.ndarray, subset: str) -> np.ndarray:
     Returns:
         Windowed image array
     """
-    windowing = DEFAULT_WINDOWINGS.get(subset)
     # No windowing for MRI or unknown datasets
     if windowing is None:
@@ -250,21 +259,6 @@ def render_image_with_mask_info(image: np.ndarray, mask: Any) -> np.ndarray:
         return display
-def dataset_class_metadata(dataset: Dataset) -> Tuple[List[int], Dict[int, str]]:
-    target_feature = dataset.features.get("target")
-    if target_feature and hasattr(target_feature, "names"):
-        names = list(target_feature.names)
-        id2label = {i: name for i, name in enumerate(names)}
-        classes = list(range(len(names)))
-        return classes, id2label
-    # Fall back to generic inspection
-    targets = dataset["target"] if "target" in dataset.column_names else []
-    unique = sorted({int(t) for t in targets}) if targets else []
-    id2label = {i: str(i) for i in unique}
-    return unique, id2label
 def pick_random_indices(dataset: Dataset, target: Optional[int]) -> int:
     if "target" not in dataset.column_names:
         return random.randrange(len(dataset))
@@ -283,12 +277,14 @@ def pick_random_indices(dataset: Dataset, target: Optional[int]) -> int:
 # ---------------------------------------------------------------------------
-def update_dataset_from_head(head: str) -> Dict[str, Any]:
-    # Find the first dataset that matches this head
-    for dataset_key, meta in DATASET_OPTIONS.items():
-        if meta["head"] == head:
-            return gr.update(value=dataset_key)
-    return gr.update()
 def update_upload_component_state(head: str) -> Tuple[Dict[str, Any], Dict[str, Any]]:
@@ -306,27 +302,34 @@ def update_upload_component_state(head: str) -> Tuple[Dict[str, Any], Dict[str,
     return info_update, upload_update
-def load_dataset_metadata(subset: str) -> Tuple[Dict[str, Any], str]:
     try:
         dataset = load_curia_dataset(subset)
     except Exception as exc:  # pragma: no cover - surfaced in UI
-        dropdown = gr.update(choices=["Random"], value="Random")
-        return dropdown, f"Failed to load dataset: {exc}"
-    classes, id2label = dataset_class_metadata(dataset)
-    if not classes:
-        dropdown = gr.update(
-            choices=["Random"],
-            value="Random",
-        )
-        return dropdown, "No class metadata detected; sampling at random"
     options = [
         "Random",
-        *[f"{cls_id}: {id2label.get(cls_id, str(cls_id))}" for cls_id in classes],
     ]
-    dropdown = gr.update(choices=options, value="Random")
-    return dropdown, f"Loaded {subset} ({len(dataset)} test samples)"
 def parse_target_selection(selection: str) -> Optional[int]:
@@ -348,6 +351,7 @@ def sample_dataset_example(
     index = pick_random_indices(dataset, target_id)
     record = dataset[index]
     image = to_numpy_image(record["image"])
     mask_array = record.get("mask")
     meta = {
@@ -360,15 +364,21 @@ def sample_dataset_example(
 def load_dataset_sample(
-    subset: str,
     target_selection: str,
     head: str,
 ) -> Tuple[
     Optional[np.ndarray],
     pd.DataFrame,
     Dict[str, Any],
     Optional[Dict[str, Any]],
 ]:
     try:
         target_id = parse_target_selection(target_selection)
         image, meta = sample_dataset_example(subset, target_id)
@@ -390,13 +400,14 @@ def load_dataset_sample(
         return (
             display,
             pd.DataFrame(),
             ground_truth_update,
             {"image": image, "mask": meta.get("mask")},  # Store raw image for inference
         )
     except Exception as exc:  # pragma: no cover - surfaced in UI
         gr.Warning(f"Failed to load sample: {exc}")
-        return None, pd.DataFrame(), gr.update(visible=False), None
 def format_probabilities(probs: torch.Tensor, id2label: Dict[int, str]) -> pd.DataFrame:
@@ -436,27 +447,28 @@ def run_inference(
 def handle_upload_preview(
     image: np.ndarray | Image.Image | None,
-    subset: str,
-) -> Tuple[Optional[np.ndarray], str, pd.DataFrame, Dict[str, Any], Optional[Dict[str, Any]]]:
     if image is None:
-        return None, "Please upload an image.", pd.DataFrame(), gr.update(visible=False), None
     try:
         np_image = to_numpy_image(image)
         # Apply windowing only for display, keep raw image for model inference
-        windowed_image = apply_windowing(np_image, subset)
-        display = to_display_image(windowed_image)
         return (
             display,
             "Image uploaded. Click 'Run inference' to compute predictions.",
             pd.DataFrame(),
             gr.update(visible=False),
             {"image": np_image, "mask": None},  # Store raw image for inference
         )
     except Exception as exc:  # pragma: no cover - surfaced in UI
-        return None, f"Failed to load image: {exc}", pd.DataFrame(), gr.update(visible=False), None
 # ---------------------------------------------------------------------------
@@ -490,12 +502,8 @@ def build_demo() -> gr.Blocks:
         with gr.Row():
             with gr.Column():
                 gr.Markdown("### Load dataset sample")
-                dataset_dropdown = gr.Dropdown(
-                    label="CuriaBench subset",
-                    choices=[(meta["label"], key) for key, meta in DATASET_OPTIONS.items()],
-                    value="kits",
-                )
-                dataset_status = gr.Markdown("Select a dataset to load class metadata.")
                 class_dropdown = gr.Dropdown(label="Target class filter", choices=["Random"], value="Random")
                 dataset_btn = gr.Button("Load dataset sample")
@@ -525,9 +533,9 @@ def build_demo() -> gr.Blocks:
         with gr.Row():
             with gr.Column():
                 image_display = gr.Image(label="Image", interactive=False, type="numpy")
-                ground_truth_display = gr.Markdown(visible=False)
             with gr.Column():
                 gr.Markdown("### Predictions")
                 main_prediction = gr.Markdown()
                 prediction_probs = gr.Dataframe(headers=["class_id", "label", "probability"])
@@ -535,27 +543,33 @@ def build_demo() -> gr.Blocks:
         image_state = gr.State()
         # Event wiring
         head_dropdown.change(
-            fn=update_dataset_from_head,
             inputs=[head_dropdown],
-            outputs=[dataset_dropdown],
         ).then(
             fn=update_upload_component_state,
             inputs=[head_dropdown],
             outputs=[upload_info_text, upload_component],
-        )
-        dataset_dropdown.change(
             fn=load_dataset_metadata,
-            inputs=[dataset_dropdown],
-            outputs=[class_dropdown, dataset_status],
         )
         dataset_btn.click(
             fn=load_dataset_sample,
-            inputs=[dataset_dropdown, class_dropdown, head_dropdown],
             outputs=[
                 image_display,
                 prediction_probs,
                 ground_truth_display,
                 image_state,
@@ -564,10 +578,11 @@ def build_demo() -> gr.Blocks:
         upload_component.upload(
             fn=handle_upload_preview,
-            inputs=[upload_component, dataset_dropdown],
             outputs=[
                 image_display,
                 status_text,
                 prediction_probs,
                 ground_truth_display,
                 image_state,

 }
+DATASET_OPTIONS: Dict[str, str] = {
+    "anatomy-ct": "Anatomy CT (test)",
+    "anatomy-ct-hard": "Anatomy CT Hard (test)",
+    "anatomy-mri": "Anatomy MRI (test)",
+    "covidx-ct": "COVIDx CT (test)",
+    "deep-lesion-site": "Deep Lesion Site (test)",
+    "emidec-classification-mask": "EMIDEC Classification Mask (test)",
+    "ixi": "IXI (test)",
+    "kits": "KiTS (test)",
+    "kneeMRI": "Knee MRI (test)",
+    "luna16-3D": "LUNA16 3D (test)",
+    "oasis": "OASIS (test)",
+}
+DEFAULT_DATASET_FOR_HEAD: Dict[str, str] = {
+    "anatomy-ct": "anatomy-ct",
+    "anatomy-mri": "anatomy-mri",
+    "covidx-ct": "covidx-ct",
+    "deep-lesion-site": "deep-lesion-site",
+    "emidec-classification-mask": "emidec-classification-mask",
+    "ixi": "ixi",
+    "kits": "kits",
+    "kneeMRI": "kneeMRI",
+    "luna16-3D": "luna16-3D",
+    "oasis": "oasis",
 }
 # ---------------------------------------------------------------------------
+def apply_windowing(image: np.ndarray, head: str) -> np.ndarray:
     """Apply CT windowing based on the dataset.
     For CT images, applies window level and width transformation.
     Returns:
         Windowed image array
     """
+    windowing = DEFAULT_WINDOWINGS.get(head)
     # No windowing for MRI or unknown datasets
     if windowing is None:
         return display
 def pick_random_indices(dataset: Dataset, target: Optional[int]) -> int:
     if "target" not in dataset.column_names:
         return random.randrange(len(dataset))
 # ---------------------------------------------------------------------------
+def update_dataset_display(head: str) -> str:
+    """Update the dataset name display based on the selected head."""
+    dataset_key = DEFAULT_DATASET_FOR_HEAD.get(head)
+    if dataset_key:
+        dataset_label = DATASET_OPTIONS.get(dataset_key, dataset_key)
+        return f"**Dataset:** {dataset_label}"
+    return "**Dataset:** not available"
 def update_upload_component_state(head: str) -> Tuple[Dict[str, Any], Dict[str, Any]]:
     return info_update, upload_update
+def load_dataset_metadata(head: str) -> Tuple[Dict[str, Any], str, Dict[str, Any]]:
+    """Load dataset metadata based on the selected head."""
+    subset = DEFAULT_DATASET_FOR_HEAD.get(head)
+    if not subset:
+        dropdown = gr.update(choices=["Random"], value="Random", interactive=False)
+        button = gr.update(interactive=False)
+        return dropdown, "No dataset found for this head.", button
+    # Load class labels from id_to_labels.json
+    id2label = load_id_to_labels().get(head, {})
     try:
         dataset = load_curia_dataset(subset)
     except Exception as exc:  # pragma: no cover - surfaced in UI
+        dropdown = gr.update(choices=["Random"], value="Random", interactive=False)
+        button = gr.update(interactive=False)
+        return dropdown, f"Failed to load dataset: {exc}", button
+    # Build dropdown options from id_to_labels.json
+    classes = sorted(id2label.keys())
     options = [
         "Random",
+        *[f"{cls_id}: {id2label[cls_id]}" for cls_id in classes],
     ]
+    dropdown = gr.update(choices=options, value="Random", interactive=True)
+    button = gr.update(interactive=True)
+    return dropdown, f"Loaded {subset} ({len(dataset)} test samples)", button
 def parse_target_selection(selection: str) -> Optional[int]:
     index = pick_random_indices(dataset, target_id)
     record = dataset[index]
     image = to_numpy_image(record["image"])
+    print(image.shape)
     mask_array = record.get("mask")
     meta = {
 def load_dataset_sample(
     target_selection: str,
     head: str,
 ) -> Tuple[
     Optional[np.ndarray],
+    str,
     pd.DataFrame,
     Dict[str, Any],
     Optional[Dict[str, Any]],
 ]:
+    """Load a dataset sample based on the selected head."""
+    subset = DEFAULT_DATASET_FOR_HEAD.get(head)
+    if not subset:
+        gr.Warning("No dataset found for this head.")
+        return None, "", pd.DataFrame(), gr.update(visible=False), None
     try:
         target_id = parse_target_selection(target_selection)
         image, meta = sample_dataset_example(subset, target_id)
         return (
             display,
+            "",  # Reset prediction text
             pd.DataFrame(),
             ground_truth_update,
             {"image": image, "mask": meta.get("mask")},  # Store raw image for inference
         )
     except Exception as exc:  # pragma: no cover - surfaced in UI
         gr.Warning(f"Failed to load sample: {exc}")
+        return None, "", pd.DataFrame(), gr.update(visible=False), None
 def format_probabilities(probs: torch.Tensor, id2label: Dict[int, str]) -> pd.DataFrame:
 def handle_upload_preview(
     image: np.ndarray | Image.Image | None,
+    head: str,
+) -> Tuple[Optional[np.ndarray], str, str, pd.DataFrame, Dict[str, Any], Optional[Dict[str, Any]]]:
+    """Handle image upload preview (the head argument is accepted but not used in this body)."""
     if image is None:
+        return None, "Please upload an image.", "", pd.DataFrame(), gr.update(visible=False), None
     try:
         np_image = to_numpy_image(image)
         # apply_windowing is no longer called here: the image is only converted for display, and the raw image is kept for model inference
+        display = to_display_image(np_image)
         return (
             display,
             "Image uploaded. Click 'Run inference' to compute predictions.",
+            "",  # Reset prediction text
             pd.DataFrame(),
             gr.update(visible=False),
             {"image": np_image, "mask": None},  # Store raw image for inference
         )
     except Exception as exc:  # pragma: no cover - surfaced in UI
+        return None, f"Failed to load image: {exc}", "", pd.DataFrame(), gr.update(visible=False), None
 # ---------------------------------------------------------------------------
         with gr.Row():
             with gr.Column():
                 gr.Markdown("### Load dataset sample")
+                dataset_display = gr.Markdown(f"**Dataset:** {DATASET_OPTIONS.get(DEFAULT_DATASET_FOR_HEAD.get(default_head, ''), 'Unknown')}")
+                dataset_status = gr.Markdown("Select a model head to load class metadata.")
                 class_dropdown = gr.Dropdown(label="Target class filter", choices=["Random"], value="Random")
                 dataset_btn = gr.Button("Load dataset sample")
         with gr.Row():
             with gr.Column():
                 image_display = gr.Image(label="Image", interactive=False, type="numpy")
             with gr.Column():
+                ground_truth_display = gr.Markdown(visible=False)
                 gr.Markdown("### Predictions")
                 main_prediction = gr.Markdown()
                 prediction_probs = gr.Dataframe(headers=["class_id", "label", "probability"])
         image_state = gr.State()
         # Event wiring
+        # Initialize on page load
+        demo.load(
+            fn=load_dataset_metadata,
+            inputs=[head_dropdown],
+            outputs=[class_dropdown, dataset_status, dataset_btn],
+        )
         head_dropdown.change(
+            fn=update_dataset_display,
             inputs=[head_dropdown],
+            outputs=[dataset_display],
         ).then(
             fn=update_upload_component_state,
             inputs=[head_dropdown],
             outputs=[upload_info_text, upload_component],
+        ).then(
             fn=load_dataset_metadata,
+            inputs=[head_dropdown],
+            outputs=[class_dropdown, dataset_status, dataset_btn],
         )
         dataset_btn.click(
             fn=load_dataset_sample,
+            inputs=[class_dropdown, head_dropdown],
             outputs=[
                 image_display,
+                main_prediction,
                 prediction_probs,
                 ground_truth_display,
                 image_state,
         upload_component.upload(
             fn=handle_upload_preview,
+            inputs=[upload_component, head_dropdown],
             outputs=[
                 image_display,
                 status_text,
+                main_prediction,
                 prediction_probs,
                 ground_truth_display,
                 image_state,

inference.py CHANGED Viewed

@@ -5,12 +5,11 @@ from __future__ import annotations
 import json
 import os
 from functools import lru_cache
-from typing import Any, Dict, List, Optional, Tuple
 import numpy as np
-import pandas as pd
 import torch
-from datasets import Dataset, DatasetDict, load_dataset
 from PIL import Image
 from torchvision import transforms
 from torchvision.transforms import functional as TF

 import json
 import os
 from functools import lru_cache
+from typing import Any, Dict, Optional
 import numpy as np
 import torch
+from datasets import DatasetDict, load_dataset
 from PIL import Image
 from torchvision import transforms
 from torchvision.transforms import functional as TF