Spaces:

raylim
/

mosaic-zero

Sleeping

App Files Files Community

raylim committed on Oct 22, 2025

Commit

588c780

unverified ·

2 Parent(s): ab10db7 641c24a

Merge pull request #4 from pathology-data-mining/copilot/deploy-gradio-app-without-gpu

Browse files

Files changed (6) hide show

.gitignore +2 -0
README.md +38 -0
app.py +20 -0
pyproject.toml +1 -0
requirements.txt +16 -0
src/mosaic/analysis.py +137 -69

.gitignore CHANGED Viewed

@@ -15,3 +15,5 @@ data/
 .pytest_cache/
 .coverage
 htmlcov/

 .pytest_cache/
 .coverage
 htmlcov/
+flagged/
+gradio_cached_examples/

README.md CHANGED Viewed

@@ -1,3 +1,15 @@
 # Mosaic: H&E Whole Slide Image Cancer Subtype and Biomarker Inference
 Mosaic is a deep learning model designed for predicting cancer subtypes and biomarkers from Hematoxylin and Eosin (H&E) stained whole slide images (WSIs). This repository provides the code, pre-trained models, and instructions to use Mosaic for your own datasets.
@@ -7,6 +19,7 @@ Mosaic is a deep learning model designed for predicting cancer subtypes and biom
 - [System Requirements](#system-requirements)
 - [Pre-requisites](#pre-requisites)
 - [Installation](#installation)
 - [Usage](#usage)
   - [Initial Setup](#initial-setup)
   - [Web Application](#web-application)
@@ -51,6 +64,31 @@ Alternatively, install directly from the repository:
 uv pip install git+https://github.com/pathology-data-mining/mosaic.git
 ```
 ## Usage
 ### Initial Setup

+---
+title: Mosaic
+emoji: 🧬
+colorFrom: blue
+colorTo: purple
+sdk: gradio
+sdk_version: 5.49.0
+app_file: app.py
+pinned: false
+license: apache-2.0
+---
 # Mosaic: H&E Whole Slide Image Cancer Subtype and Biomarker Inference
 Mosaic is a deep learning model designed for predicting cancer subtypes and biomarkers from Hematoxylin and Eosin (H&E) stained whole slide images (WSIs). This repository provides the code, pre-trained models, and instructions to use Mosaic for your own datasets.
 - [System Requirements](#system-requirements)
 - [Pre-requisites](#pre-requisites)
 - [Installation](#installation)
+- [Deploying to Hugging Face Spaces](#deploying-to-hugging-face-spaces)
 - [Usage](#usage)
   - [Initial Setup](#initial-setup)
   - [Web Application](#web-application)
 uv pip install git+https://github.com/pathology-data-mining/mosaic.git
 ```
+## Deploying to Hugging Face Spaces
+This repository is configured for deployment on Hugging Face Spaces with Zero GPU support.
+### Prerequisites
+1. You need to be added to the [PDM Group](https://huggingface.co/PDM-Group) on Hugging Face to access the models
+2. Create a Hugging Face access token with read access to the PDM-Group models
+### Deployment Steps
+1. Create a new Space on Hugging Face
+2. Select "Gradio" as the SDK
+3. Choose "Zero GPU" as the hardware option (if available)
+4. Clone this repository to your Space or push the code
+5. In your Space settings, add a secret named `HF_TOKEN` with your Hugging Face access token
+6. The app will automatically start and download the necessary models on first run
+### Zero GPU Configuration
+The app uses the `@spaces.GPU` decorator to allocate GPU resources only when needed for inference. This allows efficient use of Zero GPU resources on Hugging Face Spaces. The GPU is automatically allocated when:
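+- Extracting features with CTransPath and Optimus models
+- Filtering features with the marker classifier
+- Running Aeon and Paladin model inference
+Tissue segmentation runs on the CPU before the GPU is requested and is not included in the GPU allocation.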
 ## Usage
 ### Initial Setup

app.py ADDED Viewed

	@@ -0,0 +1,20 @@

+"""Entry point for Hugging Face Spaces deployment.
+This module serves as the main entry point when deploying Mosaic to
+Hugging Face Spaces. It initializes the models and launches the Gradio interface.
+"""
+from mosaic.gradio_app import download_and_process_models
+from mosaic.ui import launch_gradio
+if __name__ == "__main__":
+    # Download models and initialize cancer subtype mappings
+    download_and_process_models()
+    # Launch the Gradio interface
+    # Use default settings suitable for Hugging Face Spaces
+    launch_gradio(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,
+    )

pyproject.toml CHANGED Viewed

@@ -14,6 +14,7 @@ dependencies = [
   "memory-profiler>=0.61.0",
   "mussel[torch-gpu]",
   "paladin",
 ]
 [project.scripts]

   "memory-profiler>=0.61.0",
   "mussel[torch-gpu]",
   "paladin",
+  "spaces>=0.30.0",
 ]
 [project.scripts]

requirements.txt ADDED Viewed

	@@ -0,0 +1,16 @@

+gradio>=5.49.0
+loguru>=0.7.3
+memory-profiler>=0.61.0
+spaces>=0.30.0
+torch>=2.0.0
+torchvision>=0.15.0
+pandas>=2.0.0
+numpy>=1.24.0
+pillow>=10.0.0
+opencv-python-headless>=4.8.0
+scikit-learn>=1.3.0
+requests>=2.31.0
+huggingface-hub>=0.20.0
+openslide-python>=1.3.0
+git+https://github.com/pathology-data-mining/Mussel.git@ray-dev
+git+https://github.com/pathology-data-mining/paladin.git@dev

src/mosaic/analysis.py CHANGED Viewed

@@ -15,94 +15,60 @@ from mussel.utils.segment import draw_slide_mask
 from mussel.cli.tessellate import BiopsySegConfig, ResectionSegConfig, TcgaSegConfig
 from loguru import logger
 from mosaic.inference import run_aeon, run_paladin
-def analyze_slide(
     slide_path,
-    seg_config,
     site_type,
     cancer_subtype,
     cancer_subtype_name_map,
-    ihc_subtype="",
-    num_workers=4,
-    progress=gr.Progress(track_tqdm=True),
 ):
-    """Analyze a whole slide image for cancer subtype and biomarker prediction.
-    This function performs a complete analysis pipeline including:
-    1. Tissue segmentation
-    2. CTransPath feature extraction
-    3. Feature filtering with marker classifier
-    4. Optimus feature extraction on filtered tiles
-    5. Aeon inference for cancer subtype (if not provided)
-    6. Paladin inference for biomarker prediction
     Args:
         slide_path: Path to the whole slide image file
-        seg_config: Segmentation configuration, one of "Biopsy", "Resection", or "TCGA"
         site_type: Site type, either "Primary" or "Metastatic"
         cancer_subtype: Cancer subtype (OncoTree code or "Unknown" for inference)
         cancer_subtype_name_map: Dictionary mapping cancer subtype names to codes
-        ihc_subtype: IHC subtype for breast cancer (optional)
         num_workers: Number of worker processes for feature extraction
         progress: Gradio progress tracker for UI updates
     Returns:
-        tuple: (slide_mask, aeon_results, paladin_results)
-            - slide_mask: PIL Image of tissue segmentation visualization
             - aeon_results: DataFrame with cancer subtype predictions and confidence scores
             - paladin_results: DataFrame with biomarker predictions
-    Raises:
-        gr.Error: If no slide is provided
-        gr.Warning: If no tissue is detected in the slide
-        ValueError: If an unknown segmentation configuration is provided
     """
-    if slide_path is None:
-        raise gr.Error("Please upload a slide.")
-    # Step 1: Segment tissue
-    start_time = pd.Timestamp.now()
-    if seg_config == "Biopsy":
-        seg_config = BiopsySegConfig()
-    elif seg_config == "Resection":
-        seg_config = ResectionSegConfig()
-    elif seg_config == "TCGA":
-        seg_config = TcgaSegConfig()
-    else:
-        raise ValueError(f"Unknown segmentation configuration: {seg_config}")
-    progress(0.0, desc="Segmenting tissue")
-    logger.info(f"Segmenting tissue for slide: {slide_path}")
-    if values := segment_tissue(
-        slide_path=slide_path,
-        patch_size=224,
-        mpp=0.5,
-        seg_level=-1,
-        segment_threshold=seg_config.segment_threshold,
-        median_blur_ksize=seg_config.median_blur_ksize,
-        morphology_ex_kernel=seg_config.morphology_ex_kernel,
-        tissue_area_threshold=seg_config.tissue_area_threshold,
-        hole_area_threshold=seg_config.hole_area_threshold,
-        max_num_holes=seg_config.max_num_holes,
-    ):
-        polygon, _, coords, attrs = values
-    else:
-        gr.Warning(f"No tissue detected in slide: {slide_path}")
-        return None, None, None
-    end_time = pd.Timestamp.now()
-    logger.info(f"Tissue segmentation took {end_time - start_time}")
-    logger.info(f"Found {len(coords)} tissue tiles")
-    progress(0.2, desc="Tissue segmented")
-    # Draw slide mask for visualization
-    logger.info("Drawing slide mask")
-    progress(0.25, desc="Drawing slide mask")
-    slide_mask = draw_slide_mask(
-        slide_path, polygon, outline="black", fill=(255, 0, 0, 80), vis_level=-1
-    )
-    logger.info("Slide mask drawn")
     # Step 2: Extract features with CTransPath
     start_time = pd.Timestamp.now()
@@ -173,7 +139,7 @@ def analyze_slide(
     torch.cuda.reset_peak_memory_stats()
-    # Step 3: Run Aeon to predict histology if not supplied
     if cancer_subtype == "Unknown":
         start_time = pd.Timestamp.now()
         progress(0.9, desc="Running Aeon for cancer subtype inference")
@@ -206,10 +172,10 @@ def analyze_slide(
         )
         logger.info(f"Using user-supplied cancer subtype: {cancer_subtype}")
-    # Step 4: Run Paladin to predict biomarkers
     if len(aeon_results) == 0:
         logger.warning("No Aeon results, skipping Paladin inference")
-        return slide_mask, None, None
     start_time = pd.Timestamp.now()
     progress(0.95, desc="Running Paladin for biomarker inference")
     logger.info("Running Paladin for biomarker inference")
@@ -234,4 +200,106 @@ def analyze_slide(
     aeon_results.set_index("Cancer Subtype", inplace=True)
     return slide_mask, aeon_results, paladin_results

 from mussel.cli.tessellate import BiopsySegConfig, ResectionSegConfig, TcgaSegConfig
 from loguru import logger
+try:
+    import spaces
+    HAS_SPACES = True
+except ImportError:
+    HAS_SPACES = False
+    # Create a no-op decorator if spaces is not available
+    class spaces:
+        @staticmethod
+        def GPU(fn):
+            return fn
 from mosaic.inference import run_aeon, run_paladin
+@spaces.GPU
+def _run_gpu_inference(
+    coords,
     slide_path,
+    attrs,
     site_type,
     cancer_subtype,
     cancer_subtype_name_map,
+    num_workers,
+    progress,
 ):
+    """Run GPU-intensive feature extraction and model inference.
+    This function is decorated with @spaces.GPU to allocate GPU resources only
+    when needed for GPU-intensive operations including:
+    - CTransPath feature extraction
+    - Feature filtering with marker classifier
+    - Optimus feature extraction
+    - Aeon cancer subtype inference
+    - Paladin biomarker prediction
     Args:
+        coords: Tissue tile coordinates
         slide_path: Path to the whole slide image file
+        attrs: Slide attributes
         site_type: Site type, either "Primary" or "Metastatic"
         cancer_subtype: Cancer subtype (OncoTree code or "Unknown" for inference)
         cancer_subtype_name_map: Dictionary mapping cancer subtype names to codes
         num_workers: Number of worker processes for feature extraction
         progress: Gradio progress tracker for UI updates
     Returns:
+        tuple: (aeon_results, paladin_results)
             - aeon_results: DataFrame with cancer subtype predictions and confidence scores
             - paladin_results: DataFrame with biomarker predictions
     """
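+    # Zero GPU requires num_workers=0 to avoid multiprocessing issues. The check
+    # below uses HAS_SPACES, i.e. whether the `spaces` package could be imported.
+    # NOTE(review): `spaces` is also declared as a dependency in pyproject.toml, so
+    # this presumably forces num_workers=0 outside Hugging Face Spaces too — confirm.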
+    if HAS_SPACES:
+        num_workers = 0
+        logger.info("Running on Hugging Face Spaces Zero GPU: setting num_workers=0")
     # Step 2: Extract features with CTransPath
     start_time = pd.Timestamp.now()
     torch.cuda.reset_peak_memory_stats()
+    # Step 5: Run Aeon to predict histology if not supplied
     if cancer_subtype == "Unknown":
         start_time = pd.Timestamp.now()
         progress(0.9, desc="Running Aeon for cancer subtype inference")
         )
         logger.info(f"Using user-supplied cancer subtype: {cancer_subtype}")
+    # Step 6: Run Paladin to predict biomarkers
     if len(aeon_results) == 0:
         logger.warning("No Aeon results, skipping Paladin inference")
+        return None, None
     start_time = pd.Timestamp.now()
     progress(0.95, desc="Running Paladin for biomarker inference")
     logger.info("Running Paladin for biomarker inference")
     aeon_results.set_index("Cancer Subtype", inplace=True)
+    return aeon_results, paladin_results
+def analyze_slide(
+    slide_path,
+    seg_config,
+    site_type,
+    cancer_subtype,
+    cancer_subtype_name_map,
+    ihc_subtype="",
+    num_workers=4,
+    progress=gr.Progress(track_tqdm=True),
+):
+    """Analyze a whole slide image for cancer subtype and biomarker prediction.
+    This function performs a complete analysis pipeline including:
+    1. Tissue segmentation (CPU-only, no GPU required)
+    2. GPU-intensive feature extraction and model inference
+    The GPU-intensive operations are handled by a separate function decorated
+    with @spaces.GPU to efficiently manage GPU resources on Hugging Face Spaces.
+    Tissue segmentation runs on CPU and is not included in the GPU allocation.
+    Args:
+        slide_path: Path to the whole slide image file
+        seg_config: Segmentation configuration, one of "Biopsy", "Resection", or "TCGA"
+        site_type: Site type, either "Primary" or "Metastatic"
+        cancer_subtype: Cancer subtype (OncoTree code or "Unknown" for inference)
+        cancer_subtype_name_map: Dictionary mapping cancer subtype names to codes
+        ihc_subtype: IHC subtype for breast cancer (optional)
+        num_workers: Number of worker processes for feature extraction
+        progress: Gradio progress tracker for UI updates
+    Returns:
+        tuple: (slide_mask, aeon_results, paladin_results)
+            - slide_mask: PIL Image of tissue segmentation visualization
+            - aeon_results: DataFrame with cancer subtype predictions and confidence scores
+            - paladin_results: DataFrame with biomarker predictions
+    Raises:
+        gr.Error: If no slide is provided
+        ValueError: If an unknown segmentation configuration is provided
+    Note:
+        If no tissue is detected in the slide, a gr.Warning is issued (not raised)
+        and (None, None, None) is returned.
+    """
+    if slide_path is None:
+        raise gr.Error("Please upload a slide.")
+    # Step 1: Segment tissue (CPU-only, not GPU-intensive)
+    start_time = pd.Timestamp.now()
+    if seg_config == "Biopsy":
+        seg_config = BiopsySegConfig()
+    elif seg_config == "Resection":
+        seg_config = ResectionSegConfig()
+    elif seg_config == "TCGA":
+        seg_config = TcgaSegConfig()
+    else:
+        raise ValueError(f"Unknown segmentation configuration: {seg_config}")
+    progress(0.0, desc="Segmenting tissue")
+    logger.info(f"Segmenting tissue for slide: {slide_path}")
+    if values := segment_tissue(
+        slide_path=slide_path,
+        patch_size=224,
+        mpp=0.5,
+        seg_level=-1,
+        segment_threshold=seg_config.segment_threshold,
+        median_blur_ksize=seg_config.median_blur_ksize,
+        morphology_ex_kernel=seg_config.morphology_ex_kernel,
+        tissue_area_threshold=seg_config.tissue_area_threshold,
+        hole_area_threshold=seg_config.hole_area_threshold,
+        max_num_holes=seg_config.max_num_holes,
+    ):
+        polygon, _, coords, attrs = values
+    else:
+        gr.Warning(f"No tissue detected in slide: {slide_path}")
+        return None, None, None
+    end_time = pd.Timestamp.now()
+    logger.info(f"Tissue segmentation took {end_time - start_time}")
+    logger.info(f"Found {len(coords)} tissue tiles")
+    progress(0.2, desc="Tissue segmented")
+    # Draw slide mask for visualization
+    logger.info("Drawing slide mask")
+    progress(0.25, desc="Drawing slide mask")
+    slide_mask = draw_slide_mask(
+        slide_path, polygon, outline="black", fill=(255, 0, 0, 80), vis_level=-1
+    )
+    logger.info("Slide mask drawn")
+    # Step 2-6: Run GPU-intensive operations (feature extraction and inference)
+    aeon_results, paladin_results = _run_gpu_inference(
+        coords,
+        slide_path,
+        attrs,
+        site_type,
+        cancer_subtype,
+        cancer_subtype_name_map,
+        num_workers,
+        progress,
+    )
     return slide_mask, aeon_results, paladin_results