raylim committed on
Commit
588c780
·
unverified ·
2 Parent(s): ab10db7 641c24a

Merge pull request #4 from pathology-data-mining/copilot/deploy-gradio-app-without-gpu

Browse files
Files changed (6) hide show
  1. .gitignore +2 -0
  2. README.md +38 -0
  3. app.py +20 -0
  4. pyproject.toml +1 -0
  5. requirements.txt +16 -0
  6. src/mosaic/analysis.py +137 -69
.gitignore CHANGED
@@ -15,3 +15,5 @@ data/
15
  .pytest_cache/
16
  .coverage
17
  htmlcov/
 
 
 
15
  .pytest_cache/
16
  .coverage
17
  htmlcov/
18
+ flagged/
19
+ gradio_cached_examples/
README.md CHANGED
@@ -1,3 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
  # Mosaic: H&E Whole Slide Image Cancer Subtype and Biomarker Inference
2
 
3
  Mosaic is a deep learning model designed for predicting cancer subtypes and biomarkers from Hematoxylin and Eosin (H&E) stained whole slide images (WSIs). This repository provides the code, pre-trained models, and instructions to use Mosaic for your own datasets.
@@ -7,6 +19,7 @@ Mosaic is a deep learning model designed for predicting cancer subtypes and biom
7
  - [System Requirements](#system-requirements)
8
  - [Pre-requisites](#pre-requisites)
9
  - [Installation](#installation)
 
10
  - [Usage](#usage)
11
  - [Initial Setup](#initial-setup)
12
  - [Web Application](#web-application)
@@ -51,6 +64,31 @@ Alternatively, install directly from the repository:
51
  uv pip install git+https://github.com/pathology-data-mining/mosaic.git
52
  ```
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  ## Usage
55
 
56
  ### Initial Setup
 
1
+ ---
2
+ title: Mosaic
3
+ emoji: 🧬
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 5.49.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ ---
12
+
13
  # Mosaic: H&E Whole Slide Image Cancer Subtype and Biomarker Inference
14
 
15
  Mosaic is a deep learning model designed for predicting cancer subtypes and biomarkers from Hematoxylin and Eosin (H&E) stained whole slide images (WSIs). This repository provides the code, pre-trained models, and instructions to use Mosaic for your own datasets.
 
19
  - [System Requirements](#system-requirements)
20
  - [Pre-requisites](#pre-requisites)
21
  - [Installation](#installation)
22
+ - [Deploying to Hugging Face Spaces](#deploying-to-hugging-face-spaces)
23
  - [Usage](#usage)
24
  - [Initial Setup](#initial-setup)
25
  - [Web Application](#web-application)
 
64
  uv pip install git+https://github.com/pathology-data-mining/mosaic.git
65
  ```
66
 
67
+ ## Deploying to Hugging Face Spaces
68
+
69
+ This repository is configured for deployment on Hugging Face Spaces with Zero GPU support.
70
+
71
+ ### Prerequisites
72
+
73
+ 1. You need to be added to the [PDM Group](https://huggingface.co/PDM-Group) on Hugging Face to access the models
74
+ 2. Create a Hugging Face access token with read access to the PDM-Group organization's repositories
75
+
76
+ ### Deployment Steps
77
+
78
+ 1. Create a new Space on Hugging Face
79
+ 2. Select "Gradio" as the SDK
80
+ 3. Choose "Zero GPU" as the hardware option (if available)
81
+ 4. Clone this repository to your Space or push the code
82
+ 5. In your Space settings, add a secret named `HF_TOKEN` with your Hugging Face access token
83
+ 6. The app will automatically start and download the necessary models on first run
84
+
85
+ ### Zero GPU Configuration
86
+
87
+ The app uses the `@spaces.GPU` decorator to allocate GPU resources only when needed for inference. This allows efficient use of Zero GPU resources on Hugging Face Spaces. Tissue segmentation runs on the CPU before any GPU is requested; the GPU is automatically allocated when:
88
+ - Extracting features with CTransPath and Optimus models
89
+ - Filtering features with the marker classifier
90
+ - Running Aeon and Paladin model inference
91
+
92
  ## Usage
93
 
94
  ### Initial Setup
app.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Entry point for Hugging Face Spaces deployment.
2
+
3
+ This module serves as the main entry point when deploying Mosaic to
4
+ Hugging Face Spaces. It initializes the models and launches the Gradio interface.
5
+ """
6
+
7
+ from mosaic.gradio_app import download_and_process_models
8
+ from mosaic.ui import launch_gradio
9
+
10
+ if __name__ == "__main__":
11
+ # Download models and initialize cancer subtype mappings
12
+ download_and_process_models()
13
+
14
+ # Launch the Gradio interface
15
+ # Use default settings suitable for Hugging Face Spaces
16
+ launch_gradio(
17
+ server_name="0.0.0.0",
18
+ server_port=7860,
19
+ share=False,
20
+ )
pyproject.toml CHANGED
@@ -14,6 +14,7 @@ dependencies = [
14
  "memory-profiler>=0.61.0",
15
  "mussel[torch-gpu]",
16
  "paladin",
 
17
  ]
18
 
19
  [project.scripts]
 
14
  "memory-profiler>=0.61.0",
15
  "mussel[torch-gpu]",
16
  "paladin",
17
+ "spaces>=0.30.0",
18
  ]
19
 
20
  [project.scripts]
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio>=5.49.0
2
+ loguru>=0.7.3
3
+ memory-profiler>=0.61.0
4
+ spaces>=0.30.0
5
+ torch>=2.0.0
6
+ torchvision>=0.15.0
7
+ pandas>=2.0.0
8
+ numpy>=1.24.0
9
+ pillow>=10.0.0
10
+ opencv-python-headless>=4.8.0
11
+ scikit-learn>=1.3.0
12
+ requests>=2.31.0
13
+ huggingface-hub>=0.20.0
14
+ openslide-python>=1.3.0
15
+ git+https://github.com/pathology-data-mining/Mussel.git@ray-dev
16
+ git+https://github.com/pathology-data-mining/paladin.git@dev
src/mosaic/analysis.py CHANGED
@@ -15,94 +15,60 @@ from mussel.utils.segment import draw_slide_mask
15
  from mussel.cli.tessellate import BiopsySegConfig, ResectionSegConfig, TcgaSegConfig
16
  from loguru import logger
17
 
 
 
 
 
 
 
 
 
 
 
 
18
  from mosaic.inference import run_aeon, run_paladin
19
 
20
 
21
- def analyze_slide(
 
 
22
  slide_path,
23
- seg_config,
24
  site_type,
25
  cancer_subtype,
26
  cancer_subtype_name_map,
27
- ihc_subtype="",
28
- num_workers=4,
29
- progress=gr.Progress(track_tqdm=True),
30
  ):
31
- """Analyze a whole slide image for cancer subtype and biomarker prediction.
32
 
33
- This function performs a complete analysis pipeline including:
34
- 1. Tissue segmentation
35
- 2. CTransPath feature extraction
36
- 3. Feature filtering with marker classifier
37
- 4. Optimus feature extraction on filtered tiles
38
- 5. Aeon inference for cancer subtype (if not provided)
39
- 6. Paladin inference for biomarker prediction
40
 
41
  Args:
 
42
  slide_path: Path to the whole slide image file
43
- seg_config: Segmentation configuration, one of "Biopsy", "Resection", or "TCGA"
44
  site_type: Site type, either "Primary" or "Metastatic"
45
  cancer_subtype: Cancer subtype (OncoTree code or "Unknown" for inference)
46
  cancer_subtype_name_map: Dictionary mapping cancer subtype names to codes
47
- ihc_subtype: IHC subtype for breast cancer (optional)
48
  num_workers: Number of worker processes for feature extraction
49
  progress: Gradio progress tracker for UI updates
50
 
51
  Returns:
52
- tuple: (slide_mask, aeon_results, paladin_results)
53
- - slide_mask: PIL Image of tissue segmentation visualization
54
  - aeon_results: DataFrame with cancer subtype predictions and confidence scores
55
  - paladin_results: DataFrame with biomarker predictions
56
-
57
- Raises:
58
- gr.Error: If no slide is provided
59
- gr.Warning: If no tissue is detected in the slide
60
- ValueError: If an unknown segmentation configuration is provided
61
  """
62
- if slide_path is None:
63
- raise gr.Error("Please upload a slide.")
64
- # Step 1: Segment tissue
65
- start_time = pd.Timestamp.now()
66
-
67
- if seg_config == "Biopsy":
68
- seg_config = BiopsySegConfig()
69
- elif seg_config == "Resection":
70
- seg_config = ResectionSegConfig()
71
- elif seg_config == "TCGA":
72
- seg_config = TcgaSegConfig()
73
- else:
74
- raise ValueError(f"Unknown segmentation configuration: {seg_config}")
75
-
76
- progress(0.0, desc="Segmenting tissue")
77
- logger.info(f"Segmenting tissue for slide: {slide_path}")
78
- if values := segment_tissue(
79
- slide_path=slide_path,
80
- patch_size=224,
81
- mpp=0.5,
82
- seg_level=-1,
83
- segment_threshold=seg_config.segment_threshold,
84
- median_blur_ksize=seg_config.median_blur_ksize,
85
- morphology_ex_kernel=seg_config.morphology_ex_kernel,
86
- tissue_area_threshold=seg_config.tissue_area_threshold,
87
- hole_area_threshold=seg_config.hole_area_threshold,
88
- max_num_holes=seg_config.max_num_holes,
89
- ):
90
- polygon, _, coords, attrs = values
91
- else:
92
- gr.Warning(f"No tissue detected in slide: {slide_path}")
93
- return None, None, None
94
- end_time = pd.Timestamp.now()
95
- logger.info(f"Tissue segmentation took {end_time - start_time}")
96
- logger.info(f"Found {len(coords)} tissue tiles")
97
- progress(0.2, desc="Tissue segmented")
98
-
99
- # Draw slide mask for visualization
100
- logger.info("Drawing slide mask")
101
- progress(0.25, desc="Drawing slide mask")
102
- slide_mask = draw_slide_mask(
103
- slide_path, polygon, outline="black", fill=(255, 0, 0, 80), vis_level=-1
104
- )
105
- logger.info("Slide mask drawn")
106
 
107
  # Step 2: Extract features with CTransPath
108
  start_time = pd.Timestamp.now()
@@ -173,7 +139,7 @@ def analyze_slide(
173
 
174
  torch.cuda.reset_peak_memory_stats()
175
 
176
- # Step 3: Run Aeon to predict histology if not supplied
177
  if cancer_subtype == "Unknown":
178
  start_time = pd.Timestamp.now()
179
  progress(0.9, desc="Running Aeon for cancer subtype inference")
@@ -206,10 +172,10 @@ def analyze_slide(
206
  )
207
  logger.info(f"Using user-supplied cancer subtype: {cancer_subtype}")
208
 
209
- # Step 4: Run Paladin to predict biomarkers
210
  if len(aeon_results) == 0:
211
  logger.warning("No Aeon results, skipping Paladin inference")
212
- return slide_mask, None, None
213
  start_time = pd.Timestamp.now()
214
  progress(0.95, desc="Running Paladin for biomarker inference")
215
  logger.info("Running Paladin for biomarker inference")
@@ -234,4 +200,106 @@ def analyze_slide(
234
 
235
  aeon_results.set_index("Cancer Subtype", inplace=True)
236
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  return slide_mask, aeon_results, paladin_results
 
15
  from mussel.cli.tessellate import BiopsySegConfig, ResectionSegConfig, TcgaSegConfig
16
  from loguru import logger
17
 
18
# `spaces` is an optional dependency: when it cannot be imported, a minimal
# stand-in is defined so that code decorated with `spaces.GPU` still loads.
# NOTE(review): HAS_SPACES only records that the package imported; it does not
# by itself show that the process is running on a Zero GPU Space -- confirm
# that this is what code branching on it intends.
try:
    import spaces

    HAS_SPACES = True
except ImportError:
    HAS_SPACES = False

    class spaces:
        """Stand-in for the `spaces` package exposing a no-op ``GPU`` decorator."""

        @staticmethod
        def GPU(fn=None, **_options):
            """Return the decorated function unchanged.

            Supports both the bare form (``@spaces.GPU``) and the
            parameterized form (``@spaces.GPU(duration=60)``), so decorated
            code behaves the same whether or not the real package is present.

            Args:
                fn: The function being decorated, or None when the decorator
                    is called with keyword options only.
                **_options: Decorator options; accepted and ignored.

            Returns:
                ``fn`` itself in the bare form, otherwise a decorator that
                returns its argument unchanged.
            """
            if fn is None:
                return lambda wrapped: wrapped
            return fn
28
+
29
  from mosaic.inference import run_aeon, run_paladin
30
 
31
 
32
+ @spaces.GPU
33
+ def _run_gpu_inference(
34
+ coords,
35
  slide_path,
36
+ attrs,
37
  site_type,
38
  cancer_subtype,
39
  cancer_subtype_name_map,
40
+ num_workers,
41
+ progress,
 
42
  ):
43
+ """Run GPU-intensive feature extraction and model inference.
44
 
45
+ This function is decorated with @spaces.GPU to allocate GPU resources only
46
+ when needed for GPU-intensive operations including:
47
+ - CTransPath feature extraction
48
+ - Feature filtering with marker classifier
49
+ - Optimus feature extraction
50
+ - Aeon cancer subtype inference
51
+ - Paladin biomarker prediction
52
 
53
  Args:
54
+ coords: Tissue tile coordinates
55
  slide_path: Path to the whole slide image file
56
+ attrs: Slide attributes
57
  site_type: Site type, either "Primary" or "Metastatic"
58
  cancer_subtype: Cancer subtype (OncoTree code or "Unknown" for inference)
59
  cancer_subtype_name_map: Dictionary mapping cancer subtype names to codes
 
60
  num_workers: Number of worker processes for feature extraction
61
  progress: Gradio progress tracker for UI updates
62
 
63
  Returns:
64
+ tuple: (aeon_results, paladin_results)
 
65
  - aeon_results: DataFrame with cancer subtype predictions and confidence scores
66
  - paladin_results: DataFrame with biomarker predictions
 
 
 
 
 
67
  """
68
+ # Zero GPU requires num_workers=0 to avoid multiprocessing issues
69
+ if HAS_SPACES:
70
+ num_workers = 0
71
+ logger.info("Running on Hugging Face Spaces Zero GPU: setting num_workers=0")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  # Step 2: Extract features with CTransPath
74
  start_time = pd.Timestamp.now()
 
139
 
140
  torch.cuda.reset_peak_memory_stats()
141
 
142
+ # Step 5: Run Aeon to predict histology if not supplied
143
  if cancer_subtype == "Unknown":
144
  start_time = pd.Timestamp.now()
145
  progress(0.9, desc="Running Aeon for cancer subtype inference")
 
172
  )
173
  logger.info(f"Using user-supplied cancer subtype: {cancer_subtype}")
174
 
175
+ # Step 6: Run Paladin to predict biomarkers
176
  if len(aeon_results) == 0:
177
  logger.warning("No Aeon results, skipping Paladin inference")
178
+ return None, None
179
  start_time = pd.Timestamp.now()
180
  progress(0.95, desc="Running Paladin for biomarker inference")
181
  logger.info("Running Paladin for biomarker inference")
 
200
 
201
  aeon_results.set_index("Cancer Subtype", inplace=True)
202
 
203
+ return aeon_results, paladin_results
204
+
205
+
206
def analyze_slide(
    slide_path,
    seg_config,
    site_type,
    cancer_subtype,
    cancer_subtype_name_map,
    ihc_subtype="",
    num_workers=4,
    progress=gr.Progress(track_tqdm=True),
):
    """Segment a whole slide image on the CPU, then run GPU inference on it.

    The work is split in two stages:
    1. Tissue segmentation and mask drawing, performed directly in this
       function.
    2. Feature extraction and model inference, delegated to
       ``_run_gpu_inference`` so that only that stage is wrapped by
       ``@spaces.GPU``.

    Args:
        slide_path: Path to the whole slide image file
        seg_config: Segmentation preset name, one of "Biopsy", "Resection", or "TCGA"
        site_type: Site type, either "Primary" or "Metastatic"
        cancer_subtype: Cancer subtype (OncoTree code or "Unknown" for inference)
        cancer_subtype_name_map: Dictionary mapping cancer subtype names to codes
        ihc_subtype: IHC subtype for breast cancer (optional); accepted but
            not referenced in this function's body
        num_workers: Number of worker processes for feature extraction
        progress: Gradio progress tracker for UI updates

    Returns:
        tuple: (slide_mask, aeon_results, paladin_results)
            - slide_mask: Tissue segmentation visualization from ``draw_slide_mask``
            - aeon_results: Cancer subtype results from ``_run_gpu_inference``
            - paladin_results: Biomarker results from ``_run_gpu_inference``
        ``(None, None, None)`` is returned, after a ``gr.Warning`` is issued,
        when ``segment_tissue`` yields a falsy result (no tissue detected).

    Raises:
        gr.Error: If no slide is provided
        ValueError: If an unknown segmentation configuration is provided
    """
    # Guard clause: nothing can be analyzed without a slide.
    if slide_path is None:
        raise gr.Error("Please upload a slide.")

    # Step 1: Segment tissue (runs on CPU, outside the GPU-decorated helper).
    # The timer starts before the preset is resolved, so it covers that too.
    start_time = pd.Timestamp.now()

    # Map the preset name onto its config class; only the matching class is
    # instantiated, and an unmatched name is rejected.
    preset_factories = (
        ("Biopsy", BiopsySegConfig),
        ("Resection", ResectionSegConfig),
        ("TCGA", TcgaSegConfig),
    )
    for preset_name, make_config in preset_factories:
        if seg_config == preset_name:
            seg_config = make_config()
            break
    else:
        raise ValueError(f"Unknown segmentation configuration: {seg_config}")

    progress(0.0, desc="Segmenting tissue")
    logger.info(f"Segmenting tissue for slide: {slide_path}")
    segmentation = segment_tissue(
        slide_path=slide_path,
        patch_size=224,
        mpp=0.5,
        seg_level=-1,
        segment_threshold=seg_config.segment_threshold,
        median_blur_ksize=seg_config.median_blur_ksize,
        morphology_ex_kernel=seg_config.morphology_ex_kernel,
        tissue_area_threshold=seg_config.tissue_area_threshold,
        hole_area_threshold=seg_config.hole_area_threshold,
        max_num_holes=seg_config.max_num_holes,
    )
    if not segmentation:
        # A falsy result is treated as "no tissue": warn in the UI and stop.
        gr.Warning(f"No tissue detected in slide: {slide_path}")
        return None, None, None
    polygon, _, coords, attrs = segmentation

    end_time = pd.Timestamp.now()
    logger.info(f"Tissue segmentation took {end_time - start_time}")
    logger.info(f"Found {len(coords)} tissue tiles")
    progress(0.2, desc="Tissue segmented")

    # Render the segmentation polygon over the slide for display.
    logger.info("Drawing slide mask")
    progress(0.25, desc="Drawing slide mask")
    slide_mask = draw_slide_mask(
        slide_path, polygon, outline="black", fill=(255, 0, 0, 80), vis_level=-1
    )
    logger.info("Slide mask drawn")

    # Steps 2-6: feature extraction and model inference in the GPU helper.
    aeon_results, paladin_results = _run_gpu_inference(
        coords,
        slide_path,
        attrs,
        site_type,
        cancer_subtype,
        cancer_subtype_name_map,
        num_workers,
        progress,
    )

    return slide_mask, aeon_results, paladin_results