copilot-swe-agent[bot] raylim committed on
Commit
02bf3db
·
1 Parent(s): fe38b5b

Separate tissue segmentation from GPU-decorated function

Browse files

Co-authored-by: raylim <3074310+raylim@users.noreply.github.com>

Files changed (1) hide show
  1. src/mosaic/analysis.py +121 -69
src/mosaic/analysis.py CHANGED
@@ -30,91 +30,41 @@ from mosaic.inference import run_aeon, run_paladin
30
 
31
 
32
  @spaces.GPU
33
- def analyze_slide(
 
34
  slide_path,
35
- seg_config,
36
  site_type,
37
  cancer_subtype,
38
  cancer_subtype_name_map,
39
- ihc_subtype="",
40
- num_workers=4,
41
- progress=gr.Progress(track_tqdm=True),
42
  ):
43
- """Analyze a whole slide image for cancer subtype and biomarker prediction.
44
 
45
- This function performs a complete analysis pipeline including:
46
- 1. Tissue segmentation
47
- 2. CTransPath feature extraction
48
- 3. Feature filtering with marker classifier
49
- 4. Optimus feature extraction on filtered tiles
50
- 5. Aeon inference for cancer subtype (if not provided)
51
- 6. Paladin inference for biomarker prediction
52
 
53
  Args:
 
54
  slide_path: Path to the whole slide image file
55
- seg_config: Segmentation configuration, one of "Biopsy", "Resection", or "TCGA"
56
  site_type: Site type, either "Primary" or "Metastatic"
57
  cancer_subtype: Cancer subtype (OncoTree code or "Unknown" for inference)
58
  cancer_subtype_name_map: Dictionary mapping cancer subtype names to codes
59
- ihc_subtype: IHC subtype for breast cancer (optional)
60
  num_workers: Number of worker processes for feature extraction
61
  progress: Gradio progress tracker for UI updates
62
 
63
  Returns:
64
- tuple: (slide_mask, aeon_results, paladin_results)
65
- - slide_mask: PIL Image of tissue segmentation visualization
66
  - aeon_results: DataFrame with cancer subtype predictions and confidence scores
67
  - paladin_results: DataFrame with biomarker predictions
68
-
69
- Raises:
70
- gr.Error: If no slide is provided
71
- gr.Warning: If no tissue is detected in the slide
72
- ValueError: If an unknown segmentation configuration is provided
73
  """
74
- if slide_path is None:
75
- raise gr.Error("Please upload a slide.")
76
- # Step 1: Segment tissue
77
- start_time = pd.Timestamp.now()
78
-
79
- if seg_config == "Biopsy":
80
- seg_config = BiopsySegConfig()
81
- elif seg_config == "Resection":
82
- seg_config = ResectionSegConfig()
83
- elif seg_config == "TCGA":
84
- seg_config = TcgaSegConfig()
85
- else:
86
- raise ValueError(f"Unknown segmentation configuration: {seg_config}")
87
-
88
- progress(0.0, desc="Segmenting tissue")
89
- logger.info(f"Segmenting tissue for slide: {slide_path}")
90
- if values := segment_tissue(
91
- slide_path=slide_path,
92
- patch_size=224,
93
- mpp=0.5,
94
- seg_level=-1,
95
- segment_threshold=seg_config.segment_threshold,
96
- median_blur_ksize=seg_config.median_blur_ksize,
97
- morphology_ex_kernel=seg_config.morphology_ex_kernel,
98
- tissue_area_threshold=seg_config.tissue_area_threshold,
99
- hole_area_threshold=seg_config.hole_area_threshold,
100
- max_num_holes=seg_config.max_num_holes,
101
- ):
102
- polygon, _, coords, attrs = values
103
- else:
104
- gr.Warning(f"No tissue detected in slide: {slide_path}")
105
- return None, None, None
106
- end_time = pd.Timestamp.now()
107
- logger.info(f"Tissue segmentation took {end_time - start_time}")
108
- logger.info(f"Found {len(coords)} tissue tiles")
109
- progress(0.2, desc="Tissue segmented")
110
-
111
- # Draw slide mask for visualization
112
- logger.info("Drawing slide mask")
113
- progress(0.25, desc="Drawing slide mask")
114
- slide_mask = draw_slide_mask(
115
- slide_path, polygon, outline="black", fill=(255, 0, 0, 80), vis_level=-1
116
- )
117
- logger.info("Slide mask drawn")
118
 
119
  # Step 2: Extract features with CTransPath
120
  start_time = pd.Timestamp.now()
@@ -185,7 +135,7 @@ def analyze_slide(
185
 
186
  torch.cuda.reset_peak_memory_stats()
187
 
188
- # Step 3: Run Aeon to predict histology if not supplied
189
  if cancer_subtype == "Unknown":
190
  start_time = pd.Timestamp.now()
191
  progress(0.9, desc="Running Aeon for cancer subtype inference")
@@ -218,10 +168,10 @@ def analyze_slide(
218
  )
219
  logger.info(f"Using user-supplied cancer subtype: {cancer_subtype}")
220
 
221
- # Step 4: Run Paladin to predict biomarkers
222
  if len(aeon_results) == 0:
223
  logger.warning("No Aeon results, skipping Paladin inference")
224
- return slide_mask, None, None
225
  start_time = pd.Timestamp.now()
226
  progress(0.95, desc="Running Paladin for biomarker inference")
227
  logger.info("Running Paladin for biomarker inference")
@@ -246,4 +196,106 @@ def analyze_slide(
246
 
247
  aeon_results.set_index("Cancer Subtype", inplace=True)
248
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
249
  return slide_mask, aeon_results, paladin_results
 
30
 
31
 
32
  @spaces.GPU
33
+ def _run_gpu_inference(
34
+ coords,
35
  slide_path,
36
+ attrs,
37
  site_type,
38
  cancer_subtype,
39
  cancer_subtype_name_map,
40
+ num_workers,
41
+ progress,
 
42
  ):
43
+ """Run GPU-intensive feature extraction and model inference.
44
 
45
+ This function is decorated with @spaces.GPU to allocate GPU resources only
46
+ when needed for GPU-intensive operations including:
47
+ - CTransPath feature extraction
48
+ - Feature filtering with marker classifier
49
+ - Optimus feature extraction
50
+ - Aeon cancer subtype inference
51
+ - Paladin biomarker prediction
52
 
53
  Args:
54
+ coords: Tissue tile coordinates
55
  slide_path: Path to the whole slide image file
56
+ attrs: Slide attributes
57
  site_type: Site type, either "Primary" or "Metastatic"
58
  cancer_subtype: Cancer subtype (OncoTree code or "Unknown" for inference)
59
  cancer_subtype_name_map: Dictionary mapping cancer subtype names to codes
 
60
  num_workers: Number of worker processes for feature extraction
61
  progress: Gradio progress tracker for UI updates
62
 
63
  Returns:
64
+ tuple: (aeon_results, paladin_results)
 
65
  - aeon_results: DataFrame with cancer subtype predictions and confidence scores
66
  - paladin_results: DataFrame with biomarker predictions
 
 
 
 
 
67
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  # Step 2: Extract features with CTransPath
70
  start_time = pd.Timestamp.now()
 
135
 
136
  torch.cuda.reset_peak_memory_stats()
137
 
138
+ # Step 5: Run Aeon to predict histology if not supplied
139
  if cancer_subtype == "Unknown":
140
  start_time = pd.Timestamp.now()
141
  progress(0.9, desc="Running Aeon for cancer subtype inference")
 
168
  )
169
  logger.info(f"Using user-supplied cancer subtype: {cancer_subtype}")
170
 
171
+ # Step 6: Run Paladin to predict biomarkers
172
  if len(aeon_results) == 0:
173
  logger.warning("No Aeon results, skipping Paladin inference")
174
+ return None, None
175
  start_time = pd.Timestamp.now()
176
  progress(0.95, desc="Running Paladin for biomarker inference")
177
  logger.info("Running Paladin for biomarker inference")
 
196
 
197
  aeon_results.set_index("Cancer Subtype", inplace=True)
198
 
199
+ return aeon_results, paladin_results
200
+
201
+
202
+ def analyze_slide(
203
+ slide_path,
204
+ seg_config,
205
+ site_type,
206
+ cancer_subtype,
207
+ cancer_subtype_name_map,
208
+ ihc_subtype="",
209
+ num_workers=4,
210
+ progress=gr.Progress(track_tqdm=True),
211
+ ):
212
+ """Analyze a whole slide image for cancer subtype and biomarker prediction.
213
+
214
+ This function performs a complete analysis pipeline including:
215
+ 1. Tissue segmentation (CPU-only, no GPU required)
216
+ 2. GPU-intensive feature extraction and model inference
217
+
218
+ The GPU-intensive operations are handled by a separate function decorated
219
+ with @spaces.GPU to efficiently manage GPU resources on Hugging Face Spaces.
220
+ Tissue segmentation runs on CPU and is not included in the GPU allocation.
221
+
222
+ Args:
223
+ slide_path: Path to the whole slide image file
224
+ seg_config: Segmentation configuration, one of "Biopsy", "Resection", or "TCGA"
225
+ site_type: Site type, either "Primary" or "Metastatic"
226
+ cancer_subtype: Cancer subtype (OncoTree code or "Unknown" for inference)
227
+ cancer_subtype_name_map: Dictionary mapping cancer subtype names to codes
228
+ ihc_subtype: IHC subtype for breast cancer (optional)
229
+ num_workers: Number of worker processes for feature extraction
230
+ progress: Gradio progress tracker for UI updates
231
+
232
+ Returns:
233
+ tuple: (slide_mask, aeon_results, paladin_results)
234
+ - slide_mask: PIL Image of tissue segmentation visualization
235
+ - aeon_results: DataFrame with cancer subtype predictions and confidence scores
236
+ - paladin_results: DataFrame with biomarker predictions
237
+
238
+ Raises:
239
+ gr.Error: If no slide is provided
240
+ gr.Warning: If no tissue is detected in the slide
241
+ ValueError: If an unknown segmentation configuration is provided
242
+ """
243
+ if slide_path is None:
244
+ raise gr.Error("Please upload a slide.")
245
+
246
+ # Step 1: Segment tissue (CPU-only, not GPU-intensive)
247
+ start_time = pd.Timestamp.now()
248
+
249
+ if seg_config == "Biopsy":
250
+ seg_config = BiopsySegConfig()
251
+ elif seg_config == "Resection":
252
+ seg_config = ResectionSegConfig()
253
+ elif seg_config == "TCGA":
254
+ seg_config = TcgaSegConfig()
255
+ else:
256
+ raise ValueError(f"Unknown segmentation configuration: {seg_config}")
257
+
258
+ progress(0.0, desc="Segmenting tissue")
259
+ logger.info(f"Segmenting tissue for slide: {slide_path}")
260
+ if values := segment_tissue(
261
+ slide_path=slide_path,
262
+ patch_size=224,
263
+ mpp=0.5,
264
+ seg_level=-1,
265
+ segment_threshold=seg_config.segment_threshold,
266
+ median_blur_ksize=seg_config.median_blur_ksize,
267
+ morphology_ex_kernel=seg_config.morphology_ex_kernel,
268
+ tissue_area_threshold=seg_config.tissue_area_threshold,
269
+ hole_area_threshold=seg_config.hole_area_threshold,
270
+ max_num_holes=seg_config.max_num_holes,
271
+ ):
272
+ polygon, _, coords, attrs = values
273
+ else:
274
+ gr.Warning(f"No tissue detected in slide: {slide_path}")
275
+ return None, None, None
276
+ end_time = pd.Timestamp.now()
277
+ logger.info(f"Tissue segmentation took {end_time - start_time}")
278
+ logger.info(f"Found {len(coords)} tissue tiles")
279
+ progress(0.2, desc="Tissue segmented")
280
+
281
+ # Draw slide mask for visualization
282
+ logger.info("Drawing slide mask")
283
+ progress(0.25, desc="Drawing slide mask")
284
+ slide_mask = draw_slide_mask(
285
+ slide_path, polygon, outline="black", fill=(255, 0, 0, 80), vis_level=-1
286
+ )
287
+ logger.info("Slide mask drawn")
288
+
289
+ # Step 2-6: Run GPU-intensive operations (feature extraction and inference)
290
+ aeon_results, paladin_results = _run_gpu_inference(
291
+ coords,
292
+ slide_path,
293
+ attrs,
294
+ site_type,
295
+ cancer_subtype,
296
+ cancer_subtype_name_map,
297
+ num_workers,
298
+ progress,
299
+ )
300
+
301
  return slide_mask, aeon_results, paladin_results