copilot-swe-agent[bot] raylim committed on
Commit
b05124c
·
1 Parent(s): 6fcc1b9

Merge with main branch: refactor UI into separate modules and update tests

Browse files
ARCHITECTURE.md ADDED
@@ -0,0 +1 @@
 
 
1
+ 404: Not Found
src/mosaic/analysis.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ import torch
3
+ import pandas as pd
4
+ import gradio as gr
5
+ from pathlib import Path
6
+ from mussel.models import ModelType
7
+ from mussel.utils import get_features, segment_tissue, filter_features
8
+ from mussel.utils.segment import draw_slide_mask
9
+ from mussel.cli.tessellate import BiopsySegConfig, ResectionSegConfig, TcgaSegConfig
10
+ from loguru import logger
11
+
12
+ from mosaic.inference import run_aeon, run_paladin
13
+
14
+
15
def _peak_gpu_memory_gb():
    """Return the peak CUDA memory allocated so far in GiB, or 0 without CUDA."""
    if torch.cuda.is_available():
        return torch.cuda.max_memory_allocated() / (1024**3)
    return 0


def _reset_gpu_peak_stats():
    """Reset the CUDA peak-memory counters; do nothing when CUDA is unavailable."""
    # Guarded the same way as the read in _peak_gpu_memory_gb so that a
    # CPU-only host never touches the CUDA runtime.
    if torch.cuda.is_available():
        torch.cuda.reset_peak_memory_stats()


def analyze_slide(
    slide_path,
    seg_config,
    site_type,
    cancer_subtype,
    cancer_subtype_name_map,
    ihc_subtype="",
    num_workers=4,
    progress=gr.Progress(track_tqdm=True),
):
    """Run the full inference pipeline on a single slide.

    The pipeline segments tissue, extracts CTransPath features, filters tiles
    with the marker classifier, extracts Optimus features on the surviving
    tiles, optionally runs Aeon to infer the cancer subtype, and finally runs
    Paladin for biomarker inference.

    Args:
        slide_path: Path of the slide to analyze.
        seg_config: Name of the segmentation preset; one of "Biopsy",
            "Resection" or "TCGA".
        site_type: Site type; the value "Metastatic" switches Aeon and Paladin
            into metastatic mode, any other value does not.
        cancer_subtype: "Unknown" to infer the subtype with Aeon; any other
            value is looked up in ``cancer_subtype_name_map``.
        cancer_subtype_name_map: Mapping used to translate a user-supplied
            ``cancer_subtype`` into the code passed on to Paladin.
        ihc_subtype: Accepted for interface compatibility; not used by this
            function.
        num_workers: Worker count forwarded to feature extraction, Aeon and
            Paladin.
        progress: Callable invoked as ``progress(fraction, desc=...)`` to
            report progress.

    Returns:
        A ``(slide_mask, aeon_results, paladin_results)`` tuple.
        ``(None, None, None)`` is returned when no tissue is detected, and
        ``(slide_mask, None, None)`` when there are no Aeon results. On the
        normal path ``aeon_results`` is indexed by "Cancer Subtype".

    Raises:
        gr.Error: If ``slide_path`` is None.
        ValueError: If ``seg_config`` is not a known preset name.
    """
    if slide_path is None:
        raise gr.Error("Please upload a slide.")

    # Step 1: Segment tissue
    start_time = pd.Timestamp.now()

    if seg_config == "Biopsy":
        seg_config = BiopsySegConfig()
    elif seg_config == "Resection":
        seg_config = ResectionSegConfig()
    elif seg_config == "TCGA":
        seg_config = TcgaSegConfig()
    else:
        raise ValueError(f"Unknown segmentation configuration: {seg_config}")

    progress(0.0, desc="Segmenting tissue")
    logger.info(f"Segmenting tissue for slide: {slide_path}")
    if values := segment_tissue(
        slide_path=slide_path,
        patch_size=224,
        mpp=0.5,
        seg_level=-1,
        segment_threshold=seg_config.segment_threshold,
        median_blur_ksize=seg_config.median_blur_ksize,
        morphology_ex_kernel=seg_config.morphology_ex_kernel,
        tissue_area_threshold=seg_config.tissue_area_threshold,
        hole_area_threshold=seg_config.hole_area_threshold,
        max_num_holes=seg_config.max_num_holes,
    ):
        polygon, _, coords, attrs = values
    else:
        # A falsy result from segment_tissue is treated as "no tissue found".
        gr.Warning(f"No tissue detected in slide: {slide_path}")
        return None, None, None
    end_time = pd.Timestamp.now()
    logger.info(f"Tissue segmentation took {end_time - start_time}")
    logger.info(f"Found {len(coords)} tissue tiles")
    progress(0.2, desc="Tissue segmented")

    # Draw slide mask for visualization
    logger.info("Drawing slide mask")
    progress(0.25, desc="Drawing slide mask")
    slide_mask = draw_slide_mask(
        slide_path, polygon, outline="black", fill=(255, 0, 0, 80), vis_level=-1
    )
    logger.info("Slide mask drawn")

    # Step 2: Extract features with CTransPath
    start_time = pd.Timestamp.now()
    progress(0.3, desc="Extracting CTransPath features")
    logger.info("Extracting CTransPath features")
    ctranspath_features, _ = get_features(
        coords,
        slide_path,
        attrs,
        model_type=ModelType.CTRANSPATH,
        model_path="data/ctranspath.pth",
        num_workers=num_workers,
        batch_size=64,
        use_gpu=True,
    )
    end_time = pd.Timestamp.now()
    max_gpu_memory = _peak_gpu_memory_gb()
    logger.info(
        f"CTransPath Feature extraction took {end_time - start_time} and used {max_gpu_memory:.2f} GB GPU memory"
    )

    _reset_gpu_peak_stats()

    # Step 3: Filter features using marker classifier
    start_time = pd.Timestamp.now()
    # NOTE(review): pickle.load executes arbitrary code from the file; this is
    # only safe if data/marker_classifier.pkl comes from a trusted source.
    with open("data/marker_classifier.pkl", "rb") as classifier_file:
        marker_classifier = pickle.load(classifier_file)
    progress(0.35, desc="Filtering features with marker classifier")
    logger.info("Filtering features with marker classifier")
    _, filtered_coords = filter_features(
        ctranspath_features,
        coords,
        marker_classifier,
        threshold=0.25,
    )
    end_time = pd.Timestamp.now()
    logger.info(f"Feature filtering took {end_time - start_time}")
    logger.info(
        f"Filtered from {len(coords)} to {len(filtered_coords)} tiles using marker classifier"
    )

    # Step 4: Extract features with Optimus on filtered coords
    start_time = pd.Timestamp.now()
    progress(0.4, desc="Extracting Optimus features")
    logger.info("Extracting Optimus features")
    features, _ = get_features(
        filtered_coords,
        slide_path,
        attrs,
        model_type=ModelType.OPTIMUS,
        model_path="data/optimus.pkl",
        num_workers=num_workers,
        batch_size=64,
        use_gpu=True,
    )
    end_time = pd.Timestamp.now()
    max_gpu_memory = _peak_gpu_memory_gb()
    logger.info(
        f"Optimus Feature extraction took {end_time - start_time} and used {max_gpu_memory:.2f} GB GPU memory"
    )

    _reset_gpu_peak_stats()

    # Step 5: Run Aeon to predict histology if not supplied
    if cancer_subtype == "Unknown":
        start_time = pd.Timestamp.now()
        progress(0.9, desc="Running Aeon for cancer subtype inference")
        logger.info("Running Aeon for cancer subtype inference")
        aeon_results, _ = run_aeon(
            features=features,
            model_path="data/aeon_model.pkl",
            metastatic=(site_type == "Metastatic"),
            batch_size=8,
            num_workers=num_workers,
            use_cpu=False,
        )
        end_time = pd.Timestamp.now()
        max_gpu_memory = _peak_gpu_memory_gb()
        logger.info(
            f"Aeon inference took {end_time - start_time} and used {max_gpu_memory:.2f} GB GPU memory"
        )
        _reset_gpu_peak_stats()
    else:
        cancer_subtype_code = cancer_subtype_name_map.get(cancer_subtype)
        if cancer_subtype_code is None:
            # Behaviour is unchanged (None is still passed on); the warning
            # only makes an unmapped subtype visible in the logs.
            logger.warning(
                f"No code found for user-supplied cancer subtype: {cancer_subtype}"
            )
        # Build a single-row result with full confidence so Paladin receives
        # the same structure it would get from Aeon.
        aeon_results = pd.DataFrame(
            {
                "Cancer Subtype": [cancer_subtype_code],
                "Confidence": [1.0],
            }
        )
        logger.info(f"Using user-supplied cancer subtype: {cancer_subtype}")

    # Step 6: Run Paladin to predict biomarkers
    if len(aeon_results) == 0:
        logger.warning("No Aeon results, skipping Paladin inference")
        return slide_mask, None, None
    start_time = pd.Timestamp.now()
    progress(0.95, desc="Running Paladin for biomarker inference")
    logger.info("Running Paladin for biomarker inference")
    paladin_results = run_paladin(
        features=features,
        model_map_path="data/paladin_model_map.csv",
        aeon_results=aeon_results,
        metastatic=(site_type == "Metastatic"),
        batch_size=8,
        num_workers=num_workers,
        use_cpu=False,
    )
    end_time = pd.Timestamp.now()
    max_gpu_memory = _peak_gpu_memory_gb()
    logger.info(
        f"Paladin inference took {end_time - start_time} and used {max_gpu_memory:.2f} GB GPU memory"
    )

    aeon_results.set_index("Cancer Subtype", inplace=True)

    return slide_mask, aeon_results, paladin_results
src/mosaic/gradio_app.py CHANGED
@@ -1,57 +1,22 @@
1
  from argparse import ArgumentParser
2
- import gradio as gr
3
  import pandas as pd
4
- import pickle
5
- from mussel.models import ModelType
6
- from mussel.utils import get_features, segment_tissue, filter_features
7
- from mussel.utils.segment import draw_slide_mask
8
- from mussel.cli.tessellate import BiopsySegConfig, ResectionSegConfig, TcgaSegConfig
9
- import torch
10
  from pathlib import Path
11
  from huggingface_hub import snapshot_download
12
- import tempfile
13
- import requests
14
-
15
- from mosaic.inference import run_aeon, run_paladin
16
  from loguru import logger
17
 
18
- current_dir = Path(__file__).parent
19
-
20
- # This path should be outside your project directory if running locally
21
- TEMP_USER_DATA_DIR = Path(tempfile.gettempdir()) / "mosaic_user_data"
22
-
23
- IHC_SUBTYPES = ["", "HR+/HER2+", "HR+/HER2-", "HR-/HER2+", "HR-/HER2-"]
24
-
25
- SETTINGS_COLUMNS = [
26
- "Slide",
27
- "Site Type",
28
- "Cancer Subtype",
29
- "IHC Subtype",
30
- "Segmentation Config",
31
- ]
32
-
33
- oncotree_code_map = {}
34
-
35
-
36
- def get_oncotree_code_name(code):
37
- global oncotree_code_map
38
- if code in oncotree_code_map.keys():
39
- return oncotree_code_map[code]
40
-
41
- url = f"https://oncotree.mskcc.org/api/tumorTypes/search/code/{code}?exactMatch=true&version=oncotree_2025_04_08"
42
- response = requests.get(url)
43
- code_name = "Unknown"
44
- if response.status_code == 200:
45
- data = response.json()
46
- if data:
47
- code_name = data[0]["name"]
48
-
49
- oncotree_code_map[code] = code_name
50
- return code_name
51
 
52
 
53
  def download_and_process_models():
54
- global cancer_subtype_name_map, reversed_cancer_subtype_name_map, cancer_subtypes
55
  snapshot_download(repo_id="PDM-Group/paladin-aeon-models", local_dir="data")
56
 
57
  model_map = pd.read_csv(
@@ -65,594 +30,14 @@ def download_and_process_models():
65
  reversed_cancer_subtype_name_map = {
66
  value: key for key, value in cancer_subtype_name_map.items()
67
  }
 
 
 
 
 
68
 
69
 
70
- def create_user_directory(state, request: gr.Request):
71
- """Create a unique directory for each user session."""
72
- session_hash = request.session_hash
73
- if session_hash is None:
74
- return None, None
75
-
76
- user_dir = TEMP_USER_DATA_DIR / session_hash
77
- user_dir.mkdir(parents=True, exist_ok=True)
78
- return user_dir
79
-
80
-
81
- def load_settings(slide_csv_path):
82
- """Load settings from CSV file and validate columns."""
83
- settings_df = pd.read_csv(slide_csv_path, na_filter=False)
84
- if "Segmentation Config" not in settings_df.columns:
85
- settings_df["Segmentation Config"] = "Biopsy"
86
- if "Cancer Subtype" not in settings_df.columns:
87
- settings_df["Cancer Subtype"] = "Unknown"
88
- if "IHC Subtype" not in settings_df.columns:
89
- settings_df["IHC Subtype"] = ""
90
- if not set(SETTINGS_COLUMNS).issubset(settings_df.columns):
91
- raise ValueError("Missing required column in CSV file")
92
- settings_df = settings_df[SETTINGS_COLUMNS]
93
- return settings_df
94
-
95
-
96
- def validate_settings(settings_df):
97
- """Validate settings DataFrame and provide warnings for invalid entries."""
98
- settings_df.columns = SETTINGS_COLUMNS
99
- warnings = []
100
- for idx, row in settings_df.iterrows():
101
- slide_name = row["Slide"]
102
- subtype = row["Cancer Subtype"]
103
- if subtype in cancer_subtypes:
104
- settings_df.at[idx, "Cancer Subtype"] = reversed_cancer_subtype_name_map[
105
- subtype
106
- ]
107
- if settings_df.at[idx, "Cancer Subtype"] not in cancer_subtype_name_map.keys():
108
- warnings.append(
109
- f"Slide {slide_name}: Unknown cancer subtype. Valid subtypes are: {', '.join(cancer_subtype_name_map.keys())}. "
110
- )
111
- settings_df.at[idx, "Cancer Subtype"] = "Unknown"
112
- if row["Site Type"] not in ["Metastatic", "Primary"]:
113
- warnings.append(
114
- f"Slide {slide_name}: Unknown site type. Valid types are: Metastatic, Primary. "
115
- )
116
- settings_df.at[idx, "Site Type"] = "Primary"
117
- if (
118
- "Breast" not in settings_df.at[idx, "Cancer Subtype"]
119
- and row["IHC Subtype"] != ""
120
- ):
121
- warnings.append(
122
- f"Slide {slide_name}: IHC subtype should be empty for non-breast cancer subtypes. "
123
- )
124
- settings_df.at[idx, "IHC Subtype"] = ""
125
- if row["IHC Subtype"] not in IHC_SUBTYPES:
126
- warnings.append(
127
- f"Slide {slide_name}: Unknown IHC subtype. Valid subtypes are: {', '.join(IHC_SUBTYPES)}. "
128
- )
129
- settings_df.at[idx, "IHC Subtype"] = ""
130
- if row["Segmentation Config"] not in ["Biopsy", "Resection", "TCGA"]:
131
- warnings.append(
132
- f"Slide {slide_name}: Unknown segmentation config. Valid configs are: Biopsy, Resection, TCGA. "
133
- )
134
- settings_df.at[idx, "Segmentation Config"] = "Biopsy"
135
-
136
- if warnings:
137
- gr.Warning("\n".join(warnings))
138
-
139
- return settings_df
140
-
141
-
142
- def export_to_csv(df):
143
- if df is None or df.empty:
144
- raise gr.Error("No data to export.")
145
- csv_path = "paladin_results.csv"
146
- df.to_csv(csv_path, index=False)
147
- return csv_path
148
-
149
-
150
- def analyze_slides(
151
- slides,
152
- settings_input,
153
- user_dir,
154
- progress=gr.Progress(track_tqdm=True),
155
- ):
156
- if slides is None or len(slides) == 0:
157
- raise gr.Error("Please upload at least one slide.")
158
- if user_dir is None:
159
- user_dir = create_user_directory(None, gr.Request())
160
- settings_input = validate_settings(settings_input)
161
- if len(slides) != len(settings_input):
162
- raise gr.Error("Missing settings for uploaded slides")
163
-
164
- all_slide_masks = []
165
- all_aeon_results = []
166
- all_paladin_results = []
167
-
168
- progress(0.0, desc="Starting analysis")
169
- for idx, row in settings_input.iterrows():
170
- slide_name = row["Slide"]
171
- progress(
172
- idx / len(settings_input),
173
- desc=f"Analyzing {slide_name}, slide {idx + 1} of {len(settings_input)}",
174
- )
175
- for x in slides:
176
- s = x.split("/")[-1]
177
- if s == slide_name:
178
- slide_mask = x
179
-
180
- (
181
- slide_mask,
182
- aeon_results,
183
- paladin_results,
184
- ) = analyze_slide(
185
- slides[idx],
186
- row["Segmentation Config"],
187
- row["Site Type"],
188
- row["Cancer Subtype"],
189
- row["IHC Subtype"],
190
- progress=progress,
191
- )
192
- if aeon_results is not None:
193
- if len(slides) > 1:
194
- aeon_results.columns = [f"{slide_name}"]
195
- if row["Cancer Subtype"] == "Unknown":
196
- all_aeon_results.append(aeon_results)
197
- if paladin_results is not None:
198
- paladin_results.insert(
199
- 0, "Slide", pd.Series([slide_name] * len(paladin_results))
200
- )
201
- all_paladin_results.append(paladin_results)
202
- if slide_mask is not None:
203
- all_slide_masks.append((slide_mask, slide_name))
204
- # yield slide_mask, None, None, None # Yield intermediate results
205
- progress(0.99, desc="Analysis complete, wrapping up results")
206
-
207
- timestamp = pd.Timestamp.now().strftime("%Y%m%d-%H%M%S")
208
- combined_paladin_results = (
209
- pd.concat(all_paladin_results, ignore_index=True)
210
- if all_paladin_results
211
- else pd.DataFrame()
212
- )
213
- combined_aeon_results = gr.DataFrame(visible=False)
214
- aeon_output = gr.DownloadButton(visible=False)
215
- if all_aeon_results:
216
- combined_aeon_results = pd.concat(all_aeon_results, axis=1)
217
- combined_aeon_results.reset_index(inplace=True)
218
-
219
- combined_aeon_results = combined_aeon_results.round(3)
220
- cancer_subtype_names = [
221
- f"{get_oncotree_code_name(code)} ({code})"
222
- for code in combined_aeon_results["Cancer Subtype"]
223
- ]
224
- combined_aeon_results["Cancer Subtype"] = cancer_subtype_names
225
-
226
- aeon_output_path = user_dir / f"aeon_results-{timestamp}.csv"
227
- combined_aeon_results.to_csv(aeon_output_path)
228
-
229
- combined_aeon_results = gr.DataFrame(
230
- combined_aeon_results,
231
- visible=True,
232
- column_widths=["4px"] + ["2px"] * (combined_aeon_results.shape[1] - 1),
233
- )
234
- aeon_output = gr.DownloadButton(value=aeon_output_path, visible=True)
235
-
236
- # Convert Oncotree codes to names for display
237
- cancer_subtype_names = [
238
- f"{get_oncotree_code_name(code)} ({code})"
239
- for code in combined_paladin_results["Cancer Subtype"]
240
- ]
241
- combined_paladin_results["Cancer Subtype"] = cancer_subtype_names
242
- if len(combined_paladin_results) > 0:
243
- combined_paladin_results["Score"] = combined_paladin_results["Score"].round(3)
244
-
245
- paladin_output = gr.DownloadButton(visible=False)
246
- if len(combined_paladin_results) > 0:
247
- paladin_output_path = user_dir / f"paladin_results-{timestamp}.csv"
248
- combined_paladin_results.to_csv(paladin_output_path, index=False)
249
- paladin_output = gr.DownloadButton(value=paladin_output_path, visible=True)
250
-
251
- progress(1.0, desc="All done!")
252
-
253
- return (
254
- all_slide_masks,
255
- combined_aeon_results,
256
- aeon_output,
257
- combined_paladin_results if len(combined_paladin_results) > 0 else None,
258
- paladin_output,
259
- user_dir,
260
- )
261
-
262
-
263
- def analyze_slide(
264
- slide_path,
265
- seg_config,
266
- site_type,
267
- cancer_subtype,
268
- ihc_subtype="",
269
- num_workers=4,
270
- progress=gr.Progress(track_tqdm=True),
271
- ):
272
- if slide_path is None:
273
- raise gr.Error("Please upload a slide.")
274
- # Step 1: Segment tissue
275
- start_time = pd.Timestamp.now()
276
-
277
- if seg_config == "Biopsy":
278
- seg_config = BiopsySegConfig()
279
- elif seg_config == "Resection":
280
- seg_config = ResectionSegConfig()
281
- elif seg_config == "TCGA":
282
- seg_config = TcgaSegConfig()
283
- else:
284
- raise ValueError(f"Unknown segmentation configuration: {seg_config}")
285
-
286
- progress(0.0, desc="Segmenting tissue")
287
- logger.info(f"Segmenting tissue for slide: {slide_path}")
288
- if values := segment_tissue(
289
- slide_path=slide_path,
290
- patch_size=224,
291
- mpp=0.5,
292
- seg_level=-1,
293
- segment_threshold=seg_config.segment_threshold,
294
- median_blur_ksize=seg_config.median_blur_ksize,
295
- morphology_ex_kernel=seg_config.morphology_ex_kernel,
296
- tissue_area_threshold=seg_config.tissue_area_threshold,
297
- hole_area_threshold=seg_config.hole_area_threshold,
298
- max_num_holes=seg_config.max_num_holes,
299
- ):
300
- polygon, _, coords, attrs = values
301
- else:
302
- gr.Warning(f"No tissue detected in slide: {slide_path}")
303
- return None, None, None
304
- end_time = pd.Timestamp.now()
305
- logger.info(f"Tissue segmentation took {end_time - start_time}")
306
- logger.info(f"Found {len(coords)} tissue tiles")
307
- progress(0.2, desc="Tissue segmented")
308
-
309
- # Draw slide mask for visualization
310
- logger.info("Drawing slide mask")
311
- progress(0.25, desc="Drawing slide mask")
312
- slide_mask = draw_slide_mask(
313
- slide_path, polygon, outline="black", fill=(255, 0, 0, 80), vis_level=-1
314
- )
315
- logger.info("Slide mask drawn")
316
-
317
- # Step 2: Extract features with CTransPath
318
- start_time = pd.Timestamp.now()
319
- progress(0.3, desc="Extracting CTransPath features")
320
- logger.info("Extracting CTransPath features")
321
- ctranspath_features, _ = get_features(
322
- coords,
323
- slide_path,
324
- attrs,
325
- model_type=ModelType.CTRANSPATH,
326
- model_path="data/ctranspath.pth",
327
- num_workers=num_workers,
328
- batch_size=64,
329
- use_gpu=True,
330
- )
331
- end_time = pd.Timestamp.now()
332
- max_gpu_memory = (
333
- torch.cuda.max_memory_allocated() / (1024**3)
334
- if torch.cuda.is_available()
335
- else 0
336
- )
337
- logger.info(
338
- f"CTransPath Feature extraction took {end_time - start_time} and used {max_gpu_memory:.2f} GB GPU memory"
339
- )
340
-
341
- torch.cuda.reset_peak_memory_stats()
342
-
343
- # Step 3: Filter features using marker classifier
344
- start_time = pd.Timestamp.now()
345
- marker_classifier = pickle.load(open("data/marker_classifier.pkl", "rb"))
346
- progress(0.35, desc="Filtering features with marker classifier")
347
- logger.info("Filtering features with marker classifier")
348
- _, filtered_coords = filter_features(
349
- ctranspath_features,
350
- coords,
351
- marker_classifier,
352
- threshold=0.25,
353
- )
354
- end_time = pd.Timestamp.now()
355
- logger.info(f"Feature filtering took {end_time - start_time}")
356
- logger.info(
357
- f"Filtered from {len(coords)} to {len(filtered_coords)} tiles using marker classifier"
358
- )
359
-
360
- # Step 4: Extract features with Optimus on filtered coords
361
- start_time = pd.Timestamp.now()
362
- progress(0.4, desc="Extracting Optimus features")
363
- logger.info("Extracting Optimus features")
364
- features, _ = get_features(
365
- filtered_coords,
366
- slide_path,
367
- attrs,
368
- model_type=ModelType.OPTIMUS,
369
- model_path="data/optimus.pkl",
370
- num_workers=num_workers,
371
- batch_size=64,
372
- use_gpu=True,
373
- )
374
- end_time = pd.Timestamp.now()
375
- max_gpu_memory = (
376
- torch.cuda.max_memory_allocated() / (1024**3)
377
- if torch.cuda.is_available()
378
- else 0
379
- )
380
- logger.info(
381
- f"Optimus Feature extraction took {end_time - start_time} and used {max_gpu_memory:.2f} GB GPU memory"
382
- )
383
-
384
- torch.cuda.reset_peak_memory_stats()
385
-
386
- # Step 3: Run Aeon to predict histology if not supplied
387
- if cancer_subtype == "Unknown":
388
- start_time = pd.Timestamp.now()
389
- progress(0.9, desc="Running Aeon for cancer subtype inference")
390
- logger.info("Running Aeon for cancer subtype inference")
391
- aeon_results, _ = run_aeon(
392
- features=features,
393
- model_path="data/aeon_model.pkl",
394
- metastatic=(site_type == "Metastatic"),
395
- batch_size=8,
396
- num_workers=num_workers,
397
- use_cpu=False,
398
- )
399
- end_time = pd.Timestamp.now()
400
- max_gpu_memory = (
401
- torch.cuda.max_memory_allocated() / (1024**3)
402
- if torch.cuda.is_available()
403
- else 0
404
- )
405
- logger.info(
406
- f"Aeon inference took {end_time - start_time} and used {max_gpu_memory:.2f} GB GPU memory"
407
- )
408
- torch.cuda.reset_peak_memory_stats()
409
- else:
410
- cancer_subtype_code = cancer_subtype_name_map.get(cancer_subtype)
411
- aeon_results = pd.DataFrame(
412
- {
413
- "Cancer Subtype": [cancer_subtype_code],
414
- "Confidence": [1.0],
415
- }
416
- )
417
- logger.info(f"Using user-supplied cancer subtype: {cancer_subtype}")
418
-
419
- # Step 4: Run Paladin to predict biomarkers
420
- if len(aeon_results) == 0:
421
- logger.warning("No Aeon results, skipping Paladin inference")
422
- return slide_mask, None, None
423
- start_time = pd.Timestamp.now()
424
- progress(0.95, desc="Running Paladin for biomarker inference")
425
- logger.info("Running Paladin for biomarker inference")
426
- paladin_results = run_paladin(
427
- features=features,
428
- model_map_path="data/paladin_model_map.csv",
429
- aeon_results=aeon_results,
430
- metastatic=(site_type == "Metastatic"),
431
- batch_size=8,
432
- num_workers=num_workers,
433
- use_cpu=False,
434
- )
435
- end_time = pd.Timestamp.now()
436
- max_gpu_memory = (
437
- torch.cuda.max_memory_allocated() / (1024**3)
438
- if torch.cuda.is_available()
439
- else 0
440
- )
441
- logger.info(
442
- f"Paladin inference took {end_time - start_time} and used {max_gpu_memory:.2f} GB GPU memory"
443
- )
444
-
445
- aeon_results.set_index("Cancer Subtype", inplace=True)
446
-
447
- return slide_mask, aeon_results, paladin_results
448
-
449
-
450
- def launch_gradio(server_name, server_port, share):
451
- with gr.Blocks(title="Mosaic") as demo:
452
- user_dir_state = gr.State(None)
453
- gr.Markdown(
454
- "# Mosaic: H&E Whole Slide Image Cancer Subtype and Biomarker Inference"
455
- )
456
- gr.Markdown(
457
- "Upload an H&E whole slide image in SVS or TIFF format. The slide will be processed to infer cancer subtype and relevant biomarkers."
458
- )
459
- with gr.Row():
460
- with gr.Column():
461
- input_slides = gr.File(
462
- label="Upload H&E Whole Slide Image",
463
- file_types=[".svs", ".tiff", ".tif"],
464
- file_count="multiple",
465
- )
466
- site_dropdown = gr.Dropdown(
467
- choices=["Primary", "Metastatic"],
468
- label="Site Type",
469
- value="Primary",
470
- )
471
- cancer_subtype_dropdown = gr.Dropdown(
472
- choices=[name for name in cancer_subtype_name_map.keys()],
473
- label="Cancer Subtype",
474
- value="Unknown",
475
- )
476
- ihc_subtype_dropdown = gr.Dropdown(
477
- choices=IHC_SUBTYPES,
478
- label="IHC Subtype (if applicable)",
479
- value="",
480
- visible=False,
481
- )
482
- seg_config_dropdown = gr.Dropdown(
483
- choices=["Biopsy", "Resection", "TCGA"],
484
- label="Segmentation Config",
485
- value="Biopsy",
486
- )
487
- with gr.Row():
488
- settings_input = gr.Dataframe(
489
- headers=[
490
- "Slide",
491
- "Site Type",
492
- "Cancer Subtype",
493
- "IHC Subtype",
494
- "Segmentation Config",
495
- ],
496
- label="Current Settings",
497
- datatype=["str", "str", "str", "str", "str"],
498
- visible=False,
499
- interactive=True,
500
- static_columns="Slide",
501
- )
502
 
503
- with gr.Row():
504
- settings_csv = gr.File(
505
- file_types=[".csv"], label="Upload Settings CSV", visible=False
506
- )
507
-
508
- with gr.Row():
509
- clear_button = gr.Button("Clear")
510
- analyze_button = gr.Button("Analyze", variant="primary")
511
- with gr.Column():
512
- slide_masks = gr.Gallery(
513
- label="Slide Masks",
514
- columns=3,
515
- object_fit="contain",
516
- height="auto",
517
- )
518
- aeon_output_table = gr.Dataframe(
519
- headers=["Cancer Subtype", "Slide Name"],
520
- label="Cancer Subtype Inference Confidence",
521
- datatype=["str", "number"],
522
- visible=False,
523
- )
524
- aeon_download_button = gr.DownloadButton(
525
- "Download Aeon Results as CSV",
526
- label="Download Results",
527
- visible=False,
528
- )
529
- paladin_output_table = gr.Dataframe(
530
- headers=["Slide", "Cancer Subtype", "Biomarker", "Score"],
531
- label="Biomarker Inference",
532
- datatype=["str", "str", "str", "number"],
533
- )
534
- paladin_download_button = gr.DownloadButton(
535
- "Download Paladin Results as CSV",
536
- label="Download Results",
537
- visible=False,
538
- )
539
-
540
- @clear_button.click(
541
- outputs=[
542
- input_slides,
543
- slide_masks,
544
- paladin_output_table,
545
- paladin_download_button,
546
- aeon_output_table,
547
- aeon_download_button,
548
- settings_input,
549
- settings_csv,
550
- ],
551
- )
552
- def clear_fn():
553
- return (
554
- None,
555
- None,
556
- None,
557
- None,
558
- gr.Dataframe(visible=False),
559
- gr.DownloadButton(visible=False),
560
- gr.Dataframe(visible=False),
561
- gr.File(visible=False),
562
- )
563
-
564
- def get_settings(files, site_type, cancer_subtype, ihc_subtype, seg_config):
565
- if files is None:
566
- return pd.DataFrame()
567
- settings = []
568
- for file in files:
569
- filename = file.name if hasattr(file, "name") else file
570
- slide_name = filename.split("/")[-1]
571
- settings.append(
572
- [slide_name, site_type, cancer_subtype, ihc_subtype, seg_config]
573
- )
574
- df = pd.DataFrame(settings, columns=SETTINGS_COLUMNS)
575
- return df
576
-
577
- # Only display settings table and upload button if multiple slides are uploaded
578
- @gr.on(
579
- [
580
- input_slides.change,
581
- site_dropdown.change,
582
- cancer_subtype_dropdown.change,
583
- ihc_subtype_dropdown.change,
584
- seg_config_dropdown.change,
585
- ],
586
- inputs=[
587
- input_slides,
588
- site_dropdown,
589
- cancer_subtype_dropdown,
590
- ihc_subtype_dropdown,
591
- seg_config_dropdown,
592
- ],
593
- outputs=[settings_input, settings_csv, ihc_subtype_dropdown],
594
- )
595
- def update_settings(files, site_type, cancer_subtype, ihc_subtype, seg_config):
596
- has_ihc = "Breast" in cancer_subtype
597
- if not files:
598
- return None, None, gr.Dropdown(visible=has_ihc)
599
- settings_df = get_settings(
600
- files, site_type, cancer_subtype, ihc_subtype, seg_config
601
- )
602
- if settings_df is not None:
603
- has_ihc = any("Breast" in cs for cs in settings_df["Cancer Subtype"])
604
- visible = files and len(files) > 1
605
- return (
606
- gr.Dataframe(settings_df, visible=visible),
607
- gr.File(visible=visible),
608
- gr.Dropdown(visible=has_ihc),
609
- )
610
-
611
- @settings_csv.upload(
612
- inputs=[settings_csv],
613
- outputs=[settings_input],
614
- )
615
- def read_settings(file):
616
- if file is None:
617
- return None
618
- df = load_settings(file.name if hasattr(file, "name") else file)
619
- return gr.Dataframe(df, visible=True)
620
-
621
- analyze_button.click(
622
- analyze_slides,
623
- inputs=[
624
- input_slides,
625
- settings_input,
626
- user_dir_state,
627
- ],
628
- outputs=[
629
- slide_masks,
630
- aeon_output_table,
631
- aeon_download_button,
632
- paladin_output_table,
633
- paladin_download_button,
634
- user_dir_state,
635
- ],
636
- queue=True,
637
- show_progress_on=paladin_output_table,
638
- )
639
- settings_input.change(
640
- validate_settings, inputs=[settings_input], outputs=[settings_input]
641
- )
642
- demo.load(
643
- create_user_directory,
644
- inputs=[user_dir_state],
645
- outputs=[user_dir_state],
646
- )
647
-
648
- demo.queue(max_size=10, default_concurrency_limit=8)
649
- demo.launch(
650
- server_name=server_name,
651
- share=share,
652
- server_port=server_port,
653
- show_error=True,
654
- favicon_path=current_dir / "favicon.svg",
655
- )
656
 
657
 
658
  def main():
@@ -718,7 +103,7 @@ def main():
718
  logger.add("debug.log", level="DEBUG")
719
  logger.debug("Debug logging enabled")
720
 
721
- download_and_process_models()
722
 
723
  if args.slide_path and not args.slide_csv:
724
  # Single slide processing mode
@@ -736,12 +121,13 @@ def main():
736
  ],
737
  columns=SETTINGS_COLUMNS,
738
  )
739
- settings_df = validate_settings(settings_df)
740
  slide_mask, aeon_results, paladin_results = analyze_slide(
741
  args.slide_path,
742
  args.segmentation_config,
743
  args.site_type,
744
  args.cancer_subtype,
 
745
  args.ihc_subtype,
746
  num_workers=args.num_workers,
747
  )
@@ -770,7 +156,7 @@ def main():
770
  all_paladin_results = []
771
  all_aeon_results = []
772
  settings_df = load_settings(args.slide_csv)
773
- settings_df = validate_settings(settings_df)
774
  for idx, row in settings_df.iterrows():
775
  slide_path = row["Slide"]
776
  seg_config = row["Segmentation Config"]
@@ -785,6 +171,7 @@ def main():
785
  seg_config,
786
  site_type,
787
  cancer_subtype,
 
788
  ihc_subtype,
789
  num_workers=args.num_workers,
790
  )
 
1
  from argparse import ArgumentParser
 
2
  import pandas as pd
 
 
 
 
 
 
3
  from pathlib import Path
4
  from huggingface_hub import snapshot_download
 
 
 
 
5
  from loguru import logger
6
 
7
+ from mosaic.ui import launch_gradio
8
+ from mosaic.ui.app import set_cancer_subtype_maps
9
+ from mosaic.ui.utils import (
10
+ get_oncotree_code_name,
11
+ load_settings,
12
+ validate_settings,
13
+ IHC_SUBTYPES,
14
+ SETTINGS_COLUMNS,
15
+ )
16
+ from mosaic.analysis import analyze_slide
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
 
19
  def download_and_process_models():
 
20
  snapshot_download(repo_id="PDM-Group/paladin-aeon-models", local_dir="data")
21
 
22
  model_map = pd.read_csv(
 
30
  reversed_cancer_subtype_name_map = {
31
  value: key for key, value in cancer_subtype_name_map.items()
32
  }
33
+
34
+ # Set the global maps in the UI module
35
+ set_cancer_subtype_maps(cancer_subtype_name_map, reversed_cancer_subtype_name_map, cancer_subtypes)
36
+
37
+ return cancer_subtype_name_map, reversed_cancer_subtype_name_map, cancer_subtypes
38
 
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
 
43
  def main():
 
103
  logger.add("debug.log", level="DEBUG")
104
  logger.debug("Debug logging enabled")
105
 
106
+ cancer_subtype_name_map, reversed_cancer_subtype_name_map, cancer_subtypes = download_and_process_models()
107
 
108
  if args.slide_path and not args.slide_csv:
109
  # Single slide processing mode
 
121
  ],
122
  columns=SETTINGS_COLUMNS,
123
  )
124
+ settings_df = validate_settings(settings_df, cancer_subtype_name_map, cancer_subtypes, reversed_cancer_subtype_name_map)
125
  slide_mask, aeon_results, paladin_results = analyze_slide(
126
  args.slide_path,
127
  args.segmentation_config,
128
  args.site_type,
129
  args.cancer_subtype,
130
+ cancer_subtype_name_map,
131
  args.ihc_subtype,
132
  num_workers=args.num_workers,
133
  )
 
156
  all_paladin_results = []
157
  all_aeon_results = []
158
  settings_df = load_settings(args.slide_csv)
159
+ settings_df = validate_settings(settings_df, cancer_subtype_name_map, cancer_subtypes, reversed_cancer_subtype_name_map)
160
  for idx, row in settings_df.iterrows():
161
  slide_path = row["Slide"]
162
  seg_config = row["Segmentation Config"]
 
171
  seg_config,
172
  site_type,
173
  cancer_subtype,
174
+ cancer_subtype_name_map,
175
  ihc_subtype,
176
  num_workers=args.num_workers,
177
  )
src/mosaic/ui/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .app import launch_gradio
2
+
3
+ __all__ = ["launch_gradio"]
src/mosaic/ui/app.py ADDED
@@ -0,0 +1,354 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from pathlib import Path
4
+ from loguru import logger
5
+
6
+ from mosaic.ui.utils import (
7
+ get_oncotree_code_name,
8
+ create_user_directory,
9
+ load_settings,
10
+ validate_settings,
11
+ IHC_SUBTYPES,
12
+ SETTINGS_COLUMNS,
13
+ )
14
+ from mosaic.analysis import analyze_slide
15
+
16
+ current_dir = Path(__file__).parent.parent
17
+
18
+ # Global variables for cancer subtypes (set by download_and_process_models)
19
+ cancer_subtype_name_map = {}
20
+ reversed_cancer_subtype_name_map = {}
21
+ cancer_subtypes = []
22
+
23
+
24
def set_cancer_subtype_maps(csn_map, rcsn_map, cs):
    """Install the cancer-subtype lookup tables used by this module.

    Rebinds the three module-level globals in a single step:
    ``cancer_subtype_name_map`` to ``csn_map``,
    ``reversed_cancer_subtype_name_map`` to ``rcsn_map`` and
    ``cancer_subtypes`` to ``cs``. The objects are stored as given, without
    copying.
    """
    global cancer_subtype_name_map, reversed_cancer_subtype_name_map, cancer_subtypes
    (
        cancer_subtype_name_map,
        reversed_cancer_subtype_name_map,
        cancer_subtypes,
    ) = (csn_map, rcsn_map, cs)
30
+
31
+
32
def analyze_slides(
    slides,
    settings_input,
    user_dir,
    progress=gr.Progress(track_tqdm=True),
):
    """Analyze every uploaded slide and assemble the outputs for the UI.

    Args:
        slides: Uploaded slide file paths. Each entry is matched to a settings
            row by its file name (the text after the last "/"); when no name
            matches, the slide at the same position is used.
        settings_input: Per-slide settings table; it is passed through
            ``validate_settings`` before use.
        user_dir: Directory that receives the result CSV files. When no usable
            directory is available a fresh temporary one is created.
        progress: Gradio progress tracker, also forwarded to ``analyze_slide``.

    Returns:
        A 6-tuple ``(slide masks, Aeon table, Aeon download button,
        Paladin table or None, Paladin download button, user_dir)``.

    Raises:
        gr.Error: If no slide was uploaded, or the number of settings rows
            differs from the number of slides.
    """
    if slides is None or len(slides) == 0:
        raise gr.Error("Please upload at least one slide.")
    if user_dir is None:
        user_dir = create_user_directory(None, gr.Request())
    if not isinstance(user_dir, (str, Path)):
        # A hand-built gr.Request() carries no session hash, so the helper
        # cannot name a session directory. Fall back to a private temporary
        # directory instead of failing later on ``user_dir / ...``.
        import tempfile

        user_dir = tempfile.mkdtemp(prefix="mosaic_")
    user_dir = Path(user_dir)

    settings_input = validate_settings(
        settings_input,
        cancer_subtype_name_map,
        cancer_subtypes,
        reversed_cancer_subtype_name_map,
    )
    if len(slides) != len(settings_input):
        raise gr.Error("Missing settings for uploaded slides")

    # Map file name -> uploaded path so each settings row is paired with the
    # slide it names, even if the settings table is ordered differently from
    # the upload list.
    slide_paths = {str(path).split("/")[-1]: path for path in slides}

    all_slide_masks = []
    all_aeon_results = []
    all_paladin_results = []

    total = len(settings_input)
    progress(0.0, desc="Starting analysis")
    # Use the row position (not the index label) for progress and fallback
    # lookups; index labels are not guaranteed to be 0..n-1.
    for position, (_, row) in enumerate(settings_input.iterrows()):
        slide_name = row["Slide"]
        progress(
            position / total,
            desc=f"Analyzing {slide_name}, slide {position + 1} of {total}",
        )
        slide_path = slide_paths.get(slide_name, slides[position])

        (
            slide_mask,
            aeon_results,
            paladin_results,
        ) = analyze_slide(
            slide_path,
            row["Segmentation Config"],
            row["Site Type"],
            row["Cancer Subtype"],
            cancer_subtype_name_map,
            row["IHC Subtype"],
            progress=progress,
        )
        if aeon_results is not None:
            if len(slides) > 1:
                # One column per slide once results are concatenated side by side.
                aeon_results.columns = [f"{slide_name}"]
            if row["Cancer Subtype"] == "Unknown":
                all_aeon_results.append(aeon_results)
        if paladin_results is not None:
            paladin_results.insert(
                0, "Slide", pd.Series([slide_name] * len(paladin_results))
            )
            all_paladin_results.append(paladin_results)
        if slide_mask is not None:
            all_slide_masks.append((slide_mask, slide_name))
    progress(0.99, desc="Analysis complete, wrapping up results")

    timestamp = pd.Timestamp.now().strftime("%Y%m%d-%H%M%S")
    combined_paladin_results = (
        pd.concat(all_paladin_results, ignore_index=True)
        if all_paladin_results
        else pd.DataFrame()
    )

    # Hidden placeholders, replaced below when Aeon produced results.
    combined_aeon_results = gr.DataFrame(visible=False)
    aeon_output = gr.DownloadButton(visible=False)
    if all_aeon_results:
        combined_aeon_results = pd.concat(all_aeon_results, axis=1)
        combined_aeon_results.reset_index(inplace=True)

        combined_aeon_results = combined_aeon_results.round(3)
        # Show "<name> (<code>)" instead of the bare Oncotree code.
        combined_aeon_results["Cancer Subtype"] = [
            f"{get_oncotree_code_name(code)} ({code})"
            for code in combined_aeon_results["Cancer Subtype"]
        ]

        aeon_output_path = user_dir / f"aeon_results-{timestamp}.csv"
        combined_aeon_results.to_csv(aeon_output_path)

        combined_aeon_results = gr.DataFrame(
            combined_aeon_results,
            visible=True,
            column_widths=["4px"] + ["2px"] * (combined_aeon_results.shape[1] - 1),
        )
        aeon_output = gr.DownloadButton(value=aeon_output_path, visible=True)

    paladin_output = gr.DownloadButton(visible=False)
    has_paladin_results = len(combined_paladin_results) > 0
    if has_paladin_results:
        # Only touch the columns when there are rows: the empty placeholder
        # frame has no "Cancer Subtype" column and indexing it raises KeyError.
        combined_paladin_results["Cancer Subtype"] = [
            f"{get_oncotree_code_name(code)} ({code})"
            for code in combined_paladin_results["Cancer Subtype"]
        ]
        combined_paladin_results["Score"] = combined_paladin_results["Score"].round(3)

        paladin_output_path = user_dir / f"paladin_results-{timestamp}.csv"
        combined_paladin_results.to_csv(paladin_output_path, index=False)
        paladin_output = gr.DownloadButton(value=paladin_output_path, visible=True)

    progress(1.0, desc="All done!")

    return (
        all_slide_masks,
        combined_aeon_results,
        aeon_output,
        combined_paladin_results if has_paladin_results else None,
        paladin_output,
        user_dir,
    )
145
+
146
+
147
def launch_gradio(server_name, server_port, share):
    """Build the Mosaic Gradio interface, wire its events, and serve it.

    Args:
        server_name: Forwarded to ``demo.launch`` as ``server_name``.
        server_port: Forwarded to ``demo.launch`` as ``server_port``.
        share: Forwarded to ``demo.launch`` as ``share``.

    The cancer-subtype dropdown is populated from the module-level
    ``cancer_subtype_name_map`` as it is at the time this function runs.
    """
    with gr.Blocks(title="Mosaic") as demo:
        user_dir_state = gr.State(None)
        gr.Markdown(
            "# Mosaic: H&E Whole Slide Image Cancer Subtype and Biomarker Inference"
        )
        gr.Markdown(
            "Upload an H&E whole slide image in SVS or TIFF format. The slide will be processed to infer cancer subtype and relevant biomarkers."
        )
        with gr.Row():
            # Left column: inputs and per-slide settings.
            with gr.Column():
                input_slides = gr.File(
                    label="Upload H&E Whole Slide Image",
                    file_types=[".svs", ".tiff", ".tif"],
                    file_count="multiple",
                )
                site_dropdown = gr.Dropdown(
                    choices=["Primary", "Metastatic"],
                    label="Site Type",
                    value="Primary",
                )
                cancer_subtype_dropdown = gr.Dropdown(
                    choices=list(cancer_subtype_name_map),
                    label="Cancer Subtype",
                    value="Unknown",
                )
                ihc_subtype_dropdown = gr.Dropdown(
                    choices=IHC_SUBTYPES,
                    label="IHC Subtype (if applicable)",
                    value="",
                    visible=False,
                )
                seg_config_dropdown = gr.Dropdown(
                    choices=["Biopsy", "Resection", "TCGA"],
                    label="Segmentation Config",
                    value="Biopsy",
                )
                with gr.Row():
                    settings_input = gr.Dataframe(
                        headers=[
                            "Slide",
                            "Site Type",
                            "Cancer Subtype",
                            "IHC Subtype",
                            "Segmentation Config",
                        ],
                        label="Current Settings",
                        datatype=["str", "str", "str", "str", "str"],
                        visible=False,
                        interactive=True,
                        static_columns="Slide",
                    )

                with gr.Row():
                    settings_csv = gr.File(
                        file_types=[".csv"], label="Upload Settings CSV", visible=False
                    )

                with gr.Row():
                    clear_button = gr.Button("Clear")
                    analyze_button = gr.Button("Analyze", variant="primary")
            # Right column: results.
            with gr.Column():
                slide_masks = gr.Gallery(
                    label="Slide Masks",
                    columns=3,
                    object_fit="contain",
                    height="auto",
                )
                aeon_output_table = gr.Dataframe(
                    headers=["Cancer Subtype", "Slide Name"],
                    label="Cancer Subtype Inference Confidence",
                    datatype=["str", "number"],
                    visible=False,
                )
                aeon_download_button = gr.DownloadButton(
                    "Download Aeon Results as CSV",
                    label="Download Results",
                    visible=False,
                )
                paladin_output_table = gr.Dataframe(
                    headers=["Slide", "Cancer Subtype", "Biomarker", "Score"],
                    label="Biomarker Inference",
                    datatype=["str", "str", "str", "number"],
                )
                paladin_download_button = gr.DownloadButton(
                    "Download Paladin Results as CSV",
                    label="Download Results",
                    visible=False,
                )

        @clear_button.click(
            outputs=[
                input_slides,
                slide_masks,
                paladin_output_table,
                paladin_download_button,
                aeon_output_table,
                aeon_download_button,
                settings_input,
                settings_csv,
            ],
        )
        def clear_fn():
            """Reset inputs and results; values follow the ``outputs`` order."""
            return (
                None,
                None,
                None,
                # Hide the Paladin download button too, so no stale download
                # control is left on screen after a clear.
                gr.DownloadButton(visible=False),
                gr.Dataframe(visible=False),
                gr.DownloadButton(visible=False),
                gr.Dataframe(visible=False),
                gr.File(visible=False),
            )

        def get_settings(files, site_type, cancer_subtype, ihc_subtype, seg_config):
            """Build one settings row per uploaded file from the dropdown values."""
            if files is None:
                return pd.DataFrame()
            settings = []
            for file in files:
                filename = file.name if hasattr(file, "name") else file
                slide_name = filename.split("/")[-1]
                settings.append(
                    [slide_name, site_type, cancer_subtype, ihc_subtype, seg_config]
                )
            return pd.DataFrame(settings, columns=SETTINGS_COLUMNS)

        # Only display settings table and upload button if multiple slides are uploaded
        @gr.on(
            [
                input_slides.change,
                site_dropdown.change,
                cancer_subtype_dropdown.change,
                ihc_subtype_dropdown.change,
                seg_config_dropdown.change,
            ],
            inputs=[
                input_slides,
                site_dropdown,
                cancer_subtype_dropdown,
                ihc_subtype_dropdown,
                seg_config_dropdown,
            ],
            outputs=[settings_input, settings_csv, ihc_subtype_dropdown],
        )
        def update_settings(files, site_type, cancer_subtype, ihc_subtype, seg_config):
            """Regenerate the settings table whenever an input control changes."""
            # The dropdown value may be None (e.g. cleared); treat that as
            # "not breast" instead of raising on ``"Breast" in None``.
            # Every generated row shares this subtype, so one check suffices.
            has_ihc = bool(cancer_subtype) and "Breast" in cancer_subtype
            if not files:
                return None, None, gr.Dropdown(visible=has_ihc)
            settings_df = get_settings(
                files, site_type, cancer_subtype, ihc_subtype, seg_config
            )
            visible = len(files) > 1
            return (
                gr.Dataframe(settings_df, visible=visible),
                gr.File(visible=visible),
                gr.Dropdown(visible=has_ihc),
            )

        @settings_csv.upload(
            inputs=[settings_csv],
            outputs=[settings_input],
        )
        def read_settings(file):
            """Replace the settings table with the contents of an uploaded CSV."""
            if file is None:
                return None
            df = load_settings(file.name if hasattr(file, "name") else file)
            return gr.Dataframe(df, visible=True)

        analyze_button.click(
            analyze_slides,
            inputs=[
                input_slides,
                settings_input,
                user_dir_state,
            ],
            outputs=[
                slide_masks,
                aeon_output_table,
                aeon_download_button,
                paladin_output_table,
                paladin_download_button,
                user_dir_state,
            ],
            queue=True,
            show_progress_on=paladin_output_table,
        )
        # The lambda reads the module-level maps at call time, so values set
        # after the UI is built are still picked up.
        settings_input.change(
            lambda df: validate_settings(
                df,
                cancer_subtype_name_map,
                cancer_subtypes,
                reversed_cancer_subtype_name_map,
            ),
            inputs=[settings_input],
            outputs=[settings_input],
        )
        demo.load(
            create_user_directory,
            inputs=[user_dir_state],
            outputs=[user_dir_state],
        )

    demo.queue(max_size=10, default_concurrency_limit=8)
    demo.launch(
        server_name=server_name,
        share=share,
        server_port=server_port,
        show_error=True,
        favicon_path=current_dir / "favicon.svg",
    )
src/mosaic/ui/utils.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
2
+ from pathlib import Path
3
+ import pandas as pd
4
+ import gradio as gr
5
+ import requests
6
+
7
+ # This path should be outside your project directory if running locally
8
+ TEMP_USER_DATA_DIR = Path(tempfile.gettempdir()) / "mosaic_user_data"
9
+
10
+ IHC_SUBTYPES = ["", "HR+/HER2+", "HR+/HER2-", "HR-/HER2+", "HR-/HER2-"]
11
+
12
+ SETTINGS_COLUMNS = [
13
+ "Slide",
14
+ "Site Type",
15
+ "Cancer Subtype",
16
+ "IHC Subtype",
17
+ "Segmentation Config",
18
+ ]
19
+
20
+ oncotree_code_map = {}
21
+
22
+
23
def get_oncotree_code_name(code):
    """Return the display name for an Oncotree ``code``.

    Names already present in the module-level ``oncotree_code_map`` cache are
    returned without a network call. Otherwise the Oncotree search API is
    queried. A successful (HTTP 200) answer is cached, including the
    "Unknown" result for a code with no match. Network errors and non-200
    responses return "Unknown" without caching, so a transient failure does
    not permanently hide a valid name.

    Args:
        code: Oncotree code to look up.

    Returns:
        The name reported by the API, or ``"Unknown"``.
    """
    if code in oncotree_code_map:
        return oncotree_code_map[code]

    url = f"https://oncotree.mskcc.org/api/tumorTypes/search/code/{code}?exactMatch=true&version=oncotree_2025_04_08"
    code_name = "Unknown"
    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        return code_name
    if response.status_code != 200:
        return code_name

    data = response.json()
    if data:
        code_name = data[0]["name"]

    oncotree_code_map[code] = code_name
    return code_name
38
+
39
+
40
def create_user_directory(state, request: gr.Request):
    """Create a unique directory for each user session.

    Args:
        state: Current value of the session state; not used.
        request: Gradio request whose ``session_hash`` names the directory.

    Returns:
        ``TEMP_USER_DATA_DIR / session_hash`` (created if missing), or
        ``None`` when the request carries no session hash.
    """
    session_hash = getattr(request, "session_hash", None)
    if session_hash is None:
        # Return a single value: this function feeds exactly one output, and
        # callers test the result with ``is None``.
        return None

    user_dir = TEMP_USER_DATA_DIR / session_hash
    user_dir.mkdir(parents=True, exist_ok=True)
    return user_dir
49
+
50
+
51
def load_settings(slide_csv_path):
    """Read a settings CSV and return it restricted to ``SETTINGS_COLUMNS``.

    The optional columns "Segmentation Config", "Cancer Subtype" and
    "IHC Subtype" are filled with "Biopsy", "Unknown" and "" respectively
    when the file does not provide them. The CSV is read with
    ``na_filter=False``, so empty cells stay empty strings.

    Raises:
        ValueError: If any column of ``SETTINGS_COLUMNS`` is still missing
            after the defaults have been applied.
    """
    frame = pd.read_csv(slide_csv_path, na_filter=False)

    optional_defaults = (
        ("Segmentation Config", "Biopsy"),
        ("Cancer Subtype", "Unknown"),
        ("IHC Subtype", ""),
    )
    for column, fallback in optional_defaults:
        if column not in frame.columns:
            frame[column] = fallback

    if any(column not in frame.columns for column in SETTINGS_COLUMNS):
        raise ValueError("Missing required column in CSV file")
    return frame[SETTINGS_COLUMNS]
64
+
65
+
66
def validate_settings(settings_df, cancer_subtype_name_map, cancer_subtypes, reversed_cancer_subtype_name_map):
    """Normalize a settings table in place and warn about every correction.

    The columns are renamed to ``SETTINGS_COLUMNS``. For each row:

    * a cancer subtype found in ``cancer_subtypes`` is replaced by its entry
      in ``reversed_cancer_subtype_name_map``;
    * a subtype that is not a key of ``cancer_subtype_name_map`` becomes
      "Unknown";
    * an unrecognized site type becomes "Primary";
    * an IHC subtype is cleared when the subtype does not contain "Breast"
      or when the value is not in ``IHC_SUBTYPES``;
    * an unrecognized segmentation config becomes "Biopsy".

    All messages are sent through a single ``gr.Warning`` call. The same
    (mutated) DataFrame is returned.
    """
    settings_df.columns = SETTINGS_COLUMNS
    valid_site_types = ("Metastatic", "Primary")
    valid_seg_configs = ("Biopsy", "Resection", "TCGA")
    problems = []

    # ``record`` is the row as it was when iteration reached it; corrected
    # values are written to (and re-read from) the frame via ``.at``.
    for label, record in settings_df.iterrows():
        slide = record["Slide"]
        given_subtype = record["Cancer Subtype"]
        given_ihc = record["IHC Subtype"]

        if given_subtype in cancer_subtypes:
            settings_df.at[label, "Cancer Subtype"] = reversed_cancer_subtype_name_map[
                given_subtype
            ]
        if settings_df.at[label, "Cancer Subtype"] not in cancer_subtype_name_map:
            problems.append(
                f"Slide {slide}: Unknown cancer subtype. Valid subtypes are: {', '.join(cancer_subtype_name_map)}. "
            )
            settings_df.at[label, "Cancer Subtype"] = "Unknown"

        if record["Site Type"] not in valid_site_types:
            problems.append(
                f"Slide {slide}: Unknown site type. Valid types are: Metastatic, Primary. "
            )
            settings_df.at[label, "Site Type"] = "Primary"

        if not ("Breast" in settings_df.at[label, "Cancer Subtype"] or given_ihc == ""):
            problems.append(
                f"Slide {slide}: IHC subtype should be empty for non-breast cancer subtypes. "
            )
            settings_df.at[label, "IHC Subtype"] = ""

        if given_ihc not in IHC_SUBTYPES:
            problems.append(
                f"Slide {slide}: Unknown IHC subtype. Valid subtypes are: {', '.join(IHC_SUBTYPES)}. "
            )
            settings_df.at[label, "IHC Subtype"] = ""

        if record["Segmentation Config"] not in valid_seg_configs:
            problems.append(
                f"Slide {slide}: Unknown segmentation config. Valid configs are: Biopsy, Resection, TCGA. "
            )
            settings_df.at[label, "Segmentation Config"] = "Biopsy"

    if problems:
        gr.Warning("\n".join(problems))

    return settings_df
110
+
111
+
112
def export_to_csv(df):
    """Write ``df`` to ``paladin_results.csv`` and return that path.

    The file is written relative to the current working directory, without
    the index column.

    Raises:
        gr.Error: If ``df`` is ``None`` or has no data.
    """
    has_data = df is not None and not df.empty
    if not has_data:
        raise gr.Error("No data to export.")

    destination = "paladin_results.csv"
    df.to_csv(destination, index=False)
    return destination
tests/conftest.py CHANGED
@@ -3,14 +3,34 @@
3
  import sys
4
  from unittest.mock import MagicMock
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  # Mock heavy dependencies before any imports
7
  # This is necessary to allow tests to run without full environment setup
8
- mock_modules = [
9
- 'mussel.models',
10
- 'mussel.utils',
11
- 'mussel.utils.segment',
12
- 'mussel.cli.tessellate',
13
- ]
14
-
15
- for module in mock_modules:
16
- sys.modules[module] = MagicMock()
 
 
 
 
 
 
3
  import sys
4
  from unittest.mock import MagicMock
5
 
6
+ # Create mock for torch with sub-modules
7
class TorchMock(MagicMock):
    """Stand-in for the ``torch`` package used while running the tests.

    ``utils``, ``nn`` and ``cuda`` are class-level mocks so that the same
    objects can be reached both as attributes of the mocked package and
    directly from the class.
    """

    # Three independent mocks, one per sub-module.
    utils, nn, cuda = MagicMock(), MagicMock(), MagicMock()
12
+
13
+ # Create mock for gradio with Error class
14
class GradioMock(MagicMock):
    """Stand-in for the ``gradio`` package used while running the tests.

    ``Error`` is a real exception type so it can be raised and caught;
    ``Request`` and ``Progress`` are the ``MagicMock`` class itself so that
    calling them yields a mock; ``Warning`` is a no-op.
    """

    Error = Exception
    Request = MagicMock
    Progress = MagicMock

    @staticmethod
    def Warning(msg):
        """Accept and discard a warning message.

        Declared as a staticmethod because the package is mocked by an
        *instance* of this class: a plain function stored on the class would
        be bound on attribute access and receive the instance as ``msg``,
        making ``gr.Warning("text")`` fail with a TypeError.
        """
        return None
20
+
21
  # Mock heavy dependencies before any imports
22
  # This is necessary to allow tests to run without full environment setup
23
# Register every stand-in in one step. Parent packages are listed alongside
# their sub-modules, and the torch sub-module entries reuse the class-level
# mocks defined on TorchMock.
sys.modules.update(
    {
        "mussel": MagicMock(),
        "mussel.models": MagicMock(),
        "mussel.utils": MagicMock(),
        "mussel.utils.segment": MagicMock(),
        "mussel.cli": MagicMock(),
        "mussel.cli.tessellate": MagicMock(),
        "gradio": GradioMock(),
        "torch": TorchMock(),
        "torch.utils": TorchMock.utils,
        "torch.utils.data": TorchMock.utils.data,
        "torch.nn": TorchMock.nn,
        "torch.cuda": TorchMock.cuda,
        "huggingface_hub": MagicMock(),
        "loguru": MagicMock(),
    }
)
tests/test_gradio_app.py CHANGED
@@ -1,4 +1,4 @@
1
- """Unit tests for mosaic.gradio_app module."""
2
 
3
  import tempfile
4
  from pathlib import Path
@@ -6,11 +6,14 @@ from pathlib import Path
6
  import pandas as pd
7
  import pytest
8
 
9
- from mosaic.gradio_app import (
10
  IHC_SUBTYPES,
11
  SETTINGS_COLUMNS,
12
  load_settings,
13
  validate_settings,
 
 
 
14
  )
15
 
16
 
@@ -55,6 +58,21 @@ class TestConstants:
55
  class TestLoadSettings:
56
  """Test load_settings function."""
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  @pytest.fixture
59
  def temp_settings_csv(self):
60
  """Create a temporary settings CSV file with all columns."""
@@ -135,8 +153,6 @@ class TestGetOncotreeCodeName:
135
 
136
  def test_oncotree_code_name_caching(self, mocker):
137
  """Test that oncotree code names are cached."""
138
- from mosaic.gradio_app import get_oncotree_code_name, oncotree_code_map
139
-
140
  # Mock the requests.get call
141
  mock_response = mocker.Mock()
142
  mock_response.status_code = 200
@@ -159,8 +175,6 @@ class TestGetOncotreeCodeName:
159
 
160
  def test_oncotree_code_name_returns_string(self, mocker):
161
  """Test that function returns a string."""
162
- from mosaic.gradio_app import get_oncotree_code_name, oncotree_code_map
163
-
164
  # Mock the requests.get call
165
  mock_response = mocker.Mock()
166
  mock_response.status_code = 200
@@ -175,8 +189,6 @@ class TestGetOncotreeCodeName:
175
 
176
  def test_oncotree_invalid_code_returns_unknown(self, mocker):
177
  """Test that invalid code returns 'Unknown'."""
178
- from mosaic.gradio_app import get_oncotree_code_name, oncotree_code_map
179
-
180
  # Mock the requests.get call to return empty response (no matching codes)
181
  mock_response = mocker.Mock()
182
  mock_response.status_code = 200
@@ -194,8 +206,6 @@ class TestExportToCsv:
194
 
195
  def test_export_to_csv_returns_path(self):
196
  """Test that export_to_csv returns a file path."""
197
- from mosaic.gradio_app import export_to_csv
198
-
199
  df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
200
  result = export_to_csv(df)
201
  assert isinstance(result, str)
@@ -205,8 +215,6 @@ class TestExportToCsv:
205
 
206
  def test_export_to_csv_creates_file(self):
207
  """Test that export_to_csv creates a CSV file."""
208
- from mosaic.gradio_app import export_to_csv
209
-
210
  df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
211
  result = export_to_csv(df)
212
  assert Path(result).exists()
@@ -215,7 +223,6 @@ class TestExportToCsv:
215
 
216
  def test_export_to_csv_with_empty_dataframe_raises_error(self):
217
  """Test that exporting empty DataFrame raises error."""
218
- from mosaic.gradio_app import export_to_csv
219
  import gradio as gr
220
 
221
  df = pd.DataFrame()
@@ -224,7 +231,6 @@ class TestExportToCsv:
224
 
225
  def test_export_to_csv_with_none_raises_error(self):
226
  """Test that exporting None raises error."""
227
- from mosaic.gradio_app import export_to_csv
228
  import gradio as gr
229
 
230
  with pytest.raises(gr.Error):
 
1
+ """Unit tests for mosaic UI utility functions."""
2
 
3
  import tempfile
4
  from pathlib import Path
 
6
  import pandas as pd
7
  import pytest
8
 
9
+ from mosaic.ui.utils import (
10
  IHC_SUBTYPES,
11
  SETTINGS_COLUMNS,
12
  load_settings,
13
  validate_settings,
14
+ export_to_csv,
15
+ get_oncotree_code_name,
16
+ oncotree_code_map,
17
  )
18
 
19
 
 
58
  class TestLoadSettings:
59
  """Test load_settings function."""
60
 
61
+ @pytest.fixture
62
+ def sample_cancer_subtype_maps(self):
63
+ """Create sample cancer subtype maps for testing."""
64
+ cancer_subtypes = ["LUAD", "BRCA", "COAD"]
65
+ cancer_subtype_name_map = {
66
+ "Lung Adenocarcinoma (LUAD)": "LUAD",
67
+ "Breast Invasive Carcinoma (BRCA)": "BRCA",
68
+ "Colon Adenocarcinoma (COAD)": "COAD",
69
+ "Unknown": "UNK",
70
+ }
71
+ reversed_cancer_subtype_name_map = {
72
+ value: key for key, value in cancer_subtype_name_map.items()
73
+ }
74
+ return cancer_subtype_name_map, cancer_subtypes, reversed_cancer_subtype_name_map
75
+
76
  @pytest.fixture
77
  def temp_settings_csv(self):
78
  """Create a temporary settings CSV file with all columns."""
 
153
 
154
  def test_oncotree_code_name_caching(self, mocker):
155
  """Test that oncotree code names are cached."""
 
 
156
  # Mock the requests.get call
157
  mock_response = mocker.Mock()
158
  mock_response.status_code = 200
 
175
 
176
  def test_oncotree_code_name_returns_string(self, mocker):
177
  """Test that function returns a string."""
 
 
178
  # Mock the requests.get call
179
  mock_response = mocker.Mock()
180
  mock_response.status_code = 200
 
189
 
190
  def test_oncotree_invalid_code_returns_unknown(self, mocker):
191
  """Test that invalid code returns 'Unknown'."""
 
 
192
  # Mock the requests.get call to return empty response (no matching codes)
193
  mock_response = mocker.Mock()
194
  mock_response.status_code = 200
 
206
 
207
  def test_export_to_csv_returns_path(self):
208
  """Test that export_to_csv returns a file path."""
 
 
209
  df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
210
  result = export_to_csv(df)
211
  assert isinstance(result, str)
 
215
 
216
  def test_export_to_csv_creates_file(self):
217
  """Test that export_to_csv creates a CSV file."""
 
 
218
  df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
219
  result = export_to_csv(df)
220
  assert Path(result).exists()
 
223
 
224
  def test_export_to_csv_with_empty_dataframe_raises_error(self):
225
  """Test that exporting empty DataFrame raises error."""
 
226
  import gradio as gr
227
 
228
  df = pd.DataFrame()
 
231
 
232
  def test_export_to_csv_with_none_raises_error(self):
233
  """Test that exporting None raises error."""
 
234
  import gradio as gr
235
 
236
  with pytest.raises(gr.Error):