raylim committed on
Commit
de40714
·
unverified ·
1 Parent(s): 24b5de2

Add sex and tissue site parameters to Aeon inference

Browse files

- Add encoding functions to data.py (encode_sex, encode_tissue_site, tissue_site_to_one_hot)
- Update TileFeatureTensorDataset to optionally include SEX and TISSUE_SITE fields
- Add sex and tissue_site_idx parameters to Aeon run() function
- Thread sex and tissue_site through analysis pipeline (Aeon only, Paladin unchanged)
- Add Sex and Tissue Site dropdowns to UI
- Add validation for Sex and Tissue Site in settings CSV
- Update SETTINGS_COLUMNS to include new fields

src/mosaic/analysis.py CHANGED
@@ -379,6 +379,8 @@ def analyze_slide(
379
  slide_path,
380
  seg_config,
381
  site_type,
 
 
382
  cancer_subtype,
383
  cancer_subtype_name_map,
384
  ihc_subtype="",
 
379
  slide_path,
380
  seg_config,
381
  site_type,
382
+ sex,
383
+ tissue_site,
384
  cancer_subtype,
385
  cancer_subtype_name_map,
386
  ihc_subtype="",
src/mosaic/inference/data.py CHANGED
@@ -201,6 +201,82 @@ CANCER_TYPE_TO_INT_MAP = {
201
  INT_TO_CANCER_TYPE_MAP = {v: k for k, v in CANCER_TYPE_TO_INT_MAP.items()}
202
 
203
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  class SiteType(Enum):
205
  PRIMARY = "Primary"
206
  METASTASIS = "Metastasis"
 
201
  INT_TO_CANCER_TYPE_MAP = {v: k for k, v in CANCER_TYPE_TO_INT_MAP.items()}
202
 
203
 
204
# Module-level cache for the tissue-site lookup. It stays None until
# get_tissue_site_map() has successfully parsed the CSV once.
_TISSUE_SITE_MAP = None


def get_tissue_site_map():
    """Load the tissue site name -> index mapping from CSV.

    The CSV is read on the first call only; later calls return the cached
    dict. The mapping is built completely before it is stored in the cache,
    so a failure while parsing (e.g. a non-numeric ``idx`` cell) never leaves
    a partially filled mapping behind for subsequent callers.

    Returns:
        dict: Mapping of tissue site names (``TISSUE_SITE`` column) to integer
            indices (``idx`` column). The same dict object is returned on
            every call, so callers should treat it as read-only.

    Raises:
        Whatever ``pandas.read_csv`` raises if the file cannot be read,
        ``KeyError`` if an expected column is missing, and ``ValueError`` /
        ``TypeError`` if an ``idx`` value cannot be converted to ``int``.
    """
    global _TISSUE_SITE_MAP
    if _TISSUE_SITE_MAP is None:
        # Imported lazily so the dependency is only paid for on first use.
        from pathlib import Path
        import pandas as pd

        # The CSV is resolved relative to this file: three directory levels
        # up, then "data/".
        csv_path = Path(__file__).parent.parent.parent / "data" / "tissue_site_original_to_idx.csv"
        df = pd.read_csv(csv_path)

        # Build into a local first; only publish to the cache once every row
        # converted cleanly. If a name is duplicated, the last row wins.
        loaded = {
            name: int(idx)
            for name, idx in zip(df["TISSUE_SITE"], df["idx"])
        }
        _TISSUE_SITE_MAP = loaded

    return _TISSUE_SITE_MAP
227
+
228
+
229
def get_tissue_site_options():
    """Return every known tissue site name in sorted order.

    The names are the keys of the mapping produced by
    ``get_tissue_site_map()``; they are de-duplicated and sorted, which makes
    the result suitable for e.g. populating a dropdown.

    Returns:
        list: Sorted list of unique tissue site names.
    """
    unique_names = set(get_tissue_site_map())
    return sorted(unique_names)
237
+
238
+
239
def encode_sex(sex):
    """Convert a sex label to its numeric encoding.

    Matching is case-insensitive and ignores surrounding whitespace, so
    ``"female"`` and ``" Female "`` are encoded the same as ``"Female"``
    instead of silently falling through to the Male code.

    Args:
        sex: Sex label, expected to be "Male" or "Female".

    Returns:
        int: 1 for Female, 0 for Male. Any other value (including ``None``,
            an empty string, or a non-string) also yields 0.
    """
    # NOTE(review): unrecognized input is indistinguishable from "Male" here;
    # confirm with the model's expected encoding whether a dedicated
    # "unknown" code should be returned instead.
    if not isinstance(sex, str):
        return 0
    return 1 if sex.strip().casefold() == "female" else 0
249
+
250
+
251
def encode_tissue_site(site_name):
    """Look up the integer index for a tissue site name.

    Args:
        site_name: Tissue site name, as it appears in the mapping returned
            by ``get_tissue_site_map()``.

    Returns:
        int: The index stored for ``site_name``, or 8 when the name is not
            in the mapping.
    """
    # Fallback index for unknown names; the original author documents
    # index 8 as "Not Applicable".
    fallback_idx = 8
    lookup = get_tissue_site_map()
    try:
        return lookup[site_name]
    except KeyError:
        return fallback_idx
262
+
263
+
264
def tissue_site_to_one_hot(site_idx, num_classes=57):
    """Build a one-hot list for a class index.

    Args:
        site_idx: Index of the class to mark.
        num_classes: Length of the returned vector (defaults to 57).

    Returns:
        list: ``num_classes`` integers, all 0 except a single 1 at position
            ``site_idx``. When ``site_idx`` is outside
            ``[0, num_classes)`` the vector is all zeros.
    """
    encoded = [0 for _ in range(num_classes)]

    # Out-of-range indices (including negatives, which would otherwise wrap
    # around via Python's negative indexing) leave the vector all-zero.
    in_range = 0 <= site_idx < num_classes
    if not in_range:
        return encoded

    encoded[site_idx] = 1
    return encoded
278
+
279
+
280
  class SiteType(Enum):
281
  PRIMARY = "Primary"
282
  METASTASIS = "Metastasis"
src/mosaic/ui/utils.py CHANGED
@@ -21,6 +21,8 @@ IHC_SUBTYPES = ["", "HR+/HER2+", "HR+/HER2-", "HR-/HER2+", "HR-/HER2-"]
21
  SETTINGS_COLUMNS = [
22
  "Slide",
23
  "Site Type",
 
 
24
  "Cancer Subtype",
25
  "IHC Subtype",
26
  "Segmentation Config",
 
21
  SETTINGS_COLUMNS = [
22
  "Slide",
23
  "Site Type",
24
+ "Sex",
25
+ "Tissue Site",
26
  "Cancer Subtype",
27
  "IHC Subtype",
28
  "Segmentation Config",