Spaces:
Sleeping
Sleeping
Add sex and tissue site parameters to Aeon inference
Browse files
- Add encoding functions to data.py (encode_sex, encode_tissue_site, tissue_site_to_one_hot)
- Update TileFeatureTensorDataset to optionally include SEX and TISSUE_SITE fields
- Add sex and tissue_site_idx parameters to Aeon run() function
- Thread sex and tissue_site through analysis pipeline (Aeon only, Paladin unchanged)
- Add Sex and Tissue Site dropdowns to UI
- Add validation for Sex and Tissue Site in settings CSV
- Update SETTINGS_COLUMNS to include new fields
- src/mosaic/analysis.py +2 -0
- src/mosaic/inference/data.py +76 -0
- src/mosaic/ui/utils.py +2 -0
src/mosaic/analysis.py
CHANGED
|
@@ -379,6 +379,8 @@ def analyze_slide(
|
|
| 379 |
slide_path,
|
| 380 |
seg_config,
|
| 381 |
site_type,
|
|
|
|
|
|
|
| 382 |
cancer_subtype,
|
| 383 |
cancer_subtype_name_map,
|
| 384 |
ihc_subtype="",
|
|
|
|
| 379 |
slide_path,
|
| 380 |
seg_config,
|
| 381 |
site_type,
|
| 382 |
+
sex,
|
| 383 |
+
tissue_site,
|
| 384 |
cancer_subtype,
|
| 385 |
cancer_subtype_name_map,
|
| 386 |
ihc_subtype="",
|
src/mosaic/inference/data.py
CHANGED
|
@@ -201,6 +201,82 @@ CANCER_TYPE_TO_INT_MAP = {
|
|
| 201 |
INT_TO_CANCER_TYPE_MAP = {v: k for k, v in CANCER_TYPE_TO_INT_MAP.items()}
|
| 202 |
|
| 203 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
class SiteType(Enum):
|
| 205 |
PRIMARY = "Primary"
|
| 206 |
METASTASIS = "Metastasis"
|
|
|
|
| 201 |
INT_TO_CANCER_TYPE_MAP = {v: k for k, v in CANCER_TYPE_TO_INT_MAP.items()}
|
| 202 |
|
| 203 |
|
| 204 |
+
# Tissue site mapping (module-level cache); None until the first successful load.
_TISSUE_SITE_MAP = None


def get_tissue_site_map():
    """Load the tissue site name -> index mapping from CSV, caching the result.

    The CSV is read once from ``data/tissue_site_original_to_idx.csv``,
    resolved three directory levels above this file, using its
    ``TISSUE_SITE`` and ``idx`` columns. Later calls return the cached dict.

    Returns:
        dict: Mapping of tissue site names to integer indices (the original
            documentation states the range is 0-56; it is not validated here).

    Raises:
        Whatever ``pandas.read_csv`` raises if the file cannot be read,
        ``KeyError`` if an expected column is missing, and
        ``ValueError``/``TypeError`` if an ``idx`` value cannot be converted
        to ``int``. In all of these cases the cache is left unset so the next
        call retries instead of returning a partially filled mapping.
    """
    global _TISSUE_SITE_MAP
    if _TISSUE_SITE_MAP is None:
        from pathlib import Path

        import pandas as pd

        csv_path = (
            Path(__file__).parent.parent.parent
            / "data"
            / "tissue_site_original_to_idx.csv"
        )
        frame = pd.read_csv(csv_path)

        # Build into a local first: publishing the dict only after every row
        # converted cleanly prevents a failed load from poisoning the cache.
        # Later rows win on duplicate names, as with sequential assignment.
        loaded = {
            name: int(idx)
            for name, idx in zip(frame["TISSUE_SITE"], frame["idx"])
        }
        _TISSUE_SITE_MAP = loaded

    return _TISSUE_SITE_MAP
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
def get_tissue_site_options():
    """Return the tissue site names to offer in UI dropdowns.

    Returns:
        list: The keys of ``get_tissue_site_map()``, de-duplicated and
            sorted in ascending order.
    """
    unique_names = set(get_tissue_site_map())
    options = list(unique_names)
    options.sort()
    return options
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
def encode_sex(sex):
    """Convert a sex label to its numeric encoding.

    String input is matched after stripping surrounding whitespace and
    ignoring case, so ``"female"`` or ``" Female "`` encode the same as
    ``"Female"``.

    Args:
        sex: "Male" or "Female".

    Returns:
        int: 1 for Female, 0 for Male. Any other value (including ``None``,
            an empty string, or a non-string) also yields 0.
            NOTE(review): unknown/missing values are therefore encoded the
            same as Male -- confirm whether a separate "unknown" class is
            expected downstream.
    """
    if isinstance(sex, str):
        # Tolerate hand-edited values that differ only in case or padding.
        return 1 if sex.strip().casefold() == "female" else 0
    return 0
|
| 249 |
+
|
| 250 |
+
|
| 251 |
+
def encode_tissue_site(site_name):
    """Translate a tissue site name into its integer index.

    Args:
        site_name: Tissue site name, expected to be one of the keys of
            ``get_tissue_site_map()``.

    Returns:
        int: The mapped index, or 8 when ``site_name`` is not a known key.
    """
    # 8 is described as the "Not Applicable" index by the original
    # documentation; that is not verified against the CSV here.
    fallback_idx = 8
    lookup = get_tissue_site_map()
    try:
        return lookup[site_name]
    except KeyError:
        return fallback_idx
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
def tissue_site_to_one_hot(site_idx, num_classes=57):
    """Build a one-hot list for a class index.

    Args:
        site_idx: Index to mark (documented as 0-56 for tissue site,
            0-2 for sex).
        num_classes: Length of the returned vector (documented as 57 for
            tissue site, 3 for sex).

    Returns:
        list: ``num_classes`` integers, all 0 except a single 1 at
            ``site_idx``. If ``site_idx`` is outside ``[0, num_classes)``
            the vector is all zeros.
    """
    encoded = [0] * num_classes
    if not (0 <= site_idx < num_classes):
        # Out-of-range index: return the all-zero vector unchanged.
        return encoded
    encoded[site_idx] = 1
    return encoded
|
| 278 |
+
|
| 279 |
+
|
| 280 |
class SiteType(Enum):
|
| 281 |
PRIMARY = "Primary"
|
| 282 |
METASTASIS = "Metastasis"
|
src/mosaic/ui/utils.py
CHANGED
|
@@ -21,6 +21,8 @@ IHC_SUBTYPES = ["", "HR+/HER2+", "HR+/HER2-", "HR-/HER2+", "HR-/HER2-"]
|
|
| 21 |
SETTINGS_COLUMNS = [
|
| 22 |
"Slide",
|
| 23 |
"Site Type",
|
|
|
|
|
|
|
| 24 |
"Cancer Subtype",
|
| 25 |
"IHC Subtype",
|
| 26 |
"Segmentation Config",
|
|
|
|
| 21 |
SETTINGS_COLUMNS = [
|
| 22 |
"Slide",
|
| 23 |
"Site Type",
|
| 24 |
+
"Sex",
|
| 25 |
+
"Tissue Site",
|
| 26 |
"Cancer Subtype",
|
| 27 |
"IHC Subtype",
|
| 28 |
"Segmentation Config",
|