raylim Claude committed on
Commit
bac4d5d
·
unverified ·
1 Parent(s): 0506a57

Remove unused global cancer type mappings from aeon.py

Browse files

- Remove unused imports of INT_TO_CANCER_TYPE_MAP and CANCER_TYPE_TO_INT_MAP
- Remove unused col_indices_to_drop global variable
- Rename cancer_types_to_drop to CANCER_TYPES_TO_DROP and reduce it to the 5 excluded types (UDMN, ADNOS, CUP, CUPNOS, NOT)
- Update tests to import mappings from data.py instead of aeon.py
- All 8 tests pass successfully

The Aeon inference now exclusively uses metadata-loaded mappings (160 classes)
instead of the old global constants (183 classes), eliminating confusion.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

src/mosaic/inference/aeon.py CHANGED
@@ -16,8 +16,6 @@ from torch.utils.data import DataLoader
16
  from mosaic.inference.data import (
17
  SiteType,
18
  TileFeatureTensorDataset,
19
- INT_TO_CANCER_TYPE_MAP,
20
- CANCER_TYPE_TO_INT_MAP,
21
  encode_sex,
22
  encode_tissue_site,
23
  )
@@ -25,29 +23,14 @@ from mosaic.inference.data import (
25
  from loguru import logger
26
 
27
  # Cancer types excluded from prediction (too broad or ambiguous)
28
- cancer_types_to_drop = [
 
29
  "UDMN",
30
  "ADNOS",
31
  "CUP",
32
  "CUPNOS",
33
- "BRCNOS",
34
- "GNOS",
35
- "SCCNOS",
36
- "PDC",
37
- "NSCLC",
38
- "BRCA",
39
- "SARCNOS",
40
- "NETNOS",
41
- "MEL",
42
- "RCC",
43
- "BRCANOS",
44
- "COADREAD",
45
- "MUP",
46
- "NECNOS",
47
- "UCEC",
48
  "NOT",
49
  ]
50
- col_indices_to_drop = [CANCER_TYPE_TO_INT_MAP[x] for x in cancer_types_to_drop]
51
 
52
 
53
  BATCH_SIZE = 8
@@ -98,8 +81,7 @@ def run(
98
  CANCER_TYPE_TO_INT_MAP_LOCAL = {v: k for k, v in INT_TO_CANCER_TYPE_MAP_LOCAL.items()}
99
 
100
  # Calculate col_indices_to_drop using local mapping
101
- cancer_types_to_drop = ["UDMN", "ADNOS", "CUP", "CUPNOS", "NOT"]
102
- col_indices_to_drop_local = [CANCER_TYPE_TO_INT_MAP_LOCAL[x] for x in cancer_types_to_drop if x in CANCER_TYPE_TO_INT_MAP_LOCAL]
103
 
104
  site_type = SiteType.METASTASIS if metastatic else SiteType.PRIMARY
105
 
 
16
  from mosaic.inference.data import (
17
  SiteType,
18
  TileFeatureTensorDataset,
 
 
19
  encode_sex,
20
  encode_tissue_site,
21
  )
 
23
  from loguru import logger
24
 
25
  # Cancer types excluded from prediction (too broad or ambiguous)
26
+ # These are used to mask out predictions for overly general cancer types
27
+ CANCER_TYPES_TO_DROP = [
28
  "UDMN",
29
  "ADNOS",
30
  "CUP",
31
  "CUPNOS",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  "NOT",
33
  ]
 
34
 
35
 
36
  BATCH_SIZE = 8
 
81
  CANCER_TYPE_TO_INT_MAP_LOCAL = {v: k for k, v in INT_TO_CANCER_TYPE_MAP_LOCAL.items()}
82
 
83
  # Calculate col_indices_to_drop using local mapping
84
+ col_indices_to_drop_local = [CANCER_TYPE_TO_INT_MAP_LOCAL[x] for x in CANCER_TYPES_TO_DROP if x in CANCER_TYPE_TO_INT_MAP_LOCAL]
 
85
 
86
  site_type = SiteType.METASTASIS if metastatic else SiteType.PRIMARY
87
 
tests/inference/test_aeon.py CHANGED
@@ -4,43 +4,43 @@ import numpy as np
4
  import pytest
5
  import torch
6
 
7
- from mosaic.inference.aeon import (
 
8
  CANCER_TYPE_TO_INT_MAP,
9
  INT_TO_CANCER_TYPE_MAP,
10
- col_indices_to_drop,
11
  )
12
 
13
 
14
  class TestAeonConstants:
15
  """Test constants defined in aeon module."""
16
 
17
- def test_col_indices_to_drop_is_list(self):
18
- """Test that col_indices_to_drop is a list."""
19
- assert isinstance(col_indices_to_drop, list)
20
 
21
- def test_col_indices_to_drop_has_entries(self):
22
- """Test that col_indices_to_drop has entries."""
23
- assert len(col_indices_to_drop) > 0
24
 
25
- def test_col_indices_to_drop_are_integers(self):
26
- """Test that all indices are integers."""
27
- for idx in col_indices_to_drop:
28
- assert isinstance(idx, int)
29
 
30
- def test_col_indices_to_drop_are_valid(self):
31
- """Test that all indices are valid cancer type indices."""
32
- max_idx = max(CANCER_TYPE_TO_INT_MAP.values())
33
- for idx in col_indices_to_drop:
34
- assert 0 <= idx <= max_idx
 
 
35
 
36
- def test_col_indices_to_drop_contains_expected_types(self):
37
  """Test that specific cancer types are in the drop list."""
38
  # Check that some known cancer types to drop are in the list
39
- drop_types = ["UDMN", "CUP", "BRCA", "MEL"]
40
- for cancer_type in drop_types:
41
- if cancer_type in CANCER_TYPE_TO_INT_MAP:
42
- idx = CANCER_TYPE_TO_INT_MAP[cancer_type]
43
- assert idx in col_indices_to_drop
44
 
45
  def test_cancer_type_maps_available(self):
46
  """Test that cancer type maps are available."""
 
4
  import pytest
5
  import torch
6
 
7
+ from mosaic.inference.aeon import CANCER_TYPES_TO_DROP
8
+ from mosaic.inference.data import (
9
  CANCER_TYPE_TO_INT_MAP,
10
  INT_TO_CANCER_TYPE_MAP,
 
11
  )
12
 
13
 
14
  class TestAeonConstants:
15
  """Test constants defined in aeon module."""
16
 
17
+ def test_cancer_types_to_drop_is_list(self):
18
+ """Test that CANCER_TYPES_TO_DROP is a list."""
19
+ assert isinstance(CANCER_TYPES_TO_DROP, list)
20
 
21
+ def test_cancer_types_to_drop_has_entries(self):
22
+ """Test that CANCER_TYPES_TO_DROP has entries."""
23
+ assert len(CANCER_TYPES_TO_DROP) > 0
24
 
25
+ def test_cancer_types_to_drop_are_strings(self):
26
+ """Test that all cancer types are strings."""
27
+ for cancer_type in CANCER_TYPES_TO_DROP:
28
+ assert isinstance(cancer_type, str)
29
 
30
+ def test_cancer_types_to_drop_are_valid(self):
31
+ """Test that all cancer types to drop are valid cancer type codes."""
32
+ # They should all be uppercase alphanumeric codes
33
+ for cancer_type in CANCER_TYPES_TO_DROP:
34
+ assert cancer_type.isupper()
35
+ assert len(cancer_type) >= 2
36
+ assert len(cancer_type) <= 10
37
 
38
+ def test_cancer_types_to_drop_contains_expected_types(self):
39
  """Test that specific cancer types are in the drop list."""
40
  # Check that some known cancer types to drop are in the list
41
+ expected_types = ["UDMN", "CUP", "NOT"]
42
+ for cancer_type in expected_types:
43
+ assert cancer_type in CANCER_TYPES_TO_DROP
 
 
44
 
45
  def test_cancer_type_maps_available(self):
46
  """Test that cancer type maps are available."""