copilot-swe-agent[bot] raylim committed on
Commit
8f383ed
·
1 Parent(s): db83d34

Add comprehensive unit tests for mosaic package

Browse files

Co-authored-by: raylim <3074310+raylim@users.noreply.github.com>

.gitignore CHANGED
@@ -12,3 +12,6 @@ tmp*
12
  .idea/
13
  .vscode/
14
  data/
 
 
 
 
12
  .idea/
13
  .vscode/
14
  data/
15
+ .pytest_cache/
16
+ .coverage
17
+ htmlcov/
pyproject.toml CHANGED
@@ -22,7 +22,7 @@ paladin_inference = "mosaic.inference.paladin:main"
22
  mosaic = "mosaic.gradio_app:main"
23
 
24
  [dependency-groups]
25
- dev = ["black>=25.1.0", "pylint>=3.3.6"]
26
 
27
  [tool.pylint."messages control"]
28
  disable = [
@@ -34,3 +34,10 @@ disable = [
34
  [tool.uv.sources]
35
  paladin = { git = "ssh://git@github.com/pathology-data-mining/paladin.git", rev = "dev" }
36
  mussel = { git = "https://github.com/pathology-data-mining/Mussel.git", rev = "ray-dev" }
 
 
 
 
 
 
 
 
22
  mosaic = "mosaic.gradio_app:main"
23
 
24
  [dependency-groups]
25
+ dev = ["black>=25.1.0", "pylint>=3.3.6", "pytest>=8.3.0", "pytest-cov>=6.0.0", "pytest-mock>=3.14.0"]
26
 
27
  [tool.pylint."messages control"]
28
  disable = [
 
34
  [tool.uv.sources]
35
  paladin = { git = "ssh://git@github.com/pathology-data-mining/paladin.git", rev = "dev" }
36
  mussel = { git = "https://github.com/pathology-data-mining/Mussel.git", rev = "ray-dev" }
37
+
38
+ [tool.pytest.ini_options]
39
+ testpaths = ["tests"]
40
+ python_files = ["test_*.py"]
41
+ python_classes = ["Test*"]
42
+ python_functions = ["test_*"]
43
+ addopts = "-v --cov=src/mosaic --cov-report=term-missing"
tests/README.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Mosaic Unit Tests
2
+
3
+ This directory contains comprehensive unit tests for the Mosaic package.
4
+
5
+ ## Running Tests
6
+
7
+ To run all tests:
8
+
9
+ ```bash
10
+ pytest tests/
11
+ ```
12
+
13
+ To run tests with coverage report:
14
+
15
+ ```bash
16
+ pytest tests/ --cov=src/mosaic --cov-report=term-missing
17
+ ```
18
+
19
+ To run a specific test file:
20
+
21
+ ```bash
22
+ pytest tests/inference/test_data.py -v
23
+ ```
24
+
25
+ ## Test Structure
26
+
27
+ - `tests/inference/` - Tests for inference modules
28
+ - `test_data.py` - Tests for data structures and datasets
29
+ - `test_aeon.py` - Tests for Aeon inference module
30
+ - `test_paladin.py` - Tests for Paladin inference module
31
+ - `tests/test_gradio_app.py` - Tests for Gradio application utilities
32
+ - `tests/conftest.py` - Pytest configuration and fixtures
33
+
34
+ ## Test Coverage
35
+
36
+ Current test coverage includes:
37
+
38
+ ### Data Module (`mosaic.inference.data`)
39
+ - ✅ Cancer type mapping constants
40
+ - ✅ SiteType enum
41
+ - ✅ TileFeatureTensorDataset class
42
+ - Dataset initialization
43
+ - Feature padding and truncation
44
+ - Data type conversions
45
+ - Site type handling
46
+
47
+ ### Aeon Module (`mosaic.inference.aeon`)
48
+ - ✅ Module constants
49
+ - ✅ Cancer type indices
50
+
51
+ ### Paladin Module (`mosaic.inference.paladin`)
52
+ - ✅ Model map loading
53
+ - ✅ Aeon score loading
54
+ - ✅ Cancer subtype selection
55
+ - ✅ Logits to point estimates conversion
56
+ - ✅ UsageError exception
57
+
58
+ ### Gradio App Module (`mosaic.gradio_app`)
59
+ - ✅ Constants (IHC subtypes, settings columns)
60
+ - ✅ Settings loading and validation
61
+ - ✅ Oncotree code name retrieval
62
+ - ✅ CSV export functionality
63
+
64
+ ## Notes
65
+
66
+ - Tests use mocking for external dependencies (e.g., mussel models, network requests)
67
+ - Some tests require pytest-mock for mocking functionality
68
+ - Tests are designed to run without requiring full model downloads or GPU access
tests/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Tests module for mosaic package
tests/conftest.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Pytest configuration and fixtures."""

import sys
from unittest.mock import MagicMock

# Mock heavy dependencies before any imports.
# This is necessary to allow tests to run without full environment setup.
#
# NOTE: parent packages are mocked as well. A plain `import mussel.models`
# binds the top-level name `mussel`, which the import system resolves from
# sys.modules — mocking only the submodule would make such imports fail.
mock_modules = [
    'mussel',
    'mussel.models',
    'mussel.utils',
    'mussel.utils.segment',
    'mussel.cli',
    'mussel.cli.tessellate',
]

for module in mock_modules:
    sys.modules[module] = MagicMock()

# Wire each mocked submodule onto its parent so attribute access
# (e.g. `mussel.models` after `import mussel.models`) yields the same
# mock object that lives in sys.modules["mussel.models"].
for module in mock_modules:
    parent, _, child = module.rpartition('.')
    if parent:
        setattr(sys.modules[parent], child, sys.modules[module])
tests/inference/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Tests for inference module
tests/inference/test_aeon.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Unit tests for mosaic.inference.aeon module."""
2
+
3
+ import numpy as np
4
+ import pytest
5
+ import torch
6
+
7
+ from mosaic.inference.aeon import (
8
+ CANCER_TYPE_TO_INT_MAP,
9
+ INT_TO_CANCER_TYPE_MAP,
10
+ col_indices_to_drop,
11
+ )
12
+
13
+
14
class TestAeonConstants:
    """Tests for the module-level constants exposed by ``mosaic.inference.aeon``."""

    def test_col_indices_to_drop_is_list(self):
        """col_indices_to_drop must be a plain list."""
        assert isinstance(col_indices_to_drop, list)

    def test_col_indices_to_drop_has_entries(self):
        """The drop list must not be empty."""
        assert col_indices_to_drop

    def test_col_indices_to_drop_are_integers(self):
        """Every entry in the drop list must be an int."""
        assert all(isinstance(entry, int) for entry in col_indices_to_drop)

    def test_col_indices_to_drop_are_valid(self):
        """Every index must fall within the range of known cancer-type indices."""
        upper = max(CANCER_TYPE_TO_INT_MAP.values())
        assert all(0 <= entry <= upper for entry in col_indices_to_drop)

    def test_col_indices_to_drop_contains_expected_types(self):
        """Known-dropped cancer types, when mapped, must appear in the drop list."""
        for code in ("UDMN", "CUP", "BRCA", "MEL"):
            if code in CANCER_TYPE_TO_INT_MAP:
                assert CANCER_TYPE_TO_INT_MAP[code] in col_indices_to_drop

    def test_cancer_type_maps_available(self):
        """Both directional maps exist and are non-empty."""
        assert CANCER_TYPE_TO_INT_MAP is not None
        assert INT_TO_CANCER_TYPE_MAP is not None
        assert len(CANCER_TYPE_TO_INT_MAP) > 0
        assert len(INT_TO_CANCER_TYPE_MAP) > 0

    def test_batch_size_constant(self):
        """BATCH_SIZE is defined as a positive integer."""
        from mosaic.inference.aeon import BATCH_SIZE

        assert isinstance(BATCH_SIZE, int)
        assert BATCH_SIZE > 0

    def test_num_workers_constant(self):
        """NUM_WORKERS is defined as a positive integer."""
        from mosaic.inference.aeon import NUM_WORKERS

        assert isinstance(NUM_WORKERS, int)
        assert NUM_WORKERS > 0
tests/inference/test_data.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Unit tests for mosaic.inference.data module."""
2
+
3
+ import numpy as np
4
+ import pytest
5
+ import torch
6
+
7
+ from mosaic.inference.data import (
8
+ CANCER_TYPE_TO_INT_MAP,
9
+ INT_TO_CANCER_TYPE_MAP,
10
+ SiteType,
11
+ TileFeatureTensorDataset,
12
+ )
13
+
14
+
15
class TestCancerTypeMaps:
    """Tests for the cancer-type <-> integer mapping constants."""

    def test_cancer_type_to_int_map_has_entries(self):
        """CANCER_TYPE_TO_INT_MAP is non-empty."""
        assert len(CANCER_TYPE_TO_INT_MAP) > 0

    def test_int_to_cancer_type_map_has_entries(self):
        """INT_TO_CANCER_TYPE_MAP is non-empty."""
        assert len(INT_TO_CANCER_TYPE_MAP) > 0

    def test_maps_are_inverse(self):
        """Each map is the exact inverse of the other."""
        assert len(CANCER_TYPE_TO_INT_MAP) == len(INT_TO_CANCER_TYPE_MAP)
        assert all(
            INT_TO_CANCER_TYPE_MAP[index] == name
            for name, index in CANCER_TYPE_TO_INT_MAP.items()
        )

    def test_cancer_type_to_int_map_contains_known_types(self):
        """A handful of well-known cancer-type codes are present."""
        for code in ("LUAD", "BRCA", "PRAD", "COAD"):
            assert code in CANCER_TYPE_TO_INT_MAP

    def test_indices_are_unique(self):
        """No two cancer types share the same integer index."""
        values = list(CANCER_TYPE_TO_INT_MAP.values())
        assert len(values) == len(set(values))
42
+
43
+
44
class TestSiteType:
    """Tests for the SiteType enum."""

    def test_site_type_primary_value(self):
        """PRIMARY maps to the string 'Primary'."""
        assert SiteType.PRIMARY.value == "Primary"

    def test_site_type_metastasis_value(self):
        """METASTASIS maps to the string 'Metastasis'."""
        assert SiteType.METASTASIS.value == "Metastasis"

    def test_site_type_has_two_members(self):
        """The enum defines exactly two members."""
        assert len(SiteType) == 2
58
+
59
+
60
class TestTileFeatureTensorDataset:
    """Tests for the TileFeatureTensorDataset class."""

    @staticmethod
    def _build(features, site_type=None, **kwargs):
        """Construct a dataset; small shim to cut constructor repetition.

        The PRIMARY default is resolved at call time so the class body does
        not evaluate SiteType during definition.
        """
        if site_type is None:
            site_type = SiteType.PRIMARY
        return TileFeatureTensorDataset(
            site_type=site_type, tile_features=features, **kwargs
        )

    @pytest.fixture
    def sample_features(self):
        """Random (100, 768) float32 features."""
        return np.random.rand(100, 768).astype(np.float32)

    @pytest.fixture
    def large_features(self):
        """Random (25000, 768) float32 features, larger than the tile caps below."""
        return np.random.rand(25000, 768).astype(np.float32)

    @pytest.fixture
    def small_features(self):
        """Random (50, 768) float32 features, smaller than the tile caps below."""
        return np.random.rand(50, 768).astype(np.float32)

    def test_dataset_initialization(self, sample_features):
        """Constructor stores site type, tile cap, and a tensor of features."""
        ds = self._build(sample_features, n_max_tiles=20000)
        assert ds.site_type == SiteType.PRIMARY
        assert ds.n_max_tiles == 20000
        assert isinstance(ds.features, torch.Tensor)

    def test_dataset_length(self, sample_features):
        """A dataset always reports a length of exactly 1."""
        assert len(self._build(sample_features)) == 1

    def test_dataset_getitem_structure(self, sample_features):
        """__getitem__ yields a dict with 'site' and 'tile_tensor' entries."""
        item = self._build(sample_features, site_type=SiteType.METASTASIS)[0]
        assert isinstance(item, dict)
        assert "site" in item
        assert "tile_tensor" in item
        assert item["site"] == "Metastasis"
        assert isinstance(item["tile_tensor"], torch.Tensor)

    def test_features_are_padded_when_small(self, small_features):
        """Fewer tiles than n_max_tiles are padded up to the cap."""
        cap = 1000
        ds = self._build(small_features, n_max_tiles=cap)
        assert ds.features.shape[0] == cap
        assert ds.features.shape[1] == small_features.shape[1]

    def test_features_are_truncated_when_large(self, large_features):
        """More tiles than n_max_tiles are truncated down to the cap."""
        cap = 20000
        ds = self._build(large_features, n_max_tiles=cap)
        assert ds.features.shape[0] == cap
        assert ds.features.shape[1] == large_features.shape[1]

    def test_features_dtype_is_float32(self, sample_features):
        """Stored features are float32."""
        assert self._build(sample_features).features.dtype == torch.float32

    def test_site_type_primary(self, sample_features):
        """PRIMARY datasets report site 'Primary'."""
        assert self._build(sample_features)[0]["site"] == "Primary"

    def test_site_type_metastasis(self, sample_features):
        """METASTASIS datasets report site 'Metastasis'."""
        item = self._build(sample_features, site_type=SiteType.METASTASIS)[0]
        assert item["site"] == "Metastasis"

    def test_features_exact_size(self):
        """Inputs of exactly n_max_tiles keep their shape."""
        cap = 100
        feats = np.random.rand(cap, 768).astype(np.float32)
        ds = self._build(feats, n_max_tiles=cap)
        assert ds.features.shape[0] == cap
        assert ds.features.shape[1] == 768

    def test_features_shape_preserved(self, sample_features):
        """The feature dimension (axis 1) is never altered."""
        ds = self._build(sample_features)
        assert ds.features.shape[1] == sample_features.shape[1]

    def test_different_feature_dimensions(self):
        """Datasets accept a range of feature dimensionalities."""
        for dim in (256, 512, 768, 1024):
            feats = np.random.rand(100, dim).astype(np.float32)
            assert self._build(feats).features.shape[1] == dim
tests/inference/test_paladin.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Unit tests for mosaic.inference.paladin module."""
2
+
3
+ import csv
4
+ import tempfile
5
+ from pathlib import Path
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ import pytest
10
+
11
+ from mosaic.inference.paladin import (
12
+ UsageError,
13
+ load_aeon_scores,
14
+ load_model_map,
15
+ select_cancer_subtypes,
16
+ logits_to_point_estimates,
17
+ )
18
+ import torch
19
+
20
+
21
class TestLoadModelMap:
    """Tests for load_model_map."""

    @pytest.fixture
    def temp_model_map_csv(self):
        """Write a small model-map CSV to disk and yield its path."""
        rows = [
            ["cancer_subtype", "target_name", "model_path"],
            ["LUAD", "PD-L1", "/path/to/luad_pdl1.pkl"],
            ["LUAD", "EGFR", "/path/to/luad_egfr.pkl"],
            ["BRCA", "HER2", "/path/to/brca_her2.pkl"],
            ["COAD", "MSI_TYPE", "/path/to/coad_msi.pkl"],
        ]
        with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".csv") as f:
            csv.writer(f).writerows(rows)
            path = f.name
        yield path
        Path(path).unlink()

    def test_load_model_map_structure(self, temp_model_map_csv):
        """Result is a dict keyed by cancer subtype."""
        model_map = load_model_map(temp_model_map_csv)
        assert isinstance(model_map, dict)
        for subtype in ("LUAD", "BRCA", "COAD"):
            assert subtype in model_map

    def test_load_model_map_nested_dict(self, temp_model_map_csv):
        """Each subtype maps to a nested dict keyed by target name."""
        luad = load_model_map(temp_model_map_csv)["LUAD"]
        assert isinstance(luad, dict)
        assert "PD-L1" in luad
        assert "EGFR" in luad

    def test_load_model_map_values(self, temp_model_map_csv):
        """Leaf values are the model paths taken from the CSV."""
        model_map = load_model_map(temp_model_map_csv)
        expected = {
            ("LUAD", "PD-L1"): "/path/to/luad_pdl1.pkl",
            ("LUAD", "EGFR"): "/path/to/luad_egfr.pkl",
            ("BRCA", "HER2"): "/path/to/brca_her2.pkl",
            ("COAD", "MSI_TYPE"): "/path/to/coad_msi.pkl",
        }
        for (subtype, target), path in expected.items():
            assert model_map[subtype][target] == path

    def test_load_model_map_multiple_targets_per_subtype(self, temp_model_map_csv):
        """A single cancer subtype may carry several targets."""
        assert len(load_model_map(temp_model_map_csv)["LUAD"]) == 2
65
+
66
+
67
class TestLoadAeonScores:
    """Tests for load_aeon_scores."""

    @pytest.fixture
    def sample_aeon_df(self):
        """A four-row Aeon results frame."""
        data = {
            "Cancer Subtype": ["LUAD", "BRCA", "COAD", "PRAD"],
            "Confidence": [0.85, 0.10, 0.03, 0.02],
        }
        return pd.DataFrame(data)

    def test_load_aeon_scores_returns_dict(self, sample_aeon_df):
        """The output type is dict."""
        assert isinstance(load_aeon_scores(sample_aeon_df), dict)

    def test_load_aeon_scores_correct_mapping(self, sample_aeon_df):
        """Subtype -> confidence pairs survive the conversion."""
        scores = load_aeon_scores(sample_aeon_df)
        expected = {"LUAD": 0.85, "BRCA": 0.10, "COAD": 0.03, "PRAD": 0.02}
        for subtype, confidence in expected.items():
            assert scores[subtype] == confidence

    def test_load_aeon_scores_all_entries(self, sample_aeon_df):
        """Every row produces exactly one entry."""
        assert len(load_aeon_scores(sample_aeon_df)) == 4

    def test_load_aeon_scores_empty_dataframe(self):
        """An empty frame yields an empty dict."""
        empty = pd.DataFrame({"Cancer Subtype": [], "Confidence": []})
        result = load_aeon_scores(empty)
        assert isinstance(result, dict)
        assert len(result) == 0
104
+
105
+
106
class TestSelectCancerSubtypes:
    """Tests for select_cancer_subtypes."""

    @pytest.fixture
    def sample_scores(self):
        """Aeon scores ordered from most to least confident."""
        return {"LUAD": 0.85, "BRCA": 0.10, "COAD": 0.03, "PRAD": 0.02}

    def test_select_top_one_cancer_subtype(self, sample_scores):
        """k=1 returns only the highest-scoring subtype."""
        result = select_cancer_subtypes(sample_scores, k=1)
        assert isinstance(result, list)
        assert result == ["LUAD"]

    def test_select_top_three_cancer_subtypes(self, sample_scores):
        """k=3 returns the three best subtypes in score order."""
        result = select_cancer_subtypes(sample_scores, k=3)
        assert result == ["LUAD", "BRCA", "COAD"]

    def test_select_all_cancer_subtypes(self, sample_scores):
        """A k larger than the score count returns everything, still ordered."""
        result = select_cancer_subtypes(sample_scores, k=10)
        assert len(result) == 4
        assert result[0] == "LUAD"
        assert result[-1] == "PRAD"

    def test_select_default_k_value(self, sample_scores):
        """The default for k is 1."""
        result = select_cancer_subtypes(sample_scores)
        assert result == ["LUAD"]

    def test_select_with_empty_scores(self):
        """An empty scores dict produces an empty list."""
        result = select_cancer_subtypes({}, k=1)
        assert isinstance(result, list)
        assert not result
152
+
153
+
154
class TestLogitsToPointEstimates:
    """Tests for logits_to_point_estimates."""

    def test_logits_to_point_estimates_shape(self):
        """(batch, 2 * n_tasks) logits collapse to (batch, n_tasks)."""
        n_batches, n_tasks = 4, 5
        logits = torch.rand(n_batches, 2 * n_tasks)
        assert logits_to_point_estimates(logits).shape == (n_batches, n_tasks)

    def test_logits_to_point_estimates_values_in_range(self):
        """All point estimates lie inside [0, 1]."""
        logits = torch.tensor([[1.0, 2.0, 3.0, 4.0], [0.5, 0.5, 1.0, 1.0]])
        estimates = logits_to_point_estimates(logits)
        assert torch.all(estimates >= 0.0)
        assert torch.all(estimates <= 1.0)

    def test_logits_to_point_estimates_calculation(self):
        """The estimate equals alpha / (alpha + beta)."""
        estimates = logits_to_point_estimates(torch.tensor([[2.0, 4.0]]))
        assert torch.isclose(estimates[0, 0], torch.tensor(2.0 / 6.0))

    def test_logits_to_point_estimates_single_batch(self):
        """One row of six logits yields three estimates."""
        logits = torch.tensor([[1.0, 1.0, 2.0, 2.0, 3.0, 3.0]])
        assert logits_to_point_estimates(logits).shape == (1, 3)

    def test_logits_to_point_estimates_multiple_batches(self):
        """Ten rows of eight logits yield a (10, 4) result."""
        assert logits_to_point_estimates(torch.rand(10, 8)).shape == (10, 4)
191
+
192
+
193
class TestUsageError:
    """Tests for the UsageError exception class."""

    def test_usage_error_is_exception(self):
        """UsageError derives from Exception."""
        assert issubclass(UsageError, Exception)

    def test_usage_error_can_be_raised(self):
        """A raised UsageError is caught by pytest.raises."""
        with pytest.raises(UsageError):
            raise UsageError("Test error message")

    def test_usage_error_message(self):
        """The message passed at raise time is preserved on the exception."""
        text = "Test error message"
        with pytest.raises(UsageError, match=text):
            raise UsageError(text)
tests/test_gradio_app.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Unit tests for mosaic.gradio_app module."""
2
+
3
+ import tempfile
4
+ from pathlib import Path
5
+
6
+ import pandas as pd
7
+ import pytest
8
+
9
+ from mosaic.gradio_app import (
10
+ IHC_SUBTYPES,
11
+ SETTINGS_COLUMNS,
12
+ load_settings,
13
+ validate_settings,
14
+ )
15
+
16
+
17
class TestConstants:
    """Tests for module-level constants in gradio_app."""

    def test_ihc_subtypes_list(self):
        """IHC_SUBTYPES is a plain list."""
        assert isinstance(IHC_SUBTYPES, list)

    def test_ihc_subtypes_has_entries(self):
        """IHC_SUBTYPES is non-empty."""
        assert len(IHC_SUBTYPES) > 0

    def test_ihc_subtypes_contains_expected_values(self):
        """The four HR/HER2 breast-cancer combinations are present."""
        for combo in ("HR+/HER2+", "HR+/HER2-", "HR-/HER2+", "HR-/HER2-"):
            assert combo in IHC_SUBTYPES

    def test_ihc_subtypes_includes_empty_string(self):
        """An empty string is offered for non-breast cancers."""
        assert "" in IHC_SUBTYPES

    def test_settings_columns_list(self):
        """SETTINGS_COLUMNS is a plain list."""
        assert isinstance(SETTINGS_COLUMNS, list)

    def test_settings_columns_required_fields(self):
        """All columns the app depends on are declared."""
        required = (
            "Slide",
            "Site Type",
            "Cancer Subtype",
            "IHC Subtype",
            "Segmentation Config",
        )
        for column in required:
            assert column in SETTINGS_COLUMNS
53
+
54
+
55
class TestLoadSettings:
    """Tests for load_settings."""

    @staticmethod
    def _write_csv(text):
        """Persist *text* to a temporary .csv file and return its path."""
        with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".csv") as f:
            f.write(text)
            return f.name

    @pytest.fixture
    def temp_settings_csv(self):
        """Settings CSV containing every expected column."""
        path = self._write_csv(
            "Slide,Site Type,Cancer Subtype,IHC Subtype,Segmentation Config\n"
            "slide1.svs,Primary,Unknown,,Biopsy\n"
            "slide2.svs,Metastatic,Unknown,,Resection\n"
        )
        yield path
        Path(path).unlink()

    @pytest.fixture
    def temp_minimal_settings_csv(self):
        """Settings CSV containing only the minimal columns."""
        path = self._write_csv(
            "Slide,Site Type\n"
            "slide1.svs,Primary\n"
            "slide2.svs,Metastatic\n"
        )
        yield path
        Path(path).unlink()

    def test_load_settings_returns_dataframe(self, temp_settings_csv):
        """load_settings produces a pandas DataFrame."""
        assert isinstance(load_settings(temp_settings_csv), pd.DataFrame)

    def test_load_settings_has_all_columns(self, temp_settings_csv):
        """All required columns are present in the result."""
        df = load_settings(temp_settings_csv)
        assert all(col in df.columns for col in SETTINGS_COLUMNS)

    def test_load_settings_adds_missing_columns(self, temp_minimal_settings_csv):
        """Columns absent from the CSV are added with their defaults."""
        df = load_settings(temp_minimal_settings_csv)
        defaults = {
            "Segmentation Config": "Biopsy",
            "Cancer Subtype": "Unknown",
            "IHC Subtype": "",
        }
        for column, default in defaults.items():
            assert column in df.columns
            assert df[column].iloc[0] == default

    def test_load_settings_preserves_data(self, temp_settings_csv):
        """Row contents survive loading unchanged."""
        df = load_settings(temp_settings_csv)
        assert len(df) == 2
        assert df["Slide"].iloc[0] == "slide1.svs"
        assert df["Site Type"].iloc[0] == "Primary"

    def test_load_settings_missing_required_column_raises_error(self):
        """A CSV lacking a required column raises ValueError."""
        path = self._write_csv("RandomColumn\nvalue\n")
        try:
            with pytest.raises(ValueError, match="Missing required column"):
                load_settings(path)
        finally:
            Path(path).unlink()

    def test_load_settings_filters_to_settings_columns(self, temp_settings_csv):
        """Only SETTINGS_COLUMNS appear in the result, in order."""
        assert list(load_settings(temp_settings_csv).columns) == SETTINGS_COLUMNS
131
+
132
+
133
class TestGetOncotreeCodeName:
    """Tests for get_oncotree_code_name."""

    @staticmethod
    def _patch_oncotree(mocker, status_code, payload):
        """Stub requests.get with a canned status code and JSON payload."""
        response = mocker.Mock()
        response.status_code = status_code
        response.json.return_value = payload
        mocker.patch("requests.get", return_value=response)

    def test_oncotree_code_name_caching(self, mocker):
        """A looked-up code lands in the cache and is reused on repeat calls."""
        from mosaic.gradio_app import get_oncotree_code_name, oncotree_code_map

        self._patch_oncotree(mocker, 200, [{"name": "Lung Adenocarcinoma"}])
        oncotree_code_map.clear()

        code = "LUAD"
        first = get_oncotree_code_name(code)
        # The first call must have populated the cache.
        assert code in oncotree_code_map
        # The second call must return the same (cached) value.
        assert get_oncotree_code_name(code) == first

    def test_oncotree_code_name_returns_string(self, mocker):
        """The resolved name is a str."""
        from mosaic.gradio_app import get_oncotree_code_name, oncotree_code_map

        self._patch_oncotree(mocker, 200, [{"name": "Lung Adenocarcinoma"}])
        oncotree_code_map.clear()
        assert isinstance(get_oncotree_code_name("LUAD"), str)

    def test_oncotree_invalid_code_returns_unknown(self, mocker):
        """An unresolvable code falls back to 'Unknown'."""
        from mosaic.gradio_app import get_oncotree_code_name, oncotree_code_map

        self._patch_oncotree(mocker, 404, [])
        oncotree_code_map.clear()
        assert get_oncotree_code_name("INVALID_CODE_XYZ123") == "Unknown"
190
+
191
+
192
class TestExportToCsv:
    """Tests for export_to_csv."""

    def test_export_to_csv_returns_path(self):
        """The return value is a path string ending in .csv."""
        from mosaic.gradio_app import export_to_csv

        out = export_to_csv(pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}))
        assert isinstance(out, str)
        assert out.endswith(".csv")
        # Tidy up the exported file.
        Path(out).unlink(missing_ok=True)

    def test_export_to_csv_creates_file(self):
        """A file actually appears on disk at the returned path."""
        from mosaic.gradio_app import export_to_csv

        out = export_to_csv(pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}))
        assert Path(out).exists()
        # Tidy up the exported file.
        Path(out).unlink()

    def test_export_to_csv_with_empty_dataframe_raises_error(self):
        """Exporting an empty frame is rejected with gr.Error."""
        import gradio as gr

        from mosaic.gradio_app import export_to_csv

        with pytest.raises(gr.Error):
            export_to_csv(pd.DataFrame())

    def test_export_to_csv_with_none_raises_error(self):
        """Exporting None is rejected with gr.Error."""
        import gradio as gr

        from mosaic.gradio_app import export_to_csv

        with pytest.raises(gr.Error):
            export_to_csv(None)