raylim commited on
Commit
4780d8d
·
unverified ·
1 Parent(s): c2c8715

Add GitHub Actions workflows and comprehensive test suite

Browse files

- Add CI/CD workflows for tests, code quality, and Docker builds
- Add comprehensive Makefile with test, lint, format, and Docker targets
- Add new test files for CLI, fixtures, UI components, and UI events
- Refactor batch analysis into main analysis module
- Update model manager and inference modules
- Add .dockerignore for cleaner Docker builds
- Update dependencies in uv.lock

.dockerignore ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ *.egg-info/
8
+ dist/
9
+ build/
10
+ *.egg
11
+
12
+ # Virtual environments
13
+ .venv/
14
+ venv/
15
+ ENV/
16
+ env/
17
+
18
+ # Testing
19
+ .pytest_cache/
20
+ .coverage
21
+ htmlcov/
22
+ .tox/
23
+ *.cover
24
+
25
+ # IDE
26
+ .vscode/
27
+ .idea/
28
+ *.swp
29
+ *.swo
30
+ *~
31
+
32
+ # Git
33
+ .git/
34
+ .gitignore
35
+ .gitattributes
36
+
37
+ # CI/CD
38
+ .github/
39
+ .gitlab-ci.yml
40
+
41
+ # Documentation
42
+ docs/
43
+ *.md
44
+ !README.md
45
+
46
+ # Data and outputs
47
+ data/
48
+ output/
49
+ *.svs
50
+ *.tiff
51
+ *.tif
52
+ *.png
53
+ *.jpg
54
+ *.jpeg
55
+
56
+ # Logs
57
+ *.log
58
+ logs/
59
+
60
+ # OS
61
+ .DS_Store
62
+ Thumbs.db
63
+
64
+ # Project specific
65
+ tests/
66
+ *.csv
67
+ profile.stats
68
+ benchmark_output/
69
+ profile_output/
70
+
71
+ # Lock files (we use uv.lock)
72
+ poetry.lock
73
+ Pipfile.lock
74
+ requirements*.txt
75
+
76
+ # Makefile and CI configs
77
+ Makefile
78
+ .dockerignore
79
+ Dockerfile*
.github/workflows/code-quality.yml ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Code Quality
2
+
3
+ on:
4
+ push:
5
+ branches: [ main, dev ]
6
+ pull_request:
7
+ branches: [ main, dev ]
8
+ workflow_dispatch:
9
+
10
+ jobs:
11
+ format-check:
12
+ name: Check Code Formatting
13
+ runs-on: ubuntu-latest
14
+
15
+ steps:
16
+ - name: Checkout code
17
+ uses: actions/checkout@v4
18
+
19
+ - name: Set up Python
20
+ uses: actions/setup-python@v5
21
+ with:
22
+ python-version: "3.10"
23
+
24
+ - name: Install uv
25
+ uses: astral-sh/setup-uv@v4
26
+ with:
27
+ enable-cache: true
28
+
29
+ - name: Install dependencies
30
+ run: |
31
+ uv sync
32
+
33
+ - name: Check formatting with black
34
+ run: |
35
+ make format-check
36
+
37
+ - name: Format Summary
38
+ if: always()
39
+ run: |
40
+ echo "## Code Formatting :art:" >> $GITHUB_STEP_SUMMARY
41
+ echo "" >> $GITHUB_STEP_SUMMARY
42
+ echo "Status: ${{ job.status }}" >> $GITHUB_STEP_SUMMARY
43
+ if [ "${{ job.status }}" == "failure" ]; then
44
+ echo "" >> $GITHUB_STEP_SUMMARY
45
+ echo "Run \`make format\` to auto-fix formatting issues." >> $GITHUB_STEP_SUMMARY
46
+ fi
47
+
48
+ lint:
49
+ name: Lint Code
50
+ runs-on: ubuntu-latest
51
+
52
+ steps:
53
+ - name: Checkout code
54
+ uses: actions/checkout@v4
55
+
56
+ - name: Set up Python
57
+ uses: actions/setup-python@v5
58
+ with:
59
+ python-version: "3.10"
60
+
61
+ - name: Install uv
62
+ uses: astral-sh/setup-uv@v4
63
+ with:
64
+ enable-cache: true
65
+
66
+ - name: Install dependencies
67
+ run: |
68
+ uv sync
69
+
70
+ - name: Lint with pylint
71
+ run: |
72
+ make lint
73
+ continue-on-error: true # Don't fail CI on pylint warnings
74
+
75
+ - name: Lint Summary
76
+ if: always()
77
+ run: |
78
+ echo "## Linting Results :mag:" >> $GITHUB_STEP_SUMMARY
79
+ echo "" >> $GITHUB_STEP_SUMMARY
80
+ echo "Status: ${{ job.status }}" >> $GITHUB_STEP_SUMMARY
.github/workflows/docker.yml ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Docker Build
2
+
3
+ on:
4
+ push:
5
+ branches: [ main, dev ]
6
+ tags:
7
+ - 'v*'
8
+ pull_request:
9
+ branches: [ main ]
10
+ workflow_dispatch:
11
+
12
+ env:
13
+ REGISTRY: ghcr.io
14
+ IMAGE_NAME: ${{ github.repository }}
15
+
16
+ jobs:
17
+ build:
18
+ name: Build Docker Image
19
+ runs-on: ubuntu-latest
20
+ permissions:
21
+ contents: read
22
+ packages: write
23
+
24
+ steps:
25
+ - name: Checkout code
26
+ uses: actions/checkout@v4
27
+
28
+ - name: Set up Docker Buildx
29
+ uses: docker/setup-buildx-action@v3
30
+
31
+ - name: Log in to Container Registry
32
+ if: github.event_name != 'pull_request'
33
+ uses: docker/login-action@v3
34
+ with:
35
+ registry: ${{ env.REGISTRY }}
36
+ username: ${{ github.actor }}
37
+ password: ${{ secrets.GITHUB_TOKEN }}
38
+
39
+ - name: Extract metadata
40
+ id: meta
41
+ uses: docker/metadata-action@v5
42
+ with:
43
+ images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
44
+ tags: |
45
+ type=ref,event=branch
46
+ type=ref,event=pr
47
+ type=semver,pattern={{version}}
48
+ type=semver,pattern={{major}}.{{minor}}
49
+ type=sha,prefix={{branch}}-
50
+
51
+ - name: Build and push Docker image
52
+ uses: docker/build-push-action@v5
53
+ with:
54
+ context: .
55
+ push: ${{ github.event_name != 'pull_request' }}
56
+ tags: ${{ steps.meta.outputs.tags }}
57
+ labels: ${{ steps.meta.outputs.labels }}
58
+ cache-from: type=gha
59
+ cache-to: type=gha,mode=max
60
+ secret-files: |
61
+ "github_token=${{ secrets.GITHUB_TOKEN }}"
62
+
63
+ - name: Docker Summary
64
+ run: |
65
+ echo "## Docker Build :whale:" >> $GITHUB_STEP_SUMMARY
66
+ echo "" >> $GITHUB_STEP_SUMMARY
67
+ echo "Registry: ${{ env.REGISTRY }}" >> $GITHUB_STEP_SUMMARY
68
+ echo "Image: ${{ env.IMAGE_NAME }}" >> $GITHUB_STEP_SUMMARY
69
+ echo "" >> $GITHUB_STEP_SUMMARY
70
+ echo "### Tags" >> $GITHUB_STEP_SUMMARY
71
+ echo '```' >> $GITHUB_STEP_SUMMARY
72
+ echo "${{ steps.meta.outputs.tags }}" >> $GITHUB_STEP_SUMMARY
73
+ echo '```' >> $GITHUB_STEP_SUMMARY
.github/workflows/tests.yml ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Tests
2
+
3
+ on:
4
+ push:
5
+ branches: [ main, dev ]
6
+ pull_request:
7
+ branches: [ main, dev ]
8
+ workflow_dispatch: # Allow manual trigger
9
+
10
+ jobs:
11
+ test:
12
+ name: Run Tests (Python ${{ matrix.python-version }})
13
+ runs-on: ubuntu-latest
14
+ strategy:
15
+ fail-fast: false
16
+ matrix:
17
+ python-version: ["3.10", "3.11"]
18
+
19
+ steps:
20
+ - name: Checkout code
21
+ uses: actions/checkout@v4
22
+ with:
23
+ fetch-depth: 0 # Full history for better coverage reports
24
+
25
+ - name: Set up Python ${{ matrix.python-version }}
26
+ uses: actions/setup-python@v5
27
+ with:
28
+ python-version: ${{ matrix.python-version }}
29
+
30
+ - name: Install uv
31
+ uses: astral-sh/setup-uv@v4
32
+ with:
33
+ enable-cache: true
34
+ cache-dependency-glob: "uv.lock"
35
+
36
+ - name: Install dependencies
37
+ run: |
38
+ uv sync
39
+
40
+ - name: Run tests with coverage
41
+ run: |
42
+ make test-coverage
43
+
44
+ - name: Generate coverage badge
45
+ if: matrix.python-version == '3.10'
46
+ run: |
47
+ COVERAGE=$(uv run coverage report | grep TOTAL | awk '{print $NF}' | sed 's/%//')
48
+ echo "COVERAGE=$COVERAGE" >> $GITHUB_ENV
49
+ echo "Coverage: $COVERAGE%"
50
+
51
+ - name: Upload coverage reports to Codecov
52
+ if: matrix.python-version == '3.10'
53
+ uses: codecov/codecov-action@v4
54
+ with:
55
+ file: ./coverage.xml
56
+ fail_ci_if_error: false
57
+ token: ${{ secrets.CODECOV_TOKEN }}
58
+ continue-on-error: true
59
+
60
+ - name: Upload coverage HTML report
61
+ if: matrix.python-version == '3.10'
62
+ uses: actions/upload-artifact@v4
63
+ with:
64
+ name: coverage-report
65
+ path: htmlcov/
66
+ retention-days: 30
67
+
68
+ - name: Test Summary
69
+ if: always()
70
+ run: |
71
+ echo "## Test Results :test_tube:" >> $GITHUB_STEP_SUMMARY
72
+ echo "" >> $GITHUB_STEP_SUMMARY
73
+ echo "Python Version: ${{ matrix.python-version }}" >> $GITHUB_STEP_SUMMARY
74
+ echo "Status: ${{ job.status }}" >> $GITHUB_STEP_SUMMARY
MAKEFILE_QUICK_REF.md ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Makefile Quick Reference
2
+
3
+ ## Most Common Commands
4
+
5
+ ```bash
6
+ # Setup
7
+ make install-dev # Install dev dependencies
8
+ make help # Show all available commands
9
+
10
+ # Testing
11
+ make test # Run tests with coverage
12
+ make test-fast # Run tests quickly (no coverage)
13
+ make test-ui # Test UI components only
14
+ make test-cli # Test CLI only
15
+
16
+ # Code Quality
17
+ make format # Format code with black
18
+ make format-check # Check formatting
19
+ make quality # Run all quality checks
20
+
21
+ # Running
22
+ make run-ui # Launch web interface
23
+ make run-single SLIDE=x.svs OUTPUT=out/ # Process single slide
24
+ make run-batch CSV=s.csv OUTPUT=out/ # Process batch
25
+
26
+ # Docker
27
+ make docker-build # Build image
28
+ make docker-run # Run web UI in container
29
+ make docker-shell # Shell into container
30
+
31
+ # Cleanup
32
+ make clean # Remove cache files
33
+ make clean-all # Remove everything
34
+ ```
35
+
36
+ ## Development Workflow
37
+
38
+ ```bash
39
+ # 1. Initial setup
40
+ make install-dev
41
+
42
+ # 2. Make changes to code
43
+ # ... edit files ...
44
+
45
+ # 3. Format and test
46
+ make format
47
+ make test
48
+
49
+ # 4. Before committing
50
+ make quality
51
+ make test-coverage
52
+
53
+ # 5. Optional: Install pre-commit hooks
54
+ make pre-commit-install
55
+ ```
56
+
57
+ ## Docker Workflow
58
+
59
+ ```bash
60
+ # Build and test locally
61
+ make docker-build
62
+ make docker-run
63
+
64
+ # Process slides with Docker
65
+ make docker-run-single SLIDE=my_slide.svs
66
+ make docker-run-batch CSV=settings.csv
67
+
68
+ # Push to registry
69
+ make docker-tag DOCKER_REGISTRY=myregistry.com/user
70
+ make docker-push DOCKER_REGISTRY=myregistry.com/user
71
+ ```
72
+
73
+ ## CI/CD
74
+
75
+ ```bash
76
+ # Run all CI checks
77
+ make ci-test # Tests + format check (fast)
78
+ make ci-test-strict # Tests + format check + pylint (slow)
79
+ make ci-docker # Build Docker for CI
80
+ ```
81
+
82
+ ## Tips
83
+
84
+ - Use `make help` to see all available commands
85
+ - Use `make test-specific TEST=path/to/test` for debugging
86
+ - Use `make test-verbose` to see print statements
87
+ - Use `make info` to see project information
88
+ - Set environment variables to customize Docker:
89
+ ```bash
90
+ export DOCKER_REGISTRY=myregistry.com/user
91
+ export DOCKER_TAG=v1.0.0
92
+ make docker-build
93
+ ```
MAKEFILE_USAGE.md ADDED
@@ -0,0 +1,459 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Makefile Usage Guide
2
+
3
+ This document provides detailed information about the Makefile targets available in the Mosaic project.
4
+
5
+ ## Quick Start
6
+
7
+ ```bash
8
+ # See all available commands
9
+ make help
10
+
11
+ # Setup development environment
12
+ make install-dev
13
+
14
+ # Run tests
15
+ make test
16
+
17
+ # Launch web interface
18
+ make run-ui
19
+ ```
20
+
21
+ ## Development Setup
22
+
23
+ ### `make install`
24
+ Install production dependencies only (no dev tools).
25
+ ```bash
26
+ make install
27
+ ```
28
+
29
+ ### `make install-dev`
30
+ Install all dependencies including development tools (pytest, ruff, etc.).
31
+ ```bash
32
+ make install-dev
33
+ ```
34
+
35
+ ## Testing
36
+
37
+ ### `make test`
38
+ Run full test suite with coverage reporting.
39
+ ```bash
40
+ make test
41
+ ```
42
+
43
+ ### `make test-fast`
44
+ Run tests without coverage (faster execution).
45
+ ```bash
46
+ make test-fast
47
+ ```
48
+
49
+ ### `make test-coverage`
50
+ Run tests with detailed coverage report (terminal + HTML).
51
+ ```bash
52
+ make test-coverage
53
+ # View HTML report at: htmlcov/index.html
54
+ ```
55
+
56
+ ### `make test-ui`
57
+ Run only UI-related tests.
58
+ ```bash
59
+ make test-ui
60
+ ```
61
+
62
+ ### `make test-cli`
63
+ Run only CLI-related tests.
64
+ ```bash
65
+ make test-cli
66
+ ```
67
+
68
+ ### `make test-verbose`
69
+ Run tests with verbose output and show print statements.
70
+ ```bash
71
+ make test-verbose
72
+ ```
73
+
74
+ ### `make test-specific`
75
+ Run a specific test file, class, or method.
76
+ ```bash
77
+ # Run specific test file
78
+ make test-specific TEST=tests/test_cli.py
79
+
80
+ # Run specific test class
81
+ make test-specific TEST=tests/test_cli.py::TestArgumentParsing
82
+
83
+ # Run specific test method
84
+ make test-specific TEST=tests/test_cli.py::TestArgumentParsing::test_no_arguments_launches_web_interface
85
+ ```
86
+
87
+ ## Code Quality
88
+
89
+ ### `make lint`
90
+ Check code for linting issues using pylint (src only for speed).
91
+ ```bash
92
+ make lint
93
+ ```
94
+
95
+ ### `make lint-strict`
96
+ Run pylint on both src and tests (slower but comprehensive).
97
+ ```bash
98
+ make lint-strict
99
+ ```
100
+
101
+ ### `make format`
102
+ Format code using black formatter.
103
+ ```bash
104
+ make format
105
+ ```
106
+
107
+ ### `make format-check`
108
+ Check if code is properly formatted without making changes.
109
+ ```bash
110
+ make format-check
111
+ ```
112
+
113
+ ### `make quality`
114
+ Run all code quality checks (format-check + lint).
115
+ ```bash
116
+ make quality
117
+ ```
118
+
119
+ ## Running the Application
120
+
121
+ ### `make run-ui`
122
+ Launch the Gradio web interface locally.
123
+ ```bash
124
+ make run-ui
125
+ # Open browser to http://localhost:7860
126
+ ```
127
+
128
+ ### `make run-ui-public`
129
+ Launch Gradio web interface with public sharing enabled.
130
+ ```bash
131
+ make run-ui-public
132
+ # Returns a public gradio.app URL for sharing
133
+ ```
134
+
135
+ ### `make run-single`
136
+ Process a single slide from the command line.
137
+ ```bash
138
+ make run-single SLIDE=data/my_slide.svs OUTPUT=output/
139
+ ```
140
+
141
+ ### `make run-batch`
142
+ Process multiple slides from a CSV file.
143
+ ```bash
144
+ make run-batch CSV=data/settings.csv OUTPUT=output/
145
+ ```
146
+
147
+ ## Docker
148
+
149
+ ### `make docker-build`
150
+ Build Docker image for Mosaic.
151
+ ```bash
152
+ make docker-build
153
+
154
+ # Build with custom tag
155
+ make docker-build DOCKER_TAG=v1.0.0
156
+
157
+ # Build with custom image name
158
+ make docker-build DOCKER_IMAGE_NAME=my-mosaic DOCKER_TAG=latest
159
+ ```
160
+
161
+ ### `make docker-build-no-cache`
162
+ Build Docker image without using cache (useful for clean builds).
163
+ ```bash
164
+ make docker-build-no-cache
165
+ ```
166
+
167
+ ### `make docker-run`
168
+ Run Docker container in web UI mode.
169
+ ```bash
170
+ make docker-run
171
+ # Access at http://localhost:7860
172
+ ```
173
+
174
+ ### `make docker-run-single`
175
+ Run Docker container to process a single slide.
176
+ ```bash
177
+ # Place your slide in ./data directory first
178
+ make docker-run-single SLIDE=my_slide.svs
179
+ # Results will be in ./output directory
180
+ ```
181
+
182
+ ### `make docker-run-batch`
183
+ Run Docker container for batch processing.
184
+ ```bash
185
+ # Place CSV and slides in ./data directory
186
+ make docker-run-batch CSV=settings.csv
187
+ # Results will be in ./output directory
188
+ ```
189
+
190
+ ### `make docker-shell`
191
+ Open an interactive shell inside the Docker container.
192
+ ```bash
193
+ make docker-shell
194
+ ```
195
+
196
+ ### `make docker-tag`
197
+ Tag Docker image for pushing to a registry.
198
+ ```bash
199
+ make docker-tag DOCKER_REGISTRY=docker.io/myusername
200
+ ```
201
+
202
+ ### `make docker-push`
203
+ Push Docker image to registry.
204
+ ```bash
205
+ # Set your registry first
206
+ make docker-push DOCKER_REGISTRY=docker.io/myusername DOCKER_TAG=latest
207
+ ```
208
+
209
+ ### `make docker-clean`
210
+ Remove local Docker image.
211
+ ```bash
212
+ make docker-clean
213
+ ```
214
+
215
+ ### `make docker-prune`
216
+ Clean up Docker build cache to free space.
217
+ ```bash
218
+ make docker-prune
219
+ ```
220
+
221
+ ## Cleanup
222
+
223
+ ### `make clean`
224
+ Remove Python cache files and build artifacts.
225
+ ```bash
226
+ make clean
227
+ ```
228
+
229
+ ### `make clean-outputs`
230
+ Remove generated output files (masks, CSVs).
231
+ ```bash
232
+ make clean-outputs
233
+ ```
234
+
235
+ ### `make clean-all`
236
+ Remove all artifacts, cache, and Docker images.
237
+ ```bash
238
+ make clean-all
239
+ ```
240
+
241
+ ## Model Management
242
+
243
+ ### `make download-models`
244
+ Explicitly download required models from HuggingFace.
245
+ ```bash
246
+ make download-models
247
+ # Note: Models are automatically downloaded on first run
248
+ ```
249
+
250
+ ## CI/CD
251
+
252
+ ### `make ci-test`
253
+ Run complete CI test suite (install deps, test with coverage, lint).
254
+ ```bash
255
+ make ci-test
256
+ ```
257
+
258
+ ### `make ci-docker`
259
+ Build Docker image for CI pipeline.
260
+ ```bash
261
+ make ci-docker
262
+ ```
263
+
264
+ ## Development Utilities
265
+
266
+ ### `make shell`
267
+ Open Python shell with project in path.
268
+ ```bash
269
+ make shell
270
+ ```
271
+
272
+ ### `make ipython`
273
+ Open IPython shell with project in path.
274
+ ```bash
275
+ make ipython
276
+ ```
277
+
278
+ ### `make notebook`
279
+ Start Jupyter notebook server.
280
+ ```bash
281
+ make notebook
282
+ ```
283
+
284
+ ### `make check-deps`
285
+ Check for outdated dependencies.
286
+ ```bash
287
+ make check-deps
288
+ ```
289
+
290
+ ### `make update-deps`
291
+ Update all dependencies (use with caution).
292
+ ```bash
293
+ make update-deps
294
+ ```
295
+
296
+ ### `make lock`
297
+ Update uv.lock file.
298
+ ```bash
299
+ make lock
300
+ ```
301
+
302
+ ## Git Hooks
303
+
304
+ ### `make pre-commit-install`
305
+ Install pre-commit hooks that run lint, format-check, and test-fast before each commit.
306
+ ```bash
307
+ make pre-commit-install
308
+ ```
309
+
310
+ ### `make pre-commit-uninstall`
311
+ Remove pre-commit hooks.
312
+ ```bash
313
+ make pre-commit-uninstall
314
+ ```
315
+
316
+ ## Information
317
+
318
+ ### `make info`
319
+ Display project information and key commands.
320
+ ```bash
321
+ make info
322
+ ```
323
+
324
+ ### `make version`
325
+ Show version information.
326
+ ```bash
327
+ make version
328
+ ```
329
+
330
+ ### `make tree`
331
+ Show project directory structure (requires `tree` command).
332
+ ```bash
333
+ make tree
334
+ ```
335
+
336
+ ## Performance
337
+
338
+ ### `make profile`
339
+ Profile single slide analysis to identify performance bottlenecks.
340
+ ```bash
341
+ make profile SLIDE=tests/testdata/948176.svs
342
+ # Creates profile.stats file with profiling data
343
+ ```
344
+
345
+ ### `make benchmark`
346
+ Run performance benchmarks on test slide.
347
+ ```bash
348
+ make benchmark
349
+ # Times full analysis pipeline
350
+ ```
351
+
352
+ ## Common Workflows
353
+
354
+ ### Setting up for development
355
+ ```bash
356
+ # 1. Install dependencies
357
+ make install-dev
358
+
359
+ # 2. Run tests to ensure everything works
360
+ make test
361
+
362
+ # 3. Install pre-commit hooks
363
+ make pre-commit-install
364
+ ```
365
+
366
+ ### Before committing changes
367
+ ```bash
368
+ # Run quality checks
369
+ make quality
370
+
371
+ # Run tests
372
+ make test
373
+
374
+ # Clean up
375
+ make clean
376
+ ```
377
+
378
+ ### Preparing a release
379
+ ```bash
380
+ # Run full CI suite
381
+ make ci-test
382
+
383
+ # Build Docker image
384
+ make docker-build DOCKER_TAG=v1.0.0
385
+
386
+ # Test Docker image
387
+ make docker-run DOCKER_TAG=v1.0.0
388
+
389
+ # Push to registry
390
+ make docker-push DOCKER_REGISTRY=your-registry DOCKER_TAG=v1.0.0
391
+ ```
392
+
393
+ ### Processing slides
394
+ ```bash
395
+ # Web UI (recommended for exploration)
396
+ make run-ui
397
+
398
+ # Single slide (CLI)
399
+ make run-single SLIDE=data/sample.svs OUTPUT=results/
400
+
401
+ # Batch processing (CLI)
402
+ make run-batch CSV=data/batch_settings.csv OUTPUT=results/
403
+
404
+ # Using Docker
405
+ make docker-build
406
+ make docker-run-batch CSV=batch_settings.csv
407
+ ```
408
+
409
+ ## Customization
410
+
411
+ You can customize Makefile behavior by setting environment variables or editing the Makefile:
412
+
413
+ ```bash
414
+ # Custom Docker registry
415
+ export DOCKER_REGISTRY=my-registry.com/username
416
+
417
+ # Custom image name
418
+ export DOCKER_IMAGE_NAME=my-custom-mosaic
419
+
420
+ # Then use make commands as normal
421
+ make docker-build
422
+ make docker-push
423
+ ```
424
+
425
+ ## Troubleshooting
426
+
427
+ ### Tests fail
428
+ ```bash
429
+ # Run with verbose output
430
+ make test-verbose
431
+
432
+ # Run specific failing test
433
+ make test-specific TEST=tests/test_file.py::test_name
434
+ ```
435
+
436
+ ### Docker build fails
437
+ ```bash
438
+ # Build without cache
439
+ make docker-build-no-cache
440
+
441
+ # Check Docker logs
442
+ docker logs <container-id>
443
+ ```
444
+
445
+ ### Permission errors
446
+ ```bash
447
+ # Clean and rebuild
448
+ make clean-all
449
+ make install-dev
450
+ ```
451
+
452
+ ### Out of disk space
453
+ ```bash
454
+ # Clean Docker cache
455
+ make docker-prune
456
+
457
+ # Clean project artifacts
458
+ make clean
459
+ ```
Makefile ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .PHONY: help install install-dev test test-coverage test-verbose lint format clean docker-build docker-run docker-push docker-clean run-ui run-cli
2
+
3
+ # Default target
4
+ .DEFAULT_GOAL := help
5
+
6
+ # Variables
7
+ DOCKER_IMAGE_NAME := mosaic
8
+ DOCKER_TAG := latest
9
+ DOCKER_REGISTRY := # Set your registry here (e.g., docker.io/username)
10
+ PYTHON := uv run python
11
+ PYTEST := uv run pytest
12
+ BLACK := uv run black
13
+ PYLINT := uv run pylint
14
+
15
+ ##@ General
16
+
17
+ help: ## Display this help message
18
+ @awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-20s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
19
+
20
+ ##@ Development Setup
21
+
22
+ install: ## Install production dependencies using uv
23
+ uv sync --no-dev
24
+
25
+ install-dev: ## Install development dependencies using uv
26
+ uv sync
27
+
28
+ ##@ Testing
29
+
30
+ test: ## Run all tests
31
+ $(PYTEST) tests/ -v
32
+
33
+ test-fast: ## Run tests without coverage (faster)
34
+ $(PYTEST) tests/ -v --no-cov
35
+
36
+ test-coverage: ## Run tests with detailed coverage report
37
+ $(PYTEST) tests/ -v --cov=src/mosaic --cov-report=term-missing --cov-report=html
38
+
39
+ test-ui: ## Run only UI tests
40
+ $(PYTEST) tests/test_ui_components.py tests/test_ui_events.py -v
41
+
42
+ test-cli: ## Run only CLI tests
43
+ $(PYTEST) tests/test_cli.py -v
44
+
45
+ test-verbose: ## Run tests with verbose output and show print statements
46
+ $(PYTEST) tests/ -vv -s
47
+
48
+ test-specific: ## Run specific test (usage: make test-specific TEST=tests/test_cli.py::TestClass::test_method)
49
+ $(PYTEST) $(TEST) -v
50
+
51
+ test-watch: ## Run tests in watch mode (requires pytest-watch)
52
+ $(PYTEST) tests/ --watch
53
+
54
+ ##@ Code Quality
55
+
56
+ lint: ## Run linting checks with pylint
57
+ $(PYLINT) src/mosaic/
58
+
59
+ lint-strict: ## Run pylint on both src and tests
60
+ $(PYLINT) src/mosaic/ tests/
61
+
62
+ format: ## Format code with black
63
+ $(BLACK) src/ tests/
64
+
65
+ format-check: ## Check code formatting without making changes
66
+ $(BLACK) --check src/ tests/
67
+
68
+ quality: format-check lint ## Run all code quality checks
69
+
70
+ ##@ Application
71
+
72
+ run-ui: ## Launch Gradio web interface
73
+ $(PYTHON) -m mosaic.gradio_app
74
+
75
+ run-ui-public: ## Launch Gradio web interface with public sharing
76
+ $(PYTHON) -m mosaic.gradio_app --share
77
+
78
+ run-single: ## Run single slide analysis (usage: make run-single SLIDE=path/to/slide.svs OUTPUT=output_dir)
79
+ $(PYTHON) -m mosaic.gradio_app --slide-path $(SLIDE) --output-dir $(OUTPUT)
80
+
81
+ run-batch: ## Run batch analysis from CSV (usage: make run-batch CSV=settings.csv OUTPUT=output_dir)
82
+ $(PYTHON) -m mosaic.gradio_app --slide-csv $(CSV) --output-dir $(OUTPUT)
83
+
84
+ ##@ Docker
85
+
86
+ docker-build: ## Build Docker image
87
+ docker build -t $(DOCKER_IMAGE_NAME):$(DOCKER_TAG) .
88
+
89
+ docker-build-no-cache: ## Build Docker image without cache
90
+ docker build --no-cache -t $(DOCKER_IMAGE_NAME):$(DOCKER_TAG) .
91
+
92
+ docker-run: ## Run Docker container (web UI mode)
93
+ docker run -it --rm \
94
+ --gpus all \
95
+ -p 7860:7860 \
96
+ -v $(PWD)/data:/app/data \
97
+ -v $(PWD)/output:/app/output \
98
+ $(DOCKER_IMAGE_NAME):$(DOCKER_TAG)
99
+
100
+ docker-run-single: ## Run Docker container (single slide mode)
101
+ docker run -it --rm \
102
+ --gpus all \
103
+ -v $(PWD)/data:/app/data \
104
+ -v $(PWD)/output:/app/output \
105
+ $(DOCKER_IMAGE_NAME):$(DOCKER_TAG) \
106
+ --slide-path /app/data/$(SLIDE) \
107
+ --output-dir /app/output
108
+
109
+ docker-run-batch: ## Run Docker container (batch mode)
110
+ docker run -it --rm \
111
+ --gpus all \
112
+ -v $(PWD)/data:/app/data \
113
+ -v $(PWD)/output:/app/output \
114
+ $(DOCKER_IMAGE_NAME):$(DOCKER_TAG) \
115
+ --slide-csv /app/data/$(CSV) \
116
+ --output-dir /app/output
117
+
118
+ docker-shell: ## Open shell in Docker container
119
+ docker run -it --rm \
120
+ --gpus all \
121
+ -v $(PWD)/data:/app/data \
122
+ -v $(PWD)/output:/app/output \
123
+ $(DOCKER_IMAGE_NAME):$(DOCKER_TAG) \
124
+ /bin/bash
125
+
126
+ docker-tag: ## Tag Docker image for registry
127
+ docker tag $(DOCKER_IMAGE_NAME):$(DOCKER_TAG) $(DOCKER_REGISTRY)/$(DOCKER_IMAGE_NAME):$(DOCKER_TAG)
128
+
129
+ docker-push: docker-tag ## Push Docker image to registry
130
+ docker push $(DOCKER_REGISTRY)/$(DOCKER_IMAGE_NAME):$(DOCKER_TAG)
131
+
132
+ docker-clean: ## Remove Docker image
133
+ docker rmi $(DOCKER_IMAGE_NAME):$(DOCKER_TAG) || true
134
+
135
+ docker-prune: ## Clean up Docker build cache
136
+ docker system prune -f
137
+ docker builder prune -f
138
+
139
+ ##@ Cleanup
140
+
141
+ clean: ## Remove build artifacts and cache files
142
+ find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
143
+ find . -type d -name "*.egg-info" -exec rm -rf {} + 2>/dev/null || true
144
+ find . -type d -name ".pytest_cache" -exec rm -rf {} + 2>/dev/null || true
145
+ find . -type d -name ".ruff_cache" -exec rm -rf {} + 2>/dev/null || true
146
+ find . -type f -name "*.pyc" -delete
147
+ find . -type f -name "*.pyo" -delete
148
+ find . -type f -name ".coverage" -delete
149
+ rm -rf htmlcov/
150
+ rm -rf dist/
151
+ rm -rf build/
152
+
153
+ clean-outputs: ## Remove output files (masks, results CSVs)
154
+ rm -rf output/*
155
+ @echo "Output directory cleaned"
156
+
157
+ clean-all: clean docker-clean ## Remove all build artifacts, cache, and Docker images
158
+
159
+ ##@ Model Management
160
+
161
+ download-models: ## Download required models from HuggingFace
162
+ @echo "Models will be downloaded automatically on first run"
163
+ $(PYTHON) -c "from mosaic.gradio_app import download_and_process_models; download_and_process_models()"
164
+
165
+ ##@ Documentation
166
+
167
+ docs-requirements: ## Show what needs to be documented
168
+ @echo "Documentation TODO:"
169
+ @echo " - API documentation"
170
+ @echo " - Model architecture details"
171
+ @echo " - CLI usage examples"
172
+ @echo " - Docker deployment guide"
173
+
174
+ ##@ CI/CD
175
+
176
+ ci-test: install-dev test-coverage format-check ## Run all CI checks (no lint to save time)
177
+ @echo "All CI checks passed!"
178
+
179
+ ci-test-strict: install-dev test-coverage format-check lint ## Run all CI checks including pylint
180
+ @echo "All strict CI checks passed!"
181
+
182
+ ci-docker: docker-build ## Build Docker image for CI
183
+ @echo "Docker image built successfully"
184
+
185
+ ##@ Development Utilities
186
+
187
+ shell: ## Open Python shell with project in path
188
+ $(PYTHON)
189
+
190
+ ipython: ## Open IPython shell with project in path
191
+ uv run ipython
192
+
193
+ notebook: ## Start Jupyter notebook server
194
+ uv run jupyter notebook
195
+
196
+ check-deps: ## Check for outdated dependencies
197
+ uv pip list --outdated
198
+
199
+ update-deps: ## Update dependencies (be careful!)
200
+ uv sync --upgrade
201
+
202
+ lock: ## Update lock file
203
+ uv lock
204
+
205
+ ##@ Git Hooks
206
+
207
+ pre-commit-install: ## Install pre-commit hooks
208
+ @echo "Setting up pre-commit hooks..."
209
+ @echo "#!/bin/sh" > .git/hooks/pre-commit
210
+ @echo "make format-check test-fast" >> .git/hooks/pre-commit
211
+ @chmod +x .git/hooks/pre-commit
212
+ @echo "Pre-commit hooks installed (format-check + test-fast)"
213
+
214
+ pre-commit-uninstall: ## Uninstall pre-commit hooks
215
+ rm -f .git/hooks/pre-commit
216
+ @echo "Pre-commit hooks uninstalled"
217
+
218
+ ##@ Information
219
+
220
+ info: ## Display project information
221
+ @echo "Mosaic - H&E Whole Slide Image Analysis"
222
+ @echo "========================================"
223
+ @echo ""
224
+ @echo "Python version:"
225
+ @$(PYTHON) --version
226
+ @echo ""
227
+ @echo "UV version:"
228
+ @uv --version
229
+ @echo ""
230
+ @echo "Project structure:"
231
+ @echo " src/mosaic/ - Main application code"
232
+ @echo " tests/ - Test suite"
233
+ @echo " data/ - Input data directory"
234
+ @echo " output/ - Analysis results"
235
+ @echo ""
236
+ @echo "Key commands:"
237
+ @echo " make install-dev - Setup development environment"
238
+ @echo " make test - Run test suite"
239
+ @echo " make run-ui - Launch web interface"
240
+ @echo " make docker-build - Build Docker image"
241
+
242
+ version: ## Show version information
243
+ @$(PYTHON) -c "import mosaic; print(f'Mosaic version: {mosaic.__version__}')" 2>/dev/null || echo "Version info not available"
244
+
245
+ tree: ## Show project directory tree (requires tree command)
246
+ @tree -L 3 -I '__pycache__|*.pyc|*.egg-info|.pytest_cache|.ruff_cache|htmlcov|.venv' . || echo "tree command not found. Install with: apt-get install tree"
247
+
248
+ ##@ Performance
249
+
250
+ profile: ## Profile a single slide analysis (usage: make profile SLIDE=path/to/slide.svs)
251
+ $(PYTHON) -m cProfile -o profile.stats -m mosaic.gradio_app --slide-path $(SLIDE) --output-dir profile_output
252
+ $(PYTHON) -c "import pstats; p = pstats.Stats('profile.stats'); p.sort_stats('cumulative'); p.print_stats(20)"
253
+
254
+ benchmark: ## Run performance benchmarks
255
+ @echo "Running benchmark suite..."
256
+ @echo "This will process the test slide and measure performance"
257
+ time $(PYTHON) -m mosaic.gradio_app --slide-path tests/testdata/948176.svs --output-dir benchmark_output
src/mosaic/analysis.py CHANGED
@@ -26,8 +26,10 @@ except ImportError:
26
  return lambda f: f
27
  return fn
28
 
 
29
  # Detect T4 hardware by checking actual GPU
30
  import torch
 
31
  IS_T4_GPU = False
32
  GPU_NAME = "Unknown"
33
  if not IS_ZEROGPU and torch.cuda.is_available():
@@ -64,18 +66,21 @@ from mosaic.inference import run_aeon, run_paladin
64
  from mosaic.data_directory import get_data_directory
65
 
66
  # Log hardware detection at module load
67
- logger.info(f"Hardware: {GPU_TYPE} | batch_size={DEFAULT_BATCH_SIZE}, num_workers={DEFAULT_NUM_WORKERS}")
 
 
 
68
 
 
 
69
 
70
- def _extract_ctranspath_features(coords, slide_path, attrs, num_workers):
71
- """Extract CTransPath features on GPU.
72
-
73
  Args:
74
  coords: Tissue tile coordinates
75
  slide_path: Path to the whole slide image file
76
  attrs: Slide attributes
77
  num_workers: Number of worker processes
78
-
 
79
  Returns:
80
  tuple: (ctranspath_features, coords)
81
  """
@@ -86,87 +91,92 @@ def _extract_ctranspath_features(coords, slide_path, attrs, num_workers):
86
  elif IS_T4_GPU:
87
  num_workers = DEFAULT_NUM_WORKERS
88
  batch_size = DEFAULT_BATCH_SIZE
89
- logger.info(f"Running CTransPath on T4: processing {len(coords)} tiles with batch_size={batch_size}")
 
 
90
  else:
91
  num_workers = max(num_workers, 8)
92
  batch_size = 64
93
  logger.info(f"Running CTransPath with {num_workers} workers")
94
-
95
  start_time = pd.Timestamp.now()
96
 
97
- data_dir = get_data_directory()
98
  ctranspath_features, _ = get_features(
99
  coords,
100
  slide_path,
101
  attrs,
102
- model_type=ModelType.CTRANSPATH,
103
- model_path=str(data_dir / "ctranspath.pth"),
104
  num_workers=num_workers,
105
  batch_size=batch_size,
106
  use_gpu=True,
107
  )
108
-
109
  end_time = pd.Timestamp.now()
110
  logger.info(f"CTransPath extraction took {end_time - start_time}")
111
-
112
  return ctranspath_features, coords
113
 
114
 
115
- def _extract_optimus_features(filtered_coords, slide_path, attrs, num_workers):
116
- """Extract Optimus features on GPU.
117
-
118
  Args:
119
  filtered_coords: Filtered tissue tile coordinates
120
  slide_path: Path to the whole slide image file
121
  attrs: Slide attributes
122
  num_workers: Number of worker processes
123
-
 
124
  Returns:
125
  Optimus features
126
  """
127
  if IS_ZEROGPU:
128
  num_workers = 0
129
  batch_size = 128
130
- logger.info(f"Running Optimus on ZeroGPU: processing {len(filtered_coords)} tiles")
 
 
131
  elif IS_T4_GPU:
132
  num_workers = DEFAULT_NUM_WORKERS
133
  batch_size = DEFAULT_BATCH_SIZE
134
- logger.info(f"Running Optimus on T4: processing {len(filtered_coords)} tiles with batch_size={batch_size}")
 
 
135
  else:
136
  num_workers = max(num_workers, 8)
137
  batch_size = 64
138
  logger.info(f"Running Optimus with {num_workers} workers")
139
-
140
  start_time = pd.Timestamp.now()
141
 
142
- data_dir = get_data_directory()
143
  features, _ = get_features(
144
  filtered_coords,
145
  slide_path,
146
  attrs,
147
- model_type=ModelType.OPTIMUS,
148
- model_path=str(data_dir / "optimus.pkl"),
149
  num_workers=num_workers,
150
  batch_size=batch_size,
151
  use_gpu=True,
152
  )
153
-
154
  end_time = pd.Timestamp.now()
155
  logger.info(f"Optimus extraction took {end_time - start_time}")
156
-
157
  return features
158
 
159
 
160
- def _run_aeon_inference(features, site_type, num_workers, sex=None, tissue_site_idx=None):
 
 
161
  """Run Aeon cancer subtype inference on GPU.
162
-
163
  Args:
164
  features: Optimus features
165
  site_type: Site type ("Primary" or "Metastatic")
166
  num_workers: Number of worker processes
167
  sex: Patient sex (0=Male, 1=Female), optional
168
  tissue_site_idx: Tissue site index (0-56), optional
169
-
170
  Returns:
171
  Aeon results DataFrame
172
  """
@@ -179,7 +189,7 @@ def _run_aeon_inference(features, site_type, num_workers, sex=None, tissue_site_
179
  else:
180
  num_workers = max(num_workers, 8)
181
  logger.info(f"Running Aeon with num_workers={num_workers}")
182
-
183
  start_time = pd.Timestamp.now()
184
  logger.info("Running Aeon for cancer subtype inference")
185
  data_dir = get_data_directory()
@@ -194,7 +204,7 @@ def _run_aeon_inference(features, site_type, num_workers, sex=None, tissue_site_
194
  use_cpu=False,
195
  )
196
  end_time = pd.Timestamp.now()
197
-
198
  # Log memory stats if CUDA is available
199
  if torch.cuda.is_available():
200
  try:
@@ -207,19 +217,19 @@ def _run_aeon_inference(features, site_type, num_workers, sex=None, tissue_site_
207
  logger.info(f"Aeon inference took {end_time - start_time}")
208
  else:
209
  logger.info(f"Aeon inference took {end_time - start_time}")
210
-
211
  return aeon_results
212
 
213
 
214
  def _run_paladin_inference(features, aeon_results, site_type, num_workers):
215
  """Run Paladin biomarker inference on GPU.
216
-
217
  Args:
218
  features: Optimus features
219
  aeon_results: Aeon results DataFrame
220
  site_type: Site type ("Primary" or "Metastatic")
221
  num_workers: Number of worker processes
222
-
223
  Returns:
224
  Paladin results DataFrame
225
  """
@@ -232,7 +242,7 @@ def _run_paladin_inference(features, aeon_results, site_type, num_workers):
232
  else:
233
  num_workers = max(num_workers, 8)
234
  logger.info(f"Running Paladin with num_workers={num_workers}")
235
-
236
  start_time = pd.Timestamp.now()
237
  logger.info("Running Paladin for biomarker inference")
238
  data_dir = get_data_directory()
@@ -246,7 +256,7 @@ def _run_paladin_inference(features, aeon_results, site_type, num_workers):
246
  use_cpu=False,
247
  )
248
  end_time = pd.Timestamp.now()
249
-
250
  # Log memory stats if CUDA is available
251
  if torch.cuda.is_available():
252
  try:
@@ -259,7 +269,7 @@ def _run_paladin_inference(features, aeon_results, site_type, num_workers):
259
  logger.info(f"Paladin inference took {end_time - start_time}")
260
  else:
261
  logger.info(f"Paladin inference took {end_time - start_time}")
262
-
263
  return paladin_results
264
 
265
 
@@ -278,8 +288,16 @@ def _run_inference_pipeline_free(
278
  ):
279
  """Run inference pipeline with 60s GPU limit (for free users)."""
280
  return _run_inference_pipeline_impl(
281
- coords, slide_path, attrs, site_type, sex, tissue_site_idx,
282
- cancer_subtype, cancer_subtype_name_map, num_workers, progress
 
 
 
 
 
 
 
 
283
  )
284
 
285
 
@@ -298,8 +316,16 @@ def _run_inference_pipeline_pro(
298
  ):
299
  """Run inference pipeline with 300s GPU limit (for PRO users)."""
300
  return _run_inference_pipeline_impl(
301
- coords, slide_path, attrs, site_type, sex, tissue_site_idx,
302
- cancer_subtype, cancer_subtype_name_map, num_workers, progress
 
 
 
 
 
 
 
 
303
  )
304
 
305
 
@@ -315,11 +341,10 @@ def _run_inference_pipeline_impl(
315
  num_workers,
316
  progress,
317
  ):
318
- """Run complete inference pipeline with separate GPU calls.
319
 
320
- This function orchestrates the GPU operations by calling separate functions
321
- for each GPU-intensive task, allowing HF Spaces to allocate GPU resources
322
- independently for each operation.
323
 
324
  Args:
325
  coords: Tissue tile coordinates
@@ -336,59 +361,84 @@ def _run_inference_pipeline_impl(
336
  - aeon_results: DataFrame with cancer subtype predictions and confidence scores
337
  - paladin_results: DataFrame with biomarker predictions
338
  """
339
- # Step 2: Extract CTransPath features
340
- progress(0.3, desc="Extracting CTransPath features")
341
- ctranspath_features, coords = _extract_ctranspath_features(
342
- coords, slide_path, attrs, num_workers
343
- )
344
 
345
- # Step 3: Filter features using marker classifier (CPU operation)
346
- start_time = pd.Timestamp.now()
347
- data_dir = get_data_directory()
348
- marker_classifier = pickle.load(open(data_dir / "marker_classifier.pkl", "rb"))
349
- progress(0.35, desc="Filtering features with marker classifier")
350
- logger.info("Filtering features with marker classifier")
351
- _, filtered_coords = filter_features(
352
- ctranspath_features,
353
- coords,
354
- marker_classifier,
355
- threshold=0.25,
356
- )
357
- end_time = pd.Timestamp.now()
358
- logger.info(f"Feature filtering took {end_time - start_time}")
359
- logger.info(
360
- f"Filtered from {len(coords)} to {len(filtered_coords)} tiles using marker classifier"
361
- )
362
 
363
- # Step 4: Extract Optimus features
364
- progress(0.4, desc="Extracting Optimus features")
365
- features = _extract_optimus_features(filtered_coords, slide_path, attrs, num_workers)
 
 
 
366
 
367
- # Step 5: Run Aeon to predict histology if not supplied
368
- if cancer_subtype == "Unknown":
369
- progress(0.9, desc="Running Aeon for cancer subtype inference")
370
- aeon_results = _run_aeon_inference(features, site_type, num_workers, sex, tissue_site_idx)
371
- else:
372
- cancer_subtype_code = cancer_subtype_name_map.get(cancer_subtype)
373
- aeon_results = pd.DataFrame(
374
- {
375
- "Cancer Subtype": [cancer_subtype_code],
376
- "Confidence": [1.0],
377
- }
 
 
 
378
  )
379
- logger.info(f"Using user-supplied cancer subtype: {cancer_subtype}")
380
 
381
- # Step 6: Run Paladin to predict biomarkers
382
- if len(aeon_results) == 0:
383
- logger.warning("No Aeon results, skipping Paladin inference")
384
- return None, None
385
-
386
- progress(0.95, desc="Running Paladin for biomarker inference")
387
- paladin_results = _run_paladin_inference(features, aeon_results, site_type, num_workers)
 
 
388
 
389
- aeon_results.set_index("Cancer Subtype", inplace=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
390
 
391
- return aeon_results, paladin_results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
392
 
393
 
394
  # ============================================================================
@@ -531,11 +581,10 @@ def _run_inference_pipeline_with_models(
531
  Returns:
532
  Tuple of (aeon_results, paladin_results)
533
  """
534
- # Step 1: Extract CTransPath features (still uses mussel's get_features)
535
- # Note: Feature extraction optimization can be added later if needed
536
  progress(0.3, desc="Extracting CTransPath features")
537
  ctranspath_features, coords = _extract_ctranspath_features(
538
- coords, slide_path, attrs, num_workers
539
  )
540
 
541
  # Step 2: Filter features using pre-loaded marker classifier
@@ -554,9 +603,11 @@ def _run_inference_pipeline_with_models(
554
  f"Filtered from {len(coords)} to {len(filtered_coords)} tiles using marker classifier"
555
  )
556
 
557
- # Step 3: Extract Optimus features (still uses mussel's get_features)
558
  progress(0.5, desc="Extracting Optimus features")
559
- features = _extract_optimus_features(filtered_coords, slide_path, attrs, num_workers)
 
 
560
 
561
  # Step 4: Run Aeon inference with pre-loaded model (if cancer subtype unknown)
562
  aeon_results = None
@@ -564,7 +615,9 @@ def _run_inference_pipeline_with_models(
564
 
565
  # Check if cancer subtype is unknown
566
  if cancer_subtype in ["Unknown", None]:
567
- logger.info("Running Aeon inference with PRE-LOADED model (cancer subtype unknown)")
 
 
568
  aeon_results = _run_aeon_inference_with_model(
569
  features,
570
  model_cache.aeon_model, # Use pre-loaded Aeon model
@@ -593,116 +646,7 @@ def _run_inference_pipeline_with_models(
593
  return aeon_results, paladin_results
594
 
595
 
596
- def analyze_slide_with_models(
597
- slide_path,
598
- seg_config,
599
- site_type,
600
- sex,
601
- tissue_site,
602
- cancer_subtype,
603
- cancer_subtype_name_map,
604
- model_cache,
605
- ihc_subtype="",
606
- num_workers=4,
607
- progress=None,
608
- ):
609
- """Analyze a slide using pre-loaded models (batch-optimized version).
610
-
611
- This function is optimized for batch processing where models are loaded once
612
- in a ModelCache and reused across multiple slides.
613
-
614
- Args:
615
- slide_path: Path to the slide file
616
- seg_config: Segmentation configuration ("Biopsy", "Resection", or "TCGA")
617
- site_type: "Primary" or "Metastatic"
618
- sex: Patient sex ("Unknown", "Male", "Female")
619
- tissue_site: Tissue site name
620
- cancer_subtype: Known cancer subtype or "Unknown"
621
- cancer_subtype_name_map: Dict mapping display names to OncoTree codes
622
- model_cache: ModelCache instance with pre-loaded models
623
- ihc_subtype: IHC subtype for breast cancer (optional)
624
- num_workers: Number of workers for data loading
625
- progress: Gradio progress tracker
626
-
627
- Returns:
628
- Tuple of (slide_mask, aeon_results, paladin_results)
629
- """
630
- from mosaic.inference.data import encode_sex, encode_tissue_site
631
-
632
- if progress is None:
633
- progress = lambda frac, desc: None # No-op progress function
634
-
635
- # Encode sex and tissue site
636
- sex_idx = encode_sex(sex) if sex else None
637
- tissue_site_idx = encode_tissue_site(tissue_site) if tissue_site else None
638
-
639
- # Step 1: Convert seg_config string to config object
640
- if isinstance(seg_config, str):
641
- if seg_config == "Biopsy":
642
- seg_config = BiopsySegConfig()
643
- elif seg_config == "Resection":
644
- seg_config = ResectionSegConfig()
645
- elif seg_config == "TCGA":
646
- seg_config = TcgaSegConfig()
647
- else:
648
- raise ValueError(f"Unknown segmentation configuration: {seg_config}")
649
-
650
- # Step 2: Tissue segmentation (CPU operation, not affected by model caching)
651
- progress(0.0, desc="Segmenting tissue")
652
- logger.info(f"Segmenting tissue for slide: {slide_path}")
653
- start_time = pd.Timestamp.now()
654
-
655
- if values := segment_tissue(
656
- slide_path=slide_path,
657
- patch_size=224,
658
- mpp=0.5,
659
- seg_level=-1,
660
- segment_threshold=seg_config.segment_threshold,
661
- median_blur_ksize=seg_config.median_blur_ksize,
662
- morphology_ex_kernel=seg_config.morphology_ex_kernel,
663
- tissue_area_threshold=seg_config.tissue_area_threshold,
664
- hole_area_threshold=seg_config.hole_area_threshold,
665
- max_num_holes=seg_config.max_num_holes,
666
- ):
667
- polygon, _, coords, attrs = values
668
- else:
669
- logger.warning("No tissue detected in slide")
670
- return None, None, None
671
-
672
- end_time = pd.Timestamp.now()
673
- logger.info(f"Tissue segmentation took {end_time - start_time}")
674
- logger.info(f"Found {len(coords)} tissue tiles")
675
-
676
- if len(coords) == 0:
677
- logger.warning("No tissue tiles found in slide")
678
- return None, None, None
679
-
680
- # Step 2: Create slide mask visualization (CPU operation)
681
- progress(0.2, desc="Creating slide mask")
682
- logger.info("Drawing slide mask")
683
- slide_mask = draw_slide_mask(
684
- slide_path, polygon, outline="black", fill=(255, 0, 0, 80), vis_level=-1
685
- )
686
- logger.info("Slide mask drawn")
687
-
688
- # Step 3: Run inference pipeline with pre-loaded models
689
- aeon_results, paladin_results = _run_inference_pipeline_with_models(
690
- coords,
691
- slide_path,
692
- attrs,
693
- site_type,
694
- sex_idx,
695
- tissue_site_idx,
696
- cancer_subtype,
697
- cancer_subtype_name_map,
698
- model_cache,
699
- num_workers,
700
- progress,
701
- )
702
-
703
- progress(1.0, desc="Analysis complete")
704
-
705
- return slide_mask, aeon_results, paladin_results
706
 
707
 
708
  def analyze_slide(
@@ -717,26 +661,27 @@ def analyze_slide(
717
  num_workers=4,
718
  progress=gr.Progress(track_tqdm=True),
719
  request: gr.Request = None,
 
720
  ):
721
  """Analyze a whole slide image for cancer subtype and biomarker prediction.
722
 
723
- This function performs a complete analysis pipeline including:
724
- 1. Tissue segmentation (CPU-only, no GPU required)
725
- 2. GPU-intensive feature extraction and model inference
726
-
727
- The GPU-intensive operations are handled by a separate function decorated
728
- with @spaces.GPU to efficiently manage GPU resources on Hugging Face Spaces.
729
- Tissue segmentation runs on CPU and is not included in the GPU allocation.
730
 
731
  Args:
732
  slide_path: Path to the whole slide image file
733
  seg_config: Segmentation configuration, one of "Biopsy", "Resection", or "TCGA"
734
  site_type: Site type, either "Primary" or "Metastatic"
 
 
735
  cancer_subtype: Cancer subtype (OncoTree code or "Unknown" for inference)
736
  cancer_subtype_name_map: Dictionary mapping cancer subtype names to codes
737
  ihc_subtype: IHC subtype for breast cancer (optional)
738
  num_workers: Number of worker processes for feature extraction
739
  progress: Gradio progress tracker for UI updates
 
 
740
 
741
  Returns:
742
  tuple: (slide_mask, aeon_results, paladin_results)
@@ -795,51 +740,6 @@ def analyze_slide(
795
  )
796
  logger.info("Slide mask drawn")
797
 
798
- # Step 2-6: Run inference pipeline with GPU
799
- # Check if user is logged in for longer GPU duration
800
- is_logged_in = False
801
- username = "anonymous"
802
- if request is not None:
803
- try:
804
- # Check if user is logged in via JWT token in referer
805
- # HF Spaces doesn't populate request.username but includes JWT in URL
806
- if hasattr(request, 'headers'):
807
- referer = request.headers.get('referer', '')
808
- if '__sign=' in referer:
809
- # Extract and decode JWT token
810
- import re
811
- import json
812
- import base64
813
-
814
- match = re.search(r'__sign=([^&]+)', referer)
815
- if match:
816
- token = match.group(1)
817
- try:
818
- # JWT format: header.payload.signature
819
- # We only need the payload (middle part)
820
- parts = token.split('.')
821
- if len(parts) == 3:
822
- # Decode base64 payload (add padding if needed)
823
- payload = parts[1]
824
- payload += '=' * (4 - len(payload) % 4)
825
- decoded = base64.urlsafe_b64decode(payload)
826
- token_data = json.loads(decoded)
827
-
828
- # Check if user is in token
829
- if 'onBehalfOf' in token_data and 'user' in token_data['onBehalfOf']:
830
- username = token_data['onBehalfOf']['user']
831
- is_logged_in = True
832
- logger.info(f"Found user in JWT token: {username}")
833
- except Exception as e:
834
- logger.warning(f"Failed to decode JWT: {e}")
835
-
836
- if IS_ZEROGPU:
837
- logger.info(f"User: {username} | Logged in: {is_logged_in}")
838
- except Exception as e:
839
- logger.warning(f"Failed to detect user: {e}")
840
- import traceback
841
- logger.warning(traceback.format_exc())
842
-
843
  # Convert sex and tissue_site to indices for Aeon model
844
  from mosaic.inference.data import encode_sex, encode_tissue_site
845
 
@@ -851,10 +751,11 @@ def analyze_slide(
851
  if tissue_site is not None:
852
  tissue_site_idx = encode_tissue_site(tissue_site)
853
 
854
- if is_logged_in:
855
- if IS_ZEROGPU:
856
- logger.info("Using 300s GPU allocation (logged-in user)")
857
- aeon_results, paladin_results = _run_inference_pipeline_pro(
 
858
  coords,
859
  slide_path,
860
  attrs,
@@ -863,23 +764,91 @@ def analyze_slide(
863
  tissue_site_idx,
864
  cancer_subtype,
865
  cancer_subtype_name_map,
 
866
  num_workers,
867
  progress,
868
  )
869
  else:
870
- if IS_ZEROGPU:
871
- logger.info("Using 60s GPU allocation (anonymous user)")
872
- aeon_results, paladin_results = _run_inference_pipeline_free(
873
- coords,
874
- slide_path,
875
- attrs,
876
- site_type,
877
- sex_idx,
878
- tissue_site_idx,
879
- cancer_subtype,
880
- cancer_subtype_name_map,
881
- num_workers,
882
- progress,
883
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
884
 
885
  return slide_mask, aeon_results, paladin_results
 
26
  return lambda f: f
27
  return fn
28
 
29
+
30
  # Detect T4 hardware by checking actual GPU
31
  import torch
32
+
33
  IS_T4_GPU = False
34
  GPU_NAME = "Unknown"
35
  if not IS_ZEROGPU and torch.cuda.is_available():
 
66
  from mosaic.data_directory import get_data_directory
67
 
68
  # Log hardware detection at module load
69
+ logger.info(
70
+ f"Hardware: {GPU_TYPE} | batch_size={DEFAULT_BATCH_SIZE}, num_workers={DEFAULT_NUM_WORKERS}"
71
+ )
72
+
73
 
74
+ def _extract_ctranspath_features(coords, slide_path, attrs, num_workers, model):
75
+ """Extract CTransPath features on GPU using pre-loaded model.
76
 
 
 
 
77
  Args:
78
  coords: Tissue tile coordinates
79
  slide_path: Path to the whole slide image file
80
  attrs: Slide attributes
81
  num_workers: Number of worker processes
82
+ model: Pre-loaded CTransPath model from ModelCache
83
+
84
  Returns:
85
  tuple: (ctranspath_features, coords)
86
  """
 
91
  elif IS_T4_GPU:
92
  num_workers = DEFAULT_NUM_WORKERS
93
  batch_size = DEFAULT_BATCH_SIZE
94
+ logger.info(
95
+ f"Running CTransPath on T4: processing {len(coords)} tiles with batch_size={batch_size}"
96
+ )
97
  else:
98
  num_workers = max(num_workers, 8)
99
  batch_size = 64
100
  logger.info(f"Running CTransPath with {num_workers} workers")
101
+
102
  start_time = pd.Timestamp.now()
103
 
 
104
  ctranspath_features, _ = get_features(
105
  coords,
106
  slide_path,
107
  attrs,
108
+ model=model,
 
109
  num_workers=num_workers,
110
  batch_size=batch_size,
111
  use_gpu=True,
112
  )
113
+
114
  end_time = pd.Timestamp.now()
115
  logger.info(f"CTransPath extraction took {end_time - start_time}")
116
+
117
  return ctranspath_features, coords
118
 
119
 
120
+ def _extract_optimus_features(filtered_coords, slide_path, attrs, num_workers, model):
121
+ """Extract Optimus features on GPU using pre-loaded model.
122
+
123
  Args:
124
  filtered_coords: Filtered tissue tile coordinates
125
  slide_path: Path to the whole slide image file
126
  attrs: Slide attributes
127
  num_workers: Number of worker processes
128
+ model: Pre-loaded Optimus model from ModelCache
129
+
130
  Returns:
131
  Optimus features
132
  """
133
  if IS_ZEROGPU:
134
  num_workers = 0
135
  batch_size = 128
136
+ logger.info(
137
+ f"Running Optimus on ZeroGPU: processing {len(filtered_coords)} tiles"
138
+ )
139
  elif IS_T4_GPU:
140
  num_workers = DEFAULT_NUM_WORKERS
141
  batch_size = DEFAULT_BATCH_SIZE
142
+ logger.info(
143
+ f"Running Optimus on T4: processing {len(filtered_coords)} tiles with batch_size={batch_size}"
144
+ )
145
  else:
146
  num_workers = max(num_workers, 8)
147
  batch_size = 64
148
  logger.info(f"Running Optimus with {num_workers} workers")
149
+
150
  start_time = pd.Timestamp.now()
151
 
 
152
  features, _ = get_features(
153
  filtered_coords,
154
  slide_path,
155
  attrs,
156
+ model=model,
 
157
  num_workers=num_workers,
158
  batch_size=batch_size,
159
  use_gpu=True,
160
  )
161
+
162
  end_time = pd.Timestamp.now()
163
  logger.info(f"Optimus extraction took {end_time - start_time}")
164
+
165
  return features
166
 
167
 
168
+ def _run_aeon_inference(
169
+ features, site_type, num_workers, sex=None, tissue_site_idx=None
170
+ ):
171
  """Run Aeon cancer subtype inference on GPU.
172
+
173
  Args:
174
  features: Optimus features
175
  site_type: Site type ("Primary" or "Metastatic")
176
  num_workers: Number of worker processes
177
  sex: Patient sex (0=Male, 1=Female), optional
178
  tissue_site_idx: Tissue site index (0-56), optional
179
+
180
  Returns:
181
  Aeon results DataFrame
182
  """
 
189
  else:
190
  num_workers = max(num_workers, 8)
191
  logger.info(f"Running Aeon with num_workers={num_workers}")
192
+
193
  start_time = pd.Timestamp.now()
194
  logger.info("Running Aeon for cancer subtype inference")
195
  data_dir = get_data_directory()
 
204
  use_cpu=False,
205
  )
206
  end_time = pd.Timestamp.now()
207
+
208
  # Log memory stats if CUDA is available
209
  if torch.cuda.is_available():
210
  try:
 
217
  logger.info(f"Aeon inference took {end_time - start_time}")
218
  else:
219
  logger.info(f"Aeon inference took {end_time - start_time}")
220
+
221
  return aeon_results
222
 
223
 
224
  def _run_paladin_inference(features, aeon_results, site_type, num_workers):
225
  """Run Paladin biomarker inference on GPU.
226
+
227
  Args:
228
  features: Optimus features
229
  aeon_results: Aeon results DataFrame
230
  site_type: Site type ("Primary" or "Metastatic")
231
  num_workers: Number of worker processes
232
+
233
  Returns:
234
  Paladin results DataFrame
235
  """
 
242
  else:
243
  num_workers = max(num_workers, 8)
244
  logger.info(f"Running Paladin with num_workers={num_workers}")
245
+
246
  start_time = pd.Timestamp.now()
247
  logger.info("Running Paladin for biomarker inference")
248
  data_dir = get_data_directory()
 
256
  use_cpu=False,
257
  )
258
  end_time = pd.Timestamp.now()
259
+
260
  # Log memory stats if CUDA is available
261
  if torch.cuda.is_available():
262
  try:
 
269
  logger.info(f"Paladin inference took {end_time - start_time}")
270
  else:
271
  logger.info(f"Paladin inference took {end_time - start_time}")
272
+
273
  return paladin_results
274
 
275
 
 
288
  ):
289
  """Run inference pipeline with 60s GPU limit (for free users)."""
290
  return _run_inference_pipeline_impl(
291
+ coords,
292
+ slide_path,
293
+ attrs,
294
+ site_type,
295
+ sex,
296
+ tissue_site_idx,
297
+ cancer_subtype,
298
+ cancer_subtype_name_map,
299
+ num_workers,
300
+ progress,
301
  )
302
 
303
 
 
316
  ):
317
  """Run inference pipeline with 300s GPU limit (for PRO users)."""
318
  return _run_inference_pipeline_impl(
319
+ coords,
320
+ slide_path,
321
+ attrs,
322
+ site_type,
323
+ sex,
324
+ tissue_site_idx,
325
+ cancer_subtype,
326
+ cancer_subtype_name_map,
327
+ num_workers,
328
+ progress,
329
  )
330
 
331
 
 
341
  num_workers,
342
  progress,
343
  ):
344
+ """Run complete inference pipeline using model cache.
345
 
346
+ This function loads models once and reuses them throughout the pipeline,
347
+ orchestrating GPU operations for feature extraction and inference.
 
348
 
349
  Args:
350
  coords: Tissue tile coordinates
 
361
  - aeon_results: DataFrame with cancer subtype predictions and confidence scores
362
  - paladin_results: DataFrame with biomarker predictions
363
  """
364
+ # Load all models once for the entire pipeline
365
+ from mosaic.model_manager import load_all_models
 
 
 
366
 
367
+ progress(0.1, desc="Loading models")
368
+ logger.info("Loading models for inference pipeline")
369
+ model_cache = load_all_models(use_gpu=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
 
371
+ try:
372
+ # Step 2: Extract CTransPath features using cached model
373
+ progress(0.3, desc="Extracting CTransPath features")
374
+ ctranspath_features, coords = _extract_ctranspath_features(
375
+ coords, slide_path, attrs, num_workers, model=model_cache.ctranspath_model
376
+ )
377
 
378
+ # Step 3: Filter features using cached marker classifier
379
+ start_time = pd.Timestamp.now()
380
+ progress(0.35, desc="Filtering features with marker classifier")
381
+ logger.info("Filtering features with marker classifier")
382
+ _, filtered_coords = filter_features(
383
+ ctranspath_features,
384
+ coords,
385
+ model_cache.marker_classifier,
386
+ threshold=0.25,
387
+ )
388
+ end_time = pd.Timestamp.now()
389
+ logger.info(f"Feature filtering took {end_time - start_time}")
390
+ logger.info(
391
+ f"Filtered from {len(coords)} to {len(filtered_coords)} tiles using marker classifier"
392
  )
 
393
 
394
+ # Step 4: Extract Optimus features using cached model
395
+ progress(0.4, desc="Extracting Optimus features")
396
+ features = _extract_optimus_features(
397
+ filtered_coords,
398
+ slide_path,
399
+ attrs,
400
+ num_workers,
401
+ model=model_cache.optimus_model,
402
+ )
403
 
404
+ # Step 5: Run Aeon to predict histology if not supplied
405
+ if cancer_subtype == "Unknown":
406
+ progress(0.9, desc="Running Aeon for cancer subtype inference")
407
+ aeon_results = _run_aeon_inference_with_model(
408
+ features,
409
+ model_cache.aeon_model,
410
+ model_cache.device,
411
+ site_type,
412
+ num_workers,
413
+ sex,
414
+ tissue_site_idx,
415
+ )
416
+ else:
417
+ cancer_subtype_code = cancer_subtype_name_map.get(cancer_subtype)
418
+ aeon_results = pd.DataFrame(
419
+ {
420
+ "Cancer Subtype": [cancer_subtype_code],
421
+ "Confidence": [1.0],
422
+ }
423
+ )
424
+ logger.info(f"Using user-supplied cancer subtype: {cancer_subtype}")
425
 
426
+ # Step 6: Run Paladin to predict biomarkers
427
+ if len(aeon_results) == 0:
428
+ logger.warning("No Aeon results, skipping Paladin inference")
429
+ return None, None
430
+
431
+ progress(0.95, desc="Running Paladin for biomarker inference")
432
+ paladin_results = _run_paladin_inference_with_models(
433
+ features, aeon_results, site_type, model_cache, num_workers
434
+ )
435
+
436
+ aeon_results.set_index("Cancer Subtype", inplace=True)
437
+
438
+ return aeon_results, paladin_results
439
+ finally:
440
+ # Clean up models to free GPU memory
441
+ model_cache.cleanup()
442
 
443
 
444
  # ============================================================================
 
581
  Returns:
582
  Tuple of (aeon_results, paladin_results)
583
  """
584
+ # Step 1: Extract CTransPath features with PRE-LOADED model
 
585
  progress(0.3, desc="Extracting CTransPath features")
586
  ctranspath_features, coords = _extract_ctranspath_features(
587
+ coords, slide_path, attrs, num_workers, model=model_cache.ctranspath_model
588
  )
589
 
590
  # Step 2: Filter features using pre-loaded marker classifier
 
603
  f"Filtered from {len(coords)} to {len(filtered_coords)} tiles using marker classifier"
604
  )
605
 
606
+ # Step 3: Extract Optimus features with PRE-LOADED model
607
  progress(0.5, desc="Extracting Optimus features")
608
+ features = _extract_optimus_features(
609
+ filtered_coords, slide_path, attrs, num_workers, model=model_cache.optimus_model
610
+ )
611
 
612
  # Step 4: Run Aeon inference with pre-loaded model (if cancer subtype unknown)
613
  aeon_results = None
 
615
 
616
  # Check if cancer subtype is unknown
617
  if cancer_subtype in ["Unknown", None]:
618
+ logger.info(
619
+ "Running Aeon inference with PRE-LOADED model (cancer subtype unknown)"
620
+ )
621
  aeon_results = _run_aeon_inference_with_model(
622
  features,
623
  model_cache.aeon_model, # Use pre-loaded Aeon model
 
646
  return aeon_results, paladin_results
647
 
648
 
649
+ # Removed: analyze_slide_with_models merged into analyze_slide below
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
650
 
651
 
652
  def analyze_slide(
 
661
  num_workers=4,
662
  progress=gr.Progress(track_tqdm=True),
663
  request: gr.Request = None,
664
+ model_cache=None,
665
  ):
666
  """Analyze a whole slide image for cancer subtype and biomarker prediction.
667
 
668
+ This function works in two modes:
669
+ 1. **Single-slide mode** (model_cache=None): Loads models, analyzes one slide, cleans up
670
+ 2. **Batch mode** (model_cache provided): Uses pre-loaded models for efficiency
 
 
 
 
671
 
672
  Args:
673
  slide_path: Path to the whole slide image file
674
  seg_config: Segmentation configuration, one of "Biopsy", "Resection", or "TCGA"
675
  site_type: Site type, either "Primary" or "Metastatic"
676
+ sex: Patient sex ("Unknown", "Male", "Female")
677
+ tissue_site: Tissue site name
678
  cancer_subtype: Cancer subtype (OncoTree code or "Unknown" for inference)
679
  cancer_subtype_name_map: Dictionary mapping cancer subtype names to codes
680
  ihc_subtype: IHC subtype for breast cancer (optional)
681
  num_workers: Number of worker processes for feature extraction
682
  progress: Gradio progress tracker for UI updates
683
+ request: Gradio request object (for HF Spaces authentication)
684
+ model_cache: Optional ModelCache with pre-loaded models (for batch processing)
685
 
686
  Returns:
687
  tuple: (slide_mask, aeon_results, paladin_results)
 
740
  )
741
  logger.info("Slide mask drawn")
742
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
743
  # Convert sex and tissue_site to indices for Aeon model
744
  from mosaic.inference.data import encode_sex, encode_tissue_site
745
 
 
751
  if tissue_site is not None:
752
  tissue_site_idx = encode_tissue_site(tissue_site)
753
 
754
+ # Run inference pipeline - two modes based on model_cache
755
+ if model_cache is not None:
756
+ # Batch mode: use pre-loaded models
757
+ logger.info("Using pre-loaded models from ModelCache (batch mode)")
758
+ aeon_results, paladin_results = _run_inference_pipeline_with_models(
759
  coords,
760
  slide_path,
761
  attrs,
 
764
  tissue_site_idx,
765
  cancer_subtype,
766
  cancer_subtype_name_map,
767
+ model_cache,
768
  num_workers,
769
  progress,
770
  )
771
  else:
772
+ # Single-slide mode: load models on-demand
773
+ # Check if user is logged in for longer GPU duration (HF Spaces only)
774
+ is_logged_in = False
775
+ username = "anonymous"
776
+ if request is not None:
777
+ try:
778
+ # Check if user is logged in via JWT token in referer
779
+ # HF Spaces doesn't populate request.username but includes JWT in URL
780
+ if hasattr(request, "headers"):
781
+ referer = request.headers.get("referer", "")
782
+ if "__sign=" in referer:
783
+ # Extract and decode JWT token
784
+ import re
785
+ import json
786
+ import base64
787
+
788
+ match = re.search(r"__sign=([^&]+)", referer)
789
+ if match:
790
+ token = match.group(1)
791
+ try:
792
+ # JWT format: header.payload.signature
793
+ # We only need the payload (middle part)
794
+ parts = token.split(".")
795
+ if len(parts) == 3:
796
+ # Decode base64 payload (add padding if needed)
797
+ payload = parts[1]
798
+ payload += "=" * (4 - len(payload) % 4)
799
+ decoded = base64.urlsafe_b64decode(payload)
800
+ token_data = json.loads(decoded)
801
+
802
+ # Check if user is in token
803
+ if (
804
+ "onBehalfOf" in token_data
805
+ and "user" in token_data["onBehalfOf"]
806
+ ):
807
+ username = token_data["onBehalfOf"]["user"]
808
+ is_logged_in = True
809
+ logger.info(
810
+ f"Found user in JWT token: {username}"
811
+ )
812
+ except Exception as e:
813
+ logger.warning(f"Failed to decode JWT: {e}")
814
+
815
+ if IS_ZEROGPU:
816
+ logger.info(f"User: {username} | Logged in: {is_logged_in}")
817
+ except Exception as e:
818
+ logger.warning(f"Failed to detect user: {e}")
819
+ import traceback
820
+
821
+ logger.warning(traceback.format_exc())
822
+
823
+ if is_logged_in:
824
+ if IS_ZEROGPU:
825
+ logger.info("Using 300s GPU allocation (logged-in user)")
826
+ aeon_results, paladin_results = _run_inference_pipeline_pro(
827
+ coords,
828
+ slide_path,
829
+ attrs,
830
+ site_type,
831
+ sex_idx,
832
+ tissue_site_idx,
833
+ cancer_subtype,
834
+ cancer_subtype_name_map,
835
+ num_workers,
836
+ progress,
837
+ )
838
+ else:
839
+ if IS_ZEROGPU:
840
+ logger.info("Using 60s GPU allocation (anonymous user)")
841
+ aeon_results, paladin_results = _run_inference_pipeline_free(
842
+ coords,
843
+ slide_path,
844
+ attrs,
845
+ site_type,
846
+ sex_idx,
847
+ tissue_site_idx,
848
+ cancer_subtype,
849
+ cancer_subtype_name_map,
850
+ num_workers,
851
+ progress,
852
+ )
853
 
854
  return slide_mask, aeon_results, paladin_results
src/mosaic/batch_analysis.py DELETED
@@ -1,238 +0,0 @@
1
- """Batch processing coordinator for multi-slide analysis.
2
-
3
- This module provides optimized batch processing functionality that loads
4
- models once and reuses them across multiple slides, significantly reducing
5
- overhead compared to processing slides individually.
6
- """
7
-
8
- from typing import Dict, List, Optional, Tuple
9
- import pandas as pd
10
- import time
11
- from loguru import logger
12
-
13
- from mosaic.model_manager import load_all_models
14
- from mosaic.analysis import analyze_slide_with_models
15
-
16
-
17
- def analyze_slides_batch(
18
- slides: List[str],
19
- settings_df: pd.DataFrame,
20
- cancer_subtype_name_map: Dict[str, str],
21
- num_workers: int = 4,
22
- aggressive_memory_mgmt: Optional[bool] = None,
23
- progress=None,
24
- ) -> Tuple[List[Tuple], List[pd.DataFrame], List[pd.DataFrame]]:
25
- """Analyze multiple slides with models loaded once for batch processing.
26
-
27
- This function provides significant performance improvements over sequential
28
- processing by loading all models once at the start, processing all slides
29
- with the pre-loaded models, and cleaning up at the end.
30
-
31
- Performance Benefits:
32
- - ~90% reduction in model loading operations
33
- - 25-45% overall speedup depending on model loading overhead
34
- - Memory-efficient: same peak memory as single-slide processing
35
-
36
- Args:
37
- slides: List of slide file paths
38
- settings_df: DataFrame with columns matching SETTINGS_COLUMNS from ui/utils.py
39
- cancer_subtype_name_map: Dict mapping cancer subtype display names to OncoTree codes
40
- num_workers: Number of CPU workers for data loading (default: 4)
41
- aggressive_memory_mgmt: Memory management strategy:
42
- - None: Auto-detect based on GPU type (T4 = True, A100 = False)
43
- - True: T4-style aggressive cleanup (load/delete Paladin models per slide)
44
- - False: Cache Paladin models across slides (requires >40GB GPU memory)
45
- progress: Optional Gradio progress tracker
46
-
47
- Returns:
48
- Tuple of (all_slide_masks, all_aeon_results, all_paladin_results):
49
- - all_slide_masks: List of (slide_mask_image, slide_name) tuples
50
- - all_aeon_results: List of DataFrames with Aeon cancer subtype predictions
51
- - all_paladin_results: List of DataFrames with Paladin biomarker predictions
52
-
53
- Example:
54
- ```python
55
- slides = ["slide1.svs", "slide2.svs", "slide3.svs"]
56
- settings_df = pd.DataFrame({
57
- "Slide": ["slide1.svs", "slide2.svs", "slide3.svs"],
58
- "Site Type": ["Primary", "Primary", "Metastatic"],
59
- "Sex": ["Male", "Female", "Unknown"],
60
- "Tissue Site": ["Lung", "Breast", "Unknown"],
61
- "Cancer Subtype": ["Unknown", "Unknown", "LUAD"],
62
- "IHC Subtype": ["", "HR+/HER2-", ""],
63
- "Segmentation Config": ["Biopsy", "Resection", "Biopsy"],
64
- })
65
-
66
- masks, aeon, paladin = analyze_slides_batch(
67
- slides, settings_df, cancer_subtype_name_map
68
- )
69
- ```
70
-
71
- Notes:
72
- - GPU memory requirements: ~9-15GB for typical batches
73
- - T4 GPUs (16GB): Uses aggressive memory management automatically
74
- - A100 GPUs (80GB): Can cache Paladin models for better performance
75
- - Maintains backward compatibility: single slides can still use analyze_slide()
76
- """
77
- if progress is None:
78
- progress = lambda frac, desc: None # No-op progress function
79
-
80
- num_slides = len(slides)
81
- batch_start_time = time.time()
82
-
83
- logger.info("=" * 80)
84
- logger.info(f"BATCH PROCESSING: Starting analysis of {num_slides} slides")
85
- logger.info("=" * 80)
86
-
87
- # Step 1: Load all models once
88
- progress(0.0, desc="Loading models for batch processing")
89
- model_load_start = time.time()
90
-
91
- try:
92
- model_cache = load_all_models(
93
- use_gpu=True,
94
- aggressive_memory_mgmt=aggressive_memory_mgmt,
95
- )
96
-
97
- model_load_time = time.time() - model_load_start
98
- logger.info(f"Model loading completed in {model_load_time:.2f}s")
99
- logger.info("")
100
-
101
- # Log memory strategy
102
- if model_cache.aggressive_memory_mgmt:
103
- logger.info(
104
- "Memory strategy: AGGRESSIVE (T4-style) - "
105
- "Paladin models loaded/freed per slide"
106
- )
107
- else:
108
- logger.info(
109
- "Using caching strategy (A100-style): "
110
- "Paladin models will be cached across slides"
111
- )
112
-
113
- except Exception as e:
114
- logger.error(f"Failed to load models: {e}")
115
- raise
116
-
117
- # Step 2: Process each slide with pre-loaded models
118
- all_slide_masks = []
119
- all_aeon_results = []
120
- all_paladin_results = []
121
- slide_times = []
122
-
123
- logger.info("=" * 80)
124
- logger.info("Processing slides with PRE-LOADED models (no model reloading!)")
125
- logger.info("=" * 80)
126
-
127
- try:
128
- for idx, (slide_path, (_, row)) in enumerate(zip(slides, settings_df.iterrows())):
129
- slide_name = slide_path.split("/")[-1] if "/" in slide_path else slide_path
130
-
131
- # Update progress
132
- progress_frac = (idx + 0.1) / num_slides
133
- progress(progress_frac, desc=f"Analyzing slide {idx + 1}/{num_slides}: {slide_name}")
134
-
135
- logger.info("")
136
- logger.info(f"[{idx + 1}/{num_slides}] Processing: {slide_name}")
137
- logger.info(f" Using pre-loaded models (no disk I/O for core models)")
138
- slide_start_time = time.time()
139
-
140
- try:
141
- # Use batch-optimized analysis with pre-loaded models
142
- slide_mask, aeon_results, paladin_results = analyze_slide_with_models(
143
- slide_path=slide_path,
144
- seg_config=row["Segmentation Config"],
145
- site_type=row["Site Type"],
146
- sex=row.get("Sex", "Unknown"),
147
- tissue_site=row.get("Tissue Site", "Unknown"),
148
- cancer_subtype=row["Cancer Subtype"],
149
- cancer_subtype_name_map=cancer_subtype_name_map,
150
- model_cache=model_cache,
151
- ihc_subtype=row.get("IHC Subtype", ""),
152
- num_workers=num_workers,
153
- progress=progress,
154
- )
155
-
156
- slide_time = time.time() - slide_start_time
157
- slide_times.append(slide_time)
158
-
159
- # Collect results
160
- if slide_mask is not None:
161
- all_slide_masks.append((slide_mask, slide_name))
162
-
163
- if aeon_results is not None:
164
- # Add slide name to results for multi-slide batches
165
- if num_slides > 1:
166
- aeon_results.columns = [f"{slide_name}"]
167
- all_aeon_results.append(aeon_results)
168
-
169
- if paladin_results is not None:
170
- # Add slide name column
171
- paladin_results.insert(
172
- 0, "Slide", pd.Series([slide_name] * len(paladin_results))
173
- )
174
- all_paladin_results.append(paladin_results)
175
-
176
- logger.info(f"[{idx + 1}/{num_slides}] ✓ Completed in {slide_time:.2f}s")
177
-
178
- except Exception as e:
179
- slide_time = time.time() - slide_start_time
180
- slide_times.append(slide_time)
181
- logger.exception(f"[{idx + 1}/{num_slides}] ✗ Failed after {slide_time:.2f}s: {e}")
182
- # Continue with next slide instead of failing entire batch
183
- continue
184
-
185
- finally:
186
- # Step 3: Always cleanup models (even if there were errors)
187
- logger.info("")
188
- logger.info("=" * 80)
189
- logger.info("Cleaning up models...")
190
- progress(0.99, desc="Cleaning up models")
191
- model_cache.cleanup()
192
- logger.info("✓ Model cleanup complete")
193
-
194
- # Calculate batch statistics
195
- batch_total_time = time.time() - batch_start_time
196
- num_successful = len(all_slide_masks)
197
- num_failed = num_slides - num_successful
198
-
199
- # Log comprehensive summary
200
- logger.info("=" * 80)
201
- logger.info("BATCH PROCESSING SUMMARY")
202
- logger.info("=" * 80)
203
- logger.info(f"Total slides: {num_slides}")
204
- logger.info(f"Successfully processed: {num_successful}")
205
- logger.info(f"Failed: {num_failed}")
206
- logger.info("")
207
- logger.info(f"Model loading time: {model_load_time:.2f}s (done ONCE for entire batch)")
208
- logger.info(f"Total batch time: {batch_total_time:.2f}s")
209
-
210
- if slide_times:
211
- avg_slide_time = sum(slide_times) / len(slide_times)
212
- min_slide_time = min(slide_times)
213
- max_slide_time = max(slide_times)
214
- total_slide_time = sum(slide_times)
215
-
216
- logger.info("")
217
- logger.info("Per-slide processing times:")
218
- logger.info(f" Average: {avg_slide_time:.2f}s")
219
- logger.info(f" Min: {min_slide_time:.2f}s")
220
- logger.info(f" Max: {max_slide_time:.2f}s")
221
- logger.info(f" Total: {total_slide_time:.2f}s")
222
-
223
- # Calculate efficiency
224
- overhead_time = batch_total_time - total_slide_time
225
- logger.info("")
226
- logger.info(f"Batch overhead: {overhead_time:.2f}s ({overhead_time/batch_total_time*100:.1f}%)")
227
- logger.info(f"Slide processing: {total_slide_time:.2f}s ({total_slide_time/batch_total_time*100:.1f}%)")
228
-
229
- logger.info("")
230
- logger.info("✓ Batch processing optimization benefits:")
231
- logger.info(" - Models loaded ONCE (not once per slide)")
232
- logger.info(" - Reduced disk I/O for model loading")
233
- logger.info(f" - Processed {num_slides} slides with shared model cache")
234
- logger.info("=" * 80)
235
-
236
- progress(1.0, desc=f"Batch analysis complete ({num_successful}/{num_slides} successful)")
237
-
238
- return all_slide_masks, all_aeon_results, all_paladin_results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/mosaic/gradio_app.py CHANGED
@@ -25,15 +25,15 @@ from mosaic.ui.utils import (
25
  SEX_OPTIONS,
26
  )
27
  from mosaic.analysis import analyze_slide
28
- from mosaic.batch_analysis import analyze_slides_batch
29
 
30
 
31
  def download_and_process_models():
32
- """Download models from HuggingFace and initialize cancer subtype mappings.
33
 
34
- Downloads the Paladin and Aeon models from the PDM-Group HuggingFace repository
35
- to the HuggingFace cache directory and creates mappings between cancer subtype
36
- names and OncoTree codes.
37
 
38
  Returns:
39
  tuple: (cancer_subtype_name_map, reversed_cancer_subtype_name_map, cancer_subtypes)
@@ -41,47 +41,69 @@ def download_and_process_models():
41
  - reversed_cancer_subtype_name_map: Dict mapping OncoTree codes to display names
42
  - cancer_subtypes: List of all supported cancer subtype codes
43
  """
44
- # Download to HF cache directory (not local_dir)
45
- # This returns the path to the cached snapshot
46
- logger.info("Downloading models from HuggingFace Hub to cache directory...")
 
 
47
  cache_dir = snapshot_download(
48
  repo_id="PDM-Group/paladin-aeon-models",
 
 
 
 
 
 
 
49
  # No local_dir - use HF cache
50
  )
51
- logger.info(f"Models downloaded to: {cache_dir}")
52
 
53
  # Set the data directory for other modules to use
54
  set_data_directory(cache_dir)
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  model_map = pd.read_csv(
57
  Path(cache_dir) / "paladin_model_map.csv",
58
  )
59
  cancer_subtypes = model_map["cancer_subtype"].unique().tolist()
60
  cancer_subtype_name_map = {"Unknown": "UNK"}
61
- cancer_subtype_name_map.update({
62
- f"{get_oncotree_code_name(code)} ({code})": code for code in cancer_subtypes
63
- })
64
  reversed_cancer_subtype_name_map = {
65
  value: key for key, value in cancer_subtype_name_map.items()
66
  }
67
-
68
- # Set the global maps in the UI module
69
- set_cancer_subtype_maps(cancer_subtype_name_map, reversed_cancer_subtype_name_map, cancer_subtypes)
70
-
71
- return cancer_subtype_name_map, reversed_cancer_subtype_name_map, cancer_subtypes
72
-
73
 
 
 
 
 
74
 
 
75
 
76
 
77
  def main():
78
  """Main entry point for the Mosaic application.
79
-
80
  Parses command-line arguments and routes to the appropriate mode:
81
  - Single slide processing (--slide-path)
82
  - Batch processing (--slide-csv)
83
  - Web interface (default, no slide arguments)
84
-
85
  Command-line arguments control analysis parameters like site type,
86
  cancer subtype, segmentation configuration, and output directory.
87
  """
@@ -160,7 +182,9 @@ def main():
160
  logger.add("debug.log", level="DEBUG")
161
  logger.debug("Debug logging enabled")
162
 
163
- cancer_subtype_name_map, reversed_cancer_subtype_name_map, cancer_subtypes = download_and_process_models()
 
 
164
 
165
  if args.slide_path and not args.slide_csv:
166
  # Single slide processing mode
@@ -180,7 +204,12 @@ def main():
180
  ],
181
  columns=SETTINGS_COLUMNS,
182
  )
183
- settings_df = validate_settings(settings_df, cancer_subtype_name_map, cancer_subtypes, reversed_cancer_subtype_name_map)
 
 
 
 
 
184
  slide_mask, aeon_results, paladin_results = analyze_slide(
185
  args.slide_path,
186
  args.segmentation_config,
@@ -218,24 +247,62 @@ def main():
218
  # Load and validate settings
219
  settings_df = load_settings(args.slide_csv)
220
  settings_df = validate_settings(
221
- settings_df, cancer_subtype_name_map, cancer_subtypes, reversed_cancer_subtype_name_map
 
 
 
222
  )
223
 
224
  # Extract slide paths
225
  slides = settings_df["Slide"].tolist()
226
 
227
- logger.info(f"Processing {len(slides)} slides in batch mode with models loaded once")
228
-
229
- # Use batch processing (models loaded once)
230
- all_slide_masks, all_aeon_results, all_paladin_results = analyze_slides_batch(
231
- slides=slides,
232
- settings_df=settings_df,
233
- cancer_subtype_name_map=cancer_subtype_name_map,
234
- num_workers=args.num_workers,
235
- aggressive_memory_mgmt=None, # Auto-detect GPU type
236
- progress=None,
237
  )
238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  # Save individual slide results
240
  for idx, (slide_mask, slide_name) in enumerate(all_slide_masks):
241
  mask_path = output_dir / f"{slide_name}_mask.png"
@@ -252,7 +319,9 @@ def main():
252
  if all_paladin_results:
253
  combined_paladin = pd.concat(all_paladin_results, ignore_index=True)
254
  for slide_name in combined_paladin["Slide"].unique():
255
- slide_paladin = combined_paladin[combined_paladin["Slide"] == slide_name]
 
 
256
  paladin_output_path = output_dir / f"{slide_name}_paladin_results.csv"
257
  slide_paladin.to_csv(paladin_output_path, index=False)
258
  logger.info(f"Saved Paladin results to {paladin_output_path}")
 
25
  SEX_OPTIONS,
26
  )
27
  from mosaic.analysis import analyze_slide
28
+ from mosaic.model_manager import load_all_models
29
 
30
 
31
  def download_and_process_models():
32
+ """Download essential models from HuggingFace and initialize cancer subtype mappings.
33
 
34
+ Downloads only the core models (CTransPath, Optimus, Aeon, marker classifier) and
35
+ metadata files from the PDM-Group HuggingFace repository. Paladin models are
36
+ downloaded on-demand when needed for inference.
37
 
38
  Returns:
39
  tuple: (cancer_subtype_name_map, reversed_cancer_subtype_name_map, cancer_subtypes)
 
41
  - reversed_cancer_subtype_name_map: Dict mapping OncoTree codes to display names
42
  - cancer_subtypes: List of all supported cancer subtype codes
43
  """
44
+ # Download only essential files to HF cache directory
45
+ # Paladin models will be downloaded on-demand
46
+ logger.info(
47
+ "Downloading essential models from HuggingFace Hub (Paladin models loaded on-demand)..."
48
+ )
49
  cache_dir = snapshot_download(
50
  repo_id="PDM-Group/paladin-aeon-models",
51
+ allow_patterns=[
52
+ "*.csv", # Model maps and metadata
53
+ "ctranspath.pth", # CTransPath model
54
+ "aeon_model.pkl", # Aeon model
55
+ "marker_classifier.pkl", # Marker classifier
56
+ "tissue_site_*", # Tissue site mappings
57
+ ],
58
  # No local_dir - use HF cache
59
  )
60
+ logger.info(f"Essential models downloaded to: {cache_dir}")
61
 
62
  # Set the data directory for other modules to use
63
  set_data_directory(cache_dir)
64
 
65
+ # Pre-download Optimus model from bioptimus/H-optimus-0
66
+ # This ensures it's cached at startup since it's needed for every slide
67
+ logger.info("Pre-downloading Optimus model from bioptimus/H-optimus-0...")
68
+ from mussel.models import ModelType, get_model_factory
69
+
70
+ optimus_factory = get_model_factory(ModelType.OPTIMUS)
71
+ # This will trigger the download and cache the model
72
+ _ = optimus_factory.get_model(
73
+ model_path="hf-hub:bioptimus/H-optimus-0",
74
+ use_gpu=False, # Just download, don't load to GPU yet
75
+ gpu_device_id=None,
76
+ )
77
+ logger.info("✓ Optimus model cached")
78
+
79
  model_map = pd.read_csv(
80
  Path(cache_dir) / "paladin_model_map.csv",
81
  )
82
  cancer_subtypes = model_map["cancer_subtype"].unique().tolist()
83
  cancer_subtype_name_map = {"Unknown": "UNK"}
84
+ cancer_subtype_name_map.update(
85
+ {f"{get_oncotree_code_name(code)} ({code})": code for code in cancer_subtypes}
86
+ )
87
  reversed_cancer_subtype_name_map = {
88
  value: key for key, value in cancer_subtype_name_map.items()
89
  }
 
 
 
 
 
 
90
 
91
+ # Set the global maps in the UI module
92
+ set_cancer_subtype_maps(
93
+ cancer_subtype_name_map, reversed_cancer_subtype_name_map, cancer_subtypes
94
+ )
95
 
96
+ return cancer_subtype_name_map, reversed_cancer_subtype_name_map, cancer_subtypes
97
 
98
 
99
  def main():
100
  """Main entry point for the Mosaic application.
101
+
102
  Parses command-line arguments and routes to the appropriate mode:
103
  - Single slide processing (--slide-path)
104
  - Batch processing (--slide-csv)
105
  - Web interface (default, no slide arguments)
106
+
107
  Command-line arguments control analysis parameters like site type,
108
  cancer subtype, segmentation configuration, and output directory.
109
  """
 
182
  logger.add("debug.log", level="DEBUG")
183
  logger.debug("Debug logging enabled")
184
 
185
+ cancer_subtype_name_map, reversed_cancer_subtype_name_map, cancer_subtypes = (
186
+ download_and_process_models()
187
+ )
188
 
189
  if args.slide_path and not args.slide_csv:
190
  # Single slide processing mode
 
204
  ],
205
  columns=SETTINGS_COLUMNS,
206
  )
207
+ settings_df = validate_settings(
208
+ settings_df,
209
+ cancer_subtype_name_map,
210
+ cancer_subtypes,
211
+ reversed_cancer_subtype_name_map,
212
+ )
213
  slide_mask, aeon_results, paladin_results = analyze_slide(
214
  args.slide_path,
215
  args.segmentation_config,
 
247
  # Load and validate settings
248
  settings_df = load_settings(args.slide_csv)
249
  settings_df = validate_settings(
250
+ settings_df,
251
+ cancer_subtype_name_map,
252
+ cancer_subtypes,
253
+ reversed_cancer_subtype_name_map,
254
  )
255
 
256
  # Extract slide paths
257
  slides = settings_df["Slide"].tolist()
258
 
259
+ logger.info(
260
+ f"Processing {len(slides)} slides in batch mode with models loaded once"
 
 
 
 
 
 
 
 
261
  )
262
 
263
+ # Load models once for batch processing
264
+ model_cache = load_all_models(use_gpu=True, aggressive_memory_mgmt=None)
265
+
266
+ all_slide_masks = []
267
+ all_aeon_results = []
268
+ all_paladin_results = []
269
+
270
+ try:
271
+ # Process each slide with pre-loaded models
272
+ for idx, slide_path in enumerate(slides):
273
+ row = settings_df.iloc[idx]
274
+ slide_name = row["Slide"]
275
+
276
+ logger.info(f"[{idx + 1}/{len(slides)}] Processing: {slide_name}")
277
+
278
+ slide_mask, aeon_results, paladin_results = analyze_slide(
279
+ slide_path=slide_path,
280
+ seg_config=row["Segmentation Config"],
281
+ site_type=row["Site Type"],
282
+ sex=row.get("Sex", "Unknown"),
283
+ tissue_site=row.get("Tissue Site", "Unknown"),
284
+ cancer_subtype=row["Cancer Subtype"],
285
+ cancer_subtype_name_map=cancer_subtype_name_map,
286
+ ihc_subtype=row.get("IHC Subtype", ""),
287
+ num_workers=args.num_workers,
288
+ progress=lambda frac, desc: None, # No-op progress for CLI
289
+ request=None,
290
+ model_cache=model_cache,
291
+ )
292
+
293
+ if slide_mask is not None:
294
+ all_slide_masks.append((slide_mask, slide_name))
295
+ if aeon_results is not None:
296
+ all_aeon_results.append(aeon_results)
297
+ if paladin_results is not None:
298
+ paladin_results.insert(
299
+ 0, "Slide", pd.Series([slide_name] * len(paladin_results))
300
+ )
301
+ all_paladin_results.append(paladin_results)
302
+ finally:
303
+ logger.info("Cleaning up model cache")
304
+ model_cache.cleanup()
305
+
306
  # Save individual slide results
307
  for idx, (slide_mask, slide_name) in enumerate(all_slide_masks):
308
  mask_path = output_dir / f"{slide_name}_mask.png"
 
319
  if all_paladin_results:
320
  combined_paladin = pd.concat(all_paladin_results, ignore_index=True)
321
  for slide_name in combined_paladin["Slide"].unique():
322
+ slide_paladin = combined_paladin[
323
+ combined_paladin["Slide"] == slide_name
324
+ ]
325
  paladin_output_path = output_dir / f"{slide_name}_paladin_results.csv"
326
  slide_paladin.to_csv(paladin_output_path, index=False)
327
  logger.info(f"Saved Paladin results to {paladin_output_path}")
src/mosaic/inference/aeon.py CHANGED
@@ -80,8 +80,12 @@ def run_with_model(
80
  target_dict = json.loads(target_dict_str)
81
 
82
  histologies = target_dict["histologies"]
83
- INT_TO_CANCER_TYPE_MAP_LOCAL = {i: histology for i, histology in enumerate(histologies)}
84
- CANCER_TYPE_TO_INT_MAP_LOCAL = {v: k for k, v in INT_TO_CANCER_TYPE_MAP_LOCAL.items()}
 
 
 
 
85
 
86
  # Calculate col_indices_to_drop using local mapping
87
  col_indices_to_drop_local = [
@@ -100,7 +104,9 @@ def run_with_model(
100
  tissue_site_idx=tissue_site_idx,
101
  n_max_tiles=20000,
102
  )
103
- dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
 
 
104
 
105
  results = []
106
  batch = next(iter(dataloader))
@@ -140,8 +146,14 @@ def run_with_model(
140
 
141
 
142
  def run(
143
- features, model_path, metastatic=False, batch_size=8, num_workers=8, use_cpu=False,
144
- sex=None, tissue_site_idx=None
 
 
 
 
 
 
145
  ):
146
  """Run Aeon model inference for cancer subtype prediction.
147
 
@@ -176,12 +188,20 @@ def run(
176
  target_dict_str = f.read().strip().replace("'", '"')
177
  target_dict = json.loads(target_dict_str)
178
 
179
- histologies = target_dict['histologies']
180
- INT_TO_CANCER_TYPE_MAP_LOCAL = {i: histology for i, histology in enumerate(histologies)}
181
- CANCER_TYPE_TO_INT_MAP_LOCAL = {v: k for k, v in INT_TO_CANCER_TYPE_MAP_LOCAL.items()}
 
 
 
 
182
 
183
  # Calculate col_indices_to_drop using local mapping
184
- col_indices_to_drop_local = [CANCER_TYPE_TO_INT_MAP_LOCAL[x] for x in CANCER_TYPES_TO_DROP if x in CANCER_TYPE_TO_INT_MAP_LOCAL]
 
 
 
 
185
 
186
  site_type = SiteType.METASTASIS if metastatic else SiteType.PRIMARY
187
 
@@ -306,7 +326,9 @@ def main():
306
  tissue_site_idx = None
307
  if opt.tissue_site:
308
  tissue_site_idx = encode_tissue_site(opt.tissue_site)
309
- logger.info(f"Using tissue site: {opt.tissue_site} (encoded as {tissue_site_idx})")
 
 
310
 
311
  results_df, part_embedding = run(
312
  features=features,
 
80
  target_dict = json.loads(target_dict_str)
81
 
82
  histologies = target_dict["histologies"]
83
+ INT_TO_CANCER_TYPE_MAP_LOCAL = {
84
+ i: histology for i, histology in enumerate(histologies)
85
+ }
86
+ CANCER_TYPE_TO_INT_MAP_LOCAL = {
87
+ v: k for k, v in INT_TO_CANCER_TYPE_MAP_LOCAL.items()
88
+ }
89
 
90
  # Calculate col_indices_to_drop using local mapping
91
  col_indices_to_drop_local = [
 
104
  tissue_site_idx=tissue_site_idx,
105
  n_max_tiles=20000,
106
  )
107
+ dataloader = DataLoader(
108
+ dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers
109
+ )
110
 
111
  results = []
112
  batch = next(iter(dataloader))
 
146
 
147
 
148
  def run(
149
+ features,
150
+ model_path,
151
+ metastatic=False,
152
+ batch_size=8,
153
+ num_workers=8,
154
+ use_cpu=False,
155
+ sex=None,
156
+ tissue_site_idx=None,
157
  ):
158
  """Run Aeon model inference for cancer subtype prediction.
159
 
 
188
  target_dict_str = f.read().strip().replace("'", '"')
189
  target_dict = json.loads(target_dict_str)
190
 
191
+ histologies = target_dict["histologies"]
192
+ INT_TO_CANCER_TYPE_MAP_LOCAL = {
193
+ i: histology for i, histology in enumerate(histologies)
194
+ }
195
+ CANCER_TYPE_TO_INT_MAP_LOCAL = {
196
+ v: k for k, v in INT_TO_CANCER_TYPE_MAP_LOCAL.items()
197
+ }
198
 
199
  # Calculate col_indices_to_drop using local mapping
200
+ col_indices_to_drop_local = [
201
+ CANCER_TYPE_TO_INT_MAP_LOCAL[x]
202
+ for x in CANCER_TYPES_TO_DROP
203
+ if x in CANCER_TYPE_TO_INT_MAP_LOCAL
204
+ ]
205
 
206
  site_type = SiteType.METASTASIS if metastatic else SiteType.PRIMARY
207
 
 
326
  tissue_site_idx = None
327
  if opt.tissue_site:
328
  tissue_site_idx = encode_tissue_site(opt.tissue_site)
329
+ logger.info(
330
+ f"Using tissue site: {opt.tissue_site} (encoded as {tissue_site_idx})"
331
+ )
332
 
333
  results_df, part_embedding = run(
334
  features=features,
src/mosaic/inference/data.py CHANGED
@@ -212,10 +212,10 @@ DEFAULT_TISSUE_SITE_IDX = 8
212
 
213
  def get_tissue_site_map():
214
  """Load tissue site name → index mapping from CSV.
215
-
216
  Returns:
217
  dict: Mapping of tissue site names to indices (0-56)
218
-
219
  Raises:
220
  FileNotFoundError: If the tissue site CSV file is not found
221
  """
@@ -232,17 +232,17 @@ def get_tissue_site_map():
232
  f"Tissue site mapping file not found at {csv_path}. "
233
  f"Please ensure the data directory contains 'tissue_site_original_to_idx.csv'."
234
  ) from e
235
-
236
  _TISSUE_SITE_MAP = {}
237
  for _, row in df.iterrows():
238
- _TISSUE_SITE_MAP[row['TISSUE_SITE']] = int(row['idx'])
239
-
240
  return _TISSUE_SITE_MAP
241
 
242
 
243
  def get_tissue_site_options():
244
  """Get sorted unique tissue site names for UI dropdowns.
245
-
246
  Returns:
247
  list: Sorted list of unique tissue site names
248
  """
@@ -258,7 +258,7 @@ def get_sex_map():
258
 
259
  Returns:
260
  dict: Mapping of sex values to indices (0-2)
261
-
262
  Raises:
263
  FileNotFoundError: If the sex mapping CSV file is not found
264
  """
@@ -278,7 +278,7 @@ def get_sex_map():
278
 
279
  _SEX_MAP = {}
280
  for _, row in df.iterrows():
281
- _SEX_MAP[row['SEX']] = int(row['idx'])
282
 
283
  return _SEX_MAP
284
 
@@ -299,10 +299,10 @@ def encode_sex(sex):
299
 
300
  def encode_tissue_site(site_name):
301
  """Convert tissue site name to index (0-56).
302
-
303
  Args:
304
  site_name: Tissue site name from CSV
305
-
306
  Returns:
307
  int: Tissue site index, defaults to DEFAULT_TISSUE_SITE_IDX ("Not Applicable")
308
  """
@@ -312,11 +312,11 @@ def encode_tissue_site(site_name):
312
 
313
  def tissue_site_to_one_hot(site_idx, num_classes=57):
314
  """Convert tissue site index to one-hot vector.
315
-
316
  Args:
317
  site_idx: Index value (0-56 for tissue site, 0-2 for sex)
318
  num_classes: Number of classes (57 for tissue site, 3 for sex)
319
-
320
  Returns:
321
  list: One-hot encoded vector
322
  """
@@ -395,22 +395,18 @@ class TileFeatureTensorDataset(Dataset):
395
  Returns:
396
  dict: the item
397
  """
398
- result = {
399
- "site": self.site_type.value,
400
- "tile_tensor": self.features
401
- }
402
-
403
  # Add sex and tissue_site if provided (for Aeon)
404
  if self.sex is not None:
405
  result["SEX"] = torch.tensor(
406
- tissue_site_to_one_hot(self.sex, num_classes=3),
407
- dtype=torch.float32
408
  )
409
-
410
  if self.tissue_site_idx is not None:
411
  result["TISSUE_SITE"] = torch.tensor(
412
  tissue_site_to_one_hot(self.tissue_site_idx, num_classes=57),
413
- dtype=torch.float32
414
  )
415
-
416
  return result
 
212
 
213
  def get_tissue_site_map():
214
  """Load tissue site name → index mapping from CSV.
215
+
216
  Returns:
217
  dict: Mapping of tissue site names to indices (0-56)
218
+
219
  Raises:
220
  FileNotFoundError: If the tissue site CSV file is not found
221
  """
 
232
  f"Tissue site mapping file not found at {csv_path}. "
233
  f"Please ensure the data directory contains 'tissue_site_original_to_idx.csv'."
234
  ) from e
235
+
236
  _TISSUE_SITE_MAP = {}
237
  for _, row in df.iterrows():
238
+ _TISSUE_SITE_MAP[row["TISSUE_SITE"]] = int(row["idx"])
239
+
240
  return _TISSUE_SITE_MAP
241
 
242
 
243
  def get_tissue_site_options():
244
  """Get sorted unique tissue site names for UI dropdowns.
245
+
246
  Returns:
247
  list: Sorted list of unique tissue site names
248
  """
 
258
 
259
  Returns:
260
  dict: Mapping of sex values to indices (0-2)
261
+
262
  Raises:
263
  FileNotFoundError: If the sex mapping CSV file is not found
264
  """
 
278
 
279
  _SEX_MAP = {}
280
  for _, row in df.iterrows():
281
+ _SEX_MAP[row["SEX"]] = int(row["idx"])
282
 
283
  return _SEX_MAP
284
 
 
299
 
300
  def encode_tissue_site(site_name):
301
  """Convert tissue site name to index (0-56).
302
+
303
  Args:
304
  site_name: Tissue site name from CSV
305
+
306
  Returns:
307
  int: Tissue site index, defaults to DEFAULT_TISSUE_SITE_IDX ("Not Applicable")
308
  """
 
312
 
313
  def tissue_site_to_one_hot(site_idx, num_classes=57):
314
  """Convert tissue site index to one-hot vector.
315
+
316
  Args:
317
  site_idx: Index value (0-56 for tissue site, 0-2 for sex)
318
  num_classes: Number of classes (57 for tissue site, 3 for sex)
319
+
320
  Returns:
321
  list: One-hot encoded vector
322
  """
 
395
  Returns:
396
  dict: the item
397
  """
398
+ result = {"site": self.site_type.value, "tile_tensor": self.features}
399
+
 
 
 
400
  # Add sex and tissue_site if provided (for Aeon)
401
  if self.sex is not None:
402
  result["SEX"] = torch.tensor(
403
+ tissue_site_to_one_hot(self.sex, num_classes=3), dtype=torch.float32
 
404
  )
405
+
406
  if self.tissue_site_idx is not None:
407
  result["TISSUE_SITE"] = torch.tensor(
408
  tissue_site_to_one_hot(self.tissue_site_idx, num_classes=57),
409
+ dtype=torch.float32,
410
  )
411
+
412
  return result
src/mosaic/inference/paladin.py CHANGED
@@ -38,10 +38,10 @@ def load_model_map(model_map_path: str) -> dict[Any, Any]:
38
 
39
  A dict is returned, mapping each cancer_subtype to a table mapping a
40
  target to the pathname for the model that predicts it.
41
-
42
  Args:
43
  model_map_path: Path to the CSV file containing the model map
44
-
45
  Returns:
46
  Dictionary mapping cancer subtypes to their target-specific models
47
  """
@@ -58,10 +58,10 @@ def load_model_map(model_map_path: str) -> dict[Any, Any]:
58
 
59
  def load_aeon_scores(df: pd.DataFrame) -> dict[str, float]:
60
  """Load Aeon output table with cancer subtypes and confidence values.
61
-
62
  Args:
63
  df: DataFrame with columns 'Cancer Subtype' and 'Confidence'
64
-
65
  Returns:
66
  Dictionary mapping cancer subtypes to their confidence scores
67
  """
@@ -75,11 +75,11 @@ def load_aeon_scores(df: pd.DataFrame) -> dict[str, float]:
75
 
76
  def select_cancer_subtypes(aeon_scores: dict[str, float], k=1) -> list[str]:
77
  """Select the top k cancer subtypes based on Aeon confidence scores.
78
-
79
  Args:
80
  aeon_scores: Dictionary mapping cancer subtypes to confidence scores
81
  k: Number of top subtypes to select (default: 1)
82
-
83
  Returns:
84
  List of cancer subtype codes sorted by confidence (highest first)
85
  """
@@ -91,11 +91,11 @@ def select_cancer_subtypes(aeon_scores: dict[str, float], k=1) -> list[str]:
91
 
92
  def select_models(cancer_subtypes: list[str], model_map: dict[Any, Any]) -> list[Any]:
93
  """Select Paladin models for the given cancer subtypes.
94
-
95
  Args:
96
  cancer_subtypes: List of cancer subtype codes
97
  model_map: Dictionary mapping cancer subtypes to their models
98
-
99
  Returns:
100
  List of tuples (cancer_subtype, target, model_path)
101
  """
@@ -188,13 +188,13 @@ def run_model(device, dataset, model_path: str, num_workers, batch_size) -> floa
188
 
189
  def logits_to_point_estimates(logits):
190
  """Convert model logits to point estimates for beta-binomial distribution.
191
-
192
  The logits tensor contains alpha and beta parameters interleaved.
193
  This function computes the mean of the beta-binomial distribution: alpha/(alpha+beta).
194
-
195
  Args:
196
  logits: Tensor of shape (batch_size, 2*(n_tasks)) with alpha/beta parameters
197
-
198
  Returns:
199
  Tensor of shape (batch_size, n_tasks) with point estimates
200
  """
@@ -215,10 +215,10 @@ def run(
215
  use_cpu: bool = False,
216
  ):
217
  """Run Paladin inference for biomarker prediction on a single slide.
218
-
219
  Uses either Aeon predictions or user-provided cancer subtype codes to select
220
  the appropriate Paladin models for biomarker prediction.
221
-
222
  Args:
223
  features: NumPy array of tile features extracted from the WSI
224
  aeon_results: DataFrame with Aeon predictions (Cancer Subtype, Confidence)
@@ -229,10 +229,10 @@ def run(
229
  batch_size: Batch size for inference
230
  num_workers: Number of workers for data loading
231
  use_cpu: Force CPU usage instead of GPU
232
-
233
  Returns:
234
  DataFrame with columns: Cancer Subtype, Target, Score
235
-
236
  Note:
237
  Either aeon_results or cancer_subtype_codes must be provided, but not both.
238
  Either model_map_path or model_path must be provided, but not both.
 
38
 
39
  A dict is returned, mapping each cancer_subtype to a table mapping a
40
  target to the pathname for the model that predicts it.
41
+
42
  Args:
43
  model_map_path: Path to the CSV file containing the model map
44
+
45
  Returns:
46
  Dictionary mapping cancer subtypes to their target-specific models
47
  """
 
58
 
59
  def load_aeon_scores(df: pd.DataFrame) -> dict[str, float]:
60
  """Load Aeon output table with cancer subtypes and confidence values.
61
+
62
  Args:
63
  df: DataFrame with columns 'Cancer Subtype' and 'Confidence'
64
+
65
  Returns:
66
  Dictionary mapping cancer subtypes to their confidence scores
67
  """
 
75
 
76
  def select_cancer_subtypes(aeon_scores: dict[str, float], k=1) -> list[str]:
77
  """Select the top k cancer subtypes based on Aeon confidence scores.
78
+
79
  Args:
80
  aeon_scores: Dictionary mapping cancer subtypes to confidence scores
81
  k: Number of top subtypes to select (default: 1)
82
+
83
  Returns:
84
  List of cancer subtype codes sorted by confidence (highest first)
85
  """
 
91
 
92
  def select_models(cancer_subtypes: list[str], model_map: dict[Any, Any]) -> list[Any]:
93
  """Select Paladin models for the given cancer subtypes.
94
+
95
  Args:
96
  cancer_subtypes: List of cancer subtype codes
97
  model_map: Dictionary mapping cancer subtypes to their models
98
+
99
  Returns:
100
  List of tuples (cancer_subtype, target, model_path)
101
  """
 
188
 
189
  def logits_to_point_estimates(logits):
190
  """Convert model logits to point estimates for beta-binomial distribution.
191
+
192
  The logits tensor contains alpha and beta parameters interleaved.
193
  This function computes the mean of the beta-binomial distribution: alpha/(alpha+beta).
194
+
195
  Args:
196
  logits: Tensor of shape (batch_size, 2*(n_tasks)) with alpha/beta parameters
197
+
198
  Returns:
199
  Tensor of shape (batch_size, n_tasks) with point estimates
200
  """
 
215
  use_cpu: bool = False,
216
  ):
217
  """Run Paladin inference for biomarker prediction on a single slide.
218
+
219
  Uses either Aeon predictions or user-provided cancer subtype codes to select
220
  the appropriate Paladin models for biomarker prediction.
221
+
222
  Args:
223
  features: NumPy array of tile features extracted from the WSI
224
  aeon_results: DataFrame with Aeon predictions (Cancer Subtype, Confidence)
 
229
  batch_size: Batch size for inference
230
  num_workers: Number of workers for data loading
231
  use_cpu: Force CPU usage instead of GPU
232
+
233
  Returns:
234
  DataFrame with columns: Cancer Subtype, Target, Score
235
+
236
  Note:
237
  Either aeon_results or cancer_subtype_codes must be provided, but not both.
238
  Either model_map_path or model_path must be provided, but not both.
src/mosaic/model_manager.py CHANGED
@@ -13,6 +13,7 @@ import torch
13
  from loguru import logger
14
 
15
  from mosaic.data_directory import get_data_directory
 
16
 
17
 
18
  class ModelCache:
@@ -50,7 +51,9 @@ class ModelCache:
50
  self.paladin_models: Dict[tuple, torch.nn.Module] = {}
51
  self.is_t4_gpu = is_t4_gpu
52
  self.aggressive_memory_mgmt = aggressive_memory_mgmt
53
- self.device = device or torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 
54
 
55
  def cleanup_paladin(self):
56
  """Aggressively free all Paladin models from memory.
@@ -78,15 +81,18 @@ class ModelCache:
78
  self.cleanup_paladin()
79
 
80
  # Clean up core models
81
- del self.ctranspath_model
82
- del self.optimus_model
83
- del self.marker_classifier
84
- del self.aeon_model
85
-
86
- self.ctranspath_model = None
87
- self.optimus_model = None
88
- self.marker_classifier = None
89
- self.aeon_model = None
 
 
 
90
 
91
  # Force garbage collection and GPU cache clearing
92
  gc.collect()
@@ -147,7 +153,9 @@ def load_all_models(
147
  if is_t4_gpu:
148
  logger.info(" → Paladin models will be loaded and freed per slide")
149
  else:
150
- logger.info(" → Paladin models will be cached and reused across slides")
 
 
151
  elif use_gpu and not torch.cuda.is_available():
152
  logger.warning("GPU requested but CUDA not available, falling back to CPU")
153
  use_gpu = False
@@ -165,24 +173,37 @@ def load_all_models(
165
  if not ctranspath_path.exists():
166
  raise FileNotFoundError(f"CTransPath model not found at {ctranspath_path}")
167
 
168
- # Note: CTransPath loading is handled by mussel, so we just store the path for now
169
- # We'll integrate with mussel's model factory in the feature extraction wrappers
170
- ctranspath_model = ctranspath_path
 
 
171
 
172
- # Load Optimus model
173
- logger.info("Loading Optimus model...")
174
- optimus_path = data_dir / "optimus.pkl"
175
- if not optimus_path.exists():
176
- raise FileNotFoundError(f"Optimus model not found at {optimus_path}")
177
 
178
- # Note: Same as CTransPath, Optimus loading is handled by mussel
179
- optimus_model = optimus_path
 
 
 
 
 
 
 
 
 
 
 
180
 
181
  # Load Marker Classifier
182
  logger.info("Loading Marker Classifier...")
183
  marker_classifier_path = data_dir / "marker_classifier.pkl"
184
  if not marker_classifier_path.exists():
185
- raise FileNotFoundError(f"Marker classifier not found at {marker_classifier_path}")
 
 
186
 
187
  with open(marker_classifier_path, "rb") as f:
188
  marker_classifier = pickle.load(f) # nosec
@@ -238,12 +259,14 @@ def load_paladin_model_for_inference(
238
  cache: ModelCache,
239
  model_path: Path,
240
  ) -> torch.nn.Module:
241
- """Load a single Paladin model for inference.
242
 
243
  Implements adaptive loading strategy:
244
  - T4 GPU (aggressive mode): Load model fresh, caller must delete after use
245
  - A100 GPU (caching mode): Check cache, load if needed, return cached model
246
 
 
 
247
  Args:
248
  cache: ModelCache instance managing loaded models
249
  model_path: Path to the Paladin model file
@@ -255,6 +278,8 @@ def load_paladin_model_for_inference(
255
  On T4 GPUs, caller MUST delete the model and call torch.cuda.empty_cache()
256
  after inference to avoid OOM errors.
257
  """
 
 
258
  model_key = str(model_path)
259
 
260
  # Check cache first (only used in non-aggressive mode)
@@ -262,11 +287,32 @@ def load_paladin_model_for_inference(
262
  logger.info(f" ✓ Using CACHED Paladin model: {model_path.name} (no disk I/O!)")
263
  return cache.paladin_models[model_key]
264
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  # Load model from disk
266
  if cache.aggressive_memory_mgmt:
267
- logger.info(f" → Loading Paladin model: {model_path.name} (will free after use)")
 
 
268
  else:
269
- logger.info(f" → Loading Paladin model: {model_path.name} (will cache for reuse)")
 
 
270
 
271
  with open(model_path, "rb") as f:
272
  model = pickle.load(f) # nosec
 
13
  from loguru import logger
14
 
15
  from mosaic.data_directory import get_data_directory
16
+ from mussel.models import ModelType, get_model_factory
17
 
18
 
19
  class ModelCache:
 
51
  self.paladin_models: Dict[tuple, torch.nn.Module] = {}
52
  self.is_t4_gpu = is_t4_gpu
53
  self.aggressive_memory_mgmt = aggressive_memory_mgmt
54
+ self.device = device or torch.device(
55
+ "cuda" if torch.cuda.is_available() else "cpu"
56
+ )
57
 
58
  def cleanup_paladin(self):
59
  """Aggressively free all Paladin models from memory.
 
81
  self.cleanup_paladin()
82
 
83
  # Clean up core models
84
+ if self.ctranspath_model is not None:
85
+ del self.ctranspath_model
86
+ self.ctranspath_model = None
87
+ if self.optimus_model is not None:
88
+ del self.optimus_model
89
+ self.optimus_model = None
90
+ if self.marker_classifier is not None:
91
+ del self.marker_classifier
92
+ self.marker_classifier = None
93
+ if self.aeon_model is not None:
94
+ del self.aeon_model
95
+ self.aeon_model = None
96
 
97
  # Force garbage collection and GPU cache clearing
98
  gc.collect()
 
153
  if is_t4_gpu:
154
  logger.info(" → Paladin models will be loaded and freed per slide")
155
  else:
156
+ logger.info(
157
+ " → Paladin models will be cached and reused across slides"
158
+ )
159
  elif use_gpu and not torch.cuda.is_available():
160
  logger.warning("GPU requested but CUDA not available, falling back to CPU")
161
  use_gpu = False
 
173
  if not ctranspath_path.exists():
174
  raise FileNotFoundError(f"CTransPath model not found at {ctranspath_path}")
175
 
176
+ ctranspath_factory = get_model_factory(ModelType.CTRANSPATH)
177
+ ctranspath_model = ctranspath_factory.get_model(
178
+ str(ctranspath_path), use_gpu=use_gpu, gpu_device_id=0 if use_gpu else None
179
+ )
180
+ logger.info("✓ CTransPath model loaded")
181
 
182
+ if use_gpu and torch.cuda.is_available():
183
+ mem = torch.cuda.memory_allocated() / (1024**3)
184
+ logger.info(f" GPU memory: {mem:.2f} GB")
 
 
185
 
186
+ # Load Optimus model from Hugging Face Hub
187
+ logger.info("Loading Optimus model from bioptimus/H-optimus-0...")
188
+ optimus_factory = get_model_factory(ModelType.OPTIMUS)
189
+ optimus_model = optimus_factory.get_model(
190
+ model_path="hf-hub:bioptimus/H-optimus-0",
191
+ use_gpu=use_gpu,
192
+ gpu_device_id=0 if use_gpu else None,
193
+ )
194
+ logger.info("✓ Optimus model loaded")
195
+
196
+ if use_gpu and torch.cuda.is_available():
197
+ mem = torch.cuda.memory_allocated() / (1024**3)
198
+ logger.info(f" GPU memory: {mem:.2f} GB")
199
 
200
  # Load Marker Classifier
201
  logger.info("Loading Marker Classifier...")
202
  marker_classifier_path = data_dir / "marker_classifier.pkl"
203
  if not marker_classifier_path.exists():
204
+ raise FileNotFoundError(
205
+ f"Marker classifier not found at {marker_classifier_path}"
206
+ )
207
 
208
  with open(marker_classifier_path, "rb") as f:
209
  marker_classifier = pickle.load(f) # nosec
 
259
  cache: ModelCache,
260
  model_path: Path,
261
  ) -> torch.nn.Module:
262
+ """Load a single Paladin model for inference, downloading on-demand if needed.
263
 
264
  Implements adaptive loading strategy:
265
  - T4 GPU (aggressive mode): Load model fresh, caller must delete after use
266
  - A100 GPU (caching mode): Check cache, load if needed, return cached model
267
 
268
+ If the model file doesn't exist locally, downloads it from HuggingFace Hub.
269
+
270
  Args:
271
  cache: ModelCache instance managing loaded models
272
  model_path: Path to the Paladin model file
 
278
  On T4 GPUs, caller MUST delete the model and call torch.cuda.empty_cache()
279
  after inference to avoid OOM errors.
280
  """
281
+ from huggingface_hub import hf_hub_download
282
+
283
  model_key = str(model_path)
284
 
285
  # Check cache first (only used in non-aggressive mode)
 
287
  logger.info(f" ✓ Using CACHED Paladin model: {model_path.name} (no disk I/O!)")
288
  return cache.paladin_models[model_key]
289
 
290
+ # Download model from HF Hub if it doesn't exist locally
291
+ if not model_path.exists():
292
+ logger.info(
293
+ f" ⬇ Downloading Paladin model from HuggingFace Hub: {model_path.name}"
294
+ )
295
+ # Extract the relative path from the data directory
296
+ data_dir = get_data_directory()
297
+ relative_path = model_path.relative_to(data_dir)
298
+
299
+ downloaded_path = hf_hub_download(
300
+ repo_id="PDM-Group/paladin-aeon-models",
301
+ filename=str(relative_path),
302
+ cache_dir=data_dir.parent.parent, # Use HF cache directory
303
+ )
304
+ model_path = Path(downloaded_path)
305
+ logger.info(f" ✓ Downloaded to: {model_path}")
306
+
307
  # Load model from disk
308
  if cache.aggressive_memory_mgmt:
309
+ logger.info(
310
+ f" → Loading Paladin model: {model_path.name} (will free after use)"
311
+ )
312
  else:
313
+ logger.info(
314
+ f" → Loading Paladin model: {model_path.name} (will cache for reuse)"
315
+ )
316
 
317
  with open(model_path, "rb") as f:
318
  model = pickle.load(f) # nosec
src/mosaic/ui/app.py CHANGED
@@ -24,7 +24,7 @@ from mosaic.ui.utils import (
24
  SETTINGS_COLUMNS,
25
  )
26
  from mosaic.analysis import analyze_slide
27
- from mosaic.batch_analysis import analyze_slides_batch
28
 
29
  current_dir = Path(__file__).parent.parent
30
 
@@ -45,6 +45,12 @@ def set_cancer_subtype_maps(csn_map, rcsn_map, cs):
45
  def analyze_slides(
46
  slides,
47
  settings_input,
 
 
 
 
 
 
48
  user_dir,
49
  progress=gr.Progress(track_tqdm=True),
50
  request: gr.Request = None,
@@ -52,61 +58,112 @@ def analyze_slides(
52
  if slides is None or len(slides) == 0:
53
  raise gr.Error("Please upload at least one slide.")
54
  if user_dir is None:
55
- user_dir = create_user_directory(None, gr.Request())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  settings_input = validate_settings(
57
- settings_input, cancer_subtype_name_map, cancer_subtypes, reversed_cancer_subtype_name_map
 
 
 
58
  )
59
  if len(slides) != len(settings_input):
60
  raise gr.Error("Missing settings for uploaded slides")
61
 
62
- # Use batch processing for multiple slides (models loaded once)
63
- # Use single-slide processing for 1 slide (maintains exact same behavior)
 
 
 
 
64
  if len(slides) > 1:
65
- logger.info(f"Using batch processing for {len(slides)} slides")
66
- progress(0.0, desc=f"Starting batch analysis ({len(slides)} slides)")
67
-
68
- all_slide_masks, all_aeon_results, all_paladin_results = analyze_slides_batch(
69
- slides=slides,
70
- settings_df=settings_input,
71
- cancer_subtype_name_map=cancer_subtype_name_map,
72
- num_workers=4,
73
- aggressive_memory_mgmt=None, # Auto-detect GPU type
74
- progress=progress,
75
- )
76
  else:
77
- # Single slide: use existing analyze_slide() for backward compatibility
78
- logger.info("Using single-slide processing (1 slide)")
79
- progress(0.0, desc="Starting single-slide analysis")
80
-
81
- all_slide_masks = []
82
- all_aeon_results = []
83
- all_paladin_results = []
84
-
85
- row = settings_input.iloc[0]
86
- slide_name = row["Slide"]
87
-
88
- slide_mask, aeon_results, paladin_results = analyze_slide(
89
- slides[0],
90
- row["Segmentation Config"],
91
- row["Site Type"],
92
- row["Sex"],
93
- row["Tissue Site"],
94
- row["Cancer Subtype"],
95
- cancer_subtype_name_map,
96
- row["IHC Subtype"],
97
- progress=progress,
98
- request=request,
99
- )
 
 
 
100
 
101
- if slide_mask is not None:
102
- all_slide_masks.append((slide_mask, slide_name))
103
- if aeon_results is not None:
104
- all_aeon_results.append(aeon_results)
105
- if paladin_results is not None:
106
- paladin_results.insert(
107
- 0, "Slide", pd.Series([slide_name] * len(paladin_results))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  )
109
- all_paladin_results.append(paladin_results)
 
 
 
 
 
110
 
111
  progress(0.99, desc="Analysis complete, wrapping up results")
112
 
@@ -155,7 +212,8 @@ def analyze_slides(
155
 
156
  progress(1.0, desc="All done!")
157
 
158
- return (
 
159
  all_slide_masks,
160
  combined_aeon_results,
161
  aeon_output,
@@ -273,17 +331,20 @@ def launch_gradio(server_name, server_port, share):
273
  )
274
  def clear_fn():
275
  return (
276
- None,
277
- None,
278
- None,
279
- None,
280
- gr.Dataframe(visible=False),
281
- gr.DownloadButton(visible=False),
282
- gr.Dataframe(visible=False),
283
- gr.File(visible=False),
284
  )
285
 
286
- def get_settings(files, site_type, sex, tissue_site, cancer_subtype, ihc_subtype, seg_config):
 
 
 
287
  if files is None:
288
  return pd.DataFrame()
289
  settings = []
@@ -291,22 +352,30 @@ def launch_gradio(server_name, server_port, share):
291
  filename = file.name if hasattr(file, "name") else file
292
  slide_name = filename.split("/")[-1]
293
  settings.append(
294
- [slide_name, site_type, sex, tissue_site, cancer_subtype, ihc_subtype, seg_config]
 
 
 
 
 
 
 
 
295
  )
296
  df = pd.DataFrame(settings, columns=SETTINGS_COLUMNS)
297
  return df
298
 
299
- # Only display settings table and upload button if multiple slides are uploaded
300
- @gr.on(
301
- [
302
- input_slides.change,
303
- site_dropdown.change,
304
- sex_dropdown.change,
305
- tissue_site_dropdown.change,
306
- cancer_subtype_dropdown.change,
307
- ihc_subtype_dropdown.change,
308
- seg_config_dropdown.change,
309
- ],
310
  inputs=[
311
  input_slides,
312
  site_dropdown,
@@ -318,22 +387,103 @@ def launch_gradio(server_name, server_port, share):
318
  ],
319
  outputs=[settings_input, settings_csv, ihc_subtype_dropdown],
320
  )
321
- def update_settings(files, site_type, sex, tissue_site, cancer_subtype, ihc_subtype, seg_config):
 
 
 
322
  has_ihc = "Breast" in cancer_subtype
323
  if not files:
324
  return None, None, gr.Dropdown(visible=has_ihc)
325
  settings_df = get_settings(
326
- files, site_type, sex, tissue_site, cancer_subtype, ihc_subtype, seg_config
 
 
 
 
 
 
327
  )
328
  if settings_df is not None:
329
  has_ihc = any("Breast" in cs for cs in settings_df["Cancer Subtype"])
330
  visible = files and len(files) > 1
331
  return (
332
- gr.Dataframe(settings_df, visible=visible),
333
  gr.File(visible=visible),
334
  gr.Dropdown(visible=has_ihc),
335
  )
336
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
337
  @settings_csv.upload(
338
  inputs=[settings_csv],
339
  outputs=[settings_input],
@@ -349,6 +499,12 @@ def launch_gradio(server_name, server_port, share):
349
  inputs=[
350
  input_slides,
351
  settings_input,
 
 
 
 
 
 
352
  user_dir_state,
353
  ],
354
  outputs=[
@@ -363,9 +519,14 @@ def launch_gradio(server_name, server_port, share):
363
  show_progress_on=paladin_output_table,
364
  )
365
  settings_input.change(
366
- lambda df: validate_settings(df, cancer_subtype_name_map, cancer_subtypes, reversed_cancer_subtype_name_map),
 
 
 
 
 
367
  inputs=[settings_input],
368
- outputs=[settings_input]
369
  )
370
  demo.load(
371
  create_user_directory,
 
24
  SETTINGS_COLUMNS,
25
  )
26
  from mosaic.analysis import analyze_slide
27
+ from mosaic.model_manager import load_all_models
28
 
29
  current_dir = Path(__file__).parent.parent
30
 
 
45
  def analyze_slides(
46
  slides,
47
  settings_input,
48
+ site_type,
49
+ sex,
50
+ tissue_site,
51
+ cancer_subtype,
52
+ ihc_subtype,
53
+ seg_config,
54
  user_dir,
55
  progress=gr.Progress(track_tqdm=True),
56
  request: gr.Request = None,
 
58
  if slides is None or len(slides) == 0:
59
  raise gr.Error("Please upload at least one slide.")
60
  if user_dir is None:
61
+ if request is not None:
62
+ user_dir = create_user_directory(None, request)
63
+ if user_dir is None:
64
+ # Fallback to temp directory if session hash not available
65
+ import tempfile
66
+
67
+ user_dir = Path(tempfile.mkdtemp(prefix="mosaic_"))
68
+
69
+ # Handle empty settings_input (e.g., when dataframe is hidden for single slide)
70
+ # Regenerate settings from dropdowns if settings_input is empty
71
+ if settings_input is None or len(settings_input) == 0:
72
+ logger.info("Settings dataframe is empty, regenerating from dropdown values")
73
+ settings = []
74
+ for file in slides:
75
+ filename = file.name if hasattr(file, "name") else file
76
+ slide_name = filename.split("/")[-1]
77
+ settings.append(
78
+ [
79
+ slide_name,
80
+ site_type,
81
+ sex,
82
+ tissue_site,
83
+ cancer_subtype,
84
+ ihc_subtype,
85
+ seg_config,
86
+ ]
87
+ )
88
+ settings_input = pd.DataFrame(settings, columns=SETTINGS_COLUMNS)
89
+
90
  settings_input = validate_settings(
91
+ settings_input,
92
+ cancer_subtype_name_map,
93
+ cancer_subtypes,
94
+ reversed_cancer_subtype_name_map,
95
  )
96
  if len(slides) != len(settings_input):
97
  raise gr.Error("Missing settings for uploaded slides")
98
 
99
+ all_slide_masks = []
100
+ all_aeon_results = []
101
+ all_paladin_results = []
102
+
103
+ # Load models once (for batch) or per-slide (for single)
104
+ model_cache = None
105
  if len(slides) > 1:
106
+ logger.info(f"Batch mode: Loading models once for {len(slides)} slides")
107
+ progress(0.0, desc=f"Loading models for batch processing")
108
+ model_cache = load_all_models(use_gpu=True, aggressive_memory_mgmt=None)
 
 
 
 
 
 
 
 
109
  else:
110
+ logger.info("Single-slide mode: models loaded within analyze_slide")
111
+
112
+ try:
113
+ # Process all slides with unified analyze_slide function
114
+ for idx, slide_path in enumerate(slides):
115
+ row = settings_input.iloc[idx]
116
+ slide_name = row["Slide"]
117
+
118
+ logger.info(f"[{idx + 1}/{len(slides)}] Processing: {slide_name}")
119
+ slide_progress = idx / len(slides)
120
+ progress(slide_progress, desc=f"Analyzing slide {idx + 1}/{len(slides)}")
121
+
122
+ slide_mask, aeon_results, paladin_results = analyze_slide(
123
+ slide_path=slide_path,
124
+ seg_config=row["Segmentation Config"],
125
+ site_type=row["Site Type"],
126
+ sex=row.get("Sex", "Unknown"),
127
+ tissue_site=row.get("Tissue Site", "Unknown"),
128
+ cancer_subtype=row["Cancer Subtype"],
129
+ cancer_subtype_name_map=cancer_subtype_name_map,
130
+ ihc_subtype=row.get("IHC Subtype", ""),
131
+ num_workers=4,
132
+ progress=progress,
133
+ request=request,
134
+ model_cache=model_cache, # Pre-loaded for batch, None for single
135
+ )
136
 
137
+ if slide_mask is not None:
138
+ all_slide_masks.append((slide_mask, slide_name))
139
+ if aeon_results is not None:
140
+ all_aeon_results.append(aeon_results)
141
+ if paladin_results is not None:
142
+ paladin_results.insert(
143
+ 0, "Slide", pd.Series([slide_name] * len(paladin_results))
144
+ )
145
+ all_paladin_results.append(paladin_results)
146
+
147
+ # Yield intermediate update to show slide masks as they're generated
148
+ # This allows the UI to update incrementally during processing
149
+ yield (
150
+ all_slide_masks.copy(), # Current slide masks
151
+ gr.DataFrame(visible=False), # aeon_output_table (not ready yet)
152
+ gr.DownloadButton(
153
+ visible=False
154
+ ), # aeon_download_button (not ready yet)
155
+ None, # paladin_output_table (not ready yet)
156
+ gr.DownloadButton(
157
+ visible=False
158
+ ), # paladin_download_button (not ready yet)
159
+ user_dir, # user_dir_state
160
  )
161
+
162
+ finally:
163
+ # Clean up model cache if it was loaded for batch processing
164
+ if model_cache is not None:
165
+ logger.info("Cleaning up model cache")
166
+ model_cache.cleanup()
167
 
168
  progress(0.99, desc="Analysis complete, wrapping up results")
169
 
 
212
 
213
  progress(1.0, desc="All done!")
214
 
215
+ # Final yield with complete results
216
+ yield (
217
  all_slide_masks,
218
  combined_aeon_results,
219
  aeon_output,
 
331
  )
332
  def clear_fn():
333
  return (
334
+ None, # input_slides
335
+ None, # slide_masks
336
+ None, # paladin_output_table
337
+ gr.DownloadButton(visible=False), # paladin_download_button
338
+ gr.Dataframe(visible=False), # aeon_output_table
339
+ gr.DownloadButton(visible=False), # aeon_download_button
340
+ gr.Dataframe(visible=False), # settings_input
341
+ gr.File(visible=False), # settings_csv
342
  )
343
 
344
+ def get_settings(
345
+ files, site_type, sex, tissue_site, cancer_subtype, ihc_subtype, seg_config
346
+ ):
347
+ """Generate initial settings DataFrame from uploaded files and dropdown values."""
348
  if files is None:
349
  return pd.DataFrame()
350
  settings = []
 
352
  filename = file.name if hasattr(file, "name") else file
353
  slide_name = filename.split("/")[-1]
354
  settings.append(
355
+ [
356
+ slide_name,
357
+ site_type,
358
+ sex,
359
+ tissue_site,
360
+ cancer_subtype,
361
+ ihc_subtype,
362
+ seg_config,
363
+ ]
364
  )
365
  df = pd.DataFrame(settings, columns=SETTINGS_COLUMNS)
366
  return df
367
 
368
+ def update_settings_column(settings_df, column_name, new_value):
369
+ """Update a specific column in the settings DataFrame."""
370
+ if settings_df is None or len(settings_df) == 0:
371
+ return settings_df
372
+ # Create a copy to avoid modifying the original
373
+ updated_df = settings_df.copy()
374
+ updated_df[column_name] = new_value
375
+ return updated_df
376
+
377
+ # Handle file uploads - regenerate entire settings table
378
+ @input_slides.change(
379
  inputs=[
380
  input_slides,
381
  site_dropdown,
 
387
  ],
388
  outputs=[settings_input, settings_csv, ihc_subtype_dropdown],
389
  )
390
+ def update_files(
391
+ files, site_type, sex, tissue_site, cancer_subtype, ihc_subtype, seg_config
392
+ ):
393
+ """Handle file upload - regenerate settings table from scratch."""
394
  has_ihc = "Breast" in cancer_subtype
395
  if not files:
396
  return None, None, gr.Dropdown(visible=has_ihc)
397
  settings_df = get_settings(
398
+ files,
399
+ site_type,
400
+ sex,
401
+ tissue_site,
402
+ cancer_subtype,
403
+ ihc_subtype,
404
+ seg_config,
405
  )
406
  if settings_df is not None:
407
  has_ihc = any("Breast" in cs for cs in settings_df["Cancer Subtype"])
408
  visible = files and len(files) > 1
409
  return (
410
+ gr.Dataframe(value=settings_df, visible=visible),
411
  gr.File(visible=visible),
412
  gr.Dropdown(visible=has_ihc),
413
  )
414
 
415
+ # Handle individual dropdown changes - only update the relevant column
416
+ @site_dropdown.change(
417
+ inputs=[settings_input, site_dropdown],
418
+ outputs=[settings_input],
419
+ )
420
+ def update_site_type(settings_df, site_type):
421
+ """Update Site Type column when dropdown changes."""
422
+ if settings_df is None or len(settings_df) == 0:
423
+ return settings_df
424
+ updated_df = update_settings_column(settings_df, "Site Type", site_type)
425
+ return gr.Dataframe(value=updated_df)
426
+
427
+ @sex_dropdown.change(
428
+ inputs=[settings_input, sex_dropdown],
429
+ outputs=[settings_input],
430
+ )
431
+ def update_sex(settings_df, sex):
432
+ """Update Sex column when dropdown changes."""
433
+ if settings_df is None or len(settings_df) == 0:
434
+ return settings_df
435
+ updated_df = update_settings_column(settings_df, "Sex", sex)
436
+ return gr.Dataframe(value=updated_df)
437
+
438
+ @tissue_site_dropdown.change(
439
+ inputs=[settings_input, tissue_site_dropdown],
440
+ outputs=[settings_input],
441
+ )
442
+ def update_tissue_site(settings_df, tissue_site):
443
+ """Update Tissue Site column when dropdown changes."""
444
+ if settings_df is None or len(settings_df) == 0:
445
+ return settings_df
446
+ updated_df = update_settings_column(settings_df, "Tissue Site", tissue_site)
447
+ return gr.Dataframe(value=updated_df)
448
+
449
+ @cancer_subtype_dropdown.change(
450
+ inputs=[settings_input, cancer_subtype_dropdown],
451
+ outputs=[settings_input, ihc_subtype_dropdown],
452
+ )
453
+ def update_cancer_subtype(settings_df, cancer_subtype):
454
+ """Update Cancer Subtype column when dropdown changes."""
455
+ has_ihc = "Breast" in cancer_subtype
456
+ if settings_df is None or len(settings_df) == 0:
457
+ return settings_df, gr.Dropdown(visible=has_ihc)
458
+ updated_df = update_settings_column(
459
+ settings_df, "Cancer Subtype", cancer_subtype
460
+ )
461
+ return gr.Dataframe(value=updated_df), gr.Dropdown(visible=has_ihc)
462
+
463
+ @ihc_subtype_dropdown.change(
464
+ inputs=[settings_input, ihc_subtype_dropdown],
465
+ outputs=[settings_input],
466
+ )
467
+ def update_ihc_subtype(settings_df, ihc_subtype):
468
+ """Update IHC Subtype column when dropdown changes."""
469
+ if settings_df is None or len(settings_df) == 0:
470
+ return settings_df
471
+ updated_df = update_settings_column(settings_df, "IHC Subtype", ihc_subtype)
472
+ return gr.Dataframe(value=updated_df)
473
+
474
+ @seg_config_dropdown.change(
475
+ inputs=[settings_input, seg_config_dropdown],
476
+ outputs=[settings_input],
477
+ )
478
+ def update_seg_config(settings_df, seg_config):
479
+ """Update Segmentation Config column when dropdown changes."""
480
+ if settings_df is None or len(settings_df) == 0:
481
+ return settings_df
482
+ updated_df = update_settings_column(
483
+ settings_df, "Segmentation Config", seg_config
484
+ )
485
+ return gr.Dataframe(value=updated_df)
486
+
487
  @settings_csv.upload(
488
  inputs=[settings_csv],
489
  outputs=[settings_input],
 
499
  inputs=[
500
  input_slides,
501
  settings_input,
502
+ site_dropdown,
503
+ sex_dropdown,
504
+ tissue_site_dropdown,
505
+ cancer_subtype_dropdown,
506
+ ihc_subtype_dropdown,
507
+ seg_config_dropdown,
508
  user_dir_state,
509
  ],
510
  outputs=[
 
519
  show_progress_on=paladin_output_table,
520
  )
521
  settings_input.change(
522
+ lambda df: validate_settings(
523
+ df,
524
+ cancer_subtype_name_map,
525
+ cancer_subtypes,
526
+ reversed_cancer_subtype_name_map,
527
+ ),
528
  inputs=[settings_input],
529
+ outputs=[settings_input],
530
  )
531
  demo.load(
532
  create_user_directory,
src/mosaic/ui/utils.py CHANGED
@@ -61,13 +61,13 @@ def get_tissue_sites():
61
 
62
  def get_oncotree_code_name(code):
63
  """Retrieve the human-readable name for an OncoTree code.
64
-
65
  Queries the OncoTree API to get the cancer subtype name corresponding
66
  to the given code. Results are cached to avoid repeated API calls.
67
-
68
  Args:
69
  code: OncoTree code (e.g., "LUAD", "BRCA")
70
-
71
  Returns:
72
  Human-readable cancer subtype name, or "Unknown" if not found
73
  """
@@ -108,16 +108,16 @@ def create_user_directory(state, request: gr.Request):
108
 
109
  def load_settings(slide_csv_path):
110
  """Load slide analysis settings from CSV file.
111
-
112
  Loads the CSV and ensures all required columns are present, adding defaults
113
  for optional columns if they are missing.
114
-
115
  Args:
116
  slide_csv_path: Path to the CSV file containing slide settings
117
-
118
  Returns:
119
  DataFrame with columns: Slide, Site Type, Cancer Subtype, IHC Subtype, Segmentation Config
120
-
121
  Raises:
122
  ValueError: If required columns are missing from the CSV
123
  """
@@ -138,21 +138,26 @@ def load_settings(slide_csv_path):
138
  return settings_df
139
 
140
 
141
- def validate_settings(settings_df, cancer_subtype_name_map, cancer_subtypes, reversed_cancer_subtype_name_map):
 
 
 
 
 
142
  """Validate and normalize slide analysis settings.
143
-
144
  Checks each row for valid values and normalizes cancer subtype names.
145
  Generates warnings for invalid entries and replaces them with defaults.
146
-
147
  Args:
148
  settings_df: DataFrame with slide settings to validate
149
  cancer_subtype_name_map: Dict mapping subtype display names to codes
150
  cancer_subtypes: List of valid cancer subtype codes
151
  reversed_cancer_subtype_name_map: Dict mapping codes to display names
152
-
153
  Returns:
154
  Validated DataFrame with normalized values
155
-
156
  Note:
157
  Invalid entries are replaced with defaults and warnings are displayed
158
  to the user via Gradio warnings.
@@ -215,13 +220,13 @@ def validate_settings(settings_df, cancer_subtype_name_map, cancer_subtypes, rev
215
 
216
  def export_to_csv(df):
217
  """Export a DataFrame to CSV file for download.
218
-
219
  Args:
220
  df: DataFrame to export
221
-
222
  Returns:
223
  Path to the exported CSV file
224
-
225
  Raises:
226
  gr.Error: If the DataFrame is None or empty
227
  """
 
61
 
62
  def get_oncotree_code_name(code):
63
  """Retrieve the human-readable name for an OncoTree code.
64
+
65
  Queries the OncoTree API to get the cancer subtype name corresponding
66
  to the given code. Results are cached to avoid repeated API calls.
67
+
68
  Args:
69
  code: OncoTree code (e.g., "LUAD", "BRCA")
70
+
71
  Returns:
72
  Human-readable cancer subtype name, or "Unknown" if not found
73
  """
 
108
 
109
  def load_settings(slide_csv_path):
110
  """Load slide analysis settings from CSV file.
111
+
112
  Loads the CSV and ensures all required columns are present, adding defaults
113
  for optional columns if they are missing.
114
+
115
  Args:
116
  slide_csv_path: Path to the CSV file containing slide settings
117
+
118
  Returns:
119
  DataFrame with columns: Slide, Site Type, Cancer Subtype, IHC Subtype, Segmentation Config
120
+
121
  Raises:
122
  ValueError: If required columns are missing from the CSV
123
  """
 
138
  return settings_df
139
 
140
 
141
+ def validate_settings(
142
+ settings_df,
143
+ cancer_subtype_name_map,
144
+ cancer_subtypes,
145
+ reversed_cancer_subtype_name_map,
146
+ ):
147
  """Validate and normalize slide analysis settings.
148
+
149
  Checks each row for valid values and normalizes cancer subtype names.
150
  Generates warnings for invalid entries and replaces them with defaults.
151
+
152
  Args:
153
  settings_df: DataFrame with slide settings to validate
154
  cancer_subtype_name_map: Dict mapping subtype display names to codes
155
  cancer_subtypes: List of valid cancer subtype codes
156
  reversed_cancer_subtype_name_map: Dict mapping codes to display names
157
+
158
  Returns:
159
  Validated DataFrame with normalized values
160
+
161
  Note:
162
  Invalid entries are replaced with defaults and warnings are displayed
163
  to the user via Gradio warnings.
 
220
 
221
  def export_to_csv(df):
222
  """Export a DataFrame to CSV file for download.
223
+
224
  Args:
225
  df: DataFrame to export
226
+
227
  Returns:
228
  Path to the exported CSV file
229
+
230
  Raises:
231
  gr.Error: If the DataFrame is None or empty
232
  """
tests/benchmark_batch_performance.py CHANGED
@@ -21,7 +21,9 @@ from mosaic.batch_analysis import analyze_slides_batch
21
  from mosaic.ui.utils import load_settings, validate_settings
22
 
23
 
24
- def benchmark_sequential_processing(slides, settings_df, cancer_subtype_name_map, num_workers):
 
 
25
  """Benchmark traditional sequential processing (models loaded per slide)."""
26
  logger.info("=" * 80)
27
  logger.info("BENCHMARKING: Sequential Processing (OLD METHOD)")
@@ -51,13 +53,15 @@ def benchmark_sequential_processing(slides, settings_df, cancer_subtype_name_map
51
  slide_time = time.time() - slide_start
52
  logger.info(f"Slide {idx + 1} completed in {slide_time:.2f}s")
53
 
54
- results.append({
55
- "slide": slide_path,
56
- "time": slide_time,
57
- "has_mask": slide_mask is not None,
58
- "has_aeon": aeon_results is not None,
59
- "has_paladin": paladin_results is not None,
60
- })
 
 
61
 
62
  total_time = time.time() - start_time
63
  peak_memory = torch.cuda.max_memory_allocated() if torch.cuda.is_available() else 0
@@ -79,7 +83,9 @@ def benchmark_sequential_processing(slides, settings_df, cancer_subtype_name_map
79
  }
80
 
81
 
82
- def benchmark_batch_processing(slides, settings_df, cancer_subtype_name_map, num_workers):
 
 
83
  """Benchmark optimized batch processing (models loaded once)."""
84
  logger.info("=" * 80)
85
  logger.info("BENCHMARKING: Batch Processing (NEW METHOD)")
@@ -128,7 +134,9 @@ def compare_results(sequential_stats, batch_stats):
128
 
129
  speedup = sequential_stats["total_time"] / batch_stats["total_time"]
130
  time_saved = sequential_stats["total_time"] - batch_stats["total_time"]
131
- percent_faster = (1 - (batch_stats["total_time"] / sequential_stats["total_time"])) * 100
 
 
132
 
133
  logger.info(f"Number of slides: {sequential_stats['num_slides']}")
134
  logger.info(f"")
@@ -141,9 +149,11 @@ def compare_results(sequential_stats, batch_stats):
141
 
142
  if torch.cuda.is_available():
143
  logger.info(f"")
144
- logger.info(f"Sequential peak memory: {sequential_stats['peak_memory_gb']:.2f} GB")
 
 
145
  logger.info(f"Batch peak memory: {batch_stats['peak_memory_gb']:.2f} GB")
146
- memory_diff = batch_stats['peak_memory_gb'] - sequential_stats['peak_memory_gb']
147
  logger.info(f"Memory difference: {memory_diff:+.2f} GB")
148
 
149
  logger.info("=" * 80)
@@ -161,31 +171,20 @@ def main():
161
  parser = argparse.ArgumentParser(
162
  description="Benchmark batch processing performance"
163
  )
 
164
  parser.add_argument(
165
- "--slides",
166
- nargs="+",
167
- help="List of slide paths to process"
168
- )
169
- parser.add_argument(
170
- "--slide-csv",
171
- type=str,
172
- help="CSV file with slide paths and settings"
173
  )
174
  parser.add_argument(
175
- "--num-workers",
176
- type=int,
177
- default=4,
178
- help="Number of workers for data loading"
179
  )
180
  parser.add_argument(
181
  "--skip-sequential",
182
  action="store_true",
183
- help="Skip sequential benchmark (faster, only test batch mode)"
184
  )
185
  parser.add_argument(
186
- "--output",
187
- type=str,
188
- help="Save benchmark results to JSON file"
189
  )
190
 
191
  args = parser.parse_args()
@@ -195,27 +194,35 @@ def main():
195
 
196
  # Load cancer subtype mappings
197
  from mosaic.gradio_app import download_and_process_models
198
- cancer_subtype_name_map, cancer_subtypes, reversed_cancer_subtype_name_map = download_and_process_models()
 
 
 
199
 
200
  # Prepare slides and settings
201
  if args.slide_csv:
202
  settings_df = load_settings(args.slide_csv)
203
  settings_df = validate_settings(
204
- settings_df, cancer_subtype_name_map, cancer_subtypes, reversed_cancer_subtype_name_map
 
 
 
205
  )
206
  slides = settings_df["Slide"].tolist()
207
  else:
208
  slides = args.slides
209
  # Create default settings
210
- settings_df = pd.DataFrame({
211
- "Slide": slides,
212
- "Site Type": ["Primary"] * len(slides),
213
- "Sex": ["Unknown"] * len(slides),
214
- "Tissue Site": ["Unknown"] * len(slides),
215
- "Cancer Subtype": ["Unknown"] * len(slides),
216
- "IHC Subtype": [""] * len(slides),
217
- "Segmentation Config": ["Biopsy"] * len(slides),
218
- })
 
 
219
 
220
  logger.info(f"Benchmarking with {len(slides)} slides")
221
  logger.info(f"GPU available: {torch.cuda.is_available()}")
@@ -239,8 +246,9 @@ def main():
239
  # Save results if requested
240
  if args.output:
241
  import json
 
242
  output_path = Path(args.output)
243
- with open(output_path, 'w') as f:
244
  json.dump(comparison, f, indent=2, default=str)
245
  logger.info(f"Benchmark results saved to {output_path}")
246
 
 
21
  from mosaic.ui.utils import load_settings, validate_settings
22
 
23
 
24
+ def benchmark_sequential_processing(
25
+ slides, settings_df, cancer_subtype_name_map, num_workers
26
+ ):
27
  """Benchmark traditional sequential processing (models loaded per slide)."""
28
  logger.info("=" * 80)
29
  logger.info("BENCHMARKING: Sequential Processing (OLD METHOD)")
 
53
  slide_time = time.time() - slide_start
54
  logger.info(f"Slide {idx + 1} completed in {slide_time:.2f}s")
55
 
56
+ results.append(
57
+ {
58
+ "slide": slide_path,
59
+ "time": slide_time,
60
+ "has_mask": slide_mask is not None,
61
+ "has_aeon": aeon_results is not None,
62
+ "has_paladin": paladin_results is not None,
63
+ }
64
+ )
65
 
66
  total_time = time.time() - start_time
67
  peak_memory = torch.cuda.max_memory_allocated() if torch.cuda.is_available() else 0
 
83
  }
84
 
85
 
86
+ def benchmark_batch_processing(
87
+ slides, settings_df, cancer_subtype_name_map, num_workers
88
+ ):
89
  """Benchmark optimized batch processing (models loaded once)."""
90
  logger.info("=" * 80)
91
  logger.info("BENCHMARKING: Batch Processing (NEW METHOD)")
 
134
 
135
  speedup = sequential_stats["total_time"] / batch_stats["total_time"]
136
  time_saved = sequential_stats["total_time"] - batch_stats["total_time"]
137
+ percent_faster = (
138
+ 1 - (batch_stats["total_time"] / sequential_stats["total_time"])
139
+ ) * 100
140
 
141
  logger.info(f"Number of slides: {sequential_stats['num_slides']}")
142
  logger.info(f"")
 
149
 
150
  if torch.cuda.is_available():
151
  logger.info(f"")
152
+ logger.info(
153
+ f"Sequential peak memory: {sequential_stats['peak_memory_gb']:.2f} GB"
154
+ )
155
  logger.info(f"Batch peak memory: {batch_stats['peak_memory_gb']:.2f} GB")
156
+ memory_diff = batch_stats["peak_memory_gb"] - sequential_stats["peak_memory_gb"]
157
  logger.info(f"Memory difference: {memory_diff:+.2f} GB")
158
 
159
  logger.info("=" * 80)
 
171
  parser = argparse.ArgumentParser(
172
  description="Benchmark batch processing performance"
173
  )
174
+ parser.add_argument("--slides", nargs="+", help="List of slide paths to process")
175
  parser.add_argument(
176
+ "--slide-csv", type=str, help="CSV file with slide paths and settings"
 
 
 
 
 
 
 
177
  )
178
  parser.add_argument(
179
+ "--num-workers", type=int, default=4, help="Number of workers for data loading"
 
 
 
180
  )
181
  parser.add_argument(
182
  "--skip-sequential",
183
  action="store_true",
184
+ help="Skip sequential benchmark (faster, only test batch mode)",
185
  )
186
  parser.add_argument(
187
+ "--output", type=str, help="Save benchmark results to JSON file"
 
 
188
  )
189
 
190
  args = parser.parse_args()
 
194
 
195
  # Load cancer subtype mappings
196
  from mosaic.gradio_app import download_and_process_models
197
+
198
+ cancer_subtype_name_map, cancer_subtypes, reversed_cancer_subtype_name_map = (
199
+ download_and_process_models()
200
+ )
201
 
202
  # Prepare slides and settings
203
  if args.slide_csv:
204
  settings_df = load_settings(args.slide_csv)
205
  settings_df = validate_settings(
206
+ settings_df,
207
+ cancer_subtype_name_map,
208
+ cancer_subtypes,
209
+ reversed_cancer_subtype_name_map,
210
  )
211
  slides = settings_df["Slide"].tolist()
212
  else:
213
  slides = args.slides
214
  # Create default settings
215
+ settings_df = pd.DataFrame(
216
+ {
217
+ "Slide": slides,
218
+ "Site Type": ["Primary"] * len(slides),
219
+ "Sex": ["Unknown"] * len(slides),
220
+ "Tissue Site": ["Unknown"] * len(slides),
221
+ "Cancer Subtype": ["Unknown"] * len(slides),
222
+ "IHC Subtype": [""] * len(slides),
223
+ "Segmentation Config": ["Biopsy"] * len(slides),
224
+ }
225
+ )
226
 
227
  logger.info(f"Benchmarking with {len(slides)} slides")
228
  logger.info(f"GPU available: {torch.cuda.is_available()}")
 
246
  # Save results if requested
247
  if args.output:
248
  import json
249
+
250
  output_path = Path(args.output)
251
+ with open(output_path, "w") as f:
252
  json.dump(comparison, f, indent=2, default=str)
253
  logger.info(f"Benchmark results saved to {output_path}")
254
 
tests/conftest.py CHANGED
@@ -3,22 +3,28 @@
3
  import sys
4
  from unittest.mock import MagicMock
5
 
 
6
  # Create mock for gradio with Error class
7
  class GradioMock(MagicMock):
8
  """Mock for gradio that supports Error and Warning classes."""
 
9
  Error = Exception
10
  Warning = lambda msg: None
11
  Request = MagicMock
12
  Progress = MagicMock
13
-
 
14
  # Mock heavy dependencies before any imports
15
  # This is necessary to allow tests to run without full environment setup
16
- sys.modules['mussel'] = MagicMock()
17
- sys.modules['mussel.models'] = MagicMock()
18
- sys.modules['mussel.utils'] = MagicMock()
19
- sys.modules['mussel.utils.segment'] = MagicMock()
20
- sys.modules['mussel.cli'] = MagicMock()
21
- sys.modules['mussel.cli.tessellate'] = MagicMock()
22
- sys.modules['gradio'] = GradioMock()
23
- sys.modules['huggingface_hub'] = MagicMock()
24
- sys.modules['loguru'] = MagicMock()
 
 
 
 
3
  import sys
4
  from unittest.mock import MagicMock
5
 
6
+
7
  # Create mock for gradio with Error class
8
  class GradioMock(MagicMock):
9
  """Mock for gradio that supports Error and Warning classes."""
10
+
11
  Error = Exception
12
  Warning = lambda msg: None
13
  Request = MagicMock
14
  Progress = MagicMock
15
+
16
+
17
  # Mock heavy dependencies before any imports
18
  # This is necessary to allow tests to run without full environment setup
19
+ sys.modules["mussel"] = MagicMock()
20
+ sys.modules["mussel.models"] = MagicMock()
21
+ sys.modules["mussel.utils"] = MagicMock()
22
+ sys.modules["mussel.utils.segment"] = MagicMock()
23
+ sys.modules["mussel.cli"] = MagicMock()
24
+ sys.modules["mussel.cli.tessellate"] = MagicMock()
25
+ sys.modules["gradio"] = GradioMock()
26
+ sys.modules["huggingface_hub"] = MagicMock()
27
+ sys.modules["loguru"] = MagicMock()
28
+
29
+ # Import fixtures from test_fixtures.py to make them available to all tests
30
+ pytest_plugins = ["tests.test_fixtures"]
tests/test_batch_analysis.py DELETED
@@ -1,279 +0,0 @@
1
- """Integration tests for batch_analysis module.
2
-
3
- Tests the batch processing coordinator and end-to-end batch workflow.
4
- """
5
-
6
- import pytest
7
- import pandas as pd
8
- from pathlib import Path
9
- from unittest.mock import Mock, patch, MagicMock
10
- import numpy as np
11
-
12
- from mosaic.batch_analysis import analyze_slides_batch
13
-
14
-
15
- class TestAnalyzeSlidesBatch:
16
- """Test analyze_slides_batch function."""
17
-
18
- @pytest.fixture
19
- def sample_settings_df(self):
20
- """Create sample settings DataFrame for testing."""
21
- return pd.DataFrame({
22
- "Slide": ["slide1.svs", "slide2.svs", "slide3.svs"],
23
- "Site Type": ["Primary", "Primary", "Metastatic"],
24
- "Sex": ["Male", "Female", "Unknown"],
25
- "Tissue Site": ["Lung", "Breast", "Unknown"],
26
- "Cancer Subtype": ["Unknown", "Unknown", "LUAD"],
27
- "IHC Subtype": ["", "HR+/HER2-", ""],
28
- "Segmentation Config": ["Biopsy", "Resection", "Biopsy"],
29
- })
30
-
31
- @pytest.fixture
32
- def cancer_subtype_name_map(self):
33
- """Sample cancer subtype name mapping."""
34
- return {
35
- "Unknown": "Unknown",
36
- "Lung Adenocarcinoma": "LUAD",
37
- "Breast Invasive Ductal Carcinoma": "IDC",
38
- }
39
-
40
- @patch('mosaic.batch_analysis.load_all_models')
41
- @patch('mosaic.batch_analysis.analyze_slide_with_models')
42
- def test_batch_analysis_basic(
43
- self, mock_analyze_slide, mock_load_models, sample_settings_df, cancer_subtype_name_map
44
- ):
45
- """Test basic batch analysis workflow."""
46
- # Mock model cache
47
- mock_cache = Mock()
48
- mock_cache.cleanup = Mock()
49
- mock_load_models.return_value = mock_cache
50
-
51
- # Mock analyze_slide_with_models to return NEW DataFrames each time
52
- def mock_analyze_side_effect(*args, **kwargs):
53
- mock_mask = Mock()
54
- # Aeon results should have Cancer Subtype as index, not a column
55
- mock_aeon = pd.DataFrame({"Confidence": [0.95]}, index=pd.Index(["LUAD"], name="Cancer Subtype"))
56
- mock_paladin = pd.DataFrame({
57
- "Cancer Subtype": ["LUAD"],
58
- "Biomarker": ["EGFR"],
59
- "Score": [0.85]
60
- })
61
- return (mock_mask, mock_aeon, mock_paladin)
62
-
63
- mock_analyze_slide.side_effect = mock_analyze_side_effect
64
-
65
- slides = ["slide1.svs", "slide2.svs", "slide3.svs"]
66
-
67
- # Run batch analysis
68
- masks, aeon_results, paladin_results = analyze_slides_batch(
69
- slides=slides,
70
- settings_df=sample_settings_df,
71
- cancer_subtype_name_map=cancer_subtype_name_map,
72
- num_workers=4,
73
- )
74
-
75
- # Verify models were loaded once
76
- mock_load_models.assert_called_once()
77
-
78
- # Verify analyze_slide_with_models was called for each slide
79
- assert mock_analyze_slide.call_count == 3
80
-
81
- # Verify cleanup was called
82
- mock_cache.cleanup.assert_called_once()
83
-
84
- # Verify results structure
85
- assert len(masks) == 3
86
- assert len(aeon_results) == 3
87
- assert len(paladin_results) == 3
88
-
89
- @patch('mosaic.batch_analysis.load_all_models')
90
- @patch('mosaic.batch_analysis.analyze_slide_with_models')
91
- def test_batch_analysis_with_failures(
92
- self, mock_analyze_slide, mock_load_models, sample_settings_df, cancer_subtype_name_map
93
- ):
94
- """Test batch analysis continues when individual slides fail."""
95
- mock_cache = Mock()
96
- mock_cache.cleanup = Mock()
97
- mock_load_models.return_value = mock_cache
98
-
99
- # First slide succeeds, second fails, third succeeds
100
- def mock_analyze_side_effect(*args, **kwargs):
101
- # Get the slide_path to determine which call this is
102
- call_count = mock_analyze_slide.call_count
103
- if call_count == 2: # Second call (index 1)
104
- raise RuntimeError("Slide processing failed")
105
-
106
- mock_mask = Mock()
107
- # Aeon results should have Cancer Subtype as index, not a column
108
- mock_aeon = pd.DataFrame({"Confidence": [0.95]}, index=pd.Index(["LUAD"], name="Cancer Subtype"))
109
- mock_paladin = pd.DataFrame({
110
- "Cancer Subtype": ["LUAD"],
111
- "Biomarker": ["EGFR"],
112
- "Score": [0.85]
113
- })
114
- return (mock_mask, mock_aeon, mock_paladin)
115
-
116
- mock_analyze_slide.side_effect = mock_analyze_side_effect
117
-
118
- slides = ["slide1.svs", "slide2.svs", "slide3.svs"]
119
-
120
- # Should not raise exception
121
- masks, aeon_results, paladin_results = analyze_slides_batch(
122
- slides=slides,
123
- settings_df=sample_settings_df,
124
- cancer_subtype_name_map=cancer_subtype_name_map,
125
- )
126
-
127
- # Should have results for 2 out of 3 slides
128
- assert len(masks) == 2
129
- assert len(aeon_results) == 2
130
- assert len(paladin_results) == 2
131
-
132
- # Cleanup should still be called
133
- mock_cache.cleanup.assert_called_once()
134
-
135
- @patch('mosaic.batch_analysis.load_all_models')
136
- def test_batch_analysis_cleanup_on_error(
137
- self, mock_load_models, sample_settings_df, cancer_subtype_name_map
138
- ):
139
- """Test cleanup is called even when load_all_models fails."""
140
- mock_load_models.side_effect = RuntimeError("Failed to load models")
141
-
142
- slides = ["slide1.svs"]
143
-
144
- with pytest.raises(RuntimeError, match="Failed to load models"):
145
- analyze_slides_batch(
146
- slides=slides,
147
- settings_df=sample_settings_df,
148
- cancer_subtype_name_map=cancer_subtype_name_map,
149
- )
150
-
151
- @patch('mosaic.batch_analysis.load_all_models')
152
- @patch('mosaic.batch_analysis.analyze_slide_with_models')
153
- def test_batch_analysis_empty_results(
154
- self, mock_analyze_slide, mock_load_models, sample_settings_df, cancer_subtype_name_map
155
- ):
156
- """Test batch analysis with slides that have no tissue."""
157
- mock_cache = Mock()
158
- mock_cache.cleanup = Mock()
159
- mock_load_models.return_value = mock_cache
160
-
161
- # All slides return None (no tissue found)
162
- mock_analyze_slide.return_value = (None, None, None)
163
-
164
- slides = ["slide1.svs", "slide2.svs"]
165
-
166
- masks, aeon_results, paladin_results = analyze_slides_batch(
167
- slides=slides,
168
- settings_df=sample_settings_df[:2],
169
- cancer_subtype_name_map=cancer_subtype_name_map,
170
- )
171
-
172
- # Should have empty results
173
- assert len(masks) == 0
174
- assert len(aeon_results) == 0
175
- assert len(paladin_results) == 0
176
-
177
- # Cleanup should still be called
178
- mock_cache.cleanup.assert_called_once()
179
-
180
- @patch('mosaic.batch_analysis.load_all_models')
181
- @patch('mosaic.batch_analysis.analyze_slide_with_models')
182
- def test_batch_analysis_aggressive_memory_management(
183
- self, mock_analyze_slide, mock_load_models, sample_settings_df, cancer_subtype_name_map
184
- ):
185
- """Test batch analysis with explicit aggressive memory management."""
186
- mock_cache = Mock()
187
- mock_cache.cleanup = Mock()
188
- mock_cache.aggressive_memory_mgmt = True
189
- mock_load_models.return_value = mock_cache
190
-
191
- mock_analyze_slide.return_value = (Mock(), Mock(), Mock())
192
-
193
- slides = ["slide1.svs"]
194
-
195
- analyze_slides_batch(
196
- slides=slides,
197
- settings_df=sample_settings_df[:1],
198
- cancer_subtype_name_map=cancer_subtype_name_map,
199
- aggressive_memory_mgmt=True,
200
- )
201
-
202
- # Verify aggressive_memory_mgmt was passed to load_all_models
203
- mock_load_models.assert_called_once_with(
204
- use_gpu=True,
205
- aggressive_memory_mgmt=True,
206
- )
207
-
208
- @patch('mosaic.batch_analysis.load_all_models')
209
- @patch('mosaic.batch_analysis.analyze_slide_with_models')
210
- def test_batch_analysis_progress_tracking(
211
- self, mock_analyze_slide, mock_load_models, sample_settings_df, cancer_subtype_name_map
212
- ):
213
- """Test batch analysis updates progress correctly."""
214
- mock_cache = Mock()
215
- mock_cache.cleanup = Mock()
216
- mock_load_models.return_value = mock_cache
217
-
218
- mock_analyze_slide.return_value = (Mock(), Mock(), Mock())
219
-
220
- mock_progress = Mock()
221
- slides = ["slide1.svs", "slide2.svs", "slide3.svs"]
222
-
223
- analyze_slides_batch(
224
- slides=slides,
225
- settings_df=sample_settings_df,
226
- cancer_subtype_name_map=cancer_subtype_name_map,
227
- progress=mock_progress,
228
- )
229
-
230
- # Verify progress was called
231
- assert mock_progress.call_count > 0
232
-
233
- # Verify final progress call
234
- final_call = mock_progress.call_args_list[-1]
235
- assert final_call[0][0] == 1.0 # Should be 100% at end
236
-
237
- @patch('mosaic.batch_analysis.load_all_models')
238
- @patch('mosaic.batch_analysis.analyze_slide_with_models')
239
- def test_batch_analysis_multi_slide_naming(
240
- self, mock_analyze_slide, mock_load_models, sample_settings_df, cancer_subtype_name_map
241
- ):
242
- """Test that multi-slide results include slide names."""
243
- mock_cache = Mock()
244
- mock_cache.cleanup = Mock()
245
- mock_load_models.return_value = mock_cache
246
-
247
- # Return new DataFrames each time
248
- def mock_analyze_side_effect(*args, **kwargs):
249
- mock_mask = Mock()
250
- # Aeon results should have Cancer Subtype as index, not a column
251
- mock_aeon = pd.DataFrame({"Confidence": [0.95]}, index=pd.Index(["LUAD"], name="Cancer Subtype"))
252
- mock_paladin = pd.DataFrame({
253
- "Cancer Subtype": ["LUAD"],
254
- "Biomarker": ["EGFR"],
255
- "Score": [0.85]
256
- })
257
- return (mock_mask, mock_aeon, mock_paladin)
258
-
259
- mock_analyze_slide.side_effect = mock_analyze_side_effect
260
-
261
- slides = ["slide1.svs", "slide2.svs"]
262
-
263
- masks, aeon_results, paladin_results = analyze_slides_batch(
264
- slides=slides,
265
- settings_df=sample_settings_df[:2],
266
- cancer_subtype_name_map=cancer_subtype_name_map,
267
- )
268
-
269
- # Verify slide names are in results
270
- assert len(masks) == 2
271
- assert masks[0][1] == "slide1.svs"
272
- assert masks[1][1] == "slide2.svs"
273
-
274
- # Paladin results should have Slide column
275
- assert "Slide" in paladin_results[0].columns
276
-
277
-
278
- if __name__ == "__main__":
279
- pytest.main([__file__, "-v"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tests/test_cli.py ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for CLI execution modes and argument handling.
2
+
3
+ This module tests the Mosaic CLI, including:
4
+ - Argument parsing and routing
5
+ - Single-slide processing mode
6
+ - Batch CSV processing mode
7
+ - Model download behavior
8
+ - Output file generation
9
+ """
10
+
11
+ import pytest
12
+ from unittest.mock import Mock, patch, MagicMock, call
13
+ from pathlib import Path
14
+ import pandas as pd
15
+
16
+
17
+ class TestArgumentParsing:
18
+ """Test CLI argument parsing and mode routing."""
19
+
20
+ @patch("mosaic.gradio_app.launch_gradio")
21
+ @patch("mosaic.gradio_app.download_and_process_models")
22
+ @patch("sys.argv", ["mosaic"])
23
+ def test_no_arguments_launches_web_interface(self, mock_download, mock_launch):
24
+ """Test no arguments routes to web interface mode."""
25
+ mock_download.return_value = ({}, {}, [])
26
+
27
+ from mosaic.gradio_app import main
28
+
29
+ main()
30
+
31
+ # Should call launch_gradio
32
+ assert mock_launch.called
33
+ assert mock_launch.call_count == 1
34
+
35
+ @patch("mosaic.gradio_app.analyze_slide")
36
+ @patch("mosaic.gradio_app.download_and_process_models")
37
+ @patch("sys.argv", ["mosaic", "--slide-path", "test.svs", "--output-dir", "out"])
38
+ def test_slide_path_routes_to_single_mode(self, mock_download, mock_analyze):
39
+ """Test --slide-path routes to single-slide mode."""
40
+ mock_download.return_value = ({"Unknown": "UNK"}, {"UNK": "Unknown"}, [])
41
+ mock_analyze.return_value = (None, None, None)
42
+
43
+ from mosaic.gradio_app import main
44
+
45
+ with patch("mosaic.gradio_app.Path.mkdir"):
46
+ main()
47
+
48
+ # Should call analyze_slide
49
+ assert mock_analyze.called
50
+
51
+ @patch("mosaic.gradio_app.load_all_models")
52
+ @patch("mosaic.gradio_app.load_settings")
53
+ @patch("mosaic.gradio_app.validate_settings")
54
+ @patch("mosaic.gradio_app.analyze_slide")
55
+ @patch("mosaic.gradio_app.download_and_process_models")
56
+ @patch("sys.argv", ["mosaic", "--slide-csv", "test.csv", "--output-dir", "out"])
57
+ def test_slide_csv_routes_to_batch_mode(
58
+ self,
59
+ mock_download,
60
+ mock_analyze,
61
+ mock_validate,
62
+ mock_load_settings,
63
+ mock_load_models,
64
+ ):
65
+ """Test --slide-csv routes to batch mode."""
66
+ mock_download.return_value = ({"Unknown": "UNK"}, {"UNK": "Unknown"}, [])
67
+ mock_load_settings.return_value = pd.DataFrame(
68
+ {
69
+ "Slide": ["test.svs"],
70
+ "Site Type": ["Primary"],
71
+ "Sex": ["Unknown"],
72
+ "Tissue Site": ["Unknown"],
73
+ "Cancer Subtype": ["Unknown"],
74
+ "IHC Subtype": [""],
75
+ "Segmentation Config": ["Biopsy"],
76
+ }
77
+ )
78
+ mock_validate.return_value = mock_load_settings.return_value
79
+ mock_analyze.return_value = (None, None, None)
80
+
81
+ mock_cache = Mock()
82
+ mock_cache.cleanup = Mock()
83
+ mock_load_models.return_value = mock_cache
84
+
85
+ from mosaic.gradio_app import main
86
+
87
+ with patch("mosaic.gradio_app.Path.mkdir"):
88
+ main()
89
+
90
+ # Should call load_all_models (batch mode)
91
+ assert mock_load_models.called
92
+
93
+
94
+ class TestSingleSlideMode:
95
+ """Test single-slide processing mode."""
96
+
97
+ @patch("mosaic.gradio_app.Path.mkdir")
98
+ @patch("mosaic.gradio_app.analyze_slide")
99
+ @patch("mosaic.gradio_app.download_and_process_models")
100
+ def test_analyze_slide_called_with_correct_params(
101
+ self, mock_download, mock_analyze, mock_mkdir, cli_args_single
102
+ ):
103
+ """Test analyze_slide called with correct parameters in single mode."""
104
+ mock_download.return_value = ({"Unknown": "UNK"}, {"UNK": "Unknown"}, [])
105
+ mock_analyze.return_value = (None, None, None)
106
+
107
+ # Patch ArgumentParser to return our test args
108
+ with patch(
109
+ "mosaic.gradio_app.ArgumentParser.parse_args", return_value=cli_args_single
110
+ ):
111
+ from mosaic.gradio_app import main
112
+
113
+ main()
114
+
115
+ # Verify analyze_slide was called
116
+ assert mock_analyze.called
117
+ call_args = mock_analyze.call_args[0] # Positional args
118
+
119
+ # Check key parameters (analyze_slide uses positional args)
120
+ assert call_args[0] == cli_args_single.slide_path # slide_path
121
+ assert call_args[1] == cli_args_single.segmentation_config # seg_config
122
+ assert call_args[2] == cli_args_single.site_type # site_type
123
+
124
+ @patch("PIL.Image.Image.save")
125
+ @patch("mosaic.gradio_app.Path.mkdir")
126
+ @patch("mosaic.gradio_app.analyze_slide")
127
+ @patch("mosaic.gradio_app.download_and_process_models")
128
+ def test_output_files_saved_correctly(
129
+ self,
130
+ mock_download,
131
+ mock_analyze,
132
+ mock_mkdir,
133
+ mock_save,
134
+ cli_args_single,
135
+ mock_analyze_slide_results,
136
+ ):
137
+ """Test output files are saved with correct names."""
138
+ from PIL import Image
139
+
140
+ mock_download.return_value = ({"Unknown": "UNK"}, {"UNK": "Unknown"}, [])
141
+
142
+ # Mock analyze_slide to return results
143
+ mask, aeon_results, paladin_results = mock_analyze_slide_results
144
+ mock_analyze.return_value = (mask, aeon_results, paladin_results)
145
+
146
+ # Patch ArgumentParser
147
+ with patch(
148
+ "mosaic.gradio_app.ArgumentParser.parse_args", return_value=cli_args_single
149
+ ):
150
+ # Patch DataFrame.to_csv to avoid actual file writes
151
+ with patch("pandas.DataFrame.to_csv"):
152
+ from mosaic.gradio_app import main
153
+
154
+ main()
155
+
156
+ # Verify save was called for mask
157
+ assert mock_save.called
158
+
159
+
160
+ class TestBatchCsvMode:
161
+ """Test batch CSV processing mode."""
162
+
163
+ @patch("mosaic.gradio_app.Path.mkdir")
164
+ @patch("mosaic.gradio_app.load_all_models")
165
+ @patch("mosaic.gradio_app.analyze_slide")
166
+ @patch("mosaic.gradio_app.validate_settings")
167
+ @patch("mosaic.gradio_app.load_settings")
168
+ @patch("mosaic.gradio_app.download_and_process_models")
169
+ def test_load_all_models_called_once(
170
+ self,
171
+ mock_download,
172
+ mock_load_settings,
173
+ mock_validate,
174
+ mock_analyze,
175
+ mock_load_models,
176
+ mock_mkdir,
177
+ cli_args_batch,
178
+ sample_settings_df,
179
+ mock_analyze_slide_results,
180
+ ):
181
+ """Test load_all_models called once in batch mode."""
182
+ from PIL import Image
183
+
184
+ mock_download.return_value = ({"Unknown": "UNK"}, {"UNK": "Unknown"}, [])
185
+ mock_load_settings.return_value = sample_settings_df
186
+ mock_validate.return_value = sample_settings_df
187
+
188
+ # Return fresh DataFrames on each call to avoid mutation
189
+ def mock_analyze_side_effect(*args, **kwargs):
190
+ mask = Image.new("RGB", (100, 100), color="red")
191
+ aeon_results = pd.DataFrame(
192
+ {"Cancer Subtype": ["LUAD"], "Confidence": [0.95]}
193
+ )
194
+ paladin_results = pd.DataFrame(
195
+ {
196
+ "Cancer Subtype": ["LUAD", "LUAD", "LUAD"],
197
+ "Biomarker": ["TP53", "KRAS", "EGFR"],
198
+ "Score": [0.85, 0.72, 0.63],
199
+ }
200
+ )
201
+ return (mask, aeon_results, paladin_results)
202
+
203
+ mock_analyze.side_effect = mock_analyze_side_effect
204
+
205
+ mock_cache = Mock()
206
+ mock_cache.cleanup = Mock()
207
+ mock_load_models.return_value = mock_cache
208
+
209
+ with patch(
210
+ "mosaic.gradio_app.ArgumentParser.parse_args", return_value=cli_args_batch
211
+ ):
212
+ with patch("pandas.DataFrame.to_csv"):
213
+ with patch("PIL.Image.Image.save"):
214
+ from mosaic.gradio_app import main
215
+
216
+ main()
217
+
218
+ # load_all_models should be called exactly once
219
+ assert mock_load_models.call_count == 1
220
+
221
+ # analyze_slide should be called for each slide (3 times)
222
+ assert mock_analyze.call_count == 3
223
+
224
+ # All analyze_slide calls should receive the model_cache
225
+ for call in mock_analyze.call_args_list:
226
+ assert call[1]["model_cache"] == mock_cache
227
+
228
+ # cleanup should be called
229
+ assert mock_cache.cleanup.called
230
+
231
+ @patch("mosaic.gradio_app.Path.mkdir")
232
+ @patch("mosaic.gradio_app.load_all_models")
233
+ @patch("mosaic.gradio_app.analyze_slide")
234
+ @patch("mosaic.gradio_app.validate_settings")
235
+ @patch("mosaic.gradio_app.load_settings")
236
+ @patch("mosaic.gradio_app.download_and_process_models")
237
+ def test_combined_outputs_generated(
238
+ self,
239
+ mock_download,
240
+ mock_load_settings,
241
+ mock_validate,
242
+ mock_analyze,
243
+ mock_load_models,
244
+ mock_mkdir,
245
+ cli_args_batch,
246
+ sample_settings_df,
247
+ mock_analyze_slide_results,
248
+ ):
249
+ """Test combined output files are generated in batch mode."""
250
+ from PIL import Image
251
+
252
+ mock_download.return_value = (
253
+ {"Unknown": "UNK", "Lung Adenocarcinoma (LUAD)": "LUAD"},
254
+ {"UNK": "Unknown", "LUAD": "Lung Adenocarcinoma (LUAD)"},
255
+ ["LUAD"],
256
+ )
257
+ mock_load_settings.return_value = sample_settings_df
258
+ mock_validate.return_value = sample_settings_df
259
+
260
+ # Return fresh DataFrames on each call
261
+ def mock_analyze_side_effect(*args, **kwargs):
262
+ mask = Image.new("RGB", (100, 100), color="red")
263
+ aeon_results = pd.DataFrame(
264
+ {"Cancer Subtype": ["LUAD"], "Confidence": [0.95]}
265
+ )
266
+ paladin_results = pd.DataFrame(
267
+ {
268
+ "Cancer Subtype": ["LUAD", "LUAD", "LUAD"],
269
+ "Biomarker": ["TP53", "KRAS", "EGFR"],
270
+ "Score": [0.85, 0.72, 0.63],
271
+ }
272
+ )
273
+ return (mask, aeon_results, paladin_results)
274
+
275
+ mock_analyze.side_effect = mock_analyze_side_effect
276
+
277
+ mock_cache = Mock()
278
+ mock_cache.cleanup = Mock()
279
+ mock_load_models.return_value = mock_cache
280
+
281
+ csv_calls = []
282
+
283
+ def track_csv_write(path, *args, **kwargs):
284
+ """Track CSV file writes."""
285
+ csv_calls.append(str(path))
286
+
287
+ with patch(
288
+ "mosaic.gradio_app.ArgumentParser.parse_args", return_value=cli_args_batch
289
+ ):
290
+ with patch("pandas.DataFrame.to_csv", side_effect=track_csv_write):
291
+ with patch("PIL.Image.Image.save"):
292
+ from mosaic.gradio_app import main
293
+
294
+ main()
295
+
296
+ # Should have combined files
297
+ combined_files = [c for c in csv_calls if "combined" in c]
298
+ assert len(combined_files) >= 2 # combined_aeon and combined_paladin
tests/test_fixtures.py ADDED
@@ -0,0 +1,377 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Shared fixtures and utilities for UI and CLI tests.
2
+
3
+ This module provides reusable fixtures for testing the Mosaic Gradio UI and CLI,
4
+ including mock file objects, settings DataFrames, cancer subtype mappings, and
5
+ utility functions for test setup/teardown.
6
+ """
7
+
8
import tempfile
from contextlib import contextmanager
from pathlib import Path
from unittest.mock import Mock

import numpy as np
import pandas as pd
import pytest
from PIL import Image
15
+
16
+
17
+ # ============================================================================
18
+ # File and Path Fixtures
19
+ # ============================================================================
20
+
21
+
22
@pytest.fixture
def test_slide_path():
    """Location of the real whole-slide image used by integration tests."""
    return Path("tests") / "testdata" / "948176.svs"
26
+
27
+
28
@pytest.fixture
def temp_output_dir():
    """Yield a throwaway output directory, removed once the test finishes."""
    holder = tempfile.TemporaryDirectory(prefix="mosaic_test_")
    try:
        yield Path(holder.name)
    finally:
        holder.cleanup()
33
+
34
+
35
@pytest.fixture
def mock_user_dir(temp_output_dir):
    """Alias fixture: the mock user directory is simply the temp output dir."""
    return temp_output_dir
39
+
40
+
41
+ # ============================================================================
42
+ # Mock File Upload Fixtures
43
+ # ============================================================================
44
+
45
+
46
@pytest.fixture
def sample_files_single():
    """One-element list mimicking a single Gradio file upload."""
    uploaded = Mock()
    uploaded.name = "test_slide_1.svs"
    return [uploaded]
52
+
53
+
54
@pytest.fixture
def sample_files_multiple():
    """Three mock uploads named test_slide_1.svs through test_slide_3.svs."""

    def _make(idx):
        upload = Mock()
        upload.name = f"test_slide_{idx}.svs"
        return upload

    return [_make(i) for i in range(1, 4)]
63
+
64
+
65
def create_mock_file(filename):
    """Build a mock upload object exposing only a ``.name`` attribute.

    Args:
        filename: Value to store on the mock's ``.name``.

    Returns:
        Mock whose ``.name`` equals ``filename``.
    """
    upload = Mock()
    upload.name = filename
    return upload
77
+
78
+
79
+ # ============================================================================
80
+ # Settings DataFrame Fixtures
81
+ # ============================================================================
82
+
83
+
84
@pytest.fixture
def sample_settings_df():
    """Settings DataFrame covering three slides with mixed metadata."""
    columns = [
        "Slide",
        "Site Type",
        "Sex",
        "Tissue Site",
        "Cancer Subtype",
        "IHC Subtype",
        "Segmentation Config",
    ]
    rows = [
        ("slide1.svs", "Primary", "Unknown", "Lung", "Unknown", "", "Biopsy"),
        (
            "slide2.svs",
            "Metastatic",
            "Female",
            "Liver",
            "Lung Adenocarcinoma (LUAD)",
            "",
            "Resection",
        ),
        ("slide3.svs", "Primary", "Male", "Unknown", "Unknown", "", "TCGA"),
    ]
    return pd.DataFrame(rows, columns=columns)
98
+
99
+
100
def create_settings_df(n_rows, **kwargs):
    """Generate a test settings DataFrame with a given number of rows.

    Args:
        n_rows: Number of rows to generate.
        **kwargs: Column overrides keyed by snake_case column name
            (e.g. ``site_type="Metastatic"``, ``ihc_subtype="HR+/HER2+"``).
            A list value is used as-is; a scalar is repeated for every row.
            Keys that match no settings column are silently ignored.

    Returns:
        DataFrame with SETTINGS_COLUMNS.
    """
    defaults = {
        "Slide": [f"slide_{i}.svs" for i in range(1, n_rows + 1)],
        "Site Type": ["Primary"] * n_rows,
        "Sex": ["Unknown"] * n_rows,
        "Tissue Site": ["Unknown"] * n_rows,
        "Cancer Subtype": ["Unknown"] * n_rows,
        "IHC Subtype": [""] * n_rows,
        "Segmentation Config": ["Biopsy"] * n_rows,
    }

    # Resolve kwargs to columns case-insensitively. The previous
    # ``key.replace("_", " ").title()`` mapping produced "Ihc Subtype" for
    # ihc_subtype, which never matched the "IHC Subtype" column, so that
    # override was silently dropped.
    by_lower = {name.lower(): name for name in defaults}
    for key, value in kwargs.items():
        column_name = by_lower.get(key.replace("_", " ").lower())
        if column_name is not None:
            if isinstance(value, list):
                defaults[column_name] = value
            else:
                defaults[column_name] = [value] * n_rows

    return pd.DataFrame(defaults)
130
+
131
+
132
+ # ============================================================================
133
+ # CSV File Fixtures
134
+ # ============================================================================
135
+
136
+
137
@pytest.fixture
def sample_csv_valid():
    """Yield the path of a temporary CSV containing valid slide settings."""
    lines = [
        "Slide,Site Type,Sex,Tissue Site,Cancer Subtype,IHC Subtype,Segmentation Config",
        "slide1.svs,Primary,Unknown,Lung,Unknown,,Biopsy",
        "slide2.svs,Metastatic,Female,Liver,Lung Adenocarcinoma (LUAD),,Resection",
        "slide3.svs,Primary,Male,Unknown,Unknown,,TCGA",
    ]
    with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
        f.write("\n".join(lines) + "\n")
        f.flush()
        yield f.name
        Path(f.name).unlink(missing_ok=True)
152
+
153
+
154
@pytest.fixture
def sample_csv_invalid():
    """Temporary CSV mixing one fully-invalid row with one valid breast-cancer row."""
    lines = [
        "Slide,Site Type,Sex,Tissue Site,Cancer Subtype,IHC Subtype,Segmentation Config",
        "slide1.svs,InvalidSite,InvalidSex,InvalidTissue,InvalidSubtype,InvalidIHC,InvalidConfig",
        # Valid breast cancer
        "slide2.svs,Primary,Unknown,Lung,BRCA,HR+/HER2+,Biopsy",
    ]
    with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
        f.write("\n".join(lines) + "\n")
        f.flush()
        yield f.name
        Path(f.name).unlink(missing_ok=True)
170
+
171
+
172
@pytest.fixture
def sample_csv_minimal():
    """Temporary CSV with only required columns (optional columns absent)."""
    lines = [
        "Slide,Site Type,Cancer Subtype",
        "slide1.svs,Primary,Unknown",
        "slide2.svs,Metastatic,LUAD",
    ]
    with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as f:
        f.write("\n".join(lines) + "\n")
        f.flush()
        yield f.name
        Path(f.name).unlink(missing_ok=True)
182
+
183
+
184
+ # ============================================================================
185
+ # Cancer Subtype Mapping Fixtures
186
+ # ============================================================================
187
+
188
+
189
@pytest.fixture
def mock_cancer_subtype_maps():
    """Return (name->code map, code->name map, list of non-UNK subtype codes)."""
    name_to_code = {
        "Unknown": "UNK",
        "Lung Adenocarcinoma (LUAD)": "LUAD",
        "Breast Invasive Carcinoma (BRCA)": "BRCA",
        "Colorectal Adenocarcinoma (COAD)": "COAD",
        "Prostate Adenocarcinoma (PRAD)": "PRAD",
    }

    # Inverse map, derived rather than hand-written so the two stay in sync.
    code_to_name = {code: name for name, code in name_to_code.items()}

    # Every real subtype code, i.e. everything except the "Unknown" sentinel.
    subtype_codes = [code for code in name_to_code.values() if code != "UNK"]

    return name_to_code, code_to_name, subtype_codes
211
+
212
+
213
+ # ============================================================================
214
+ # Mock Analysis Results Fixtures
215
+ # ============================================================================
216
+
217
+
218
@pytest.fixture
def mock_analyze_slide_results():
    """Canned (mask, aeon_results, paladin_results) triple from analyze_slide."""
    # Solid red placeholder standing in for the tissue-segmentation mask.
    mask = Image.new("RGB", (100, 100), color="red")

    aeon_results = pd.DataFrame({"Cancer Subtype": ["LUAD"], "Confidence": [0.95]})

    # NOTE: no "Slide" column here on purpose — the CLI/UI layer adds it.
    paladin_results = pd.DataFrame(
        {
            "Cancer Subtype": ["LUAD"] * 3,
            "Biomarker": ["TP53", "KRAS", "EGFR"],
            "Score": [0.85, 0.72, 0.63],
        }
    )

    return (mask, aeon_results, paladin_results)
242
+
243
+
244
@pytest.fixture
def mock_model_cache():
    """Mock ModelCache exposing the attributes the analysis code touches."""
    cache = Mock()
    for attr in (
        "ctranspath_model",
        "optimus_model",
        "marker_classifier",
        "aeon_model",
        "device",
        "cleanup",
    ):
        setattr(cache, attr, Mock())
    cache.paladin_models = {}
    return cache
259
+
260
+
261
+ # ============================================================================
262
+ # CLI Argument Fixtures
263
+ # ============================================================================
264
+
265
+
266
@pytest.fixture
def cli_args_single():
    """argparse.Namespace mimicking CLI flags for single-slide mode."""
    from argparse import Namespace

    options = {
        "debug": False,
        "server_name": "0.0.0.0",
        "server_port": None,
        "share": False,
        "slide_path": "tests/testdata/948176.svs",
        "slide_csv": None,
        "output_dir": "test_output",
        "site_type": "Primary",
        "sex": "Unknown",
        "tissue_site": "Unknown",
        "cancer_subtype": "Unknown",
        "ihc_subtype": "",
        "segmentation_config": "Biopsy",
        "num_workers": 4,
    }
    return Namespace(**options)
287
+
288
+
289
@pytest.fixture
def cli_args_batch(sample_csv_valid):
    """argparse.Namespace mimicking CLI flags for batch (CSV-driven) mode."""
    from argparse import Namespace

    options = {
        "debug": False,
        "server_name": "0.0.0.0",
        "server_port": None,
        "share": False,
        "slide_path": None,
        "slide_csv": sample_csv_valid,
        "output_dir": "test_output",
        "site_type": "Primary",
        "sex": "Unknown",
        "tissue_site": "Unknown",
        "cancer_subtype": "Unknown",
        "ihc_subtype": "",
        "segmentation_config": "Biopsy",
        "num_workers": 4,
    }
    return Namespace(**options)
310
+
311
+
312
+ # ============================================================================
313
+ # Utility Functions
314
+ # ============================================================================
315
+
316
+
317
def verify_csv_output(path, expected_columns):
    """Validate CSV file structure.

    Args:
        path: Path to CSV file.
        expected_columns: List of expected column names.

    Returns:
        DataFrame loaded from the CSV.

    Raises:
        AssertionError: If the file is missing, empty, or lacks columns.
    """
    csv_path = Path(path)
    assert csv_path.exists(), f"CSV file not found: {path}"

    frame = pd.read_csv(csv_path)
    assert not frame.empty, f"CSV file is empty: {path}"

    missing_cols = set(expected_columns).difference(frame.columns)
    assert not missing_cols, f"Missing columns in CSV: {missing_cols}"

    return frame
339
+
340
+
341
@contextmanager
def mock_gradio_components():
    """Context manager that patches the Gradio component classes in mosaic.ui.app.

    Fix: the original was a bare generator — its own docstring advertised
    ``with mock_gradio_components() as mocks:``, but without the
    ``@contextmanager`` decorator a generator has no ``__enter__``/``__exit__``
    and the ``with`` statement raises. The decorator makes the documented
    usage work.

    Usage:
        with mock_gradio_components() as mocks:
            # Gradio components are mocked
            result = function_that_returns_gr_components()
            # Verify mocks
            assert mocks['Dataframe'].called

    Yields:
        Dict mapping component name to its replacement. "Error" maps to
        ``Exception`` because gr.Error is an exception class.
    """
    from unittest.mock import patch, Mock

    mocks = {
        "Dataframe": Mock(return_value=Mock()),
        "File": Mock(return_value=Mock()),
        "DownloadButton": Mock(return_value=Mock()),
        "Dropdown": Mock(return_value=Mock()),
        "Gallery": Mock(return_value=Mock()),
        "Error": Exception,  # gr.Error is an exception
        "Warning": Mock(),
    }

    # One patcher per component; all started up front, all stopped on exit.
    patches = [
        patch(f"mosaic.ui.app.gr.{name}", mock_obj) for name, mock_obj in mocks.items()
    ]
    for p in patches:
        p.start()

    try:
        yield mocks
    finally:
        for p in patches:
            p.stop()
tests/test_gradio_app.py CHANGED
@@ -71,14 +71,16 @@ class TestLoadSettings:
71
  reversed_cancer_subtype_name_map = {
72
  value: key for key, value in cancer_subtype_name_map.items()
73
  }
74
- return cancer_subtype_name_map, cancer_subtypes, reversed_cancer_subtype_name_map
 
 
 
 
75
 
76
  @pytest.fixture
77
  def temp_settings_csv(self):
78
  """Create a temporary settings CSV file with all columns."""
79
- with tempfile.NamedTemporaryFile(
80
- mode="w", delete=False, suffix=".csv"
81
- ) as f:
82
  f.write("Slide,Site Type,Cancer Subtype,IHC Subtype,Segmentation Config\n")
83
  f.write("slide1.svs,Primary,Unknown,,Biopsy\n")
84
  f.write("slide2.svs,Metastatic,Unknown,,Resection\n")
@@ -89,9 +91,7 @@ class TestLoadSettings:
89
  @pytest.fixture
90
  def temp_minimal_settings_csv(self):
91
  """Create a temporary settings CSV file with minimal columns."""
92
- with tempfile.NamedTemporaryFile(
93
- mode="w", delete=False, suffix=".csv"
94
- ) as f:
95
  f.write("Slide,Site Type\n")
96
  f.write("slide1.svs,Primary\n")
97
  f.write("slide2.svs,Metastatic\n")
@@ -129,9 +129,7 @@ class TestLoadSettings:
129
 
130
  def test_load_settings_missing_required_column_raises_error(self):
131
  """Test that missing required column raises ValueError."""
132
- with tempfile.NamedTemporaryFile(
133
- mode="w", delete=False, suffix=".csv"
134
- ) as f:
135
  f.write("RandomColumn\n")
136
  f.write("value\n")
137
  temp_path = f.name
 
71
  reversed_cancer_subtype_name_map = {
72
  value: key for key, value in cancer_subtype_name_map.items()
73
  }
74
+ return (
75
+ cancer_subtype_name_map,
76
+ cancer_subtypes,
77
+ reversed_cancer_subtype_name_map,
78
+ )
79
 
80
  @pytest.fixture
81
  def temp_settings_csv(self):
82
  """Create a temporary settings CSV file with all columns."""
83
+ with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".csv") as f:
 
 
84
  f.write("Slide,Site Type,Cancer Subtype,IHC Subtype,Segmentation Config\n")
85
  f.write("slide1.svs,Primary,Unknown,,Biopsy\n")
86
  f.write("slide2.svs,Metastatic,Unknown,,Resection\n")
 
91
  @pytest.fixture
92
  def temp_minimal_settings_csv(self):
93
  """Create a temporary settings CSV file with minimal columns."""
94
+ with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".csv") as f:
 
 
95
  f.write("Slide,Site Type\n")
96
  f.write("slide1.svs,Primary\n")
97
  f.write("slide2.svs,Metastatic\n")
 
129
 
130
  def test_load_settings_missing_required_column_raises_error(self):
131
  """Test that missing required column raises ValueError."""
132
+ with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".csv") as f:
 
 
133
  f.write("RandomColumn\n")
134
  f.write("value\n")
135
  temp_path = f.name
tests/test_model_manager.py CHANGED
@@ -10,7 +10,11 @@ from unittest.mock import Mock, patch, MagicMock
10
  import pickle
11
  import gc
12
 
13
- from mosaic.model_manager import ModelCache, load_all_models, load_paladin_model_for_inference
 
 
 
 
14
 
15
 
16
  class TestModelCache:
@@ -73,9 +77,11 @@ class TestModelCache:
73
 
74
  assert cache.paladin_models == {}
75
 
76
- @patch('torch.cuda.is_available', return_value=True)
77
- @patch('torch.cuda.empty_cache')
78
- def test_cleanup_paladin_clears_cuda_cache(self, mock_empty_cache, mock_cuda_available):
 
 
79
  """Test cleanup_paladin calls torch.cuda.empty_cache()."""
80
  cache = ModelCache()
81
  cache.paladin_models = {"model1": Mock()}
@@ -107,52 +113,52 @@ class TestModelCache:
107
  class TestLoadAllModels:
108
  """Test load_all_models function."""
109
 
110
- @patch('torch.cuda.is_available', return_value=False)
111
  def test_load_models_cpu_only(self, mock_cuda_available):
112
  """Test loading models when CUDA is not available."""
113
- with patch('builtins.open', create=True) as mock_open:
114
- with patch('pickle.load') as mock_pickle:
115
  # Mock the pickle loads
116
  mock_pickle.return_value = Mock()
117
 
118
  # Mock file exists checks
119
- with patch.object(Path, 'exists', return_value=True):
120
  cache = load_all_models(use_gpu=False)
121
 
122
  assert cache is not None
123
  assert cache.device == torch.device("cpu")
124
  assert cache.aggressive_memory_mgmt is False
125
 
126
- @patch('torch.cuda.is_available', return_value=True)
127
- @patch('torch.cuda.get_device_name', return_value="NVIDIA A100")
128
  def test_load_models_a100_gpu(self, mock_get_device, mock_cuda_available):
129
  """Test loading models on A100 GPU (high memory)."""
130
- with patch('builtins.open', create=True):
131
- with patch('pickle.load') as mock_pickle:
132
  mock_model = Mock()
133
  mock_model.to = Mock(return_value=mock_model)
134
  mock_model.eval = Mock()
135
  mock_pickle.return_value = mock_model
136
 
137
- with patch.object(Path, 'exists', return_value=True):
138
  cache = load_all_models(use_gpu=True, aggressive_memory_mgmt=None)
139
 
140
  assert cache.device == torch.device("cuda")
141
  assert cache.is_t4_gpu is False
142
  assert cache.aggressive_memory_mgmt is False # A100 should use caching
143
 
144
- @patch('torch.cuda.is_available', return_value=True)
145
- @patch('torch.cuda.get_device_name', return_value="Tesla T4")
146
  def test_load_models_t4_gpu(self, mock_get_device, mock_cuda_available):
147
  """Test loading models on T4 GPU (low memory)."""
148
- with patch('builtins.open', create=True):
149
- with patch('pickle.load') as mock_pickle:
150
  mock_model = Mock()
151
  mock_model.to = Mock(return_value=mock_model)
152
  mock_model.eval = Mock()
153
  mock_pickle.return_value = mock_model
154
 
155
- with patch.object(Path, 'exists', return_value=True):
156
  cache = load_all_models(use_gpu=True, aggressive_memory_mgmt=None)
157
 
158
  assert cache.device == torch.device("cuda")
@@ -161,33 +167,36 @@ class TestLoadAllModels:
161
 
162
  def test_load_models_missing_aeon_file(self):
163
  """Test load_all_models raises error when Aeon model file is missing."""
 
164
  def exists_side_effect(self):
165
  # Return True for marker_classifier and optimus, False for aeon
166
  filename = str(self)
167
- if 'aeon_model.pkl' in filename:
168
  return False
169
  return True
170
 
171
- with patch.object(Path, 'exists', exists_side_effect):
172
  with pytest.raises(FileNotFoundError, match="Aeon model not found"):
173
- with patch('builtins.open', create=True):
174
- with patch('pickle.load'):
175
  load_all_models(use_gpu=False)
176
 
177
- @patch('torch.cuda.is_available', return_value=True)
178
  def test_load_models_explicit_aggressive_mode(self, mock_cuda_available):
179
  """Test explicit aggressive memory management setting."""
180
- with patch('torch.cuda.get_device_name', return_value="NVIDIA A100"):
181
- with patch('builtins.open', create=True):
182
- with patch('pickle.load') as mock_pickle:
183
  mock_model = Mock()
184
  mock_model.to = Mock(return_value=mock_model)
185
  mock_model.eval = Mock()
186
  mock_pickle.return_value = mock_model
187
 
188
- with patch.object(Path, 'exists', return_value=True):
189
  # Force aggressive mode even on A100
190
- cache = load_all_models(use_gpu=True, aggressive_memory_mgmt=True)
 
 
191
 
192
  assert cache.aggressive_memory_mgmt is True # Should respect explicit setting
193
 
@@ -200,8 +209,8 @@ class TestLoadPaladinModelForInference:
200
  cache = ModelCache(aggressive_memory_mgmt=True, device=torch.device("cpu"))
201
  model_path = Path("data/paladin/test_model.pkl")
202
 
203
- with patch('builtins.open', create=True):
204
- with patch('pickle.load') as mock_pickle:
205
  mock_model = Mock()
206
  mock_model.to = Mock(return_value=mock_model)
207
  mock_model.eval = Mock()
@@ -220,8 +229,8 @@ class TestLoadPaladinModelForInference:
220
  cache = ModelCache(aggressive_memory_mgmt=False, device=torch.device("cpu"))
221
  model_path = Path("data/paladin/test_model.pkl")
222
 
223
- with patch('builtins.open', create=True):
224
- with patch('pickle.load') as mock_pickle:
225
  mock_model = Mock()
226
  mock_model.to = Mock(return_value=mock_model)
227
  mock_model.eval = Mock()
@@ -243,7 +252,7 @@ class TestLoadPaladinModelForInference:
243
  cache.paladin_models[str(model_path)] = cached_model
244
 
245
  # Load model - should return cached version without pickle.load
246
- with patch('pickle.load') as mock_pickle:
247
  model = load_paladin_model_for_inference(cache, model_path)
248
 
249
  assert model == cached_model
 
10
  import pickle
11
  import gc
12
 
13
+ from mosaic.model_manager import (
14
+ ModelCache,
15
+ load_all_models,
16
+ load_paladin_model_for_inference,
17
+ )
18
 
19
 
20
  class TestModelCache:
 
77
 
78
  assert cache.paladin_models == {}
79
 
80
+ @patch("torch.cuda.is_available", return_value=True)
81
+ @patch("torch.cuda.empty_cache")
82
+ def test_cleanup_paladin_clears_cuda_cache(
83
+ self, mock_empty_cache, mock_cuda_available
84
+ ):
85
  """Test cleanup_paladin calls torch.cuda.empty_cache()."""
86
  cache = ModelCache()
87
  cache.paladin_models = {"model1": Mock()}
 
113
  class TestLoadAllModels:
114
  """Test load_all_models function."""
115
 
116
+ @patch("torch.cuda.is_available", return_value=False)
117
  def test_load_models_cpu_only(self, mock_cuda_available):
118
  """Test loading models when CUDA is not available."""
119
+ with patch("builtins.open", create=True) as mock_open:
120
+ with patch("pickle.load") as mock_pickle:
121
  # Mock the pickle loads
122
  mock_pickle.return_value = Mock()
123
 
124
  # Mock file exists checks
125
+ with patch.object(Path, "exists", return_value=True):
126
  cache = load_all_models(use_gpu=False)
127
 
128
  assert cache is not None
129
  assert cache.device == torch.device("cpu")
130
  assert cache.aggressive_memory_mgmt is False
131
 
132
+ @patch("torch.cuda.is_available", return_value=True)
133
+ @patch("torch.cuda.get_device_name", return_value="NVIDIA A100")
134
  def test_load_models_a100_gpu(self, mock_get_device, mock_cuda_available):
135
  """Test loading models on A100 GPU (high memory)."""
136
+ with patch("builtins.open", create=True):
137
+ with patch("pickle.load") as mock_pickle:
138
  mock_model = Mock()
139
  mock_model.to = Mock(return_value=mock_model)
140
  mock_model.eval = Mock()
141
  mock_pickle.return_value = mock_model
142
 
143
+ with patch.object(Path, "exists", return_value=True):
144
  cache = load_all_models(use_gpu=True, aggressive_memory_mgmt=None)
145
 
146
  assert cache.device == torch.device("cuda")
147
  assert cache.is_t4_gpu is False
148
  assert cache.aggressive_memory_mgmt is False # A100 should use caching
149
 
150
+ @patch("torch.cuda.is_available", return_value=True)
151
+ @patch("torch.cuda.get_device_name", return_value="Tesla T4")
152
  def test_load_models_t4_gpu(self, mock_get_device, mock_cuda_available):
153
  """Test loading models on T4 GPU (low memory)."""
154
+ with patch("builtins.open", create=True):
155
+ with patch("pickle.load") as mock_pickle:
156
  mock_model = Mock()
157
  mock_model.to = Mock(return_value=mock_model)
158
  mock_model.eval = Mock()
159
  mock_pickle.return_value = mock_model
160
 
161
+ with patch.object(Path, "exists", return_value=True):
162
  cache = load_all_models(use_gpu=True, aggressive_memory_mgmt=None)
163
 
164
  assert cache.device == torch.device("cuda")
 
167
 
168
  def test_load_models_missing_aeon_file(self):
169
  """Test load_all_models raises error when Aeon model file is missing."""
170
+
171
  def exists_side_effect(self):
172
  # Return True for marker_classifier and optimus, False for aeon
173
  filename = str(self)
174
+ if "aeon_model.pkl" in filename:
175
  return False
176
  return True
177
 
178
+ with patch.object(Path, "exists", exists_side_effect):
179
  with pytest.raises(FileNotFoundError, match="Aeon model not found"):
180
+ with patch("builtins.open", create=True):
181
+ with patch("pickle.load"):
182
  load_all_models(use_gpu=False)
183
 
184
+ @patch("torch.cuda.is_available", return_value=True)
185
  def test_load_models_explicit_aggressive_mode(self, mock_cuda_available):
186
  """Test explicit aggressive memory management setting."""
187
+ with patch("torch.cuda.get_device_name", return_value="NVIDIA A100"):
188
+ with patch("builtins.open", create=True):
189
+ with patch("pickle.load") as mock_pickle:
190
  mock_model = Mock()
191
  mock_model.to = Mock(return_value=mock_model)
192
  mock_model.eval = Mock()
193
  mock_pickle.return_value = mock_model
194
 
195
+ with patch.object(Path, "exists", return_value=True):
196
  # Force aggressive mode even on A100
197
+ cache = load_all_models(
198
+ use_gpu=True, aggressive_memory_mgmt=True
199
+ )
200
 
201
  assert cache.aggressive_memory_mgmt is True # Should respect explicit setting
202
 
 
209
  cache = ModelCache(aggressive_memory_mgmt=True, device=torch.device("cpu"))
210
  model_path = Path("data/paladin/test_model.pkl")
211
 
212
+ with patch("builtins.open", create=True):
213
+ with patch("pickle.load") as mock_pickle:
214
  mock_model = Mock()
215
  mock_model.to = Mock(return_value=mock_model)
216
  mock_model.eval = Mock()
 
229
  cache = ModelCache(aggressive_memory_mgmt=False, device=torch.device("cpu"))
230
  model_path = Path("data/paladin/test_model.pkl")
231
 
232
+ with patch("builtins.open", create=True):
233
+ with patch("pickle.load") as mock_pickle:
234
  mock_model = Mock()
235
  mock_model.to = Mock(return_value=mock_model)
236
  mock_model.eval = Mock()
 
252
  cache.paladin_models[str(model_path)] = cached_model
253
 
254
  # Load model - should return cached version without pickle.load
255
+ with patch("pickle.load") as mock_pickle:
256
  model = load_paladin_model_for_inference(cache, model_path)
257
 
258
  assert model == cached_model
tests/test_regression_single_slide.py CHANGED
@@ -30,13 +30,14 @@ class TestSingleSlideRegression:
30
  "Lung Adenocarcinoma": "LUAD",
31
  }
32
 
33
- @patch('mosaic.analysis.segment_tissue')
34
- @patch('mosaic.analysis.draw_slide_mask')
35
- @patch('mosaic.analysis._extract_ctranspath_features')
36
- @patch('mosaic.analysis.filter_features')
37
- @patch('mosaic.analysis._extract_optimus_features')
38
- @patch('mosaic.analysis._run_aeon_inference')
39
- @patch('mosaic.analysis._run_paladin_inference')
 
40
  def test_single_slide_analyze_slide_unchanged(
41
  self,
42
  mock_paladin,
@@ -44,6 +45,7 @@ class TestSingleSlideRegression:
44
  mock_optimus,
45
  mock_filter,
46
  mock_ctranspath,
 
47
  mock_mask,
48
  mock_segment,
49
  mock_slide_path,
@@ -60,6 +62,16 @@ class TestSingleSlideRegression:
60
  mock_mask_image = Mock()
61
  mock_mask.return_value = mock_mask_image
62
 
 
 
 
 
 
 
 
 
 
 
63
  mock_features = np.random.rand(100, 768)
64
  mock_ctranspath.return_value = (mock_features, mock_coords)
65
 
@@ -69,17 +81,14 @@ class TestSingleSlideRegression:
69
  mock_optimus_features = np.random.rand(50, 1536)
70
  mock_optimus.return_value = mock_optimus_features
71
 
72
- mock_aeon_results = pd.DataFrame({
73
- "Cancer Subtype": ["LUAD", "LUSC"],
74
- "Confidence": [0.85, 0.15]
75
- })
76
  mock_aeon.return_value = mock_aeon_results
77
 
78
- mock_paladin_results = pd.DataFrame({
79
- "Cancer Subtype": ["LUAD"],
80
- "Biomarker": ["EGFR"],
81
- "Score": [0.75]
82
- })
83
  mock_paladin.return_value = mock_paladin_results
84
 
85
  # Run analyze_slide
@@ -107,10 +116,10 @@ class TestSingleSlideRegression:
107
  assert isinstance(aeon_results, pd.DataFrame)
108
  assert isinstance(paladin_results, pd.DataFrame)
109
 
110
- @patch('mosaic.ui.app.analyze_slide')
111
- @patch('mosaic.ui.app.create_user_directory')
112
- @patch('mosaic.ui.app.validate_settings')
113
- @patch('pandas.DataFrame.to_csv') # Mock CSV writing to avoid directory issues
114
  def test_gradio_single_slide_uses_analyze_slide(
115
  self,
116
  mock_to_csv,
@@ -121,40 +130,53 @@ class TestSingleSlideRegression:
121
  """Test that Gradio UI uses analyze_slide for single slide (not batch mode)."""
122
  # Setup
123
  import tempfile
 
124
  with tempfile.TemporaryDirectory() as tmpdir:
125
  mock_dir = Path(tmpdir) / "test_user"
126
  mock_dir.mkdir()
127
  mock_create_dir.return_value = mock_dir
128
 
129
- settings_df = pd.DataFrame({
130
- "Slide": ["test.svs"],
131
- "Site Type": ["Primary"],
132
- "Sex": ["Male"],
133
- "Tissue Site": ["Lung"],
134
- "Cancer Subtype": ["Unknown"],
135
- "IHC Subtype": [""],
136
- "Segmentation Config": ["Biopsy"],
137
- })
 
 
138
  mock_validate.return_value = settings_df
139
 
140
  mock_mask = Mock()
141
  mock_aeon = pd.DataFrame({"Cancer Subtype": ["LUAD"], "Confidence": [0.9]})
142
- mock_paladin = pd.DataFrame({
143
- "Cancer Subtype": ["LUAD"],
144
- "Biomarker": ["EGFR"],
145
- "Score": [0.8]
146
- })
147
  mock_analyze_slide.return_value = (mock_mask, mock_aeon, mock_paladin)
148
 
149
  from mosaic.ui.app import cancer_subtype_name_map
150
 
151
- # Call analyze_slides with a single slide
152
- with patch('mosaic.ui.app.get_oncotree_code_name', return_value="Lung Adenocarcinoma"):
153
- masks, aeon, aeon_btn, paladin, paladin_btn, user_dir = analyze_slides(
 
 
 
154
  slides=["test.svs"],
155
  settings_input=settings_df,
 
 
 
 
 
 
156
  user_dir=mock_dir,
157
  )
 
 
 
158
 
159
  # Verify analyze_slide was called (not analyze_slides_batch)
160
  mock_analyze_slide.assert_called_once()
@@ -162,10 +184,11 @@ class TestSingleSlideRegression:
162
  # Verify results
163
  assert len(masks) == 1
164
 
165
-
166
- @patch('mosaic.analysis.segment_tissue')
167
- @patch('mosaic.analysis.gr.Warning')
168
- def test_single_slide_no_tissue_found(self, mock_warning, mock_segment, mock_slide_path, cancer_subtype_name_map):
 
169
  """Test single-slide analysis when no tissue is found."""
170
  # No tissue tiles found
171
  mock_segment.return_value = None # segment_tissue returns None when no tissue
@@ -187,18 +210,20 @@ class TestSingleSlideRegression:
187
  # Verify warning was raised
188
  mock_warning.assert_called_once()
189
 
190
- @patch('mosaic.analysis.segment_tissue')
191
- @patch('mosaic.analysis.draw_slide_mask')
192
- @patch('mosaic.analysis._extract_ctranspath_features')
193
- @patch('mosaic.analysis.filter_features')
194
- @patch('mosaic.analysis._extract_optimus_features')
195
- @patch('mosaic.analysis._run_paladin_inference')
 
196
  def test_single_slide_known_cancer_subtype_skips_aeon(
197
  self,
198
  mock_paladin,
199
  mock_optimus,
200
  mock_filter,
201
  mock_ctranspath,
 
202
  mock_mask,
203
  mock_segment,
204
  mock_slide_path,
@@ -211,16 +236,25 @@ class TestSingleSlideRegression:
211
  mock_attrs = {}
212
  mock_segment.return_value = (mock_polygon, None, mock_coords, mock_attrs)
213
  mock_mask.return_value = Mock()
 
 
 
 
 
 
 
 
 
 
 
214
  mock_ctranspath.return_value = (np.random.rand(10, 768), np.array([[0, 0]]))
215
  mock_filter.return_value = (None, np.array([[0, 0]]))
216
  mock_optimus.return_value = np.random.rand(10, 1536)
217
- mock_paladin.return_value = pd.DataFrame({
218
- "Cancer Subtype": ["LUAD"],
219
- "Biomarker": ["EGFR"],
220
- "Score": [0.8]
221
- })
222
 
223
- with patch('mosaic.analysis._run_aeon_inference') as mock_aeon:
224
  slide_mask, aeon_results, paladin_results = analyze_slide(
225
  slide_path=mock_slide_path,
226
  seg_config="Biopsy",
@@ -244,6 +278,7 @@ class TestBackwardCompatibility:
244
  def test_analyze_slide_signature_unchanged(self):
245
  """Test that analyze_slide function signature is unchanged."""
246
  from inspect import signature
 
247
  sig = signature(analyze_slide)
248
 
249
  # Verify required parameters exist
@@ -261,8 +296,8 @@ class TestBackwardCompatibility:
261
 
262
  def test_analyze_slide_return_type_unchanged(self):
263
  """Test that analyze_slide returns the same tuple structure."""
264
- with patch('mosaic.analysis.segment_tissue', return_value=None): # No tissue
265
- with patch('mosaic.analysis.gr.Warning'): # Mock the warning
266
  result = analyze_slide(
267
  slide_path="test.svs",
268
  seg_config="Biopsy",
 
30
  "Lung Adenocarcinoma": "LUAD",
31
  }
32
 
33
+ @patch("mosaic.analysis.segment_tissue")
34
+ @patch("mosaic.analysis.draw_slide_mask")
35
+ @patch("mosaic.model_manager.load_all_models")
36
+ @patch("mosaic.analysis._extract_ctranspath_features")
37
+ @patch("mosaic.analysis.filter_features")
38
+ @patch("mosaic.analysis._extract_optimus_features")
39
+ @patch("mosaic.analysis._run_aeon_inference_with_model")
40
+ @patch("mosaic.analysis._run_paladin_inference_with_models")
41
  def test_single_slide_analyze_slide_unchanged(
42
  self,
43
  mock_paladin,
 
45
  mock_optimus,
46
  mock_filter,
47
  mock_ctranspath,
48
+ mock_load_models,
49
  mock_mask,
50
  mock_segment,
51
  mock_slide_path,
 
62
  mock_mask_image = Mock()
63
  mock_mask.return_value = mock_mask_image
64
 
65
+ # Mock ModelCache with required attributes
66
+ mock_model_cache = Mock()
67
+ mock_model_cache.ctranspath_model = Mock()
68
+ mock_model_cache.optimus_model = Mock()
69
+ mock_model_cache.marker_classifier = Mock()
70
+ mock_model_cache.aeon_model = Mock()
71
+ mock_model_cache.device = Mock()
72
+ mock_model_cache.cleanup = Mock()
73
+ mock_load_models.return_value = mock_model_cache
74
+
75
  mock_features = np.random.rand(100, 768)
76
  mock_ctranspath.return_value = (mock_features, mock_coords)
77
 
 
81
  mock_optimus_features = np.random.rand(50, 1536)
82
  mock_optimus.return_value = mock_optimus_features
83
 
84
+ mock_aeon_results = pd.DataFrame(
85
+ {"Cancer Subtype": ["LUAD", "LUSC"], "Confidence": [0.85, 0.15]}
86
+ )
 
87
  mock_aeon.return_value = mock_aeon_results
88
 
89
+ mock_paladin_results = pd.DataFrame(
90
+ {"Cancer Subtype": ["LUAD"], "Biomarker": ["EGFR"], "Score": [0.75]}
91
+ )
 
 
92
  mock_paladin.return_value = mock_paladin_results
93
 
94
  # Run analyze_slide
 
116
  assert isinstance(aeon_results, pd.DataFrame)
117
  assert isinstance(paladin_results, pd.DataFrame)
118
 
119
+ @patch("mosaic.ui.app.analyze_slide")
120
+ @patch("mosaic.ui.app.create_user_directory")
121
+ @patch("mosaic.ui.app.validate_settings")
122
+ @patch("pandas.DataFrame.to_csv") # Mock CSV writing to avoid directory issues
123
  def test_gradio_single_slide_uses_analyze_slide(
124
  self,
125
  mock_to_csv,
 
130
  """Test that Gradio UI uses analyze_slide for single slide (not batch mode)."""
131
  # Setup
132
  import tempfile
133
+
134
  with tempfile.TemporaryDirectory() as tmpdir:
135
  mock_dir = Path(tmpdir) / "test_user"
136
  mock_dir.mkdir()
137
  mock_create_dir.return_value = mock_dir
138
 
139
+ settings_df = pd.DataFrame(
140
+ {
141
+ "Slide": ["test.svs"],
142
+ "Site Type": ["Primary"],
143
+ "Sex": ["Male"],
144
+ "Tissue Site": ["Lung"],
145
+ "Cancer Subtype": ["Unknown"],
146
+ "IHC Subtype": [""],
147
+ "Segmentation Config": ["Biopsy"],
148
+ }
149
+ )
150
  mock_validate.return_value = settings_df
151
 
152
  mock_mask = Mock()
153
  mock_aeon = pd.DataFrame({"Cancer Subtype": ["LUAD"], "Confidence": [0.9]})
154
+ mock_paladin = pd.DataFrame(
155
+ {"Cancer Subtype": ["LUAD"], "Biomarker": ["EGFR"], "Score": [0.8]}
156
+ )
 
 
157
  mock_analyze_slide.return_value = (mock_mask, mock_aeon, mock_paladin)
158
 
159
  from mosaic.ui.app import cancer_subtype_name_map
160
 
161
+ # Call analyze_slides with a single slide (generator function)
162
+ with patch(
163
+ "mosaic.ui.app.get_oncotree_code_name",
164
+ return_value="Lung Adenocarcinoma",
165
+ ):
166
+ gen = analyze_slides(
167
  slides=["test.svs"],
168
  settings_input=settings_df,
169
+ site_type="Primary",
170
+ sex="Male",
171
+ tissue_site="Lung",
172
+ cancer_subtype="Unknown",
173
+ ihc_subtype="",
174
+ seg_config="Biopsy",
175
  user_dir=mock_dir,
176
  )
177
+ # Consume generator to get final result
178
+ results = list(gen)
179
+ masks, aeon, aeon_btn, paladin, paladin_btn, user_dir = results[-1]
180
 
181
  # Verify analyze_slide was called (not analyze_slides_batch)
182
  mock_analyze_slide.assert_called_once()
 
184
  # Verify results
185
  assert len(masks) == 1
186
 
187
+ @patch("mosaic.analysis.segment_tissue")
188
+ @patch("mosaic.analysis.gr.Warning")
189
+ def test_single_slide_no_tissue_found(
190
+ self, mock_warning, mock_segment, mock_slide_path, cancer_subtype_name_map
191
+ ):
192
  """Test single-slide analysis when no tissue is found."""
193
  # No tissue tiles found
194
  mock_segment.return_value = None # segment_tissue returns None when no tissue
 
210
  # Verify warning was raised
211
  mock_warning.assert_called_once()
212
 
213
+ @patch("mosaic.analysis.segment_tissue")
214
+ @patch("mosaic.analysis.draw_slide_mask")
215
+ @patch("mosaic.model_manager.load_all_models")
216
+ @patch("mosaic.analysis._extract_ctranspath_features")
217
+ @patch("mosaic.analysis.filter_features")
218
+ @patch("mosaic.analysis._extract_optimus_features")
219
+ @patch("mosaic.analysis._run_paladin_inference_with_models")
220
  def test_single_slide_known_cancer_subtype_skips_aeon(
221
  self,
222
  mock_paladin,
223
  mock_optimus,
224
  mock_filter,
225
  mock_ctranspath,
226
+ mock_load_models,
227
  mock_mask,
228
  mock_segment,
229
  mock_slide_path,
 
236
  mock_attrs = {}
237
  mock_segment.return_value = (mock_polygon, None, mock_coords, mock_attrs)
238
  mock_mask.return_value = Mock()
239
+
240
+ # Mock ModelCache
241
+ mock_model_cache = Mock()
242
+ mock_model_cache.ctranspath_model = Mock()
243
+ mock_model_cache.optimus_model = Mock()
244
+ mock_model_cache.marker_classifier = Mock()
245
+ mock_model_cache.aeon_model = Mock()
246
+ mock_model_cache.device = Mock()
247
+ mock_model_cache.cleanup = Mock()
248
+ mock_load_models.return_value = mock_model_cache
249
+
250
  mock_ctranspath.return_value = (np.random.rand(10, 768), np.array([[0, 0]]))
251
  mock_filter.return_value = (None, np.array([[0, 0]]))
252
  mock_optimus.return_value = np.random.rand(10, 1536)
253
+ mock_paladin.return_value = pd.DataFrame(
254
+ {"Cancer Subtype": ["LUAD"], "Biomarker": ["EGFR"], "Score": [0.8]}
255
+ )
 
 
256
 
257
+ with patch("mosaic.analysis._run_aeon_inference_with_model") as mock_aeon:
258
  slide_mask, aeon_results, paladin_results = analyze_slide(
259
  slide_path=mock_slide_path,
260
  seg_config="Biopsy",
 
278
  def test_analyze_slide_signature_unchanged(self):
279
  """Test that analyze_slide function signature is unchanged."""
280
  from inspect import signature
281
+
282
  sig = signature(analyze_slide)
283
 
284
  # Verify required parameters exist
 
296
 
297
  def test_analyze_slide_return_type_unchanged(self):
298
  """Test that analyze_slide returns the same tuple structure."""
299
+ with patch("mosaic.analysis.segment_tissue", return_value=None): # No tissue
300
+ with patch("mosaic.analysis.gr.Warning"): # Mock the warning
301
  result = analyze_slide(
302
  slide_path="test.svs",
303
  seg_config="Biopsy",
tests/test_ui_components.py ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for Gradio UI components and their interactions.
2
+
3
+ This module tests the Mosaic Gradio UI components, including:
4
+ - Settings validation
5
+ - Analysis workflow
6
+ """
7
+
8
+ import pytest
9
+ import pandas as pd
10
+ from unittest.mock import Mock, patch, MagicMock
11
+ from pathlib import Path
12
+
13
+ # Import after mocking (mocks are set up in conftest.py)
14
+ from mosaic.ui.app import (
15
+ analyze_slides,
16
+ set_cancer_subtype_maps,
17
+ )
18
+ from mosaic.ui.utils import SETTINGS_COLUMNS
19
+
20
+
21
+ class TestSettingsValidation:
22
+ """Test settings validation logic."""
23
+
24
+ @patch("mosaic.ui.utils.gr.Warning")
25
+ def test_invalid_cancer_subtype_defaults_to_unknown(
26
+ self, mock_warning, mock_cancer_subtype_maps
27
+ ):
28
+ """Test invalid cancer subtype generates warning and defaults to Unknown."""
29
+ from mosaic.ui.utils import validate_settings
30
+
31
+ cancer_subtype_name_map, reversed_map, cancer_subtypes = (
32
+ mock_cancer_subtype_maps
33
+ )
34
+
35
+ # Create DataFrame with invalid cancer subtype
36
+ df = pd.DataFrame(
37
+ {
38
+ "Slide": ["test.svs"],
39
+ "Site Type": ["Primary"],
40
+ "Sex": ["Unknown"],
41
+ "Tissue Site": ["Unknown"],
42
+ "Cancer Subtype": ["InvalidSubtype"],
43
+ "IHC Subtype": [""],
44
+ "Segmentation Config": ["Biopsy"],
45
+ }
46
+ )
47
+
48
+ result = validate_settings(
49
+ df, cancer_subtype_name_map, cancer_subtypes, reversed_map
50
+ )
51
+
52
+ # Should default to Unknown
53
+ assert result.iloc[0]["Cancer Subtype"] == "Unknown"
54
+ # Warning should be called
55
+ assert mock_warning.called
56
+
57
+ @patch("mosaic.ui.utils.gr.Warning")
58
+ def test_invalid_site_type_defaults_to_primary(
59
+ self, mock_warning, mock_cancer_subtype_maps
60
+ ):
61
+ """Test invalid site type generates warning and defaults to Primary."""
62
+ from mosaic.ui.utils import validate_settings
63
+
64
+ cancer_subtype_name_map, reversed_map, cancer_subtypes = (
65
+ mock_cancer_subtype_maps
66
+ )
67
+
68
+ df = pd.DataFrame(
69
+ {
70
+ "Slide": ["test.svs"],
71
+ "Site Type": ["InvalidSite"],
72
+ "Sex": ["Unknown"],
73
+ "Tissue Site": ["Unknown"],
74
+ "Cancer Subtype": ["Unknown"],
75
+ "IHC Subtype": [""],
76
+ "Segmentation Config": ["Biopsy"],
77
+ }
78
+ )
79
+
80
+ result = validate_settings(
81
+ df, cancer_subtype_name_map, cancer_subtypes, reversed_map
82
+ )
83
+
84
+ assert result.iloc[0]["Site Type"] == "Primary"
85
+ assert mock_warning.called
86
+
87
+
88
+ class TestAnalysisWorkflow:
89
+ """Test analysis workflow with mocked analyze_slide."""
90
+
91
+ @patch("mosaic.ui.app.analyze_slide")
92
+ @patch("mosaic.ui.app.create_user_directory")
93
+ def test_single_slide_analysis_no_model_cache(
94
+ self,
95
+ mock_create_dir,
96
+ mock_analyze,
97
+ sample_files_single,
98
+ mock_analyze_slide_results,
99
+ mock_cancer_subtype_maps,
100
+ temp_output_dir,
101
+ ):
102
+ """Test single slide analysis doesn't load model cache."""
103
+ cancer_subtype_name_map, reversed_map, cancer_subtypes = (
104
+ mock_cancer_subtype_maps
105
+ )
106
+ set_cancer_subtype_maps(cancer_subtype_name_map, reversed_map, cancer_subtypes)
107
+
108
+ # Setup mocks
109
+ mock_create_dir.return_value = temp_output_dir
110
+ mock_analyze.return_value = mock_analyze_slide_results
111
+
112
+ # Generate settings DataFrame manually
113
+ settings_df = pd.DataFrame(
114
+ {
115
+ "Slide": ["test_slide_1.svs"],
116
+ "Site Type": ["Primary"],
117
+ "Sex": ["Unknown"],
118
+ "Tissue Site": ["Unknown"],
119
+ "Cancer Subtype": ["Unknown"],
120
+ "IHC Subtype": [""],
121
+ "Segmentation Config": ["Biopsy"],
122
+ }
123
+ )
124
+
125
+ # Call analyze_slides (generator)
126
+ gen = analyze_slides(
127
+ sample_files_single,
128
+ settings_df,
129
+ "Primary",
130
+ "Unknown",
131
+ "Unknown",
132
+ "Unknown",
133
+ "",
134
+ "Biopsy",
135
+ temp_output_dir,
136
+ )
137
+
138
+ # Consume generator
139
+ results = list(gen)
140
+
141
+ # Should yield at least once (intermediate + final)
142
+ assert len(results) >= 1
143
+
144
+ # analyze_slide should be called once
145
+ assert mock_analyze.call_count == 1
146
+
147
+ # Should be called with model_cache=None (single-slide mode)
148
+ call_kwargs = mock_analyze.call_args[1]
149
+ assert call_kwargs["model_cache"] is None
150
+
151
+ @patch("mosaic.ui.app.load_all_models")
152
+ @patch("mosaic.ui.app.analyze_slide")
153
+ @patch("mosaic.ui.app.create_user_directory")
154
+ def test_batch_analysis_loads_model_cache_once(
155
+ self,
156
+ mock_create_dir,
157
+ mock_analyze,
158
+ mock_load_models,
159
+ sample_files_multiple,
160
+ mock_analyze_slide_results,
161
+ mock_model_cache,
162
+ mock_cancer_subtype_maps,
163
+ temp_output_dir,
164
+ ):
165
+ """Test batch analysis loads models once and reuses cache."""
166
+ from PIL import Image
167
+
168
+ cancer_subtype_name_map, reversed_map, cancer_subtypes = (
169
+ mock_cancer_subtype_maps
170
+ )
171
+ set_cancer_subtype_maps(cancer_subtype_name_map, reversed_map, cancer_subtypes)
172
+
173
+ # Setup mocks - return new DataFrames on each call to avoid mutation issues
174
+ def mock_analyze_side_effect(*args, **kwargs):
175
+ mask = Image.new("RGB", (100, 100), color="red")
176
+ aeon_results = pd.DataFrame(
177
+ {"Cancer Subtype": ["LUAD"], "Confidence": [0.95]}
178
+ )
179
+ paladin_results = pd.DataFrame(
180
+ {
181
+ "Cancer Subtype": ["LUAD", "LUAD", "LUAD"],
182
+ "Biomarker": ["TP53", "KRAS", "EGFR"],
183
+ "Score": [0.85, 0.72, 0.63],
184
+ }
185
+ )
186
+ return (mask, aeon_results, paladin_results)
187
+
188
+ mock_create_dir.return_value = temp_output_dir
189
+ mock_load_models.return_value = mock_model_cache
190
+ mock_analyze.side_effect = mock_analyze_side_effect
191
+
192
+ # Generate settings DataFrame manually for 3 files
193
+ settings_df = pd.DataFrame(
194
+ {
195
+ "Slide": ["test_slide_1.svs", "test_slide_2.svs", "test_slide_3.svs"],
196
+ "Site Type": ["Primary", "Primary", "Primary"],
197
+ "Sex": ["Unknown", "Unknown", "Unknown"],
198
+ "Tissue Site": ["Unknown", "Unknown", "Unknown"],
199
+ "Cancer Subtype": ["Unknown", "Unknown", "Unknown"],
200
+ "IHC Subtype": ["", "", ""],
201
+ "Segmentation Config": ["Biopsy", "Biopsy", "Biopsy"],
202
+ }
203
+ )
204
+
205
+ # Call analyze_slides
206
+ gen = analyze_slides(
207
+ sample_files_multiple,
208
+ settings_df,
209
+ "Primary",
210
+ "Unknown",
211
+ "Unknown",
212
+ "Unknown",
213
+ "",
214
+ "Biopsy",
215
+ temp_output_dir,
216
+ )
217
+
218
+ # Consume generator
219
+ results = list(gen)
220
+
221
+ # load_all_models should be called once
222
+ assert mock_load_models.call_count == 1
223
+
224
+ # analyze_slide should be called 3 times (once per file)
225
+ assert mock_analyze.call_count == 3
226
+
227
+ # All calls should use the same model_cache
228
+ for call in mock_analyze.call_args_list:
229
+ assert call[1]["model_cache"] == mock_model_cache
230
+
231
+ # cleanup should be called
232
+ assert mock_model_cache.cleanup.called
233
+
234
+ @patch("mosaic.ui.app.create_user_directory")
235
+ def test_no_slides_raises_error(
236
+ self, mock_create_dir, mock_cancer_subtype_maps, temp_output_dir
237
+ ):
238
+ """Test that no slides uploaded raises gr.Error."""
239
+ import gradio as gr
240
+
241
+ cancer_subtype_name_map, reversed_map, cancer_subtypes = (
242
+ mock_cancer_subtype_maps
243
+ )
244
+ set_cancer_subtype_maps(cancer_subtype_name_map, reversed_map, cancer_subtypes)
245
+
246
+ mock_create_dir.return_value = temp_output_dir
247
+
248
+ # Call with no slides
249
+ gen = analyze_slides(
250
+ None,
251
+ None,
252
+ "Primary",
253
+ "Unknown",
254
+ "Unknown",
255
+ "Unknown",
256
+ "",
257
+ "Biopsy",
258
+ temp_output_dir,
259
+ )
260
+
261
+ # Should raise gr.Error
262
+ with pytest.raises(gr.Error):
263
+ next(gen)
264
+
265
+ @patch("mosaic.ui.app.create_user_directory")
266
+ def test_settings_mismatch_raises_error(
267
+ self,
268
+ mock_create_dir,
269
+ sample_files_multiple,
270
+ sample_settings_df,
271
+ mock_cancer_subtype_maps,
272
+ temp_output_dir,
273
+ ):
274
+ """Test that settings count mismatch raises gr.Error."""
275
+ import gradio as gr
276
+
277
+ cancer_subtype_name_map, reversed_map, cancer_subtypes = (
278
+ mock_cancer_subtype_maps
279
+ )
280
+ set_cancer_subtype_maps(cancer_subtype_name_map, reversed_map, cancer_subtypes)
281
+
282
+ mock_create_dir.return_value = temp_output_dir
283
+
284
+ # sample_files_multiple has 3 files, sample_settings_df has 3 rows
285
+ # Manually create mismatch by using only 2 files
286
+ two_files = sample_files_multiple[:2]
287
+
288
+ gen = analyze_slides(
289
+ two_files,
290
+ sample_settings_df,
291
+ "Primary",
292
+ "Unknown",
293
+ "Unknown",
294
+ "Unknown",
295
+ "",
296
+ "Biopsy",
297
+ temp_output_dir,
298
+ )
299
+
300
+ # Should raise gr.Error about mismatch
301
+ with pytest.raises(gr.Error):
302
+ next(gen)
tests/test_ui_events.py ADDED
@@ -0,0 +1,349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for UI event handlers and state management.
2
+
3
+ This module tests complex event interactions in the Mosaic Gradio UI, including:
4
+ - Settings state management across events
5
+ - Generator behavior and incremental updates
6
+ - Error and warning display
7
+ """
8
+
9
+ import pytest
10
+ import pandas as pd
11
+ from unittest.mock import Mock, patch, MagicMock
12
+ from pathlib import Path
13
+ import inspect
14
+
15
+ from mosaic.ui.app import (
16
+ analyze_slides,
17
+ set_cancer_subtype_maps,
18
+ )
19
+ from mosaic.ui.utils import SETTINGS_COLUMNS, validate_settings, load_settings
20
+
21
+
22
+ class TestSettingsStateManagement:
23
+ """Test settings state management across multiple events."""
24
+
25
+ def test_csv_upload_replaces_settings(
26
+ self, sample_csv_valid, mock_cancer_subtype_maps
27
+ ):
28
+ """Test CSV upload replaces existing settings."""
29
+ cancer_subtype_name_map, reversed_map, cancer_subtypes = (
30
+ mock_cancer_subtype_maps
31
+ )
32
+
33
+ # Load CSV
34
+ loaded_df = load_settings(sample_csv_valid)
35
+ validated_df = validate_settings(
36
+ loaded_df, cancer_subtype_name_map, cancer_subtypes, reversed_map
37
+ )
38
+
39
+ # Verify new settings loaded
40
+ assert len(validated_df) == 3
41
+ assert validated_df.iloc[0]["Slide"] == "slide1.svs"
42
+ assert validated_df.iloc[1]["Slide"] == "slide2.svs"
43
+
44
+
45
+ class TestGeneratorBehavior:
46
+ """Test generator behavior for incremental updates."""
47
+
48
+ @patch("mosaic.ui.app.analyze_slide")
49
+ @patch("mosaic.ui.app.create_user_directory")
50
+ def test_analyze_slides_is_generator(
51
+ self,
52
+ mock_create_dir,
53
+ mock_analyze,
54
+ sample_files_single,
55
+ mock_analyze_slide_results,
56
+ mock_cancer_subtype_maps,
57
+ temp_output_dir,
58
+ ):
59
+ """Test analyze_slides returns a generator."""
60
+ cancer_subtype_name_map, reversed_map, cancer_subtypes = (
61
+ mock_cancer_subtype_maps
62
+ )
63
+ set_cancer_subtype_maps(cancer_subtype_name_map, reversed_map, cancer_subtypes)
64
+
65
+ mock_create_dir.return_value = temp_output_dir
66
+ mock_analyze.return_value = mock_analyze_slide_results
67
+
68
+ settings_df = pd.DataFrame(
69
+ {
70
+ "Slide": ["test_slide_1.svs"],
71
+ "Site Type": ["Primary"],
72
+ "Sex": ["Unknown"],
73
+ "Tissue Site": ["Unknown"],
74
+ "Cancer Subtype": ["Unknown"],
75
+ "IHC Subtype": [""],
76
+ "Segmentation Config": ["Biopsy"],
77
+ }
78
+ )
79
+
80
+ result = analyze_slides(
81
+ sample_files_single,
82
+ settings_df,
83
+ "Primary",
84
+ "Unknown",
85
+ "Unknown",
86
+ "Unknown",
87
+ "",
88
+ "Biopsy",
89
+ temp_output_dir,
90
+ )
91
+
92
+ # Verify it's a generator
93
+ assert inspect.isgenerator(result)
94
+
95
+ @patch("mosaic.ui.app.load_all_models")
96
+ @patch("mosaic.ui.app.analyze_slide")
97
+ @patch("mosaic.ui.app.create_user_directory")
98
+ def test_intermediate_yields_update_masks_only(
99
+ self,
100
+ mock_create_dir,
101
+ mock_analyze,
102
+ mock_load_models,
103
+ sample_files_multiple,
104
+ mock_analyze_slide_results,
105
+ mock_model_cache,
106
+ mock_cancer_subtype_maps,
107
+ temp_output_dir,
108
+ ):
109
+ """Test intermediate yields show only slide masks."""
110
+ from PIL import Image
111
+
112
+ cancer_subtype_name_map, reversed_map, cancer_subtypes = (
113
+ mock_cancer_subtype_maps
114
+ )
115
+ set_cancer_subtype_maps(cancer_subtype_name_map, reversed_map, cancer_subtypes)
116
+
117
+ mock_create_dir.return_value = temp_output_dir
118
+ mock_load_models.return_value = mock_model_cache
119
+
120
+ # Return fresh DataFrames on each call
121
+ def mock_analyze_side_effect(*args, **kwargs):
122
+ mask = Image.new("RGB", (100, 100), color="red")
123
+ aeon_results = pd.DataFrame(
124
+ {"Cancer Subtype": ["LUAD"], "Confidence": [0.95]}
125
+ )
126
+ paladin_results = pd.DataFrame(
127
+ {
128
+ "Cancer Subtype": ["LUAD", "LUAD", "LUAD"],
129
+ "Biomarker": ["TP53", "KRAS", "EGFR"],
130
+ "Score": [0.85, 0.72, 0.63],
131
+ }
132
+ )
133
+ return (mask, aeon_results, paladin_results)
134
+
135
+ mock_analyze.side_effect = mock_analyze_side_effect
136
+
137
+ settings_df = pd.DataFrame(
138
+ {
139
+ "Slide": ["test_slide_1.svs", "test_slide_2.svs", "test_slide_3.svs"],
140
+ "Site Type": ["Primary", "Primary", "Primary"],
141
+ "Sex": ["Unknown", "Unknown", "Unknown"],
142
+ "Tissue Site": ["Unknown", "Unknown", "Unknown"],
143
+ "Cancer Subtype": ["Unknown", "Unknown", "Unknown"],
144
+ "IHC Subtype": ["", "", ""],
145
+ "Segmentation Config": ["Biopsy", "Biopsy", "Biopsy"],
146
+ }
147
+ )
148
+
149
+ gen = analyze_slides(
150
+ sample_files_multiple,
151
+ settings_df,
152
+ "Primary",
153
+ "Unknown",
154
+ "Unknown",
155
+ "Unknown",
156
+ "",
157
+ "Biopsy",
158
+ temp_output_dir,
159
+ )
160
+
161
+ # Get first intermediate yield (after first slide)
162
+ first_yield = next(gen)
163
+
164
+ # Should be tuple with 6 elements
165
+ assert len(first_yield) == 6
166
+
167
+ # First element is slide_masks (should have 1 entry)
168
+ slide_masks = first_yield[0]
169
+ assert len(slide_masks) == 1
170
+
171
+ @patch("mosaic.ui.app.load_all_models")
172
+ @patch("mosaic.ui.app.analyze_slide")
173
+ @patch("mosaic.ui.app.create_user_directory")
174
+ def test_final_yield_has_complete_results(
175
+ self,
176
+ mock_create_dir,
177
+ mock_analyze,
178
+ mock_load_models,
179
+ sample_files_multiple,
180
+ mock_analyze_slide_results,
181
+ mock_model_cache,
182
+ mock_cancer_subtype_maps,
183
+ temp_output_dir,
184
+ ):
185
+ """Test final yield contains complete results."""
186
+ from PIL import Image
187
+
188
+ cancer_subtype_name_map, reversed_map, cancer_subtypes = (
189
+ mock_cancer_subtype_maps
190
+ )
191
+ set_cancer_subtype_maps(cancer_subtype_name_map, reversed_map, cancer_subtypes)
192
+
193
+ mock_create_dir.return_value = temp_output_dir
194
+ mock_load_models.return_value = mock_model_cache
195
+
196
+ # Return fresh DataFrames on each call
197
+ def mock_analyze_side_effect(*args, **kwargs):
198
+ mask = Image.new("RGB", (100, 100), color="red")
199
+ aeon_results = pd.DataFrame(
200
+ {"Cancer Subtype": ["LUAD"], "Confidence": [0.95]}
201
+ )
202
+ paladin_results = pd.DataFrame(
203
+ {
204
+ "Cancer Subtype": ["LUAD", "LUAD", "LUAD"],
205
+ "Biomarker": ["TP53", "KRAS", "EGFR"],
206
+ "Score": [0.85, 0.72, 0.63],
207
+ }
208
+ )
209
+ return (mask, aeon_results, paladin_results)
210
+
211
+ mock_analyze.side_effect = mock_analyze_side_effect
212
+
213
+ settings_df = pd.DataFrame(
214
+ {
215
+ "Slide": ["test_slide_1.svs", "test_slide_2.svs", "test_slide_3.svs"],
216
+ "Site Type": ["Primary", "Primary", "Primary"],
217
+ "Sex": ["Unknown", "Unknown", "Unknown"],
218
+ "Tissue Site": ["Unknown", "Unknown", "Unknown"],
219
+ "Cancer Subtype": ["Unknown", "Unknown", "Unknown"],
220
+ "IHC Subtype": ["", "", ""],
221
+ "Segmentation Config": ["Biopsy", "Biopsy", "Biopsy"],
222
+ }
223
+ )
224
+
225
+ gen = analyze_slides(
226
+ sample_files_multiple,
227
+ settings_df,
228
+ "Primary",
229
+ "Unknown",
230
+ "Unknown",
231
+ "Unknown",
232
+ "",
233
+ "Biopsy",
234
+ temp_output_dir,
235
+ )
236
+
237
+ # Consume generator to get final yield
238
+ results = list(gen)
239
+ final_yield = results[-1]
240
+
241
+ # Final yield should have all results
242
+ assert len(final_yield) == 6
243
+ slide_masks = final_yield[0]
244
+ assert len(slide_masks) == 3 # All 3 slides
245
+
246
+
247
+ class TestErrorDisplay:
248
+ """Test error and warning display behavior."""
249
+
250
+ @patch("mosaic.ui.app.create_user_directory")
251
+ def test_no_slides_raises_gr_error(
252
+ self, mock_create_dir, mock_cancer_subtype_maps, temp_output_dir
253
+ ):
254
+ """Test that no slides raises gr.Error."""
255
+ import gradio as gr
256
+
257
+ cancer_subtype_name_map, reversed_map, cancer_subtypes = (
258
+ mock_cancer_subtype_maps
259
+ )
260
+ set_cancer_subtype_maps(cancer_subtype_name_map, reversed_map, cancer_subtypes)
261
+
262
+ mock_create_dir.return_value = temp_output_dir
263
+
264
+ gen = analyze_slides(
265
+ None,
266
+ None,
267
+ "Primary",
268
+ "Unknown",
269
+ "Unknown",
270
+ "Unknown",
271
+ "",
272
+ "Biopsy",
273
+ temp_output_dir,
274
+ )
275
+
276
+ # Should raise gr.Error
277
+ with pytest.raises(gr.Error):
278
+ next(gen)
279
+
280
+ @patch("mosaic.ui.utils.gr.Warning")
281
+ def test_validation_warnings_shown(self, mock_warning, mock_cancer_subtype_maps):
282
+ """Test validation warnings are displayed."""
283
+ cancer_subtype_name_map, reversed_map, cancer_subtypes = (
284
+ mock_cancer_subtype_maps
285
+ )
286
+
287
+ # Create DataFrame with multiple invalid values
288
+ df = pd.DataFrame(
289
+ {
290
+ "Slide": ["test1.svs", "test2.svs"],
291
+ "Site Type": ["InvalidSite", "Primary"],
292
+ "Sex": ["Unknown", "InvalidSex"],
293
+ "Tissue Site": ["Unknown", "Unknown"],
294
+ "Cancer Subtype": ["InvalidSubtype", "Unknown"],
295
+ "IHC Subtype": ["", ""],
296
+ "Segmentation Config": ["Biopsy", "InvalidConfig"],
297
+ }
298
+ )
299
+
300
+ result = validate_settings(
301
+ df, cancer_subtype_name_map, cancer_subtypes, reversed_map
302
+ )
303
+
304
+ # Should have warning calls (at least 1 for the multiple invalid values)
305
+ assert mock_warning.call_count >= 1
306
+
307
+ # Verify defaults applied
308
+ assert result.iloc[0]["Site Type"] == "Primary" # Invalid → Primary
309
+ assert result.iloc[0]["Cancer Subtype"] == "Unknown" # Invalid → Unknown
310
+ assert result.iloc[1]["Sex"] == "Unknown" # Invalid → Unknown
311
+ assert result.iloc[1]["Segmentation Config"] == "Biopsy" # Invalid → Biopsy
312
+
313
+ @patch("mosaic.ui.app.create_user_directory")
314
+ def test_settings_mismatch_raises_gr_error(
315
+ self,
316
+ mock_create_dir,
317
+ sample_files_multiple,
318
+ sample_settings_df,
319
+ mock_cancer_subtype_maps,
320
+ temp_output_dir,
321
+ ):
322
+ """Test settings/files count mismatch raises gr.Error."""
323
+ import gradio as gr
324
+
325
+ cancer_subtype_name_map, reversed_map, cancer_subtypes = (
326
+ mock_cancer_subtype_maps
327
+ )
328
+ set_cancer_subtype_maps(cancer_subtype_name_map, reversed_map, cancer_subtypes)
329
+
330
+ mock_create_dir.return_value = temp_output_dir
331
+
332
+ # Create mismatch: 2 files but 3 settings rows
333
+ two_files = sample_files_multiple[:2]
334
+
335
+ gen = analyze_slides(
336
+ two_files,
337
+ sample_settings_df,
338
+ "Primary",
339
+ "Unknown",
340
+ "Unknown",
341
+ "Unknown",
342
+ "",
343
+ "Biopsy",
344
+ temp_output_dir,
345
+ )
346
+
347
+ # Should raise gr.Error about mismatch
348
+ with pytest.raises(gr.Error):
349
+ next(gen)
uv.lock CHANGED
The diff for this file is too large to render. See raw diff