Spaces:
Sleeping
Sleeping
Add GitHub Actions workflows and comprehensive test suite
Browse files- Add CI/CD workflows for tests, code quality, and Docker builds
- Add comprehensive Makefile with test, lint, format, and Docker targets
- Add new test files for CLI, fixtures, UI components, and UI events
- Refactor batch analysis into main analysis module
- Update model manager and inference modules
- Add .dockerignore for cleaner Docker builds
- Update dependencies in uv.lock
- .dockerignore +79 -0
- .github/workflows/code-quality.yml +80 -0
- .github/workflows/docker.yml +73 -0
- .github/workflows/tests.yml +74 -0
- MAKEFILE_QUICK_REF.md +93 -0
- MAKEFILE_USAGE.md +459 -0
- Makefile +257 -0
- src/mosaic/analysis.py +244 -275
- src/mosaic/batch_analysis.py +0 -238
- src/mosaic/gradio_app.py +103 -34
- src/mosaic/inference/aeon.py +32 -10
- src/mosaic/inference/data.py +18 -22
- src/mosaic/inference/paladin.py +15 -15
- src/mosaic/model_manager.py +71 -25
- src/mosaic/ui/app.py +235 -74
- src/mosaic/ui/utils.py +20 -15
- tests/benchmark_batch_performance.py +48 -40
- tests/conftest.py +16 -10
- tests/test_batch_analysis.py +0 -279
- tests/test_cli.py +298 -0
- tests/test_fixtures.py +377 -0
- tests/test_gradio_app.py +8 -10
- tests/test_model_manager.py +42 -33
- tests/test_regression_single_slide.py +90 -55
- tests/test_ui_components.py +302 -0
- tests/test_ui_events.py +349 -0
- uv.lock +0 -0
.dockerignore
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
*.so
|
| 6 |
+
.Python
|
| 7 |
+
*.egg-info/
|
| 8 |
+
dist/
|
| 9 |
+
build/
|
| 10 |
+
*.egg
|
| 11 |
+
|
| 12 |
+
# Virtual environments
|
| 13 |
+
.venv/
|
| 14 |
+
venv/
|
| 15 |
+
ENV/
|
| 16 |
+
env/
|
| 17 |
+
|
| 18 |
+
# Testing
|
| 19 |
+
.pytest_cache/
|
| 20 |
+
.coverage
|
| 21 |
+
htmlcov/
|
| 22 |
+
.tox/
|
| 23 |
+
*.cover
|
| 24 |
+
|
| 25 |
+
# IDE
|
| 26 |
+
.vscode/
|
| 27 |
+
.idea/
|
| 28 |
+
*.swp
|
| 29 |
+
*.swo
|
| 30 |
+
*~
|
| 31 |
+
|
| 32 |
+
# Git
|
| 33 |
+
.git/
|
| 34 |
+
.gitignore
|
| 35 |
+
.gitattributes
|
| 36 |
+
|
| 37 |
+
# CI/CD
|
| 38 |
+
.github/
|
| 39 |
+
.gitlab-ci.yml
|
| 40 |
+
|
| 41 |
+
# Documentation
|
| 42 |
+
docs/
|
| 43 |
+
*.md
|
| 44 |
+
!README.md
|
| 45 |
+
|
| 46 |
+
# Data and outputs
|
| 47 |
+
data/
|
| 48 |
+
output/
|
| 49 |
+
*.svs
|
| 50 |
+
*.tiff
|
| 51 |
+
*.tif
|
| 52 |
+
*.png
|
| 53 |
+
*.jpg
|
| 54 |
+
*.jpeg
|
| 55 |
+
|
| 56 |
+
# Logs
|
| 57 |
+
*.log
|
| 58 |
+
logs/
|
| 59 |
+
|
| 60 |
+
# OS
|
| 61 |
+
.DS_Store
|
| 62 |
+
Thumbs.db
|
| 63 |
+
|
| 64 |
+
# Project specific
|
| 65 |
+
tests/
|
| 66 |
+
*.csv
|
| 67 |
+
profile.stats
|
| 68 |
+
benchmark_output/
|
| 69 |
+
profile_output/
|
| 70 |
+
|
| 71 |
+
# Lock files (we use uv.lock)
|
| 72 |
+
poetry.lock
|
| 73 |
+
Pipfile.lock
|
| 74 |
+
requirements*.txt
|
| 75 |
+
|
| 76 |
+
# Makefile and CI configs
|
| 77 |
+
Makefile
|
| 78 |
+
.dockerignore
|
| 79 |
+
Dockerfile*
|
.github/workflows/code-quality.yml
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Code-quality workflow: black formatting check (blocking) and pylint
# (advisory — see continue-on-error below). Both jobs install the project
# with uv and delegate the actual commands to the Makefile so CI and local
# runs stay identical.
name: Code Quality

on:
  push:
    branches: [ main, dev ]
  pull_request:
    branches: [ main, dev ]
  workflow_dispatch:

jobs:
  format-check:
    name: Check Code Formatting
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          enable-cache: true

      - name: Install dependencies
        run: |
          uv sync

      - name: Check formatting with black
        run: |
          make format-check

      # Runs even when the check fails so the summary reports the failure
      # and points the contributor at the auto-fix command.
      - name: Format Summary
        if: always()
        run: |
          echo "## Code Formatting :art:" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "Status: ${{ job.status }}" >> $GITHUB_STEP_SUMMARY
          if [ "${{ job.status }}" == "failure" ]; then
            echo "" >> $GITHUB_STEP_SUMMARY
            echo "Run \`make format\` to auto-fix formatting issues." >> $GITHUB_STEP_SUMMARY
          fi

  lint:
    name: Lint Code
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          enable-cache: true

      - name: Install dependencies
        run: |
          uv sync

      - name: Lint with pylint
        run: |
          make lint
        continue-on-error: true  # Don't fail CI on pylint warnings

      - name: Lint Summary
        if: always()
        run: |
          echo "## Linting Results :mag:" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "Status: ${{ job.status }}" >> $GITHUB_STEP_SUMMARY
.github/workflows/docker.yml
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Docker build workflow: builds the image on pushes to main/dev and on
# version tags, and pushes it to GHCR (skipped for pull requests).
name: Docker Build

on:
  push:
    branches: [ main, dev ]
    tags:
      - 'v*'
  pull_request:
    branches: [ main ]
  workflow_dispatch:

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

jobs:
  build:
    name: Build Docker Image
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write  # required to push to GHCR with GITHUB_TOKEN

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # PR builds never push, so skip the login there.
      - name: Log in to Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=sha,prefix={{branch}}-

      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=max
          # FIX: was `secret-files`, which expects id=FILEPATH pairs and
          # would try to read a file literally named the token value.
          # Inline id=VALUE secrets belong under `secrets`.
          secrets: |
            "github_token=${{ secrets.GITHUB_TOKEN }}"

      - name: Docker Summary
        run: |
          echo "## Docker Build :whale:" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "Registry: ${{ env.REGISTRY }}" >> $GITHUB_STEP_SUMMARY
          echo "Image: ${{ env.IMAGE_NAME }}" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "### Tags" >> $GITHUB_STEP_SUMMARY
          echo '```' >> $GITHUB_STEP_SUMMARY
          echo "${{ steps.meta.outputs.tags }}" >> $GITHUB_STEP_SUMMARY
          echo '```' >> $GITHUB_STEP_SUMMARY
.github/workflows/tests.yml
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Test workflow: runs the pytest suite with coverage across a Python
# version matrix; uploads Codecov + HTML coverage artifacts once (3.10 only)
# to avoid duplicate uploads per matrix leg.
name: Tests

on:
  push:
    branches: [ main, dev ]
  pull_request:
    branches: [ main, dev ]
  workflow_dispatch:  # Allow manual trigger

jobs:
  test:
    name: Run Tests (Python ${{ matrix.python-version }})
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false  # let every matrix leg finish even if one fails
      matrix:
        python-version: ["3.10", "3.11"]

    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Full history for better coverage reports

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install uv
        uses: astral-sh/setup-uv@v4
        with:
          enable-cache: true
          cache-dependency-glob: "uv.lock"

      - name: Install dependencies
        run: |
          uv sync

      - name: Run tests with coverage
        run: |
          make test-coverage

      - name: Generate coverage badge
        if: matrix.python-version == '3.10'
        run: |
          COVERAGE=$(uv run coverage report | grep TOTAL | awk '{print $NF}' | sed 's/%//')
          echo "COVERAGE=$COVERAGE" >> $GITHUB_ENV
          echo "Coverage: $COVERAGE%"

      - name: Upload coverage reports to Codecov
        if: matrix.python-version == '3.10'
        uses: codecov/codecov-action@v4
        with:
          # FIX: v4 takes `files` (plural); the singular `file` input is the
          # deprecated v2/v3 spelling.
          files: ./coverage.xml
          fail_ci_if_error: false
          token: ${{ secrets.CODECOV_TOKEN }}
        continue-on-error: true

      - name: Upload coverage HTML report
        if: matrix.python-version == '3.10'
        uses: actions/upload-artifact@v4
        with:
          name: coverage-report
          path: htmlcov/
          retention-days: 30

      - name: Test Summary
        if: always()
        run: |
          echo "## Test Results :test_tube:" >> $GITHUB_STEP_SUMMARY
          echo "" >> $GITHUB_STEP_SUMMARY
          echo "Python Version: ${{ matrix.python-version }}" >> $GITHUB_STEP_SUMMARY
          echo "Status: ${{ job.status }}" >> $GITHUB_STEP_SUMMARY
MAKEFILE_QUICK_REF.md
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Makefile Quick Reference

## Most Common Commands

```bash
# Setup
make install-dev     # Install dev dependencies
make help            # Show all available commands

# Testing
make test            # Run tests with coverage
make test-fast       # Run tests quickly (no coverage)
make test-ui         # Test UI components only
make test-cli        # Test CLI only

# Code Quality
make format          # Format code with black
make format-check    # Check formatting
make quality         # Run all quality checks

# Running
make run-ui                                  # Launch web interface
make run-single SLIDE=x.svs OUTPUT=out/      # Process single slide
make run-batch CSV=s.csv OUTPUT=out/         # Process batch

# Docker
make docker-build    # Build image
make docker-run      # Run web UI in container
make docker-shell    # Shell into container

# Cleanup
make clean           # Remove cache files
make clean-all       # Remove everything
```

## Development Workflow

```bash
# 1. Initial setup
make install-dev

# 2. Make changes to code
# ... edit files ...

# 3. Format and test
make format
make test

# 4. Before committing
make quality
make test-coverage

# 5. Optional: Install pre-commit hooks
make pre-commit-install
```

## Docker Workflow

```bash
# Build and test locally
make docker-build
make docker-run

# Process slides with Docker
make docker-run-single SLIDE=my_slide.svs
make docker-run-batch CSV=settings.csv

# Push to registry
make docker-tag DOCKER_REGISTRY=myregistry.com/user
make docker-push DOCKER_REGISTRY=myregistry.com/user
```

## CI/CD

```bash
# Run all CI checks
make ci-test         # Tests + format check (fast)
make ci-test-strict  # Tests + format check + pylint (slow)
make ci-docker       # Build Docker for CI
```

## Tips

- Use `make help` to see all available commands
- Use `make test-specific TEST=path/to/test` for debugging
- Use `make test-verbose` to see print statements
- Use `make info` to see project information
- Set environment variables to customize Docker:
  ```bash
  export DOCKER_REGISTRY=myregistry.com/user
  export DOCKER_TAG=v1.0.0
  make docker-build
  ```
MAKEFILE_USAGE.md
ADDED
|
@@ -0,0 +1,459 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Makefile Usage Guide
|
| 2 |
+
|
| 3 |
+
This document provides detailed information about the Makefile targets available in the Mosaic project.
|
| 4 |
+
|
| 5 |
+
## Quick Start
|
| 6 |
+
|
| 7 |
+
```bash
|
| 8 |
+
# See all available commands
|
| 9 |
+
make help
|
| 10 |
+
|
| 11 |
+
# Setup development environment
|
| 12 |
+
make install-dev
|
| 13 |
+
|
| 14 |
+
# Run tests
|
| 15 |
+
make test
|
| 16 |
+
|
| 17 |
+
# Launch web interface
|
| 18 |
+
make run-ui
|
| 19 |
+
```
|
| 20 |
+
|
| 21 |
+
## Development Setup
|
| 22 |
+
|
| 23 |
+
### `make install`
|
| 24 |
+
Install production dependencies only (no dev tools).
|
| 25 |
+
```bash
|
| 26 |
+
make install
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
### `make install-dev`
|
| 30 |
+
Install all dependencies including development tools (pytest, black, pylint, etc.).
|
| 31 |
+
```bash
|
| 32 |
+
make install-dev
|
| 33 |
+
```
|
| 34 |
+
|
| 35 |
+
## Testing
|
| 36 |
+
|
| 37 |
+
### `make test`
|
| 38 |
+
Run full test suite with coverage reporting.
|
| 39 |
+
```bash
|
| 40 |
+
make test
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
### `make test-fast`
|
| 44 |
+
Run tests without coverage (faster execution).
|
| 45 |
+
```bash
|
| 46 |
+
make test-fast
|
| 47 |
+
```
|
| 48 |
+
|
| 49 |
+
### `make test-coverage`
|
| 50 |
+
Run tests with detailed coverage report (terminal + HTML).
|
| 51 |
+
```bash
|
| 52 |
+
make test-coverage
|
| 53 |
+
# View HTML report at: htmlcov/index.html
|
| 54 |
+
```
|
| 55 |
+
|
| 56 |
+
### `make test-ui`
|
| 57 |
+
Run only UI-related tests.
|
| 58 |
+
```bash
|
| 59 |
+
make test-ui
|
| 60 |
+
```
|
| 61 |
+
|
| 62 |
+
### `make test-cli`
|
| 63 |
+
Run only CLI-related tests.
|
| 64 |
+
```bash
|
| 65 |
+
make test-cli
|
| 66 |
+
```
|
| 67 |
+
|
| 68 |
+
### `make test-verbose`
|
| 69 |
+
Run tests with verbose output and show print statements.
|
| 70 |
+
```bash
|
| 71 |
+
make test-verbose
|
| 72 |
+
```
|
| 73 |
+
|
| 74 |
+
### `make test-specific`
|
| 75 |
+
Run a specific test file, class, or method.
|
| 76 |
+
```bash
|
| 77 |
+
# Run specific test file
|
| 78 |
+
make test-specific TEST=tests/test_cli.py
|
| 79 |
+
|
| 80 |
+
# Run specific test class
|
| 81 |
+
make test-specific TEST=tests/test_cli.py::TestArgumentParsing
|
| 82 |
+
|
| 83 |
+
# Run specific test method
|
| 84 |
+
make test-specific TEST=tests/test_cli.py::TestArgumentParsing::test_no_arguments_launches_web_interface
|
| 85 |
+
```
|
| 86 |
+
|
| 87 |
+
## Code Quality
|
| 88 |
+
|
| 89 |
+
### `make lint`
|
| 90 |
+
Check code for linting issues using pylint (src only for speed).
|
| 91 |
+
```bash
|
| 92 |
+
make lint
|
| 93 |
+
```
|
| 94 |
+
|
| 95 |
+
### `make lint-strict`
|
| 96 |
+
Run pylint on both src and tests (slower but comprehensive).
|
| 97 |
+
```bash
|
| 98 |
+
make lint-strict
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
### `make format`
|
| 102 |
+
Format code using black formatter.
|
| 103 |
+
```bash
|
| 104 |
+
make format
|
| 105 |
+
```
|
| 106 |
+
|
| 107 |
+
### `make format-check`
|
| 108 |
+
Check if code is properly formatted without making changes.
|
| 109 |
+
```bash
|
| 110 |
+
make format-check
|
| 111 |
+
```
|
| 112 |
+
|
| 113 |
+
### `make quality`
|
| 114 |
+
Run all code quality checks (format-check + lint).
|
| 115 |
+
```bash
|
| 116 |
+
make quality
|
| 117 |
+
```
|
| 118 |
+
|
| 119 |
+
## Running the Application
|
| 120 |
+
|
| 121 |
+
### `make run-ui`
|
| 122 |
+
Launch the Gradio web interface locally.
|
| 123 |
+
```bash
|
| 124 |
+
make run-ui
|
| 125 |
+
# Open browser to http://localhost:7860
|
| 126 |
+
```
|
| 127 |
+
|
| 128 |
+
### `make run-ui-public`
|
| 129 |
+
Launch Gradio web interface with public sharing enabled.
|
| 130 |
+
```bash
|
| 131 |
+
make run-ui-public
|
| 132 |
+
# Returns a public gradio.app URL for sharing
|
| 133 |
+
```
|
| 134 |
+
|
| 135 |
+
### `make run-single`
|
| 136 |
+
Process a single slide from the command line.
|
| 137 |
+
```bash
|
| 138 |
+
make run-single SLIDE=data/my_slide.svs OUTPUT=output/
|
| 139 |
+
```
|
| 140 |
+
|
| 141 |
+
### `make run-batch`
|
| 142 |
+
Process multiple slides from a CSV file.
|
| 143 |
+
```bash
|
| 144 |
+
make run-batch CSV=data/settings.csv OUTPUT=output/
|
| 145 |
+
```
|
| 146 |
+
|
| 147 |
+
## Docker
|
| 148 |
+
|
| 149 |
+
### `make docker-build`
|
| 150 |
+
Build Docker image for Mosaic.
|
| 151 |
+
```bash
|
| 152 |
+
make docker-build
|
| 153 |
+
|
| 154 |
+
# Build with custom tag
|
| 155 |
+
make docker-build DOCKER_TAG=v1.0.0
|
| 156 |
+
|
| 157 |
+
# Build with custom image name
|
| 158 |
+
make docker-build DOCKER_IMAGE_NAME=my-mosaic DOCKER_TAG=latest
|
| 159 |
+
```
|
| 160 |
+
|
| 161 |
+
### `make docker-build-no-cache`
|
| 162 |
+
Build Docker image without using cache (useful for clean builds).
|
| 163 |
+
```bash
|
| 164 |
+
make docker-build-no-cache
|
| 165 |
+
```
|
| 166 |
+
|
| 167 |
+
### `make docker-run`
|
| 168 |
+
Run Docker container in web UI mode.
|
| 169 |
+
```bash
|
| 170 |
+
make docker-run
|
| 171 |
+
# Access at http://localhost:7860
|
| 172 |
+
```
|
| 173 |
+
|
| 174 |
+
### `make docker-run-single`
|
| 175 |
+
Run Docker container to process a single slide.
|
| 176 |
+
```bash
|
| 177 |
+
# Place your slide in ./data directory first
|
| 178 |
+
make docker-run-single SLIDE=my_slide.svs
|
| 179 |
+
# Results will be in ./output directory
|
| 180 |
+
```
|
| 181 |
+
|
| 182 |
+
### `make docker-run-batch`
|
| 183 |
+
Run Docker container for batch processing.
|
| 184 |
+
```bash
|
| 185 |
+
# Place CSV and slides in ./data directory
|
| 186 |
+
make docker-run-batch CSV=settings.csv
|
| 187 |
+
# Results will be in ./output directory
|
| 188 |
+
```
|
| 189 |
+
|
| 190 |
+
### `make docker-shell`
|
| 191 |
+
Open an interactive shell inside the Docker container.
|
| 192 |
+
```bash
|
| 193 |
+
make docker-shell
|
| 194 |
+
```
|
| 195 |
+
|
| 196 |
+
### `make docker-tag`
|
| 197 |
+
Tag Docker image for pushing to a registry.
|
| 198 |
+
```bash
|
| 199 |
+
make docker-tag DOCKER_REGISTRY=docker.io/myusername
|
| 200 |
+
```
|
| 201 |
+
|
| 202 |
+
### `make docker-push`
|
| 203 |
+
Push Docker image to registry.
|
| 204 |
+
```bash
|
| 205 |
+
# Set your registry first
|
| 206 |
+
make docker-push DOCKER_REGISTRY=docker.io/myusername DOCKER_TAG=latest
|
| 207 |
+
```
|
| 208 |
+
|
| 209 |
+
### `make docker-clean`
|
| 210 |
+
Remove local Docker image.
|
| 211 |
+
```bash
|
| 212 |
+
make docker-clean
|
| 213 |
+
```
|
| 214 |
+
|
| 215 |
+
### `make docker-prune`
|
| 216 |
+
Clean up Docker build cache to free space.
|
| 217 |
+
```bash
|
| 218 |
+
make docker-prune
|
| 219 |
+
```
|
| 220 |
+
|
| 221 |
+
## Cleanup
|
| 222 |
+
|
| 223 |
+
### `make clean`
|
| 224 |
+
Remove Python cache files and build artifacts.
|
| 225 |
+
```bash
|
| 226 |
+
make clean
|
| 227 |
+
```
|
| 228 |
+
|
| 229 |
+
### `make clean-outputs`
|
| 230 |
+
Remove generated output files (masks, CSVs).
|
| 231 |
+
```bash
|
| 232 |
+
make clean-outputs
|
| 233 |
+
```
|
| 234 |
+
|
| 235 |
+
### `make clean-all`
|
| 236 |
+
Remove all artifacts, cache, and Docker images.
|
| 237 |
+
```bash
|
| 238 |
+
make clean-all
|
| 239 |
+
```
|
| 240 |
+
|
| 241 |
+
## Model Management
|
| 242 |
+
|
| 243 |
+
### `make download-models`
|
| 244 |
+
Explicitly download required models from HuggingFace.
|
| 245 |
+
```bash
|
| 246 |
+
make download-models
|
| 247 |
+
# Note: Models are automatically downloaded on first run
|
| 248 |
+
```
|
| 249 |
+
|
| 250 |
+
## CI/CD
|
| 251 |
+
|
| 252 |
+
### `make ci-test`
|
| 253 |
+
Run complete CI test suite (install deps, test with coverage, lint).
|
| 254 |
+
```bash
|
| 255 |
+
make ci-test
|
| 256 |
+
```
|
| 257 |
+
|
| 258 |
+
### `make ci-docker`
|
| 259 |
+
Build Docker image for CI pipeline.
|
| 260 |
+
```bash
|
| 261 |
+
make ci-docker
|
| 262 |
+
```
|
| 263 |
+
|
| 264 |
+
## Development Utilities
|
| 265 |
+
|
| 266 |
+
### `make shell`
|
| 267 |
+
Open Python shell with project in path.
|
| 268 |
+
```bash
|
| 269 |
+
make shell
|
| 270 |
+
```
|
| 271 |
+
|
| 272 |
+
### `make ipython`
|
| 273 |
+
Open IPython shell with project in path.
|
| 274 |
+
```bash
|
| 275 |
+
make ipython
|
| 276 |
+
```
|
| 277 |
+
|
| 278 |
+
### `make notebook`
|
| 279 |
+
Start Jupyter notebook server.
|
| 280 |
+
```bash
|
| 281 |
+
make notebook
|
| 282 |
+
```
|
| 283 |
+
|
| 284 |
+
### `make check-deps`
|
| 285 |
+
Check for outdated dependencies.
|
| 286 |
+
```bash
|
| 287 |
+
make check-deps
|
| 288 |
+
```
|
| 289 |
+
|
| 290 |
+
### `make update-deps`
|
| 291 |
+
Update all dependencies (use with caution).
|
| 292 |
+
```bash
|
| 293 |
+
make update-deps
|
| 294 |
+
```
|
| 295 |
+
|
| 296 |
+
### `make lock`
|
| 297 |
+
Update uv.lock file.
|
| 298 |
+
```bash
|
| 299 |
+
make lock
|
| 300 |
+
```
|
| 301 |
+
|
| 302 |
+
## Git Hooks
|
| 303 |
+
|
| 304 |
+
### `make pre-commit-install`
|
| 305 |
+
Install pre-commit hooks that run lint, format-check, and test-fast before each commit.
|
| 306 |
+
```bash
|
| 307 |
+
make pre-commit-install
|
| 308 |
+
```
|
| 309 |
+
|
| 310 |
+
### `make pre-commit-uninstall`
|
| 311 |
+
Remove pre-commit hooks.
|
| 312 |
+
```bash
|
| 313 |
+
make pre-commit-uninstall
|
| 314 |
+
```
|
| 315 |
+
|
| 316 |
+
## Information
|
| 317 |
+
|
| 318 |
+
### `make info`
|
| 319 |
+
Display project information and key commands.
|
| 320 |
+
```bash
|
| 321 |
+
make info
|
| 322 |
+
```
|
| 323 |
+
|
| 324 |
+
### `make version`
|
| 325 |
+
Show version information.
|
| 326 |
+
```bash
|
| 327 |
+
make version
|
| 328 |
+
```
|
| 329 |
+
|
| 330 |
+
### `make tree`
|
| 331 |
+
Show project directory structure (requires `tree` command).
|
| 332 |
+
```bash
|
| 333 |
+
make tree
|
| 334 |
+
```
|
| 335 |
+
|
| 336 |
+
## Performance
|
| 337 |
+
|
| 338 |
+
### `make profile`
|
| 339 |
+
Profile single slide analysis to identify performance bottlenecks.
|
| 340 |
+
```bash
|
| 341 |
+
make profile SLIDE=tests/testdata/948176.svs
|
| 342 |
+
# Creates profile.stats file with profiling data
|
| 343 |
+
```
|
| 344 |
+
|
| 345 |
+
### `make benchmark`
|
| 346 |
+
Run performance benchmarks on test slide.
|
| 347 |
+
```bash
|
| 348 |
+
make benchmark
|
| 349 |
+
# Times full analysis pipeline
|
| 350 |
+
```
|
| 351 |
+
|
| 352 |
+
## Common Workflows
|
| 353 |
+
|
| 354 |
+
### Setting up for development
|
| 355 |
+
```bash
|
| 356 |
+
# 1. Install dependencies
|
| 357 |
+
make install-dev
|
| 358 |
+
|
| 359 |
+
# 2. Run tests to ensure everything works
|
| 360 |
+
make test
|
| 361 |
+
|
| 362 |
+
# 3. Install pre-commit hooks
|
| 363 |
+
make pre-commit-install
|
| 364 |
+
```
|
| 365 |
+
|
| 366 |
+
### Before committing changes
|
| 367 |
+
```bash
|
| 368 |
+
# Run quality checks
|
| 369 |
+
make quality
|
| 370 |
+
|
| 371 |
+
# Run tests
|
| 372 |
+
make test
|
| 373 |
+
|
| 374 |
+
# Clean up
|
| 375 |
+
make clean
|
| 376 |
+
```
|
| 377 |
+
|
| 378 |
+
### Preparing a release
|
| 379 |
+
```bash
|
| 380 |
+
# Run full CI suite
|
| 381 |
+
make ci-test
|
| 382 |
+
|
| 383 |
+
# Build Docker image
|
| 384 |
+
make docker-build DOCKER_TAG=v1.0.0
|
| 385 |
+
|
| 386 |
+
# Test Docker image
|
| 387 |
+
make docker-run DOCKER_TAG=v1.0.0
|
| 388 |
+
|
| 389 |
+
# Push to registry
|
| 390 |
+
make docker-push DOCKER_REGISTRY=your-registry DOCKER_TAG=v1.0.0
|
| 391 |
+
```
|
| 392 |
+
|
| 393 |
+
### Processing slides
|
| 394 |
+
```bash
|
| 395 |
+
# Web UI (recommended for exploration)
|
| 396 |
+
make run-ui
|
| 397 |
+
|
| 398 |
+
# Single slide (CLI)
|
| 399 |
+
make run-single SLIDE=data/sample.svs OUTPUT=results/
|
| 400 |
+
|
| 401 |
+
# Batch processing (CLI)
|
| 402 |
+
make run-batch CSV=data/batch_settings.csv OUTPUT=results/
|
| 403 |
+
|
| 404 |
+
# Using Docker
|
| 405 |
+
make docker-build
|
| 406 |
+
make docker-run-batch CSV=batch_settings.csv
|
| 407 |
+
```
|
| 408 |
+
|
| 409 |
+
## Customization
|
| 410 |
+
|
| 411 |
+
You can customize Makefile behavior by setting environment variables or editing the Makefile:
|
| 412 |
+
|
| 413 |
+
```bash
|
| 414 |
+
# Custom Docker registry
|
| 415 |
+
export DOCKER_REGISTRY=my-registry.com/username
|
| 416 |
+
|
| 417 |
+
# Custom image name
|
| 418 |
+
export DOCKER_IMAGE_NAME=my-custom-mosaic
|
| 419 |
+
|
| 420 |
+
# Then use make commands as normal
|
| 421 |
+
make docker-build
|
| 422 |
+
make docker-push
|
| 423 |
+
```
|
| 424 |
+
|
| 425 |
+
## Troubleshooting
|
| 426 |
+
|
| 427 |
+
### Tests fail
|
| 428 |
+
```bash
|
| 429 |
+
# Run with verbose output
|
| 430 |
+
make test-verbose
|
| 431 |
+
|
| 432 |
+
# Run specific failing test
|
| 433 |
+
make test-specific TEST=tests/test_file.py::test_name
|
| 434 |
+
```
|
| 435 |
+
|
| 436 |
+
### Docker build fails
|
| 437 |
+
```bash
|
| 438 |
+
# Build without cache
|
| 439 |
+
make docker-build-no-cache
|
| 440 |
+
|
| 441 |
+
# Check Docker logs
|
| 442 |
+
docker logs <container-id>
|
| 443 |
+
```
|
| 444 |
+
|
| 445 |
+
### Permission errors
|
| 446 |
+
```bash
|
| 447 |
+
# Clean and rebuild
|
| 448 |
+
make clean-all
|
| 449 |
+
make install-dev
|
| 450 |
+
```
|
| 451 |
+
|
| 452 |
+
### Out of disk space
|
| 453 |
+
```bash
|
| 454 |
+
# Clean Docker cache
|
| 455 |
+
make docker-prune
|
| 456 |
+
|
| 457 |
+
# Clean project artifacts
|
| 458 |
+
make clean
|
| 459 |
+
```
|
Makefile
ADDED
|
@@ -0,0 +1,257 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.PHONY: help install install-dev test test-coverage test-verbose lint format clean docker-build docker-run docker-push docker-clean run-ui run-cli
|
| 2 |
+
|
| 3 |
+
# Default target
|
| 4 |
+
.DEFAULT_GOAL := help
|
| 5 |
+
|
| 6 |
+
# Variables
|
| 7 |
+
DOCKER_IMAGE_NAME := mosaic
|
| 8 |
+
DOCKER_TAG := latest
|
| 9 |
+
DOCKER_REGISTRY := # Set your registry here (e.g., docker.io/username)
|
| 10 |
+
PYTHON := uv run python
|
| 11 |
+
PYTEST := uv run pytest
|
| 12 |
+
BLACK := uv run black
|
| 13 |
+
PYLINT := uv run pylint
|
| 14 |
+
|
| 15 |
+
##@ General
|
| 16 |
+
|
| 17 |
+
help: ## Display this help message
|
| 18 |
+
@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n make \033[36m<target>\033[0m\n"} /^[a-zA-Z_-]+:.*?##/ { printf " \033[36m%-20s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)
|
| 19 |
+
|
| 20 |
+
##@ Development Setup
|
| 21 |
+
|
| 22 |
+
install: ## Install production dependencies using uv
|
| 23 |
+
uv sync --no-dev
|
| 24 |
+
|
| 25 |
+
install-dev: ## Install development dependencies using uv
|
| 26 |
+
uv sync
|
| 27 |
+
|
| 28 |
+
##@ Testing
|
| 29 |
+
|
| 30 |
+
test: ## Run all tests
|
| 31 |
+
$(PYTEST) tests/ -v
|
| 32 |
+
|
| 33 |
+
test-fast: ## Run tests without coverage (faster)
|
| 34 |
+
$(PYTEST) tests/ -v --no-cov
|
| 35 |
+
|
| 36 |
+
test-coverage: ## Run tests with detailed coverage report
|
| 37 |
+
$(PYTEST) tests/ -v --cov=src/mosaic --cov-report=term-missing --cov-report=html
|
| 38 |
+
|
| 39 |
+
test-ui: ## Run only UI tests
|
| 40 |
+
$(PYTEST) tests/test_ui_components.py tests/test_ui_events.py -v
|
| 41 |
+
|
| 42 |
+
test-cli: ## Run only CLI tests
|
| 43 |
+
$(PYTEST) tests/test_cli.py -v
|
| 44 |
+
|
| 45 |
+
test-verbose: ## Run tests with verbose output and show print statements
|
| 46 |
+
$(PYTEST) tests/ -vv -s
|
| 47 |
+
|
| 48 |
+
test-specific: ## Run specific test (usage: make test-specific TEST=tests/test_cli.py::TestClass::test_method)
|
| 49 |
+
$(PYTEST) $(TEST) -v
|
| 50 |
+
|
| 51 |
+
test-watch: ## Run tests in watch mode (requires pytest-watch)
	# pytest itself has no --watch flag; pytest-watch installs the `ptw`
	# entry point, which re-runs the suite on file changes. Arguments after
	# `--` are forwarded to pytest.
	uv run ptw -- tests/
|
| 53 |
+
|
| 54 |
+
##@ Code Quality
|
| 55 |
+
|
| 56 |
+
lint: ## Run linting checks with pylint
|
| 57 |
+
$(PYLINT) src/mosaic/
|
| 58 |
+
|
| 59 |
+
lint-strict: ## Run pylint on both src and tests
|
| 60 |
+
$(PYLINT) src/mosaic/ tests/
|
| 61 |
+
|
| 62 |
+
format: ## Format code with black
|
| 63 |
+
$(BLACK) src/ tests/
|
| 64 |
+
|
| 65 |
+
format-check: ## Check code formatting without making changes
|
| 66 |
+
$(BLACK) --check src/ tests/
|
| 67 |
+
|
| 68 |
+
quality: format-check lint ## Run all code quality checks
|
| 69 |
+
|
| 70 |
+
##@ Application
|
| 71 |
+
|
| 72 |
+
run-ui: ## Launch Gradio web interface
|
| 73 |
+
$(PYTHON) -m mosaic.gradio_app
|
| 74 |
+
|
| 75 |
+
run-ui-public: ## Launch Gradio web interface with public sharing
|
| 76 |
+
$(PYTHON) -m mosaic.gradio_app --share
|
| 77 |
+
|
| 78 |
+
run-single: ## Run single slide analysis (usage: make run-single SLIDE=path/to/slide.svs OUTPUT=output_dir)
|
| 79 |
+
$(PYTHON) -m mosaic.gradio_app --slide-path $(SLIDE) --output-dir $(OUTPUT)
|
| 80 |
+
|
| 81 |
+
run-batch: ## Run batch analysis from CSV (usage: make run-batch CSV=settings.csv OUTPUT=output_dir)
|
| 82 |
+
$(PYTHON) -m mosaic.gradio_app --slide-csv $(CSV) --output-dir $(OUTPUT)
|
| 83 |
+
|
| 84 |
+
##@ Docker
|
| 85 |
+
|
| 86 |
+
docker-build: ## Build Docker image
|
| 87 |
+
docker build -t $(DOCKER_IMAGE_NAME):$(DOCKER_TAG) .
|
| 88 |
+
|
| 89 |
+
docker-build-no-cache: ## Build Docker image without cache
|
| 90 |
+
docker build --no-cache -t $(DOCKER_IMAGE_NAME):$(DOCKER_TAG) .
|
| 91 |
+
|
| 92 |
+
docker-run: ## Run Docker container (web UI mode)
|
| 93 |
+
docker run -it --rm \
|
| 94 |
+
--gpus all \
|
| 95 |
+
-p 7860:7860 \
|
| 96 |
+
-v $(PWD)/data:/app/data \
|
| 97 |
+
-v $(PWD)/output:/app/output \
|
| 98 |
+
$(DOCKER_IMAGE_NAME):$(DOCKER_TAG)
|
| 99 |
+
|
| 100 |
+
docker-run-single: ## Run Docker container (single slide mode)
|
| 101 |
+
docker run -it --rm \
|
| 102 |
+
--gpus all \
|
| 103 |
+
-v $(PWD)/data:/app/data \
|
| 104 |
+
-v $(PWD)/output:/app/output \
|
| 105 |
+
$(DOCKER_IMAGE_NAME):$(DOCKER_TAG) \
|
| 106 |
+
--slide-path /app/data/$(SLIDE) \
|
| 107 |
+
--output-dir /app/output
|
| 108 |
+
|
| 109 |
+
docker-run-batch: ## Run Docker container (batch mode)
|
| 110 |
+
docker run -it --rm \
|
| 111 |
+
--gpus all \
|
| 112 |
+
-v $(PWD)/data:/app/data \
|
| 113 |
+
-v $(PWD)/output:/app/output \
|
| 114 |
+
$(DOCKER_IMAGE_NAME):$(DOCKER_TAG) \
|
| 115 |
+
--slide-csv /app/data/$(CSV) \
|
| 116 |
+
--output-dir /app/output
|
| 117 |
+
|
| 118 |
+
docker-shell: ## Open shell in Docker container
|
| 119 |
+
docker run -it --rm \
|
| 120 |
+
--gpus all \
|
| 121 |
+
-v $(PWD)/data:/app/data \
|
| 122 |
+
-v $(PWD)/output:/app/output \
|
| 123 |
+
$(DOCKER_IMAGE_NAME):$(DOCKER_TAG) \
|
| 124 |
+
/bin/bash
|
| 125 |
+
|
| 126 |
+
# DOCKER_REGISTRY defaults to empty (see the variable block at the top of
# this Makefile). Without a guard, tagging would produce an invalid image
# reference such as "/mosaic:latest" and push would fail confusingly, so
# fail fast with a clear message instead.
docker-tag: ## Tag Docker image for registry
	@test -n "$(DOCKER_REGISTRY)" || { echo "ERROR: DOCKER_REGISTRY is not set (e.g. export DOCKER_REGISTRY=docker.io/username)"; exit 1; }
	docker tag $(DOCKER_IMAGE_NAME):$(DOCKER_TAG) $(DOCKER_REGISTRY)/$(DOCKER_IMAGE_NAME):$(DOCKER_TAG)

docker-push: docker-tag ## Push Docker image to registry
	docker push $(DOCKER_REGISTRY)/$(DOCKER_IMAGE_NAME):$(DOCKER_TAG)
|
| 131 |
+
|
| 132 |
+
docker-clean: ## Remove Docker image
|
| 133 |
+
docker rmi $(DOCKER_IMAGE_NAME):$(DOCKER_TAG) || true
|
| 134 |
+
|
| 135 |
+
docker-prune: ## Clean up Docker build cache
|
| 136 |
+
docker system prune -f
|
| 137 |
+
docker builder prune -f
|
| 138 |
+
|
| 139 |
+
##@ Cleanup
|
| 140 |
+
|
| 141 |
+
clean: ## Remove build artifacts and cache files
|
| 142 |
+
find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true
|
| 143 |
+
find . -type d -name "*.egg-info" -exec rm -rf {} + 2>/dev/null || true
|
| 144 |
+
find . -type d -name ".pytest_cache" -exec rm -rf {} + 2>/dev/null || true
|
| 145 |
+
find . -type d -name ".ruff_cache" -exec rm -rf {} + 2>/dev/null || true
|
| 146 |
+
find . -type f -name "*.pyc" -delete
|
| 147 |
+
find . -type f -name "*.pyo" -delete
|
| 148 |
+
find . -type f -name ".coverage" -delete
|
| 149 |
+
rm -rf htmlcov/
|
| 150 |
+
rm -rf dist/
|
| 151 |
+
rm -rf build/
|
| 152 |
+
|
| 153 |
+
clean-outputs: ## Remove output files (masks, results CSVs)
|
| 154 |
+
rm -rf output/*
|
| 155 |
+
@echo "Output directory cleaned"
|
| 156 |
+
|
| 157 |
+
clean-all: clean docker-clean ## Remove all build artifacts, cache, and Docker images
|
| 158 |
+
|
| 159 |
+
##@ Model Management
|
| 160 |
+
|
| 161 |
+
download-models: ## Download required models from HuggingFace
|
| 162 |
+
@echo "Models will be downloaded automatically on first run"
|
| 163 |
+
$(PYTHON) -c "from mosaic.gradio_app import download_and_process_models; download_and_process_models()"
|
| 164 |
+
|
| 165 |
+
##@ Documentation
|
| 166 |
+
|
| 167 |
+
docs-requirements: ## Show what needs to be documented
|
| 168 |
+
@echo "Documentation TODO:"
|
| 169 |
+
@echo " - API documentation"
|
| 170 |
+
@echo " - Model architecture details"
|
| 171 |
+
@echo " - CLI usage examples"
|
| 172 |
+
@echo " - Docker deployment guide"
|
| 173 |
+
|
| 174 |
+
##@ CI/CD
|
| 175 |
+
|
| 176 |
+
ci-test: install-dev test-coverage format-check ## Run all CI checks (no lint to save time)
|
| 177 |
+
@echo "All CI checks passed!"
|
| 178 |
+
|
| 179 |
+
ci-test-strict: install-dev test-coverage format-check lint ## Run all CI checks including pylint
|
| 180 |
+
@echo "All strict CI checks passed!"
|
| 181 |
+
|
| 182 |
+
ci-docker: docker-build ## Build Docker image for CI
|
| 183 |
+
@echo "Docker image built successfully"
|
| 184 |
+
|
| 185 |
+
##@ Development Utilities
|
| 186 |
+
|
| 187 |
+
shell: ## Open Python shell with project in path
|
| 188 |
+
$(PYTHON)
|
| 189 |
+
|
| 190 |
+
ipython: ## Open IPython shell with project in path
|
| 191 |
+
uv run ipython
|
| 192 |
+
|
| 193 |
+
notebook: ## Start Jupyter notebook server
|
| 194 |
+
uv run jupyter notebook
|
| 195 |
+
|
| 196 |
+
check-deps: ## Check for outdated dependencies
|
| 197 |
+
uv pip list --outdated
|
| 198 |
+
|
| 199 |
+
update-deps: ## Update dependencies (be careful!)
|
| 200 |
+
uv sync --upgrade
|
| 201 |
+
|
| 202 |
+
lock: ## Update lock file
|
| 203 |
+
uv lock
|
| 204 |
+
|
| 205 |
+
##@ Git Hooks
|
| 206 |
+
|
| 207 |
+
pre-commit-install: ## Install pre-commit hooks
|
| 208 |
+
@echo "Setting up pre-commit hooks..."
|
| 209 |
+
@echo "#!/bin/sh" > .git/hooks/pre-commit
|
| 210 |
+
@echo "make format-check test-fast" >> .git/hooks/pre-commit
|
| 211 |
+
@chmod +x .git/hooks/pre-commit
|
| 212 |
+
@echo "Pre-commit hooks installed (format-check + test-fast)"
|
| 213 |
+
|
| 214 |
+
pre-commit-uninstall: ## Uninstall pre-commit hooks
|
| 215 |
+
rm -f .git/hooks/pre-commit
|
| 216 |
+
@echo "Pre-commit hooks uninstalled"
|
| 217 |
+
|
| 218 |
+
##@ Information
|
| 219 |
+
|
| 220 |
+
info: ## Display project information
|
| 221 |
+
@echo "Mosaic - H&E Whole Slide Image Analysis"
|
| 222 |
+
@echo "========================================"
|
| 223 |
+
@echo ""
|
| 224 |
+
@echo "Python version:"
|
| 225 |
+
@$(PYTHON) --version
|
| 226 |
+
@echo ""
|
| 227 |
+
@echo "UV version:"
|
| 228 |
+
@uv --version
|
| 229 |
+
@echo ""
|
| 230 |
+
@echo "Project structure:"
|
| 231 |
+
@echo " src/mosaic/ - Main application code"
|
| 232 |
+
@echo " tests/ - Test suite"
|
| 233 |
+
@echo " data/ - Input data directory"
|
| 234 |
+
@echo " output/ - Analysis results"
|
| 235 |
+
@echo ""
|
| 236 |
+
@echo "Key commands:"
|
| 237 |
+
@echo " make install-dev - Setup development environment"
|
| 238 |
+
@echo " make test - Run test suite"
|
| 239 |
+
@echo " make run-ui - Launch web interface"
|
| 240 |
+
@echo " make docker-build - Build Docker image"
|
| 241 |
+
|
| 242 |
+
version: ## Show version information
|
| 243 |
+
@$(PYTHON) -c "import mosaic; print(f'Mosaic version: {mosaic.__version__}')" 2>/dev/null || echo "Version info not available"
|
| 244 |
+
|
| 245 |
+
tree: ## Show project directory tree (requires tree command)
|
| 246 |
+
@tree -L 3 -I '__pycache__|*.pyc|*.egg-info|.pytest_cache|.ruff_cache|htmlcov|.venv' . || echo "tree command not found. Install with: apt-get install tree"
|
| 247 |
+
|
| 248 |
+
##@ Performance
|
| 249 |
+
|
| 250 |
+
profile: ## Profile a single slide analysis (usage: make profile SLIDE=path/to/slide.svs)
|
| 251 |
+
$(PYTHON) -m cProfile -o profile.stats -m mosaic.gradio_app --slide-path $(SLIDE) --output-dir profile_output
|
| 252 |
+
$(PYTHON) -c "import pstats; p = pstats.Stats('profile.stats'); p.sort_stats('cumulative'); p.print_stats(20)"
|
| 253 |
+
|
| 254 |
+
benchmark: ## Run performance benchmarks
|
| 255 |
+
@echo "Running benchmark suite..."
|
| 256 |
+
@echo "This will process the test slide and measure performance"
|
| 257 |
+
time $(PYTHON) -m mosaic.gradio_app --slide-path tests/testdata/948176.svs --output-dir benchmark_output
|
src/mosaic/analysis.py
CHANGED
|
@@ -26,8 +26,10 @@ except ImportError:
|
|
| 26 |
return lambda f: f
|
| 27 |
return fn
|
| 28 |
|
|
|
|
| 29 |
# Detect T4 hardware by checking actual GPU
|
| 30 |
import torch
|
|
|
|
| 31 |
IS_T4_GPU = False
|
| 32 |
GPU_NAME = "Unknown"
|
| 33 |
if not IS_ZEROGPU and torch.cuda.is_available():
|
|
@@ -64,18 +66,21 @@ from mosaic.inference import run_aeon, run_paladin
|
|
| 64 |
from mosaic.data_directory import get_data_directory
|
| 65 |
|
| 66 |
# Log hardware detection at module load
|
| 67 |
-
logger.info(
|
|
|
|
|
|
|
|
|
|
| 68 |
|
|
|
|
|
|
|
| 69 |
|
| 70 |
-
def _extract_ctranspath_features(coords, slide_path, attrs, num_workers):
|
| 71 |
-
"""Extract CTransPath features on GPU.
|
| 72 |
-
|
| 73 |
Args:
|
| 74 |
coords: Tissue tile coordinates
|
| 75 |
slide_path: Path to the whole slide image file
|
| 76 |
attrs: Slide attributes
|
| 77 |
num_workers: Number of worker processes
|
| 78 |
-
|
|
|
|
| 79 |
Returns:
|
| 80 |
tuple: (ctranspath_features, coords)
|
| 81 |
"""
|
|
@@ -86,87 +91,92 @@ def _extract_ctranspath_features(coords, slide_path, attrs, num_workers):
|
|
| 86 |
elif IS_T4_GPU:
|
| 87 |
num_workers = DEFAULT_NUM_WORKERS
|
| 88 |
batch_size = DEFAULT_BATCH_SIZE
|
| 89 |
-
logger.info(
|
|
|
|
|
|
|
| 90 |
else:
|
| 91 |
num_workers = max(num_workers, 8)
|
| 92 |
batch_size = 64
|
| 93 |
logger.info(f"Running CTransPath with {num_workers} workers")
|
| 94 |
-
|
| 95 |
start_time = pd.Timestamp.now()
|
| 96 |
|
| 97 |
-
data_dir = get_data_directory()
|
| 98 |
ctranspath_features, _ = get_features(
|
| 99 |
coords,
|
| 100 |
slide_path,
|
| 101 |
attrs,
|
| 102 |
-
|
| 103 |
-
model_path=str(data_dir / "ctranspath.pth"),
|
| 104 |
num_workers=num_workers,
|
| 105 |
batch_size=batch_size,
|
| 106 |
use_gpu=True,
|
| 107 |
)
|
| 108 |
-
|
| 109 |
end_time = pd.Timestamp.now()
|
| 110 |
logger.info(f"CTransPath extraction took {end_time - start_time}")
|
| 111 |
-
|
| 112 |
return ctranspath_features, coords
|
| 113 |
|
| 114 |
|
| 115 |
-
def _extract_optimus_features(filtered_coords, slide_path, attrs, num_workers):
|
| 116 |
-
"""Extract Optimus features on GPU.
|
| 117 |
-
|
| 118 |
Args:
|
| 119 |
filtered_coords: Filtered tissue tile coordinates
|
| 120 |
slide_path: Path to the whole slide image file
|
| 121 |
attrs: Slide attributes
|
| 122 |
num_workers: Number of worker processes
|
| 123 |
-
|
|
|
|
| 124 |
Returns:
|
| 125 |
Optimus features
|
| 126 |
"""
|
| 127 |
if IS_ZEROGPU:
|
| 128 |
num_workers = 0
|
| 129 |
batch_size = 128
|
| 130 |
-
logger.info(
|
|
|
|
|
|
|
| 131 |
elif IS_T4_GPU:
|
| 132 |
num_workers = DEFAULT_NUM_WORKERS
|
| 133 |
batch_size = DEFAULT_BATCH_SIZE
|
| 134 |
-
logger.info(
|
|
|
|
|
|
|
| 135 |
else:
|
| 136 |
num_workers = max(num_workers, 8)
|
| 137 |
batch_size = 64
|
| 138 |
logger.info(f"Running Optimus with {num_workers} workers")
|
| 139 |
-
|
| 140 |
start_time = pd.Timestamp.now()
|
| 141 |
|
| 142 |
-
data_dir = get_data_directory()
|
| 143 |
features, _ = get_features(
|
| 144 |
filtered_coords,
|
| 145 |
slide_path,
|
| 146 |
attrs,
|
| 147 |
-
|
| 148 |
-
model_path=str(data_dir / "optimus.pkl"),
|
| 149 |
num_workers=num_workers,
|
| 150 |
batch_size=batch_size,
|
| 151 |
use_gpu=True,
|
| 152 |
)
|
| 153 |
-
|
| 154 |
end_time = pd.Timestamp.now()
|
| 155 |
logger.info(f"Optimus extraction took {end_time - start_time}")
|
| 156 |
-
|
| 157 |
return features
|
| 158 |
|
| 159 |
|
| 160 |
-
def _run_aeon_inference(
|
|
|
|
|
|
|
| 161 |
"""Run Aeon cancer subtype inference on GPU.
|
| 162 |
-
|
| 163 |
Args:
|
| 164 |
features: Optimus features
|
| 165 |
site_type: Site type ("Primary" or "Metastatic")
|
| 166 |
num_workers: Number of worker processes
|
| 167 |
sex: Patient sex (0=Male, 1=Female), optional
|
| 168 |
tissue_site_idx: Tissue site index (0-56), optional
|
| 169 |
-
|
| 170 |
Returns:
|
| 171 |
Aeon results DataFrame
|
| 172 |
"""
|
|
@@ -179,7 +189,7 @@ def _run_aeon_inference(features, site_type, num_workers, sex=None, tissue_site_
|
|
| 179 |
else:
|
| 180 |
num_workers = max(num_workers, 8)
|
| 181 |
logger.info(f"Running Aeon with num_workers={num_workers}")
|
| 182 |
-
|
| 183 |
start_time = pd.Timestamp.now()
|
| 184 |
logger.info("Running Aeon for cancer subtype inference")
|
| 185 |
data_dir = get_data_directory()
|
|
@@ -194,7 +204,7 @@ def _run_aeon_inference(features, site_type, num_workers, sex=None, tissue_site_
|
|
| 194 |
use_cpu=False,
|
| 195 |
)
|
| 196 |
end_time = pd.Timestamp.now()
|
| 197 |
-
|
| 198 |
# Log memory stats if CUDA is available
|
| 199 |
if torch.cuda.is_available():
|
| 200 |
try:
|
|
@@ -207,19 +217,19 @@ def _run_aeon_inference(features, site_type, num_workers, sex=None, tissue_site_
|
|
| 207 |
logger.info(f"Aeon inference took {end_time - start_time}")
|
| 208 |
else:
|
| 209 |
logger.info(f"Aeon inference took {end_time - start_time}")
|
| 210 |
-
|
| 211 |
return aeon_results
|
| 212 |
|
| 213 |
|
| 214 |
def _run_paladin_inference(features, aeon_results, site_type, num_workers):
|
| 215 |
"""Run Paladin biomarker inference on GPU.
|
| 216 |
-
|
| 217 |
Args:
|
| 218 |
features: Optimus features
|
| 219 |
aeon_results: Aeon results DataFrame
|
| 220 |
site_type: Site type ("Primary" or "Metastatic")
|
| 221 |
num_workers: Number of worker processes
|
| 222 |
-
|
| 223 |
Returns:
|
| 224 |
Paladin results DataFrame
|
| 225 |
"""
|
|
@@ -232,7 +242,7 @@ def _run_paladin_inference(features, aeon_results, site_type, num_workers):
|
|
| 232 |
else:
|
| 233 |
num_workers = max(num_workers, 8)
|
| 234 |
logger.info(f"Running Paladin with num_workers={num_workers}")
|
| 235 |
-
|
| 236 |
start_time = pd.Timestamp.now()
|
| 237 |
logger.info("Running Paladin for biomarker inference")
|
| 238 |
data_dir = get_data_directory()
|
|
@@ -246,7 +256,7 @@ def _run_paladin_inference(features, aeon_results, site_type, num_workers):
|
|
| 246 |
use_cpu=False,
|
| 247 |
)
|
| 248 |
end_time = pd.Timestamp.now()
|
| 249 |
-
|
| 250 |
# Log memory stats if CUDA is available
|
| 251 |
if torch.cuda.is_available():
|
| 252 |
try:
|
|
@@ -259,7 +269,7 @@ def _run_paladin_inference(features, aeon_results, site_type, num_workers):
|
|
| 259 |
logger.info(f"Paladin inference took {end_time - start_time}")
|
| 260 |
else:
|
| 261 |
logger.info(f"Paladin inference took {end_time - start_time}")
|
| 262 |
-
|
| 263 |
return paladin_results
|
| 264 |
|
| 265 |
|
|
@@ -278,8 +288,16 @@ def _run_inference_pipeline_free(
|
|
| 278 |
):
|
| 279 |
"""Run inference pipeline with 60s GPU limit (for free users)."""
|
| 280 |
return _run_inference_pipeline_impl(
|
| 281 |
-
coords,
|
| 282 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
)
|
| 284 |
|
| 285 |
|
|
@@ -298,8 +316,16 @@ def _run_inference_pipeline_pro(
|
|
| 298 |
):
|
| 299 |
"""Run inference pipeline with 300s GPU limit (for PRO users)."""
|
| 300 |
return _run_inference_pipeline_impl(
|
| 301 |
-
coords,
|
| 302 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
)
|
| 304 |
|
| 305 |
|
|
@@ -315,11 +341,10 @@ def _run_inference_pipeline_impl(
|
|
| 315 |
num_workers,
|
| 316 |
progress,
|
| 317 |
):
|
| 318 |
-
"""Run complete inference pipeline
|
| 319 |
|
| 320 |
-
This function
|
| 321 |
-
|
| 322 |
-
independently for each operation.
|
| 323 |
|
| 324 |
Args:
|
| 325 |
coords: Tissue tile coordinates
|
|
@@ -336,59 +361,84 @@ def _run_inference_pipeline_impl(
|
|
| 336 |
- aeon_results: DataFrame with cancer subtype predictions and confidence scores
|
| 337 |
- paladin_results: DataFrame with biomarker predictions
|
| 338 |
"""
|
| 339 |
-
#
|
| 340 |
-
|
| 341 |
-
ctranspath_features, coords = _extract_ctranspath_features(
|
| 342 |
-
coords, slide_path, attrs, num_workers
|
| 343 |
-
)
|
| 344 |
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
marker_classifier = pickle.load(open(data_dir / "marker_classifier.pkl", "rb"))
|
| 349 |
-
progress(0.35, desc="Filtering features with marker classifier")
|
| 350 |
-
logger.info("Filtering features with marker classifier")
|
| 351 |
-
_, filtered_coords = filter_features(
|
| 352 |
-
ctranspath_features,
|
| 353 |
-
coords,
|
| 354 |
-
marker_classifier,
|
| 355 |
-
threshold=0.25,
|
| 356 |
-
)
|
| 357 |
-
end_time = pd.Timestamp.now()
|
| 358 |
-
logger.info(f"Feature filtering took {end_time - start_time}")
|
| 359 |
-
logger.info(
|
| 360 |
-
f"Filtered from {len(coords)} to {len(filtered_coords)} tiles using marker classifier"
|
| 361 |
-
)
|
| 362 |
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
|
|
|
|
|
|
|
|
|
| 366 |
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
progress(0.
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
|
|
|
|
|
|
|
|
|
| 378 |
)
|
| 379 |
-
logger.info(f"Using user-supplied cancer subtype: {cancer_subtype}")
|
| 380 |
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
|
|
|
|
|
|
| 388 |
|
| 389 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 390 |
|
| 391 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 392 |
|
| 393 |
|
| 394 |
# ============================================================================
|
|
@@ -531,11 +581,10 @@ def _run_inference_pipeline_with_models(
|
|
| 531 |
Returns:
|
| 532 |
Tuple of (aeon_results, paladin_results)
|
| 533 |
"""
|
| 534 |
-
# Step 1: Extract CTransPath features
|
| 535 |
-
# Note: Feature extraction optimization can be added later if needed
|
| 536 |
progress(0.3, desc="Extracting CTransPath features")
|
| 537 |
ctranspath_features, coords = _extract_ctranspath_features(
|
| 538 |
-
coords, slide_path, attrs, num_workers
|
| 539 |
)
|
| 540 |
|
| 541 |
# Step 2: Filter features using pre-loaded marker classifier
|
|
@@ -554,9 +603,11 @@ def _run_inference_pipeline_with_models(
|
|
| 554 |
f"Filtered from {len(coords)} to {len(filtered_coords)} tiles using marker classifier"
|
| 555 |
)
|
| 556 |
|
| 557 |
-
# Step 3: Extract Optimus features
|
| 558 |
progress(0.5, desc="Extracting Optimus features")
|
| 559 |
-
features = _extract_optimus_features(
|
|
|
|
|
|
|
| 560 |
|
| 561 |
# Step 4: Run Aeon inference with pre-loaded model (if cancer subtype unknown)
|
| 562 |
aeon_results = None
|
|
@@ -564,7 +615,9 @@ def _run_inference_pipeline_with_models(
|
|
| 564 |
|
| 565 |
# Check if cancer subtype is unknown
|
| 566 |
if cancer_subtype in ["Unknown", None]:
|
| 567 |
-
logger.info(
|
|
|
|
|
|
|
| 568 |
aeon_results = _run_aeon_inference_with_model(
|
| 569 |
features,
|
| 570 |
model_cache.aeon_model, # Use pre-loaded Aeon model
|
|
@@ -593,116 +646,7 @@ def _run_inference_pipeline_with_models(
|
|
| 593 |
return aeon_results, paladin_results
|
| 594 |
|
| 595 |
|
| 596 |
-
|
| 597 |
-
slide_path,
|
| 598 |
-
seg_config,
|
| 599 |
-
site_type,
|
| 600 |
-
sex,
|
| 601 |
-
tissue_site,
|
| 602 |
-
cancer_subtype,
|
| 603 |
-
cancer_subtype_name_map,
|
| 604 |
-
model_cache,
|
| 605 |
-
ihc_subtype="",
|
| 606 |
-
num_workers=4,
|
| 607 |
-
progress=None,
|
| 608 |
-
):
|
| 609 |
-
"""Analyze a slide using pre-loaded models (batch-optimized version).
|
| 610 |
-
|
| 611 |
-
This function is optimized for batch processing where models are loaded once
|
| 612 |
-
in a ModelCache and reused across multiple slides.
|
| 613 |
-
|
| 614 |
-
Args:
|
| 615 |
-
slide_path: Path to the slide file
|
| 616 |
-
seg_config: Segmentation configuration ("Biopsy", "Resection", or "TCGA")
|
| 617 |
-
site_type: "Primary" or "Metastatic"
|
| 618 |
-
sex: Patient sex ("Unknown", "Male", "Female")
|
| 619 |
-
tissue_site: Tissue site name
|
| 620 |
-
cancer_subtype: Known cancer subtype or "Unknown"
|
| 621 |
-
cancer_subtype_name_map: Dict mapping display names to OncoTree codes
|
| 622 |
-
model_cache: ModelCache instance with pre-loaded models
|
| 623 |
-
ihc_subtype: IHC subtype for breast cancer (optional)
|
| 624 |
-
num_workers: Number of workers for data loading
|
| 625 |
-
progress: Gradio progress tracker
|
| 626 |
-
|
| 627 |
-
Returns:
|
| 628 |
-
Tuple of (slide_mask, aeon_results, paladin_results)
|
| 629 |
-
"""
|
| 630 |
-
from mosaic.inference.data import encode_sex, encode_tissue_site
|
| 631 |
-
|
| 632 |
-
if progress is None:
|
| 633 |
-
progress = lambda frac, desc: None # No-op progress function
|
| 634 |
-
|
| 635 |
-
# Encode sex and tissue site
|
| 636 |
-
sex_idx = encode_sex(sex) if sex else None
|
| 637 |
-
tissue_site_idx = encode_tissue_site(tissue_site) if tissue_site else None
|
| 638 |
-
|
| 639 |
-
# Step 1: Convert seg_config string to config object
|
| 640 |
-
if isinstance(seg_config, str):
|
| 641 |
-
if seg_config == "Biopsy":
|
| 642 |
-
seg_config = BiopsySegConfig()
|
| 643 |
-
elif seg_config == "Resection":
|
| 644 |
-
seg_config = ResectionSegConfig()
|
| 645 |
-
elif seg_config == "TCGA":
|
| 646 |
-
seg_config = TcgaSegConfig()
|
| 647 |
-
else:
|
| 648 |
-
raise ValueError(f"Unknown segmentation configuration: {seg_config}")
|
| 649 |
-
|
| 650 |
-
# Step 2: Tissue segmentation (CPU operation, not affected by model caching)
|
| 651 |
-
progress(0.0, desc="Segmenting tissue")
|
| 652 |
-
logger.info(f"Segmenting tissue for slide: {slide_path}")
|
| 653 |
-
start_time = pd.Timestamp.now()
|
| 654 |
-
|
| 655 |
-
if values := segment_tissue(
|
| 656 |
-
slide_path=slide_path,
|
| 657 |
-
patch_size=224,
|
| 658 |
-
mpp=0.5,
|
| 659 |
-
seg_level=-1,
|
| 660 |
-
segment_threshold=seg_config.segment_threshold,
|
| 661 |
-
median_blur_ksize=seg_config.median_blur_ksize,
|
| 662 |
-
morphology_ex_kernel=seg_config.morphology_ex_kernel,
|
| 663 |
-
tissue_area_threshold=seg_config.tissue_area_threshold,
|
| 664 |
-
hole_area_threshold=seg_config.hole_area_threshold,
|
| 665 |
-
max_num_holes=seg_config.max_num_holes,
|
| 666 |
-
):
|
| 667 |
-
polygon, _, coords, attrs = values
|
| 668 |
-
else:
|
| 669 |
-
logger.warning("No tissue detected in slide")
|
| 670 |
-
return None, None, None
|
| 671 |
-
|
| 672 |
-
end_time = pd.Timestamp.now()
|
| 673 |
-
logger.info(f"Tissue segmentation took {end_time - start_time}")
|
| 674 |
-
logger.info(f"Found {len(coords)} tissue tiles")
|
| 675 |
-
|
| 676 |
-
if len(coords) == 0:
|
| 677 |
-
logger.warning("No tissue tiles found in slide")
|
| 678 |
-
return None, None, None
|
| 679 |
-
|
| 680 |
-
# Step 2: Create slide mask visualization (CPU operation)
|
| 681 |
-
progress(0.2, desc="Creating slide mask")
|
| 682 |
-
logger.info("Drawing slide mask")
|
| 683 |
-
slide_mask = draw_slide_mask(
|
| 684 |
-
slide_path, polygon, outline="black", fill=(255, 0, 0, 80), vis_level=-1
|
| 685 |
-
)
|
| 686 |
-
logger.info("Slide mask drawn")
|
| 687 |
-
|
| 688 |
-
# Step 3: Run inference pipeline with pre-loaded models
|
| 689 |
-
aeon_results, paladin_results = _run_inference_pipeline_with_models(
|
| 690 |
-
coords,
|
| 691 |
-
slide_path,
|
| 692 |
-
attrs,
|
| 693 |
-
site_type,
|
| 694 |
-
sex_idx,
|
| 695 |
-
tissue_site_idx,
|
| 696 |
-
cancer_subtype,
|
| 697 |
-
cancer_subtype_name_map,
|
| 698 |
-
model_cache,
|
| 699 |
-
num_workers,
|
| 700 |
-
progress,
|
| 701 |
-
)
|
| 702 |
-
|
| 703 |
-
progress(1.0, desc="Analysis complete")
|
| 704 |
-
|
| 705 |
-
return slide_mask, aeon_results, paladin_results
|
| 706 |
|
| 707 |
|
| 708 |
def analyze_slide(
|
|
@@ -717,26 +661,27 @@ def analyze_slide(
|
|
| 717 |
num_workers=4,
|
| 718 |
progress=gr.Progress(track_tqdm=True),
|
| 719 |
request: gr.Request = None,
|
|
|
|
| 720 |
):
|
| 721 |
"""Analyze a whole slide image for cancer subtype and biomarker prediction.
|
| 722 |
|
| 723 |
-
This function
|
| 724 |
-
1.
|
| 725 |
-
2.
|
| 726 |
-
|
| 727 |
-
The GPU-intensive operations are handled by a separate function decorated
|
| 728 |
-
with @spaces.GPU to efficiently manage GPU resources on Hugging Face Spaces.
|
| 729 |
-
Tissue segmentation runs on CPU and is not included in the GPU allocation.
|
| 730 |
|
| 731 |
Args:
|
| 732 |
slide_path: Path to the whole slide image file
|
| 733 |
seg_config: Segmentation configuration, one of "Biopsy", "Resection", or "TCGA"
|
| 734 |
site_type: Site type, either "Primary" or "Metastatic"
|
|
|
|
|
|
|
| 735 |
cancer_subtype: Cancer subtype (OncoTree code or "Unknown" for inference)
|
| 736 |
cancer_subtype_name_map: Dictionary mapping cancer subtype names to codes
|
| 737 |
ihc_subtype: IHC subtype for breast cancer (optional)
|
| 738 |
num_workers: Number of worker processes for feature extraction
|
| 739 |
progress: Gradio progress tracker for UI updates
|
|
|
|
|
|
|
| 740 |
|
| 741 |
Returns:
|
| 742 |
tuple: (slide_mask, aeon_results, paladin_results)
|
|
@@ -795,51 +740,6 @@ def analyze_slide(
|
|
| 795 |
)
|
| 796 |
logger.info("Slide mask drawn")
|
| 797 |
|
| 798 |
-
# Step 2-6: Run inference pipeline with GPU
|
| 799 |
-
# Check if user is logged in for longer GPU duration
|
| 800 |
-
is_logged_in = False
|
| 801 |
-
username = "anonymous"
|
| 802 |
-
if request is not None:
|
| 803 |
-
try:
|
| 804 |
-
# Check if user is logged in via JWT token in referer
|
| 805 |
-
# HF Spaces doesn't populate request.username but includes JWT in URL
|
| 806 |
-
if hasattr(request, 'headers'):
|
| 807 |
-
referer = request.headers.get('referer', '')
|
| 808 |
-
if '__sign=' in referer:
|
| 809 |
-
# Extract and decode JWT token
|
| 810 |
-
import re
|
| 811 |
-
import json
|
| 812 |
-
import base64
|
| 813 |
-
|
| 814 |
-
match = re.search(r'__sign=([^&]+)', referer)
|
| 815 |
-
if match:
|
| 816 |
-
token = match.group(1)
|
| 817 |
-
try:
|
| 818 |
-
# JWT format: header.payload.signature
|
| 819 |
-
# We only need the payload (middle part)
|
| 820 |
-
parts = token.split('.')
|
| 821 |
-
if len(parts) == 3:
|
| 822 |
-
# Decode base64 payload (add padding if needed)
|
| 823 |
-
payload = parts[1]
|
| 824 |
-
payload += '=' * (4 - len(payload) % 4)
|
| 825 |
-
decoded = base64.urlsafe_b64decode(payload)
|
| 826 |
-
token_data = json.loads(decoded)
|
| 827 |
-
|
| 828 |
-
# Check if user is in token
|
| 829 |
-
if 'onBehalfOf' in token_data and 'user' in token_data['onBehalfOf']:
|
| 830 |
-
username = token_data['onBehalfOf']['user']
|
| 831 |
-
is_logged_in = True
|
| 832 |
-
logger.info(f"Found user in JWT token: {username}")
|
| 833 |
-
except Exception as e:
|
| 834 |
-
logger.warning(f"Failed to decode JWT: {e}")
|
| 835 |
-
|
| 836 |
-
if IS_ZEROGPU:
|
| 837 |
-
logger.info(f"User: {username} | Logged in: {is_logged_in}")
|
| 838 |
-
except Exception as e:
|
| 839 |
-
logger.warning(f"Failed to detect user: {e}")
|
| 840 |
-
import traceback
|
| 841 |
-
logger.warning(traceback.format_exc())
|
| 842 |
-
|
| 843 |
# Convert sex and tissue_site to indices for Aeon model
|
| 844 |
from mosaic.inference.data import encode_sex, encode_tissue_site
|
| 845 |
|
|
@@ -851,10 +751,11 @@ def analyze_slide(
|
|
| 851 |
if tissue_site is not None:
|
| 852 |
tissue_site_idx = encode_tissue_site(tissue_site)
|
| 853 |
|
| 854 |
-
|
| 855 |
-
|
| 856 |
-
|
| 857 |
-
|
|
|
|
| 858 |
coords,
|
| 859 |
slide_path,
|
| 860 |
attrs,
|
|
@@ -863,23 +764,91 @@ def analyze_slide(
|
|
| 863 |
tissue_site_idx,
|
| 864 |
cancer_subtype,
|
| 865 |
cancer_subtype_name_map,
|
|
|
|
| 866 |
num_workers,
|
| 867 |
progress,
|
| 868 |
)
|
| 869 |
else:
|
| 870 |
-
|
| 871 |
-
|
| 872 |
-
|
| 873 |
-
|
| 874 |
-
|
| 875 |
-
|
| 876 |
-
|
| 877 |
-
|
| 878 |
-
|
| 879 |
-
|
| 880 |
-
|
| 881 |
-
|
| 882 |
-
|
| 883 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 884 |
|
| 885 |
return slide_mask, aeon_results, paladin_results
|
|
|
|
| 26 |
return lambda f: f
|
| 27 |
return fn
|
| 28 |
|
| 29 |
+
|
| 30 |
# Detect T4 hardware by checking actual GPU
|
| 31 |
import torch
|
| 32 |
+
|
| 33 |
IS_T4_GPU = False
|
| 34 |
GPU_NAME = "Unknown"
|
| 35 |
if not IS_ZEROGPU and torch.cuda.is_available():
|
|
|
|
| 66 |
from mosaic.data_directory import get_data_directory
|
| 67 |
|
| 68 |
# Log hardware detection at module load
|
| 69 |
+
logger.info(
|
| 70 |
+
f"Hardware: {GPU_TYPE} | batch_size={DEFAULT_BATCH_SIZE}, num_workers={DEFAULT_NUM_WORKERS}"
|
| 71 |
+
)
|
| 72 |
+
|
| 73 |
|
| 74 |
+
def _extract_ctranspath_features(coords, slide_path, attrs, num_workers, model):
|
| 75 |
+
"""Extract CTransPath features on GPU using pre-loaded model.
|
| 76 |
|
|
|
|
|
|
|
|
|
|
| 77 |
Args:
|
| 78 |
coords: Tissue tile coordinates
|
| 79 |
slide_path: Path to the whole slide image file
|
| 80 |
attrs: Slide attributes
|
| 81 |
num_workers: Number of worker processes
|
| 82 |
+
model: Pre-loaded CTransPath model from ModelCache
|
| 83 |
+
|
| 84 |
Returns:
|
| 85 |
tuple: (ctranspath_features, coords)
|
| 86 |
"""
|
|
|
|
| 91 |
elif IS_T4_GPU:
|
| 92 |
num_workers = DEFAULT_NUM_WORKERS
|
| 93 |
batch_size = DEFAULT_BATCH_SIZE
|
| 94 |
+
logger.info(
|
| 95 |
+
f"Running CTransPath on T4: processing {len(coords)} tiles with batch_size={batch_size}"
|
| 96 |
+
)
|
| 97 |
else:
|
| 98 |
num_workers = max(num_workers, 8)
|
| 99 |
batch_size = 64
|
| 100 |
logger.info(f"Running CTransPath with {num_workers} workers")
|
| 101 |
+
|
| 102 |
start_time = pd.Timestamp.now()
|
| 103 |
|
|
|
|
| 104 |
ctranspath_features, _ = get_features(
|
| 105 |
coords,
|
| 106 |
slide_path,
|
| 107 |
attrs,
|
| 108 |
+
model=model,
|
|
|
|
| 109 |
num_workers=num_workers,
|
| 110 |
batch_size=batch_size,
|
| 111 |
use_gpu=True,
|
| 112 |
)
|
| 113 |
+
|
| 114 |
end_time = pd.Timestamp.now()
|
| 115 |
logger.info(f"CTransPath extraction took {end_time - start_time}")
|
| 116 |
+
|
| 117 |
return ctranspath_features, coords
|
| 118 |
|
| 119 |
|
| 120 |
+
def _extract_optimus_features(filtered_coords, slide_path, attrs, num_workers, model):
|
| 121 |
+
"""Extract Optimus features on GPU using pre-loaded model.
|
| 122 |
+
|
| 123 |
Args:
|
| 124 |
filtered_coords: Filtered tissue tile coordinates
|
| 125 |
slide_path: Path to the whole slide image file
|
| 126 |
attrs: Slide attributes
|
| 127 |
num_workers: Number of worker processes
|
| 128 |
+
model: Pre-loaded Optimus model from ModelCache
|
| 129 |
+
|
| 130 |
Returns:
|
| 131 |
Optimus features
|
| 132 |
"""
|
| 133 |
if IS_ZEROGPU:
|
| 134 |
num_workers = 0
|
| 135 |
batch_size = 128
|
| 136 |
+
logger.info(
|
| 137 |
+
f"Running Optimus on ZeroGPU: processing {len(filtered_coords)} tiles"
|
| 138 |
+
)
|
| 139 |
elif IS_T4_GPU:
|
| 140 |
num_workers = DEFAULT_NUM_WORKERS
|
| 141 |
batch_size = DEFAULT_BATCH_SIZE
|
| 142 |
+
logger.info(
|
| 143 |
+
f"Running Optimus on T4: processing {len(filtered_coords)} tiles with batch_size={batch_size}"
|
| 144 |
+
)
|
| 145 |
else:
|
| 146 |
num_workers = max(num_workers, 8)
|
| 147 |
batch_size = 64
|
| 148 |
logger.info(f"Running Optimus with {num_workers} workers")
|
| 149 |
+
|
| 150 |
start_time = pd.Timestamp.now()
|
| 151 |
|
|
|
|
| 152 |
features, _ = get_features(
|
| 153 |
filtered_coords,
|
| 154 |
slide_path,
|
| 155 |
attrs,
|
| 156 |
+
model=model,
|
|
|
|
| 157 |
num_workers=num_workers,
|
| 158 |
batch_size=batch_size,
|
| 159 |
use_gpu=True,
|
| 160 |
)
|
| 161 |
+
|
| 162 |
end_time = pd.Timestamp.now()
|
| 163 |
logger.info(f"Optimus extraction took {end_time - start_time}")
|
| 164 |
+
|
| 165 |
return features
|
| 166 |
|
| 167 |
|
| 168 |
+
def _run_aeon_inference(
|
| 169 |
+
features, site_type, num_workers, sex=None, tissue_site_idx=None
|
| 170 |
+
):
|
| 171 |
"""Run Aeon cancer subtype inference on GPU.
|
| 172 |
+
|
| 173 |
Args:
|
| 174 |
features: Optimus features
|
| 175 |
site_type: Site type ("Primary" or "Metastatic")
|
| 176 |
num_workers: Number of worker processes
|
| 177 |
sex: Patient sex (0=Male, 1=Female), optional
|
| 178 |
tissue_site_idx: Tissue site index (0-56), optional
|
| 179 |
+
|
| 180 |
Returns:
|
| 181 |
Aeon results DataFrame
|
| 182 |
"""
|
|
|
|
| 189 |
else:
|
| 190 |
num_workers = max(num_workers, 8)
|
| 191 |
logger.info(f"Running Aeon with num_workers={num_workers}")
|
| 192 |
+
|
| 193 |
start_time = pd.Timestamp.now()
|
| 194 |
logger.info("Running Aeon for cancer subtype inference")
|
| 195 |
data_dir = get_data_directory()
|
|
|
|
| 204 |
use_cpu=False,
|
| 205 |
)
|
| 206 |
end_time = pd.Timestamp.now()
|
| 207 |
+
|
| 208 |
# Log memory stats if CUDA is available
|
| 209 |
if torch.cuda.is_available():
|
| 210 |
try:
|
|
|
|
| 217 |
logger.info(f"Aeon inference took {end_time - start_time}")
|
| 218 |
else:
|
| 219 |
logger.info(f"Aeon inference took {end_time - start_time}")
|
| 220 |
+
|
| 221 |
return aeon_results
|
| 222 |
|
| 223 |
|
| 224 |
def _run_paladin_inference(features, aeon_results, site_type, num_workers):
|
| 225 |
"""Run Paladin biomarker inference on GPU.
|
| 226 |
+
|
| 227 |
Args:
|
| 228 |
features: Optimus features
|
| 229 |
aeon_results: Aeon results DataFrame
|
| 230 |
site_type: Site type ("Primary" or "Metastatic")
|
| 231 |
num_workers: Number of worker processes
|
| 232 |
+
|
| 233 |
Returns:
|
| 234 |
Paladin results DataFrame
|
| 235 |
"""
|
|
|
|
| 242 |
else:
|
| 243 |
num_workers = max(num_workers, 8)
|
| 244 |
logger.info(f"Running Paladin with num_workers={num_workers}")
|
| 245 |
+
|
| 246 |
start_time = pd.Timestamp.now()
|
| 247 |
logger.info("Running Paladin for biomarker inference")
|
| 248 |
data_dir = get_data_directory()
|
|
|
|
| 256 |
use_cpu=False,
|
| 257 |
)
|
| 258 |
end_time = pd.Timestamp.now()
|
| 259 |
+
|
| 260 |
# Log memory stats if CUDA is available
|
| 261 |
if torch.cuda.is_available():
|
| 262 |
try:
|
|
|
|
| 269 |
logger.info(f"Paladin inference took {end_time - start_time}")
|
| 270 |
else:
|
| 271 |
logger.info(f"Paladin inference took {end_time - start_time}")
|
| 272 |
+
|
| 273 |
return paladin_results
|
| 274 |
|
| 275 |
|
|
|
|
| 288 |
):
|
| 289 |
"""Run inference pipeline with 60s GPU limit (for free users)."""
|
| 290 |
return _run_inference_pipeline_impl(
|
| 291 |
+
coords,
|
| 292 |
+
slide_path,
|
| 293 |
+
attrs,
|
| 294 |
+
site_type,
|
| 295 |
+
sex,
|
| 296 |
+
tissue_site_idx,
|
| 297 |
+
cancer_subtype,
|
| 298 |
+
cancer_subtype_name_map,
|
| 299 |
+
num_workers,
|
| 300 |
+
progress,
|
| 301 |
)
|
| 302 |
|
| 303 |
|
|
|
|
| 316 |
):
|
| 317 |
"""Run inference pipeline with 300s GPU limit (for PRO users)."""
|
| 318 |
return _run_inference_pipeline_impl(
|
| 319 |
+
coords,
|
| 320 |
+
slide_path,
|
| 321 |
+
attrs,
|
| 322 |
+
site_type,
|
| 323 |
+
sex,
|
| 324 |
+
tissue_site_idx,
|
| 325 |
+
cancer_subtype,
|
| 326 |
+
cancer_subtype_name_map,
|
| 327 |
+
num_workers,
|
| 328 |
+
progress,
|
| 329 |
)
|
| 330 |
|
| 331 |
|
|
|
|
| 341 |
num_workers,
|
| 342 |
progress,
|
| 343 |
):
|
| 344 |
+
"""Run complete inference pipeline using model cache.
|
| 345 |
|
| 346 |
+
This function loads models once and reuses them throughout the pipeline,
|
| 347 |
+
orchestrating GPU operations for feature extraction and inference.
|
|
|
|
| 348 |
|
| 349 |
Args:
|
| 350 |
coords: Tissue tile coordinates
|
|
|
|
| 361 |
- aeon_results: DataFrame with cancer subtype predictions and confidence scores
|
| 362 |
- paladin_results: DataFrame with biomarker predictions
|
| 363 |
"""
|
| 364 |
+
# Load all models once for the entire pipeline
|
| 365 |
+
from mosaic.model_manager import load_all_models
|
|
|
|
|
|
|
|
|
|
| 366 |
|
| 367 |
+
progress(0.1, desc="Loading models")
|
| 368 |
+
logger.info("Loading models for inference pipeline")
|
| 369 |
+
model_cache = load_all_models(use_gpu=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
|
| 371 |
+
try:
|
| 372 |
+
# Step 2: Extract CTransPath features using cached model
|
| 373 |
+
progress(0.3, desc="Extracting CTransPath features")
|
| 374 |
+
ctranspath_features, coords = _extract_ctranspath_features(
|
| 375 |
+
coords, slide_path, attrs, num_workers, model=model_cache.ctranspath_model
|
| 376 |
+
)
|
| 377 |
|
| 378 |
+
# Step 3: Filter features using cached marker classifier
|
| 379 |
+
start_time = pd.Timestamp.now()
|
| 380 |
+
progress(0.35, desc="Filtering features with marker classifier")
|
| 381 |
+
logger.info("Filtering features with marker classifier")
|
| 382 |
+
_, filtered_coords = filter_features(
|
| 383 |
+
ctranspath_features,
|
| 384 |
+
coords,
|
| 385 |
+
model_cache.marker_classifier,
|
| 386 |
+
threshold=0.25,
|
| 387 |
+
)
|
| 388 |
+
end_time = pd.Timestamp.now()
|
| 389 |
+
logger.info(f"Feature filtering took {end_time - start_time}")
|
| 390 |
+
logger.info(
|
| 391 |
+
f"Filtered from {len(coords)} to {len(filtered_coords)} tiles using marker classifier"
|
| 392 |
)
|
|
|
|
| 393 |
|
| 394 |
+
# Step 4: Extract Optimus features using cached model
|
| 395 |
+
progress(0.4, desc="Extracting Optimus features")
|
| 396 |
+
features = _extract_optimus_features(
|
| 397 |
+
filtered_coords,
|
| 398 |
+
slide_path,
|
| 399 |
+
attrs,
|
| 400 |
+
num_workers,
|
| 401 |
+
model=model_cache.optimus_model,
|
| 402 |
+
)
|
| 403 |
|
| 404 |
+
# Step 5: Run Aeon to predict histology if not supplied
|
| 405 |
+
if cancer_subtype == "Unknown":
|
| 406 |
+
progress(0.9, desc="Running Aeon for cancer subtype inference")
|
| 407 |
+
aeon_results = _run_aeon_inference_with_model(
|
| 408 |
+
features,
|
| 409 |
+
model_cache.aeon_model,
|
| 410 |
+
model_cache.device,
|
| 411 |
+
site_type,
|
| 412 |
+
num_workers,
|
| 413 |
+
sex,
|
| 414 |
+
tissue_site_idx,
|
| 415 |
+
)
|
| 416 |
+
else:
|
| 417 |
+
cancer_subtype_code = cancer_subtype_name_map.get(cancer_subtype)
|
| 418 |
+
aeon_results = pd.DataFrame(
|
| 419 |
+
{
|
| 420 |
+
"Cancer Subtype": [cancer_subtype_code],
|
| 421 |
+
"Confidence": [1.0],
|
| 422 |
+
}
|
| 423 |
+
)
|
| 424 |
+
logger.info(f"Using user-supplied cancer subtype: {cancer_subtype}")
|
| 425 |
|
| 426 |
+
# Step 6: Run Paladin to predict biomarkers
|
| 427 |
+
if len(aeon_results) == 0:
|
| 428 |
+
logger.warning("No Aeon results, skipping Paladin inference")
|
| 429 |
+
return None, None
|
| 430 |
+
|
| 431 |
+
progress(0.95, desc="Running Paladin for biomarker inference")
|
| 432 |
+
paladin_results = _run_paladin_inference_with_models(
|
| 433 |
+
features, aeon_results, site_type, model_cache, num_workers
|
| 434 |
+
)
|
| 435 |
+
|
| 436 |
+
aeon_results.set_index("Cancer Subtype", inplace=True)
|
| 437 |
+
|
| 438 |
+
return aeon_results, paladin_results
|
| 439 |
+
finally:
|
| 440 |
+
# Clean up models to free GPU memory
|
| 441 |
+
model_cache.cleanup()
|
| 442 |
|
| 443 |
|
| 444 |
# ============================================================================
|
|
|
|
| 581 |
Returns:
|
| 582 |
Tuple of (aeon_results, paladin_results)
|
| 583 |
"""
|
| 584 |
+
# Step 1: Extract CTransPath features with PRE-LOADED model
|
|
|
|
| 585 |
progress(0.3, desc="Extracting CTransPath features")
|
| 586 |
ctranspath_features, coords = _extract_ctranspath_features(
|
| 587 |
+
coords, slide_path, attrs, num_workers, model=model_cache.ctranspath_model
|
| 588 |
)
|
| 589 |
|
| 590 |
# Step 2: Filter features using pre-loaded marker classifier
|
|
|
|
| 603 |
f"Filtered from {len(coords)} to {len(filtered_coords)} tiles using marker classifier"
|
| 604 |
)
|
| 605 |
|
| 606 |
+
# Step 3: Extract Optimus features with PRE-LOADED model
|
| 607 |
progress(0.5, desc="Extracting Optimus features")
|
| 608 |
+
features = _extract_optimus_features(
|
| 609 |
+
filtered_coords, slide_path, attrs, num_workers, model=model_cache.optimus_model
|
| 610 |
+
)
|
| 611 |
|
| 612 |
# Step 4: Run Aeon inference with pre-loaded model (if cancer subtype unknown)
|
| 613 |
aeon_results = None
|
|
|
|
| 615 |
|
| 616 |
# Check if cancer subtype is unknown
|
| 617 |
if cancer_subtype in ["Unknown", None]:
|
| 618 |
+
logger.info(
|
| 619 |
+
"Running Aeon inference with PRE-LOADED model (cancer subtype unknown)"
|
| 620 |
+
)
|
| 621 |
aeon_results = _run_aeon_inference_with_model(
|
| 622 |
features,
|
| 623 |
model_cache.aeon_model, # Use pre-loaded Aeon model
|
|
|
|
| 646 |
return aeon_results, paladin_results
|
| 647 |
|
| 648 |
|
| 649 |
+
# Removed: analyze_slide_with_models merged into analyze_slide below
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 650 |
|
| 651 |
|
| 652 |
def analyze_slide(
|
|
|
|
| 661 |
num_workers=4,
|
| 662 |
progress=gr.Progress(track_tqdm=True),
|
| 663 |
request: gr.Request = None,
|
| 664 |
+
model_cache=None,
|
| 665 |
):
|
| 666 |
"""Analyze a whole slide image for cancer subtype and biomarker prediction.
|
| 667 |
|
| 668 |
+
This function works in two modes:
|
| 669 |
+
1. **Single-slide mode** (model_cache=None): Loads models, analyzes one slide, cleans up
|
| 670 |
+
2. **Batch mode** (model_cache provided): Uses pre-loaded models for efficiency
|
|
|
|
|
|
|
|
|
|
|
|
|
| 671 |
|
| 672 |
Args:
|
| 673 |
slide_path: Path to the whole slide image file
|
| 674 |
seg_config: Segmentation configuration, one of "Biopsy", "Resection", or "TCGA"
|
| 675 |
site_type: Site type, either "Primary" or "Metastatic"
|
| 676 |
+
sex: Patient sex ("Unknown", "Male", "Female")
|
| 677 |
+
tissue_site: Tissue site name
|
| 678 |
cancer_subtype: Cancer subtype (OncoTree code or "Unknown" for inference)
|
| 679 |
cancer_subtype_name_map: Dictionary mapping cancer subtype names to codes
|
| 680 |
ihc_subtype: IHC subtype for breast cancer (optional)
|
| 681 |
num_workers: Number of worker processes for feature extraction
|
| 682 |
progress: Gradio progress tracker for UI updates
|
| 683 |
+
request: Gradio request object (for HF Spaces authentication)
|
| 684 |
+
model_cache: Optional ModelCache with pre-loaded models (for batch processing)
|
| 685 |
|
| 686 |
Returns:
|
| 687 |
tuple: (slide_mask, aeon_results, paladin_results)
|
|
|
|
| 740 |
)
|
| 741 |
logger.info("Slide mask drawn")
|
| 742 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 743 |
# Convert sex and tissue_site to indices for Aeon model
|
| 744 |
from mosaic.inference.data import encode_sex, encode_tissue_site
|
| 745 |
|
|
|
|
| 751 |
if tissue_site is not None:
|
| 752 |
tissue_site_idx = encode_tissue_site(tissue_site)
|
| 753 |
|
| 754 |
+
# Run inference pipeline - two modes based on model_cache
|
| 755 |
+
if model_cache is not None:
|
| 756 |
+
# Batch mode: use pre-loaded models
|
| 757 |
+
logger.info("Using pre-loaded models from ModelCache (batch mode)")
|
| 758 |
+
aeon_results, paladin_results = _run_inference_pipeline_with_models(
|
| 759 |
coords,
|
| 760 |
slide_path,
|
| 761 |
attrs,
|
|
|
|
| 764 |
tissue_site_idx,
|
| 765 |
cancer_subtype,
|
| 766 |
cancer_subtype_name_map,
|
| 767 |
+
model_cache,
|
| 768 |
num_workers,
|
| 769 |
progress,
|
| 770 |
)
|
| 771 |
else:
|
| 772 |
+
# Single-slide mode: load models on-demand
|
| 773 |
+
# Check if user is logged in for longer GPU duration (HF Spaces only)
|
| 774 |
+
is_logged_in = False
|
| 775 |
+
username = "anonymous"
|
| 776 |
+
if request is not None:
|
| 777 |
+
try:
|
| 778 |
+
# Check if user is logged in via JWT token in referer
|
| 779 |
+
# HF Spaces doesn't populate request.username but includes JWT in URL
|
| 780 |
+
if hasattr(request, "headers"):
|
| 781 |
+
referer = request.headers.get("referer", "")
|
| 782 |
+
if "__sign=" in referer:
|
| 783 |
+
# Extract and decode JWT token
|
| 784 |
+
import re
|
| 785 |
+
import json
|
| 786 |
+
import base64
|
| 787 |
+
|
| 788 |
+
match = re.search(r"__sign=([^&]+)", referer)
|
| 789 |
+
if match:
|
| 790 |
+
token = match.group(1)
|
| 791 |
+
try:
|
| 792 |
+
# JWT format: header.payload.signature
|
| 793 |
+
# We only need the payload (middle part)
|
| 794 |
+
parts = token.split(".")
|
| 795 |
+
if len(parts) == 3:
|
| 796 |
+
# Decode base64 payload (add padding if needed)
|
| 797 |
+
payload = parts[1]
|
| 798 |
+
payload += "=" * (4 - len(payload) % 4)
|
| 799 |
+
decoded = base64.urlsafe_b64decode(payload)
|
| 800 |
+
token_data = json.loads(decoded)
|
| 801 |
+
|
| 802 |
+
# Check if user is in token
|
| 803 |
+
if (
|
| 804 |
+
"onBehalfOf" in token_data
|
| 805 |
+
and "user" in token_data["onBehalfOf"]
|
| 806 |
+
):
|
| 807 |
+
username = token_data["onBehalfOf"]["user"]
|
| 808 |
+
is_logged_in = True
|
| 809 |
+
logger.info(
|
| 810 |
+
f"Found user in JWT token: {username}"
|
| 811 |
+
)
|
| 812 |
+
except Exception as e:
|
| 813 |
+
logger.warning(f"Failed to decode JWT: {e}")
|
| 814 |
+
|
| 815 |
+
if IS_ZEROGPU:
|
| 816 |
+
logger.info(f"User: {username} | Logged in: {is_logged_in}")
|
| 817 |
+
except Exception as e:
|
| 818 |
+
logger.warning(f"Failed to detect user: {e}")
|
| 819 |
+
import traceback
|
| 820 |
+
|
| 821 |
+
logger.warning(traceback.format_exc())
|
| 822 |
+
|
| 823 |
+
if is_logged_in:
|
| 824 |
+
if IS_ZEROGPU:
|
| 825 |
+
logger.info("Using 300s GPU allocation (logged-in user)")
|
| 826 |
+
aeon_results, paladin_results = _run_inference_pipeline_pro(
|
| 827 |
+
coords,
|
| 828 |
+
slide_path,
|
| 829 |
+
attrs,
|
| 830 |
+
site_type,
|
| 831 |
+
sex_idx,
|
| 832 |
+
tissue_site_idx,
|
| 833 |
+
cancer_subtype,
|
| 834 |
+
cancer_subtype_name_map,
|
| 835 |
+
num_workers,
|
| 836 |
+
progress,
|
| 837 |
+
)
|
| 838 |
+
else:
|
| 839 |
+
if IS_ZEROGPU:
|
| 840 |
+
logger.info("Using 60s GPU allocation (anonymous user)")
|
| 841 |
+
aeon_results, paladin_results = _run_inference_pipeline_free(
|
| 842 |
+
coords,
|
| 843 |
+
slide_path,
|
| 844 |
+
attrs,
|
| 845 |
+
site_type,
|
| 846 |
+
sex_idx,
|
| 847 |
+
tissue_site_idx,
|
| 848 |
+
cancer_subtype,
|
| 849 |
+
cancer_subtype_name_map,
|
| 850 |
+
num_workers,
|
| 851 |
+
progress,
|
| 852 |
+
)
|
| 853 |
|
| 854 |
return slide_mask, aeon_results, paladin_results
|
src/mosaic/batch_analysis.py
DELETED
|
@@ -1,238 +0,0 @@
|
|
| 1 |
-
"""Batch processing coordinator for multi-slide analysis.
|
| 2 |
-
|
| 3 |
-
This module provides optimized batch processing functionality that loads
|
| 4 |
-
models once and reuses them across multiple slides, significantly reducing
|
| 5 |
-
overhead compared to processing slides individually.
|
| 6 |
-
"""
|
| 7 |
-
|
| 8 |
-
from typing import Dict, List, Optional, Tuple
|
| 9 |
-
import pandas as pd
|
| 10 |
-
import time
|
| 11 |
-
from loguru import logger
|
| 12 |
-
|
| 13 |
-
from mosaic.model_manager import load_all_models
|
| 14 |
-
from mosaic.analysis import analyze_slide_with_models
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
def analyze_slides_batch(
|
| 18 |
-
slides: List[str],
|
| 19 |
-
settings_df: pd.DataFrame,
|
| 20 |
-
cancer_subtype_name_map: Dict[str, str],
|
| 21 |
-
num_workers: int = 4,
|
| 22 |
-
aggressive_memory_mgmt: Optional[bool] = None,
|
| 23 |
-
progress=None,
|
| 24 |
-
) -> Tuple[List[Tuple], List[pd.DataFrame], List[pd.DataFrame]]:
|
| 25 |
-
"""Analyze multiple slides with models loaded once for batch processing.
|
| 26 |
-
|
| 27 |
-
This function provides significant performance improvements over sequential
|
| 28 |
-
processing by loading all models once at the start, processing all slides
|
| 29 |
-
with the pre-loaded models, and cleaning up at the end.
|
| 30 |
-
|
| 31 |
-
Performance Benefits:
|
| 32 |
-
- ~90% reduction in model loading operations
|
| 33 |
-
- 25-45% overall speedup depending on model loading overhead
|
| 34 |
-
- Memory-efficient: same peak memory as single-slide processing
|
| 35 |
-
|
| 36 |
-
Args:
|
| 37 |
-
slides: List of slide file paths
|
| 38 |
-
settings_df: DataFrame with columns matching SETTINGS_COLUMNS from ui/utils.py
|
| 39 |
-
cancer_subtype_name_map: Dict mapping cancer subtype display names to OncoTree codes
|
| 40 |
-
num_workers: Number of CPU workers for data loading (default: 4)
|
| 41 |
-
aggressive_memory_mgmt: Memory management strategy:
|
| 42 |
-
- None: Auto-detect based on GPU type (T4 = True, A100 = False)
|
| 43 |
-
- True: T4-style aggressive cleanup (load/delete Paladin models per slide)
|
| 44 |
-
- False: Cache Paladin models across slides (requires >40GB GPU memory)
|
| 45 |
-
progress: Optional Gradio progress tracker
|
| 46 |
-
|
| 47 |
-
Returns:
|
| 48 |
-
Tuple of (all_slide_masks, all_aeon_results, all_paladin_results):
|
| 49 |
-
- all_slide_masks: List of (slide_mask_image, slide_name) tuples
|
| 50 |
-
- all_aeon_results: List of DataFrames with Aeon cancer subtype predictions
|
| 51 |
-
- all_paladin_results: List of DataFrames with Paladin biomarker predictions
|
| 52 |
-
|
| 53 |
-
Example:
|
| 54 |
-
```python
|
| 55 |
-
slides = ["slide1.svs", "slide2.svs", "slide3.svs"]
|
| 56 |
-
settings_df = pd.DataFrame({
|
| 57 |
-
"Slide": ["slide1.svs", "slide2.svs", "slide3.svs"],
|
| 58 |
-
"Site Type": ["Primary", "Primary", "Metastatic"],
|
| 59 |
-
"Sex": ["Male", "Female", "Unknown"],
|
| 60 |
-
"Tissue Site": ["Lung", "Breast", "Unknown"],
|
| 61 |
-
"Cancer Subtype": ["Unknown", "Unknown", "LUAD"],
|
| 62 |
-
"IHC Subtype": ["", "HR+/HER2-", ""],
|
| 63 |
-
"Segmentation Config": ["Biopsy", "Resection", "Biopsy"],
|
| 64 |
-
})
|
| 65 |
-
|
| 66 |
-
masks, aeon, paladin = analyze_slides_batch(
|
| 67 |
-
slides, settings_df, cancer_subtype_name_map
|
| 68 |
-
)
|
| 69 |
-
```
|
| 70 |
-
|
| 71 |
-
Notes:
|
| 72 |
-
- GPU memory requirements: ~9-15GB for typical batches
|
| 73 |
-
- T4 GPUs (16GB): Uses aggressive memory management automatically
|
| 74 |
-
- A100 GPUs (80GB): Can cache Paladin models for better performance
|
| 75 |
-
- Maintains backward compatibility: single slides can still use analyze_slide()
|
| 76 |
-
"""
|
| 77 |
-
if progress is None:
|
| 78 |
-
progress = lambda frac, desc: None # No-op progress function
|
| 79 |
-
|
| 80 |
-
num_slides = len(slides)
|
| 81 |
-
batch_start_time = time.time()
|
| 82 |
-
|
| 83 |
-
logger.info("=" * 80)
|
| 84 |
-
logger.info(f"BATCH PROCESSING: Starting analysis of {num_slides} slides")
|
| 85 |
-
logger.info("=" * 80)
|
| 86 |
-
|
| 87 |
-
# Step 1: Load all models once
|
| 88 |
-
progress(0.0, desc="Loading models for batch processing")
|
| 89 |
-
model_load_start = time.time()
|
| 90 |
-
|
| 91 |
-
try:
|
| 92 |
-
model_cache = load_all_models(
|
| 93 |
-
use_gpu=True,
|
| 94 |
-
aggressive_memory_mgmt=aggressive_memory_mgmt,
|
| 95 |
-
)
|
| 96 |
-
|
| 97 |
-
model_load_time = time.time() - model_load_start
|
| 98 |
-
logger.info(f"Model loading completed in {model_load_time:.2f}s")
|
| 99 |
-
logger.info("")
|
| 100 |
-
|
| 101 |
-
# Log memory strategy
|
| 102 |
-
if model_cache.aggressive_memory_mgmt:
|
| 103 |
-
logger.info(
|
| 104 |
-
"Memory strategy: AGGRESSIVE (T4-style) - "
|
| 105 |
-
"Paladin models loaded/freed per slide"
|
| 106 |
-
)
|
| 107 |
-
else:
|
| 108 |
-
logger.info(
|
| 109 |
-
"Using caching strategy (A100-style): "
|
| 110 |
-
"Paladin models will be cached across slides"
|
| 111 |
-
)
|
| 112 |
-
|
| 113 |
-
except Exception as e:
|
| 114 |
-
logger.error(f"Failed to load models: {e}")
|
| 115 |
-
raise
|
| 116 |
-
|
| 117 |
-
# Step 2: Process each slide with pre-loaded models
|
| 118 |
-
all_slide_masks = []
|
| 119 |
-
all_aeon_results = []
|
| 120 |
-
all_paladin_results = []
|
| 121 |
-
slide_times = []
|
| 122 |
-
|
| 123 |
-
logger.info("=" * 80)
|
| 124 |
-
logger.info("Processing slides with PRE-LOADED models (no model reloading!)")
|
| 125 |
-
logger.info("=" * 80)
|
| 126 |
-
|
| 127 |
-
try:
|
| 128 |
-
for idx, (slide_path, (_, row)) in enumerate(zip(slides, settings_df.iterrows())):
|
| 129 |
-
slide_name = slide_path.split("/")[-1] if "/" in slide_path else slide_path
|
| 130 |
-
|
| 131 |
-
# Update progress
|
| 132 |
-
progress_frac = (idx + 0.1) / num_slides
|
| 133 |
-
progress(progress_frac, desc=f"Analyzing slide {idx + 1}/{num_slides}: {slide_name}")
|
| 134 |
-
|
| 135 |
-
logger.info("")
|
| 136 |
-
logger.info(f"[{idx + 1}/{num_slides}] Processing: {slide_name}")
|
| 137 |
-
logger.info(f" Using pre-loaded models (no disk I/O for core models)")
|
| 138 |
-
slide_start_time = time.time()
|
| 139 |
-
|
| 140 |
-
try:
|
| 141 |
-
# Use batch-optimized analysis with pre-loaded models
|
| 142 |
-
slide_mask, aeon_results, paladin_results = analyze_slide_with_models(
|
| 143 |
-
slide_path=slide_path,
|
| 144 |
-
seg_config=row["Segmentation Config"],
|
| 145 |
-
site_type=row["Site Type"],
|
| 146 |
-
sex=row.get("Sex", "Unknown"),
|
| 147 |
-
tissue_site=row.get("Tissue Site", "Unknown"),
|
| 148 |
-
cancer_subtype=row["Cancer Subtype"],
|
| 149 |
-
cancer_subtype_name_map=cancer_subtype_name_map,
|
| 150 |
-
model_cache=model_cache,
|
| 151 |
-
ihc_subtype=row.get("IHC Subtype", ""),
|
| 152 |
-
num_workers=num_workers,
|
| 153 |
-
progress=progress,
|
| 154 |
-
)
|
| 155 |
-
|
| 156 |
-
slide_time = time.time() - slide_start_time
|
| 157 |
-
slide_times.append(slide_time)
|
| 158 |
-
|
| 159 |
-
# Collect results
|
| 160 |
-
if slide_mask is not None:
|
| 161 |
-
all_slide_masks.append((slide_mask, slide_name))
|
| 162 |
-
|
| 163 |
-
if aeon_results is not None:
|
| 164 |
-
# Add slide name to results for multi-slide batches
|
| 165 |
-
if num_slides > 1:
|
| 166 |
-
aeon_results.columns = [f"{slide_name}"]
|
| 167 |
-
all_aeon_results.append(aeon_results)
|
| 168 |
-
|
| 169 |
-
if paladin_results is not None:
|
| 170 |
-
# Add slide name column
|
| 171 |
-
paladin_results.insert(
|
| 172 |
-
0, "Slide", pd.Series([slide_name] * len(paladin_results))
|
| 173 |
-
)
|
| 174 |
-
all_paladin_results.append(paladin_results)
|
| 175 |
-
|
| 176 |
-
logger.info(f"[{idx + 1}/{num_slides}] ✓ Completed in {slide_time:.2f}s")
|
| 177 |
-
|
| 178 |
-
except Exception as e:
|
| 179 |
-
slide_time = time.time() - slide_start_time
|
| 180 |
-
slide_times.append(slide_time)
|
| 181 |
-
logger.exception(f"[{idx + 1}/{num_slides}] ✗ Failed after {slide_time:.2f}s: {e}")
|
| 182 |
-
# Continue with next slide instead of failing entire batch
|
| 183 |
-
continue
|
| 184 |
-
|
| 185 |
-
finally:
|
| 186 |
-
# Step 3: Always cleanup models (even if there were errors)
|
| 187 |
-
logger.info("")
|
| 188 |
-
logger.info("=" * 80)
|
| 189 |
-
logger.info("Cleaning up models...")
|
| 190 |
-
progress(0.99, desc="Cleaning up models")
|
| 191 |
-
model_cache.cleanup()
|
| 192 |
-
logger.info("✓ Model cleanup complete")
|
| 193 |
-
|
| 194 |
-
# Calculate batch statistics
|
| 195 |
-
batch_total_time = time.time() - batch_start_time
|
| 196 |
-
num_successful = len(all_slide_masks)
|
| 197 |
-
num_failed = num_slides - num_successful
|
| 198 |
-
|
| 199 |
-
# Log comprehensive summary
|
| 200 |
-
logger.info("=" * 80)
|
| 201 |
-
logger.info("BATCH PROCESSING SUMMARY")
|
| 202 |
-
logger.info("=" * 80)
|
| 203 |
-
logger.info(f"Total slides: {num_slides}")
|
| 204 |
-
logger.info(f"Successfully processed: {num_successful}")
|
| 205 |
-
logger.info(f"Failed: {num_failed}")
|
| 206 |
-
logger.info("")
|
| 207 |
-
logger.info(f"Model loading time: {model_load_time:.2f}s (done ONCE for entire batch)")
|
| 208 |
-
logger.info(f"Total batch time: {batch_total_time:.2f}s")
|
| 209 |
-
|
| 210 |
-
if slide_times:
|
| 211 |
-
avg_slide_time = sum(slide_times) / len(slide_times)
|
| 212 |
-
min_slide_time = min(slide_times)
|
| 213 |
-
max_slide_time = max(slide_times)
|
| 214 |
-
total_slide_time = sum(slide_times)
|
| 215 |
-
|
| 216 |
-
logger.info("")
|
| 217 |
-
logger.info("Per-slide processing times:")
|
| 218 |
-
logger.info(f" Average: {avg_slide_time:.2f}s")
|
| 219 |
-
logger.info(f" Min: {min_slide_time:.2f}s")
|
| 220 |
-
logger.info(f" Max: {max_slide_time:.2f}s")
|
| 221 |
-
logger.info(f" Total: {total_slide_time:.2f}s")
|
| 222 |
-
|
| 223 |
-
# Calculate efficiency
|
| 224 |
-
overhead_time = batch_total_time - total_slide_time
|
| 225 |
-
logger.info("")
|
| 226 |
-
logger.info(f"Batch overhead: {overhead_time:.2f}s ({overhead_time/batch_total_time*100:.1f}%)")
|
| 227 |
-
logger.info(f"Slide processing: {total_slide_time:.2f}s ({total_slide_time/batch_total_time*100:.1f}%)")
|
| 228 |
-
|
| 229 |
-
logger.info("")
|
| 230 |
-
logger.info("✓ Batch processing optimization benefits:")
|
| 231 |
-
logger.info(" - Models loaded ONCE (not once per slide)")
|
| 232 |
-
logger.info(" - Reduced disk I/O for model loading")
|
| 233 |
-
logger.info(f" - Processed {num_slides} slides with shared model cache")
|
| 234 |
-
logger.info("=" * 80)
|
| 235 |
-
|
| 236 |
-
progress(1.0, desc=f"Batch analysis complete ({num_successful}/{num_slides} successful)")
|
| 237 |
-
|
| 238 |
-
return all_slide_masks, all_aeon_results, all_paladin_results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/mosaic/gradio_app.py
CHANGED
|
@@ -25,15 +25,15 @@ from mosaic.ui.utils import (
|
|
| 25 |
SEX_OPTIONS,
|
| 26 |
)
|
| 27 |
from mosaic.analysis import analyze_slide
|
| 28 |
-
from mosaic.
|
| 29 |
|
| 30 |
|
| 31 |
def download_and_process_models():
|
| 32 |
-
"""Download models from HuggingFace and initialize cancer subtype mappings.
|
| 33 |
|
| 34 |
-
Downloads the
|
| 35 |
-
|
| 36 |
-
|
| 37 |
|
| 38 |
Returns:
|
| 39 |
tuple: (cancer_subtype_name_map, reversed_cancer_subtype_name_map, cancer_subtypes)
|
|
@@ -41,47 +41,69 @@ def download_and_process_models():
|
|
| 41 |
- reversed_cancer_subtype_name_map: Dict mapping OncoTree codes to display names
|
| 42 |
- cancer_subtypes: List of all supported cancer subtype codes
|
| 43 |
"""
|
| 44 |
-
# Download to HF cache directory
|
| 45 |
-
#
|
| 46 |
-
logger.info(
|
|
|
|
|
|
|
| 47 |
cache_dir = snapshot_download(
|
| 48 |
repo_id="PDM-Group/paladin-aeon-models",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
# No local_dir - use HF cache
|
| 50 |
)
|
| 51 |
-
logger.info(f"
|
| 52 |
|
| 53 |
# Set the data directory for other modules to use
|
| 54 |
set_data_directory(cache_dir)
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
model_map = pd.read_csv(
|
| 57 |
Path(cache_dir) / "paladin_model_map.csv",
|
| 58 |
)
|
| 59 |
cancer_subtypes = model_map["cancer_subtype"].unique().tolist()
|
| 60 |
cancer_subtype_name_map = {"Unknown": "UNK"}
|
| 61 |
-
cancer_subtype_name_map.update(
|
| 62 |
-
f"{get_oncotree_code_name(code)} ({code})": code for code in cancer_subtypes
|
| 63 |
-
|
| 64 |
reversed_cancer_subtype_name_map = {
|
| 65 |
value: key for key, value in cancer_subtype_name_map.items()
|
| 66 |
}
|
| 67 |
-
|
| 68 |
-
# Set the global maps in the UI module
|
| 69 |
-
set_cancer_subtype_maps(cancer_subtype_name_map, reversed_cancer_subtype_name_map, cancer_subtypes)
|
| 70 |
-
|
| 71 |
-
return cancer_subtype_name_map, reversed_cancer_subtype_name_map, cancer_subtypes
|
| 72 |
-
|
| 73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
|
|
|
| 75 |
|
| 76 |
|
| 77 |
def main():
|
| 78 |
"""Main entry point for the Mosaic application.
|
| 79 |
-
|
| 80 |
Parses command-line arguments and routes to the appropriate mode:
|
| 81 |
- Single slide processing (--slide-path)
|
| 82 |
- Batch processing (--slide-csv)
|
| 83 |
- Web interface (default, no slide arguments)
|
| 84 |
-
|
| 85 |
Command-line arguments control analysis parameters like site type,
|
| 86 |
cancer subtype, segmentation configuration, and output directory.
|
| 87 |
"""
|
|
@@ -160,7 +182,9 @@ def main():
|
|
| 160 |
logger.add("debug.log", level="DEBUG")
|
| 161 |
logger.debug("Debug logging enabled")
|
| 162 |
|
| 163 |
-
cancer_subtype_name_map, reversed_cancer_subtype_name_map, cancer_subtypes =
|
|
|
|
|
|
|
| 164 |
|
| 165 |
if args.slide_path and not args.slide_csv:
|
| 166 |
# Single slide processing mode
|
|
@@ -180,7 +204,12 @@ def main():
|
|
| 180 |
],
|
| 181 |
columns=SETTINGS_COLUMNS,
|
| 182 |
)
|
| 183 |
-
settings_df = validate_settings(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
slide_mask, aeon_results, paladin_results = analyze_slide(
|
| 185 |
args.slide_path,
|
| 186 |
args.segmentation_config,
|
|
@@ -218,24 +247,62 @@ def main():
|
|
| 218 |
# Load and validate settings
|
| 219 |
settings_df = load_settings(args.slide_csv)
|
| 220 |
settings_df = validate_settings(
|
| 221 |
-
settings_df,
|
|
|
|
|
|
|
|
|
|
| 222 |
)
|
| 223 |
|
| 224 |
# Extract slide paths
|
| 225 |
slides = settings_df["Slide"].tolist()
|
| 226 |
|
| 227 |
-
logger.info(
|
| 228 |
-
|
| 229 |
-
# Use batch processing (models loaded once)
|
| 230 |
-
all_slide_masks, all_aeon_results, all_paladin_results = analyze_slides_batch(
|
| 231 |
-
slides=slides,
|
| 232 |
-
settings_df=settings_df,
|
| 233 |
-
cancer_subtype_name_map=cancer_subtype_name_map,
|
| 234 |
-
num_workers=args.num_workers,
|
| 235 |
-
aggressive_memory_mgmt=None, # Auto-detect GPU type
|
| 236 |
-
progress=None,
|
| 237 |
)
|
| 238 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
# Save individual slide results
|
| 240 |
for idx, (slide_mask, slide_name) in enumerate(all_slide_masks):
|
| 241 |
mask_path = output_dir / f"{slide_name}_mask.png"
|
|
@@ -252,7 +319,9 @@ def main():
|
|
| 252 |
if all_paladin_results:
|
| 253 |
combined_paladin = pd.concat(all_paladin_results, ignore_index=True)
|
| 254 |
for slide_name in combined_paladin["Slide"].unique():
|
| 255 |
-
slide_paladin = combined_paladin[
|
|
|
|
|
|
|
| 256 |
paladin_output_path = output_dir / f"{slide_name}_paladin_results.csv"
|
| 257 |
slide_paladin.to_csv(paladin_output_path, index=False)
|
| 258 |
logger.info(f"Saved Paladin results to {paladin_output_path}")
|
|
|
|
| 25 |
SEX_OPTIONS,
|
| 26 |
)
|
| 27 |
from mosaic.analysis import analyze_slide
|
| 28 |
+
from mosaic.model_manager import load_all_models
|
| 29 |
|
| 30 |
|
| 31 |
def download_and_process_models():
|
| 32 |
+
"""Download essential models from HuggingFace and initialize cancer subtype mappings.
|
| 33 |
|
| 34 |
+
Downloads only the core models (CTransPath, Optimus, Aeon, marker classifier) and
|
| 35 |
+
metadata files from the PDM-Group HuggingFace repository. Paladin models are
|
| 36 |
+
downloaded on-demand when needed for inference.
|
| 37 |
|
| 38 |
Returns:
|
| 39 |
tuple: (cancer_subtype_name_map, reversed_cancer_subtype_name_map, cancer_subtypes)
|
|
|
|
| 41 |
- reversed_cancer_subtype_name_map: Dict mapping OncoTree codes to display names
|
| 42 |
- cancer_subtypes: List of all supported cancer subtype codes
|
| 43 |
"""
|
| 44 |
+
# Download only essential files to HF cache directory
|
| 45 |
+
# Paladin models will be downloaded on-demand
|
| 46 |
+
logger.info(
|
| 47 |
+
"Downloading essential models from HuggingFace Hub (Paladin models loaded on-demand)..."
|
| 48 |
+
)
|
| 49 |
cache_dir = snapshot_download(
|
| 50 |
repo_id="PDM-Group/paladin-aeon-models",
|
| 51 |
+
allow_patterns=[
|
| 52 |
+
"*.csv", # Model maps and metadata
|
| 53 |
+
"ctranspath.pth", # CTransPath model
|
| 54 |
+
"aeon_model.pkl", # Aeon model
|
| 55 |
+
"marker_classifier.pkl", # Marker classifier
|
| 56 |
+
"tissue_site_*", # Tissue site mappings
|
| 57 |
+
],
|
| 58 |
# No local_dir - use HF cache
|
| 59 |
)
|
| 60 |
+
logger.info(f"Essential models downloaded to: {cache_dir}")
|
| 61 |
|
| 62 |
# Set the data directory for other modules to use
|
| 63 |
set_data_directory(cache_dir)
|
| 64 |
|
| 65 |
+
# Pre-download Optimus model from bioptimus/H-optimus-0
|
| 66 |
+
# This ensures it's cached at startup since it's needed for every slide
|
| 67 |
+
logger.info("Pre-downloading Optimus model from bioptimus/H-optimus-0...")
|
| 68 |
+
from mussel.models import ModelType, get_model_factory
|
| 69 |
+
|
| 70 |
+
optimus_factory = get_model_factory(ModelType.OPTIMUS)
|
| 71 |
+
# This will trigger the download and cache the model
|
| 72 |
+
_ = optimus_factory.get_model(
|
| 73 |
+
model_path="hf-hub:bioptimus/H-optimus-0",
|
| 74 |
+
use_gpu=False, # Just download, don't load to GPU yet
|
| 75 |
+
gpu_device_id=None,
|
| 76 |
+
)
|
| 77 |
+
logger.info("✓ Optimus model cached")
|
| 78 |
+
|
| 79 |
model_map = pd.read_csv(
|
| 80 |
Path(cache_dir) / "paladin_model_map.csv",
|
| 81 |
)
|
| 82 |
cancer_subtypes = model_map["cancer_subtype"].unique().tolist()
|
| 83 |
cancer_subtype_name_map = {"Unknown": "UNK"}
|
| 84 |
+
cancer_subtype_name_map.update(
|
| 85 |
+
{f"{get_oncotree_code_name(code)} ({code})": code for code in cancer_subtypes}
|
| 86 |
+
)
|
| 87 |
reversed_cancer_subtype_name_map = {
|
| 88 |
value: key for key, value in cancer_subtype_name_map.items()
|
| 89 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
+
# Set the global maps in the UI module
|
| 92 |
+
set_cancer_subtype_maps(
|
| 93 |
+
cancer_subtype_name_map, reversed_cancer_subtype_name_map, cancer_subtypes
|
| 94 |
+
)
|
| 95 |
|
| 96 |
+
return cancer_subtype_name_map, reversed_cancer_subtype_name_map, cancer_subtypes
|
| 97 |
|
| 98 |
|
| 99 |
def main():
|
| 100 |
"""Main entry point for the Mosaic application.
|
| 101 |
+
|
| 102 |
Parses command-line arguments and routes to the appropriate mode:
|
| 103 |
- Single slide processing (--slide-path)
|
| 104 |
- Batch processing (--slide-csv)
|
| 105 |
- Web interface (default, no slide arguments)
|
| 106 |
+
|
| 107 |
Command-line arguments control analysis parameters like site type,
|
| 108 |
cancer subtype, segmentation configuration, and output directory.
|
| 109 |
"""
|
|
|
|
| 182 |
logger.add("debug.log", level="DEBUG")
|
| 183 |
logger.debug("Debug logging enabled")
|
| 184 |
|
| 185 |
+
cancer_subtype_name_map, reversed_cancer_subtype_name_map, cancer_subtypes = (
|
| 186 |
+
download_and_process_models()
|
| 187 |
+
)
|
| 188 |
|
| 189 |
if args.slide_path and not args.slide_csv:
|
| 190 |
# Single slide processing mode
|
|
|
|
| 204 |
],
|
| 205 |
columns=SETTINGS_COLUMNS,
|
| 206 |
)
|
| 207 |
+
settings_df = validate_settings(
|
| 208 |
+
settings_df,
|
| 209 |
+
cancer_subtype_name_map,
|
| 210 |
+
cancer_subtypes,
|
| 211 |
+
reversed_cancer_subtype_name_map,
|
| 212 |
+
)
|
| 213 |
slide_mask, aeon_results, paladin_results = analyze_slide(
|
| 214 |
args.slide_path,
|
| 215 |
args.segmentation_config,
|
|
|
|
| 247 |
# Load and validate settings
|
| 248 |
settings_df = load_settings(args.slide_csv)
|
| 249 |
settings_df = validate_settings(
|
| 250 |
+
settings_df,
|
| 251 |
+
cancer_subtype_name_map,
|
| 252 |
+
cancer_subtypes,
|
| 253 |
+
reversed_cancer_subtype_name_map,
|
| 254 |
)
|
| 255 |
|
| 256 |
# Extract slide paths
|
| 257 |
slides = settings_df["Slide"].tolist()
|
| 258 |
|
| 259 |
+
logger.info(
|
| 260 |
+
f"Processing {len(slides)} slides in batch mode with models loaded once"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
)
|
| 262 |
|
| 263 |
+
# Load models once for batch processing
|
| 264 |
+
model_cache = load_all_models(use_gpu=True, aggressive_memory_mgmt=None)
|
| 265 |
+
|
| 266 |
+
all_slide_masks = []
|
| 267 |
+
all_aeon_results = []
|
| 268 |
+
all_paladin_results = []
|
| 269 |
+
|
| 270 |
+
try:
|
| 271 |
+
# Process each slide with pre-loaded models
|
| 272 |
+
for idx, slide_path in enumerate(slides):
|
| 273 |
+
row = settings_df.iloc[idx]
|
| 274 |
+
slide_name = row["Slide"]
|
| 275 |
+
|
| 276 |
+
logger.info(f"[{idx + 1}/{len(slides)}] Processing: {slide_name}")
|
| 277 |
+
|
| 278 |
+
slide_mask, aeon_results, paladin_results = analyze_slide(
|
| 279 |
+
slide_path=slide_path,
|
| 280 |
+
seg_config=row["Segmentation Config"],
|
| 281 |
+
site_type=row["Site Type"],
|
| 282 |
+
sex=row.get("Sex", "Unknown"),
|
| 283 |
+
tissue_site=row.get("Tissue Site", "Unknown"),
|
| 284 |
+
cancer_subtype=row["Cancer Subtype"],
|
| 285 |
+
cancer_subtype_name_map=cancer_subtype_name_map,
|
| 286 |
+
ihc_subtype=row.get("IHC Subtype", ""),
|
| 287 |
+
num_workers=args.num_workers,
|
| 288 |
+
progress=lambda frac, desc: None, # No-op progress for CLI
|
| 289 |
+
request=None,
|
| 290 |
+
model_cache=model_cache,
|
| 291 |
+
)
|
| 292 |
+
|
| 293 |
+
if slide_mask is not None:
|
| 294 |
+
all_slide_masks.append((slide_mask, slide_name))
|
| 295 |
+
if aeon_results is not None:
|
| 296 |
+
all_aeon_results.append(aeon_results)
|
| 297 |
+
if paladin_results is not None:
|
| 298 |
+
paladin_results.insert(
|
| 299 |
+
0, "Slide", pd.Series([slide_name] * len(paladin_results))
|
| 300 |
+
)
|
| 301 |
+
all_paladin_results.append(paladin_results)
|
| 302 |
+
finally:
|
| 303 |
+
logger.info("Cleaning up model cache")
|
| 304 |
+
model_cache.cleanup()
|
| 305 |
+
|
| 306 |
# Save individual slide results
|
| 307 |
for idx, (slide_mask, slide_name) in enumerate(all_slide_masks):
|
| 308 |
mask_path = output_dir / f"{slide_name}_mask.png"
|
|
|
|
| 319 |
if all_paladin_results:
|
| 320 |
combined_paladin = pd.concat(all_paladin_results, ignore_index=True)
|
| 321 |
for slide_name in combined_paladin["Slide"].unique():
|
| 322 |
+
slide_paladin = combined_paladin[
|
| 323 |
+
combined_paladin["Slide"] == slide_name
|
| 324 |
+
]
|
| 325 |
paladin_output_path = output_dir / f"{slide_name}_paladin_results.csv"
|
| 326 |
slide_paladin.to_csv(paladin_output_path, index=False)
|
| 327 |
logger.info(f"Saved Paladin results to {paladin_output_path}")
|
src/mosaic/inference/aeon.py
CHANGED
|
@@ -80,8 +80,12 @@ def run_with_model(
|
|
| 80 |
target_dict = json.loads(target_dict_str)
|
| 81 |
|
| 82 |
histologies = target_dict["histologies"]
|
| 83 |
-
INT_TO_CANCER_TYPE_MAP_LOCAL = {
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
# Calculate col_indices_to_drop using local mapping
|
| 87 |
col_indices_to_drop_local = [
|
|
@@ -100,7 +104,9 @@ def run_with_model(
|
|
| 100 |
tissue_site_idx=tissue_site_idx,
|
| 101 |
n_max_tiles=20000,
|
| 102 |
)
|
| 103 |
-
dataloader = DataLoader(
|
|
|
|
|
|
|
| 104 |
|
| 105 |
results = []
|
| 106 |
batch = next(iter(dataloader))
|
|
@@ -140,8 +146,14 @@ def run_with_model(
|
|
| 140 |
|
| 141 |
|
| 142 |
def run(
|
| 143 |
-
features,
|
| 144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
):
|
| 146 |
"""Run Aeon model inference for cancer subtype prediction.
|
| 147 |
|
|
@@ -176,12 +188,20 @@ def run(
|
|
| 176 |
target_dict_str = f.read().strip().replace("'", '"')
|
| 177 |
target_dict = json.loads(target_dict_str)
|
| 178 |
|
| 179 |
-
histologies = target_dict[
|
| 180 |
-
INT_TO_CANCER_TYPE_MAP_LOCAL = {
|
| 181 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 182 |
|
| 183 |
# Calculate col_indices_to_drop using local mapping
|
| 184 |
-
col_indices_to_drop_local = [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
|
| 186 |
site_type = SiteType.METASTASIS if metastatic else SiteType.PRIMARY
|
| 187 |
|
|
@@ -306,7 +326,9 @@ def main():
|
|
| 306 |
tissue_site_idx = None
|
| 307 |
if opt.tissue_site:
|
| 308 |
tissue_site_idx = encode_tissue_site(opt.tissue_site)
|
| 309 |
-
logger.info(
|
|
|
|
|
|
|
| 310 |
|
| 311 |
results_df, part_embedding = run(
|
| 312 |
features=features,
|
|
|
|
| 80 |
target_dict = json.loads(target_dict_str)
|
| 81 |
|
| 82 |
histologies = target_dict["histologies"]
|
| 83 |
+
INT_TO_CANCER_TYPE_MAP_LOCAL = {
|
| 84 |
+
i: histology for i, histology in enumerate(histologies)
|
| 85 |
+
}
|
| 86 |
+
CANCER_TYPE_TO_INT_MAP_LOCAL = {
|
| 87 |
+
v: k for k, v in INT_TO_CANCER_TYPE_MAP_LOCAL.items()
|
| 88 |
+
}
|
| 89 |
|
| 90 |
# Calculate col_indices_to_drop using local mapping
|
| 91 |
col_indices_to_drop_local = [
|
|
|
|
| 104 |
tissue_site_idx=tissue_site_idx,
|
| 105 |
n_max_tiles=20000,
|
| 106 |
)
|
| 107 |
+
dataloader = DataLoader(
|
| 108 |
+
dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers
|
| 109 |
+
)
|
| 110 |
|
| 111 |
results = []
|
| 112 |
batch = next(iter(dataloader))
|
|
|
|
| 146 |
|
| 147 |
|
| 148 |
def run(
|
| 149 |
+
features,
|
| 150 |
+
model_path,
|
| 151 |
+
metastatic=False,
|
| 152 |
+
batch_size=8,
|
| 153 |
+
num_workers=8,
|
| 154 |
+
use_cpu=False,
|
| 155 |
+
sex=None,
|
| 156 |
+
tissue_site_idx=None,
|
| 157 |
):
|
| 158 |
"""Run Aeon model inference for cancer subtype prediction.
|
| 159 |
|
|
|
|
| 188 |
target_dict_str = f.read().strip().replace("'", '"')
|
| 189 |
target_dict = json.loads(target_dict_str)
|
| 190 |
|
| 191 |
+
histologies = target_dict["histologies"]
|
| 192 |
+
INT_TO_CANCER_TYPE_MAP_LOCAL = {
|
| 193 |
+
i: histology for i, histology in enumerate(histologies)
|
| 194 |
+
}
|
| 195 |
+
CANCER_TYPE_TO_INT_MAP_LOCAL = {
|
| 196 |
+
v: k for k, v in INT_TO_CANCER_TYPE_MAP_LOCAL.items()
|
| 197 |
+
}
|
| 198 |
|
| 199 |
# Calculate col_indices_to_drop using local mapping
|
| 200 |
+
col_indices_to_drop_local = [
|
| 201 |
+
CANCER_TYPE_TO_INT_MAP_LOCAL[x]
|
| 202 |
+
for x in CANCER_TYPES_TO_DROP
|
| 203 |
+
if x in CANCER_TYPE_TO_INT_MAP_LOCAL
|
| 204 |
+
]
|
| 205 |
|
| 206 |
site_type = SiteType.METASTASIS if metastatic else SiteType.PRIMARY
|
| 207 |
|
|
|
|
| 326 |
tissue_site_idx = None
|
| 327 |
if opt.tissue_site:
|
| 328 |
tissue_site_idx = encode_tissue_site(opt.tissue_site)
|
| 329 |
+
logger.info(
|
| 330 |
+
f"Using tissue site: {opt.tissue_site} (encoded as {tissue_site_idx})"
|
| 331 |
+
)
|
| 332 |
|
| 333 |
results_df, part_embedding = run(
|
| 334 |
features=features,
|
src/mosaic/inference/data.py
CHANGED
|
@@ -212,10 +212,10 @@ DEFAULT_TISSUE_SITE_IDX = 8
|
|
| 212 |
|
| 213 |
def get_tissue_site_map():
|
| 214 |
"""Load tissue site name → index mapping from CSV.
|
| 215 |
-
|
| 216 |
Returns:
|
| 217 |
dict: Mapping of tissue site names to indices (0-56)
|
| 218 |
-
|
| 219 |
Raises:
|
| 220 |
FileNotFoundError: If the tissue site CSV file is not found
|
| 221 |
"""
|
|
@@ -232,17 +232,17 @@ def get_tissue_site_map():
|
|
| 232 |
f"Tissue site mapping file not found at {csv_path}. "
|
| 233 |
f"Please ensure the data directory contains 'tissue_site_original_to_idx.csv'."
|
| 234 |
) from e
|
| 235 |
-
|
| 236 |
_TISSUE_SITE_MAP = {}
|
| 237 |
for _, row in df.iterrows():
|
| 238 |
-
_TISSUE_SITE_MAP[row[
|
| 239 |
-
|
| 240 |
return _TISSUE_SITE_MAP
|
| 241 |
|
| 242 |
|
| 243 |
def get_tissue_site_options():
|
| 244 |
"""Get sorted unique tissue site names for UI dropdowns.
|
| 245 |
-
|
| 246 |
Returns:
|
| 247 |
list: Sorted list of unique tissue site names
|
| 248 |
"""
|
|
@@ -258,7 +258,7 @@ def get_sex_map():
|
|
| 258 |
|
| 259 |
Returns:
|
| 260 |
dict: Mapping of sex values to indices (0-2)
|
| 261 |
-
|
| 262 |
Raises:
|
| 263 |
FileNotFoundError: If the sex mapping CSV file is not found
|
| 264 |
"""
|
|
@@ -278,7 +278,7 @@ def get_sex_map():
|
|
| 278 |
|
| 279 |
_SEX_MAP = {}
|
| 280 |
for _, row in df.iterrows():
|
| 281 |
-
_SEX_MAP[row[
|
| 282 |
|
| 283 |
return _SEX_MAP
|
| 284 |
|
|
@@ -299,10 +299,10 @@ def encode_sex(sex):
|
|
| 299 |
|
| 300 |
def encode_tissue_site(site_name):
|
| 301 |
"""Convert tissue site name to index (0-56).
|
| 302 |
-
|
| 303 |
Args:
|
| 304 |
site_name: Tissue site name from CSV
|
| 305 |
-
|
| 306 |
Returns:
|
| 307 |
int: Tissue site index, defaults to DEFAULT_TISSUE_SITE_IDX ("Not Applicable")
|
| 308 |
"""
|
|
@@ -312,11 +312,11 @@ def encode_tissue_site(site_name):
|
|
| 312 |
|
| 313 |
def tissue_site_to_one_hot(site_idx, num_classes=57):
|
| 314 |
"""Convert tissue site index to one-hot vector.
|
| 315 |
-
|
| 316 |
Args:
|
| 317 |
site_idx: Index value (0-56 for tissue site, 0-2 for sex)
|
| 318 |
num_classes: Number of classes (57 for tissue site, 3 for sex)
|
| 319 |
-
|
| 320 |
Returns:
|
| 321 |
list: One-hot encoded vector
|
| 322 |
"""
|
|
@@ -395,22 +395,18 @@ class TileFeatureTensorDataset(Dataset):
|
|
| 395 |
Returns:
|
| 396 |
dict: the item
|
| 397 |
"""
|
| 398 |
-
result = {
|
| 399 |
-
|
| 400 |
-
"tile_tensor": self.features
|
| 401 |
-
}
|
| 402 |
-
|
| 403 |
# Add sex and tissue_site if provided (for Aeon)
|
| 404 |
if self.sex is not None:
|
| 405 |
result["SEX"] = torch.tensor(
|
| 406 |
-
tissue_site_to_one_hot(self.sex, num_classes=3),
|
| 407 |
-
dtype=torch.float32
|
| 408 |
)
|
| 409 |
-
|
| 410 |
if self.tissue_site_idx is not None:
|
| 411 |
result["TISSUE_SITE"] = torch.tensor(
|
| 412 |
tissue_site_to_one_hot(self.tissue_site_idx, num_classes=57),
|
| 413 |
-
dtype=torch.float32
|
| 414 |
)
|
| 415 |
-
|
| 416 |
return result
|
|
|
|
| 212 |
|
| 213 |
def get_tissue_site_map():
|
| 214 |
"""Load tissue site name → index mapping from CSV.
|
| 215 |
+
|
| 216 |
Returns:
|
| 217 |
dict: Mapping of tissue site names to indices (0-56)
|
| 218 |
+
|
| 219 |
Raises:
|
| 220 |
FileNotFoundError: If the tissue site CSV file is not found
|
| 221 |
"""
|
|
|
|
| 232 |
f"Tissue site mapping file not found at {csv_path}. "
|
| 233 |
f"Please ensure the data directory contains 'tissue_site_original_to_idx.csv'."
|
| 234 |
) from e
|
| 235 |
+
|
| 236 |
_TISSUE_SITE_MAP = {}
|
| 237 |
for _, row in df.iterrows():
|
| 238 |
+
_TISSUE_SITE_MAP[row["TISSUE_SITE"]] = int(row["idx"])
|
| 239 |
+
|
| 240 |
return _TISSUE_SITE_MAP
|
| 241 |
|
| 242 |
|
| 243 |
def get_tissue_site_options():
|
| 244 |
"""Get sorted unique tissue site names for UI dropdowns.
|
| 245 |
+
|
| 246 |
Returns:
|
| 247 |
list: Sorted list of unique tissue site names
|
| 248 |
"""
|
|
|
|
| 258 |
|
| 259 |
Returns:
|
| 260 |
dict: Mapping of sex values to indices (0-2)
|
| 261 |
+
|
| 262 |
Raises:
|
| 263 |
FileNotFoundError: If the sex mapping CSV file is not found
|
| 264 |
"""
|
|
|
|
| 278 |
|
| 279 |
_SEX_MAP = {}
|
| 280 |
for _, row in df.iterrows():
|
| 281 |
+
_SEX_MAP[row["SEX"]] = int(row["idx"])
|
| 282 |
|
| 283 |
return _SEX_MAP
|
| 284 |
|
|
|
|
| 299 |
|
| 300 |
def encode_tissue_site(site_name):
|
| 301 |
"""Convert tissue site name to index (0-56).
|
| 302 |
+
|
| 303 |
Args:
|
| 304 |
site_name: Tissue site name from CSV
|
| 305 |
+
|
| 306 |
Returns:
|
| 307 |
int: Tissue site index, defaults to DEFAULT_TISSUE_SITE_IDX ("Not Applicable")
|
| 308 |
"""
|
|
|
|
| 312 |
|
| 313 |
def tissue_site_to_one_hot(site_idx, num_classes=57):
|
| 314 |
"""Convert tissue site index to one-hot vector.
|
| 315 |
+
|
| 316 |
Args:
|
| 317 |
site_idx: Index value (0-56 for tissue site, 0-2 for sex)
|
| 318 |
num_classes: Number of classes (57 for tissue site, 3 for sex)
|
| 319 |
+
|
| 320 |
Returns:
|
| 321 |
list: One-hot encoded vector
|
| 322 |
"""
|
|
|
|
| 395 |
Returns:
|
| 396 |
dict: the item
|
| 397 |
"""
|
| 398 |
+
result = {"site": self.site_type.value, "tile_tensor": self.features}
|
| 399 |
+
|
|
|
|
|
|
|
|
|
|
| 400 |
# Add sex and tissue_site if provided (for Aeon)
|
| 401 |
if self.sex is not None:
|
| 402 |
result["SEX"] = torch.tensor(
|
| 403 |
+
tissue_site_to_one_hot(self.sex, num_classes=3), dtype=torch.float32
|
|
|
|
| 404 |
)
|
| 405 |
+
|
| 406 |
if self.tissue_site_idx is not None:
|
| 407 |
result["TISSUE_SITE"] = torch.tensor(
|
| 408 |
tissue_site_to_one_hot(self.tissue_site_idx, num_classes=57),
|
| 409 |
+
dtype=torch.float32,
|
| 410 |
)
|
| 411 |
+
|
| 412 |
return result
|
src/mosaic/inference/paladin.py
CHANGED
|
@@ -38,10 +38,10 @@ def load_model_map(model_map_path: str) -> dict[Any, Any]:
|
|
| 38 |
|
| 39 |
A dict is returned, mapping each cancer_subtype to a table mapping a
|
| 40 |
target to the pathname for the model that predicts it.
|
| 41 |
-
|
| 42 |
Args:
|
| 43 |
model_map_path: Path to the CSV file containing the model map
|
| 44 |
-
|
| 45 |
Returns:
|
| 46 |
Dictionary mapping cancer subtypes to their target-specific models
|
| 47 |
"""
|
|
@@ -58,10 +58,10 @@ def load_model_map(model_map_path: str) -> dict[Any, Any]:
|
|
| 58 |
|
| 59 |
def load_aeon_scores(df: pd.DataFrame) -> dict[str, float]:
|
| 60 |
"""Load Aeon output table with cancer subtypes and confidence values.
|
| 61 |
-
|
| 62 |
Args:
|
| 63 |
df: DataFrame with columns 'Cancer Subtype' and 'Confidence'
|
| 64 |
-
|
| 65 |
Returns:
|
| 66 |
Dictionary mapping cancer subtypes to their confidence scores
|
| 67 |
"""
|
|
@@ -75,11 +75,11 @@ def load_aeon_scores(df: pd.DataFrame) -> dict[str, float]:
|
|
| 75 |
|
| 76 |
def select_cancer_subtypes(aeon_scores: dict[str, float], k=1) -> list[str]:
|
| 77 |
"""Select the top k cancer subtypes based on Aeon confidence scores.
|
| 78 |
-
|
| 79 |
Args:
|
| 80 |
aeon_scores: Dictionary mapping cancer subtypes to confidence scores
|
| 81 |
k: Number of top subtypes to select (default: 1)
|
| 82 |
-
|
| 83 |
Returns:
|
| 84 |
List of cancer subtype codes sorted by confidence (highest first)
|
| 85 |
"""
|
|
@@ -91,11 +91,11 @@ def select_cancer_subtypes(aeon_scores: dict[str, float], k=1) -> list[str]:
|
|
| 91 |
|
| 92 |
def select_models(cancer_subtypes: list[str], model_map: dict[Any, Any]) -> list[Any]:
|
| 93 |
"""Select Paladin models for the given cancer subtypes.
|
| 94 |
-
|
| 95 |
Args:
|
| 96 |
cancer_subtypes: List of cancer subtype codes
|
| 97 |
model_map: Dictionary mapping cancer subtypes to their models
|
| 98 |
-
|
| 99 |
Returns:
|
| 100 |
List of tuples (cancer_subtype, target, model_path)
|
| 101 |
"""
|
|
@@ -188,13 +188,13 @@ def run_model(device, dataset, model_path: str, num_workers, batch_size) -> floa
|
|
| 188 |
|
| 189 |
def logits_to_point_estimates(logits):
|
| 190 |
"""Convert model logits to point estimates for beta-binomial distribution.
|
| 191 |
-
|
| 192 |
The logits tensor contains alpha and beta parameters interleaved.
|
| 193 |
This function computes the mean of the beta-binomial distribution: alpha/(alpha+beta).
|
| 194 |
-
|
| 195 |
Args:
|
| 196 |
logits: Tensor of shape (batch_size, 2*(n_tasks)) with alpha/beta parameters
|
| 197 |
-
|
| 198 |
Returns:
|
| 199 |
Tensor of shape (batch_size, n_tasks) with point estimates
|
| 200 |
"""
|
|
@@ -215,10 +215,10 @@ def run(
|
|
| 215 |
use_cpu: bool = False,
|
| 216 |
):
|
| 217 |
"""Run Paladin inference for biomarker prediction on a single slide.
|
| 218 |
-
|
| 219 |
Uses either Aeon predictions or user-provided cancer subtype codes to select
|
| 220 |
the appropriate Paladin models for biomarker prediction.
|
| 221 |
-
|
| 222 |
Args:
|
| 223 |
features: NumPy array of tile features extracted from the WSI
|
| 224 |
aeon_results: DataFrame with Aeon predictions (Cancer Subtype, Confidence)
|
|
@@ -229,10 +229,10 @@ def run(
|
|
| 229 |
batch_size: Batch size for inference
|
| 230 |
num_workers: Number of workers for data loading
|
| 231 |
use_cpu: Force CPU usage instead of GPU
|
| 232 |
-
|
| 233 |
Returns:
|
| 234 |
DataFrame with columns: Cancer Subtype, Target, Score
|
| 235 |
-
|
| 236 |
Note:
|
| 237 |
Either aeon_results or cancer_subtype_codes must be provided, but not both.
|
| 238 |
Either model_map_path or model_path must be provided, but not both.
|
|
|
|
| 38 |
|
| 39 |
A dict is returned, mapping each cancer_subtype to a table mapping a
|
| 40 |
target to the pathname for the model that predicts it.
|
| 41 |
+
|
| 42 |
Args:
|
| 43 |
model_map_path: Path to the CSV file containing the model map
|
| 44 |
+
|
| 45 |
Returns:
|
| 46 |
Dictionary mapping cancer subtypes to their target-specific models
|
| 47 |
"""
|
|
|
|
| 58 |
|
| 59 |
def load_aeon_scores(df: pd.DataFrame) -> dict[str, float]:
|
| 60 |
"""Load Aeon output table with cancer subtypes and confidence values.
|
| 61 |
+
|
| 62 |
Args:
|
| 63 |
df: DataFrame with columns 'Cancer Subtype' and 'Confidence'
|
| 64 |
+
|
| 65 |
Returns:
|
| 66 |
Dictionary mapping cancer subtypes to their confidence scores
|
| 67 |
"""
|
|
|
|
| 75 |
|
| 76 |
def select_cancer_subtypes(aeon_scores: dict[str, float], k=1) -> list[str]:
|
| 77 |
"""Select the top k cancer subtypes based on Aeon confidence scores.
|
| 78 |
+
|
| 79 |
Args:
|
| 80 |
aeon_scores: Dictionary mapping cancer subtypes to confidence scores
|
| 81 |
k: Number of top subtypes to select (default: 1)
|
| 82 |
+
|
| 83 |
Returns:
|
| 84 |
List of cancer subtype codes sorted by confidence (highest first)
|
| 85 |
"""
|
|
|
|
| 91 |
|
| 92 |
def select_models(cancer_subtypes: list[str], model_map: dict[Any, Any]) -> list[Any]:
|
| 93 |
"""Select Paladin models for the given cancer subtypes.
|
| 94 |
+
|
| 95 |
Args:
|
| 96 |
cancer_subtypes: List of cancer subtype codes
|
| 97 |
model_map: Dictionary mapping cancer subtypes to their models
|
| 98 |
+
|
| 99 |
Returns:
|
| 100 |
List of tuples (cancer_subtype, target, model_path)
|
| 101 |
"""
|
|
|
|
| 188 |
|
| 189 |
def logits_to_point_estimates(logits):
|
| 190 |
"""Convert model logits to point estimates for beta-binomial distribution.
|
| 191 |
+
|
| 192 |
The logits tensor contains alpha and beta parameters interleaved.
|
| 193 |
This function computes the mean of the beta-binomial distribution: alpha/(alpha+beta).
|
| 194 |
+
|
| 195 |
Args:
|
| 196 |
logits: Tensor of shape (batch_size, 2*(n_tasks)) with alpha/beta parameters
|
| 197 |
+
|
| 198 |
Returns:
|
| 199 |
Tensor of shape (batch_size, n_tasks) with point estimates
|
| 200 |
"""
|
|
|
|
| 215 |
use_cpu: bool = False,
|
| 216 |
):
|
| 217 |
"""Run Paladin inference for biomarker prediction on a single slide.
|
| 218 |
+
|
| 219 |
Uses either Aeon predictions or user-provided cancer subtype codes to select
|
| 220 |
the appropriate Paladin models for biomarker prediction.
|
| 221 |
+
|
| 222 |
Args:
|
| 223 |
features: NumPy array of tile features extracted from the WSI
|
| 224 |
aeon_results: DataFrame with Aeon predictions (Cancer Subtype, Confidence)
|
|
|
|
| 229 |
batch_size: Batch size for inference
|
| 230 |
num_workers: Number of workers for data loading
|
| 231 |
use_cpu: Force CPU usage instead of GPU
|
| 232 |
+
|
| 233 |
Returns:
|
| 234 |
DataFrame with columns: Cancer Subtype, Target, Score
|
| 235 |
+
|
| 236 |
Note:
|
| 237 |
Either aeon_results or cancer_subtype_codes must be provided, but not both.
|
| 238 |
Either model_map_path or model_path must be provided, but not both.
|
src/mosaic/model_manager.py
CHANGED
|
@@ -13,6 +13,7 @@ import torch
|
|
| 13 |
from loguru import logger
|
| 14 |
|
| 15 |
from mosaic.data_directory import get_data_directory
|
|
|
|
| 16 |
|
| 17 |
|
| 18 |
class ModelCache:
|
|
@@ -50,7 +51,9 @@ class ModelCache:
|
|
| 50 |
self.paladin_models: Dict[tuple, torch.nn.Module] = {}
|
| 51 |
self.is_t4_gpu = is_t4_gpu
|
| 52 |
self.aggressive_memory_mgmt = aggressive_memory_mgmt
|
| 53 |
-
self.device = device or torch.device(
|
|
|
|
|
|
|
| 54 |
|
| 55 |
def cleanup_paladin(self):
|
| 56 |
"""Aggressively free all Paladin models from memory.
|
|
@@ -78,15 +81,18 @@ class ModelCache:
|
|
| 78 |
self.cleanup_paladin()
|
| 79 |
|
| 80 |
# Clean up core models
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
self.
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
# Force garbage collection and GPU cache clearing
|
| 92 |
gc.collect()
|
|
@@ -147,7 +153,9 @@ def load_all_models(
|
|
| 147 |
if is_t4_gpu:
|
| 148 |
logger.info(" → Paladin models will be loaded and freed per slide")
|
| 149 |
else:
|
| 150 |
-
logger.info(
|
|
|
|
|
|
|
| 151 |
elif use_gpu and not torch.cuda.is_available():
|
| 152 |
logger.warning("GPU requested but CUDA not available, falling back to CPU")
|
| 153 |
use_gpu = False
|
|
@@ -165,24 +173,37 @@ def load_all_models(
|
|
| 165 |
if not ctranspath_path.exists():
|
| 166 |
raise FileNotFoundError(f"CTransPath model not found at {ctranspath_path}")
|
| 167 |
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
|
|
|
|
|
|
| 171 |
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
if not optimus_path.exists():
|
| 176 |
-
raise FileNotFoundError(f"Optimus model not found at {optimus_path}")
|
| 177 |
|
| 178 |
-
#
|
| 179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
|
| 181 |
# Load Marker Classifier
|
| 182 |
logger.info("Loading Marker Classifier...")
|
| 183 |
marker_classifier_path = data_dir / "marker_classifier.pkl"
|
| 184 |
if not marker_classifier_path.exists():
|
| 185 |
-
raise FileNotFoundError(
|
|
|
|
|
|
|
| 186 |
|
| 187 |
with open(marker_classifier_path, "rb") as f:
|
| 188 |
marker_classifier = pickle.load(f) # nosec
|
|
@@ -238,12 +259,14 @@ def load_paladin_model_for_inference(
|
|
| 238 |
cache: ModelCache,
|
| 239 |
model_path: Path,
|
| 240 |
) -> torch.nn.Module:
|
| 241 |
-
"""Load a single Paladin model for inference.
|
| 242 |
|
| 243 |
Implements adaptive loading strategy:
|
| 244 |
- T4 GPU (aggressive mode): Load model fresh, caller must delete after use
|
| 245 |
- A100 GPU (caching mode): Check cache, load if needed, return cached model
|
| 246 |
|
|
|
|
|
|
|
| 247 |
Args:
|
| 248 |
cache: ModelCache instance managing loaded models
|
| 249 |
model_path: Path to the Paladin model file
|
|
@@ -255,6 +278,8 @@ def load_paladin_model_for_inference(
|
|
| 255 |
On T4 GPUs, caller MUST delete the model and call torch.cuda.empty_cache()
|
| 256 |
after inference to avoid OOM errors.
|
| 257 |
"""
|
|
|
|
|
|
|
| 258 |
model_key = str(model_path)
|
| 259 |
|
| 260 |
# Check cache first (only used in non-aggressive mode)
|
|
@@ -262,11 +287,32 @@ def load_paladin_model_for_inference(
|
|
| 262 |
logger.info(f" ✓ Using CACHED Paladin model: {model_path.name} (no disk I/O!)")
|
| 263 |
return cache.paladin_models[model_key]
|
| 264 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
# Load model from disk
|
| 266 |
if cache.aggressive_memory_mgmt:
|
| 267 |
-
logger.info(
|
|
|
|
|
|
|
| 268 |
else:
|
| 269 |
-
logger.info(
|
|
|
|
|
|
|
| 270 |
|
| 271 |
with open(model_path, "rb") as f:
|
| 272 |
model = pickle.load(f) # nosec
|
|
|
|
| 13 |
from loguru import logger
|
| 14 |
|
| 15 |
from mosaic.data_directory import get_data_directory
|
| 16 |
+
from mussel.models import ModelType, get_model_factory
|
| 17 |
|
| 18 |
|
| 19 |
class ModelCache:
|
|
|
|
| 51 |
self.paladin_models: Dict[tuple, torch.nn.Module] = {}
|
| 52 |
self.is_t4_gpu = is_t4_gpu
|
| 53 |
self.aggressive_memory_mgmt = aggressive_memory_mgmt
|
| 54 |
+
self.device = device or torch.device(
|
| 55 |
+
"cuda" if torch.cuda.is_available() else "cpu"
|
| 56 |
+
)
|
| 57 |
|
| 58 |
def cleanup_paladin(self):
|
| 59 |
"""Aggressively free all Paladin models from memory.
|
|
|
|
| 81 |
self.cleanup_paladin()
|
| 82 |
|
| 83 |
# Clean up core models
|
| 84 |
+
if self.ctranspath_model is not None:
|
| 85 |
+
del self.ctranspath_model
|
| 86 |
+
self.ctranspath_model = None
|
| 87 |
+
if self.optimus_model is not None:
|
| 88 |
+
del self.optimus_model
|
| 89 |
+
self.optimus_model = None
|
| 90 |
+
if self.marker_classifier is not None:
|
| 91 |
+
del self.marker_classifier
|
| 92 |
+
self.marker_classifier = None
|
| 93 |
+
if self.aeon_model is not None:
|
| 94 |
+
del self.aeon_model
|
| 95 |
+
self.aeon_model = None
|
| 96 |
|
| 97 |
# Force garbage collection and GPU cache clearing
|
| 98 |
gc.collect()
|
|
|
|
| 153 |
if is_t4_gpu:
|
| 154 |
logger.info(" → Paladin models will be loaded and freed per slide")
|
| 155 |
else:
|
| 156 |
+
logger.info(
|
| 157 |
+
" → Paladin models will be cached and reused across slides"
|
| 158 |
+
)
|
| 159 |
elif use_gpu and not torch.cuda.is_available():
|
| 160 |
logger.warning("GPU requested but CUDA not available, falling back to CPU")
|
| 161 |
use_gpu = False
|
|
|
|
| 173 |
if not ctranspath_path.exists():
|
| 174 |
raise FileNotFoundError(f"CTransPath model not found at {ctranspath_path}")
|
| 175 |
|
| 176 |
+
ctranspath_factory = get_model_factory(ModelType.CTRANSPATH)
|
| 177 |
+
ctranspath_model = ctranspath_factory.get_model(
|
| 178 |
+
str(ctranspath_path), use_gpu=use_gpu, gpu_device_id=0 if use_gpu else None
|
| 179 |
+
)
|
| 180 |
+
logger.info("✓ CTransPath model loaded")
|
| 181 |
|
| 182 |
+
if use_gpu and torch.cuda.is_available():
|
| 183 |
+
mem = torch.cuda.memory_allocated() / (1024**3)
|
| 184 |
+
logger.info(f" GPU memory: {mem:.2f} GB")
|
|
|
|
|
|
|
| 185 |
|
| 186 |
+
# Load Optimus model from Hugging Face Hub
|
| 187 |
+
logger.info("Loading Optimus model from bioptimus/H-optimus-0...")
|
| 188 |
+
optimus_factory = get_model_factory(ModelType.OPTIMUS)
|
| 189 |
+
optimus_model = optimus_factory.get_model(
|
| 190 |
+
model_path="hf-hub:bioptimus/H-optimus-0",
|
| 191 |
+
use_gpu=use_gpu,
|
| 192 |
+
gpu_device_id=0 if use_gpu else None,
|
| 193 |
+
)
|
| 194 |
+
logger.info("✓ Optimus model loaded")
|
| 195 |
+
|
| 196 |
+
if use_gpu and torch.cuda.is_available():
|
| 197 |
+
mem = torch.cuda.memory_allocated() / (1024**3)
|
| 198 |
+
logger.info(f" GPU memory: {mem:.2f} GB")
|
| 199 |
|
| 200 |
# Load Marker Classifier
|
| 201 |
logger.info("Loading Marker Classifier...")
|
| 202 |
marker_classifier_path = data_dir / "marker_classifier.pkl"
|
| 203 |
if not marker_classifier_path.exists():
|
| 204 |
+
raise FileNotFoundError(
|
| 205 |
+
f"Marker classifier not found at {marker_classifier_path}"
|
| 206 |
+
)
|
| 207 |
|
| 208 |
with open(marker_classifier_path, "rb") as f:
|
| 209 |
marker_classifier = pickle.load(f) # nosec
|
|
|
|
| 259 |
cache: ModelCache,
|
| 260 |
model_path: Path,
|
| 261 |
) -> torch.nn.Module:
|
| 262 |
+
"""Load a single Paladin model for inference, downloading on-demand if needed.
|
| 263 |
|
| 264 |
Implements adaptive loading strategy:
|
| 265 |
- T4 GPU (aggressive mode): Load model fresh, caller must delete after use
|
| 266 |
- A100 GPU (caching mode): Check cache, load if needed, return cached model
|
| 267 |
|
| 268 |
+
If the model file doesn't exist locally, downloads it from HuggingFace Hub.
|
| 269 |
+
|
| 270 |
Args:
|
| 271 |
cache: ModelCache instance managing loaded models
|
| 272 |
model_path: Path to the Paladin model file
|
|
|
|
| 278 |
On T4 GPUs, caller MUST delete the model and call torch.cuda.empty_cache()
|
| 279 |
after inference to avoid OOM errors.
|
| 280 |
"""
|
| 281 |
+
from huggingface_hub import hf_hub_download
|
| 282 |
+
|
| 283 |
model_key = str(model_path)
|
| 284 |
|
| 285 |
# Check cache first (only used in non-aggressive mode)
|
|
|
|
| 287 |
logger.info(f" ✓ Using CACHED Paladin model: {model_path.name} (no disk I/O!)")
|
| 288 |
return cache.paladin_models[model_key]
|
| 289 |
|
| 290 |
+
# Download model from HF Hub if it doesn't exist locally
|
| 291 |
+
if not model_path.exists():
|
| 292 |
+
logger.info(
|
| 293 |
+
f" ⬇ Downloading Paladin model from HuggingFace Hub: {model_path.name}"
|
| 294 |
+
)
|
| 295 |
+
# Extract the relative path from the data directory
|
| 296 |
+
data_dir = get_data_directory()
|
| 297 |
+
relative_path = model_path.relative_to(data_dir)
|
| 298 |
+
|
| 299 |
+
downloaded_path = hf_hub_download(
|
| 300 |
+
repo_id="PDM-Group/paladin-aeon-models",
|
| 301 |
+
filename=str(relative_path),
|
| 302 |
+
cache_dir=data_dir.parent.parent, # Use HF cache directory
|
| 303 |
+
)
|
| 304 |
+
model_path = Path(downloaded_path)
|
| 305 |
+
logger.info(f" ✓ Downloaded to: {model_path}")
|
| 306 |
+
|
| 307 |
# Load model from disk
|
| 308 |
if cache.aggressive_memory_mgmt:
|
| 309 |
+
logger.info(
|
| 310 |
+
f" → Loading Paladin model: {model_path.name} (will free after use)"
|
| 311 |
+
)
|
| 312 |
else:
|
| 313 |
+
logger.info(
|
| 314 |
+
f" → Loading Paladin model: {model_path.name} (will cache for reuse)"
|
| 315 |
+
)
|
| 316 |
|
| 317 |
with open(model_path, "rb") as f:
|
| 318 |
model = pickle.load(f) # nosec
|
src/mosaic/ui/app.py
CHANGED
|
@@ -24,7 +24,7 @@ from mosaic.ui.utils import (
|
|
| 24 |
SETTINGS_COLUMNS,
|
| 25 |
)
|
| 26 |
from mosaic.analysis import analyze_slide
|
| 27 |
-
from mosaic.
|
| 28 |
|
| 29 |
current_dir = Path(__file__).parent.parent
|
| 30 |
|
|
@@ -45,6 +45,12 @@ def set_cancer_subtype_maps(csn_map, rcsn_map, cs):
|
|
| 45 |
def analyze_slides(
|
| 46 |
slides,
|
| 47 |
settings_input,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
user_dir,
|
| 49 |
progress=gr.Progress(track_tqdm=True),
|
| 50 |
request: gr.Request = None,
|
|
@@ -52,61 +58,112 @@ def analyze_slides(
|
|
| 52 |
if slides is None or len(slides) == 0:
|
| 53 |
raise gr.Error("Please upload at least one slide.")
|
| 54 |
if user_dir is None:
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
settings_input = validate_settings(
|
| 57 |
-
settings_input,
|
|
|
|
|
|
|
|
|
|
| 58 |
)
|
| 59 |
if len(slides) != len(settings_input):
|
| 60 |
raise gr.Error("Missing settings for uploaded slides")
|
| 61 |
|
| 62 |
-
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
if len(slides) > 1:
|
| 65 |
-
logger.info(f"
|
| 66 |
-
progress(0.0, desc=f"
|
| 67 |
-
|
| 68 |
-
all_slide_masks, all_aeon_results, all_paladin_results = analyze_slides_batch(
|
| 69 |
-
slides=slides,
|
| 70 |
-
settings_df=settings_input,
|
| 71 |
-
cancer_subtype_name_map=cancer_subtype_name_map,
|
| 72 |
-
num_workers=4,
|
| 73 |
-
aggressive_memory_mgmt=None, # Auto-detect GPU type
|
| 74 |
-
progress=progress,
|
| 75 |
-
)
|
| 76 |
else:
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
)
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
progress(0.99, desc="Analysis complete, wrapping up results")
|
| 112 |
|
|
@@ -155,7 +212,8 @@ def analyze_slides(
|
|
| 155 |
|
| 156 |
progress(1.0, desc="All done!")
|
| 157 |
|
| 158 |
-
|
|
|
|
| 159 |
all_slide_masks,
|
| 160 |
combined_aeon_results,
|
| 161 |
aeon_output,
|
|
@@ -273,17 +331,20 @@ def launch_gradio(server_name, server_port, share):
|
|
| 273 |
)
|
| 274 |
def clear_fn():
|
| 275 |
return (
|
| 276 |
-
None,
|
| 277 |
-
None,
|
| 278 |
-
None,
|
| 279 |
-
|
| 280 |
-
gr.Dataframe(visible=False),
|
| 281 |
-
gr.DownloadButton(visible=False),
|
| 282 |
-
gr.Dataframe(visible=False),
|
| 283 |
-
gr.File(visible=False),
|
| 284 |
)
|
| 285 |
|
| 286 |
-
def get_settings(
|
|
|
|
|
|
|
|
|
|
| 287 |
if files is None:
|
| 288 |
return pd.DataFrame()
|
| 289 |
settings = []
|
|
@@ -291,22 +352,30 @@ def launch_gradio(server_name, server_port, share):
|
|
| 291 |
filename = file.name if hasattr(file, "name") else file
|
| 292 |
slide_name = filename.split("/")[-1]
|
| 293 |
settings.append(
|
| 294 |
-
[
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 295 |
)
|
| 296 |
df = pd.DataFrame(settings, columns=SETTINGS_COLUMNS)
|
| 297 |
return df
|
| 298 |
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
inputs=[
|
| 311 |
input_slides,
|
| 312 |
site_dropdown,
|
|
@@ -318,22 +387,103 @@ def launch_gradio(server_name, server_port, share):
|
|
| 318 |
],
|
| 319 |
outputs=[settings_input, settings_csv, ihc_subtype_dropdown],
|
| 320 |
)
|
| 321 |
-
def
|
|
|
|
|
|
|
|
|
|
| 322 |
has_ihc = "Breast" in cancer_subtype
|
| 323 |
if not files:
|
| 324 |
return None, None, gr.Dropdown(visible=has_ihc)
|
| 325 |
settings_df = get_settings(
|
| 326 |
-
files,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
)
|
| 328 |
if settings_df is not None:
|
| 329 |
has_ihc = any("Breast" in cs for cs in settings_df["Cancer Subtype"])
|
| 330 |
visible = files and len(files) > 1
|
| 331 |
return (
|
| 332 |
-
gr.Dataframe(settings_df, visible=visible),
|
| 333 |
gr.File(visible=visible),
|
| 334 |
gr.Dropdown(visible=has_ihc),
|
| 335 |
)
|
| 336 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 337 |
@settings_csv.upload(
|
| 338 |
inputs=[settings_csv],
|
| 339 |
outputs=[settings_input],
|
|
@@ -349,6 +499,12 @@ def launch_gradio(server_name, server_port, share):
|
|
| 349 |
inputs=[
|
| 350 |
input_slides,
|
| 351 |
settings_input,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 352 |
user_dir_state,
|
| 353 |
],
|
| 354 |
outputs=[
|
|
@@ -363,9 +519,14 @@ def launch_gradio(server_name, server_port, share):
|
|
| 363 |
show_progress_on=paladin_output_table,
|
| 364 |
)
|
| 365 |
settings_input.change(
|
| 366 |
-
lambda df: validate_settings(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
inputs=[settings_input],
|
| 368 |
-
outputs=[settings_input]
|
| 369 |
)
|
| 370 |
demo.load(
|
| 371 |
create_user_directory,
|
|
|
|
| 24 |
SETTINGS_COLUMNS,
|
| 25 |
)
|
| 26 |
from mosaic.analysis import analyze_slide
|
| 27 |
+
from mosaic.model_manager import load_all_models
|
| 28 |
|
| 29 |
current_dir = Path(__file__).parent.parent
|
| 30 |
|
|
|
|
| 45 |
def analyze_slides(
|
| 46 |
slides,
|
| 47 |
settings_input,
|
| 48 |
+
site_type,
|
| 49 |
+
sex,
|
| 50 |
+
tissue_site,
|
| 51 |
+
cancer_subtype,
|
| 52 |
+
ihc_subtype,
|
| 53 |
+
seg_config,
|
| 54 |
user_dir,
|
| 55 |
progress=gr.Progress(track_tqdm=True),
|
| 56 |
request: gr.Request = None,
|
|
|
|
| 58 |
if slides is None or len(slides) == 0:
|
| 59 |
raise gr.Error("Please upload at least one slide.")
|
| 60 |
if user_dir is None:
|
| 61 |
+
if request is not None:
|
| 62 |
+
user_dir = create_user_directory(None, request)
|
| 63 |
+
if user_dir is None:
|
| 64 |
+
# Fallback to temp directory if session hash not available
|
| 65 |
+
import tempfile
|
| 66 |
+
|
| 67 |
+
user_dir = Path(tempfile.mkdtemp(prefix="mosaic_"))
|
| 68 |
+
|
| 69 |
+
# Handle empty settings_input (e.g., when dataframe is hidden for single slide)
|
| 70 |
+
# Regenerate settings from dropdowns if settings_input is empty
|
| 71 |
+
if settings_input is None or len(settings_input) == 0:
|
| 72 |
+
logger.info("Settings dataframe is empty, regenerating from dropdown values")
|
| 73 |
+
settings = []
|
| 74 |
+
for file in slides:
|
| 75 |
+
filename = file.name if hasattr(file, "name") else file
|
| 76 |
+
slide_name = filename.split("/")[-1]
|
| 77 |
+
settings.append(
|
| 78 |
+
[
|
| 79 |
+
slide_name,
|
| 80 |
+
site_type,
|
| 81 |
+
sex,
|
| 82 |
+
tissue_site,
|
| 83 |
+
cancer_subtype,
|
| 84 |
+
ihc_subtype,
|
| 85 |
+
seg_config,
|
| 86 |
+
]
|
| 87 |
+
)
|
| 88 |
+
settings_input = pd.DataFrame(settings, columns=SETTINGS_COLUMNS)
|
| 89 |
+
|
| 90 |
settings_input = validate_settings(
|
| 91 |
+
settings_input,
|
| 92 |
+
cancer_subtype_name_map,
|
| 93 |
+
cancer_subtypes,
|
| 94 |
+
reversed_cancer_subtype_name_map,
|
| 95 |
)
|
| 96 |
if len(slides) != len(settings_input):
|
| 97 |
raise gr.Error("Missing settings for uploaded slides")
|
| 98 |
|
| 99 |
+
all_slide_masks = []
|
| 100 |
+
all_aeon_results = []
|
| 101 |
+
all_paladin_results = []
|
| 102 |
+
|
| 103 |
+
# Load models once (for batch) or per-slide (for single)
|
| 104 |
+
model_cache = None
|
| 105 |
if len(slides) > 1:
|
| 106 |
+
logger.info(f"Batch mode: Loading models once for {len(slides)} slides")
|
| 107 |
+
progress(0.0, desc=f"Loading models for batch processing")
|
| 108 |
+
model_cache = load_all_models(use_gpu=True, aggressive_memory_mgmt=None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
else:
|
| 110 |
+
logger.info("Single-slide mode: models loaded within analyze_slide")
|
| 111 |
+
|
| 112 |
+
try:
|
| 113 |
+
# Process all slides with unified analyze_slide function
|
| 114 |
+
for idx, slide_path in enumerate(slides):
|
| 115 |
+
row = settings_input.iloc[idx]
|
| 116 |
+
slide_name = row["Slide"]
|
| 117 |
+
|
| 118 |
+
logger.info(f"[{idx + 1}/{len(slides)}] Processing: {slide_name}")
|
| 119 |
+
slide_progress = idx / len(slides)
|
| 120 |
+
progress(slide_progress, desc=f"Analyzing slide {idx + 1}/{len(slides)}")
|
| 121 |
+
|
| 122 |
+
slide_mask, aeon_results, paladin_results = analyze_slide(
|
| 123 |
+
slide_path=slide_path,
|
| 124 |
+
seg_config=row["Segmentation Config"],
|
| 125 |
+
site_type=row["Site Type"],
|
| 126 |
+
sex=row.get("Sex", "Unknown"),
|
| 127 |
+
tissue_site=row.get("Tissue Site", "Unknown"),
|
| 128 |
+
cancer_subtype=row["Cancer Subtype"],
|
| 129 |
+
cancer_subtype_name_map=cancer_subtype_name_map,
|
| 130 |
+
ihc_subtype=row.get("IHC Subtype", ""),
|
| 131 |
+
num_workers=4,
|
| 132 |
+
progress=progress,
|
| 133 |
+
request=request,
|
| 134 |
+
model_cache=model_cache, # Pre-loaded for batch, None for single
|
| 135 |
+
)
|
| 136 |
|
| 137 |
+
if slide_mask is not None:
|
| 138 |
+
all_slide_masks.append((slide_mask, slide_name))
|
| 139 |
+
if aeon_results is not None:
|
| 140 |
+
all_aeon_results.append(aeon_results)
|
| 141 |
+
if paladin_results is not None:
|
| 142 |
+
paladin_results.insert(
|
| 143 |
+
0, "Slide", pd.Series([slide_name] * len(paladin_results))
|
| 144 |
+
)
|
| 145 |
+
all_paladin_results.append(paladin_results)
|
| 146 |
+
|
| 147 |
+
# Yield intermediate update to show slide masks as they're generated
|
| 148 |
+
# This allows the UI to update incrementally during processing
|
| 149 |
+
yield (
|
| 150 |
+
all_slide_masks.copy(), # Current slide masks
|
| 151 |
+
gr.DataFrame(visible=False), # aeon_output_table (not ready yet)
|
| 152 |
+
gr.DownloadButton(
|
| 153 |
+
visible=False
|
| 154 |
+
), # aeon_download_button (not ready yet)
|
| 155 |
+
None, # paladin_output_table (not ready yet)
|
| 156 |
+
gr.DownloadButton(
|
| 157 |
+
visible=False
|
| 158 |
+
), # paladin_download_button (not ready yet)
|
| 159 |
+
user_dir, # user_dir_state
|
| 160 |
)
|
| 161 |
+
|
| 162 |
+
finally:
|
| 163 |
+
# Clean up model cache if it was loaded for batch processing
|
| 164 |
+
if model_cache is not None:
|
| 165 |
+
logger.info("Cleaning up model cache")
|
| 166 |
+
model_cache.cleanup()
|
| 167 |
|
| 168 |
progress(0.99, desc="Analysis complete, wrapping up results")
|
| 169 |
|
|
|
|
| 212 |
|
| 213 |
progress(1.0, desc="All done!")
|
| 214 |
|
| 215 |
+
# Final yield with complete results
|
| 216 |
+
yield (
|
| 217 |
all_slide_masks,
|
| 218 |
combined_aeon_results,
|
| 219 |
aeon_output,
|
|
|
|
| 331 |
)
|
| 332 |
def clear_fn():
|
| 333 |
return (
|
| 334 |
+
None, # input_slides
|
| 335 |
+
None, # slide_masks
|
| 336 |
+
None, # paladin_output_table
|
| 337 |
+
gr.DownloadButton(visible=False), # paladin_download_button
|
| 338 |
+
gr.Dataframe(visible=False), # aeon_output_table
|
| 339 |
+
gr.DownloadButton(visible=False), # aeon_download_button
|
| 340 |
+
gr.Dataframe(visible=False), # settings_input
|
| 341 |
+
gr.File(visible=False), # settings_csv
|
| 342 |
)
|
| 343 |
|
| 344 |
+
def get_settings(
|
| 345 |
+
files, site_type, sex, tissue_site, cancer_subtype, ihc_subtype, seg_config
|
| 346 |
+
):
|
| 347 |
+
"""Generate initial settings DataFrame from uploaded files and dropdown values."""
|
| 348 |
if files is None:
|
| 349 |
return pd.DataFrame()
|
| 350 |
settings = []
|
|
|
|
| 352 |
filename = file.name if hasattr(file, "name") else file
|
| 353 |
slide_name = filename.split("/")[-1]
|
| 354 |
settings.append(
|
| 355 |
+
[
|
| 356 |
+
slide_name,
|
| 357 |
+
site_type,
|
| 358 |
+
sex,
|
| 359 |
+
tissue_site,
|
| 360 |
+
cancer_subtype,
|
| 361 |
+
ihc_subtype,
|
| 362 |
+
seg_config,
|
| 363 |
+
]
|
| 364 |
)
|
| 365 |
df = pd.DataFrame(settings, columns=SETTINGS_COLUMNS)
|
| 366 |
return df
|
| 367 |
|
| 368 |
+
def update_settings_column(settings_df, column_name, new_value):
|
| 369 |
+
"""Update a specific column in the settings DataFrame."""
|
| 370 |
+
if settings_df is None or len(settings_df) == 0:
|
| 371 |
+
return settings_df
|
| 372 |
+
# Create a copy to avoid modifying the original
|
| 373 |
+
updated_df = settings_df.copy()
|
| 374 |
+
updated_df[column_name] = new_value
|
| 375 |
+
return updated_df
|
| 376 |
+
|
| 377 |
+
# Handle file uploads - regenerate entire settings table
|
| 378 |
+
@input_slides.change(
|
| 379 |
inputs=[
|
| 380 |
input_slides,
|
| 381 |
site_dropdown,
|
|
|
|
| 387 |
],
|
| 388 |
outputs=[settings_input, settings_csv, ihc_subtype_dropdown],
|
| 389 |
)
|
| 390 |
+
def update_files(
|
| 391 |
+
files, site_type, sex, tissue_site, cancer_subtype, ihc_subtype, seg_config
|
| 392 |
+
):
|
| 393 |
+
"""Handle file upload - regenerate settings table from scratch."""
|
| 394 |
has_ihc = "Breast" in cancer_subtype
|
| 395 |
if not files:
|
| 396 |
return None, None, gr.Dropdown(visible=has_ihc)
|
| 397 |
settings_df = get_settings(
|
| 398 |
+
files,
|
| 399 |
+
site_type,
|
| 400 |
+
sex,
|
| 401 |
+
tissue_site,
|
| 402 |
+
cancer_subtype,
|
| 403 |
+
ihc_subtype,
|
| 404 |
+
seg_config,
|
| 405 |
)
|
| 406 |
if settings_df is not None:
|
| 407 |
has_ihc = any("Breast" in cs for cs in settings_df["Cancer Subtype"])
|
| 408 |
visible = files and len(files) > 1
|
| 409 |
return (
|
| 410 |
+
gr.Dataframe(value=settings_df, visible=visible),
|
| 411 |
gr.File(visible=visible),
|
| 412 |
gr.Dropdown(visible=has_ihc),
|
| 413 |
)
|
| 414 |
|
| 415 |
+
# Handle individual dropdown changes - only update the relevant column
|
| 416 |
+
@site_dropdown.change(
|
| 417 |
+
inputs=[settings_input, site_dropdown],
|
| 418 |
+
outputs=[settings_input],
|
| 419 |
+
)
|
| 420 |
+
def update_site_type(settings_df, site_type):
|
| 421 |
+
"""Update Site Type column when dropdown changes."""
|
| 422 |
+
if settings_df is None or len(settings_df) == 0:
|
| 423 |
+
return settings_df
|
| 424 |
+
updated_df = update_settings_column(settings_df, "Site Type", site_type)
|
| 425 |
+
return gr.Dataframe(value=updated_df)
|
| 426 |
+
|
| 427 |
+
@sex_dropdown.change(
|
| 428 |
+
inputs=[settings_input, sex_dropdown],
|
| 429 |
+
outputs=[settings_input],
|
| 430 |
+
)
|
| 431 |
+
def update_sex(settings_df, sex):
|
| 432 |
+
"""Update Sex column when dropdown changes."""
|
| 433 |
+
if settings_df is None or len(settings_df) == 0:
|
| 434 |
+
return settings_df
|
| 435 |
+
updated_df = update_settings_column(settings_df, "Sex", sex)
|
| 436 |
+
return gr.Dataframe(value=updated_df)
|
| 437 |
+
|
| 438 |
+
@tissue_site_dropdown.change(
|
| 439 |
+
inputs=[settings_input, tissue_site_dropdown],
|
| 440 |
+
outputs=[settings_input],
|
| 441 |
+
)
|
| 442 |
+
def update_tissue_site(settings_df, tissue_site):
|
| 443 |
+
"""Update Tissue Site column when dropdown changes."""
|
| 444 |
+
if settings_df is None or len(settings_df) == 0:
|
| 445 |
+
return settings_df
|
| 446 |
+
updated_df = update_settings_column(settings_df, "Tissue Site", tissue_site)
|
| 447 |
+
return gr.Dataframe(value=updated_df)
|
| 448 |
+
|
| 449 |
+
@cancer_subtype_dropdown.change(
|
| 450 |
+
inputs=[settings_input, cancer_subtype_dropdown],
|
| 451 |
+
outputs=[settings_input, ihc_subtype_dropdown],
|
| 452 |
+
)
|
| 453 |
+
def update_cancer_subtype(settings_df, cancer_subtype):
|
| 454 |
+
"""Update Cancer Subtype column when dropdown changes."""
|
| 455 |
+
has_ihc = "Breast" in cancer_subtype
|
| 456 |
+
if settings_df is None or len(settings_df) == 0:
|
| 457 |
+
return settings_df, gr.Dropdown(visible=has_ihc)
|
| 458 |
+
updated_df = update_settings_column(
|
| 459 |
+
settings_df, "Cancer Subtype", cancer_subtype
|
| 460 |
+
)
|
| 461 |
+
return gr.Dataframe(value=updated_df), gr.Dropdown(visible=has_ihc)
|
| 462 |
+
|
| 463 |
+
@ihc_subtype_dropdown.change(
|
| 464 |
+
inputs=[settings_input, ihc_subtype_dropdown],
|
| 465 |
+
outputs=[settings_input],
|
| 466 |
+
)
|
| 467 |
+
def update_ihc_subtype(settings_df, ihc_subtype):
|
| 468 |
+
"""Update IHC Subtype column when dropdown changes."""
|
| 469 |
+
if settings_df is None or len(settings_df) == 0:
|
| 470 |
+
return settings_df
|
| 471 |
+
updated_df = update_settings_column(settings_df, "IHC Subtype", ihc_subtype)
|
| 472 |
+
return gr.Dataframe(value=updated_df)
|
| 473 |
+
|
| 474 |
+
@seg_config_dropdown.change(
|
| 475 |
+
inputs=[settings_input, seg_config_dropdown],
|
| 476 |
+
outputs=[settings_input],
|
| 477 |
+
)
|
| 478 |
+
def update_seg_config(settings_df, seg_config):
|
| 479 |
+
"""Update Segmentation Config column when dropdown changes."""
|
| 480 |
+
if settings_df is None or len(settings_df) == 0:
|
| 481 |
+
return settings_df
|
| 482 |
+
updated_df = update_settings_column(
|
| 483 |
+
settings_df, "Segmentation Config", seg_config
|
| 484 |
+
)
|
| 485 |
+
return gr.Dataframe(value=updated_df)
|
| 486 |
+
|
| 487 |
@settings_csv.upload(
|
| 488 |
inputs=[settings_csv],
|
| 489 |
outputs=[settings_input],
|
|
|
|
| 499 |
inputs=[
|
| 500 |
input_slides,
|
| 501 |
settings_input,
|
| 502 |
+
site_dropdown,
|
| 503 |
+
sex_dropdown,
|
| 504 |
+
tissue_site_dropdown,
|
| 505 |
+
cancer_subtype_dropdown,
|
| 506 |
+
ihc_subtype_dropdown,
|
| 507 |
+
seg_config_dropdown,
|
| 508 |
user_dir_state,
|
| 509 |
],
|
| 510 |
outputs=[
|
|
|
|
| 519 |
show_progress_on=paladin_output_table,
|
| 520 |
)
|
| 521 |
settings_input.change(
|
| 522 |
+
lambda df: validate_settings(
|
| 523 |
+
df,
|
| 524 |
+
cancer_subtype_name_map,
|
| 525 |
+
cancer_subtypes,
|
| 526 |
+
reversed_cancer_subtype_name_map,
|
| 527 |
+
),
|
| 528 |
inputs=[settings_input],
|
| 529 |
+
outputs=[settings_input],
|
| 530 |
)
|
| 531 |
demo.load(
|
| 532 |
create_user_directory,
|
src/mosaic/ui/utils.py
CHANGED
|
@@ -61,13 +61,13 @@ def get_tissue_sites():
|
|
| 61 |
|
| 62 |
def get_oncotree_code_name(code):
|
| 63 |
"""Retrieve the human-readable name for an OncoTree code.
|
| 64 |
-
|
| 65 |
Queries the OncoTree API to get the cancer subtype name corresponding
|
| 66 |
to the given code. Results are cached to avoid repeated API calls.
|
| 67 |
-
|
| 68 |
Args:
|
| 69 |
code: OncoTree code (e.g., "LUAD", "BRCA")
|
| 70 |
-
|
| 71 |
Returns:
|
| 72 |
Human-readable cancer subtype name, or "Unknown" if not found
|
| 73 |
"""
|
|
@@ -108,16 +108,16 @@ def create_user_directory(state, request: gr.Request):
|
|
| 108 |
|
| 109 |
def load_settings(slide_csv_path):
|
| 110 |
"""Load slide analysis settings from CSV file.
|
| 111 |
-
|
| 112 |
Loads the CSV and ensures all required columns are present, adding defaults
|
| 113 |
for optional columns if they are missing.
|
| 114 |
-
|
| 115 |
Args:
|
| 116 |
slide_csv_path: Path to the CSV file containing slide settings
|
| 117 |
-
|
| 118 |
Returns:
|
| 119 |
DataFrame with columns: Slide, Site Type, Cancer Subtype, IHC Subtype, Segmentation Config
|
| 120 |
-
|
| 121 |
Raises:
|
| 122 |
ValueError: If required columns are missing from the CSV
|
| 123 |
"""
|
|
@@ -138,21 +138,26 @@ def load_settings(slide_csv_path):
|
|
| 138 |
return settings_df
|
| 139 |
|
| 140 |
|
| 141 |
-
def validate_settings(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
"""Validate and normalize slide analysis settings.
|
| 143 |
-
|
| 144 |
Checks each row for valid values and normalizes cancer subtype names.
|
| 145 |
Generates warnings for invalid entries and replaces them with defaults.
|
| 146 |
-
|
| 147 |
Args:
|
| 148 |
settings_df: DataFrame with slide settings to validate
|
| 149 |
cancer_subtype_name_map: Dict mapping subtype display names to codes
|
| 150 |
cancer_subtypes: List of valid cancer subtype codes
|
| 151 |
reversed_cancer_subtype_name_map: Dict mapping codes to display names
|
| 152 |
-
|
| 153 |
Returns:
|
| 154 |
Validated DataFrame with normalized values
|
| 155 |
-
|
| 156 |
Note:
|
| 157 |
Invalid entries are replaced with defaults and warnings are displayed
|
| 158 |
to the user via Gradio warnings.
|
|
@@ -215,13 +220,13 @@ def validate_settings(settings_df, cancer_subtype_name_map, cancer_subtypes, rev
|
|
| 215 |
|
| 216 |
def export_to_csv(df):
|
| 217 |
"""Export a DataFrame to CSV file for download.
|
| 218 |
-
|
| 219 |
Args:
|
| 220 |
df: DataFrame to export
|
| 221 |
-
|
| 222 |
Returns:
|
| 223 |
Path to the exported CSV file
|
| 224 |
-
|
| 225 |
Raises:
|
| 226 |
gr.Error: If the DataFrame is None or empty
|
| 227 |
"""
|
|
|
|
| 61 |
|
| 62 |
def get_oncotree_code_name(code):
|
| 63 |
"""Retrieve the human-readable name for an OncoTree code.
|
| 64 |
+
|
| 65 |
Queries the OncoTree API to get the cancer subtype name corresponding
|
| 66 |
to the given code. Results are cached to avoid repeated API calls.
|
| 67 |
+
|
| 68 |
Args:
|
| 69 |
code: OncoTree code (e.g., "LUAD", "BRCA")
|
| 70 |
+
|
| 71 |
Returns:
|
| 72 |
Human-readable cancer subtype name, or "Unknown" if not found
|
| 73 |
"""
|
|
|
|
| 108 |
|
| 109 |
def load_settings(slide_csv_path):
|
| 110 |
"""Load slide analysis settings from CSV file.
|
| 111 |
+
|
| 112 |
Loads the CSV and ensures all required columns are present, adding defaults
|
| 113 |
for optional columns if they are missing.
|
| 114 |
+
|
| 115 |
Args:
|
| 116 |
slide_csv_path: Path to the CSV file containing slide settings
|
| 117 |
+
|
| 118 |
Returns:
|
| 119 |
DataFrame with columns: Slide, Site Type, Cancer Subtype, IHC Subtype, Segmentation Config
|
| 120 |
+
|
| 121 |
Raises:
|
| 122 |
ValueError: If required columns are missing from the CSV
|
| 123 |
"""
|
|
|
|
| 138 |
return settings_df
|
| 139 |
|
| 140 |
|
| 141 |
+
def validate_settings(
|
| 142 |
+
settings_df,
|
| 143 |
+
cancer_subtype_name_map,
|
| 144 |
+
cancer_subtypes,
|
| 145 |
+
reversed_cancer_subtype_name_map,
|
| 146 |
+
):
|
| 147 |
"""Validate and normalize slide analysis settings.
|
| 148 |
+
|
| 149 |
Checks each row for valid values and normalizes cancer subtype names.
|
| 150 |
Generates warnings for invalid entries and replaces them with defaults.
|
| 151 |
+
|
| 152 |
Args:
|
| 153 |
settings_df: DataFrame with slide settings to validate
|
| 154 |
cancer_subtype_name_map: Dict mapping subtype display names to codes
|
| 155 |
cancer_subtypes: List of valid cancer subtype codes
|
| 156 |
reversed_cancer_subtype_name_map: Dict mapping codes to display names
|
| 157 |
+
|
| 158 |
Returns:
|
| 159 |
Validated DataFrame with normalized values
|
| 160 |
+
|
| 161 |
Note:
|
| 162 |
Invalid entries are replaced with defaults and warnings are displayed
|
| 163 |
to the user via Gradio warnings.
|
|
|
|
| 220 |
|
| 221 |
def export_to_csv(df):
|
| 222 |
"""Export a DataFrame to CSV file for download.
|
| 223 |
+
|
| 224 |
Args:
|
| 225 |
df: DataFrame to export
|
| 226 |
+
|
| 227 |
Returns:
|
| 228 |
Path to the exported CSV file
|
| 229 |
+
|
| 230 |
Raises:
|
| 231 |
gr.Error: If the DataFrame is None or empty
|
| 232 |
"""
|
tests/benchmark_batch_performance.py
CHANGED
|
@@ -21,7 +21,9 @@ from mosaic.batch_analysis import analyze_slides_batch
|
|
| 21 |
from mosaic.ui.utils import load_settings, validate_settings
|
| 22 |
|
| 23 |
|
| 24 |
-
def benchmark_sequential_processing(
|
|
|
|
|
|
|
| 25 |
"""Benchmark traditional sequential processing (models loaded per slide)."""
|
| 26 |
logger.info("=" * 80)
|
| 27 |
logger.info("BENCHMARKING: Sequential Processing (OLD METHOD)")
|
|
@@ -51,13 +53,15 @@ def benchmark_sequential_processing(slides, settings_df, cancer_subtype_name_map
|
|
| 51 |
slide_time = time.time() - slide_start
|
| 52 |
logger.info(f"Slide {idx + 1} completed in {slide_time:.2f}s")
|
| 53 |
|
| 54 |
-
results.append(
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
|
|
|
|
|
|
| 61 |
|
| 62 |
total_time = time.time() - start_time
|
| 63 |
peak_memory = torch.cuda.max_memory_allocated() if torch.cuda.is_available() else 0
|
|
@@ -79,7 +83,9 @@ def benchmark_sequential_processing(slides, settings_df, cancer_subtype_name_map
|
|
| 79 |
}
|
| 80 |
|
| 81 |
|
| 82 |
-
def benchmark_batch_processing(
|
|
|
|
|
|
|
| 83 |
"""Benchmark optimized batch processing (models loaded once)."""
|
| 84 |
logger.info("=" * 80)
|
| 85 |
logger.info("BENCHMARKING: Batch Processing (NEW METHOD)")
|
|
@@ -128,7 +134,9 @@ def compare_results(sequential_stats, batch_stats):
|
|
| 128 |
|
| 129 |
speedup = sequential_stats["total_time"] / batch_stats["total_time"]
|
| 130 |
time_saved = sequential_stats["total_time"] - batch_stats["total_time"]
|
| 131 |
-
percent_faster = (
|
|
|
|
|
|
|
| 132 |
|
| 133 |
logger.info(f"Number of slides: {sequential_stats['num_slides']}")
|
| 134 |
logger.info(f"")
|
|
@@ -141,9 +149,11 @@ def compare_results(sequential_stats, batch_stats):
|
|
| 141 |
|
| 142 |
if torch.cuda.is_available():
|
| 143 |
logger.info(f"")
|
| 144 |
-
logger.info(
|
|
|
|
|
|
|
| 145 |
logger.info(f"Batch peak memory: {batch_stats['peak_memory_gb']:.2f} GB")
|
| 146 |
-
memory_diff = batch_stats[
|
| 147 |
logger.info(f"Memory difference: {memory_diff:+.2f} GB")
|
| 148 |
|
| 149 |
logger.info("=" * 80)
|
|
@@ -161,31 +171,20 @@ def main():
|
|
| 161 |
parser = argparse.ArgumentParser(
|
| 162 |
description="Benchmark batch processing performance"
|
| 163 |
)
|
|
|
|
| 164 |
parser.add_argument(
|
| 165 |
-
"--
|
| 166 |
-
nargs="+",
|
| 167 |
-
help="List of slide paths to process"
|
| 168 |
-
)
|
| 169 |
-
parser.add_argument(
|
| 170 |
-
"--slide-csv",
|
| 171 |
-
type=str,
|
| 172 |
-
help="CSV file with slide paths and settings"
|
| 173 |
)
|
| 174 |
parser.add_argument(
|
| 175 |
-
"--num-workers",
|
| 176 |
-
type=int,
|
| 177 |
-
default=4,
|
| 178 |
-
help="Number of workers for data loading"
|
| 179 |
)
|
| 180 |
parser.add_argument(
|
| 181 |
"--skip-sequential",
|
| 182 |
action="store_true",
|
| 183 |
-
help="Skip sequential benchmark (faster, only test batch mode)"
|
| 184 |
)
|
| 185 |
parser.add_argument(
|
| 186 |
-
"--output",
|
| 187 |
-
type=str,
|
| 188 |
-
help="Save benchmark results to JSON file"
|
| 189 |
)
|
| 190 |
|
| 191 |
args = parser.parse_args()
|
|
@@ -195,27 +194,35 @@ def main():
|
|
| 195 |
|
| 196 |
# Load cancer subtype mappings
|
| 197 |
from mosaic.gradio_app import download_and_process_models
|
| 198 |
-
|
|
|
|
|
|
|
|
|
|
| 199 |
|
| 200 |
# Prepare slides and settings
|
| 201 |
if args.slide_csv:
|
| 202 |
settings_df = load_settings(args.slide_csv)
|
| 203 |
settings_df = validate_settings(
|
| 204 |
-
settings_df,
|
|
|
|
|
|
|
|
|
|
| 205 |
)
|
| 206 |
slides = settings_df["Slide"].tolist()
|
| 207 |
else:
|
| 208 |
slides = args.slides
|
| 209 |
# Create default settings
|
| 210 |
-
settings_df = pd.DataFrame(
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
|
|
|
|
|
|
| 219 |
|
| 220 |
logger.info(f"Benchmarking with {len(slides)} slides")
|
| 221 |
logger.info(f"GPU available: {torch.cuda.is_available()}")
|
|
@@ -239,8 +246,9 @@ def main():
|
|
| 239 |
# Save results if requested
|
| 240 |
if args.output:
|
| 241 |
import json
|
|
|
|
| 242 |
output_path = Path(args.output)
|
| 243 |
-
with open(output_path,
|
| 244 |
json.dump(comparison, f, indent=2, default=str)
|
| 245 |
logger.info(f"Benchmark results saved to {output_path}")
|
| 246 |
|
|
|
|
| 21 |
from mosaic.ui.utils import load_settings, validate_settings
|
| 22 |
|
| 23 |
|
| 24 |
+
def benchmark_sequential_processing(
|
| 25 |
+
slides, settings_df, cancer_subtype_name_map, num_workers
|
| 26 |
+
):
|
| 27 |
"""Benchmark traditional sequential processing (models loaded per slide)."""
|
| 28 |
logger.info("=" * 80)
|
| 29 |
logger.info("BENCHMARKING: Sequential Processing (OLD METHOD)")
|
|
|
|
| 53 |
slide_time = time.time() - slide_start
|
| 54 |
logger.info(f"Slide {idx + 1} completed in {slide_time:.2f}s")
|
| 55 |
|
| 56 |
+
results.append(
|
| 57 |
+
{
|
| 58 |
+
"slide": slide_path,
|
| 59 |
+
"time": slide_time,
|
| 60 |
+
"has_mask": slide_mask is not None,
|
| 61 |
+
"has_aeon": aeon_results is not None,
|
| 62 |
+
"has_paladin": paladin_results is not None,
|
| 63 |
+
}
|
| 64 |
+
)
|
| 65 |
|
| 66 |
total_time = time.time() - start_time
|
| 67 |
peak_memory = torch.cuda.max_memory_allocated() if torch.cuda.is_available() else 0
|
|
|
|
| 83 |
}
|
| 84 |
|
| 85 |
|
| 86 |
+
def benchmark_batch_processing(
|
| 87 |
+
slides, settings_df, cancer_subtype_name_map, num_workers
|
| 88 |
+
):
|
| 89 |
"""Benchmark optimized batch processing (models loaded once)."""
|
| 90 |
logger.info("=" * 80)
|
| 91 |
logger.info("BENCHMARKING: Batch Processing (NEW METHOD)")
|
|
|
|
| 134 |
|
| 135 |
speedup = sequential_stats["total_time"] / batch_stats["total_time"]
|
| 136 |
time_saved = sequential_stats["total_time"] - batch_stats["total_time"]
|
| 137 |
+
percent_faster = (
|
| 138 |
+
1 - (batch_stats["total_time"] / sequential_stats["total_time"])
|
| 139 |
+
) * 100
|
| 140 |
|
| 141 |
logger.info(f"Number of slides: {sequential_stats['num_slides']}")
|
| 142 |
logger.info(f"")
|
|
|
|
| 149 |
|
| 150 |
if torch.cuda.is_available():
|
| 151 |
logger.info(f"")
|
| 152 |
+
logger.info(
|
| 153 |
+
f"Sequential peak memory: {sequential_stats['peak_memory_gb']:.2f} GB"
|
| 154 |
+
)
|
| 155 |
logger.info(f"Batch peak memory: {batch_stats['peak_memory_gb']:.2f} GB")
|
| 156 |
+
memory_diff = batch_stats["peak_memory_gb"] - sequential_stats["peak_memory_gb"]
|
| 157 |
logger.info(f"Memory difference: {memory_diff:+.2f} GB")
|
| 158 |
|
| 159 |
logger.info("=" * 80)
|
|
|
|
| 171 |
parser = argparse.ArgumentParser(
|
| 172 |
description="Benchmark batch processing performance"
|
| 173 |
)
|
| 174 |
+
parser.add_argument("--slides", nargs="+", help="List of slide paths to process")
|
| 175 |
parser.add_argument(
|
| 176 |
+
"--slide-csv", type=str, help="CSV file with slide paths and settings"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
)
|
| 178 |
parser.add_argument(
|
| 179 |
+
"--num-workers", type=int, default=4, help="Number of workers for data loading"
|
|
|
|
|
|
|
|
|
|
| 180 |
)
|
| 181 |
parser.add_argument(
|
| 182 |
"--skip-sequential",
|
| 183 |
action="store_true",
|
| 184 |
+
help="Skip sequential benchmark (faster, only test batch mode)",
|
| 185 |
)
|
| 186 |
parser.add_argument(
|
| 187 |
+
"--output", type=str, help="Save benchmark results to JSON file"
|
|
|
|
|
|
|
| 188 |
)
|
| 189 |
|
| 190 |
args = parser.parse_args()
|
|
|
|
| 194 |
|
| 195 |
# Load cancer subtype mappings
|
| 196 |
from mosaic.gradio_app import download_and_process_models
|
| 197 |
+
|
| 198 |
+
cancer_subtype_name_map, cancer_subtypes, reversed_cancer_subtype_name_map = (
|
| 199 |
+
download_and_process_models()
|
| 200 |
+
)
|
| 201 |
|
| 202 |
# Prepare slides and settings
|
| 203 |
if args.slide_csv:
|
| 204 |
settings_df = load_settings(args.slide_csv)
|
| 205 |
settings_df = validate_settings(
|
| 206 |
+
settings_df,
|
| 207 |
+
cancer_subtype_name_map,
|
| 208 |
+
cancer_subtypes,
|
| 209 |
+
reversed_cancer_subtype_name_map,
|
| 210 |
)
|
| 211 |
slides = settings_df["Slide"].tolist()
|
| 212 |
else:
|
| 213 |
slides = args.slides
|
| 214 |
# Create default settings
|
| 215 |
+
settings_df = pd.DataFrame(
|
| 216 |
+
{
|
| 217 |
+
"Slide": slides,
|
| 218 |
+
"Site Type": ["Primary"] * len(slides),
|
| 219 |
+
"Sex": ["Unknown"] * len(slides),
|
| 220 |
+
"Tissue Site": ["Unknown"] * len(slides),
|
| 221 |
+
"Cancer Subtype": ["Unknown"] * len(slides),
|
| 222 |
+
"IHC Subtype": [""] * len(slides),
|
| 223 |
+
"Segmentation Config": ["Biopsy"] * len(slides),
|
| 224 |
+
}
|
| 225 |
+
)
|
| 226 |
|
| 227 |
logger.info(f"Benchmarking with {len(slides)} slides")
|
| 228 |
logger.info(f"GPU available: {torch.cuda.is_available()}")
|
|
|
|
| 246 |
# Save results if requested
|
| 247 |
if args.output:
|
| 248 |
import json
|
| 249 |
+
|
| 250 |
output_path = Path(args.output)
|
| 251 |
+
with open(output_path, "w") as f:
|
| 252 |
json.dump(comparison, f, indent=2, default=str)
|
| 253 |
logger.info(f"Benchmark results saved to {output_path}")
|
| 254 |
|
tests/conftest.py
CHANGED
|
@@ -3,22 +3,28 @@
|
|
| 3 |
import sys
|
| 4 |
from unittest.mock import MagicMock
|
| 5 |
|
|
|
|
| 6 |
# Create mock for gradio with Error class
|
| 7 |
class GradioMock(MagicMock):
|
| 8 |
"""Mock for gradio that supports Error and Warning classes."""
|
|
|
|
| 9 |
Error = Exception
|
| 10 |
Warning = lambda msg: None
|
| 11 |
Request = MagicMock
|
| 12 |
Progress = MagicMock
|
| 13 |
-
|
|
|
|
| 14 |
# Mock heavy dependencies before any imports
|
| 15 |
# This is necessary to allow tests to run without full environment setup
|
| 16 |
-
sys.modules[
|
| 17 |
-
sys.modules[
|
| 18 |
-
sys.modules[
|
| 19 |
-
sys.modules[
|
| 20 |
-
sys.modules[
|
| 21 |
-
sys.modules[
|
| 22 |
-
sys.modules[
|
| 23 |
-
sys.modules[
|
| 24 |
-
sys.modules[
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import sys
|
| 4 |
from unittest.mock import MagicMock
|
| 5 |
|
| 6 |
+
|
| 7 |
# Create mock for gradio with Error class
|
| 8 |
class GradioMock(MagicMock):
|
| 9 |
"""Mock for gradio that supports Error and Warning classes."""
|
| 10 |
+
|
| 11 |
Error = Exception
|
| 12 |
Warning = lambda msg: None
|
| 13 |
Request = MagicMock
|
| 14 |
Progress = MagicMock
|
| 15 |
+
|
| 16 |
+
|
| 17 |
# Mock heavy dependencies before any imports
|
| 18 |
# This is necessary to allow tests to run without full environment setup
|
| 19 |
+
sys.modules["mussel"] = MagicMock()
|
| 20 |
+
sys.modules["mussel.models"] = MagicMock()
|
| 21 |
+
sys.modules["mussel.utils"] = MagicMock()
|
| 22 |
+
sys.modules["mussel.utils.segment"] = MagicMock()
|
| 23 |
+
sys.modules["mussel.cli"] = MagicMock()
|
| 24 |
+
sys.modules["mussel.cli.tessellate"] = MagicMock()
|
| 25 |
+
sys.modules["gradio"] = GradioMock()
|
| 26 |
+
sys.modules["huggingface_hub"] = MagicMock()
|
| 27 |
+
sys.modules["loguru"] = MagicMock()
|
| 28 |
+
|
| 29 |
+
# Import fixtures from test_fixtures.py to make them available to all tests
|
| 30 |
+
pytest_plugins = ["tests.test_fixtures"]
|
tests/test_batch_analysis.py
DELETED
|
@@ -1,279 +0,0 @@
|
|
| 1 |
-
"""Integration tests for batch_analysis module.
|
| 2 |
-
|
| 3 |
-
Tests the batch processing coordinator and end-to-end batch workflow.
|
| 4 |
-
"""
|
| 5 |
-
|
| 6 |
-
import pytest
|
| 7 |
-
import pandas as pd
|
| 8 |
-
from pathlib import Path
|
| 9 |
-
from unittest.mock import Mock, patch, MagicMock
|
| 10 |
-
import numpy as np
|
| 11 |
-
|
| 12 |
-
from mosaic.batch_analysis import analyze_slides_batch
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
class TestAnalyzeSlidesBatch:
|
| 16 |
-
"""Test analyze_slides_batch function."""
|
| 17 |
-
|
| 18 |
-
@pytest.fixture
|
| 19 |
-
def sample_settings_df(self):
|
| 20 |
-
"""Create sample settings DataFrame for testing."""
|
| 21 |
-
return pd.DataFrame({
|
| 22 |
-
"Slide": ["slide1.svs", "slide2.svs", "slide3.svs"],
|
| 23 |
-
"Site Type": ["Primary", "Primary", "Metastatic"],
|
| 24 |
-
"Sex": ["Male", "Female", "Unknown"],
|
| 25 |
-
"Tissue Site": ["Lung", "Breast", "Unknown"],
|
| 26 |
-
"Cancer Subtype": ["Unknown", "Unknown", "LUAD"],
|
| 27 |
-
"IHC Subtype": ["", "HR+/HER2-", ""],
|
| 28 |
-
"Segmentation Config": ["Biopsy", "Resection", "Biopsy"],
|
| 29 |
-
})
|
| 30 |
-
|
| 31 |
-
@pytest.fixture
|
| 32 |
-
def cancer_subtype_name_map(self):
|
| 33 |
-
"""Sample cancer subtype name mapping."""
|
| 34 |
-
return {
|
| 35 |
-
"Unknown": "Unknown",
|
| 36 |
-
"Lung Adenocarcinoma": "LUAD",
|
| 37 |
-
"Breast Invasive Ductal Carcinoma": "IDC",
|
| 38 |
-
}
|
| 39 |
-
|
| 40 |
-
@patch('mosaic.batch_analysis.load_all_models')
|
| 41 |
-
@patch('mosaic.batch_analysis.analyze_slide_with_models')
|
| 42 |
-
def test_batch_analysis_basic(
|
| 43 |
-
self, mock_analyze_slide, mock_load_models, sample_settings_df, cancer_subtype_name_map
|
| 44 |
-
):
|
| 45 |
-
"""Test basic batch analysis workflow."""
|
| 46 |
-
# Mock model cache
|
| 47 |
-
mock_cache = Mock()
|
| 48 |
-
mock_cache.cleanup = Mock()
|
| 49 |
-
mock_load_models.return_value = mock_cache
|
| 50 |
-
|
| 51 |
-
# Mock analyze_slide_with_models to return NEW DataFrames each time
|
| 52 |
-
def mock_analyze_side_effect(*args, **kwargs):
|
| 53 |
-
mock_mask = Mock()
|
| 54 |
-
# Aeon results should have Cancer Subtype as index, not a column
|
| 55 |
-
mock_aeon = pd.DataFrame({"Confidence": [0.95]}, index=pd.Index(["LUAD"], name="Cancer Subtype"))
|
| 56 |
-
mock_paladin = pd.DataFrame({
|
| 57 |
-
"Cancer Subtype": ["LUAD"],
|
| 58 |
-
"Biomarker": ["EGFR"],
|
| 59 |
-
"Score": [0.85]
|
| 60 |
-
})
|
| 61 |
-
return (mock_mask, mock_aeon, mock_paladin)
|
| 62 |
-
|
| 63 |
-
mock_analyze_slide.side_effect = mock_analyze_side_effect
|
| 64 |
-
|
| 65 |
-
slides = ["slide1.svs", "slide2.svs", "slide3.svs"]
|
| 66 |
-
|
| 67 |
-
# Run batch analysis
|
| 68 |
-
masks, aeon_results, paladin_results = analyze_slides_batch(
|
| 69 |
-
slides=slides,
|
| 70 |
-
settings_df=sample_settings_df,
|
| 71 |
-
cancer_subtype_name_map=cancer_subtype_name_map,
|
| 72 |
-
num_workers=4,
|
| 73 |
-
)
|
| 74 |
-
|
| 75 |
-
# Verify models were loaded once
|
| 76 |
-
mock_load_models.assert_called_once()
|
| 77 |
-
|
| 78 |
-
# Verify analyze_slide_with_models was called for each slide
|
| 79 |
-
assert mock_analyze_slide.call_count == 3
|
| 80 |
-
|
| 81 |
-
# Verify cleanup was called
|
| 82 |
-
mock_cache.cleanup.assert_called_once()
|
| 83 |
-
|
| 84 |
-
# Verify results structure
|
| 85 |
-
assert len(masks) == 3
|
| 86 |
-
assert len(aeon_results) == 3
|
| 87 |
-
assert len(paladin_results) == 3
|
| 88 |
-
|
| 89 |
-
@patch('mosaic.batch_analysis.load_all_models')
|
| 90 |
-
@patch('mosaic.batch_analysis.analyze_slide_with_models')
|
| 91 |
-
def test_batch_analysis_with_failures(
|
| 92 |
-
self, mock_analyze_slide, mock_load_models, sample_settings_df, cancer_subtype_name_map
|
| 93 |
-
):
|
| 94 |
-
"""Test batch analysis continues when individual slides fail."""
|
| 95 |
-
mock_cache = Mock()
|
| 96 |
-
mock_cache.cleanup = Mock()
|
| 97 |
-
mock_load_models.return_value = mock_cache
|
| 98 |
-
|
| 99 |
-
# First slide succeeds, second fails, third succeeds
|
| 100 |
-
def mock_analyze_side_effect(*args, **kwargs):
|
| 101 |
-
# Get the slide_path to determine which call this is
|
| 102 |
-
call_count = mock_analyze_slide.call_count
|
| 103 |
-
if call_count == 2: # Second call (index 1)
|
| 104 |
-
raise RuntimeError("Slide processing failed")
|
| 105 |
-
|
| 106 |
-
mock_mask = Mock()
|
| 107 |
-
# Aeon results should have Cancer Subtype as index, not a column
|
| 108 |
-
mock_aeon = pd.DataFrame({"Confidence": [0.95]}, index=pd.Index(["LUAD"], name="Cancer Subtype"))
|
| 109 |
-
mock_paladin = pd.DataFrame({
|
| 110 |
-
"Cancer Subtype": ["LUAD"],
|
| 111 |
-
"Biomarker": ["EGFR"],
|
| 112 |
-
"Score": [0.85]
|
| 113 |
-
})
|
| 114 |
-
return (mock_mask, mock_aeon, mock_paladin)
|
| 115 |
-
|
| 116 |
-
mock_analyze_slide.side_effect = mock_analyze_side_effect
|
| 117 |
-
|
| 118 |
-
slides = ["slide1.svs", "slide2.svs", "slide3.svs"]
|
| 119 |
-
|
| 120 |
-
# Should not raise exception
|
| 121 |
-
masks, aeon_results, paladin_results = analyze_slides_batch(
|
| 122 |
-
slides=slides,
|
| 123 |
-
settings_df=sample_settings_df,
|
| 124 |
-
cancer_subtype_name_map=cancer_subtype_name_map,
|
| 125 |
-
)
|
| 126 |
-
|
| 127 |
-
# Should have results for 2 out of 3 slides
|
| 128 |
-
assert len(masks) == 2
|
| 129 |
-
assert len(aeon_results) == 2
|
| 130 |
-
assert len(paladin_results) == 2
|
| 131 |
-
|
| 132 |
-
# Cleanup should still be called
|
| 133 |
-
mock_cache.cleanup.assert_called_once()
|
| 134 |
-
|
| 135 |
-
@patch('mosaic.batch_analysis.load_all_models')
|
| 136 |
-
def test_batch_analysis_cleanup_on_error(
|
| 137 |
-
self, mock_load_models, sample_settings_df, cancer_subtype_name_map
|
| 138 |
-
):
|
| 139 |
-
"""Test cleanup is called even when load_all_models fails."""
|
| 140 |
-
mock_load_models.side_effect = RuntimeError("Failed to load models")
|
| 141 |
-
|
| 142 |
-
slides = ["slide1.svs"]
|
| 143 |
-
|
| 144 |
-
with pytest.raises(RuntimeError, match="Failed to load models"):
|
| 145 |
-
analyze_slides_batch(
|
| 146 |
-
slides=slides,
|
| 147 |
-
settings_df=sample_settings_df,
|
| 148 |
-
cancer_subtype_name_map=cancer_subtype_name_map,
|
| 149 |
-
)
|
| 150 |
-
|
| 151 |
-
@patch('mosaic.batch_analysis.load_all_models')
|
| 152 |
-
@patch('mosaic.batch_analysis.analyze_slide_with_models')
|
| 153 |
-
def test_batch_analysis_empty_results(
|
| 154 |
-
self, mock_analyze_slide, mock_load_models, sample_settings_df, cancer_subtype_name_map
|
| 155 |
-
):
|
| 156 |
-
"""Test batch analysis with slides that have no tissue."""
|
| 157 |
-
mock_cache = Mock()
|
| 158 |
-
mock_cache.cleanup = Mock()
|
| 159 |
-
mock_load_models.return_value = mock_cache
|
| 160 |
-
|
| 161 |
-
# All slides return None (no tissue found)
|
| 162 |
-
mock_analyze_slide.return_value = (None, None, None)
|
| 163 |
-
|
| 164 |
-
slides = ["slide1.svs", "slide2.svs"]
|
| 165 |
-
|
| 166 |
-
masks, aeon_results, paladin_results = analyze_slides_batch(
|
| 167 |
-
slides=slides,
|
| 168 |
-
settings_df=sample_settings_df[:2],
|
| 169 |
-
cancer_subtype_name_map=cancer_subtype_name_map,
|
| 170 |
-
)
|
| 171 |
-
|
| 172 |
-
# Should have empty results
|
| 173 |
-
assert len(masks) == 0
|
| 174 |
-
assert len(aeon_results) == 0
|
| 175 |
-
assert len(paladin_results) == 0
|
| 176 |
-
|
| 177 |
-
# Cleanup should still be called
|
| 178 |
-
mock_cache.cleanup.assert_called_once()
|
| 179 |
-
|
| 180 |
-
@patch('mosaic.batch_analysis.load_all_models')
|
| 181 |
-
@patch('mosaic.batch_analysis.analyze_slide_with_models')
|
| 182 |
-
def test_batch_analysis_aggressive_memory_management(
|
| 183 |
-
self, mock_analyze_slide, mock_load_models, sample_settings_df, cancer_subtype_name_map
|
| 184 |
-
):
|
| 185 |
-
"""Test batch analysis with explicit aggressive memory management."""
|
| 186 |
-
mock_cache = Mock()
|
| 187 |
-
mock_cache.cleanup = Mock()
|
| 188 |
-
mock_cache.aggressive_memory_mgmt = True
|
| 189 |
-
mock_load_models.return_value = mock_cache
|
| 190 |
-
|
| 191 |
-
mock_analyze_slide.return_value = (Mock(), Mock(), Mock())
|
| 192 |
-
|
| 193 |
-
slides = ["slide1.svs"]
|
| 194 |
-
|
| 195 |
-
analyze_slides_batch(
|
| 196 |
-
slides=slides,
|
| 197 |
-
settings_df=sample_settings_df[:1],
|
| 198 |
-
cancer_subtype_name_map=cancer_subtype_name_map,
|
| 199 |
-
aggressive_memory_mgmt=True,
|
| 200 |
-
)
|
| 201 |
-
|
| 202 |
-
# Verify aggressive_memory_mgmt was passed to load_all_models
|
| 203 |
-
mock_load_models.assert_called_once_with(
|
| 204 |
-
use_gpu=True,
|
| 205 |
-
aggressive_memory_mgmt=True,
|
| 206 |
-
)
|
| 207 |
-
|
| 208 |
-
@patch('mosaic.batch_analysis.load_all_models')
|
| 209 |
-
@patch('mosaic.batch_analysis.analyze_slide_with_models')
|
| 210 |
-
def test_batch_analysis_progress_tracking(
|
| 211 |
-
self, mock_analyze_slide, mock_load_models, sample_settings_df, cancer_subtype_name_map
|
| 212 |
-
):
|
| 213 |
-
"""Test batch analysis updates progress correctly."""
|
| 214 |
-
mock_cache = Mock()
|
| 215 |
-
mock_cache.cleanup = Mock()
|
| 216 |
-
mock_load_models.return_value = mock_cache
|
| 217 |
-
|
| 218 |
-
mock_analyze_slide.return_value = (Mock(), Mock(), Mock())
|
| 219 |
-
|
| 220 |
-
mock_progress = Mock()
|
| 221 |
-
slides = ["slide1.svs", "slide2.svs", "slide3.svs"]
|
| 222 |
-
|
| 223 |
-
analyze_slides_batch(
|
| 224 |
-
slides=slides,
|
| 225 |
-
settings_df=sample_settings_df,
|
| 226 |
-
cancer_subtype_name_map=cancer_subtype_name_map,
|
| 227 |
-
progress=mock_progress,
|
| 228 |
-
)
|
| 229 |
-
|
| 230 |
-
# Verify progress was called
|
| 231 |
-
assert mock_progress.call_count > 0
|
| 232 |
-
|
| 233 |
-
# Verify final progress call
|
| 234 |
-
final_call = mock_progress.call_args_list[-1]
|
| 235 |
-
assert final_call[0][0] == 1.0 # Should be 100% at end
|
| 236 |
-
|
| 237 |
-
@patch('mosaic.batch_analysis.load_all_models')
|
| 238 |
-
@patch('mosaic.batch_analysis.analyze_slide_with_models')
|
| 239 |
-
def test_batch_analysis_multi_slide_naming(
|
| 240 |
-
self, mock_analyze_slide, mock_load_models, sample_settings_df, cancer_subtype_name_map
|
| 241 |
-
):
|
| 242 |
-
"""Test that multi-slide results include slide names."""
|
| 243 |
-
mock_cache = Mock()
|
| 244 |
-
mock_cache.cleanup = Mock()
|
| 245 |
-
mock_load_models.return_value = mock_cache
|
| 246 |
-
|
| 247 |
-
# Return new DataFrames each time
|
| 248 |
-
def mock_analyze_side_effect(*args, **kwargs):
|
| 249 |
-
mock_mask = Mock()
|
| 250 |
-
# Aeon results should have Cancer Subtype as index, not a column
|
| 251 |
-
mock_aeon = pd.DataFrame({"Confidence": [0.95]}, index=pd.Index(["LUAD"], name="Cancer Subtype"))
|
| 252 |
-
mock_paladin = pd.DataFrame({
|
| 253 |
-
"Cancer Subtype": ["LUAD"],
|
| 254 |
-
"Biomarker": ["EGFR"],
|
| 255 |
-
"Score": [0.85]
|
| 256 |
-
})
|
| 257 |
-
return (mock_mask, mock_aeon, mock_paladin)
|
| 258 |
-
|
| 259 |
-
mock_analyze_slide.side_effect = mock_analyze_side_effect
|
| 260 |
-
|
| 261 |
-
slides = ["slide1.svs", "slide2.svs"]
|
| 262 |
-
|
| 263 |
-
masks, aeon_results, paladin_results = analyze_slides_batch(
|
| 264 |
-
slides=slides,
|
| 265 |
-
settings_df=sample_settings_df[:2],
|
| 266 |
-
cancer_subtype_name_map=cancer_subtype_name_map,
|
| 267 |
-
)
|
| 268 |
-
|
| 269 |
-
# Verify slide names are in results
|
| 270 |
-
assert len(masks) == 2
|
| 271 |
-
assert masks[0][1] == "slide1.svs"
|
| 272 |
-
assert masks[1][1] == "slide2.svs"
|
| 273 |
-
|
| 274 |
-
# Paladin results should have Slide column
|
| 275 |
-
assert "Slide" in paladin_results[0].columns
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
if __name__ == "__main__":
|
| 279 |
-
pytest.main([__file__, "-v"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_cli.py
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for CLI execution modes and argument handling.
|
| 2 |
+
|
| 3 |
+
This module tests the Mosaic CLI, including:
|
| 4 |
+
- Argument parsing and routing
|
| 5 |
+
- Single-slide processing mode
|
| 6 |
+
- Batch CSV processing mode
|
| 7 |
+
- Model download behavior
|
| 8 |
+
- Output file generation
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import pytest
|
| 12 |
+
from unittest.mock import Mock, patch, MagicMock, call
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
import pandas as pd
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class TestArgumentParsing:
|
| 18 |
+
"""Test CLI argument parsing and mode routing."""
|
| 19 |
+
|
| 20 |
+
@patch("mosaic.gradio_app.launch_gradio")
|
| 21 |
+
@patch("mosaic.gradio_app.download_and_process_models")
|
| 22 |
+
@patch("sys.argv", ["mosaic"])
|
| 23 |
+
def test_no_arguments_launches_web_interface(self, mock_download, mock_launch):
|
| 24 |
+
"""Test no arguments routes to web interface mode."""
|
| 25 |
+
mock_download.return_value = ({}, {}, [])
|
| 26 |
+
|
| 27 |
+
from mosaic.gradio_app import main
|
| 28 |
+
|
| 29 |
+
main()
|
| 30 |
+
|
| 31 |
+
# Should call launch_gradio
|
| 32 |
+
assert mock_launch.called
|
| 33 |
+
assert mock_launch.call_count == 1
|
| 34 |
+
|
| 35 |
+
@patch("mosaic.gradio_app.analyze_slide")
|
| 36 |
+
@patch("mosaic.gradio_app.download_and_process_models")
|
| 37 |
+
@patch("sys.argv", ["mosaic", "--slide-path", "test.svs", "--output-dir", "out"])
|
| 38 |
+
def test_slide_path_routes_to_single_mode(self, mock_download, mock_analyze):
|
| 39 |
+
"""Test --slide-path routes to single-slide mode."""
|
| 40 |
+
mock_download.return_value = ({"Unknown": "UNK"}, {"UNK": "Unknown"}, [])
|
| 41 |
+
mock_analyze.return_value = (None, None, None)
|
| 42 |
+
|
| 43 |
+
from mosaic.gradio_app import main
|
| 44 |
+
|
| 45 |
+
with patch("mosaic.gradio_app.Path.mkdir"):
|
| 46 |
+
main()
|
| 47 |
+
|
| 48 |
+
# Should call analyze_slide
|
| 49 |
+
assert mock_analyze.called
|
| 50 |
+
|
| 51 |
+
@patch("mosaic.gradio_app.load_all_models")
|
| 52 |
+
@patch("mosaic.gradio_app.load_settings")
|
| 53 |
+
@patch("mosaic.gradio_app.validate_settings")
|
| 54 |
+
@patch("mosaic.gradio_app.analyze_slide")
|
| 55 |
+
@patch("mosaic.gradio_app.download_and_process_models")
|
| 56 |
+
@patch("sys.argv", ["mosaic", "--slide-csv", "test.csv", "--output-dir", "out"])
|
| 57 |
+
def test_slide_csv_routes_to_batch_mode(
|
| 58 |
+
self,
|
| 59 |
+
mock_download,
|
| 60 |
+
mock_analyze,
|
| 61 |
+
mock_validate,
|
| 62 |
+
mock_load_settings,
|
| 63 |
+
mock_load_models,
|
| 64 |
+
):
|
| 65 |
+
"""Test --slide-csv routes to batch mode."""
|
| 66 |
+
mock_download.return_value = ({"Unknown": "UNK"}, {"UNK": "Unknown"}, [])
|
| 67 |
+
mock_load_settings.return_value = pd.DataFrame(
|
| 68 |
+
{
|
| 69 |
+
"Slide": ["test.svs"],
|
| 70 |
+
"Site Type": ["Primary"],
|
| 71 |
+
"Sex": ["Unknown"],
|
| 72 |
+
"Tissue Site": ["Unknown"],
|
| 73 |
+
"Cancer Subtype": ["Unknown"],
|
| 74 |
+
"IHC Subtype": [""],
|
| 75 |
+
"Segmentation Config": ["Biopsy"],
|
| 76 |
+
}
|
| 77 |
+
)
|
| 78 |
+
mock_validate.return_value = mock_load_settings.return_value
|
| 79 |
+
mock_analyze.return_value = (None, None, None)
|
| 80 |
+
|
| 81 |
+
mock_cache = Mock()
|
| 82 |
+
mock_cache.cleanup = Mock()
|
| 83 |
+
mock_load_models.return_value = mock_cache
|
| 84 |
+
|
| 85 |
+
from mosaic.gradio_app import main
|
| 86 |
+
|
| 87 |
+
with patch("mosaic.gradio_app.Path.mkdir"):
|
| 88 |
+
main()
|
| 89 |
+
|
| 90 |
+
# Should call load_all_models (batch mode)
|
| 91 |
+
assert mock_load_models.called
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
class TestSingleSlideMode:
|
| 95 |
+
"""Test single-slide processing mode."""
|
| 96 |
+
|
| 97 |
+
@patch("mosaic.gradio_app.Path.mkdir")
|
| 98 |
+
@patch("mosaic.gradio_app.analyze_slide")
|
| 99 |
+
@patch("mosaic.gradio_app.download_and_process_models")
|
| 100 |
+
def test_analyze_slide_called_with_correct_params(
|
| 101 |
+
self, mock_download, mock_analyze, mock_mkdir, cli_args_single
|
| 102 |
+
):
|
| 103 |
+
"""Test analyze_slide called with correct parameters in single mode."""
|
| 104 |
+
mock_download.return_value = ({"Unknown": "UNK"}, {"UNK": "Unknown"}, [])
|
| 105 |
+
mock_analyze.return_value = (None, None, None)
|
| 106 |
+
|
| 107 |
+
# Patch ArgumentParser to return our test args
|
| 108 |
+
with patch(
|
| 109 |
+
"mosaic.gradio_app.ArgumentParser.parse_args", return_value=cli_args_single
|
| 110 |
+
):
|
| 111 |
+
from mosaic.gradio_app import main
|
| 112 |
+
|
| 113 |
+
main()
|
| 114 |
+
|
| 115 |
+
# Verify analyze_slide was called
|
| 116 |
+
assert mock_analyze.called
|
| 117 |
+
call_args = mock_analyze.call_args[0] # Positional args
|
| 118 |
+
|
| 119 |
+
# Check key parameters (analyze_slide uses positional args)
|
| 120 |
+
assert call_args[0] == cli_args_single.slide_path # slide_path
|
| 121 |
+
assert call_args[1] == cli_args_single.segmentation_config # seg_config
|
| 122 |
+
assert call_args[2] == cli_args_single.site_type # site_type
|
| 123 |
+
|
| 124 |
+
@patch("PIL.Image.Image.save")
|
| 125 |
+
@patch("mosaic.gradio_app.Path.mkdir")
|
| 126 |
+
@patch("mosaic.gradio_app.analyze_slide")
|
| 127 |
+
@patch("mosaic.gradio_app.download_and_process_models")
|
| 128 |
+
def test_output_files_saved_correctly(
|
| 129 |
+
self,
|
| 130 |
+
mock_download,
|
| 131 |
+
mock_analyze,
|
| 132 |
+
mock_mkdir,
|
| 133 |
+
mock_save,
|
| 134 |
+
cli_args_single,
|
| 135 |
+
mock_analyze_slide_results,
|
| 136 |
+
):
|
| 137 |
+
"""Test output files are saved with correct names."""
|
| 138 |
+
from PIL import Image
|
| 139 |
+
|
| 140 |
+
mock_download.return_value = ({"Unknown": "UNK"}, {"UNK": "Unknown"}, [])
|
| 141 |
+
|
| 142 |
+
# Mock analyze_slide to return results
|
| 143 |
+
mask, aeon_results, paladin_results = mock_analyze_slide_results
|
| 144 |
+
mock_analyze.return_value = (mask, aeon_results, paladin_results)
|
| 145 |
+
|
| 146 |
+
# Patch ArgumentParser
|
| 147 |
+
with patch(
|
| 148 |
+
"mosaic.gradio_app.ArgumentParser.parse_args", return_value=cli_args_single
|
| 149 |
+
):
|
| 150 |
+
# Patch DataFrame.to_csv to avoid actual file writes
|
| 151 |
+
with patch("pandas.DataFrame.to_csv"):
|
| 152 |
+
from mosaic.gradio_app import main
|
| 153 |
+
|
| 154 |
+
main()
|
| 155 |
+
|
| 156 |
+
# Verify save was called for mask
|
| 157 |
+
assert mock_save.called
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
class TestBatchCsvMode:
    """Test batch CSV processing mode."""

    # NOTE: @patch decorators are applied bottom-up, so the mock arguments
    # arrive in reverse order: download_and_process_models first, Path.mkdir last.
    @patch("mosaic.gradio_app.Path.mkdir")
    @patch("mosaic.gradio_app.load_all_models")
    @patch("mosaic.gradio_app.analyze_slide")
    @patch("mosaic.gradio_app.validate_settings")
    @patch("mosaic.gradio_app.load_settings")
    @patch("mosaic.gradio_app.download_and_process_models")
    def test_load_all_models_called_once(
        self,
        mock_download,
        mock_load_settings,
        mock_validate,
        mock_analyze,
        mock_load_models,
        mock_mkdir,
        cli_args_batch,
        sample_settings_df,
        mock_analyze_slide_results,
    ):
        """Test load_all_models called once in batch mode."""
        from PIL import Image

        # download_and_process_models returns (name->code map, code->name map, codes).
        mock_download.return_value = ({"Unknown": "UNK"}, {"UNK": "Unknown"}, [])
        mock_load_settings.return_value = sample_settings_df
        mock_validate.return_value = sample_settings_df

        # Return fresh DataFrames on each call to avoid mutation
        def mock_analyze_side_effect(*args, **kwargs):
            mask = Image.new("RGB", (100, 100), color="red")
            aeon_results = pd.DataFrame(
                {"Cancer Subtype": ["LUAD"], "Confidence": [0.95]}
            )
            paladin_results = pd.DataFrame(
                {
                    "Cancer Subtype": ["LUAD", "LUAD", "LUAD"],
                    "Biomarker": ["TP53", "KRAS", "EGFR"],
                    "Score": [0.85, 0.72, 0.63],
                }
            )
            return (mask, aeon_results, paladin_results)

        mock_analyze.side_effect = mock_analyze_side_effect

        # Shared model cache that main() should create once and reuse per slide.
        mock_cache = Mock()
        mock_cache.cleanup = Mock()
        mock_load_models.return_value = mock_cache

        # Import main() inside the patch context so the patched names are in
        # effect when it runs; CSV and image writes are stubbed out.
        with patch(
            "mosaic.gradio_app.ArgumentParser.parse_args", return_value=cli_args_batch
        ):
            with patch("pandas.DataFrame.to_csv"):
                with patch("PIL.Image.Image.save"):
                    from mosaic.gradio_app import main

                    main()

        # load_all_models should be called exactly once
        assert mock_load_models.call_count == 1

        # analyze_slide should be called for each slide (3 times)
        assert mock_analyze.call_count == 3

        # All analyze_slide calls should receive the model_cache
        for call in mock_analyze.call_args_list:
            assert call[1]["model_cache"] == mock_cache

        # cleanup should be called
        assert mock_cache.cleanup.called

    @patch("mosaic.gradio_app.Path.mkdir")
    @patch("mosaic.gradio_app.load_all_models")
    @patch("mosaic.gradio_app.analyze_slide")
    @patch("mosaic.gradio_app.validate_settings")
    @patch("mosaic.gradio_app.load_settings")
    @patch("mosaic.gradio_app.download_and_process_models")
    def test_combined_outputs_generated(
        self,
        mock_download,
        mock_load_settings,
        mock_validate,
        mock_analyze,
        mock_load_models,
        mock_mkdir,
        cli_args_batch,
        sample_settings_df,
        mock_analyze_slide_results,
    ):
        """Test combined output files are generated in batch mode."""
        from PIL import Image

        mock_download.return_value = (
            {"Unknown": "UNK", "Lung Adenocarcinoma (LUAD)": "LUAD"},
            {"UNK": "Unknown", "LUAD": "Lung Adenocarcinoma (LUAD)"},
            ["LUAD"],
        )
        mock_load_settings.return_value = sample_settings_df
        mock_validate.return_value = sample_settings_df

        # Return fresh DataFrames on each call
        def mock_analyze_side_effect(*args, **kwargs):
            mask = Image.new("RGB", (100, 100), color="red")
            aeon_results = pd.DataFrame(
                {"Cancer Subtype": ["LUAD"], "Confidence": [0.95]}
            )
            paladin_results = pd.DataFrame(
                {
                    "Cancer Subtype": ["LUAD", "LUAD", "LUAD"],
                    "Biomarker": ["TP53", "KRAS", "EGFR"],
                    "Score": [0.85, 0.72, 0.63],
                }
            )
            return (mask, aeon_results, paladin_results)

        mock_analyze.side_effect = mock_analyze_side_effect

        mock_cache = Mock()
        mock_cache.cleanup = Mock()
        mock_load_models.return_value = mock_cache

        # Record the destination path of every DataFrame.to_csv call so we can
        # assert which files the batch run attempted to write.
        csv_calls = []

        def track_csv_write(path, *args, **kwargs):
            """Track CSV file writes."""
            csv_calls.append(str(path))

        with patch(
            "mosaic.gradio_app.ArgumentParser.parse_args", return_value=cli_args_batch
        ):
            with patch("pandas.DataFrame.to_csv", side_effect=track_csv_write):
                with patch("PIL.Image.Image.save"):
                    from mosaic.gradio_app import main

                    main()

        # Should have combined files
        combined_files = [c for c in csv_calls if "combined" in c]
        assert len(combined_files) >= 2  # combined_aeon and combined_paladin
|
tests/test_fixtures.py
ADDED
|
@@ -0,0 +1,377 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Shared fixtures and utilities for UI and CLI tests.
|
| 2 |
+
|
| 3 |
+
This module provides reusable fixtures for testing the Mosaic Gradio UI and CLI,
|
| 4 |
+
including mock file objects, settings DataFrames, cancer subtype mappings, and
|
| 5 |
+
utility functions for test setup/teardown.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import tempfile
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
from unittest.mock import Mock
|
| 11 |
+
import pandas as pd
|
| 12 |
+
import numpy as np
|
| 13 |
+
import pytest
|
| 14 |
+
from PIL import Image
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# ============================================================================
|
| 18 |
+
# File and Path Fixtures
|
| 19 |
+
# ============================================================================
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
@pytest.fixture
def test_slide_path():
    """Filesystem location of the bundled slide used by integration tests."""
    return Path("tests") / "testdata" / "948176.svs"
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
@pytest.fixture
def temp_output_dir():
    """Yield a throwaway output directory that is removed on teardown."""
    with tempfile.TemporaryDirectory(prefix="mosaic_test_") as workdir:
        yield Path(workdir)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
@pytest.fixture
def mock_user_dir(temp_output_dir):
    """Stand-in user directory; simply aliases temp_output_dir."""
    return temp_output_dir
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
# ============================================================================
|
| 42 |
+
# Mock File Upload Fixtures
|
| 43 |
+
# ============================================================================
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
@pytest.fixture
def sample_files_single():
    """A single mocked upload carrying a .svs filename."""
    upload = Mock()
    upload.name = "test_slide_1.svs"
    return [upload]
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
@pytest.fixture
def sample_files_multiple():
    """Three mocked uploads named test_slide_1.svs ... test_slide_3.svs."""
    uploads = []
    for idx in (1, 2, 3):
        upload = Mock()
        upload.name = f"test_slide_{idx}.svs"
        uploads.append(upload)
    return uploads
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def create_mock_file(filename):
    """Build a mock upload object exposing only a ``name`` attribute.

    Args:
        filename: Value to expose as the mock's ``name``.

    Returns:
        Mock whose ``name`` attribute equals ``filename``.
    """
    # Assign after construction: Mock(name=...) would set the mock's own
    # display name instead of an attribute.
    upload = Mock()
    upload.name = filename
    return upload
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
# ============================================================================
|
| 80 |
+
# Settings DataFrame Fixtures
|
| 81 |
+
# ============================================================================
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
@pytest.fixture
def sample_settings_df():
    """Settings DataFrame covering three slides with varied metadata."""
    columns = [
        "Slide",
        "Site Type",
        "Sex",
        "Tissue Site",
        "Cancer Subtype",
        "IHC Subtype",
        "Segmentation Config",
    ]
    rows = [
        ("slide1.svs", "Primary", "Unknown", "Lung", "Unknown", "", "Biopsy"),
        (
            "slide2.svs",
            "Metastatic",
            "Female",
            "Liver",
            "Lung Adenocarcinoma (LUAD)",
            "",
            "Resection",
        ),
        ("slide3.svs", "Primary", "Male", "Unknown", "Unknown", "", "TCGA"),
    ]
    return pd.DataFrame(rows, columns=columns)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def create_settings_df(n_rows, **kwargs):
    """Generate a test settings DataFrame with the specified number of rows.

    Args:
        n_rows: Number of rows to generate.
        **kwargs: Column overrides keyed by snake_case column name, e.g.
            ``site_type="Metastatic"`` or ``ihc_subtype="HR+/HER2+"``.
            A scalar is broadcast to every row; a list is used verbatim and
            must contain exactly ``n_rows`` entries.

    Returns:
        DataFrame with the standard SETTINGS_COLUMNS.

    Raises:
        KeyError: If a kwarg does not correspond to a known column.
        ValueError: If a list override does not have ``n_rows`` entries.
    """
    defaults = {
        "Slide": [f"slide_{i}.svs" for i in range(1, n_rows + 1)],
        "Site Type": ["Primary"] * n_rows,
        "Sex": ["Unknown"] * n_rows,
        "Tissue Site": ["Unknown"] * n_rows,
        "Cancer Subtype": ["Unknown"] * n_rows,
        "IHC Subtype": [""] * n_rows,
        "Segmentation Config": ["Biopsy"] * n_rows,
    }

    # Map normalized (lowercased, separator-free) names to real column names.
    # The previous .title()-based mapping could never match "IHC Subtype"
    # (it produced "Ihc Subtype") and silently dropped the override.
    normalized = {col.replace(" ", "").lower(): col for col in defaults}

    for key, value in kwargs.items():
        lookup = key.replace("_", "").lower()
        if lookup not in normalized:
            # Fail loudly on typos instead of silently returning defaults.
            raise KeyError(f"Unknown settings column override: {key!r}")
        column_name = normalized[lookup]
        if isinstance(value, list):
            if len(value) != n_rows:
                raise ValueError(
                    f"Override for {column_name!r} has {len(value)} entries, "
                    f"expected {n_rows}"
                )
            defaults[column_name] = value
        else:
            defaults[column_name] = [value] * n_rows

    return pd.DataFrame(defaults)
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
# ============================================================================
|
| 133 |
+
# CSV File Fixtures
|
| 134 |
+
# ============================================================================
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
@pytest.fixture
def sample_csv_valid():
    """Temporary CSV with three fully valid settings rows; yields the path."""
    header = "Slide,Site Type,Sex,Tissue Site,Cancer Subtype,IHC Subtype,Segmentation Config\n"
    rows = [
        "slide1.svs,Primary,Unknown,Lung,Unknown,,Biopsy\n",
        "slide2.svs,Metastatic,Female,Liver,Lung Adenocarcinoma (LUAD),,Resection\n",
        "slide3.svs,Primary,Male,Unknown,Unknown,,TCGA\n",
    ]
    with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as handle:
        handle.write(header)
        handle.writelines(rows)
        handle.flush()
        yield handle.name
        Path(handle.name).unlink(missing_ok=True)
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
@pytest.fixture
def sample_csv_invalid():
    """Temporary CSV mixing an all-invalid row with one valid breast-cancer row."""
    with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as handle:
        handle.write(
            "Slide,Site Type,Sex,Tissue Site,Cancer Subtype,IHC Subtype,Segmentation Config\n"
        )
        handle.write(
            "slide1.svs,InvalidSite,InvalidSex,InvalidTissue,InvalidSubtype,InvalidIHC,InvalidConfig\n"
        )
        # Second row is a valid breast-cancer entry.
        handle.write("slide2.svs,Primary,Unknown,Lung,BRCA,HR+/HER2+,Biopsy\n")
        handle.flush()
        yield handle.name
        Path(handle.name).unlink(missing_ok=True)
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
@pytest.fixture
def sample_csv_minimal():
    """Temporary CSV containing only the required columns; yields the path."""
    lines = [
        "Slide,Site Type,Cancer Subtype\n",
        "slide1.svs,Primary,Unknown\n",
        "slide2.svs,Metastatic,LUAD\n",
    ]
    with tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=False) as handle:
        handle.writelines(lines)
        handle.flush()
        yield handle.name
        Path(handle.name).unlink(missing_ok=True)
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
# ============================================================================
|
| 185 |
+
# Cancer Subtype Mapping Fixtures
|
| 186 |
+
# ============================================================================
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
@pytest.fixture
def mock_cancer_subtype_maps():
    """Small cancer-subtype name/code maps plus the list of known codes."""
    name_to_code = {
        "Unknown": "UNK",
        "Lung Adenocarcinoma (LUAD)": "LUAD",
        "Breast Invasive Carcinoma (BRCA)": "BRCA",
        "Colorectal Adenocarcinoma (COAD)": "COAD",
        "Prostate Adenocarcinoma (PRAD)": "PRAD",
    }

    # Codes are unique, so inverting the forward map reproduces the reverse
    # map exactly.
    code_to_name = {code: name for name, code in name_to_code.items()}

    subtype_codes = ["LUAD", "BRCA", "COAD", "PRAD"]

    return name_to_code, code_to_name, subtype_codes
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
# ============================================================================
|
| 214 |
+
# Mock Analysis Results Fixtures
|
| 215 |
+
# ============================================================================
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
@pytest.fixture
def mock_analyze_slide_results():
    """Canned (mask, aeon_results, paladin_results) tuple from analyze_slide."""
    # Solid-color image standing in for the segmentation mask.
    fake_mask = Image.new("RGB", (100, 100), color="red")

    aeon_df = pd.DataFrame({"Cancer Subtype": ["LUAD"], "Confidence": [0.95]})

    # Paladin rows deliberately omit a "Slide" column - the CLI/UI adds it.
    paladin_df = pd.DataFrame(
        {
            "Cancer Subtype": ["LUAD"] * 3,
            "Biomarker": ["TP53", "KRAS", "EGFR"],
            "Score": [0.85, 0.72, 0.63],
        }
    )

    return (fake_mask, aeon_df, paladin_df)
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
@pytest.fixture
def mock_model_cache():
    """Mock stand-in for ModelCache with every model attribute mocked."""
    from unittest.mock import Mock

    cache = Mock()
    for attr in (
        "ctranspath_model",
        "optimus_model",
        "marker_classifier",
        "aeon_model",
        "device",
        "cleanup",
    ):
        setattr(cache, attr, Mock())
    cache.paladin_models = {}

    return cache
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
# ============================================================================
|
| 262 |
+
# CLI Argument Fixtures
|
| 263 |
+
# ============================================================================
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
@pytest.fixture
def cli_args_single():
    """Namespace mimicking parsed CLI arguments for single-slide mode."""
    from argparse import Namespace

    options = {
        "debug": False,
        "server_name": "0.0.0.0",
        "server_port": None,
        "share": False,
        "slide_path": "tests/testdata/948176.svs",
        "slide_csv": None,
        "output_dir": "test_output",
        "site_type": "Primary",
        "sex": "Unknown",
        "tissue_site": "Unknown",
        "cancer_subtype": "Unknown",
        "ihc_subtype": "",
        "segmentation_config": "Biopsy",
        "num_workers": 4,
    }
    return Namespace(**options)
|
| 287 |
+
|
| 288 |
+
|
| 289 |
+
@pytest.fixture
def cli_args_batch(sample_csv_valid):
    """Namespace mimicking parsed CLI arguments for batch (CSV) mode."""
    from argparse import Namespace

    options = {
        "debug": False,
        "server_name": "0.0.0.0",
        "server_port": None,
        "share": False,
        "slide_path": None,
        "slide_csv": sample_csv_valid,
        "output_dir": "test_output",
        "site_type": "Primary",
        "sex": "Unknown",
        "tissue_site": "Unknown",
        "cancer_subtype": "Unknown",
        "ihc_subtype": "",
        "segmentation_config": "Biopsy",
        "num_workers": 4,
    }
    return Namespace(**options)
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
# ============================================================================
|
| 313 |
+
# Utility Functions
|
| 314 |
+
# ============================================================================
|
| 315 |
+
|
| 316 |
+
|
| 317 |
+
def verify_csv_output(path, expected_columns):
    """Load a CSV file and assert it has the expected structure.

    Args:
        path: Path to the CSV file.
        expected_columns: Column names that must all be present.

    Returns:
        The DataFrame parsed from the CSV.

    Raises:
        AssertionError: If the file is missing, empty, or lacks columns.
    """
    assert Path(path).exists(), f"CSV file not found: {path}"

    frame = pd.read_csv(path)
    assert not frame.empty, f"CSV file is empty: {path}"

    missing_cols = set(expected_columns) - set(frame.columns)
    assert not missing_cols, f"Missing columns in CSV: {missing_cols}"

    return frame
|
| 339 |
+
|
| 340 |
+
|
| 341 |
+
def mock_gradio_components():
    """Context manager that mocks the Gradio component classes in mosaic.ui.app.

    Usage:
        with mock_gradio_components() as mocks:
            # Gradio components are mocked
            result = function_that_returns_gr_components()
            # Verify mocks
            assert mocks['Dataframe'].called

    Returns:
        A context manager yielding the dict of component-name -> Mock.

    Note:
        The previous implementation was a bare generator function, which does
        not implement the context-manager protocol; using it in a ``with``
        statement as documented raised TypeError. Wrapping the patch
        lifecycle with contextlib.contextmanager fixes that while keeping
        the documented call signature.
    """
    from contextlib import contextmanager
    from unittest.mock import Mock, patch

    mocks = {
        "Dataframe": Mock(return_value=Mock()),
        "File": Mock(return_value=Mock()),
        "DownloadButton": Mock(return_value=Mock()),
        "Dropdown": Mock(return_value=Mock()),
        "Gallery": Mock(return_value=Mock()),
        "Error": Exception,  # gr.Error is an exception
        "Warning": Mock(),
    }

    @contextmanager
    def _patched():
        # Start every patch before yielding; stop them all on exit even if
        # the body raises.
        patchers = [
            patch(f"mosaic.ui.app.gr.{name}", mock_obj)
            for name, mock_obj in mocks.items()
        ]
        for p in patchers:
            p.start()
        try:
            yield mocks
        finally:
            for p in patchers:
                p.stop()

    return _patched()
|
tests/test_gradio_app.py
CHANGED
|
@@ -71,14 +71,16 @@ class TestLoadSettings:
|
|
| 71 |
reversed_cancer_subtype_name_map = {
|
| 72 |
value: key for key, value in cancer_subtype_name_map.items()
|
| 73 |
}
|
| 74 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
@pytest.fixture
|
| 77 |
def temp_settings_csv(self):
|
| 78 |
"""Create a temporary settings CSV file with all columns."""
|
| 79 |
-
with tempfile.NamedTemporaryFile(
|
| 80 |
-
mode="w", delete=False, suffix=".csv"
|
| 81 |
-
) as f:
|
| 82 |
f.write("Slide,Site Type,Cancer Subtype,IHC Subtype,Segmentation Config\n")
|
| 83 |
f.write("slide1.svs,Primary,Unknown,,Biopsy\n")
|
| 84 |
f.write("slide2.svs,Metastatic,Unknown,,Resection\n")
|
|
@@ -89,9 +91,7 @@ class TestLoadSettings:
|
|
| 89 |
@pytest.fixture
|
| 90 |
def temp_minimal_settings_csv(self):
|
| 91 |
"""Create a temporary settings CSV file with minimal columns."""
|
| 92 |
-
with tempfile.NamedTemporaryFile(
|
| 93 |
-
mode="w", delete=False, suffix=".csv"
|
| 94 |
-
) as f:
|
| 95 |
f.write("Slide,Site Type\n")
|
| 96 |
f.write("slide1.svs,Primary\n")
|
| 97 |
f.write("slide2.svs,Metastatic\n")
|
|
@@ -129,9 +129,7 @@ class TestLoadSettings:
|
|
| 129 |
|
| 130 |
def test_load_settings_missing_required_column_raises_error(self):
|
| 131 |
"""Test that missing required column raises ValueError."""
|
| 132 |
-
with tempfile.NamedTemporaryFile(
|
| 133 |
-
mode="w", delete=False, suffix=".csv"
|
| 134 |
-
) as f:
|
| 135 |
f.write("RandomColumn\n")
|
| 136 |
f.write("value\n")
|
| 137 |
temp_path = f.name
|
|
|
|
| 71 |
reversed_cancer_subtype_name_map = {
|
| 72 |
value: key for key, value in cancer_subtype_name_map.items()
|
| 73 |
}
|
| 74 |
+
return (
|
| 75 |
+
cancer_subtype_name_map,
|
| 76 |
+
cancer_subtypes,
|
| 77 |
+
reversed_cancer_subtype_name_map,
|
| 78 |
+
)
|
| 79 |
|
| 80 |
@pytest.fixture
|
| 81 |
def temp_settings_csv(self):
|
| 82 |
"""Create a temporary settings CSV file with all columns."""
|
| 83 |
+
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".csv") as f:
|
|
|
|
|
|
|
| 84 |
f.write("Slide,Site Type,Cancer Subtype,IHC Subtype,Segmentation Config\n")
|
| 85 |
f.write("slide1.svs,Primary,Unknown,,Biopsy\n")
|
| 86 |
f.write("slide2.svs,Metastatic,Unknown,,Resection\n")
|
|
|
|
| 91 |
@pytest.fixture
|
| 92 |
def temp_minimal_settings_csv(self):
|
| 93 |
"""Create a temporary settings CSV file with minimal columns."""
|
| 94 |
+
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".csv") as f:
|
|
|
|
|
|
|
| 95 |
f.write("Slide,Site Type\n")
|
| 96 |
f.write("slide1.svs,Primary\n")
|
| 97 |
f.write("slide2.svs,Metastatic\n")
|
|
|
|
| 129 |
|
| 130 |
def test_load_settings_missing_required_column_raises_error(self):
|
| 131 |
"""Test that missing required column raises ValueError."""
|
| 132 |
+
with tempfile.NamedTemporaryFile(mode="w", delete=False, suffix=".csv") as f:
|
|
|
|
|
|
|
| 133 |
f.write("RandomColumn\n")
|
| 134 |
f.write("value\n")
|
| 135 |
temp_path = f.name
|
tests/test_model_manager.py
CHANGED
|
@@ -10,7 +10,11 @@ from unittest.mock import Mock, patch, MagicMock
|
|
| 10 |
import pickle
|
| 11 |
import gc
|
| 12 |
|
| 13 |
-
from mosaic.model_manager import
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
|
| 16 |
class TestModelCache:
|
|
@@ -73,9 +77,11 @@ class TestModelCache:
|
|
| 73 |
|
| 74 |
assert cache.paladin_models == {}
|
| 75 |
|
| 76 |
-
@patch(
|
| 77 |
-
@patch(
|
| 78 |
-
def test_cleanup_paladin_clears_cuda_cache(
|
|
|
|
|
|
|
| 79 |
"""Test cleanup_paladin calls torch.cuda.empty_cache()."""
|
| 80 |
cache = ModelCache()
|
| 81 |
cache.paladin_models = {"model1": Mock()}
|
|
@@ -107,52 +113,52 @@ class TestModelCache:
|
|
| 107 |
class TestLoadAllModels:
|
| 108 |
"""Test load_all_models function."""
|
| 109 |
|
| 110 |
-
@patch(
|
| 111 |
def test_load_models_cpu_only(self, mock_cuda_available):
|
| 112 |
"""Test loading models when CUDA is not available."""
|
| 113 |
-
with patch(
|
| 114 |
-
with patch(
|
| 115 |
# Mock the pickle loads
|
| 116 |
mock_pickle.return_value = Mock()
|
| 117 |
|
| 118 |
# Mock file exists checks
|
| 119 |
-
with patch.object(Path,
|
| 120 |
cache = load_all_models(use_gpu=False)
|
| 121 |
|
| 122 |
assert cache is not None
|
| 123 |
assert cache.device == torch.device("cpu")
|
| 124 |
assert cache.aggressive_memory_mgmt is False
|
| 125 |
|
| 126 |
-
@patch(
|
| 127 |
-
@patch(
|
| 128 |
def test_load_models_a100_gpu(self, mock_get_device, mock_cuda_available):
|
| 129 |
"""Test loading models on A100 GPU (high memory)."""
|
| 130 |
-
with patch(
|
| 131 |
-
with patch(
|
| 132 |
mock_model = Mock()
|
| 133 |
mock_model.to = Mock(return_value=mock_model)
|
| 134 |
mock_model.eval = Mock()
|
| 135 |
mock_pickle.return_value = mock_model
|
| 136 |
|
| 137 |
-
with patch.object(Path,
|
| 138 |
cache = load_all_models(use_gpu=True, aggressive_memory_mgmt=None)
|
| 139 |
|
| 140 |
assert cache.device == torch.device("cuda")
|
| 141 |
assert cache.is_t4_gpu is False
|
| 142 |
assert cache.aggressive_memory_mgmt is False # A100 should use caching
|
| 143 |
|
| 144 |
-
@patch(
|
| 145 |
-
@patch(
|
| 146 |
def test_load_models_t4_gpu(self, mock_get_device, mock_cuda_available):
|
| 147 |
"""Test loading models on T4 GPU (low memory)."""
|
| 148 |
-
with patch(
|
| 149 |
-
with patch(
|
| 150 |
mock_model = Mock()
|
| 151 |
mock_model.to = Mock(return_value=mock_model)
|
| 152 |
mock_model.eval = Mock()
|
| 153 |
mock_pickle.return_value = mock_model
|
| 154 |
|
| 155 |
-
with patch.object(Path,
|
| 156 |
cache = load_all_models(use_gpu=True, aggressive_memory_mgmt=None)
|
| 157 |
|
| 158 |
assert cache.device == torch.device("cuda")
|
|
@@ -161,33 +167,36 @@ class TestLoadAllModels:
|
|
| 161 |
|
| 162 |
def test_load_models_missing_aeon_file(self):
|
| 163 |
"""Test load_all_models raises error when Aeon model file is missing."""
|
|
|
|
| 164 |
def exists_side_effect(self):
|
| 165 |
# Return True for marker_classifier and optimus, False for aeon
|
| 166 |
filename = str(self)
|
| 167 |
-
if
|
| 168 |
return False
|
| 169 |
return True
|
| 170 |
|
| 171 |
-
with patch.object(Path,
|
| 172 |
with pytest.raises(FileNotFoundError, match="Aeon model not found"):
|
| 173 |
-
with patch(
|
| 174 |
-
with patch(
|
| 175 |
load_all_models(use_gpu=False)
|
| 176 |
|
| 177 |
-
@patch(
|
| 178 |
def test_load_models_explicit_aggressive_mode(self, mock_cuda_available):
|
| 179 |
"""Test explicit aggressive memory management setting."""
|
| 180 |
-
with patch(
|
| 181 |
-
with patch(
|
| 182 |
-
with patch(
|
| 183 |
mock_model = Mock()
|
| 184 |
mock_model.to = Mock(return_value=mock_model)
|
| 185 |
mock_model.eval = Mock()
|
| 186 |
mock_pickle.return_value = mock_model
|
| 187 |
|
| 188 |
-
with patch.object(Path,
|
| 189 |
# Force aggressive mode even on A100
|
| 190 |
-
cache = load_all_models(
|
|
|
|
|
|
|
| 191 |
|
| 192 |
assert cache.aggressive_memory_mgmt is True # Should respect explicit setting
|
| 193 |
|
|
@@ -200,8 +209,8 @@ class TestLoadPaladinModelForInference:
|
|
| 200 |
cache = ModelCache(aggressive_memory_mgmt=True, device=torch.device("cpu"))
|
| 201 |
model_path = Path("data/paladin/test_model.pkl")
|
| 202 |
|
| 203 |
-
with patch(
|
| 204 |
-
with patch(
|
| 205 |
mock_model = Mock()
|
| 206 |
mock_model.to = Mock(return_value=mock_model)
|
| 207 |
mock_model.eval = Mock()
|
|
@@ -220,8 +229,8 @@ class TestLoadPaladinModelForInference:
|
|
| 220 |
cache = ModelCache(aggressive_memory_mgmt=False, device=torch.device("cpu"))
|
| 221 |
model_path = Path("data/paladin/test_model.pkl")
|
| 222 |
|
| 223 |
-
with patch(
|
| 224 |
-
with patch(
|
| 225 |
mock_model = Mock()
|
| 226 |
mock_model.to = Mock(return_value=mock_model)
|
| 227 |
mock_model.eval = Mock()
|
|
@@ -243,7 +252,7 @@ class TestLoadPaladinModelForInference:
|
|
| 243 |
cache.paladin_models[str(model_path)] = cached_model
|
| 244 |
|
| 245 |
# Load model - should return cached version without pickle.load
|
| 246 |
-
with patch(
|
| 247 |
model = load_paladin_model_for_inference(cache, model_path)
|
| 248 |
|
| 249 |
assert model == cached_model
|
|
|
|
| 10 |
import pickle
|
| 11 |
import gc
|
| 12 |
|
| 13 |
+
from mosaic.model_manager import (
|
| 14 |
+
ModelCache,
|
| 15 |
+
load_all_models,
|
| 16 |
+
load_paladin_model_for_inference,
|
| 17 |
+
)
|
| 18 |
|
| 19 |
|
| 20 |
class TestModelCache:
|
|
|
|
| 77 |
|
| 78 |
assert cache.paladin_models == {}
|
| 79 |
|
| 80 |
+
@patch("torch.cuda.is_available", return_value=True)
|
| 81 |
+
@patch("torch.cuda.empty_cache")
|
| 82 |
+
def test_cleanup_paladin_clears_cuda_cache(
|
| 83 |
+
self, mock_empty_cache, mock_cuda_available
|
| 84 |
+
):
|
| 85 |
"""Test cleanup_paladin calls torch.cuda.empty_cache()."""
|
| 86 |
cache = ModelCache()
|
| 87 |
cache.paladin_models = {"model1": Mock()}
|
|
|
|
| 113 |
class TestLoadAllModels:
|
| 114 |
"""Test load_all_models function."""
|
| 115 |
|
| 116 |
+
@patch("torch.cuda.is_available", return_value=False)
|
| 117 |
def test_load_models_cpu_only(self, mock_cuda_available):
|
| 118 |
"""Test loading models when CUDA is not available."""
|
| 119 |
+
with patch("builtins.open", create=True) as mock_open:
|
| 120 |
+
with patch("pickle.load") as mock_pickle:
|
| 121 |
# Mock the pickle loads
|
| 122 |
mock_pickle.return_value = Mock()
|
| 123 |
|
| 124 |
# Mock file exists checks
|
| 125 |
+
with patch.object(Path, "exists", return_value=True):
|
| 126 |
cache = load_all_models(use_gpu=False)
|
| 127 |
|
| 128 |
assert cache is not None
|
| 129 |
assert cache.device == torch.device("cpu")
|
| 130 |
assert cache.aggressive_memory_mgmt is False
|
| 131 |
|
| 132 |
+
@patch("torch.cuda.is_available", return_value=True)
|
| 133 |
+
@patch("torch.cuda.get_device_name", return_value="NVIDIA A100")
|
| 134 |
def test_load_models_a100_gpu(self, mock_get_device, mock_cuda_available):
|
| 135 |
"""Test loading models on A100 GPU (high memory)."""
|
| 136 |
+
with patch("builtins.open", create=True):
|
| 137 |
+
with patch("pickle.load") as mock_pickle:
|
| 138 |
mock_model = Mock()
|
| 139 |
mock_model.to = Mock(return_value=mock_model)
|
| 140 |
mock_model.eval = Mock()
|
| 141 |
mock_pickle.return_value = mock_model
|
| 142 |
|
| 143 |
+
with patch.object(Path, "exists", return_value=True):
|
| 144 |
cache = load_all_models(use_gpu=True, aggressive_memory_mgmt=None)
|
| 145 |
|
| 146 |
assert cache.device == torch.device("cuda")
|
| 147 |
assert cache.is_t4_gpu is False
|
| 148 |
assert cache.aggressive_memory_mgmt is False # A100 should use caching
|
| 149 |
|
| 150 |
+
@patch("torch.cuda.is_available", return_value=True)
|
| 151 |
+
@patch("torch.cuda.get_device_name", return_value="Tesla T4")
|
| 152 |
def test_load_models_t4_gpu(self, mock_get_device, mock_cuda_available):
|
| 153 |
"""Test loading models on T4 GPU (low memory)."""
|
| 154 |
+
with patch("builtins.open", create=True):
|
| 155 |
+
with patch("pickle.load") as mock_pickle:
|
| 156 |
mock_model = Mock()
|
| 157 |
mock_model.to = Mock(return_value=mock_model)
|
| 158 |
mock_model.eval = Mock()
|
| 159 |
mock_pickle.return_value = mock_model
|
| 160 |
|
| 161 |
+
with patch.object(Path, "exists", return_value=True):
|
| 162 |
cache = load_all_models(use_gpu=True, aggressive_memory_mgmt=None)
|
| 163 |
|
| 164 |
assert cache.device == torch.device("cuda")
|
|
|
|
| 167 |
|
| 168 |
def test_load_models_missing_aeon_file(self):
|
| 169 |
"""Test load_all_models raises error when Aeon model file is missing."""
|
| 170 |
+
|
| 171 |
def exists_side_effect(self):
|
| 172 |
# Return True for marker_classifier and optimus, False for aeon
|
| 173 |
filename = str(self)
|
| 174 |
+
if "aeon_model.pkl" in filename:
|
| 175 |
return False
|
| 176 |
return True
|
| 177 |
|
| 178 |
+
with patch.object(Path, "exists", exists_side_effect):
|
| 179 |
with pytest.raises(FileNotFoundError, match="Aeon model not found"):
|
| 180 |
+
with patch("builtins.open", create=True):
|
| 181 |
+
with patch("pickle.load"):
|
| 182 |
load_all_models(use_gpu=False)
|
| 183 |
|
| 184 |
+
@patch("torch.cuda.is_available", return_value=True)
|
| 185 |
def test_load_models_explicit_aggressive_mode(self, mock_cuda_available):
|
| 186 |
"""Test explicit aggressive memory management setting."""
|
| 187 |
+
with patch("torch.cuda.get_device_name", return_value="NVIDIA A100"):
|
| 188 |
+
with patch("builtins.open", create=True):
|
| 189 |
+
with patch("pickle.load") as mock_pickle:
|
| 190 |
mock_model = Mock()
|
| 191 |
mock_model.to = Mock(return_value=mock_model)
|
| 192 |
mock_model.eval = Mock()
|
| 193 |
mock_pickle.return_value = mock_model
|
| 194 |
|
| 195 |
+
with patch.object(Path, "exists", return_value=True):
|
| 196 |
# Force aggressive mode even on A100
|
| 197 |
+
cache = load_all_models(
|
| 198 |
+
use_gpu=True, aggressive_memory_mgmt=True
|
| 199 |
+
)
|
| 200 |
|
| 201 |
assert cache.aggressive_memory_mgmt is True # Should respect explicit setting
|
| 202 |
|
|
|
|
| 209 |
cache = ModelCache(aggressive_memory_mgmt=True, device=torch.device("cpu"))
|
| 210 |
model_path = Path("data/paladin/test_model.pkl")
|
| 211 |
|
| 212 |
+
with patch("builtins.open", create=True):
|
| 213 |
+
with patch("pickle.load") as mock_pickle:
|
| 214 |
mock_model = Mock()
|
| 215 |
mock_model.to = Mock(return_value=mock_model)
|
| 216 |
mock_model.eval = Mock()
|
|
|
|
| 229 |
cache = ModelCache(aggressive_memory_mgmt=False, device=torch.device("cpu"))
|
| 230 |
model_path = Path("data/paladin/test_model.pkl")
|
| 231 |
|
| 232 |
+
with patch("builtins.open", create=True):
|
| 233 |
+
with patch("pickle.load") as mock_pickle:
|
| 234 |
mock_model = Mock()
|
| 235 |
mock_model.to = Mock(return_value=mock_model)
|
| 236 |
mock_model.eval = Mock()
|
|
|
|
| 252 |
cache.paladin_models[str(model_path)] = cached_model
|
| 253 |
|
| 254 |
# Load model - should return cached version without pickle.load
|
| 255 |
+
with patch("pickle.load") as mock_pickle:
|
| 256 |
model = load_paladin_model_for_inference(cache, model_path)
|
| 257 |
|
| 258 |
assert model == cached_model
|
tests/test_regression_single_slide.py
CHANGED
|
@@ -30,13 +30,14 @@ class TestSingleSlideRegression:
|
|
| 30 |
"Lung Adenocarcinoma": "LUAD",
|
| 31 |
}
|
| 32 |
|
| 33 |
-
@patch(
|
| 34 |
-
@patch(
|
| 35 |
-
@patch(
|
| 36 |
-
@patch(
|
| 37 |
-
@patch(
|
| 38 |
-
@patch(
|
| 39 |
-
@patch(
|
|
|
|
| 40 |
def test_single_slide_analyze_slide_unchanged(
|
| 41 |
self,
|
| 42 |
mock_paladin,
|
|
@@ -44,6 +45,7 @@ class TestSingleSlideRegression:
|
|
| 44 |
mock_optimus,
|
| 45 |
mock_filter,
|
| 46 |
mock_ctranspath,
|
|
|
|
| 47 |
mock_mask,
|
| 48 |
mock_segment,
|
| 49 |
mock_slide_path,
|
|
@@ -60,6 +62,16 @@ class TestSingleSlideRegression:
|
|
| 60 |
mock_mask_image = Mock()
|
| 61 |
mock_mask.return_value = mock_mask_image
|
| 62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
mock_features = np.random.rand(100, 768)
|
| 64 |
mock_ctranspath.return_value = (mock_features, mock_coords)
|
| 65 |
|
|
@@ -69,17 +81,14 @@ class TestSingleSlideRegression:
|
|
| 69 |
mock_optimus_features = np.random.rand(50, 1536)
|
| 70 |
mock_optimus.return_value = mock_optimus_features
|
| 71 |
|
| 72 |
-
mock_aeon_results = pd.DataFrame(
|
| 73 |
-
"Cancer Subtype": ["LUAD", "LUSC"],
|
| 74 |
-
|
| 75 |
-
})
|
| 76 |
mock_aeon.return_value = mock_aeon_results
|
| 77 |
|
| 78 |
-
mock_paladin_results = pd.DataFrame(
|
| 79 |
-
"Cancer Subtype": ["LUAD"],
|
| 80 |
-
|
| 81 |
-
"Score": [0.75]
|
| 82 |
-
})
|
| 83 |
mock_paladin.return_value = mock_paladin_results
|
| 84 |
|
| 85 |
# Run analyze_slide
|
|
@@ -107,10 +116,10 @@ class TestSingleSlideRegression:
|
|
| 107 |
assert isinstance(aeon_results, pd.DataFrame)
|
| 108 |
assert isinstance(paladin_results, pd.DataFrame)
|
| 109 |
|
| 110 |
-
@patch(
|
| 111 |
-
@patch(
|
| 112 |
-
@patch(
|
| 113 |
-
@patch(
|
| 114 |
def test_gradio_single_slide_uses_analyze_slide(
|
| 115 |
self,
|
| 116 |
mock_to_csv,
|
|
@@ -121,40 +130,53 @@ class TestSingleSlideRegression:
|
|
| 121 |
"""Test that Gradio UI uses analyze_slide for single slide (not batch mode)."""
|
| 122 |
# Setup
|
| 123 |
import tempfile
|
|
|
|
| 124 |
with tempfile.TemporaryDirectory() as tmpdir:
|
| 125 |
mock_dir = Path(tmpdir) / "test_user"
|
| 126 |
mock_dir.mkdir()
|
| 127 |
mock_create_dir.return_value = mock_dir
|
| 128 |
|
| 129 |
-
settings_df = pd.DataFrame(
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
|
|
|
|
|
|
| 138 |
mock_validate.return_value = settings_df
|
| 139 |
|
| 140 |
mock_mask = Mock()
|
| 141 |
mock_aeon = pd.DataFrame({"Cancer Subtype": ["LUAD"], "Confidence": [0.9]})
|
| 142 |
-
mock_paladin = pd.DataFrame(
|
| 143 |
-
"Cancer Subtype": ["LUAD"],
|
| 144 |
-
|
| 145 |
-
"Score": [0.8]
|
| 146 |
-
})
|
| 147 |
mock_analyze_slide.return_value = (mock_mask, mock_aeon, mock_paladin)
|
| 148 |
|
| 149 |
from mosaic.ui.app import cancer_subtype_name_map
|
| 150 |
|
| 151 |
-
# Call analyze_slides with a single slide
|
| 152 |
-
with patch(
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
| 154 |
slides=["test.svs"],
|
| 155 |
settings_input=settings_df,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
user_dir=mock_dir,
|
| 157 |
)
|
|
|
|
|
|
|
|
|
|
| 158 |
|
| 159 |
# Verify analyze_slide was called (not analyze_slides_batch)
|
| 160 |
mock_analyze_slide.assert_called_once()
|
|
@@ -162,10 +184,11 @@ class TestSingleSlideRegression:
|
|
| 162 |
# Verify results
|
| 163 |
assert len(masks) == 1
|
| 164 |
|
| 165 |
-
|
| 166 |
-
@patch(
|
| 167 |
-
|
| 168 |
-
|
|
|
|
| 169 |
"""Test single-slide analysis when no tissue is found."""
|
| 170 |
# No tissue tiles found
|
| 171 |
mock_segment.return_value = None # segment_tissue returns None when no tissue
|
|
@@ -187,18 +210,20 @@ class TestSingleSlideRegression:
|
|
| 187 |
# Verify warning was raised
|
| 188 |
mock_warning.assert_called_once()
|
| 189 |
|
| 190 |
-
@patch(
|
| 191 |
-
@patch(
|
| 192 |
-
@patch(
|
| 193 |
-
@patch(
|
| 194 |
-
@patch(
|
| 195 |
-
@patch(
|
|
|
|
| 196 |
def test_single_slide_known_cancer_subtype_skips_aeon(
|
| 197 |
self,
|
| 198 |
mock_paladin,
|
| 199 |
mock_optimus,
|
| 200 |
mock_filter,
|
| 201 |
mock_ctranspath,
|
|
|
|
| 202 |
mock_mask,
|
| 203 |
mock_segment,
|
| 204 |
mock_slide_path,
|
|
@@ -211,16 +236,25 @@ class TestSingleSlideRegression:
|
|
| 211 |
mock_attrs = {}
|
| 212 |
mock_segment.return_value = (mock_polygon, None, mock_coords, mock_attrs)
|
| 213 |
mock_mask.return_value = Mock()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
mock_ctranspath.return_value = (np.random.rand(10, 768), np.array([[0, 0]]))
|
| 215 |
mock_filter.return_value = (None, np.array([[0, 0]]))
|
| 216 |
mock_optimus.return_value = np.random.rand(10, 1536)
|
| 217 |
-
mock_paladin.return_value = pd.DataFrame(
|
| 218 |
-
"Cancer Subtype": ["LUAD"],
|
| 219 |
-
|
| 220 |
-
"Score": [0.8]
|
| 221 |
-
})
|
| 222 |
|
| 223 |
-
with patch(
|
| 224 |
slide_mask, aeon_results, paladin_results = analyze_slide(
|
| 225 |
slide_path=mock_slide_path,
|
| 226 |
seg_config="Biopsy",
|
|
@@ -244,6 +278,7 @@ class TestBackwardCompatibility:
|
|
| 244 |
def test_analyze_slide_signature_unchanged(self):
|
| 245 |
"""Test that analyze_slide function signature is unchanged."""
|
| 246 |
from inspect import signature
|
|
|
|
| 247 |
sig = signature(analyze_slide)
|
| 248 |
|
| 249 |
# Verify required parameters exist
|
|
@@ -261,8 +296,8 @@ class TestBackwardCompatibility:
|
|
| 261 |
|
| 262 |
def test_analyze_slide_return_type_unchanged(self):
|
| 263 |
"""Test that analyze_slide returns the same tuple structure."""
|
| 264 |
-
with patch(
|
| 265 |
-
with patch(
|
| 266 |
result = analyze_slide(
|
| 267 |
slide_path="test.svs",
|
| 268 |
seg_config="Biopsy",
|
|
|
|
| 30 |
"Lung Adenocarcinoma": "LUAD",
|
| 31 |
}
|
| 32 |
|
| 33 |
+
@patch("mosaic.analysis.segment_tissue")
|
| 34 |
+
@patch("mosaic.analysis.draw_slide_mask")
|
| 35 |
+
@patch("mosaic.model_manager.load_all_models")
|
| 36 |
+
@patch("mosaic.analysis._extract_ctranspath_features")
|
| 37 |
+
@patch("mosaic.analysis.filter_features")
|
| 38 |
+
@patch("mosaic.analysis._extract_optimus_features")
|
| 39 |
+
@patch("mosaic.analysis._run_aeon_inference_with_model")
|
| 40 |
+
@patch("mosaic.analysis._run_paladin_inference_with_models")
|
| 41 |
def test_single_slide_analyze_slide_unchanged(
|
| 42 |
self,
|
| 43 |
mock_paladin,
|
|
|
|
| 45 |
mock_optimus,
|
| 46 |
mock_filter,
|
| 47 |
mock_ctranspath,
|
| 48 |
+
mock_load_models,
|
| 49 |
mock_mask,
|
| 50 |
mock_segment,
|
| 51 |
mock_slide_path,
|
|
|
|
| 62 |
mock_mask_image = Mock()
|
| 63 |
mock_mask.return_value = mock_mask_image
|
| 64 |
|
| 65 |
+
# Mock ModelCache with required attributes
|
| 66 |
+
mock_model_cache = Mock()
|
| 67 |
+
mock_model_cache.ctranspath_model = Mock()
|
| 68 |
+
mock_model_cache.optimus_model = Mock()
|
| 69 |
+
mock_model_cache.marker_classifier = Mock()
|
| 70 |
+
mock_model_cache.aeon_model = Mock()
|
| 71 |
+
mock_model_cache.device = Mock()
|
| 72 |
+
mock_model_cache.cleanup = Mock()
|
| 73 |
+
mock_load_models.return_value = mock_model_cache
|
| 74 |
+
|
| 75 |
mock_features = np.random.rand(100, 768)
|
| 76 |
mock_ctranspath.return_value = (mock_features, mock_coords)
|
| 77 |
|
|
|
|
| 81 |
mock_optimus_features = np.random.rand(50, 1536)
|
| 82 |
mock_optimus.return_value = mock_optimus_features
|
| 83 |
|
| 84 |
+
mock_aeon_results = pd.DataFrame(
|
| 85 |
+
{"Cancer Subtype": ["LUAD", "LUSC"], "Confidence": [0.85, 0.15]}
|
| 86 |
+
)
|
|
|
|
| 87 |
mock_aeon.return_value = mock_aeon_results
|
| 88 |
|
| 89 |
+
mock_paladin_results = pd.DataFrame(
|
| 90 |
+
{"Cancer Subtype": ["LUAD"], "Biomarker": ["EGFR"], "Score": [0.75]}
|
| 91 |
+
)
|
|
|
|
|
|
|
| 92 |
mock_paladin.return_value = mock_paladin_results
|
| 93 |
|
| 94 |
# Run analyze_slide
|
|
|
|
| 116 |
assert isinstance(aeon_results, pd.DataFrame)
|
| 117 |
assert isinstance(paladin_results, pd.DataFrame)
|
| 118 |
|
| 119 |
+
@patch("mosaic.ui.app.analyze_slide")
|
| 120 |
+
@patch("mosaic.ui.app.create_user_directory")
|
| 121 |
+
@patch("mosaic.ui.app.validate_settings")
|
| 122 |
+
@patch("pandas.DataFrame.to_csv") # Mock CSV writing to avoid directory issues
|
| 123 |
def test_gradio_single_slide_uses_analyze_slide(
|
| 124 |
self,
|
| 125 |
mock_to_csv,
|
|
|
|
| 130 |
"""Test that Gradio UI uses analyze_slide for single slide (not batch mode)."""
|
| 131 |
# Setup
|
| 132 |
import tempfile
|
| 133 |
+
|
| 134 |
with tempfile.TemporaryDirectory() as tmpdir:
|
| 135 |
mock_dir = Path(tmpdir) / "test_user"
|
| 136 |
mock_dir.mkdir()
|
| 137 |
mock_create_dir.return_value = mock_dir
|
| 138 |
|
| 139 |
+
settings_df = pd.DataFrame(
|
| 140 |
+
{
|
| 141 |
+
"Slide": ["test.svs"],
|
| 142 |
+
"Site Type": ["Primary"],
|
| 143 |
+
"Sex": ["Male"],
|
| 144 |
+
"Tissue Site": ["Lung"],
|
| 145 |
+
"Cancer Subtype": ["Unknown"],
|
| 146 |
+
"IHC Subtype": [""],
|
| 147 |
+
"Segmentation Config": ["Biopsy"],
|
| 148 |
+
}
|
| 149 |
+
)
|
| 150 |
mock_validate.return_value = settings_df
|
| 151 |
|
| 152 |
mock_mask = Mock()
|
| 153 |
mock_aeon = pd.DataFrame({"Cancer Subtype": ["LUAD"], "Confidence": [0.9]})
|
| 154 |
+
mock_paladin = pd.DataFrame(
|
| 155 |
+
{"Cancer Subtype": ["LUAD"], "Biomarker": ["EGFR"], "Score": [0.8]}
|
| 156 |
+
)
|
|
|
|
|
|
|
| 157 |
mock_analyze_slide.return_value = (mock_mask, mock_aeon, mock_paladin)
|
| 158 |
|
| 159 |
from mosaic.ui.app import cancer_subtype_name_map
|
| 160 |
|
| 161 |
+
# Call analyze_slides with a single slide (generator function)
|
| 162 |
+
with patch(
|
| 163 |
+
"mosaic.ui.app.get_oncotree_code_name",
|
| 164 |
+
return_value="Lung Adenocarcinoma",
|
| 165 |
+
):
|
| 166 |
+
gen = analyze_slides(
|
| 167 |
slides=["test.svs"],
|
| 168 |
settings_input=settings_df,
|
| 169 |
+
site_type="Primary",
|
| 170 |
+
sex="Male",
|
| 171 |
+
tissue_site="Lung",
|
| 172 |
+
cancer_subtype="Unknown",
|
| 173 |
+
ihc_subtype="",
|
| 174 |
+
seg_config="Biopsy",
|
| 175 |
user_dir=mock_dir,
|
| 176 |
)
|
| 177 |
+
# Consume generator to get final result
|
| 178 |
+
results = list(gen)
|
| 179 |
+
masks, aeon, aeon_btn, paladin, paladin_btn, user_dir = results[-1]
|
| 180 |
|
| 181 |
# Verify analyze_slide was called (not analyze_slides_batch)
|
| 182 |
mock_analyze_slide.assert_called_once()
|
|
|
|
| 184 |
# Verify results
|
| 185 |
assert len(masks) == 1
|
| 186 |
|
| 187 |
+
@patch("mosaic.analysis.segment_tissue")
|
| 188 |
+
@patch("mosaic.analysis.gr.Warning")
|
| 189 |
+
def test_single_slide_no_tissue_found(
|
| 190 |
+
self, mock_warning, mock_segment, mock_slide_path, cancer_subtype_name_map
|
| 191 |
+
):
|
| 192 |
"""Test single-slide analysis when no tissue is found."""
|
| 193 |
# No tissue tiles found
|
| 194 |
mock_segment.return_value = None # segment_tissue returns None when no tissue
|
|
|
|
| 210 |
# Verify warning was raised
|
| 211 |
mock_warning.assert_called_once()
|
| 212 |
|
| 213 |
+
@patch("mosaic.analysis.segment_tissue")
|
| 214 |
+
@patch("mosaic.analysis.draw_slide_mask")
|
| 215 |
+
@patch("mosaic.model_manager.load_all_models")
|
| 216 |
+
@patch("mosaic.analysis._extract_ctranspath_features")
|
| 217 |
+
@patch("mosaic.analysis.filter_features")
|
| 218 |
+
@patch("mosaic.analysis._extract_optimus_features")
|
| 219 |
+
@patch("mosaic.analysis._run_paladin_inference_with_models")
|
| 220 |
def test_single_slide_known_cancer_subtype_skips_aeon(
|
| 221 |
self,
|
| 222 |
mock_paladin,
|
| 223 |
mock_optimus,
|
| 224 |
mock_filter,
|
| 225 |
mock_ctranspath,
|
| 226 |
+
mock_load_models,
|
| 227 |
mock_mask,
|
| 228 |
mock_segment,
|
| 229 |
mock_slide_path,
|
|
|
|
| 236 |
mock_attrs = {}
|
| 237 |
mock_segment.return_value = (mock_polygon, None, mock_coords, mock_attrs)
|
| 238 |
mock_mask.return_value = Mock()
|
| 239 |
+
|
| 240 |
+
# Mock ModelCache
|
| 241 |
+
mock_model_cache = Mock()
|
| 242 |
+
mock_model_cache.ctranspath_model = Mock()
|
| 243 |
+
mock_model_cache.optimus_model = Mock()
|
| 244 |
+
mock_model_cache.marker_classifier = Mock()
|
| 245 |
+
mock_model_cache.aeon_model = Mock()
|
| 246 |
+
mock_model_cache.device = Mock()
|
| 247 |
+
mock_model_cache.cleanup = Mock()
|
| 248 |
+
mock_load_models.return_value = mock_model_cache
|
| 249 |
+
|
| 250 |
mock_ctranspath.return_value = (np.random.rand(10, 768), np.array([[0, 0]]))
|
| 251 |
mock_filter.return_value = (None, np.array([[0, 0]]))
|
| 252 |
mock_optimus.return_value = np.random.rand(10, 1536)
|
| 253 |
+
mock_paladin.return_value = pd.DataFrame(
|
| 254 |
+
{"Cancer Subtype": ["LUAD"], "Biomarker": ["EGFR"], "Score": [0.8]}
|
| 255 |
+
)
|
|
|
|
|
|
|
| 256 |
|
| 257 |
+
with patch("mosaic.analysis._run_aeon_inference_with_model") as mock_aeon:
|
| 258 |
slide_mask, aeon_results, paladin_results = analyze_slide(
|
| 259 |
slide_path=mock_slide_path,
|
| 260 |
seg_config="Biopsy",
|
|
|
|
| 278 |
def test_analyze_slide_signature_unchanged(self):
|
| 279 |
"""Test that analyze_slide function signature is unchanged."""
|
| 280 |
from inspect import signature
|
| 281 |
+
|
| 282 |
sig = signature(analyze_slide)
|
| 283 |
|
| 284 |
# Verify required parameters exist
|
|
|
|
| 296 |
|
| 297 |
def test_analyze_slide_return_type_unchanged(self):
|
| 298 |
"""Test that analyze_slide returns the same tuple structure."""
|
| 299 |
+
with patch("mosaic.analysis.segment_tissue", return_value=None): # No tissue
|
| 300 |
+
with patch("mosaic.analysis.gr.Warning"): # Mock the warning
|
| 301 |
result = analyze_slide(
|
| 302 |
slide_path="test.svs",
|
| 303 |
seg_config="Biopsy",
|
tests/test_ui_components.py
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for Gradio UI components and their interactions.
|
| 2 |
+
|
| 3 |
+
This module tests the Mosaic Gradio UI components, including:
|
| 4 |
+
- Settings validation
|
| 5 |
+
- Analysis workflow
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import pytest
|
| 9 |
+
import pandas as pd
|
| 10 |
+
from unittest.mock import Mock, patch, MagicMock
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
|
| 13 |
+
# Import after mocking (mocks are set up in conftest.py)
|
| 14 |
+
from mosaic.ui.app import (
|
| 15 |
+
analyze_slides,
|
| 16 |
+
set_cancer_subtype_maps,
|
| 17 |
+
)
|
| 18 |
+
from mosaic.ui.utils import SETTINGS_COLUMNS
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class TestSettingsValidation:
|
| 22 |
+
"""Test settings validation logic."""
|
| 23 |
+
|
| 24 |
+
@patch("mosaic.ui.utils.gr.Warning")
|
| 25 |
+
def test_invalid_cancer_subtype_defaults_to_unknown(
|
| 26 |
+
self, mock_warning, mock_cancer_subtype_maps
|
| 27 |
+
):
|
| 28 |
+
"""Test invalid cancer subtype generates warning and defaults to Unknown."""
|
| 29 |
+
from mosaic.ui.utils import validate_settings
|
| 30 |
+
|
| 31 |
+
cancer_subtype_name_map, reversed_map, cancer_subtypes = (
|
| 32 |
+
mock_cancer_subtype_maps
|
| 33 |
+
)
|
| 34 |
+
|
| 35 |
+
# Create DataFrame with invalid cancer subtype
|
| 36 |
+
df = pd.DataFrame(
|
| 37 |
+
{
|
| 38 |
+
"Slide": ["test.svs"],
|
| 39 |
+
"Site Type": ["Primary"],
|
| 40 |
+
"Sex": ["Unknown"],
|
| 41 |
+
"Tissue Site": ["Unknown"],
|
| 42 |
+
"Cancer Subtype": ["InvalidSubtype"],
|
| 43 |
+
"IHC Subtype": [""],
|
| 44 |
+
"Segmentation Config": ["Biopsy"],
|
| 45 |
+
}
|
| 46 |
+
)
|
| 47 |
+
|
| 48 |
+
result = validate_settings(
|
| 49 |
+
df, cancer_subtype_name_map, cancer_subtypes, reversed_map
|
| 50 |
+
)
|
| 51 |
+
|
| 52 |
+
# Should default to Unknown
|
| 53 |
+
assert result.iloc[0]["Cancer Subtype"] == "Unknown"
|
| 54 |
+
# Warning should be called
|
| 55 |
+
assert mock_warning.called
|
| 56 |
+
|
| 57 |
+
@patch("mosaic.ui.utils.gr.Warning")
|
| 58 |
+
def test_invalid_site_type_defaults_to_primary(
|
| 59 |
+
self, mock_warning, mock_cancer_subtype_maps
|
| 60 |
+
):
|
| 61 |
+
"""Test invalid site type generates warning and defaults to Primary."""
|
| 62 |
+
from mosaic.ui.utils import validate_settings
|
| 63 |
+
|
| 64 |
+
cancer_subtype_name_map, reversed_map, cancer_subtypes = (
|
| 65 |
+
mock_cancer_subtype_maps
|
| 66 |
+
)
|
| 67 |
+
|
| 68 |
+
df = pd.DataFrame(
|
| 69 |
+
{
|
| 70 |
+
"Slide": ["test.svs"],
|
| 71 |
+
"Site Type": ["InvalidSite"],
|
| 72 |
+
"Sex": ["Unknown"],
|
| 73 |
+
"Tissue Site": ["Unknown"],
|
| 74 |
+
"Cancer Subtype": ["Unknown"],
|
| 75 |
+
"IHC Subtype": [""],
|
| 76 |
+
"Segmentation Config": ["Biopsy"],
|
| 77 |
+
}
|
| 78 |
+
)
|
| 79 |
+
|
| 80 |
+
result = validate_settings(
|
| 81 |
+
df, cancer_subtype_name_map, cancer_subtypes, reversed_map
|
| 82 |
+
)
|
| 83 |
+
|
| 84 |
+
assert result.iloc[0]["Site Type"] == "Primary"
|
| 85 |
+
assert mock_warning.called
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
class TestAnalysisWorkflow:
|
| 89 |
+
"""Test analysis workflow with mocked analyze_slide."""
|
| 90 |
+
|
| 91 |
+
@patch("mosaic.ui.app.analyze_slide")
|
| 92 |
+
@patch("mosaic.ui.app.create_user_directory")
|
| 93 |
+
def test_single_slide_analysis_no_model_cache(
|
| 94 |
+
self,
|
| 95 |
+
mock_create_dir,
|
| 96 |
+
mock_analyze,
|
| 97 |
+
sample_files_single,
|
| 98 |
+
mock_analyze_slide_results,
|
| 99 |
+
mock_cancer_subtype_maps,
|
| 100 |
+
temp_output_dir,
|
| 101 |
+
):
|
| 102 |
+
"""Test single slide analysis doesn't load model cache."""
|
| 103 |
+
cancer_subtype_name_map, reversed_map, cancer_subtypes = (
|
| 104 |
+
mock_cancer_subtype_maps
|
| 105 |
+
)
|
| 106 |
+
set_cancer_subtype_maps(cancer_subtype_name_map, reversed_map, cancer_subtypes)
|
| 107 |
+
|
| 108 |
+
# Setup mocks
|
| 109 |
+
mock_create_dir.return_value = temp_output_dir
|
| 110 |
+
mock_analyze.return_value = mock_analyze_slide_results
|
| 111 |
+
|
| 112 |
+
# Generate settings DataFrame manually
|
| 113 |
+
settings_df = pd.DataFrame(
|
| 114 |
+
{
|
| 115 |
+
"Slide": ["test_slide_1.svs"],
|
| 116 |
+
"Site Type": ["Primary"],
|
| 117 |
+
"Sex": ["Unknown"],
|
| 118 |
+
"Tissue Site": ["Unknown"],
|
| 119 |
+
"Cancer Subtype": ["Unknown"],
|
| 120 |
+
"IHC Subtype": [""],
|
| 121 |
+
"Segmentation Config": ["Biopsy"],
|
| 122 |
+
}
|
| 123 |
+
)
|
| 124 |
+
|
| 125 |
+
# Call analyze_slides (generator)
|
| 126 |
+
gen = analyze_slides(
|
| 127 |
+
sample_files_single,
|
| 128 |
+
settings_df,
|
| 129 |
+
"Primary",
|
| 130 |
+
"Unknown",
|
| 131 |
+
"Unknown",
|
| 132 |
+
"Unknown",
|
| 133 |
+
"",
|
| 134 |
+
"Biopsy",
|
| 135 |
+
temp_output_dir,
|
| 136 |
+
)
|
| 137 |
+
|
| 138 |
+
# Consume generator
|
| 139 |
+
results = list(gen)
|
| 140 |
+
|
| 141 |
+
# Should yield at least once (intermediate + final)
|
| 142 |
+
assert len(results) >= 1
|
| 143 |
+
|
| 144 |
+
# analyze_slide should be called once
|
| 145 |
+
assert mock_analyze.call_count == 1
|
| 146 |
+
|
| 147 |
+
# Should be called with model_cache=None (single-slide mode)
|
| 148 |
+
call_kwargs = mock_analyze.call_args[1]
|
| 149 |
+
assert call_kwargs["model_cache"] is None
|
| 150 |
+
|
| 151 |
+
@patch("mosaic.ui.app.load_all_models")
|
| 152 |
+
@patch("mosaic.ui.app.analyze_slide")
|
| 153 |
+
@patch("mosaic.ui.app.create_user_directory")
|
| 154 |
+
def test_batch_analysis_loads_model_cache_once(
|
| 155 |
+
self,
|
| 156 |
+
mock_create_dir,
|
| 157 |
+
mock_analyze,
|
| 158 |
+
mock_load_models,
|
| 159 |
+
sample_files_multiple,
|
| 160 |
+
mock_analyze_slide_results,
|
| 161 |
+
mock_model_cache,
|
| 162 |
+
mock_cancer_subtype_maps,
|
| 163 |
+
temp_output_dir,
|
| 164 |
+
):
|
| 165 |
+
"""Test batch analysis loads models once and reuses cache."""
|
| 166 |
+
from PIL import Image
|
| 167 |
+
|
| 168 |
+
cancer_subtype_name_map, reversed_map, cancer_subtypes = (
|
| 169 |
+
mock_cancer_subtype_maps
|
| 170 |
+
)
|
| 171 |
+
set_cancer_subtype_maps(cancer_subtype_name_map, reversed_map, cancer_subtypes)
|
| 172 |
+
|
| 173 |
+
# Setup mocks - return new DataFrames on each call to avoid mutation issues
|
| 174 |
+
def mock_analyze_side_effect(*args, **kwargs):
|
| 175 |
+
mask = Image.new("RGB", (100, 100), color="red")
|
| 176 |
+
aeon_results = pd.DataFrame(
|
| 177 |
+
{"Cancer Subtype": ["LUAD"], "Confidence": [0.95]}
|
| 178 |
+
)
|
| 179 |
+
paladin_results = pd.DataFrame(
|
| 180 |
+
{
|
| 181 |
+
"Cancer Subtype": ["LUAD", "LUAD", "LUAD"],
|
| 182 |
+
"Biomarker": ["TP53", "KRAS", "EGFR"],
|
| 183 |
+
"Score": [0.85, 0.72, 0.63],
|
| 184 |
+
}
|
| 185 |
+
)
|
| 186 |
+
return (mask, aeon_results, paladin_results)
|
| 187 |
+
|
| 188 |
+
mock_create_dir.return_value = temp_output_dir
|
| 189 |
+
mock_load_models.return_value = mock_model_cache
|
| 190 |
+
mock_analyze.side_effect = mock_analyze_side_effect
|
| 191 |
+
|
| 192 |
+
# Generate settings DataFrame manually for 3 files
|
| 193 |
+
settings_df = pd.DataFrame(
|
| 194 |
+
{
|
| 195 |
+
"Slide": ["test_slide_1.svs", "test_slide_2.svs", "test_slide_3.svs"],
|
| 196 |
+
"Site Type": ["Primary", "Primary", "Primary"],
|
| 197 |
+
"Sex": ["Unknown", "Unknown", "Unknown"],
|
| 198 |
+
"Tissue Site": ["Unknown", "Unknown", "Unknown"],
|
| 199 |
+
"Cancer Subtype": ["Unknown", "Unknown", "Unknown"],
|
| 200 |
+
"IHC Subtype": ["", "", ""],
|
| 201 |
+
"Segmentation Config": ["Biopsy", "Biopsy", "Biopsy"],
|
| 202 |
+
}
|
| 203 |
+
)
|
| 204 |
+
|
| 205 |
+
# Call analyze_slides
|
| 206 |
+
gen = analyze_slides(
|
| 207 |
+
sample_files_multiple,
|
| 208 |
+
settings_df,
|
| 209 |
+
"Primary",
|
| 210 |
+
"Unknown",
|
| 211 |
+
"Unknown",
|
| 212 |
+
"Unknown",
|
| 213 |
+
"",
|
| 214 |
+
"Biopsy",
|
| 215 |
+
temp_output_dir,
|
| 216 |
+
)
|
| 217 |
+
|
| 218 |
+
# Consume generator
|
| 219 |
+
results = list(gen)
|
| 220 |
+
|
| 221 |
+
# load_all_models should be called once
|
| 222 |
+
assert mock_load_models.call_count == 1
|
| 223 |
+
|
| 224 |
+
# analyze_slide should be called 3 times (once per file)
|
| 225 |
+
assert mock_analyze.call_count == 3
|
| 226 |
+
|
| 227 |
+
# All calls should use the same model_cache
|
| 228 |
+
for call in mock_analyze.call_args_list:
|
| 229 |
+
assert call[1]["model_cache"] == mock_model_cache
|
| 230 |
+
|
| 231 |
+
# cleanup should be called
|
| 232 |
+
assert mock_model_cache.cleanup.called
|
| 233 |
+
|
| 234 |
+
@patch("mosaic.ui.app.create_user_directory")
|
| 235 |
+
def test_no_slides_raises_error(
|
| 236 |
+
self, mock_create_dir, mock_cancer_subtype_maps, temp_output_dir
|
| 237 |
+
):
|
| 238 |
+
"""Test that no slides uploaded raises gr.Error."""
|
| 239 |
+
import gradio as gr
|
| 240 |
+
|
| 241 |
+
cancer_subtype_name_map, reversed_map, cancer_subtypes = (
|
| 242 |
+
mock_cancer_subtype_maps
|
| 243 |
+
)
|
| 244 |
+
set_cancer_subtype_maps(cancer_subtype_name_map, reversed_map, cancer_subtypes)
|
| 245 |
+
|
| 246 |
+
mock_create_dir.return_value = temp_output_dir
|
| 247 |
+
|
| 248 |
+
# Call with no slides
|
| 249 |
+
gen = analyze_slides(
|
| 250 |
+
None,
|
| 251 |
+
None,
|
| 252 |
+
"Primary",
|
| 253 |
+
"Unknown",
|
| 254 |
+
"Unknown",
|
| 255 |
+
"Unknown",
|
| 256 |
+
"",
|
| 257 |
+
"Biopsy",
|
| 258 |
+
temp_output_dir,
|
| 259 |
+
)
|
| 260 |
+
|
| 261 |
+
# Should raise gr.Error
|
| 262 |
+
with pytest.raises(gr.Error):
|
| 263 |
+
next(gen)
|
| 264 |
+
|
| 265 |
+
@patch("mosaic.ui.app.create_user_directory")
|
| 266 |
+
def test_settings_mismatch_raises_error(
|
| 267 |
+
self,
|
| 268 |
+
mock_create_dir,
|
| 269 |
+
sample_files_multiple,
|
| 270 |
+
sample_settings_df,
|
| 271 |
+
mock_cancer_subtype_maps,
|
| 272 |
+
temp_output_dir,
|
| 273 |
+
):
|
| 274 |
+
"""Test that settings count mismatch raises gr.Error."""
|
| 275 |
+
import gradio as gr
|
| 276 |
+
|
| 277 |
+
cancer_subtype_name_map, reversed_map, cancer_subtypes = (
|
| 278 |
+
mock_cancer_subtype_maps
|
| 279 |
+
)
|
| 280 |
+
set_cancer_subtype_maps(cancer_subtype_name_map, reversed_map, cancer_subtypes)
|
| 281 |
+
|
| 282 |
+
mock_create_dir.return_value = temp_output_dir
|
| 283 |
+
|
| 284 |
+
# sample_files_multiple has 3 files, sample_settings_df has 3 rows
|
| 285 |
+
# Manually create mismatch by using only 2 files
|
| 286 |
+
two_files = sample_files_multiple[:2]
|
| 287 |
+
|
| 288 |
+
gen = analyze_slides(
|
| 289 |
+
two_files,
|
| 290 |
+
sample_settings_df,
|
| 291 |
+
"Primary",
|
| 292 |
+
"Unknown",
|
| 293 |
+
"Unknown",
|
| 294 |
+
"Unknown",
|
| 295 |
+
"",
|
| 296 |
+
"Biopsy",
|
| 297 |
+
temp_output_dir,
|
| 298 |
+
)
|
| 299 |
+
|
| 300 |
+
# Should raise gr.Error about mismatch
|
| 301 |
+
with pytest.raises(gr.Error):
|
| 302 |
+
next(gen)
|
tests/test_ui_events.py
ADDED
|
@@ -0,0 +1,349 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for UI event handlers and state management.
|
| 2 |
+
|
| 3 |
+
This module tests complex event interactions in the Mosaic Gradio UI, including:
|
| 4 |
+
- Settings state management across events
|
| 5 |
+
- Generator behavior and incremental updates
|
| 6 |
+
- Error and warning display
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import pytest
|
| 10 |
+
import pandas as pd
|
| 11 |
+
from unittest.mock import Mock, patch, MagicMock
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
import inspect
|
| 14 |
+
|
| 15 |
+
from mosaic.ui.app import (
|
| 16 |
+
analyze_slides,
|
| 17 |
+
set_cancer_subtype_maps,
|
| 18 |
+
)
|
| 19 |
+
from mosaic.ui.utils import SETTINGS_COLUMNS, validate_settings, load_settings
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class TestSettingsStateManagement:
    """Test settings state management across multiple events."""

    def test_csv_upload_replaces_settings(
        self, sample_csv_valid, mock_cancer_subtype_maps
    ):
        """Test CSV upload replaces existing settings."""
        name_map, reversed_map, subtypes = mock_cancer_subtype_maps

        # Load the CSV and run it through the same validation path the UI uses.
        raw_df = load_settings(sample_csv_valid)
        checked_df = validate_settings(raw_df, name_map, subtypes, reversed_map)

        # The freshly loaded settings should fully replace prior state:
        # three rows, in the order they appear in the CSV.
        assert len(checked_df) == 3
        assert checked_df.iloc[0]["Slide"] == "slide1.svs"
        assert checked_df.iloc[1]["Slide"] == "slide2.svs"
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
class TestGeneratorBehavior:
    """Test generator behavior for incremental updates.

    These tests patch the analysis entry points (``analyze_slide``,
    ``create_user_directory``, ``load_all_models``) so no real inference runs;
    they only verify the cadence and shape of ``analyze_slides``'s yields.
    """

    @staticmethod
    def _make_settings_df(slides):
        """Build a per-slide settings DataFrame with default values.

        Factored out because the tests below previously duplicated this
        literal verbatim for 1 and 3 slides.
        """
        n = len(slides)
        return pd.DataFrame(
            {
                "Slide": list(slides),
                "Site Type": ["Primary"] * n,
                "Sex": ["Unknown"] * n,
                "Tissue Site": ["Unknown"] * n,
                "Cancer Subtype": ["Unknown"] * n,
                "IHC Subtype": [""] * n,
                "Segmentation Config": ["Biopsy"] * n,
            }
        )

    @staticmethod
    def _fresh_analysis_results(*args, **kwargs):
        """``analyze_slide`` side effect returning fresh objects per call.

        Returning new DataFrames/images each time avoids the cross-slide
        aliasing a shared ``return_value`` mock would introduce.
        """
        from PIL import Image

        mask = Image.new("RGB", (100, 100), color="red")
        aeon_results = pd.DataFrame(
            {"Cancer Subtype": ["LUAD"], "Confidence": [0.95]}
        )
        paladin_results = pd.DataFrame(
            {
                "Cancer Subtype": ["LUAD", "LUAD", "LUAD"],
                "Biomarker": ["TP53", "KRAS", "EGFR"],
                "Score": [0.85, 0.72, 0.63],
            }
        )
        return (mask, aeon_results, paladin_results)

    @patch("mosaic.ui.app.analyze_slide")
    @patch("mosaic.ui.app.create_user_directory")
    def test_analyze_slides_is_generator(
        self,
        mock_create_dir,
        mock_analyze,
        sample_files_single,
        mock_analyze_slide_results,
        mock_cancer_subtype_maps,
        temp_output_dir,
    ):
        """Test analyze_slides returns a generator."""
        cancer_subtype_name_map, reversed_map, cancer_subtypes = (
            mock_cancer_subtype_maps
        )
        set_cancer_subtype_maps(cancer_subtype_name_map, reversed_map, cancer_subtypes)

        mock_create_dir.return_value = temp_output_dir
        mock_analyze.return_value = mock_analyze_slide_results

        settings_df = self._make_settings_df(["test_slide_1.svs"])

        result = analyze_slides(
            sample_files_single,
            settings_df,
            "Primary",
            "Unknown",
            "Unknown",
            "Unknown",
            "",
            "Biopsy",
            temp_output_dir,
        )

        # Calling the function must not do any work yet — it yields lazily.
        assert inspect.isgenerator(result)

    @patch("mosaic.ui.app.load_all_models")
    @patch("mosaic.ui.app.analyze_slide")
    @patch("mosaic.ui.app.create_user_directory")
    def test_intermediate_yields_update_masks_only(
        self,
        mock_create_dir,
        mock_analyze,
        mock_load_models,
        sample_files_multiple,
        mock_analyze_slide_results,
        mock_model_cache,
        mock_cancer_subtype_maps,
        temp_output_dir,
    ):
        """Test intermediate yields show only slide masks."""
        cancer_subtype_name_map, reversed_map, cancer_subtypes = (
            mock_cancer_subtype_maps
        )
        set_cancer_subtype_maps(cancer_subtype_name_map, reversed_map, cancer_subtypes)

        mock_create_dir.return_value = temp_output_dir
        mock_load_models.return_value = mock_model_cache
        mock_analyze.side_effect = self._fresh_analysis_results

        settings_df = self._make_settings_df(
            ["test_slide_1.svs", "test_slide_2.svs", "test_slide_3.svs"]
        )

        gen = analyze_slides(
            sample_files_multiple,
            settings_df,
            "Primary",
            "Unknown",
            "Unknown",
            "Unknown",
            "",
            "Biopsy",
            temp_output_dir,
        )

        # Get first intermediate yield (after first slide).
        first_yield = next(gen)

        # Each yield is a 6-tuple of UI outputs.
        assert len(first_yield) == 6

        # First element is slide_masks; only one slide has completed so far.
        slide_masks = first_yield[0]
        assert len(slide_masks) == 1

    @patch("mosaic.ui.app.load_all_models")
    @patch("mosaic.ui.app.analyze_slide")
    @patch("mosaic.ui.app.create_user_directory")
    def test_final_yield_has_complete_results(
        self,
        mock_create_dir,
        mock_analyze,
        mock_load_models,
        sample_files_multiple,
        mock_analyze_slide_results,
        mock_model_cache,
        mock_cancer_subtype_maps,
        temp_output_dir,
    ):
        """Test final yield contains complete results."""
        cancer_subtype_name_map, reversed_map, cancer_subtypes = (
            mock_cancer_subtype_maps
        )
        set_cancer_subtype_maps(cancer_subtype_name_map, reversed_map, cancer_subtypes)

        mock_create_dir.return_value = temp_output_dir
        mock_load_models.return_value = mock_model_cache
        mock_analyze.side_effect = self._fresh_analysis_results

        settings_df = self._make_settings_df(
            ["test_slide_1.svs", "test_slide_2.svs", "test_slide_3.svs"]
        )

        gen = analyze_slides(
            sample_files_multiple,
            settings_df,
            "Primary",
            "Unknown",
            "Unknown",
            "Unknown",
            "",
            "Biopsy",
            temp_output_dir,
        )

        # Drain the generator; the last yield carries the complete result set.
        results = list(gen)
        final_yield = results[-1]

        assert len(final_yield) == 6
        slide_masks = final_yield[0]
        assert len(slide_masks) == 3  # All 3 slides
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
class TestErrorDisplay:
    """Test error and warning display behavior."""

    @patch("mosaic.ui.app.create_user_directory")
    def test_no_slides_raises_gr_error(
        self, mock_create_dir, mock_cancer_subtype_maps, temp_output_dir
    ):
        """Test that no slides raises gr.Error."""
        import gradio as gr

        name_map, reversed_map, subtypes = mock_cancer_subtype_maps
        set_cancer_subtype_maps(name_map, reversed_map, subtypes)
        mock_create_dir.return_value = temp_output_dir

        # No files and no settings supplied.
        gen = analyze_slides(
            None,
            None,
            "Primary",
            "Unknown",
            "Unknown",
            "Unknown",
            "",
            "Biopsy",
            temp_output_dir,
        )

        # The error surfaces on first advance of the generator.
        with pytest.raises(gr.Error):
            next(gen)

    @patch("mosaic.ui.utils.gr.Warning")
    def test_validation_warnings_shown(self, mock_warning, mock_cancer_subtype_maps):
        """Test validation warnings are displayed."""
        name_map, reversed_map, subtypes = mock_cancer_subtype_maps

        # Two rows, each carrying invalid values in different columns.
        df = pd.DataFrame(
            [
                {
                    "Slide": "test1.svs",
                    "Site Type": "InvalidSite",
                    "Sex": "Unknown",
                    "Tissue Site": "Unknown",
                    "Cancer Subtype": "InvalidSubtype",
                    "IHC Subtype": "",
                    "Segmentation Config": "Biopsy",
                },
                {
                    "Slide": "test2.svs",
                    "Site Type": "Primary",
                    "Sex": "InvalidSex",
                    "Tissue Site": "Unknown",
                    "Cancer Subtype": "Unknown",
                    "IHC Subtype": "",
                    "Segmentation Config": "InvalidConfig",
                },
            ]
        )

        result = validate_settings(df, name_map, subtypes, reversed_map)

        # At least one warning must have been raised for the invalid values.
        assert mock_warning.call_count >= 1

        # Each invalid cell falls back to its column default.
        assert result.iloc[0]["Site Type"] == "Primary"  # Invalid → Primary
        assert result.iloc[0]["Cancer Subtype"] == "Unknown"  # Invalid → Unknown
        assert result.iloc[1]["Sex"] == "Unknown"  # Invalid → Unknown
        assert result.iloc[1]["Segmentation Config"] == "Biopsy"  # Invalid → Biopsy

    @patch("mosaic.ui.app.create_user_directory")
    def test_settings_mismatch_raises_gr_error(
        self,
        mock_create_dir,
        sample_files_multiple,
        sample_settings_df,
        mock_cancer_subtype_maps,
        temp_output_dir,
    ):
        """Test settings/files count mismatch raises gr.Error."""
        import gradio as gr

        name_map, reversed_map, subtypes = mock_cancer_subtype_maps
        set_cancer_subtype_maps(name_map, reversed_map, subtypes)
        mock_create_dir.return_value = temp_output_dir

        # Create a mismatch: 2 files against the 3-row settings fixture.
        two_files = sample_files_multiple[:2]

        gen = analyze_slides(
            two_files,
            sample_settings_df,
            "Primary",
            "Unknown",
            "Unknown",
            "Unknown",
            "",
            "Biopsy",
            temp_output_dir,
        )

        # The mismatch is detected on the first advance.
        with pytest.raises(gr.Error):
            next(gen)
|
uv.lock
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|