raylim committed
Commit c5e7bb2 · unverified · 2 parents: 24b5de2 a3010ef

Merge pull request #7 from pathology-data-mining/dev

Add comprehensive sex and tissue site parameter support

.gitignore CHANGED
@@ -17,3 +17,5 @@ data/
 htmlcov/
 flagged/
 gradio_cached_examples/
+*.svs
+*.png
pyproject.toml CHANGED
@@ -10,11 +10,14 @@ readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
     "gradio>=5.49.0",
+    "lightning>=2.6.0",
     "loguru>=0.7.3",
     "memory-profiler>=0.61.0",
     "mussel[torch-gpu]",
     "paladin",
+    "seaborn>=0.13.2",
     "spaces>=0.30.0",
+    "statsmodels>=0.14.6",
 ]

 [project.scripts]
scripts/README.md ADDED
@@ -0,0 +1,247 @@
+# Mosaic Scripts
+
+This directory contains utility scripts for working with the Mosaic pipeline, particularly for Aeon model testing and deployment.
+
+## Aeon Model Scripts
+
+### 1. export_aeon_checkpoint.py
+
+Export a PyTorch Lightning checkpoint to pickle format for inference.
+
+**Usage:**
+```bash
+python scripts/export_aeon_checkpoint.py \
+    --checkpoint data/checkpoint.ckpt \
+    --output data/aeon_model.pkl \
+    --metadata-dir data/metadata
+```
+
+**Arguments:**
+- `--checkpoint`: Path to PyTorch Lightning checkpoint (.ckpt file)
+- `--output`: Path to save exported model (.pkl file)
+- `--metadata-dir`: Directory containing metadata files (default: data/metadata)
+
+**Requirements:**
+- paladin package from the git repo (must have AeonLightningModule)
+- PyTorch Lightning
+- Metadata files: n_classes.txt, ontology_embedding_dim.txt, target_dict.tsv
+
+**Example:**
+```bash
+# Export the checkpoint
+uv run python scripts/export_aeon_checkpoint.py \
+    --checkpoint data/checkpoint.ckpt \
+    --output data/aeon_model.pkl
+
+# Output:
+# Loading metadata from data/metadata...
+# Loading checkpoint from data/checkpoint.ckpt...
+# Saving model to data/aeon_model.pkl...
+# ✓ Successfully exported checkpoint to data/aeon_model.pkl
+#   Model size: 118.0 MB
+#   Model class: AeonLateAggregator
+#   Number of classes: 160
+#   Ontology embedding dim: 20
+#   Number of histologies: 160
+```
+
+### 2. run_aeon_tests.sh
+
+Run the Aeon model on test slides and validate predictions.
+
+**Usage:**
+```bash
+./scripts/run_aeon_tests.sh
+```
+
+**Configuration:**
+The script reads test samples from `test_slides/test_samples.json` and processes each slide through the full Mosaic pipeline with:
+- Cancer subtype: Unknown (triggers Aeon inference)
+- Segmentation config: Biopsy
+- Number of workers: 4
+
+**Output:**
+- Results saved to `test_slides/results/{slide_id}/`
+- Logs saved to `test_slides/logs/`
+- Summary showing passed/failed tests
+
+**Example Output:**
+```
+=========================================
+Aeon Model Test Suite
+=========================================
+
+Found 3 test slides
+
+=========================================
+Processing slide 1/3: 881837
+=========================================
+Ground Truth:
+  Cancer Subtype: BLCA
+  Site Type: Primary
+  Sex: Male
+  Tissue Site: Bladder
+
+Running Mosaic pipeline...
+
+Aeon Prediction:
+  Predicted: BLCA
+  Confidence: 0.9819
+
+✓ PASS: Prediction matches ground truth
+
+[... continues for all slides ...]
+
+=========================================
+Test Summary
+=========================================
+Total slides: 3
+Passed: 3
+Failed: 0
+
+All tests passed!
+```
+
+### 3. verify_aeon_results.py
+
+Verify Aeon test results against expected ground truth.
+
+**Usage:**
+```bash
+python scripts/verify_aeon_results.py \
+    --test-samples test_slides/test_samples.json \
+    --results-dir test_slides/results \
+    --output test_slides/verification_report.json
+```
+
+**Arguments:**
+- `--test-samples`: Path to test samples JSON file (default: test_slides/test_samples.json)
+- `--results-dir`: Directory containing results (default: test_slides/results)
+- `--output`: Optional path to save verification report as JSON
+
+**Example:**
+```bash
+# Verify results and save report
+uv run python scripts/verify_aeon_results.py \
+    --output test_slides/verification_report.json
+
+# Output:
+# ================================================================================
+# Aeon Model Verification Report
+# ================================================================================
+#
+# Slide: 881837
+#   Ground Truth: BLCA
+#   Site Type: Primary
+#   Sex: Male
+#   Tissue Site: Bladder
+#   Predicted: BLCA
+#   Confidence: 0.9819 (98.19%)
+#   Status: ✓ PASS
+#
+# [... continues for all slides ...]
+#
+# ================================================================================
+# Summary
+# ================================================================================
+# Total slides: 3
+# Passed: 3 (100.0%)
+# Failed: 0 (0.0%)
+#
+# ✓ All tests passed!
+#
+# Confidence Statistics (for passed tests):
+#   Average: 0.9910 (99.10%)
+#   Minimum: 0.9819 (98.19%)
+#   Maximum: 0.9961 (99.61%)
+```
+
+## Workflow
+
+### Complete Testing Workflow
+
+1. **Export checkpoint** (if needed):
+   ```bash
+   uv run python scripts/export_aeon_checkpoint.py \
+       --checkpoint data/checkpoint.ckpt \
+       --output data/aeon_model.pkl
+   ```
+
+2. **Run tests**:
+   ```bash
+   ./scripts/run_aeon_tests.sh
+   ```
+
+3. **Verify results**:
+   ```bash
+   uv run python scripts/verify_aeon_results.py \
+       --output test_slides/verification_report.json
+   ```
+
+### Quick Verification
+
+If you already have test results and just want to verify them:
+
+```bash
+uv run python scripts/verify_aeon_results.py
+```
+
+## Test Samples Format
+
+The test samples JSON file should have this format:
+
+```json
+[
+  {
+    "slide_id": "881837",
+    "cancer_subtype": "BLCA",
+    "site_type": "Primary",
+    "sex": "Male",
+    "tissue_site": "Bladder"
+  },
+  {
+    "slide_id": "744547",
+    "cancer_subtype": "HCC",
+    "site_type": "Metastatic",
+    "sex": "Male",
+    "tissue_site": "Liver"
+  }
+]
+```
+
+## Dependencies
+
+All scripts require:
+- Python 3.10+
+- uv package manager
+- Mosaic package with dependencies
+
+Additional requirements for checkpoint export:
+- paladin from the git repository (dev branch)
+- PyTorch Lightning
+
+## Exit Codes
+
+- `0`: Success (all tests passed)
+- `1`: Failure (one or more tests failed)
+
+## Troubleshooting
+
+### "AeonLightningModule not found"
+```bash
+uv sync --upgrade-package paladin
+```
+
+### "Metadata files not found"
+Make sure you have:
+- `data/metadata/n_classes.txt`
+- `data/metadata/ontology_embedding_dim.txt`
+- `data/metadata/target_dict.tsv`
+
+### "Test slides not found"
+Place your test slides in the `test_slides/` directory and update `test_samples.json` with correct paths.
+
+## See Also
+
+- [AEON_TEST_SUMMARY.md](../test_slides/AEON_TEST_SUMMARY.md) - Detailed test results and validation
+- [README.md](../README.md) - Main Mosaic documentation
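The scripts added in this commit tolerate two key spellings per sample (`slide_id` vs `image_id`, `cancer_subtype` vs `cancer_type`, both via `.get(...) or .get(...)` fallbacks). A minimal standalone loader using the same fallback, on illustrative inline data rather than a real `test_samples.json`:

```python
import json

# Inline stand-in for test_slides/test_samples.json; one sample per spelling.
samples = json.loads("""
[
  {"slide_id": "881837", "cancer_subtype": "BLCA"},
  {"image_id": "744547", "cancer_type": "HCC"}
]
""")

# Same key fallback as run_aeon_tests.sh and verify_aeon_results.py.
rows = [
    (s.get("slide_id") or s.get("image_id"),
     s.get("cancer_subtype") or s.get("cancer_type"))
    for s in samples
]
print(rows)  # [('881837', 'BLCA'), ('744547', 'HCC')]
```

Note the fallback only covers missing keys, not `null` values that should be kept.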
scripts/export_aeon_checkpoint.py ADDED
@@ -0,0 +1,142 @@
+#!/usr/bin/env python
+"""
+Export Aeon PyTorch Lightning checkpoint to pickle format for inference.
+
+This script converts a PyTorch Lightning checkpoint (.ckpt) file to a pickle
+(.pkl) file that can be used with the Mosaic inference pipeline.
+
+Usage:
+    python export_aeon_checkpoint.py \
+        --checkpoint data/checkpoint.ckpt \
+        --output data/aeon_model.pkl \
+        --metadata-dir data/metadata
+
+Requirements:
+    - paladin package from git repo (must have AeonLightningModule)
+    - PyTorch Lightning
+    - Access to metadata files (n_classes.txt, ontology_embedding_dim.txt, target_dict.tsv)
+"""
+
+import argparse
+import json
+import pickle
+from pathlib import Path
+
+
+def load_metadata(metadata_dir: Path):
+    """Load metadata required for model initialization.
+
+    Args:
+        metadata_dir: Directory containing metadata files
+
+    Returns:
+        SimpleMetadata object with n_classes, ontology_embedding_dim, and target_dicts
+    """
+    # Read n_classes
+    with open(metadata_dir / "n_classes.txt") as f:
+        n_classes = int(f.read().strip())
+
+    # Read ontology_embedding_dim
+    with open(metadata_dir / "ontology_embedding_dim.txt") as f:
+        ontology_embedding_dim = int(f.read().strip())
+
+    # Read target_dict (JSON format with single quotes)
+    with open(metadata_dir / "target_dict.tsv") as f:
+        target_dict_str = f.read().strip().replace("'", '"')
+        target_dict = json.loads(target_dict_str)
+
+    # Create simple metadata object
+    class SimpleMetadata:
+        def __init__(self, n_classes, ontology_embedding_dim, target_dict):
+            self.n_classes = n_classes
+            self.ontology_embedding_dim = ontology_embedding_dim
+            self.target_dicts = [target_dict]
+
+    return SimpleMetadata(n_classes, ontology_embedding_dim, target_dict)
+
+
+def export_checkpoint(checkpoint_path: Path, output_path: Path, metadata_dir: Path):
+    """Export PyTorch Lightning checkpoint to pickle format.
+
+    Args:
+        checkpoint_path: Path to .ckpt file
+        output_path: Path to save .pkl file
+        metadata_dir: Directory containing metadata files
+    """
+    try:
+        from paladin.pl_modules.aeon import AeonLightningModule
+    except ImportError:
+        raise ImportError(
+            "Failed to import AeonLightningModule. "
+            "Make sure paladin is installed from the git repository:\n"
+            "  uv sync --upgrade-package paladin"
+        )
+
+    print(f"Loading metadata from {metadata_dir}...")
+    metadata = load_metadata(metadata_dir)
+
+    print(f"Loading checkpoint from {checkpoint_path}...")
+    pl_module = AeonLightningModule.load_from_checkpoint(
+        str(checkpoint_path),
+        metadata=metadata
+    )
+
+    # Extract the model
+    model = pl_module.model
+
+    print(f"Saving model to {output_path}...")
+    with open(output_path, "wb") as f:
+        pickle.dump(model, f)
+
+    print(f"✓ Successfully exported checkpoint to {output_path}")
+
+    # Print model info
+    file_size = output_path.stat().st_size / (1024 * 1024)  # MB
+    print(f"  Model size: {file_size:.1f} MB")
+    print(f"  Model class: {type(model).__name__}")
+    print(f"  Number of classes: {metadata.n_classes}")
+    print(f"  Ontology embedding dim: {metadata.ontology_embedding_dim}")
+    print(f"  Number of histologies: {len(metadata.target_dicts[0]['histologies'])}")
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Export Aeon PyTorch Lightning checkpoint to pickle format"
+    )
+    parser.add_argument(
+        "--checkpoint",
+        type=Path,
+        required=True,
+        help="Path to PyTorch Lightning checkpoint (.ckpt)"
+    )
+    parser.add_argument(
+        "--output",
+        type=Path,
+        required=True,
+        help="Path to save exported model (.pkl)"
+    )
+    parser.add_argument(
+        "--metadata-dir",
+        type=Path,
+        default=Path("data/metadata"),
+        help="Directory containing metadata files (default: data/metadata)"
+    )
+
+    args = parser.parse_args()
+
+    # Validate inputs
+    if not args.checkpoint.exists():
+        raise FileNotFoundError(f"Checkpoint not found: {args.checkpoint}")
+
+    if not args.metadata_dir.exists():
+        raise FileNotFoundError(f"Metadata directory not found: {args.metadata_dir}")
+
+    # Create output directory if needed
+    args.output.parent.mkdir(parents=True, exist_ok=True)
+
+    # Export checkpoint
+    export_checkpoint(args.checkpoint, args.output, args.metadata_dir)
+
+
+if __name__ == "__main__":
+    main()
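The export path ends in a bare `pickle.dump` of the model object. One caveat worth knowing: pickle stores classes by reference, so the class definition (here, paladin's `AeonLateAggregator`) must be importable wherever the `.pkl` is later loaded. A self-contained sketch of the round-trip using a hypothetical stand-in class, not the real model:

```python
import pickle
import tempfile
from pathlib import Path

class DummyModel:
    """Hypothetical stand-in for the exported model object."""
    def __init__(self, n_classes):
        self.n_classes = n_classes

# Export side: what export_aeon_checkpoint.py does after extracting pl_module.model.
out = Path(tempfile.mkdtemp()) / "aeon_model.pkl"
with open(out, "wb") as f:
    pickle.dump(DummyModel(n_classes=160), f)

# Load side: pickle.load only works if DummyModel is importable here too;
# unpickling the real export similarly requires paladin to be installed.
with open(out, "rb") as f:
    model = pickle.load(f)
print(model.n_classes)  # 160
```

This is why the exported pickle is tied to the paladin package version that defined the class, unlike a plain `state_dict` export.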
scripts/run_aeon_tests.sh ADDED
@@ -0,0 +1,175 @@
+#!/bin/bash
+# Aeon Model Test Script
+# This script runs the Aeon cancer subtype prediction model on test slides
+# for reproducibility and validation.
+
+set -e  # Exit on error
+
+# Configuration
+TEST_SAMPLES_FILE="test_slides/test_samples.json"
+RESULTS_DIR="test_slides/results"
+LOG_DIR="test_slides/logs"
+SEGMENTATION_CONFIG="Biopsy"
+NUM_WORKERS=4
+
+# Colors for output
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+RED='\033[0;31m'
+NC='\033[0m'  # No Color
+
+echo "========================================="
+echo "Aeon Model Test Suite"
+echo "========================================="
+echo ""
+
+# Create directories
+mkdir -p "${RESULTS_DIR}"
+mkdir -p "${LOG_DIR}"
+
+# Check if test samples file exists
+if [ ! -f "${TEST_SAMPLES_FILE}" ]; then
+    echo -e "${RED}Error: Test samples file not found: ${TEST_SAMPLES_FILE}${NC}"
+    exit 1
+fi
+
+# Read test samples
+echo "Reading test samples from ${TEST_SAMPLES_FILE}..."
+SLIDE_IDS=$(python3 -c "
+import json
+with open('${TEST_SAMPLES_FILE}') as f:
+    samples = json.load(f)
+for sample in samples:
+    slide_id = sample.get('slide_id') or sample.get('image_id')
+    print(slide_id)
+")
+
+# Count slides
+NUM_SLIDES=$(echo "${SLIDE_IDS}" | wc -l)
+echo -e "${GREEN}Found ${NUM_SLIDES} test slides${NC}"
+echo ""
+
+# Process each slide
+CURRENT=0
+PASSED=0
+FAILED=0
+
+for SLIDE_ID in ${SLIDE_IDS}; do
+    CURRENT=$((CURRENT + 1))
+
+    echo "========================================="
+    echo -e "${YELLOW}Processing slide ${CURRENT}/${NUM_SLIDES}: ${SLIDE_ID}${NC}"
+    echo "========================================="
+
+    # Get slide metadata
+    METADATA=$(python3 -c "
+import json
+with open('${TEST_SAMPLES_FILE}') as f:
+    samples = json.load(f)
+for sample in samples:
+    slide_id = sample.get('slide_id') or sample.get('image_id')
+    if slide_id == '${SLIDE_ID}':
+        cancer_subtype = sample.get('cancer_subtype') or sample.get('cancer_type')
+        print(f\"{cancer_subtype}|{sample['site_type']}|{sample['sex']}|{sample['tissue_site']}\")
+        break
+")
+
+    IFS='|' read -r CANCER_SUBTYPE SITE_TYPE SEX TISSUE_SITE <<< "${METADATA}"
+
+    echo "Ground Truth:"
+    echo "  Cancer Subtype: ${CANCER_SUBTYPE}"
+    echo "  Site Type: ${SITE_TYPE}"
+    echo "  Sex: ${SEX}"
+    echo "  Tissue Site: ${TISSUE_SITE}"
+    echo ""
+
+    # Find slide file
+    SLIDE_FILE=$(find test_slides -name "${SLIDE_ID}.svs" -o -name "${SLIDE_ID}.tiff" -o -name "${SLIDE_ID}.ndpi" 2>/dev/null | head -1)
+
+    if [ -z "${SLIDE_FILE}" ]; then
+        echo -e "${RED}Error: Slide file not found for ${SLIDE_ID}${NC}"
+        FAILED=$((FAILED + 1))
+        continue
+    fi
+
+    echo "Slide file: ${SLIDE_FILE}"
+    echo ""
+
+    # Run Mosaic pipeline with Aeon inference
+    LOG_FILE="${LOG_DIR}/${SLIDE_ID}_aeon_test.log"
+
+    echo "Running Mosaic pipeline..."
+    if uv run python -m mosaic.cli \
+        --input-slide "${SLIDE_FILE}" \
+        --output-dir "${RESULTS_DIR}/${SLIDE_ID}" \
+        --cancer-subtype "Unknown" \
+        --site-type "${SITE_TYPE}" \
+        --sex "${SEX}" \
+        --tissue-site "${TISSUE_SITE}" \
+        --segmentation-config "${SEGMENTATION_CONFIG}" \
+        --num-workers "${NUM_WORKERS}" \
+        > "${LOG_FILE}" 2>&1; then
+
+        # Check if results exist
+        AEON_RESULTS="${RESULTS_DIR}/${SLIDE_ID}/${SLIDE_ID}_aeon_results.csv"
+
+        if [ -f "${AEON_RESULTS}" ]; then
+            # Extract prediction
+            PREDICTION=$(python3 -c "
+import pandas as pd
+df = pd.read_csv('${AEON_RESULTS}')
+if not df.empty:
+    print(f\"{df.iloc[0]['Cancer Subtype']}|{df.iloc[0]['Confidence']:.4f}\")
+")
+
+            IFS='|' read -r PRED_SUBTYPE CONFIDENCE <<< "${PREDICTION}"
+
+            echo ""
+            echo "Aeon Prediction:"
+            echo "  Predicted: ${PRED_SUBTYPE}"
+            echo "  Confidence: ${CONFIDENCE}"
+            echo ""
+
+            # Check if prediction matches ground truth
+            if [ "${PRED_SUBTYPE}" == "${CANCER_SUBTYPE}" ]; then
+                echo -e "${GREEN}✓ PASS: Prediction matches ground truth${NC}"
+                PASSED=$((PASSED + 1))
+            else
+                echo -e "${RED}✗ FAIL: Prediction does not match ground truth${NC}"
+                echo "  Expected: ${CANCER_SUBTYPE}"
+                echo "  Got: ${PRED_SUBTYPE}"
+                FAILED=$((FAILED + 1))
+            fi
+        else
+            echo -e "${RED}✗ FAIL: Aeon results file not found${NC}"
+            FAILED=$((FAILED + 1))
+        fi
+    else
+        echo -e "${RED}✗ FAIL: Mosaic pipeline failed${NC}"
+        echo "Check log file: ${LOG_FILE}"
+        FAILED=$((FAILED + 1))
+    fi
+
+    echo ""
+done
+
+# Summary
+echo "========================================="
+echo "Test Summary"
+echo "========================================="
+echo "Total slides: ${NUM_SLIDES}"
+echo -e "${GREEN}Passed: ${PASSED}${NC}"
+if [ ${FAILED} -gt 0 ]; then
+    echo -e "${RED}Failed: ${FAILED}${NC}"
+else
+    echo "Failed: ${FAILED}"
+fi
+echo ""
+
+if [ ${FAILED} -eq 0 ]; then
+    echo -e "${GREEN}All tests passed!${NC}"
+    exit 0
+else
+    echo -e "${RED}Some tests failed. Check logs in ${LOG_DIR}${NC}"
+    exit 1
+fi
scripts/verify_aeon_results.py ADDED
@@ -0,0 +1,224 @@
+#!/usr/bin/env python
+"""
+Verify Aeon test results against expected ground truth.
+
+This script reads the test results and compares them against the ground truth
+values in test_samples.json to validate the Aeon model predictions.
+
+Usage:
+    python verify_aeon_results.py \
+        --test-samples test_slides/test_samples.json \
+        --results-dir test_slides/results
+"""
+
+import argparse
+import json
+from pathlib import Path
+import pandas as pd
+from typing import Dict, List, Tuple
+
+
+def load_test_samples(test_samples_file: Path) -> List[Dict]:
+    """Load test samples from JSON file.
+
+    Args:
+        test_samples_file: Path to test_samples.json
+
+    Returns:
+        List of test sample dictionaries
+    """
+    with open(test_samples_file) as f:
+        return json.load(f)
+
+
+def load_aeon_results(slide_id: str, results_dir: Path) -> Tuple[str, float]:
+    """Load Aeon prediction results for a slide.
+
+    Args:
+        slide_id: Slide identifier
+        results_dir: Directory containing results
+
+    Returns:
+        Tuple of (predicted_subtype, confidence)
+    """
+    results_file = results_dir / slide_id / f"{slide_id}_aeon_results.csv"
+
+    if not results_file.exists():
+        raise FileNotFoundError(f"Results file not found: {results_file}")
+
+    df = pd.read_csv(results_file)
+
+    if df.empty:
+        raise ValueError(f"Empty results file: {results_file}")
+
+    # Get top prediction
+    top_prediction = df.iloc[0]
+    return top_prediction["Cancer Subtype"], top_prediction["Confidence"]
+
+
+def verify_results(test_samples: List[Dict], results_dir: Path) -> Dict:
+    """Verify all test results against ground truth.
+
+    Args:
+        test_samples: List of test sample dictionaries
+        results_dir: Directory containing results
+
+    Returns:
+        Dictionary with verification statistics
+    """
+    total = len(test_samples)
+    passed = 0
+    failed = 0
+    results = []
+
+    print("=" * 80)
+    print("Aeon Model Verification Report")
+    print("=" * 80)
+    print()
+
+    for sample in test_samples:
+        slide_id = sample.get("slide_id") or sample.get("image_id")
+        ground_truth = sample.get("cancer_subtype") or sample.get("cancer_type")
+        site_type = sample["site_type"]
+        sex = sample["sex"]
+        tissue_site = sample["tissue_site"]
+
+        print(f"Slide: {slide_id}")
+        print(f"  Ground Truth: {ground_truth}")
+        print(f"  Site Type: {site_type}")
+        print(f"  Sex: {sex}")
+        print(f"  Tissue Site: {tissue_site}")
+
+        try:
+            predicted, confidence = load_aeon_results(slide_id, results_dir)
+
+            print(f"  Predicted: {predicted}")
+            print(f"  Confidence: {confidence:.4f} ({confidence * 100:.2f}%)")
+
+            # Check if prediction matches
+            if predicted == ground_truth:
+                print("  Status: ✓ PASS")
+                passed += 1
+                status = "PASS"
+            else:
+                print(f"  Status: ✗ FAIL (expected {ground_truth}, got {predicted})")
+                failed += 1
+                status = "FAIL"
+
+            results.append({
+                "slide_id": slide_id,
+                "ground_truth": ground_truth,
+                "predicted": predicted,
+                "confidence": confidence,
+                "site_type": site_type,
+                "sex": sex,
+                "tissue_site": tissue_site,
+                "status": status
+            })
+
+        except Exception as e:
+            print(f"  Status: ✗ ERROR - {e}")
+            failed += 1
+            results.append({
+                "slide_id": slide_id,
+                "ground_truth": ground_truth,
+                "predicted": None,
+                "confidence": None,
+                "site_type": site_type,
+                "sex": sex,
+                "tissue_site": tissue_site,
+                "status": "ERROR",
+                "error": str(e)
+            })
+
+        print()
+
+    # Print summary
+    print("=" * 80)
+    print("Summary")
+    print("=" * 80)
+    print(f"Total slides: {total}")
+    print(f"Passed: {passed} ({passed / total * 100:.1f}%)")
+    print(f"Failed: {failed} ({failed / total * 100:.1f}%)")
+    print()
+
+    if passed == total:
+        print("✓ All tests passed!")
+    else:
+        print(f"✗ {failed} test(s) failed")
+
+    # Calculate statistics for passed tests
+    if passed > 0:
+        confidences = [r["confidence"] for r in results if r["status"] == "PASS"]
+        avg_confidence = sum(confidences) / len(confidences)
+        min_confidence = min(confidences)
+        max_confidence = max(confidences)
+
+        print()
+        print("Confidence Statistics (for passed tests):")
+        print(f"  Average: {avg_confidence:.4f} ({avg_confidence * 100:.2f}%)")
+        print(f"  Minimum: {min_confidence:.4f} ({min_confidence * 100:.2f}%)")
+        print(f"  Maximum: {max_confidence:.4f} ({max_confidence * 100:.2f}%)")
+
+    return {
+        "total": total,
+        "passed": passed,
+        "failed": failed,
+        "accuracy": passed / total if total > 0 else 0,
+        "results": results
+    }
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Verify Aeon test results against ground truth"
+    )
+    parser.add_argument(
+        "--test-samples",
+        type=Path,
+        default=Path("test_slides/test_samples.json"),
+        help="Path to test_samples.json (default: test_slides/test_samples.json)"
+    )
+    parser.add_argument(
+        "--results-dir",
+        type=Path,
+        default=Path("test_slides/results"),
+        help="Directory containing results (default: test_slides/results)"
+    )
+    parser.add_argument(
+        "--output",
+        type=Path,
+        help="Optional path to save verification report as JSON"
+    )
+
+    args = parser.parse_args()
+
+    # Validate inputs
+    if not args.test_samples.exists():
+        raise FileNotFoundError(f"Test samples file not found: {args.test_samples}")
+
+    if not args.results_dir.exists():
+        raise FileNotFoundError(f"Results directory not found: {args.results_dir}")
+
+    # Load test samples
+    test_samples = load_test_samples(args.test_samples)
+
+    # Verify results
+    verification_report = verify_results(test_samples, args.results_dir)
+
+    # Save report if requested
+    if args.output:
+        with open(args.output, "w") as f:
+            json.dump(verification_report, f, indent=2)
+        print()
+        print(f"Verification report saved to: {args.output}")
+
+    # Exit with appropriate code
+    if verification_report["failed"] > 0:
+        exit(1)
+    else:
+        exit(0)
+
+
+if __name__ == "__main__":
+    main()
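The summary arithmetic in `verify_results()` (accuracy plus mean/min/max confidence over passing slides) reduces to a few lines. A standalone reproduction on hand-made, purely illustrative records, not real pipeline output:

```python
# Illustrative result records mirroring the report's per-slide dicts.
results = [
    {"slide_id": "A", "status": "PASS", "confidence": 0.98},
    {"slide_id": "B", "status": "PASS", "confidence": 0.99},
    {"slide_id": "C", "status": "FAIL", "confidence": 0.55},
]

# Accuracy over all slides; confidence stats only over PASS slides,
# exactly as verify_results() computes them.
passed = [r for r in results if r["status"] == "PASS"]
accuracy = len(passed) / len(results)
avg_conf = sum(r["confidence"] for r in passed) / len(passed)
print(f"accuracy={accuracy:.3f} mean_conf={avg_conf:.4f}")
```

Restricting the confidence statistics to passing slides means a confidently wrong prediction never drags the reported average down; that is a deliberate property of the report, worth keeping in mind when reading it.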
src/mosaic/analysis.py CHANGED
@@ -154,13 +154,15 @@ def _extract_optimus_features(filtered_coords, slide_path, attrs, num_workers):
154
  return features
155
 
156
 
157
- def _run_aeon_inference(features, site_type, num_workers):
158
  """Run Aeon cancer subtype inference on GPU.
159
 
160
  Args:
161
  features: Optimus features
162
  site_type: Site type ("Primary" or "Metastatic")
163
  num_workers: Number of worker processes
 
 
164
 
165
  Returns:
166
  Aeon results DataFrame
@@ -183,6 +185,8 @@ def _run_aeon_inference(features, site_type, num_workers):
183
  metastatic=(site_type == "Metastatic"),
184
  batch_size=8,
185
  num_workers=num_workers,
 
 
186
  use_cpu=False,
187
  )
188
  end_time = pd.Timestamp.now()
@@ -260,6 +264,8 @@ def _run_inference_pipeline_free(
260
  slide_path,
261
  attrs,
262
  site_type,
 
 
263
  cancer_subtype,
264
  cancer_subtype_name_map,
265
  num_workers,
@@ -267,8 +273,8 @@ def _run_inference_pipeline_free(
267
  ):
268
  """Run inference pipeline with 60s GPU limit (for free users)."""
269
  return _run_inference_pipeline_impl(
270
- coords, slide_path, attrs, site_type, cancer_subtype,
271
- cancer_subtype_name_map, num_workers, progress
272
  )
273
 
274
 
@@ -278,6 +284,8 @@ def _run_inference_pipeline_pro(
278
  slide_path,
279
  attrs,
280
  site_type,
 
 
281
  cancer_subtype,
282
  cancer_subtype_name_map,
283
  num_workers,
@@ -285,8 +293,8 @@ def _run_inference_pipeline_pro(
285
  ):
286
  """Run inference pipeline with 300s GPU limit (for PRO users)."""
287
  return _run_inference_pipeline_impl(
288
- coords, slide_path, attrs, site_type, cancer_subtype,
289
- cancer_subtype_name_map, num_workers, progress
290
  )
291
 
292
 
@@ -295,6 +303,8 @@ def _run_inference_pipeline_impl(
295
  slide_path,
296
  attrs,
297
  site_type,
 
 
298
  cancer_subtype,
299
  cancer_subtype_name_map,
300
  num_workers,
@@ -351,7 +361,7 @@ def _run_inference_pipeline_impl(
351
  # Step 5: Run Aeon to predict histology if not supplied
352
  if cancer_subtype == "Unknown":
353
  progress(0.9, desc="Running Aeon for cancer subtype inference")
354
- aeon_results = _run_aeon_inference(features, site_type, num_workers)
355
  else:
356
  cancer_subtype_code = cancer_subtype_name_map.get(cancer_subtype)
357
  aeon_results = pd.DataFrame(
@@ -379,6 +389,8 @@ def analyze_slide(
379
  slide_path,
380
  seg_config,
381
  site_type,
 
 
382
  cancer_subtype,
383
  cancer_subtype_name_map,
384
  ihc_subtype="",
@@ -507,6 +519,17 @@ def analyze_slide(
507
  import traceback
508
  logger.warning(traceback.format_exc())
509
 
 
 
 
 
 
 
 
 
 
 
 
510
  if is_logged_in:
511
  logger.info("Using 300s GPU allocation (logged-in user)")
512
  aeon_results, paladin_results = _run_inference_pipeline_pro(
@@ -514,6 +537,8 @@ def analyze_slide(
514
  slide_path,
515
  attrs,
516
  site_type,
 
 
517
  cancer_subtype,
518
  cancer_subtype_name_map,
519
  num_workers,
@@ -526,6 +551,8 @@ def analyze_slide(
526
  slide_path,
527
  attrs,
528
  site_type,
 
 
529
  cancer_subtype,
530
  cancer_subtype_name_map,
531
  num_workers,
 
154
  return features
155
 
156
 
157
+ def _run_aeon_inference(features, site_type, num_workers, sex=None, tissue_site_idx=None):
158
  """Run Aeon cancer subtype inference on GPU.
159
 
160
  Args:
161
  features: Optimus features
162
  site_type: Site type ("Primary" or "Metastatic")
163
  num_workers: Number of worker processes
164
+ sex: Patient sex (0=Male, 1=Female), optional
165
+ tissue_site_idx: Tissue site index (0-56), optional
166
 
167
  Returns:
168
  Aeon results DataFrame
 
  metastatic=(site_type == "Metastatic"),
  batch_size=8,
  num_workers=num_workers,
+ sex=sex,
+ tissue_site_idx=tissue_site_idx,
  use_cpu=False,
  )
  end_time = pd.Timestamp.now()

  slide_path,
  attrs,
  site_type,
+ sex,
+ tissue_site_idx,
  cancer_subtype,
  cancer_subtype_name_map,
  num_workers,

  ):
  """Run inference pipeline with 60s GPU limit (for free users)."""
  return _run_inference_pipeline_impl(
+ coords, slide_path, attrs, site_type, sex, tissue_site_idx,
+ cancer_subtype, cancer_subtype_name_map, num_workers, progress
  )

  slide_path,
  attrs,
  site_type,
+ sex,
+ tissue_site_idx,
  cancer_subtype,
  cancer_subtype_name_map,
  num_workers,

  ):
  """Run inference pipeline with 300s GPU limit (for PRO users)."""
  return _run_inference_pipeline_impl(
+ coords, slide_path, attrs, site_type, sex, tissue_site_idx,
+ cancer_subtype, cancer_subtype_name_map, num_workers, progress
  )

  slide_path,
  attrs,
  site_type,
+ sex,
+ tissue_site_idx,
  cancer_subtype,
  cancer_subtype_name_map,
  num_workers,

  # Step 5: Run Aeon to predict histology if not supplied
  if cancer_subtype == "Unknown":
  progress(0.9, desc="Running Aeon for cancer subtype inference")
+ aeon_results = _run_aeon_inference(features, site_type, num_workers, sex, tissue_site_idx)
  else:
  cancer_subtype_code = cancer_subtype_name_map.get(cancer_subtype)
  aeon_results = pd.DataFrame(

  slide_path,
  seg_config,
  site_type,
+ sex,
+ tissue_site,
  cancer_subtype,
  cancer_subtype_name_map,
  ihc_subtype="",

  import traceback
  logger.warning(traceback.format_exc())

+ # Convert sex and tissue_site to indices for Aeon model
+ from mosaic.inference.data import encode_sex, encode_tissue_site
+
+ sex_idx = None
+ if sex is not None:
+ sex_idx = encode_sex(sex)
+
+ tissue_site_idx = None
+ if tissue_site is not None:
+ tissue_site_idx = encode_tissue_site(tissue_site)
+
  if is_logged_in:
  logger.info("Using 300s GPU allocation (logged-in user)")
  aeon_results, paladin_results = _run_inference_pipeline_pro(

  slide_path,
  attrs,
  site_type,
+ sex_idx,
+ tissue_site_idx,
  cancer_subtype,
  cancer_subtype_name_map,
  num_workers,

  slide_path,
  attrs,
  site_type,
+ sex_idx,
+ tissue_site_idx,
  cancer_subtype,
  cancer_subtype_name_map,
  num_workers,
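The last hunk above converts the UI-level strings to model indices only when a value was supplied, passing `None` through otherwise. A minimal sketch of that guard pattern; the two encoder stubs are simplified stand-ins for `encode_sex` / `encode_tissue_site` from `mosaic.inference.data` (the real functions load their mappings from CSV files), with index values taken from the test summary later in this commit:

```python
def encode_sex(sex):
    # Stand-in mapping; the real function reads data/sex_original_to_idx.csv
    return {"Male": 0, "Female": 1, "Unknown": 2}.get(sex, 2)

def encode_tissue_site(site):
    # Stand-in mapping; the real function reads the tissue-site CSV
    # (8 is the "Not Applicable" fallback index used in the diff)
    return {"Bladder": 11, "Liver": 26}.get(site, 8)

def to_indices(sex, tissue_site):
    """Mirror the guard logic above: only encode when a value was supplied."""
    sex_idx = encode_sex(sex) if sex is not None else None
    tissue_site_idx = encode_tissue_site(tissue_site) if tissue_site is not None else None
    return sex_idx, tissue_site_idx
```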
src/mosaic/gradio_app.py CHANGED
@@ -21,6 +21,7 @@ from mosaic.ui.utils import (
  validate_settings,
  IHC_SUBTYPES,
  SETTINGS_COLUMNS,
+ SEX_OPTIONS,
  )
  from mosaic.analysis import analyze_slide

@@ -43,10 +44,10 @@ def download_and_process_models():
  "data/paladin_model_map.csv",
  )
  cancer_subtypes = model_map["cancer_subtype"].unique().tolist()
- cancer_subtype_name_map = {
+ cancer_subtype_name_map = {"Unknown": "UNK"}
+ cancer_subtype_name_map.update({
  f"{get_oncotree_code_name(code)} ({code})": code for code in cancer_subtypes
- }
- cancer_subtype_name_map["Unknown"] = "UNK"
+ })
  reversed_cancer_subtype_name_map = {
  value: key for key, value in cancer_subtype_name_map.items()
  }
@@ -99,6 +100,19 @@ def main():
  default="Primary",
  help="Site type of the slide (for single slide processing)",
  )
+ parser.add_argument(
+ "--sex",
+ type=str,
+ choices=SEX_OPTIONS,
+ default="Unknown",
+ help="Sex of the patient (for single slide processing)",
+ )
+ parser.add_argument(
+ "--tissue-site",
+ type=str,
+ default="Unknown",
+ help="Tissue site of the slide (for single slide processing)",
+ )
  parser.add_argument(
  "--cancer-subtype",
  type=str,
@@ -144,6 +158,8 @@
  [
  args.slide_path,
  args.site_type,
+ args.sex,
+ args.tissue_site,
  args.cancer_subtype,
  args.ihc_subtype,
  args.segmentation_config,
@@ -156,6 +172,8 @@
  args.slide_path,
  args.segmentation_config,
  args.site_type,
+ args.sex,
+ args.tissue_site,
  args.cancer_subtype,
  cancer_subtype_name_map,
  args.ihc_subtype,
@@ -191,6 +209,8 @@
  slide_path = row["Slide"]
  seg_config = row["Segmentation Config"]
  site_type = row["Site Type"]
+ sex = row.get("Sex", "Unknown")
+ tissue_site = row.get("Tissue Site", "Unknown")
  cancer_subtype = row["Cancer Subtype"]
  ihc_subtype = row.get("IHC Subtype", "")
  logger.info(
@@ -200,6 +220,8 @@
  slide_path,
  seg_config,
  site_type,
+ sex,
+ tissue_site,
  cancer_subtype,
  cancer_subtype_name_map,
  ihc_subtype,
src/mosaic/inference/aeon.py CHANGED
@@ -4,6 +4,7 @@ This module provides functionality to run the Aeon deep learning model
  for predicting cancer subtypes from H&E whole slide image features.
  """

+ import json
  import pickle # nosec
  import sys
  from argparse import ArgumentParser
@@ -16,36 +17,21 @@ from torch.utils.data import DataLoader
  from mosaic.inference.data import (
  SiteType,
  TileFeatureTensorDataset,
- INT_TO_CANCER_TYPE_MAP,
- CANCER_TYPE_TO_INT_MAP,
+ encode_sex,
+ encode_tissue_site,
  )

  from loguru import logger

  # Cancer types excluded from prediction (too broad or ambiguous)
- cancer_types_to_drop = [
+ # These are used to mask out predictions for overly general cancer types
+ CANCER_TYPES_TO_DROP = [
  "UDMN",
  "ADNOS",
  "CUP",
  "CUPNOS",
- "BRCNOS",
- "GNOS",
- "SCCNOS",
- "PDC",
- "NSCLC",
- "BRCA",
- "SARCNOS",
- "NETNOS",
- "MEL",
- "RCC",
- "BRCANOS",
- "COADREAD",
- "MUP",
- "NECNOS",
- "UCEC",
  "NOT",
  ]
- col_indices_to_drop = [CANCER_TYPE_TO_INT_MAP[x] for x in cancer_types_to_drop]


  BATCH_SIZE = 8
@@ -53,10 +39,11 @@ NUM_WORKERS = 8


  def run(
- features, model_path, metastatic=False, batch_size=8, num_workers=8, use_cpu=False
+ features, model_path, metastatic=False, batch_size=8, num_workers=8, use_cpu=False,
+ sex=None, tissue_site_idx=None
  ):
  """Run Aeon model inference for cancer subtype prediction.
-
+
  Args:
  features: NumPy array of tile features extracted from the WSI
  model_path: Path to the pickled Aeon model file
@@ -64,7 +51,9 @@ def run(
  batch_size: Batch size for inference
  num_workers: Number of workers for data loading
  use_cpu: Force CPU usage instead of GPU
-
+ sex: Patient sex (0=Male, 1=Female), optional
+ tissue_site_idx: Tissue site index (0-56), optional
+
  Returns:
  tuple: (results_df, part_embedding)
  - results_df: DataFrame with cancer subtypes and confidence scores
@@ -79,12 +68,27 @@
  model.to(device)
  model.eval()

+ # Load the correct mapping from metadata for this model
+ metadata_path = Path(__file__).parent.parent.parent.parent / "data" / "metadata" / "target_dict.tsv"
+ with open(metadata_path) as f:
+ target_dict_str = f.read().strip().replace("'", '"')
+ target_dict = json.loads(target_dict_str)
+
+ histologies = target_dict['histologies']
+ INT_TO_CANCER_TYPE_MAP_LOCAL = {i: histology for i, histology in enumerate(histologies)}
+ CANCER_TYPE_TO_INT_MAP_LOCAL = {v: k for k, v in INT_TO_CANCER_TYPE_MAP_LOCAL.items()}
+
+ # Calculate col_indices_to_drop using local mapping
+ col_indices_to_drop_local = [CANCER_TYPE_TO_INT_MAP_LOCAL[x] for x in CANCER_TYPES_TO_DROP if x in CANCER_TYPE_TO_INT_MAP_LOCAL]
+
  site_type = SiteType.METASTASIS if metastatic else SiteType.PRIMARY

  # For UI, InferenceDataset will just be a single slide. Sample id is not relevant.
  dataset = TileFeatureTensorDataset(
  site_type=site_type,
  tile_features=features,
+ sex=sex,
+ tissue_site_idx=tissue_site_idx,
  n_max_tiles=20000,
  )
  dataloader = DataLoader(
@@ -95,15 +99,19 @@
  batch = next(iter(dataloader))
  with torch.no_grad():
  batch["tile_tensor"] = batch["tile_tensor"].to(device)
+ if "SEX" in batch:
+ batch["SEX"] = batch["SEX"].to(device)
+ if "TISSUE_SITE" in batch:
+ batch["TISSUE_SITE"] = batch["TISSUE_SITE"].to(device)
  y = model(batch)
- y["logits"][:, col_indices_to_drop] = -1e6
+ y["logits"][:, col_indices_to_drop_local] = -1e6

  batch_size = y["logits"].shape[0]
  assert batch_size == 1

  softmax = torch.nn.functional.softmax(y["logits"][0], dim=0)
  argmax = torch.argmax(softmax, dim=0)
- class_assignment = INT_TO_CANCER_TYPE_MAP[argmax.item()]
+ class_assignment = INT_TO_CANCER_TYPE_MAP_LOCAL[argmax.item()]
  max_confidence = softmax[argmax].item()
  mean_confidence = torch.mean(softmax).item()
@@ -114,7 +122,7 @@

  part_embedding = y["whole_part_representation"][0].cpu()

- for cancer_subtype, j in sorted(CANCER_TYPE_TO_INT_MAP.items()):
+ for cancer_subtype, j in sorted(CANCER_TYPE_TO_INT_MAP_LOCAL.items()):
  confidence = softmax[j].item()
  results.append((cancer_subtype, confidence))
  results.sort(key=lambda row: row[1], reverse=True)
@@ -153,6 +161,19 @@
  parser.add_argument(
  "--metastatic", action="store_true", help="Tissue is from a metastatic site"
  )
+ parser.add_argument(
+ "--sex",
+ type=str,
+ choices=["Male", "Female", "Unknown"],
+ default=None,
+ help="Patient sex (Male or Female)",
+ )
+ parser.add_argument(
+ "--tissue-site",
+ type=str,
+ default=None,
+ help="Tissue site name",
+ )
  parser.add_argument("--batch-size", type=int, default=BATCH_SIZE, help="Batch size")
  parser.add_argument(
  "--num-workers", type=int, default=NUM_WORKERS, help="Number of workers"
@@ -174,6 +195,17 @@

  features = torch.load(opt.features_path)

+ # Encode sex and tissue site if provided
+ sex_encoded = None
+ if opt.sex:
+ sex_encoded = encode_sex(opt.sex)
+ logger.info(f"Using sex: {opt.sex} (encoded as {sex_encoded})")
+
+ tissue_site_idx = None
+ if opt.tissue_site:
+ tissue_site_idx = encode_tissue_site(opt.tissue_site)
+ logger.info(f"Using tissue site: {opt.tissue_site} (encoded as {tissue_site_idx})")
+
  results_df, part_embedding = run(
  features=features,
  model_path=opt.model_path,
@@ -181,6 +213,8 @@
  batch_size=opt.batch_size,
  num_workers=opt.num_workers,
  use_cpu=opt.use_cpu,
+ sex=sex_encoded,
+ tissue_site_idx=tissue_site_idx,
  )

  results_df.to_csv(output_path, index=False)
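The `run()` hunk above parses `data/metadata/target_dict.tsv` by turning its Python-style single quotes into double quotes and feeding the result to `json.loads`, then derives both direction maps and the drop indices from it. A self-contained sketch with an inline toy string (the real file's contents are not shown in the diff); note the quote-replacement trick assumes no apostrophes inside the strings:

```python
import json

# Toy stand-in for the target_dict.tsv contents
target_dict_str = "{'histologies': ['BLCA', 'HCC', 'IHCH']}".strip().replace("'", '"')
target_dict = json.loads(target_dict_str)

histologies = target_dict["histologies"]
int_to_type = {i: h for i, h in enumerate(histologies)}
type_to_int = {v: k for k, v in int_to_type.items()}

# Mirror the guarded comprehension: only drop codes the model actually has
drop = [type_to_int[x] for x in ["CUP", "HCC"] if x in type_to_int]
```

The `if x in CANCER_TYPE_TO_INT_MAP_LOCAL` guard matters: with a model-specific mapping, some codes in the drop list may simply not exist in this model's output space.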
src/mosaic/inference/data.py CHANGED
@@ -201,6 +201,129 @@ CANCER_TYPE_TO_INT_MAP = {
  INT_TO_CANCER_TYPE_MAP = {v: k for k, v in CANCER_TYPE_TO_INT_MAP.items()}


+ # Tissue site mapping (module-level cache)
+ _TISSUE_SITE_MAP = None
+
+ # Default tissue site index for "Not Applicable"
+ DEFAULT_TISSUE_SITE_IDX = 8
+
+
+ def get_tissue_site_map():
+ """Load tissue site name → index mapping from CSV.
+
+ Returns:
+ dict: Mapping of tissue site names to indices (0-56)
+
+ Raises:
+ FileNotFoundError: If the tissue site CSV file is not found
+ """
+ global _TISSUE_SITE_MAP
+ if _TISSUE_SITE_MAP is None:
+ from pathlib import Path
+ import pandas as pd
+
+ csv_path = Path(__file__).parent.parent.parent.parent / "data" / "tissue_site_original_to_idx.csv"
+ try:
+ df = pd.read_csv(csv_path)
+ except FileNotFoundError as e:
+ raise FileNotFoundError(
+ f"Tissue site mapping file not found at {csv_path}. "
+ f"Please ensure the data directory contains 'tissue_site_original_to_idx.csv'."
+ ) from e
+
+ _TISSUE_SITE_MAP = {}
+ for _, row in df.iterrows():
+ _TISSUE_SITE_MAP[row['TISSUE_SITE']] = int(row['idx'])
+
+ return _TISSUE_SITE_MAP
+
+
+ def get_tissue_site_options():
+ """Get sorted unique tissue site names for UI dropdowns.
+
+ Returns:
+ list: Sorted list of unique tissue site names
+ """
+ site_map = get_tissue_site_map()
+ return sorted(set(site_map.keys()))
+
+
+ _SEX_MAP = None
+
+
+ def get_sex_map():
+ """Get the sex to index mapping from CSV file.
+
+ Returns:
+ dict: Mapping of sex values to indices (0-2)
+
+ Raises:
+ FileNotFoundError: If the sex mapping CSV file is not found
+ """
+ global _SEX_MAP
+ if _SEX_MAP is None:
+ from pathlib import Path
+ import pandas as pd
+
+ csv_path = Path(__file__).parent.parent.parent.parent / "data" / "sex_original_to_idx.csv"
+ try:
+ df = pd.read_csv(csv_path)
+ except FileNotFoundError as e:
+ raise FileNotFoundError(
+ f"Sex mapping file not found at {csv_path}. "
+ f"Please ensure the data directory contains 'sex_original_to_idx.csv'."
+ ) from e
+
+ _SEX_MAP = {}
+ for _, row in df.iterrows():
+ _SEX_MAP[row['SEX']] = int(row['idx'])
+
+ return _SEX_MAP
+
+
+ def encode_sex(sex):
+ """Convert sex to numeric encoding.
+
+ Args:
+ sex: "Male", "Female", or "Unknown" (case insensitive)
+
+ Returns:
+ int: 0 for Male, 1 for Female, 2 for Unknown
+ """
+ sex_map = get_sex_map()
+ unknown_idx = sex_map.get("Unknown", 2)
+ return sex_map.get(sex, unknown_idx)
+
+
+ def encode_tissue_site(site_name):
+ """Convert tissue site name to index (0-56).
+
+ Args:
+ site_name: Tissue site name from CSV
+
+ Returns:
+ int: Tissue site index, defaults to DEFAULT_TISSUE_SITE_IDX ("Not Applicable")
+ """
+ site_map = get_tissue_site_map()
+ return site_map.get(site_name, DEFAULT_TISSUE_SITE_IDX)
+
+
+ def tissue_site_to_one_hot(site_idx, num_classes=57):
+ """Convert tissue site index to one-hot vector.
+
+ Args:
+ site_idx: Index value (0-56 for tissue site, 0-2 for sex)
+ num_classes: Number of classes (57 for tissue site, 3 for sex)
+
+ Returns:
+ list: One-hot encoded vector
+ """
+ one_hot = [0] * num_classes
+ if 0 <= site_idx < num_classes:
+ one_hot[site_idx] = 1
+ return one_hot
+
+
  class SiteType(Enum):
  PRIMARY = "Primary"
  METASTASIS = "Metastasis"
@@ -211,6 +334,8 @@ class TileFeatureTensorDataset(Dataset):
  self,
  site_type: SiteType,
  tile_features: np.ndarray,
+ sex: int = None,
+ tissue_site_idx: int = None,
  n_max_tiles: int = 20000,
  ) -> None:
  """Initialize the dataset.
@@ -218,12 +343,16 @@
  Args:
  site_type: the site type as str, either "Primary" or "Metastasis"
  tile_features: the tile feature array
+ sex: patient sex (0=Male, 1=Female), optional for Aeon
+ tissue_site_idx: tissue site index (0-56), optional for Aeon
  n_max_tiles: the maximum number of tiles to use as int

  Returns:
  None
  """
  self.site_type = site_type
+ self.sex = sex
+ self.tissue_site_idx = tissue_site_idx
  self.n_max_tiles = n_max_tiles
  self.features = self._get_features(tile_features)
@@ -264,7 +393,22 @@
  Returns:
  dict: the item
  """
- return {
+ result = {
  "site": self.site_type.value,
  "tile_tensor": self.features
  }
+
+ # Add sex and tissue_site if provided (for Aeon)
+ if self.sex is not None:
+ result["SEX"] = torch.tensor(
+ tissue_site_to_one_hot(self.sex, num_classes=3),
+ dtype=torch.float32
+ )
+
+ if self.tissue_site_idx is not None:
+ result["TISSUE_SITE"] = torch.tensor(
+ tissue_site_to_one_hot(self.tissue_site_idx, num_classes=57),
+ dtype=torch.float32
+ )
+
+ return result
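The `tissue_site_to_one_hot` helper added above is reused for both inputs: sex with `num_classes=3` and tissue site with the default `num_classes=57`. A standalone re-statement of its behavior, including the silent all-zero result for out-of-range indices:

```python
def tissue_site_to_one_hot(site_idx, num_classes=57):
    # Build an all-zero vector, then set the hot position if it is in range;
    # an out-of-range index yields an all-zero vector rather than an error.
    one_hot = [0] * num_classes
    if 0 <= site_idx < num_classes:
        one_hot[site_idx] = 1
    return one_hot
```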
src/mosaic/ui/app.py CHANGED
@@ -18,7 +18,9 @@ from mosaic.ui.utils import (
  create_user_directory,
  load_settings,
  validate_settings,
+ get_tissue_sites,
  IHC_SUBTYPES,
+ SEX_OPTIONS,
  SETTINGS_COLUMNS,
  )
  from mosaic.analysis import analyze_slide
@@ -80,6 +82,8 @@ def analyze_slides(
  slides[idx],
  row["Segmentation Config"],
  row["Site Type"],
+ row["Sex"],
+ row["Tissue Site"],
  row["Cancer Subtype"],
  cancer_subtype_name_map,
  row["IHC Subtype"],
@@ -177,6 +181,16 @@ def launch_gradio(server_name, server_port, share):
  label="Site Type",
  value="Primary",
  )
+ sex_dropdown = gr.Dropdown(
+ choices=SEX_OPTIONS,
+ label="Sex",
+ value="Unknown",
+ )
+ tissue_site_dropdown = gr.Dropdown(
+ choices=get_tissue_sites(),
+ label="Tissue Site",
+ value="Unknown",
+ )
  cancer_subtype_dropdown = gr.Dropdown(
  choices=[name for name in cancer_subtype_name_map.keys()],
  label="Cancer Subtype",
@@ -195,15 +209,9 @@
  )
  with gr.Row():
  settings_input = gr.Dataframe(
- headers=[
- "Slide",
- "Site Type",
- "Cancer Subtype",
- "IHC Subtype",
- "Segmentation Config",
- ],
+ headers=SETTINGS_COLUMNS,
  label="Current Settings",
- datatype=["str", "str", "str", "str", "str"],
+ datatype=["str"] * len(SETTINGS_COLUMNS),
  visible=False,
  interactive=True,
  static_columns="Slide",
@@ -270,7 +278,7 @@
  gr.File(visible=False),
  )

- def get_settings(files, site_type, cancer_subtype, ihc_subtype, seg_config):
+ def get_settings(files, site_type, sex, tissue_site, cancer_subtype, ihc_subtype, seg_config):
  if files is None:
  return pd.DataFrame()
  settings = []
@@ -278,7 +286,7 @@
  filename = file.name if hasattr(file, "name") else file
  slide_name = filename.split("/")[-1]
  settings.append(
- [slide_name, site_type, cancer_subtype, ihc_subtype, seg_config]
+ [slide_name, site_type, sex, tissue_site, cancer_subtype, ihc_subtype, seg_config]
  )
  df = pd.DataFrame(settings, columns=SETTINGS_COLUMNS)
  return df
@@ -288,6 +296,8 @@
  [
  input_slides.change,
  site_dropdown.change,
+ sex_dropdown.change,
+ tissue_site_dropdown.change,
  cancer_subtype_dropdown.change,
  ihc_subtype_dropdown.change,
  seg_config_dropdown.change,
@@ -295,18 +305,20 @@
  inputs=[
  input_slides,
  site_dropdown,
+ sex_dropdown,
+ tissue_site_dropdown,
  cancer_subtype_dropdown,
  ihc_subtype_dropdown,
  seg_config_dropdown,
  ],
  outputs=[settings_input, settings_csv, ihc_subtype_dropdown],
  )
- def update_settings(files, site_type, cancer_subtype, ihc_subtype, seg_config):
+ def update_settings(files, site_type, sex, tissue_site, cancer_subtype, ihc_subtype, seg_config):
  has_ihc = "Breast" in cancer_subtype
  if not files:
  return None, None, gr.Dropdown(visible=has_ihc)
  settings_df = get_settings(
- files, site_type, cancer_subtype, ihc_subtype, seg_config
+ files, site_type, sex, tissue_site, cancer_subtype, ihc_subtype, seg_config
  )
  if settings_df is not None:
  has_ihc = any("Breast" in cs for cs in settings_df["Cancer Subtype"])
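After this change, `get_settings` builds one seven-element row per uploaded slide, in the same order as `SETTINGS_COLUMNS`. A plain-Python sketch of that row assembly (the real function wraps the rows in a pandas DataFrame with `columns=SETTINGS_COLUMNS`; `build_rows` is a hypothetical name for illustration):

```python
SETTINGS_COLUMNS = [
    "Slide", "Site Type", "Sex", "Tissue Site",
    "Cancer Subtype", "IHC Subtype", "Segmentation Config",
]

def build_rows(files, site_type, sex, tissue_site, cancer_subtype, ihc_subtype, seg_config):
    # One row per slide, with the slide name stripped of its directory path
    rows = []
    for filename in files:
        slide_name = filename.split("/")[-1]
        rows.append([slide_name, site_type, sex, tissue_site,
                     cancer_subtype, ihc_subtype, seg_config])
    return rows

rows = build_rows(["/tmp/a/881837.svs"], "Primary", "Male", "Bladder",
                  "Unknown", "", "Biopsy")
```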
src/mosaic/ui/utils.py CHANGED
@@ -17,16 +17,44 @@ import requests
  TEMP_USER_DATA_DIR = Path(tempfile.gettempdir()) / "mosaic_user_data"

  IHC_SUBTYPES = ["", "HR+/HER2+", "HR+/HER2-", "HR-/HER2+", "HR-/HER2-"]
+ SEX_OPTIONS = ["Unknown", "Male", "Female"]

  SETTINGS_COLUMNS = [
  "Slide",
  "Site Type",
+ "Sex",
+ "Tissue Site",
  "Cancer Subtype",
  "IHC Subtype",
  "Segmentation Config",
  ]

  oncotree_code_map = {}
+ tissue_site_list = None
+
+
+ def get_tissue_sites():
+ """Get the list of tissue sites from the tissue site map file.
+
+ Returns:
+ List of tissue site names. Returns ["Unknown"] if the CSV file is not found.
+ """
+ global tissue_site_list
+ if tissue_site_list is None:
+ try:
+ current_dir = Path(__file__).parent.parent.parent.parent
+ tissue_site_map_path = current_dir / "data" / "tissue_site_original_to_idx.csv"
+ df = pd.read_csv(tissue_site_map_path)
+ # Get unique tissue sites and sort them
+ tissue_site_list = ["Unknown"] + sorted(df["TISSUE_SITE"].unique().tolist())
+ except FileNotFoundError:
+ gr.Warning(
+ f"Tissue site mapping file not found at {tissue_site_map_path}. "
+ "Only 'Unknown' option will be available for tissue site selection. "
+ "Please ensure the data files are downloaded from the model repository."
+ )
+ tissue_site_list = ["Unknown"]
+ return tissue_site_list


  def get_oncotree_code_name(code):
@@ -98,6 +126,10 @@ def load_settings(slide_csv_path):
  settings_df["Cancer Subtype"] = "Unknown"
  if "IHC Subtype" not in settings_df.columns:
  settings_df["IHC Subtype"] = ""
+ if "Sex" not in settings_df.columns:
+ settings_df["Sex"] = "Unknown"
+ if "Tissue Site" not in settings_df.columns:
+ settings_df["Tissue Site"] = "Unknown"
  if not set(SETTINGS_COLUMNS).issubset(settings_df.columns):
  raise ValueError("Missing required column in CSV file")
  settings_df = settings_df[SETTINGS_COLUMNS]
@@ -125,6 +157,8 @@ def validate_settings(settings_df, cancer_subtype_name_map, cancer_subtypes, rev
  """
  settings_df.columns = SETTINGS_COLUMNS
  warnings = []
+ tissue_sites = get_tissue_sites()
+
  for idx, row in settings_df.iterrows():
  slide_name = row["Slide"]
  subtype = row["Cancer Subtype"]
@@ -142,6 +176,16 @@
  f"Slide {slide_name}: Unknown site type. Valid types are: Metastatic, Primary. "
  )
  settings_df.at[idx, "Site Type"] = "Primary"
+ if row["Sex"] not in SEX_OPTIONS:
+ warnings.append(
+ f"Slide {slide_name}: Unknown sex. Valid options are: {', '.join(SEX_OPTIONS)}. "
+ )
+ settings_df.at[idx, "Sex"] = "Unknown"
+ if row["Tissue Site"] not in tissue_sites:
+ warnings.append(
+ f"Slide {slide_name}: Unknown tissue site. Valid tissue sites are: {', '.join(tissue_sites)}. "
+ )
+ settings_df.at[idx, "Tissue Site"] = "Unknown"
  if (
  "Breast" not in settings_df.at[idx, "Cancer Subtype"]
  and row["IHC Subtype"] != ""
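The validation hunk above resets any unrecognized Sex or Tissue Site value to "Unknown" and records a warning instead of failing the upload. A sketch of that per-row logic using plain dicts in place of the DataFrame rows:

```python
SEX_OPTIONS = ["Unknown", "Male", "Female"]

def validate_row(row, tissue_sites, warnings):
    # Mirror the fallback behavior: warn, then coerce to "Unknown"
    if row["Sex"] not in SEX_OPTIONS:
        warnings.append(f"Slide {row['Slide']}: Unknown sex.")
        row["Sex"] = "Unknown"
    if row["Tissue Site"] not in tissue_sites:
        warnings.append(f"Slide {row['Slide']}: Unknown tissue site.")
        row["Tissue Site"] = "Unknown"
    return row

warnings = []
row = validate_row(
    {"Slide": "s1.svs", "Sex": "M", "Tissue Site": "Mars"},
    ["Unknown", "Bladder", "Liver"],
    warnings,
)
```

Coercing rather than raising keeps batch CSV uploads usable when a single cell is malformed.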
test_slides/AEON_TEST_SUMMARY.md ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Aeon Model Test Summary
2
+
3
+ ## Overview
4
+
5
+ This document summarizes the Aeon cancer subtype prediction model testing performed on January 7, 2026.
6
+
7
+ ## Model Information
8
+
9
+ - **Model File**: `aeon_model.pkl` (118MB)
+ - **Source**: Exported from `checkpoint.ckpt` (469MB, Nov 29, 2024)
+ - **Architecture**: AeonLateAggregator with late fusion
+ - **Output Classes**: 160 cancer subtypes
+ - **Input Features**:
+   - Tile embeddings from Optimus model
+   - Sex (one-hot encoded, 3 classes)
+   - Tissue site (one-hot encoded, 57 classes)
+   - Site type (Primary/Metastatic)
+
+ ## Test Slides
+
+ Three test slides were processed using the full Mosaic pipeline with Aeon inference:
+
+ ### Slide 1: 881837
+ - **File**: `881837.svs`
+ - **Ground Truth**: BLCA (Bladder Urothelial Carcinoma)
+ - **Site Type**: Primary
+ - **Sex**: Male
+ - **Tissue Site**: Bladder
+ - **Prediction**: BLCA
+ - **Confidence**: 98.19%
+ - **Status**: ✓ PASS
+
+ ### Slide 2: 744547
+ - **File**: `744547.svs`
+ - **Ground Truth**: HCC (Hepatocellular Carcinoma)
+ - **Site Type**: Metastatic
+ - **Sex**: Male
+ - **Tissue Site**: Liver
+ - **Prediction**: HCC
+ - **Confidence**: 99.49%
+ - **Status**: ✓ PASS
+
+ ### Slide 3: 755246
+ - **File**: `755246.svs`
+ - **Ground Truth**: HCC (Hepatocellular Carcinoma)
+ - **Site Type**: Primary
+ - **Sex**: Male
+ - **Tissue Site**: Liver
+ - **Prediction**: HCC
+ - **Confidence**: 99.61%
+ - **Status**: ✓ PASS
+
+ ## Test Results
+
+ | Slide ID | Ground Truth | Prediction | Confidence | Next Highest | Status |
+ |----------|--------------|------------|------------|--------------|--------|
+ | 881837 | BLCA | BLCA | 98.19% | UTUC (0.87%) | ✓ PASS |
+ | 744547 | HCC | HCC | 99.49% | IHCH (0.18%) | ✓ PASS |
+ | 755246 | HCC | HCC | 99.61% | IHCH (0.29%) | ✓ PASS |
+
+ **Overall Accuracy**: 3/3 (100%)
+
+ ## Pipeline Configuration
+
+ ### Segmentation
+ - **Config**: Biopsy (`SegmentationConfig.BIOPSY`)
+ - **Tissue Detection**: Automated segmentation of tissue regions
+ - **Tile Size**: 224x224 pixels at 20x magnification
+
+ ### Feature Extraction
+ - **CTransPath**: Pretrained histopathology foundation model
+ - **Optimus**: Multi-task feature aggregator
+ - **Marker Classifier**: Tissue marker prediction
+
+ ### Aeon Inference
+ - **Model Path**: `data/aeon_model.pkl`
+ - **Batch Size**: 8
+ - **Workers**: 4
+ - **Sex Encoding**: Male=0 (one-hot: [1,0,0])
+ - **Tissue Site Encoding**:
+   - Bladder=11 (one-hot vector, 57 dims)
+   - Liver=26 (one-hot vector, 57 dims)
+
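The one-hot scheme quoted above can be sketched in a few lines. This is an illustration using the indices stated in this document; the actual encoding lives inside `mosaic.inference`, so the `one_hot` helper and variable names here are hypothetical:

```python
def one_hot(index: int, num_classes: int) -> list[int]:
    """Return a one-hot list with a single 1 at `index`."""
    vec = [0] * num_classes
    vec[index] = 1
    return vec

# Indices from this README: Male=0 (3 sex classes); Bladder=11, Liver=26 (57 tissue sites).
sex_male = one_hot(0, 3)    # -> [1, 0, 0]
bladder = one_hot(11, 57)   # 57-dim vector with a 1 at position 11
liver = one_hot(26, 57)     # 57-dim vector with a 1 at position 26
```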
84
+ ## Key Implementation Details
+
+ ### Cancer Type Mapping
+ - Mappings loaded from `data/metadata/target_dict.tsv`
+ - 160 histologies supported
+ - 5 cancer types excluded from predictions: UDMN, ADNOS, CUP, CUPNOS, NOT
+
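Loading such a mapping needs only the standard library. The exact schema of `target_dict.tsv` is not shown in this PR, so the two-column `name<TAB>index` layout in this sketch is an assumption:

```python
import csv
import io

# Hypothetical excerpt of target_dict.tsv (the real file has 160 rows).
tsv_text = "HCC\t0\nBLCA\t1\nIHCH\t2\n"

cancer_type_to_int = {
    name: int(idx)
    for name, idx in csv.reader(io.StringIO(tsv_text), delimiter="\t")
}
# Inverse map for decoding model outputs back to subtype codes.
int_to_cancer_type = {v: k for k, v in cancer_type_to_int.items()}
```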
91
+ ### Model Architecture
+ ```python
+ AeonLateAggregator(
+     tile_emb_dim=768,
+     num_targets=160,
+     sex_embedding_dim=mini_latent_dim,  # mini_latent_dim = latent_dim // 4
+     tissue_site_embedding_dim=mini_latent_dim,
+     site_embedding_dim=mini_latent_dim
+ )
+ ```
+
+ ### Encoding Functions
+ - **Sex**: `encode_sex(sex_str)` → index (0-2) → one-hot (3 classes)
+ - **Tissue Site**: `encode_tissue_site(tissue_site_str)` → index (0-56) → one-hot (57 classes)
+
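A rough sketch of how `encode_sex` might be driven by `sex_original_to_idx.csv`. The CSV's column names (`original`, `idx`) and the inline sample rows are assumptions, not the file's actual contents; only Male=0 is stated elsewhere in this document:

```python
import csv
import io

# Assumed layout of sex_original_to_idx.csv; Male=0 matches the encoding above.
sex_csv = "original,idx\nMale,0\nFemale,1\nUnknown,2\n"
SEX_TO_IDX = {
    row["original"]: int(row["idx"])
    for row in csv.DictReader(io.StringIO(sex_csv))
}

def encode_sex(sex_str: str) -> list[int]:
    """Look up the index for `sex_str`, then one-hot over 3 classes."""
    vec = [0] * 3
    vec[SEX_TO_IDX[sex_str]] = 1
    return vec
```

`encode_tissue_site` would follow the same pattern, reading `tissue_site_original_to_idx.csv` and one-hot encoding over 57 classes.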
106
+ ## Critical Fixes Applied
+
+ ### Issue 1: Model-Metadata Mismatch
+ - **Problem**: The model outputs 160 classes, but the code used a 183-entry mapping
+ - **Solution**: Load mappings from `metadata/target_dict.tsv` instead of global constants
+ - **Files Modified**: `src/mosaic/inference/aeon.py` (lines 87-102, 127, 130-147)
+
+ ### Issue 2: Checkpoint Format
+ - **Problem**: The inference code expects `.pkl` files, not PyTorch Lightning `.ckpt` files
+ - **Solution**: Exported the checkpoint using paladin's `AeonLightningModule.load_from_checkpoint()`
+ - **Export Command**: See `scripts/export_aeon_checkpoint.py`
+
+ ### Issue 3: Missing AeonLateAggregator
+ - **Problem**: The PyPI paladin package had `AeonAggregator`, not `AeonLateAggregator`
+ - **Solution**: Installed paladin from the git repo's dev branch
+ - **Command**: `uv sync --upgrade-package paladin`
+
+ ## Dependencies
+
+ ### Critical Packages
+ - `paladin` (from git: ssh://git@github.com/pathology-data-mining/paladin.git@dev)
+ - `torch>=2.0`
+ - `pytorch-lightning`
+ - `pandas`
+ - `numpy`
+
+ ### Model Files Required
+ - `aeon_model.pkl` (118MB)
+ - `metadata/target_dict.tsv`
+ - `metadata/n_classes.txt`
+ - `metadata/ontology_embedding_dim.txt`
+ - `sex_original_to_idx.csv`
+ - `tissue_site_original_to_idx.csv`
+
+ ## Reproducibility
+
+ All test results are fully reproducible using:
+ 1. The test samples defined in `test_samples.json`
+ 2. The run script: `scripts/run_aeon_tests.sh`
+ 3. The model and metadata uploaded to `PDM-Group/paladin-aeon-models` on Hugging Face
+
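A test runner only needs to iterate `test_samples.json` and compare predictions against each record's `cancer_type`. The sketch below inlines one entry from that file; the pipeline call itself is omitted:

```python
import json

# One entry copied from test_slides/test_samples.json.
samples = json.loads("""[
  {"sample_id": "P-0000034-T01-IM3", "image_id": "881837",
   "sex": "MALE", "tissue_site": "Bladder", "site_type": "Primary",
   "cancer_type": "BLCA", "confidence": 0.9412469863891602}
]""")

for sample in samples:
    # Each record carries everything needed to parameterize one pipeline run.
    required = {"image_id", "sex", "tissue_site", "site_type", "cancer_type"}
    assert required <= sample.keys()
```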
147
+ ## Output Files
+
+ For each slide, the following files are generated in `test_slides/results/{slide_id}/`:
+ - `{slide_id}_aeon_results.csv` - Full confidence scores for all 160 cancer subtypes
+ - `{slide_id}_paladin_results.csv` - Biomarker predictions
+ - `{slide_id}_mask.png` - Tissue segmentation mask
+ - `{slide_id}_features.h5` - Extracted tile features
+
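Each `*_aeon_results.csv` is written sorted by confidence, but the top call can also be recovered directly with the standard library. A sketch using the first rows of `744547_aeon_results.csv` from this PR:

```python
import csv
import io

# First rows of 744547_aeon_results.csv (truncated).
results_csv = (
    "Cancer Subtype,Confidence\n"
    "HCC,0.9949353337287903\n"
    "IHCH,0.0018046980258077383\n"
)
rows = list(csv.DictReader(io.StringIO(results_csv)))
# Pick the subtype with the highest confidence score.
top = max(rows, key=lambda r: float(r["Confidence"]))
```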
155
+ ## Validation Metrics
+
+ - **Prediction Accuracy**: 100% (3/3)
+ - **Average Confidence**: 99.10%
+ - **Minimum Confidence**: 98.19%
+ - **Maximum Confidence**: 99.61%
+
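These summary numbers follow directly from the three per-slide confidences:

```python
# Per-slide top-1 confidences: 881837, 744547, 755246.
confidences = [0.9819, 0.9949, 0.9961]

average = sum(confidences) / len(confidences)  # ~0.9910 (99.10%)
minimum = min(confidences)                     # 0.9819 (98.19%)
maximum = max(confidences)                     # 0.9961 (99.61%)
```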
162
+ ## Hugging Face Repository
+
+ All model files and metadata have been uploaded to:
+ - **Repository**: `PDM-Group/paladin-aeon-models`
+ - **URL**: https://huggingface.co/PDM-Group/paladin-aeon-models
+
+ ### Uploaded Files (Jan 7, 2026)
+ - `aeon_model.pkl` (118MB)
+ - `metadata/` (5 files)
+ - `sex_original_to_idx.csv`
+ - `tissue_site_original_to_idx.csv`
+
+ ## Test Date
+
+ - **Date**: January 7, 2026
+ - **Git Commit**: 49fbf68 (Complete implementation of sex and tissue site parameters)
+ - **Tester**: Ray Lim
test_slides/results/744547/744547_aeon_results.csv ADDED
@@ -0,0 +1,161 @@
1
+ Cancer Subtype,Confidence
2
+ HCC,0.9949353337287903
3
+ IHCH,0.0018046980258077383
4
+ ACC,0.000752770050894469
5
+ OPHSC,0.00036126983468420804
6
+ PAAC,0.0003446016926318407
7
+ THHC,0.0001074946703738533
8
+ NPC,9.898660937324166e-05
9
+ HNSC,8.415944466833025e-05
10
+ STAD,8.021725079743192e-05
11
+ HGNEC,7.314849062822759e-05
12
+ WT,6.0233636759221554e-05
13
+ ANSC,5.0760227168211713e-05
14
+ NSGCT,4.1861039790092036e-05
15
+ CHRCC,3.653292151284404e-05
16
+ EHCH,3.409649070817977e-05
17
+ PEMESO,3.360131086083129e-05
18
+ SFT,3.2356434530811384e-05
19
+ PANET,3.2246403861790895e-05
20
+ ANGS,3.0679162591695786e-05
21
+ PAAD,2.974703056679573e-05
22
+ EGC,2.666616092028562e-05
23
+ UM,2.5927740352926776e-05
24
+ MCC,2.5807627025642432e-05
25
+ SKCM,2.2870222892379388e-05
26
+ ODG,2.1314301193342544e-05
27
+ COAD,2.0228028006386012e-05
28
+ SCLC,2.006701470236294e-05
29
+ PTAD,1.9878523744409904e-05
30
+ SSRCC,1.9336708646733314e-05
31
+ PECOMA,1.8762702893582173e-05
32
+ SBOV,1.75385684997309e-05
33
+ PRCC,1.7517071682959795e-05
34
+ EMPD,1.742972199281212e-05
35
+ LMS,1.7376194591633976e-05
36
+ ES,1.7088817912735976e-05
37
+ SBWDNET,1.6885298464330845e-05
38
+ READ,1.6691648852429353e-05
39
+ ASTR,1.5237395928124897e-05
40
+ MACR,1.468232130719116e-05
41
+ ARMM,1.4290351828094572e-05
42
+ CSCC,1.4241238204704132e-05
43
+ LUSC,1.3981271877128165e-05
44
+ THPA,1.3920045603299513e-05
45
+ CCOV,1.389113640470896e-05
46
+ PRAD,1.3490677702066023e-05
47
+ BLCA,1.2866928955190815e-05
48
+ GIST,1.2835040251957253e-05
49
+ BCC,1.2371066986816004e-05
50
+ THPD,1.229105419042753e-05
51
+ BMGCT,1.1911726687685587e-05
52
+ DES,1.1774703125411179e-05
53
+ LGSOC,1.158510258392198e-05
54
+ UTUC,1.1252666809014045e-05
55
+ PAMPCA,1.1209620424779132e-05
56
+ ACYC,1.0676962119759992e-05
57
+ THYC,1.063129002432106e-05
58
+ ULMS,1.0360988198954146e-05
59
+ WDLS,9.846420653047971e-06
60
+ DA,9.834530828811694e-06
61
+ MPNST,9.739153028931469e-06
62
+ HNMUCM,9.407535799255129e-06
63
+ THAP,9.405741366208531e-06
64
+ OCS,9.383109500049613e-06
65
+ GBAD,9.255892109649722e-06
66
+ GCCAP,9.183406291413121e-06
67
+ SDCA,9.130208127317019e-06
68
+ EPIS,8.989960406324826e-06
69
+ PHC,8.885029274097178e-06
70
+ EHAE,8.772132787271403e-06
71
+ PLMESO,8.688036359671969e-06
72
+ ESCC,8.65026049723383e-06
73
+ TAC,8.619595064374153e-06
74
+ GRCT,8.587684533267748e-06
75
+ BLAD,8.564702511648647e-06
76
+ DSRCT,8.161241566995159e-06
77
+ EPM,7.880689736339264e-06
78
+ MFH,7.607672614540206e-06
79
+ SCBC,7.4185886660416145e-06
80
+ SEM,7.19011586625129e-06
81
+ SYNS,7.085985998855904e-06
82
+ UCP,6.969770311116008e-06
83
+ UEC,6.914885034348117e-06
84
+ LUCA,6.88146519678412e-06
85
+ GEJ,6.397517154255183e-06
86
+ ALUCA,6.210095307324082e-06
87
+ CHDM,6.182817287481157e-06
88
+ OS,6.126435437181499e-06
89
+ MAAP,6.075235432945192e-06
90
+ LUPC,6.040307198418304e-06
91
+ ESCA,5.798766324005555e-06
92
+ ERMS,5.519645128515549e-06
93
+ RBL,5.355142548069125e-06
94
+ VSC,5.320628133631544e-06
95
+ DDLS,5.237710411165608e-06
96
+ CCRCC,4.890111540589714e-06
97
+ ARMS,4.8574038373772055e-06
98
+ MNG,4.79665777675109e-06
99
+ HGSOC,4.588881893141661e-06
100
+ THYM,4.543203431239817e-06
101
+ BA,4.302230991015676e-06
102
+ NBL,4.251941845723195e-06
103
+ UCCC,4.113941486139083e-06
104
+ GBM,3.829367415164597e-06
105
+ EOV,3.7675256407965207e-06
106
+ CHS,3.7070362850499805e-06
107
+ IDC,2.831375240930356e-06
108
+ MBC,2.820524969138205e-06
109
+ DASTR,2.7697560653905384e-06
110
+ UCS,2.7230109935771907e-06
111
+ CESC,2.5985430056607584e-06
112
+ VMM,2.466509386067628e-06
113
+ ILC,2.451496357025462e-06
114
+ LUNE,2.4507951366103953e-06
115
+ ATM,2.3741329187032534e-06
116
+ MRLS,2.351298462599516e-06
117
+ THME,2.2384033400157932e-06
118
+ MOV,2.2177562186698196e-06
119
+ ECAD,2.15100362765952e-06
120
+ LUAD,1.7751663108356297e-06
121
+ ACRM,1.5891558859948418e-06
122
+ MFS,1.58587818077649e-06
123
+ PAST,1.1561178325791843e-06
124
+ USC,9.618892136131763e-07
125
+ SCHW,9.386900501340278e-07
126
+ NECNOS,6.424317007258651e-07
127
+ BRCANOS,4.31851503890357e-07
128
+ MDLC,4.30085862035412e-07
129
+ SCCNOS,3.5081365012956667e-07
130
+ SBC,3.1285472346098686e-07
131
+ NSCLC,2.687320375116542e-07
132
+ MXOV,2.395507863184321e-07
133
+ SARCNOS,2.364027835710658e-07
134
+ MEL,2.3354837708211562e-07
135
+ CHOL,2.3344369992628344e-07
136
+ PAASC,2.256272324530073e-07
137
+ MUP,2.2071883165608597e-07
138
+ BRCA,2.1710592079671187e-07
139
+ NVRINT,2.1676953565474832e-07
140
+ AMPCA,2.1609788802834373e-07
141
+ LUAS,2.136845722588987e-07
142
+ URCC,2.0894131580462272e-07
143
+ BRCNOS,2.067983615461344e-07
144
+ GINET,2.0037792580751557e-07
145
+ PDC,1.7432496690616972e-07
146
+ GNOS,1.6309185468799114e-07
147
+ NETNOS,1.6066735497588525e-07
148
+ APAD,1.5840650746667961e-07
149
+ DIFG,1.5840393530197616e-07
150
+ COADREAD,1.5738932290787488e-07
151
+ CSCLC,1.4587288887923933e-07
152
+ RCC,1.3664508458077762e-07
153
+ UMEC,1.1690717371948267e-07
154
+ GBC,8.854171795746879e-08
155
+ NSCLCPD,7.288439718422524e-08
156
+ UCEC,7.106321220362588e-08
157
+ ADNOS,0.0
158
+ CUP,0.0
159
+ CUPNOS,0.0
160
+ NOT,0.0
161
+ UDMN,0.0
test_slides/results/744547/744547_paladin_results.csv ADDED
@@ -0,0 +1,2 @@
+ Cancer Subtype,Biomarker,Score
+ HCC,Del_8p,0.2197096198797226
test_slides/results/755246/755246_aeon_results.csv ADDED
@@ -0,0 +1,161 @@
1
+ Cancer Subtype,Confidence
2
+ HCC,0.9960950016975403
3
+ IHCH,0.0028622548561543226
4
+ PAAC,0.0002748313418123871
5
+ OPHSC,4.615329817170277e-05
6
+ ACC,3.686468699015677e-05
7
+ NPC,3.5695637052413076e-05
8
+ HGNEC,2.805266558425501e-05
9
+ PAMPCA,2.4401044356636703e-05
10
+ HNSC,2.1414962247945368e-05
11
+ WT,2.062890780507587e-05
12
+ STAD,1.9500384951243177e-05
13
+ ANSC,1.9137134586344473e-05
14
+ NSGCT,1.8425340385874733e-05
15
+ ACYC,1.6153164324350655e-05
16
+ SFT,1.4236396054911893e-05
17
+ PAAD,1.385363702866016e-05
18
+ ANGS,1.3724854397878516e-05
19
+ CSCC,1.1247223483223934e-05
20
+ CHRCC,1.1185951734660193e-05
21
+ MCC,1.1107679711130913e-05
22
+ PEMESO,1.0368969924456906e-05
23
+ LUSC,1.0365045454818755e-05
24
+ BMGCT,1.0343398571421858e-05
25
+ LGSOC,1.0238399227091577e-05
26
+ COAD,1.0101895895786583e-05
27
+ PECOMA,9.616783245292027e-06
28
+ ODG,9.323048288933933e-06
29
+ READ,8.963126674643718e-06
30
+ GBAD,8.933258868637495e-06
31
+ ASTR,8.659791092213709e-06
32
+ EMPD,8.184463695215527e-06
33
+ CCOV,7.98109067545738e-06
34
+ SSRCC,7.932881999295205e-06
35
+ LMS,7.707203621976078e-06
36
+ SDCA,7.481134616682539e-06
37
+ EGC,7.304894097615033e-06
38
+ EHCH,7.18998262527748e-06
39
+ DA,7.112325420166599e-06
40
+ GRCT,7.074557743180776e-06
41
+ THYC,6.8966392063885e-06
42
+ ES,6.499152732430957e-06
43
+ TAC,6.04554270466906e-06
44
+ LUPC,5.894301011721836e-06
45
+ UM,5.811031769553665e-06
46
+ HNMUCM,5.70150587009266e-06
47
+ MPNST,5.589005922956858e-06
48
+ PTAD,5.411789061326999e-06
49
+ THHC,5.279471679386916e-06
50
+ SBOV,5.134588718647137e-06
51
+ EHAE,5.056334430264542e-06
52
+ UCP,4.79583195556188e-06
53
+ PANET,4.769396582560148e-06
54
+ UEC,4.691957201430341e-06
55
+ MFH,4.5436950131261256e-06
56
+ DES,4.397928478283575e-06
57
+ MACR,4.386647560750134e-06
58
+ DSRCT,4.259570687281666e-06
59
+ BCC,4.2207666410831735e-06
60
+ THYM,4.200107468932401e-06
61
+ SKCM,4.163767698628362e-06
62
+ SCLC,4.137152245675679e-06
63
+ SBWDNET,4.107126642338699e-06
64
+ BLCA,4.042853561259108e-06
65
+ THPA,3.967577868024819e-06
66
+ CHDM,3.964094503317028e-06
67
+ EPIS,3.7835466173419263e-06
68
+ UTUC,3.6972760426579043e-06
69
+ PRCC,3.5816708532365737e-06
70
+ RBL,3.18466845783405e-06
71
+ PHC,3.1321446840593126e-06
72
+ ARMS,3.073702600886463e-06
73
+ WDLS,2.967372893181164e-06
74
+ HGSOC,2.9246689337014686e-06
75
+ EOV,2.842233243427472e-06
76
+ PLMESO,2.8313088478171267e-06
77
+ LUCA,2.7812454845843604e-06
78
+ ERMS,2.6738305223261705e-06
79
+ ARMM,2.653551291587064e-06
80
+ ESCC,2.534464101699996e-06
81
+ GIST,2.4675975964782992e-06
82
+ OS,2.3962300019775284e-06
83
+ SEM,2.267454874527175e-06
84
+ UCCC,2.2549797904503066e-06
85
+ GEJ,2.228342964372132e-06
86
+ ALUCA,2.21250593313016e-06
87
+ CCRCC,2.1547989490500186e-06
88
+ GCCAP,2.145640792150516e-06
89
+ CHS,2.1370740341808414e-06
90
+ EPM,2.113296659445041e-06
91
+ LUNE,1.8964744867844274e-06
92
+ THAP,1.8861140915760188e-06
93
+ MNG,1.883976551653177e-06
94
+ SCBC,1.8645082491275389e-06
95
+ GBM,1.8083156874126871e-06
96
+ THME,1.7755396584107075e-06
97
+ SYNS,1.749014700180851e-06
98
+ VSC,1.7275091295232414e-06
99
+ PRAD,1.7234281131095486e-06
100
+ THPD,1.5961649069140549e-06
101
+ OCS,1.574517796143482e-06
102
+ ULMS,1.5738452248115209e-06
103
+ ESCA,1.5239124877552968e-06
104
+ LUAD,1.5225077731884085e-06
105
+ ATM,1.4334439129015664e-06
106
+ DASTR,1.3374641412156052e-06
107
+ BLAD,1.3204950164436013e-06
108
+ MRLS,1.2621910627785837e-06
109
+ PAST,1.214046847053396e-06
110
+ DDLS,1.207565446748049e-06
111
+ BA,1.193913703900762e-06
112
+ USC,1.1651327440631576e-06
113
+ UCS,1.127504333453544e-06
114
+ IDC,1.1131320434287773e-06
115
+ MBC,1.073736029866268e-06
116
+ CESC,1.0562247325651697e-06
117
+ ECAD,1.0221086768069654e-06
118
+ MOV,1.0173221198783722e-06
119
+ MFS,8.068860779530951e-07
120
+ ILC,7.115056064321834e-07
121
+ NBL,7.080777209012012e-07
122
+ MAAP,7.038765375000366e-07
123
+ SCHW,6.599229323001055e-07
124
+ VMM,6.425576088986418e-07
125
+ ACRM,5.137104039931728e-07
126
+ SCCNOS,2.88556179839361e-07
127
+ NECNOS,2.860723498088191e-07
128
+ MDLC,1.5423735533204308e-07
129
+ SBC,1.473414386055083e-07
130
+ LUAS,1.313886741627357e-07
131
+ AMPCA,1.1514853781591228e-07
132
+ BRCANOS,1.100927349284575e-07
133
+ COADREAD,1.0235498848487623e-07
134
+ BRCNOS,1.0230521496623624e-07
135
+ NSCLC,9.84467831699476e-08
136
+ URCC,9.794707978016959e-08
137
+ SARCNOS,9.708332271429754e-08
138
+ MEL,9.660030997338254e-08
139
+ MXOV,9.167136028054301e-08
140
+ BRCA,9.164460124111429e-08
141
+ GINET,9.150922863909727e-08
142
+ MUP,8.768505921352698e-08
143
+ GNOS,8.663077721848822e-08
144
+ CSCLC,8.40352782915943e-08
145
+ PAASC,8.390650663159249e-08
146
+ CHOL,8.387851124780354e-08
147
+ NVRINT,7.996587214620376e-08
148
+ PDC,7.627340892213397e-08
149
+ APAD,7.168080173869384e-08
150
+ NETNOS,5.965442539945798e-08
151
+ RCC,5.8925582635538376e-08
152
+ DIFG,5.8173132089223145e-08
153
+ UMEC,4.262162889290266e-08
154
+ GBC,3.4216750322002554e-08
155
+ NSCLCPD,2.902431717188847e-08
156
+ UCEC,2.4343131954651653e-08
157
+ ADNOS,0.0
158
+ CUP,0.0
159
+ CUPNOS,0.0
160
+ NOT,0.0
161
+ UDMN,0.0
test_slides/results/755246/755246_paladin_results.csv ADDED
@@ -0,0 +1,2 @@
+ Cancer Subtype,Biomarker,Score
+ HCC,Del_8p,0.5229867100715637
test_slides/results/881837/881837_aeon_results.csv ADDED
@@ -0,0 +1,161 @@
1
+ Cancer Subtype,Confidence
2
+ BLCA,0.9819191694259644
3
+ UTUC,0.008712546899914742
4
+ ESCC,0.0018056846456602216
5
+ HNSC,0.0012019714340567589
6
+ SCBC,0.0011546163586899638
7
+ CSCC,0.0009968428639695048
8
+ BLAD,0.000923480314668268
9
+ VSC,0.0003943268384318799
10
+ LUSC,0.0002285304362885654
11
+ MFH,0.0002151085063815117
12
+ CESC,0.00016243607387878
13
+ ANSC,0.00014434086915571243
14
+ OPHSC,0.00010440379264764488
15
+ ERMS,9.818207036005333e-05
16
+ ESCA,8.05684321676381e-05
17
+ THME,7.75650842115283e-05
18
+ DDLS,7.418631867039949e-05
19
+ EMPD,6.874153041280806e-05
20
+ SKCM,6.219839997356758e-05
21
+ STAD,5.7987392210634425e-05
22
+ ARMM,5.323389996192418e-05
23
+ VMM,4.82685245515313e-05
24
+ ANGS,4.764321420225315e-05
25
+ HNMUCM,4.300122964195907e-05
26
+ MPNST,4.062237712787464e-05
27
+ THAP,3.992616620962508e-05
28
+ CCOV,3.978966560680419e-05
29
+ EGC,3.804164953180589e-05
30
+ PECOMA,3.58297438651789e-05
31
+ GEJ,3.361854032846168e-05
32
+ LMS,3.190735151292756e-05
33
+ GIST,3.053894397453405e-05
34
+ UCP,2.6960187824442983e-05
35
+ BCC,2.6275198251823895e-05
36
+ WDLS,2.504539406800177e-05
37
+ COAD,2.4500428480678238e-05
38
+ READ,2.3131282432586886e-05
39
+ PLMESO,2.2688231183565222e-05
40
+ PHC,2.2610343876294792e-05
41
+ HGSOC,2.2057527530705556e-05
42
+ EHCH,2.0888484868919477e-05
43
+ SFT,2.061663690255955e-05
44
+ LUPC,2.0348075850051828e-05
45
+ ARMS,2.0138555555604398e-05
46
+ NPC,2.0069532183697447e-05
47
+ SDCA,1.9388126020203345e-05
48
+ PRAD,1.885020174086094e-05
49
+ PAMPCA,1.8754808479570784e-05
50
+ HGNEC,1.862645149230957e-05
51
+ DA,1.7162436051876284e-05
52
+ EPM,1.6749159840401262e-05
53
+ PTAD,1.6459982361993752e-05
54
+ NSGCT,1.5192160390142817e-05
55
+ UM,1.4609363461204339e-05
56
+ SSRCC,1.4532034583680797e-05
57
+ SEM,1.3767842574452516e-05
58
+ GCCAP,1.3716285138798412e-05
59
+ OS,1.3539702194975689e-05
60
+ LUCA,1.3414683053269982e-05
61
+ BA,1.340845392405754e-05
62
+ ODG,1.3301939361554105e-05
63
+ ULMS,1.3188847333367448e-05
64
+ ILC,1.3158909496269189e-05
65
+ GBAD,1.3059830052952748e-05
66
+ HCC,1.2799720934708603e-05
67
+ THYC,1.2515503840404563e-05
68
+ ACC,1.2373102435958572e-05
69
+ EHAE,1.2079371117579285e-05
70
+ THPD,1.2004609743598849e-05
71
+ LUAD,1.1745101801352575e-05
72
+ IHCH,1.1457958862592932e-05
73
+ BMGCT,1.14437843876658e-05
74
+ DSRCT,1.1421912859077565e-05
75
+ ASTR,1.1173871826031245e-05
76
+ THPA,1.0899780136242043e-05
77
+ DES,1.0811750144057442e-05
78
+ MNG,1.0468449545442127e-05
79
+ PEMESO,1.0408997695776634e-05
80
+ ES,1.0323257811251096e-05
81
+ SCHW,1.024130961013725e-05
82
+ ACRM,9.928556210070383e-06
83
+ RBL,9.5573650469305e-06
84
+ UCS,9.554930329613853e-06
85
+ CHDM,9.308922926720697e-06
86
+ NBL,9.223444067174569e-06
87
+ OCS,9.14371048565954e-06
88
+ MBC,8.864662049745675e-06
89
+ EPIS,8.839479050948285e-06
90
+ LUNE,7.987197932379786e-06
91
+ MFS,7.876495146774687e-06
92
+ GRCT,7.63608932174975e-06
93
+ UCCC,7.5786451816384215e-06
94
+ SBOV,7.530676612077514e-06
95
+ LGSOC,7.442437436111504e-06
96
+ IDC,7.195951184257865e-06
97
+ ATM,7.1556032708031125e-06
98
+ CCRCC,6.893693353049457e-06
99
+ PAAC,6.823243893450126e-06
100
+ TAC,6.447641681006644e-06
101
+ PRCC,6.3316215346276294e-06
102
+ THHC,6.0560396377695724e-06
103
+ ACYC,5.6987450989254285e-06
104
+ SYNS,5.622236585622886e-06
105
+ EOV,5.584717200690648e-06
106
+ PAAD,5.3922740335110575e-06
107
+ ALUCA,5.391060767578892e-06
108
+ SBWDNET,5.067640813649632e-06
109
+ CHRCC,5.057868747826433e-06
110
+ MOV,5.055631390860071e-06
111
+ MACR,4.9109949031844735e-06
112
+ USC,4.86157659906894e-06
113
+ THYM,4.622621872840682e-06
114
+ MRLS,3.963573362852912e-06
115
+ MCC,3.7632978546753293e-06
116
+ GBM,3.364578788023209e-06
117
+ DASTR,3.190721145074349e-06
118
+ WT,3.0390583560802042e-06
119
+ PAST,3.0256139780249214e-06
120
+ ECAD,2.9308980629139114e-06
121
+ UEC,2.8302536065893946e-06
122
+ SCLC,2.7639291602099547e-06
123
+ CHS,2.524923047531047e-06
124
+ PANET,1.8923717561847297e-06
125
+ MAAP,1.4423669654206606e-06
126
+ COADREAD,4.742931878354284e-07
127
+ NECNOS,3.882300347868295e-07
128
+ SCCNOS,3.831314643321093e-07
129
+ SARCNOS,3.6490285992840654e-07
130
+ CSCLC,3.121313341125642e-07
131
+ AMPCA,3.0839311193631147e-07
132
+ BRCANOS,3.0359336733454256e-07
133
+ MDLC,3.011147668985359e-07
134
+ LUAS,2.772100344827777e-07
135
+ MXOV,2.492066641934798e-07
136
+ UCEC,2.3689378281233076e-07
137
+ BRCA,2.2784350051097135e-07
138
+ PDC,2.1551392137553194e-07
139
+ PAASC,2.1430530239285872e-07
140
+ NETNOS,2.1316037646101904e-07
141
+ SBC,2.033053903005566e-07
142
+ NVRINT,1.984174957669893e-07
143
+ URCC,1.8473356533377228e-07
144
+ BRCNOS,1.7265826102175197e-07
145
+ NSCLC,1.700868494936003e-07
146
+ GBC,1.6887882736682513e-07
147
+ GINET,1.6104266364891373e-07
148
+ APAD,1.5570284972454829e-07
149
+ GNOS,1.547322483475e-07
150
+ CHOL,1.4654114011136699e-07
151
+ DIFG,1.4512880852635135e-07
152
+ MUP,1.4281692983786343e-07
153
+ MEL,1.391822195273562e-07
154
+ NSCLCPD,1.0773995029467187e-07
155
+ RCC,9.598431205404268e-08
156
+ UMEC,6.501737459529977e-08
157
+ ADNOS,0.0
158
+ CUP,0.0
159
+ CUPNOS,0.0
160
+ NOT,0.0
161
+ UDMN,0.0
test_slides/results/881837/881837_paladin_results.csv ADDED
@@ -0,0 +1,6 @@
+ Cancer Subtype,Biomarker,Score
+ BLCA,Del_6q,0.14185988903045654
+ BLCA,FGFR3_ONCOGENIC,0.08551423251628876
+ BLCA,RB1_ONCOGENIC,0.10338973999023438
+ BLCA,RB1_TRUNC,0.08562182635068893
+ BLCA,TP53_PATHWAY,0.8390844464302063
test_slides/test_samples.json ADDED
@@ -0,0 +1,29 @@
+ [
+   {
+     "sample_id": "P-0000034-T01-IM3",
+     "image_id": "881837",
+     "sex": "MALE",
+     "tissue_site": "Bladder",
+     "site_type": "Primary",
+     "cancer_type": "BLCA",
+     "confidence": 0.9412469863891602
+   },
+   {
+     "sample_id": "P-0000037-T01-IM3",
+     "image_id": "744547",
+     "sex": "Male",
+     "tissue_site": "Liver",
+     "site_type": "Metastasis",
+     "cancer_type": "HCC",
+     "confidence": 0.9471913576126099
+   },
+   {
+     "sample_id": "P-0000037-T02-IM3",
+     "image_id": "755246",
+     "sex": "Male",
+     "tissue_site": "Liver",
+     "site_type": "Primary",
+     "cancer_type": "HCC",
+     "confidence": 0.9306515455245972
+   }
+ ]
test_slides/verification_report.json ADDED
@@ -0,0 +1,38 @@
+ {
+   "total": 3,
+   "passed": 3,
+   "failed": 0,
+   "accuracy": 1.0,
+   "results": [
+     {
+       "slide_id": "881837",
+       "ground_truth": "BLCA",
+       "predicted": "BLCA",
+       "confidence": 0.9819191694259644,
+       "site_type": "Primary",
+       "sex": "MALE",
+       "tissue_site": "Bladder",
+       "status": "PASS"
+     },
+     {
+       "slide_id": "744547",
+       "ground_truth": "HCC",
+       "predicted": "HCC",
+       "confidence": 0.9949353337287904,
+       "site_type": "Metastasis",
+       "sex": "Male",
+       "tissue_site": "Liver",
+       "status": "PASS"
+     },
+     {
+       "slide_id": "755246",
+       "ground_truth": "HCC",
+       "predicted": "HCC",
+       "confidence": 0.9960950016975404,
+       "site_type": "Primary",
+       "sex": "Male",
+       "tissue_site": "Liver",
+       "status": "PASS"
+     }
+   ]
+ }
tests/inference/test_aeon.py CHANGED
@@ -4,43 +4,43 @@ import numpy as np
 import pytest
 import torch
 
-from mosaic.inference.aeon import (
+from mosaic.inference.aeon import CANCER_TYPES_TO_DROP
+from mosaic.inference.data import (
     CANCER_TYPE_TO_INT_MAP,
     INT_TO_CANCER_TYPE_MAP,
-    col_indices_to_drop,
 )
 
 
 class TestAeonConstants:
     """Test constants defined in aeon module."""
 
-    def test_col_indices_to_drop_is_list(self):
-        """Test that col_indices_to_drop is a list."""
-        assert isinstance(col_indices_to_drop, list)
+    def test_cancer_types_to_drop_is_list(self):
+        """Test that CANCER_TYPES_TO_DROP is a list."""
+        assert isinstance(CANCER_TYPES_TO_DROP, list)
 
-    def test_col_indices_to_drop_has_entries(self):
-        """Test that col_indices_to_drop has entries."""
-        assert len(col_indices_to_drop) > 0
+    def test_cancer_types_to_drop_has_entries(self):
+        """Test that CANCER_TYPES_TO_DROP has entries."""
+        assert len(CANCER_TYPES_TO_DROP) > 0
 
-    def test_col_indices_to_drop_are_integers(self):
-        """Test that all indices are integers."""
-        for idx in col_indices_to_drop:
-            assert isinstance(idx, int)
+    def test_cancer_types_to_drop_are_strings(self):
+        """Test that all cancer types are strings."""
+        for cancer_type in CANCER_TYPES_TO_DROP:
+            assert isinstance(cancer_type, str)
 
-    def test_col_indices_to_drop_are_valid(self):
-        """Test that all indices are valid cancer type indices."""
-        max_idx = max(CANCER_TYPE_TO_INT_MAP.values())
-        for idx in col_indices_to_drop:
-            assert 0 <= idx <= max_idx
+    def test_cancer_types_to_drop_are_valid(self):
+        """Test that all cancer types to drop are valid cancer type codes."""
+        # They should all be uppercase alphanumeric codes
+        for cancer_type in CANCER_TYPES_TO_DROP:
+            assert cancer_type.isupper()
+            assert len(cancer_type) >= 2
+            assert len(cancer_type) <= 10
 
-    def test_col_indices_to_drop_contains_expected_types(self):
+    def test_cancer_types_to_drop_contains_expected_types(self):
         """Test that specific cancer types are in the drop list."""
         # Check that some known cancer types to drop are in the list
-        drop_types = ["UDMN", "CUP", "BRCA", "MEL"]
-        for cancer_type in drop_types:
-            if cancer_type in CANCER_TYPE_TO_INT_MAP:
-                idx = CANCER_TYPE_TO_INT_MAP[cancer_type]
-                assert idx in col_indices_to_drop
+        expected_types = ["UDMN", "CUP", "NOT"]
+        for cancer_type in expected_types:
+            assert cancer_type in CANCER_TYPES_TO_DROP
 
     def test_cancer_type_maps_available(self):
         """Test that cancer type maps are available."""
uv.lock CHANGED
@@ -2336,6 +2336,40 @@ wheels = [
2336
  { url = "https://files.pythonhosted.org/packages/ef/70/a07dcf4f62598c8ad579df241af55ced65bed76e42e45d3c368a6d82dbc1/kombu-5.5.4-py3-none-any.whl", hash = "sha256:a12ed0557c238897d8e518f1d1fdf84bd1516c5e305af2dacd85c2015115feb8", size = 210034, upload-time = "2025-06-01T10:19:20.436Z" },
2337
  ]
2338
 
2339
  [[package]]
2340
  name = "loguru"
2341
  version = "0.7.3"
@@ -2565,11 +2599,14 @@ version = "0.1.0"
2565
  source = { editable = "." }
2566
  dependencies = [
2567
  { name = "gradio" },
 
2568
  { name = "loguru" },
2569
  { name = "memory-profiler" },
2570
  { name = "mussel", extra = ["torch-gpu"] },
2571
  { name = "paladin" },
 
2572
  { name = "spaces" },
 
2573
  ]
2574
 
2575
  [package.dev-dependencies]
@@ -2584,11 +2621,14 @@ dev = [
2584
  [package.metadata]
2585
  requires-dist = [
2586
  { name = "gradio", specifier = ">=5.49.0" },
 
2587
  { name = "loguru", specifier = ">=0.7.3" },
2588
  { name = "memory-profiler", specifier = ">=0.61.0" },
2589
  { name = "mussel", extras = ["torch-gpu"], git = "https://github.com/pathology-data-mining/Mussel.git?rev=mosaic-dev" },
2590
  { name = "paladin", git = "ssh://git@github.com/pathology-data-mining/paladin.git?rev=dev" },
 
2591
  { name = "spaces", specifier = ">=0.30.0" },
 
2592
  ]
2593
 
2594
  [package.metadata.requires-dev]
@@ -3199,8 +3239,8 @@ wheels = [
3199
 
3200
  [[package]]
3201
  name = "paladin"
3202
- version = "0.1.dev164+g0aef7cad1"
3203
- source = { git = "ssh://git@github.com/pathology-data-mining/paladin.git?rev=dev#0aef7cad1c2c4b54ea75406e3ed4e61c83591a71" }
3204
  dependencies = [
3205
  { name = "dvc" },
3206
  { name = "nn-template-core" },
@@ -3280,6 +3320,18 @@ wheels = [
3280
  { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" },
3281
  ]
3282
 
3283
  [[package]]
3284
  name = "pillow"
3285
  version = "11.3.0"
@@ -4846,6 +4898,20 @@ wheels = [
4846
  { url = "https://files.pythonhosted.org/packages/5c/2e/10b7fe92ddc69e5aae177775a3c8ed890bdd6cb40c2aa04e0a982937edd1/scmrepo-3.5.2-py3-none-any.whl", hash = "sha256:6e4660572b76512d0e013ca9806692188c736e8c9c76f833e3674fc21a558788", size = 73868, upload-time = "2025-08-06T14:46:31.635Z" },
4847
  ]
4848
 
4849
  [[package]]
4850
  name = "semantic-version"
4851
  version = "2.10.0"
@@ -5052,6 +5118,52 @@ wheels = [
5052
  { url = "https://files.pythonhosted.org/packages/be/72/2db2f49247d0a18b4f1bb9a5a39a0162869acf235f3a96418363947b3d46/starlette-0.48.0-py3-none-any.whl", hash = "sha256:0764ca97b097582558ecb498132ed0c7d942f233f365b86ba37770e026510659", size = 73736, upload-time = "2025-09-13T08:41:03.869Z" },
5053
  ]
5054
 
5055
  [[package]]
5056
  name = "stqdm"
5057
  version = "0.0.5"
 
2336
  { url = "https://files.pythonhosted.org/packages/ef/70/a07dcf4f62598c8ad579df241af55ced65bed76e42e45d3c368a6d82dbc1/kombu-5.5.4-py3-none-any.whl", hash = "sha256:a12ed0557c238897d8e518f1d1fdf84bd1516c5e305af2dacd85c2015115feb8", size = 210034, upload-time = "2025-06-01T10:19:20.436Z" },
2337
  ]
2338
 
2339
+ [[package]]
2340
+ name = "lightning"
+ version = "2.6.0"
+ source = { registry = "https://pypi.org/simple" }
+ dependencies = [
+ { name = "fsspec", extra = ["http"] },
+ { name = "lightning-utilities" },
+ { name = "packaging" },
+ { name = "pytorch-lightning" },
+ { name = "pyyaml" },
+ { name = "torch" },
+ { name = "torchmetrics" },
+ { name = "tqdm" },
+ { name = "typing-extensions" },
+ ]
+ sdist = { url = "https://files.pythonhosted.org/packages/1a/d5/892ea38816925b3511493e87b0b32494122bf8a20e66f4f2cd2667f95625/lightning-2.6.0.tar.gz", hash = "sha256:881841716b59c1837ae0c562c2e64fea9bcf49ef9de3867bd1f868557ec23d04", size = 656539, upload-time = "2025-11-28T09:34:25.069Z" }
+ wheels = [
+ { url = "https://files.pythonhosted.org/packages/d6/e9/36b340c7ec01dad6f034481e98fc9fc0133307beb05c714c0542af98bbde/lightning-2.6.0-py3-none-any.whl", hash = "sha256:f1a13a48909960a3454518486f113fae4fadb2db0e28e9c50d8d38d46c9dc3d6", size = 845956, upload-time = "2025-11-28T09:34:23.273Z" },
+ ]
+
+ [[package]]
+ name = "lightning-utilities"
+ version = "0.15.2"
+ source = { registry = "https://pypi.org/simple" }
+ dependencies = [
+ { name = "packaging" },
+ { name = "setuptools" },
+ { name = "typing-extensions" },
+ ]
+ sdist = { url = "https://files.pythonhosted.org/packages/b8/39/6fc58ca81492db047149b4b8fd385aa1bfb8c28cd7cacb0c7eb0c44d842f/lightning_utilities-0.15.2.tar.gz", hash = "sha256:cdf12f530214a63dacefd713f180d1ecf5d165338101617b4742e8f22c032e24", size = 31090, upload-time = "2025-08-06T13:57:39.242Z" }
+ wheels = [
+ { url = "https://files.pythonhosted.org/packages/de/73/3d757cb3fc16f0f9794dd289bcd0c4a031d9cf54d8137d6b984b2d02edf3/lightning_utilities-0.15.2-py3-none-any.whl", hash = "sha256:ad3ab1703775044bbf880dbf7ddaaac899396c96315f3aa1779cec9d618a9841", size = 29431, upload-time = "2025-08-06T13:57:38.046Z" },
+ ]
+
  [[package]]
  name = "loguru"
  version = "0.7.3"
 
  source = { editable = "." }
  dependencies = [
  { name = "gradio" },
+ { name = "lightning" },
  { name = "loguru" },
  { name = "memory-profiler" },
  { name = "mussel", extra = ["torch-gpu"] },
  { name = "paladin" },
+ { name = "seaborn" },
  { name = "spaces" },
+ { name = "statsmodels" },
  ]

  [package.dev-dependencies]
 
  [package.metadata]
  requires-dist = [
  { name = "gradio", specifier = ">=5.49.0" },
+ { name = "lightning", specifier = ">=2.6.0" },
  { name = "loguru", specifier = ">=0.7.3" },
  { name = "memory-profiler", specifier = ">=0.61.0" },
  { name = "mussel", extras = ["torch-gpu"], git = "https://github.com/pathology-data-mining/Mussel.git?rev=mosaic-dev" },
  { name = "paladin", git = "ssh://git@github.com/pathology-data-mining/paladin.git?rev=dev" },
+ { name = "seaborn", specifier = ">=0.13.2" },
  { name = "spaces", specifier = ">=0.30.0" },
+ { name = "statsmodels", specifier = ">=0.14.6" },
  ]

  [package.metadata.requires-dev]
 
  [[package]]
  name = "paladin"
+ version = "0.0.0"
+ source = { git = "ssh://git@github.com/pathology-data-mining/paladin.git?rev=dev#de6dab1a40948285d2e8aad322b9aca91ae669e6" }
  dependencies = [
  { name = "dvc" },
  { name = "nn-template-core" },
 
  { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" },
  ]

+ [[package]]
+ name = "patsy"
+ version = "1.0.2"
+ source = { registry = "https://pypi.org/simple" }
+ dependencies = [
+ { name = "numpy" },
+ ]
+ sdist = { url = "https://files.pythonhosted.org/packages/be/44/ed13eccdd0519eff265f44b670d46fbb0ec813e2274932dc1c0e48520f7d/patsy-1.0.2.tar.gz", hash = "sha256:cdc995455f6233e90e22de72c37fcadb344e7586fb83f06696f54d92f8ce74c0", size = 399942, upload-time = "2025-10-20T16:17:37.535Z" }
+ wheels = [
+ { url = "https://files.pythonhosted.org/packages/f1/70/ba4b949bdc0490ab78d545459acd7702b211dfccf7eb89bbc1060f52818d/patsy-1.0.2-py2.py3-none-any.whl", hash = "sha256:37bfddbc58fcf0362febb5f54f10743f8b21dd2aa73dec7e7ef59d1b02ae668a", size = 233301, upload-time = "2025-10-20T16:17:36.563Z" },
+ ]
+
  [[package]]
  name = "pillow"
  version = "11.3.0"
 
  { url = "https://files.pythonhosted.org/packages/5c/2e/10b7fe92ddc69e5aae177775a3c8ed890bdd6cb40c2aa04e0a982937edd1/scmrepo-3.5.2-py3-none-any.whl", hash = "sha256:6e4660572b76512d0e013ca9806692188c736e8c9c76f833e3674fc21a558788", size = 73868, upload-time = "2025-08-06T14:46:31.635Z" },
  ]

+ [[package]]
+ name = "seaborn"
+ version = "0.13.2"
+ source = { registry = "https://pypi.org/simple" }
+ dependencies = [
+ { name = "matplotlib" },
+ { name = "numpy" },
+ { name = "pandas" },
+ ]
+ sdist = { url = "https://files.pythonhosted.org/packages/86/59/a451d7420a77ab0b98f7affa3a1d78a313d2f7281a57afb1a34bae8ab412/seaborn-0.13.2.tar.gz", hash = "sha256:93e60a40988f4d65e9f4885df477e2fdaff6b73a9ded434c1ab356dd57eefff7", size = 1457696, upload-time = "2024-01-25T13:21:52.551Z" }
+ wheels = [
+ { url = "https://files.pythonhosted.org/packages/83/11/00d3c3dfc25ad54e731d91449895a79e4bf2384dc3ac01809010ba88f6d5/seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987", size = 294914, upload-time = "2024-01-25T13:21:49.598Z" },
+ ]
+
  [[package]]
  name = "semantic-version"
  version = "2.10.0"
5118
  { url = "https://files.pythonhosted.org/packages/be/72/2db2f49247d0a18b4f1bb9a5a39a0162869acf235f3a96418363947b3d46/starlette-0.48.0-py3-none-any.whl", hash = "sha256:0764ca97b097582558ecb498132ed0c7d942f233f365b86ba37770e026510659", size = 73736, upload-time = "2025-09-13T08:41:03.869Z" },
5119
  ]
5120
 
5121
+ [[package]]
5122
+ name = "statsmodels"
5123
+ version = "0.14.6"
5124
+ source = { registry = "https://pypi.org/simple" }
5125
+ dependencies = [
5126
+ { name = "numpy" },
5127
+ { name = "packaging" },
5128
+ { name = "pandas" },
5129
+ { name = "patsy" },
5130
+ { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
5131
+ { name = "scipy", version = "1.16.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
5132
+ ]
5133
+ sdist = { url = "https://files.pythonhosted.org/packages/0d/81/e8d74b34f85285f7335d30c5e3c2d7c0346997af9f3debf9a0a9a63de184/statsmodels-0.14.6.tar.gz", hash = "sha256:4d17873d3e607d398b85126cd4ed7aad89e4e9d89fc744cdab1af3189a996c2a", size = 20689085, upload-time = "2025-12-05T23:08:39.522Z" }
5134
+ wheels = [
5135
+ { url = "https://files.pythonhosted.org/packages/b5/6d/9ec309a175956f88eb8420ac564297f37cf9b1f73f89db74da861052dc29/statsmodels-0.14.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f4ff0649a2df674c7ffb6fa1a06bffdb82a6adf09a48e90e000a15a6aaa734b0", size = 10142419, upload-time = "2025-12-05T19:27:35.625Z" },
5136
+ { url = "https://files.pythonhosted.org/packages/86/8f/338c5568315ec5bf3ac7cd4b71e34b98cb3b0f834919c0c04a0762f878a1/statsmodels-0.14.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:109012088b3e370080846ab053c76d125268631410142daad2f8c10770e8e8d9", size = 10022819, upload-time = "2025-12-05T19:27:49.385Z" },
5137
+ { url = "https://files.pythonhosted.org/packages/b0/77/5fc4cbc2d608f9b483b0675f82704a8bcd672962c379fe4d82100d388dbf/statsmodels-0.14.6-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e93bd5d220f3cb6fc5fc1bffd5b094966cab8ee99f6c57c02e95710513d6ac3f", size = 10118927, upload-time = "2025-12-05T23:07:51.256Z" },
5138
+ { url = "https://files.pythonhosted.org/packages/94/55/b86c861c32186403fe121d9ab27bc16d05839b170d92a978beb33abb995e/statsmodels-0.14.6-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:06eec42d682fdb09fe5d70a05930857efb141754ec5a5056a03304c1b5e32fd9", size = 10413015, upload-time = "2025-12-05T23:08:53.95Z" },
5139
+ { url = "https://files.pythonhosted.org/packages/f9/be/daf0dba729ccdc4176605f4a0fd5cfe71cdda671749dca10e74a732b8b1c/statsmodels-0.14.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0444e88557df735eda7db330806fe09d51c9f888bb1f5906cb3a61fb1a3ed4a8", size = 10441248, upload-time = "2025-12-05T23:09:09.353Z" },
5140
+ { url = "https://files.pythonhosted.org/packages/9a/1c/2e10b7c7cc44fa418272996bf0427b8016718fd62f995d9c1f7ab37adf35/statsmodels-0.14.6-cp310-cp310-win_amd64.whl", hash = "sha256:e83a9abe653835da3b37fb6ae04b45480c1de11b3134bd40b09717192a1456ea", size = 9583410, upload-time = "2025-12-05T19:28:02.086Z" },
5141
+ { url = "https://files.pythonhosted.org/packages/a9/4d/df4dd089b406accfc3bb5ee53ba29bb3bdf5ae61643f86f8f604baa57656/statsmodels-0.14.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6ad5c2810fc6c684254a7792bf1cbaf1606cdee2a253f8bd259c43135d87cfb4", size = 10121514, upload-time = "2025-12-05T19:28:16.521Z" },
5142
+ { url = "https://files.pythonhosted.org/packages/82/af/ec48daa7f861f993b91a0dcc791d66e1cf56510a235c5cbd2ab991a31d5c/statsmodels-0.14.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:341fa68a7403e10a95c7b6e41134b0da3a7b835ecff1eb266294408535a06eb6", size = 10003346, upload-time = "2025-12-05T19:28:29.568Z" },
5143
+ { url = "https://files.pythonhosted.org/packages/a9/2c/c8f7aa24cd729970728f3f98822fb45149adc216f445a9301e441f7ac760/statsmodels-0.14.6-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bdf1dfe2a3ca56f5529118baf33a13efed2783c528f4a36409b46bbd2d9d48eb", size = 10129872, upload-time = "2025-12-05T23:09:25.724Z" },
5144
+ { url = "https://files.pythonhosted.org/packages/40/c6/9ae8e9b0721e9b6eb5f340c3a0ce8cd7cce4f66e03dd81f80d60f111987f/statsmodels-0.14.6-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a3764ba8195c9baf0925a96da0743ff218067a269f01d155ca3558deed2658ca", size = 10381964, upload-time = "2025-12-05T23:09:41.326Z" },
5145
+ { url = "https://files.pythonhosted.org/packages/28/8c/cf3d30c8c2da78e2ad1f50ade8b7fabec3ff4cdfc56fbc02e097c4577f90/statsmodels-0.14.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9e8d2e519852adb1b420e018f5ac6e6684b2b877478adf7fda2cfdb58f5acb5d", size = 10409611, upload-time = "2025-12-05T23:09:57.131Z" },
5146
+ { url = "https://files.pythonhosted.org/packages/bf/cc/018f14ecb58c6cb89de9d52695740b7d1f5a982aa9ea312483ea3c3d5f77/statsmodels-0.14.6-cp311-cp311-win_amd64.whl", hash = "sha256:2738a00fca51196f5a7d44b06970ace6b8b30289839e4808d656f8a98e35faa7", size = 9580385, upload-time = "2025-12-05T19:28:42.778Z" },
5147
+ { url = "https://files.pythonhosted.org/packages/25/ce/308e5e5da57515dd7cab3ec37ea2d5b8ff50bef1fcc8e6d31456f9fae08e/statsmodels-0.14.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fe76140ae7adc5ff0e60a3f0d56f4fffef484efa803c3efebf2fcd734d72ecb5", size = 10091932, upload-time = "2025-12-05T19:28:55.446Z" },
5148
+ { url = "https://files.pythonhosted.org/packages/05/30/affbabf3c27fb501ec7b5808230c619d4d1a4525c07301074eb4bda92fa9/statsmodels-0.14.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:26d4f0ed3b31f3c86f83a92f5c1f5cbe63fc992cd8915daf28ca49be14463a1c", size = 9997345, upload-time = "2025-12-05T19:29:10.278Z" },
5149
+ { url = "https://files.pythonhosted.org/packages/48/f5/3a73b51e6450c31652c53a8e12e24eac64e3824be816c0c2316e7dbdcb7d/statsmodels-0.14.6-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8c00a42863e4f4733ac9d078bbfad816249c01451740e6f5053ecc7db6d6368", size = 10058649, upload-time = "2025-12-05T23:10:12.775Z" },
5150
+ { url = "https://files.pythonhosted.org/packages/81/68/dddd76117df2ef14c943c6bbb6618be5c9401280046f4ddfc9fb4596a1b8/statsmodels-0.14.6-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:19b58cf7474aa9e7e3b0771a66537148b2df9b5884fbf156096c0e6c1ff0469d", size = 10339446, upload-time = "2025-12-05T23:10:28.503Z" },
5151
+ { url = "https://files.pythonhosted.org/packages/56/4a/dce451c74c4050535fac1ec0c14b80706d8fc134c9da22db3c8a0ec62c33/statsmodels-0.14.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:81e7dcc5e9587f2567e52deaff5220b175bf2f648951549eae5fc9383b62bc37", size = 10368705, upload-time = "2025-12-05T23:10:44.339Z" },
5152
+ { url = "https://files.pythonhosted.org/packages/60/15/3daba2df40be8b8a9a027d7f54c8dedf24f0d81b96e54b52293f5f7e3418/statsmodels-0.14.6-cp312-cp312-win_amd64.whl", hash = "sha256:b5eb07acd115aa6208b4058211138393a7e6c2cf12b6f213ede10f658f6a714f", size = 9543991, upload-time = "2025-12-05T23:10:58.536Z" },
5153
+ { url = "https://files.pythonhosted.org/packages/81/59/a5aad5b0cc266f5be013db8cde563ac5d2a025e7efc0c328d83b50c72992/statsmodels-0.14.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:47ee7af083623d2091954fa71c7549b8443168f41b7c5dce66510274c50fd73e", size = 10072009, upload-time = "2025-12-05T23:11:14.021Z" },
5154
+ { url = "https://files.pythonhosted.org/packages/53/dd/d8cfa7922fc6dc3c56fa6c59b348ea7de829a94cd73208c6f8202dd33f17/statsmodels-0.14.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:aa60d82e29fcd0a736e86feb63a11d2380322d77a9369a54be8b0965a3985f71", size = 9980018, upload-time = "2025-12-05T23:11:30.907Z" },
5155
+ { url = "https://files.pythonhosted.org/packages/ee/77/0ec96803eba444efd75dba32f2ef88765ae3e8f567d276805391ec2c98c6/statsmodels-0.14.6-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:89ee7d595f5939cc20bf946faedcb5137d975f03ae080f300ebb4398f16a5bd4", size = 10060269, upload-time = "2025-12-05T23:11:46.338Z" },
5156
+ { url = "https://files.pythonhosted.org/packages/10/b9/fd41f1f6af13a1a1212a06bb377b17762feaa6d656947bf666f76300fc05/statsmodels-0.14.6-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:730f3297b26749b216a06e4327fe0be59b8d05f7d594fb6caff4287b69654589", size = 10324155, upload-time = "2025-12-05T23:12:01.805Z" },
5157
+ { url = "https://files.pythonhosted.org/packages/ee/0f/a6900e220abd2c69cd0a07e3ad26c71984be6061415a60e0f17b152ecf08/statsmodels-0.14.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f1c08befa85e93acc992b72a390ddb7bd876190f1360e61d10cf43833463bc9c", size = 10349765, upload-time = "2025-12-05T23:12:18.018Z" },
5158
+ { url = "https://files.pythonhosted.org/packages/98/08/b79f0c614f38e566eebbdcff90c0bcacf3c6ba7a5bbb12183c09c29ca400/statsmodels-0.14.6-cp313-cp313-win_amd64.whl", hash = "sha256:8021271a79f35b842c02a1794465a651a9d06ec2080f76ebc3b7adce77d08233", size = 9540043, upload-time = "2025-12-05T23:12:33.887Z" },
5159
+ { url = "https://files.pythonhosted.org/packages/71/de/09540e870318e0c7b58316561d417be45eff731263b4234fdd2eee3511a8/statsmodels-0.14.6-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:00781869991f8f02ad3610da6627fd26ebe262210287beb59761982a8fa88cae", size = 10069403, upload-time = "2025-12-05T23:12:48.424Z" },
5160
+ { url = "https://files.pythonhosted.org/packages/ab/f0/63c1bfda75dc53cee858006e1f46bd6d6f883853bea1b97949d0087766ca/statsmodels-0.14.6-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:73f305fbf31607b35ce919fae636ab8b80d175328ed38fdc6f354e813b86ee37", size = 9989253, upload-time = "2025-12-05T23:13:05.274Z" },
5161
+ { url = "https://files.pythonhosted.org/packages/c1/98/b0dfb4f542b2033a3341aa5f1bdd97024230a4ad3670c5b0839d54e3dcab/statsmodels-0.14.6-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e443e7077a6e2d3faeea72f5a92c9f12c63722686eb80bb40a0f04e4a7e267ad", size = 10090802, upload-time = "2025-12-05T23:13:20.653Z" },
5162
+ { url = "https://files.pythonhosted.org/packages/34/0e/2408735aca9e764643196212f9069912100151414dd617d39ffc72d77eee/statsmodels-0.14.6-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3414e40c073d725007a6603a18247ab7af3467e1af4a5e5a24e4c27bc26673b4", size = 10337587, upload-time = "2025-12-05T23:13:37.597Z" },
5163
+ { url = "https://files.pythonhosted.org/packages/0f/36/4d44f7035ab3c0b2b6a4c4ebb98dedf36246ccbc1b3e2f51ebcd7ac83abb/statsmodels-0.14.6-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:a518d3f9889ef920116f9fa56d0338069e110f823926356946dae83bc9e33e19", size = 10363350, upload-time = "2025-12-05T23:13:53.08Z" },
5164
+ { url = "https://files.pythonhosted.org/packages/26/33/f1652d0c59fa51de18492ee2345b65372550501ad061daa38f950be390b6/statsmodels-0.14.6-cp314-cp314-win_amd64.whl", hash = "sha256:151b73e29f01fe619dbce7f66d61a356e9d1fe5e906529b78807df9189c37721", size = 9588010, upload-time = "2025-12-05T23:14:07.28Z" },
5165
+ ]
5166
+
5167
  [[package]]
5168
  name = "stqdm"
5169
  version = "0.0.5"