File size: 5,135 Bytes
0506a57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
#!/bin/bash
# Aeon Model Test Script
# This script runs the Aeon cancer subtype prediction model on test slides
# for reproducibility and validation.

# Stop at the first command that fails outside of a tested context.
set -o errexit

# ---------------------------------------------------------------------------
# Settings
# ---------------------------------------------------------------------------
# JSON file listing the slides to evaluate.
TEST_SAMPLES_FILE='test_slides/test_samples.json'
# Parent directory for the per-slide pipeline output directories.
RESULTS_DIR='test_slides/results'
# Directory that receives one pipeline log per slide.
LOG_DIR='test_slides/logs'
# Value handed to the pipeline's --segmentation-config option.
SEGMENTATION_CONFIG='Biopsy'
# Value handed to the pipeline's --num-workers option.
NUM_WORKERS=4

# ANSI colour sequences; stored as literal backslash escapes that `echo -e`
# expands when a message is printed.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # reset to the terminal default

# Opening banner, followed by a blank separator line.
printf '%s\n' \
    "=========================================" \
    "Aeon Model Test Suite" \
    "=========================================" \
    ""

# Make sure the output locations exist before any slide is processed.
for required_dir in "${RESULTS_DIR}" "${LOG_DIR}"; do
    mkdir -p "${required_dir}"
done

# Nothing can be tested without the list of samples: bail out early.
[ -f "${TEST_SAMPLES_FILE}" ] || {
    echo -e "${RED}Error: Test samples file not found: ${TEST_SAMPLES_FILE}${NC}"
    exit 1
}

# Read test samples
# Collect one slide identifier per line into SLIDE_IDS. Each sample's ID is its
# "slide_id" field, falling back to "image_id".
# The file name is passed to Python as an argument instead of being pasted into
# the program text, so a quote or backslash in the path cannot break (or inject
# code into) the Python snippet.
echo "Reading test samples from ${TEST_SAMPLES_FILE}..."
SLIDE_IDS=$(python3 -c '
import json
import sys

with open(sys.argv[1]) as f:
    samples = json.load(f)
for sample in samples:
    print(sample.get("slide_id") or sample.get("image_id"))
' "${TEST_SAMPLES_FILE}")

# Count slides
# An empty list must count as 0: piping an empty string through `echo | wc -l`
# would report 1. The arithmetic expansion also strips the padding that some
# `wc` implementations put around the number.
if [ -n "${SLIDE_IDS}" ]; then
    NUM_SLIDES=$(( $(printf '%s\n' "${SLIDE_IDS}" | wc -l) ))
else
    NUM_SLIDES=0
fi
echo -e "${GREEN}Found ${NUM_SLIDES} test slides${NC}"
if [ "${NUM_SLIDES}" -eq 0 ]; then
    # Make a vacuous run visible instead of letting it look like a clean pass.
    echo -e "${YELLOW}Warning: no test slides listed in ${TEST_SAMPLES_FILE}${NC}"
fi
echo ""

# Process each slide
# CURRENT counts slides started; PASSED / FAILED count their outcomes.
CURRENT=0
PASSED=0
FAILED=0

#######################################
# Print the ground truth for one slide as
# "cancer_subtype|site_type|sex|tissue_site".
# The file name and slide ID reach Python as arguments rather than being pasted
# into the program text, so quotes or backslashes in either cannot corrupt (or
# inject code into) the Python snippet. IDs are compared as strings, so a
# numeric ID in the JSON still matches.
# Arguments: $1 - samples JSON file
#            $2 - slide ID ("slide_id", falling back to "image_id")
# Outputs:   the "|"-joined record on stdout
# Returns:   non-zero if the file cannot be read, the slide is not listed, or
#            its entry lacks site_type / sex / tissue_site
#######################################
get_slide_metadata() {
    python3 -c '
import json
import sys

samples_file, wanted = sys.argv[1], sys.argv[2]
with open(samples_file) as f:
    samples = json.load(f)
for sample in samples:
    slide_id = sample.get("slide_id") or sample.get("image_id")
    if str(slide_id) == wanted:
        cancer_subtype = sample.get("cancer_subtype") or sample.get("cancer_type")
        fields = (cancer_subtype, sample["site_type"], sample["sex"], sample["tissue_site"])
        print("|".join(str(field) for field in fields))
        break
else:
    sys.exit("slide not listed in " + samples_file)
' "$1" "$2"
}

#######################################
# Print the first row of an Aeon results CSV as "subtype|confidence", with the
# confidence formatted to four decimal places. Prints nothing when the CSV has
# no data rows.
# Arguments: $1 - path to the results CSV
# Outputs:   "Cancer Subtype|Confidence" of the first row on stdout
# Returns:   non-zero if the CSV cannot be read or lacks the expected columns
#######################################
get_aeon_prediction() {
    python3 -c '
import sys

import pandas as pd

df = pd.read_csv(sys.argv[1])
if not df.empty:
    first = df.iloc[0]
    print("{}|{:.4f}".format(first["Cancer Subtype"], first["Confidence"]))
' "$1"
}

# Read the IDs line by line (no word splitting or glob expansion, so the
# iteration agrees with the line-based slide count). The list is supplied on
# file descriptor 3 so that commands run inside the loop, which inherit stdin,
# cannot swallow the remaining IDs.
while IFS= read -r -u 3 SLIDE_ID; do
    # Skip blank lines; this also covers an empty SLIDE_IDS, which the
    # here-string still delivers as a single empty line.
    [ -n "${SLIDE_ID}" ] || continue

    CURRENT=$((CURRENT + 1))

    echo "========================================="
    echo -e "${YELLOW}Processing slide ${CURRENT}/${NUM_SLIDES}: ${SLIDE_ID}${NC}"
    echo "========================================="

    # Get slide metadata. A lookup failure is recorded against this slide only;
    # without the explicit check, `set -e` would abort the whole run before the
    # summary is printed.
    if ! METADATA=$(get_slide_metadata "${TEST_SAMPLES_FILE}" "${SLIDE_ID}"); then
        echo -e "${RED}✗ FAIL: Could not read metadata for ${SLIDE_ID}${NC}"
        FAILED=$((FAILED + 1))
        continue
    fi

    IFS='|' read -r CANCER_SUBTYPE SITE_TYPE SEX TISSUE_SITE <<< "${METADATA}"

    echo "Ground Truth:"
    echo "  Cancer Subtype: ${CANCER_SUBTYPE}"
    echo "  Site Type: ${SITE_TYPE}"
    echo "  Sex: ${SEX}"
    echo "  Tissue Site: ${TISSUE_SITE}"
    echo ""

    # Find slide file: first match among the supported extensions.
    SLIDE_FILE=$(find test_slides -name "${SLIDE_ID}.svs" -o -name "${SLIDE_ID}.tiff" -o -name "${SLIDE_ID}.ndpi" 2>/dev/null | head -1)

    if [ -z "${SLIDE_FILE}" ]; then
        echo -e "${RED}Error: Slide file not found for ${SLIDE_ID}${NC}"
        FAILED=$((FAILED + 1))
        continue
    fi

    echo "Slide file: ${SLIDE_FILE}"
    echo ""

    # Run Mosaic pipeline with Aeon inference. The subtype is passed as
    # "Unknown"; the ground-truth subtype is only used for the comparison below.
    LOG_FILE="${LOG_DIR}/${SLIDE_ID}_aeon_test.log"

    echo "Running Mosaic pipeline..."
    if uv run python -m mosaic.cli \
        --input-slide "${SLIDE_FILE}" \
        --output-dir "${RESULTS_DIR}/${SLIDE_ID}" \
        --cancer-subtype "Unknown" \
        --site-type "${SITE_TYPE}" \
        --sex "${SEX}" \
        --tissue-site "${TISSUE_SITE}" \
        --segmentation-config "${SEGMENTATION_CONFIG}" \
        --num-workers "${NUM_WORKERS}" \
        > "${LOG_FILE}" 2>&1; then

        # Check if results exist
        AEON_RESULTS="${RESULTS_DIR}/${SLIDE_ID}/${SLIDE_ID}_aeon_results.csv"

        if [ ! -f "${AEON_RESULTS}" ]; then
            echo -e "${RED}✗ FAIL: Aeon results file not found${NC}"
            FAILED=$((FAILED + 1))
        elif ! PREDICTION=$(get_aeon_prediction "${AEON_RESULTS}"); then
            # Unreadable or malformed CSV: fail this slide, keep the run going.
            echo -e "${RED}✗ FAIL: Could not parse Aeon results file${NC}"
            echo "  File: ${AEON_RESULTS}"
            FAILED=$((FAILED + 1))
        elif [ -z "${PREDICTION}" ]; then
            # The CSV exists but holds no prediction row.
            echo -e "${RED}✗ FAIL: Aeon results file is empty${NC}"
            echo "  File: ${AEON_RESULTS}"
            FAILED=$((FAILED + 1))
        else
            IFS='|' read -r PRED_SUBTYPE CONFIDENCE <<< "${PREDICTION}"

            echo ""
            echo "Aeon Prediction:"
            echo "  Predicted: ${PRED_SUBTYPE}"
            echo "  Confidence: ${CONFIDENCE}"
            echo ""

            # Check if prediction matches ground truth
            if [[ "${PRED_SUBTYPE}" == "${CANCER_SUBTYPE}" ]]; then
                echo -e "${GREEN}✓ PASS: Prediction matches ground truth${NC}"
                PASSED=$((PASSED + 1))
            else
                echo -e "${RED}✗ FAIL: Prediction does not match ground truth${NC}"
                echo "  Expected: ${CANCER_SUBTYPE}"
                echo "  Got: ${PRED_SUBTYPE}"
                FAILED=$((FAILED + 1))
            fi
        fi
    else
        echo -e "${RED}✗ FAIL: Mosaic pipeline failed${NC}"
        echo "Check log file: ${LOG_FILE}"
        FAILED=$((FAILED + 1))
    fi

    echo ""
done 3<<< "${SLIDE_IDS}"

# Final report: totals first, then the verdict. The exit status mirrors the
# verdict (0 when nothing failed, 1 otherwise).
printf '%s\n' \
    "=========================================" \
    "Test Summary" \
    "=========================================" \
    "Total slides: ${NUM_SLIDES}"
echo -e "${GREEN}Passed: ${PASSED}${NC}"

# Failure path: the failed count is highlighted and the run ends non-zero.
if [ "${FAILED}" -gt 0 ]; then
    echo -e "${RED}Failed: ${FAILED}${NC}"
    echo ""
    echo -e "${RED}Some tests failed. Check logs in ${LOG_DIR}${NC}"
    exit 1
fi

# Success path: the failed count is printed without colour.
echo "Failed: ${FAILED}"
echo ""
echo -e "${GREEN}All tests passed!${NC}"
exit 0