File size: 4,265 Bytes
ba4c80f
b4fa8fa
745c9a6
 
f6b9dda
745c9a6
650a3bf
d113ee1
ca72d12
 
 
745c9a6
e8a801c
25f77c6
fe8b721
 
745c9a6
ca72d12
ba4c80f
c53a6f1
ba4c80f
ca72d12
 
 
 
ba4c80f
c53a6f1
ca72d12
 
 
 
b4fa8fa
f6f74ce
d113ee1
 
ca72d12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c5a7f83
 
ca72d12
c5a7f83
2123547
 
 
 
d113ee1
 
c5a7f83
ca72d12
 
c5a7f83
 
ca72d12
c5a7f83
2123547
 
 
 
 
 
d113ee1
c5a7f83
ca72d12
 
4f0b8d7
b4fa8fa
745c9a6
d113ee1
 
ca72d12
d113ee1
 
 
 
ca72d12
d113ee1
 
 
 
ca72d12
 
 
 
 
 
 
 
 
d113ee1
 
 
 
0d12724
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
745c9a6
0d12724
 
b4fa8fa
 
0d12724
 
 
745c9a6
 
c09c60f
1413a57
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import subprocess
import os
import gradio as gr
import numpy as np
import pandas as pd
from PIL import Image
import torch
import spaces
from huggingface_hub import hf_hub_download, snapshot_download
from loguru import logger
import time

from mussel.utils import get_features, segment_tissue
from mussel.models import ModelType
from aeon_inference import run_aeon
from paladin_inference import run_paladin

NUM_WORKERS = 0  # Can't have multiple workers in HF ZeroGPU Gradio app

# Install Paladin from GitHub at startup (private repo, so an access token is
# required). Use argv-list form instead of shell=True so the token is never
# embedded in a shell command string, and check=True so a failed install stops
# the app instead of failing later with an ImportError.
GIT_TOKEN = os.environ.get("GH_TOKEN")
if not GIT_TOKEN:
    # Without the token the URL below is malformed ("https://None@...");
    # surface that now rather than letting pip fail cryptically.
    logger.warning("GH_TOKEN is not set; Paladin install from GitHub will fail.")
subprocess.run(
    [
        "pip",
        "install",
        f"git+https://{GIT_TOKEN}@github.com/pathology-data-mining/paladin.git@dev",
    ],
    check=True,
)

# Download pre-trained models if not present (snapshot_download caches and is
# a no-op when the snapshot already exists locally).
local_repo_path = snapshot_download(
    repo_id="PDM-Group/paladin-aeon-models", local_dir="data"
)


@spaces.GPU(duration=300)
def get_features_and_infer(coords, slide_path, attrs, site_type):
    """Extract patch features and run Aeon + Paladin inference for one slide.

    Runs inside the HF ZeroGPU allocation (max 300 s). Steps:
    feature extraction (Optimus) -> Aeon histology prediction ->
    Paladin biomarker prediction.

    Args:
        coords: Tissue patch coordinates from ``segment_tissue``.
        slide_path: Path to the whole-slide image file.
        attrs: Slide attributes from ``segment_tissue``.
        site_type: ``"Primary"`` or ``"Metastatic"`` (UI dropdown value).

    Returns:
        Tuple ``(aeon_results, paladin_results)``.
    """
    use_gpu = torch.cuda.is_available()
    # Compute the flag once; the original compared site_type at two call
    # sites, which could silently drift apart.
    metastatic = site_type == "Metastatic"

    optimus_model_path = hf_hub_download(
        repo_id="PDM-Group/paladin-aeon-models", filename="optimus.pkl"
    )

    # Step 2: Extract Optimus features for every tissue patch.
    start_time = time.time()
    features, _ = get_features(
        coords,
        slide_path,
        attrs,
        model_type=ModelType.OPTIMUS,
        model_path=optimus_model_path,
        use_gpu=use_gpu,
        batch_size=64,
        num_workers=NUM_WORKERS,
    )
    logger.info(
        f"Feature extraction completed in {time.time() - start_time:.2f} seconds."
    )

    # Step 3: Run Aeon to predict histology
    start_time = time.time()
    aeon_results, _ = run_aeon(
        features=features,
        model_path="data/aeon_model.pkl",
        metastatic=metastatic,
        batch_size=8,
        num_workers=NUM_WORKERS,
        use_cpu=not use_gpu,
    )
    logger.info(f"Aeon inference completed in {time.time() - start_time:.2f} seconds.")

    # Step 4: Run Paladin to predict biomarkers
    start_time = time.time()
    paladin_results = run_paladin(
        features=features,
        model_map_path="data/paladin_model_map.csv",
        aeon_results=aeon_results,
        metastatic=metastatic,
        batch_size=8,
        num_workers=NUM_WORKERS,
        use_cpu=not use_gpu,
    )
    logger.info(
        f"Paladin inference completed in {time.time() - start_time:.2f} seconds."
    )

    return aeon_results, paladin_results


def analyze_slide(slide_path, site_type):
    """Segment tissue in a slide, then run feature extraction and inference.

    Gradio callback: takes the uploaded slide path and the site-type dropdown
    value, and returns ``(aeon_results, paladin_results)`` for the two output
    tables.
    """
    seg_start = time.time()
    # Step 1: find tissue regions and produce 224x224 patch coordinates.
    _, _, coords, attrs = segment_tissue(
        slide_path=slide_path,
        patch_size=224,
        segment_threshold=15,
        seg_level=-1,
        median_blur_ksize=11,
        morphology_ex_kernel=2,
        tissue_area_threshold=2,
        hole_area_threshold=1,
        max_num_holes=2,
    )
    seg_elapsed = time.time() - seg_start
    logger.info(f"Tissue segmentation completed in {seg_elapsed:.2f} seconds.")
    logger.info(f"Number of tissue patches: {len(coords)}")

    # Steps 2-4 (feature extraction + Aeon + Paladin) run on the GPU worker.
    return get_features_and_infer(coords, slide_path, attrs, site_type)


if __name__ == "__main__":
    # --- Inputs -----------------------------------------------------------
    input_slide = gr.File(
        label="Upload H&E Whole Slide Image",
        file_types=[".svs", ".tiff", ".tif"],
    )
    input_dropdown = gr.Dropdown(
        choices=["Primary", "Metastatic"],
        value="Primary",
        label="Site Type",
    )

    # --- Outputs ----------------------------------------------------------
    aeon_output_table = gr.Dataframe(
        label="Cancer Subtype Inference",
        headers=["Cancer Subtype", "Confidence"],
        datatype=["str", "number"],
    )
    paladin_output_table = gr.Dataframe(
        label="Biomarker Inference",
        headers=["Cancer Subtype", "Biomarker", "Score"],
        datatype=["str", "str", "number"],
    )

    # --- App --------------------------------------------------------------
    iface = gr.Interface(
        fn=analyze_slide,
        inputs=[input_slide, input_dropdown],
        outputs=[aeon_output_table, paladin_output_table],
        title="Mosaic: H&E Whole Slide Image Cancer Subtype and Biomarker Inference",
        description="Upload an H&E whole slide image and select the site type to infer cancer subtype and biomarkers.",
        allow_flagging="never",
    )

    # Serialize requests: ZeroGPU permits only one concurrent GPU job.
    iface.queue(max_size=10, default_concurrency_limit=1)
    iface.launch()