"""Mosaic Gradio app: cancer-subtype (Aeon) and biomarker (Paladin) inference
from an uploaded H&E whole slide image, running on Hugging Face ZeroGPU."""

import os
import subprocess
import time

import gradio as gr
import numpy as np
import pandas as pd
import spaces
import torch
from huggingface_hub import hf_hub_download, snapshot_download
from loguru import logger
from PIL import Image

from mussel.models import ModelType
from mussel.utils import get_features, segment_tissue

# Can't have multiple DataLoader workers in an HF ZeroGPU Gradio app.
NUM_WORKERS = 0

# Install Paladin from its private GitHub repo. This must happen BEFORE
# importing the aeon/paladin inference modules below — presumably they import
# `paladin` at module level, and would fail on a fresh container otherwise.
GIT_TOKEN = os.environ.get("GH_TOKEN")
if not GIT_TOKEN:
    # Fail loudly instead of silently interpolating the string "None" into the URL.
    raise RuntimeError("GH_TOKEN environment variable must be set to install paladin.")
subprocess.run(
    # Argument-list form (no shell) so the token is never interpolated into a
    # shell command string; check=True fails fast if the install fails.
    [
        "pip",
        "install",
        f"git+https://{GIT_TOKEN}@github.com/pathology-data-mining/paladin.git@dev",
    ],
    check=True,
)

# These modules depend on the just-installed package, so the late import is
# deliberate (E402).
from aeon_inference import run_aeon  # noqa: E402
from paladin_inference import run_paladin  # noqa: E402

# Download pre-trained models if not already present.
local_repo_path = snapshot_download(
    repo_id="PDM-Group/paladin-aeon-models", local_dir="data"
)


@spaces.GPU(duration=300)
def get_features_and_infer(coords, slide_path, attrs, site_type):
    """Extract patch features and run Aeon + Paladin inference (GPU-bound work).

    Args:
        coords: Tissue patch coordinates produced by ``segment_tissue``.
        slide_path: Path to the whole slide image file.
        attrs: Slide attributes produced by ``segment_tissue``.
        site_type: ``"Primary"`` or ``"Metastatic"`` — toggles the metastatic
            mode of both Aeon and Paladin.

    Returns:
        Tuple ``(aeon_results, paladin_results)``.
    """
    use_gpu = torch.cuda.is_available()
    optimus_model_path = hf_hub_download(
        repo_id="PDM-Group/paladin-aeon-models", filename="optimus.pkl"
    )

    # Step 2: extract patch-level features with the Optimus backbone.
    start_time = time.time()
    features, _ = get_features(
        coords,
        slide_path,
        attrs,
        model_type=ModelType.OPTIMUS,
        model_path=optimus_model_path,
        use_gpu=use_gpu,
        batch_size=64,
        num_workers=NUM_WORKERS,
    )
    end_time = time.time()
    logger.info(f"Feature extraction completed in {end_time - start_time:.2f} seconds.")

    # Step 3: Run Aeon to predict histology
    start_time = time.time()
    aeon_results, _ = run_aeon(
        features=features,
        model_path="data/aeon_model.pkl",
        metastatic=(site_type == "Metastatic"),
        batch_size=8,
        num_workers=NUM_WORKERS,
        use_cpu=not use_gpu,
    )
    end_time = time.time()
    logger.info(f"Aeon inference completed in {end_time - start_time:.2f} seconds.")

    # Step 4: Run Paladin to predict biomarkers (conditioned on Aeon's output).
    start_time = time.time()
    paladin_results = run_paladin(
        features=features,
        model_map_path="data/paladin_model_map.csv",
        aeon_results=aeon_results,
        metastatic=(site_type == "Metastatic"),
        batch_size=8,
        num_workers=NUM_WORKERS,
        use_cpu=not use_gpu,
    )
    end_time = time.time()
    logger.info(f"Paladin inference completed in {end_time - start_time:.2f} seconds.")

    return aeon_results, paladin_results


def analyze_slide(slide_path, site_type):
    """Full pipeline for one slide: segment tissue, then feature-extract + infer.

    Args:
        slide_path: Path to the uploaded whole slide image.
        site_type: ``"Primary"`` or ``"Metastatic"`` (from the UI dropdown).

    Returns:
        Tuple ``(aeon_results, paladin_results)`` for the two output tables.
    """
    # Step 1: segment tissue on the CPU (no GPU decorator needed here).
    start_time = time.time()
    _, _, coords, attrs = segment_tissue(
        slide_path=slide_path,
        patch_size=224,
        segment_threshold=15,
        seg_level=-1,
        median_blur_ksize=11,
        morphology_ex_kernel=2,
        tissue_area_threshold=2,
        hole_area_threshold=1,
        max_num_holes=2,
    )
    end_time = time.time()
    logger.info(
        f"Tissue segmentation completed in {end_time - start_time:.2f} seconds."
    )
    logger.info(f"Number of tissue patches: {len(coords)}")

    aeon_results, paladin_results = get_features_and_infer(
        coords, slide_path, attrs, site_type
    )
    return aeon_results, paladin_results


if __name__ == "__main__":
    input_slide = gr.File(
        label="Upload H&E Whole Slide Image",
        file_types=[".svs", ".tiff", ".tif"],
    )
    input_dropdown = gr.Dropdown(
        choices=["Primary", "Metastatic"],
        label="Site Type",
        value="Primary",
    )
    aeon_output_table = gr.Dataframe(
        headers=["Cancer Subtype", "Confidence"],
        label="Cancer Subtype Inference",
        datatype=["str", "number"],
    )
    paladin_output_table = gr.Dataframe(
        headers=["Cancer Subtype", "Biomarker", "Score"],
        label="Biomarker Inference",
        datatype=["str", "str", "number"],
    )
    iface = gr.Interface(
        fn=analyze_slide,
        inputs=[input_slide, input_dropdown],
        outputs=[aeon_output_table, paladin_output_table],
        title="Mosaic: H&E Whole Slide Image Cancer Subtype and Biomarker Inference",
        description="Upload an H&E whole slide image and select the site type to infer cancer subtype and biomarkers.",
        # NOTE(review): allow_flagging is deprecated in Gradio 4.x in favor of
        # flagging_mode — kept as-is to avoid assuming the installed version.
        allow_flagging="never",
    )
    # Single-concurrency queue: ZeroGPU allows only one GPU job at a time.
    iface.queue(max_size=10, default_concurrency_limit=1)
    iface.launch()