raylim committed on
Commit
ca72d12
·
1 Parent(s): a9cd161

fix: tiling level

Browse files
Files changed (2) hide show
  1. app.py +45 -7
  2. paladin_inference.py +26 -26
app.py CHANGED
@@ -6,28 +6,52 @@ import pandas as pd
6
  from PIL import Image
7
  import torch
8
  import spaces
9
- from huggingface_hub import snapshot_download
 
 
10
 
11
  from mussel.utils import get_features, segment_tissue
12
  from mussel.models import ModelType
13
  from aeon_inference import run_aeon
14
  from paladin_inference import run_paladin
15
 
16
- NUM_WORKERS = 0
17
 
18
  # Install Paladin from GitHub
19
  GIT_TOKEN = os.environ.get("GH_TOKEN")
20
- subprocess.run(f"pip install git+https://{GIT_TOKEN}@github.com/pathology-data-mining/paladin.git@dev", shell=True)
 
 
 
21
 
22
  # Download pre-trained models if not present
23
- local_repo_path = snapshot_download(repo_id="PDM-Group/paladin-aeon-models", local_dir="data")
 
 
 
24
 
25
  @spaces.GPU(duration=300)
26
  def get_features_and_infer(coords, slide_path, attrs, site_type):
27
  use_gpu = torch.cuda.is_available()
28
- features, _ = get_features(coords, slide_path, attrs, model_type=ModelType.OPTIMUS, model_path="data/optimus.pkl", use_gpu=use_gpu, batch_size=64, num_workers=NUM_WORKERS)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  # Step 3: Run Aeon to predict histology
 
31
  aeon_results, _ = run_aeon(
32
  features=features,
33
  model_path="data/aeon_model.pkl",
@@ -36,8 +60,11 @@ def get_features_and_infer(coords, slide_path, attrs, site_type):
36
  num_workers=NUM_WORKERS,
37
  use_cpu=not use_gpu,
38
  )
 
 
39
 
40
  # Step 4: Run Paladin to predict biomarkers
 
41
  paladin_results = run_paladin(
42
  features=features,
43
  model_map_path="data/paladin_model_map.csv",
@@ -47,22 +74,33 @@ def get_features_and_infer(coords, slide_path, attrs, site_type):
47
  num_workers=NUM_WORKERS,
48
  use_cpu=not use_gpu,
49
  )
 
 
50
 
51
  return aeon_results, paladin_results
52
 
53
 
54
  def analyze_slide(slide_path, site_type):
 
55
  _, _, coords, attrs = segment_tissue(
56
  slide_path=slide_path,
57
  patch_size=224,
58
  segment_threshold=15,
 
59
  median_blur_ksize=11,
60
  morphology_ex_kernel=2,
61
  tissue_area_threshold=2,
62
  hole_area_threshold=1,
63
- max_num_holes=2
 
 
 
 
 
 
 
 
64
  )
65
- aeon_results, paladin_results = get_features_and_infer(coords, slide_path, attrs, site_type)
66
  return aeon_results, paladin_results
67
 
68
 
 
6
  from PIL import Image
7
  import torch
8
  import spaces
9
+ from huggingface_hub import hf_hub_download, snapshot_download
10
+ from loguru import logger
11
+ import time
12
 
13
  from mussel.utils import get_features, segment_tissue
14
  from mussel.models import ModelType
15
  from aeon_inference import run_aeon
16
  from paladin_inference import run_paladin
17
 
18
+ NUM_WORKERS = 0 # Can't have multiple workers in HF ZeroGPU Gradio app
19
 
20
  # Install Paladin from GitHub
21
  GIT_TOKEN = os.environ.get("GH_TOKEN")
22
+ subprocess.run(
23
+ f"pip install git+https://{GIT_TOKEN}@github.com/pathology-data-mining/paladin.git@dev",
24
+ shell=True,
25
+ )
26
 
27
  # Download pre-trained models if not present
28
+ local_repo_path = snapshot_download(
29
+ repo_id="PDM-Group/paladin-aeon-models", local_dir="data"
30
+ )
31
+
32
 
33
  @spaces.GPU(duration=300)
34
  def get_features_and_infer(coords, slide_path, attrs, site_type):
35
  use_gpu = torch.cuda.is_available()
36
+ optimus_model_path = hf_hub_download(
37
+ repo_id="PDM-Group/paladin-aeon-models", filename="optimus.pkl"
38
+ )
39
+ start_time = time.time()
40
+ features, _ = get_features(
41
+ coords,
42
+ slide_path,
43
+ attrs,
44
+ model_type=ModelType.OPTIMUS,
45
+ model_path=optimus_model_path,
46
+ use_gpu=use_gpu,
47
+ batch_size=64,
48
+ num_workers=NUM_WORKERS,
49
+ )
50
+ end_time = time.time()
51
+ logger.info(f"Feature extraction completed in {end_time - start_time:.2f} seconds.")
52
 
53
  # Step 3: Run Aeon to predict histology
54
+ start_time = time.time()
55
  aeon_results, _ = run_aeon(
56
  features=features,
57
  model_path="data/aeon_model.pkl",
 
60
  num_workers=NUM_WORKERS,
61
  use_cpu=not use_gpu,
62
  )
63
+ end_time = time.time()
64
+ logger.info(f"Aeon inference completed in {end_time - start_time:.2f} seconds.")
65
 
66
  # Step 4: Run Paladin to predict biomarkers
67
+ start_time = time.time()
68
  paladin_results = run_paladin(
69
  features=features,
70
  model_map_path="data/paladin_model_map.csv",
 
74
  num_workers=NUM_WORKERS,
75
  use_cpu=not use_gpu,
76
  )
77
+ end_time = time.time()
78
+ logger.info(f"Paladin inference completed in {end_time - start_time:.2f} seconds.")
79
 
80
  return aeon_results, paladin_results
81
 
82
 
83
  def analyze_slide(slide_path, site_type):
84
+ start_time = time.time()
85
  _, _, coords, attrs = segment_tissue(
86
  slide_path=slide_path,
87
  patch_size=224,
88
  segment_threshold=15,
89
+ seg_level=-1,
90
  median_blur_ksize=11,
91
  morphology_ex_kernel=2,
92
  tissue_area_threshold=2,
93
  hole_area_threshold=1,
94
+ max_num_holes=2,
95
+ )
96
+ end_time = time.time()
97
+ logger.info(
98
+ f"Tissue segmentation completed in {end_time - start_time:.2f} seconds."
99
+ )
100
+ logger.info(f"Number of tissue patches: {len(coords)}")
101
+ aeon_results, paladin_results = get_features_and_infer(
102
+ coords, slide_path, attrs, site_type
103
  )
 
104
  return aeon_results, paladin_results
105
 
106
 
paladin_inference.py CHANGED
@@ -37,7 +37,7 @@ def load_model_map(model_map_path: str) -> dict[Any, Any]:
37
  with Path(model_map_path).open() as fp:
38
  rdr = csv.DictReader(fp)
39
  for row in rdr:
40
- histology = row["histology"]
41
  target = row["target_name"]
42
  model = row["model_path"]
43
  models[histology][target] = model
@@ -113,15 +113,17 @@ def logits_to_point_estimates(logits):
113
  return logits[:, ::2] / (logits[:, ::2] + logits[:, 1::2])
114
 
115
 
116
- def run_paladin(features: np.ndarray,
117
- aeon_results: Optional[pd.DataFrame] = None,
118
- histology_codes: List[str] = None,
119
- model_map_path: str = None,
120
- model_path: str = None,
121
- metastatic: bool = False,
122
- batch_size: int = BATCH_SIZE,
123
- num_workers: int = NUM_WORKERS,
124
- use_cpu: bool = False):
 
 
125
  """Run Paladin inference on a single slide, using the given embeddings
126
  and either a single model or a table mapping histologies and targets to models.
127
  If histology_codes is given, it is a list of OncoTree codes for the slide.
@@ -137,7 +139,6 @@ def run_paladin(features: np.ndarray,
137
  else:
138
  target_histologies = histology_codes
139
 
140
-
141
  # Build a dataset to feed to the model
142
  site = SiteType.METASTASIS if metastatic else SiteType.PRIMARY
143
 
@@ -155,9 +156,7 @@ def run_paladin(features: np.ndarray,
155
  if model_path:
156
  histology, target = "None", "None"
157
  try:
158
- score = run_model(
159
- device, dataset, model_path, num_workers, batch_size
160
- )
161
  results.append((histology, target, score))
162
  logger.info(f"histology: {histology} target: {target} score: {score}")
163
  except Exception as exc:
@@ -172,9 +171,7 @@ def run_paladin(features: np.ndarray,
172
 
173
  for target, model in sorted(model_map[histology].items()):
174
  try:
175
- score = run_model(
176
- device, dataset, model, num_workers, batch_size
177
- )
178
  results.append((histology, target, score))
179
  logger.info(
180
  f"histology: {histology} target: {target} score: {score}"
@@ -247,6 +244,7 @@ def parse_args():
247
 
248
  return opt
249
 
 
250
  def main():
251
  opt = parse_args()
252
  features = torch.load(opt.features_path)
@@ -255,15 +253,17 @@ def main():
255
  if opt.aeon_predictions_path:
256
  aeon_results = pd.read_csv(opt.aeon_predictions_path)
257
  logger.info(f"Loaded Aeon results from {opt.aeon_predictions_path}")
258
- df = run_paladin(features=features,
259
- aeon_results=aeon_results,
260
- histology_codes=opt.histology_codes,
261
- model_map_path=opt.model_map_path,
262
- model_path=opt.model_path,
263
- metastatic=opt.metastatic,
264
- batch_size=opt.batch_size,
265
- num_workers=opt.num_workers,
266
- use_cpu=opt.use_cpu)
 
 
267
  df.to_csv(opt.output_path, index=False)
268
  logger.info(f"Wrote {opt.output_path}")
269
 
 
37
  with Path(model_map_path).open() as fp:
38
  rdr = csv.DictReader(fp)
39
  for row in rdr:
40
+ histology = row["cancer_subtype"]
41
  target = row["target_name"]
42
  model = row["model_path"]
43
  models[histology][target] = model
 
113
  return logits[:, ::2] / (logits[:, ::2] + logits[:, 1::2])
114
 
115
 
116
+ def run_paladin(
117
+ features: np.ndarray,
118
+ aeon_results: Optional[pd.DataFrame] = None,
119
+ histology_codes: List[str] = None,
120
+ model_map_path: str = None,
121
+ model_path: str = None,
122
+ metastatic: bool = False,
123
+ batch_size: int = BATCH_SIZE,
124
+ num_workers: int = NUM_WORKERS,
125
+ use_cpu: bool = False,
126
+ ):
127
  """Run Paladin inference on a single slide, using the given embeddings
128
  and either a single model or a table mapping histologies and targets to models.
129
  If histology_codes is given, it is a list of OncoTree codes for the slide.
 
139
  else:
140
  target_histologies = histology_codes
141
 
 
142
  # Build a dataset to feed to the model
143
  site = SiteType.METASTASIS if metastatic else SiteType.PRIMARY
144
 
 
156
  if model_path:
157
  histology, target = "None", "None"
158
  try:
159
+ score = run_model(device, dataset, model_path, num_workers, batch_size)
 
 
160
  results.append((histology, target, score))
161
  logger.info(f"histology: {histology} target: {target} score: {score}")
162
  except Exception as exc:
 
171
 
172
  for target, model in sorted(model_map[histology].items()):
173
  try:
174
+ score = run_model(device, dataset, model, num_workers, batch_size)
 
 
175
  results.append((histology, target, score))
176
  logger.info(
177
  f"histology: {histology} target: {target} score: {score}"
 
244
 
245
  return opt
246
 
247
+
248
  def main():
249
  opt = parse_args()
250
  features = torch.load(opt.features_path)
 
253
  if opt.aeon_predictions_path:
254
  aeon_results = pd.read_csv(opt.aeon_predictions_path)
255
  logger.info(f"Loaded Aeon results from {opt.aeon_predictions_path}")
256
+ df = run_paladin(
257
+ features=features,
258
+ aeon_results=aeon_results,
259
+ histology_codes=opt.histology_codes,
260
+ model_map_path=opt.model_map_path,
261
+ model_path=opt.model_path,
262
+ metastatic=opt.metastatic,
263
+ batch_size=opt.batch_size,
264
+ num_workers=opt.num_workers,
265
+ use_cpu=opt.use_cpu,
266
+ )
267
  df.to_csv(opt.output_path, index=False)
268
  logger.info(f"Wrote {opt.output_path}")
269