raylim committed on
Commit
ca72d12
·
1 Parent(s): a9cd161

fix: tiling level

Browse files
Files changed (2) hide show
  1. app.py +45 -7
  2. paladin_inference.py +26 -26
app.py CHANGED
@@ -6,28 +6,52 @@ import pandas as pd
6
  from PIL import Image
7
  import torch
8
  import spaces
9
- from huggingface_hub import snapshot_download
 
 
10
 
11
  from mussel.utils import get_features, segment_tissue
12
  from mussel.models import ModelType
13
  from aeon_inference import run_aeon
14
  from paladin_inference import run_paladin
15
 
16
- NUM_WORKERS = 0
17
 
18
  # Install Paladin from GitHub
19
  GIT_TOKEN = os.environ.get("GH_TOKEN")
20
- subprocess.run(f"pip install git+https://{GIT_TOKEN}@github.com/pathology-data-mining/paladin.git@dev", shell=True)
 
 
 
21
 
22
  # Download pre-trained models if not present
23
- local_repo_path = snapshot_download(repo_id="PDM-Group/paladin-aeon-models", local_dir="data")
 
 
 
24
 
25
  @spaces.GPU(duration=300)
26
  def get_features_and_infer(coords, slide_path, attrs, site_type):
27
  use_gpu = torch.cuda.is_available()
28
- features, _ = get_features(coords, slide_path, attrs, model_type=ModelType.OPTIMUS, model_path="data/optimus.pkl", use_gpu=use_gpu, batch_size=64, num_workers=NUM_WORKERS)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  # Step 3: Run Aeon to predict histology
 
31
  aeon_results, _ = run_aeon(
32
  features=features,
33
  model_path="data/aeon_model.pkl",
@@ -36,8 +60,11 @@ def get_features_and_infer(coords, slide_path, attrs, site_type):
36
  num_workers=NUM_WORKERS,
37
  use_cpu=not use_gpu,
38
  )
 
 
39
 
40
  # Step 4: Run Paladin to predict biomarkers
 
41
  paladin_results = run_paladin(
42
  features=features,
43
  model_map_path="data/paladin_model_map.csv",
@@ -47,22 +74,33 @@ def get_features_and_infer(coords, slide_path, attrs, site_type):
47
  num_workers=NUM_WORKERS,
48
  use_cpu=not use_gpu,
49
  )
 
 
50
 
51
  return aeon_results, paladin_results
52
 
53
 
54
  def analyze_slide(slide_path, site_type):
 
55
  _, _, coords, attrs = segment_tissue(
56
  slide_path=slide_path,
57
  patch_size=224,
58
  segment_threshold=15,
 
59
  median_blur_ksize=11,
60
  morphology_ex_kernel=2,
61
  tissue_area_threshold=2,
62
  hole_area_threshold=1,
63
- max_num_holes=2
 
 
 
 
 
 
 
 
64
  )
65
- aeon_results, paladin_results = get_features_and_infer(coords, slide_path, attrs, site_type)
66
  return aeon_results, paladin_results
67
 
68
 
 
6
  from PIL import Image
7
  import torch
8
  import spaces
9
+ from huggingface_hub import hf_hub_download, snapshot_download
10
+ from loguru import logger
11
+ import time
12
 
13
  from mussel.utils import get_features, segment_tissue
14
  from mussel.models import ModelType
15
  from aeon_inference import run_aeon
16
  from paladin_inference import run_paladin
17
 
18
+ NUM_WORKERS = 0 # Can't have multiple workers in HF ZeroGPU Gradio app
19
 
20
  # Install Paladin from GitHub
21
  GIT_TOKEN = os.environ.get("GH_TOKEN")
22
+ subprocess.run(
23
+ f"pip install git+https://{GIT_TOKEN}@github.com/pathology-data-mining/paladin.git@dev",
24
+ shell=True,
25
+ )
26
 
27
  # Download pre-trained models if not present
28
+ local_repo_path = snapshot_download(
29
+ repo_id="PDM-Group/paladin-aeon-models", local_dir="data"
30
+ )
31
+
32
 
33
  @spaces.GPU(duration=300)
34
  def get_features_and_infer(coords, slide_path, attrs, site_type):
35
  use_gpu = torch.cuda.is_available()
36
+ optimus_model_path = hf_hub_download(
37
+ repo_id="PDM-Group/paladin-aeon-models", filename="optimus.pkl"
38
+ )
39
+ start_time = time.time()
40
+ features, _ = get_features(
41
+ coords,
42
+ slide_path,
43
+ attrs,
44
+ model_type=ModelType.OPTIMUS,
45
+ model_path=optimus_model_path,
46
+ use_gpu=use_gpu,
47
+ batch_size=64,
48
+ num_workers=NUM_WORKERS,
49
+ )
50
+ end_time = time.time()
51
+ logger.info(f"Feature extraction completed in {end_time - start_time:.2f} seconds.")
52
 
53
  # Step 3: Run Aeon to predict histology
54
+ start_time = time.time()
55
  aeon_results, _ = run_aeon(
56
  features=features,
57
  model_path="data/aeon_model.pkl",
 
60
  num_workers=NUM_WORKERS,
61
  use_cpu=not use_gpu,
62
  )
63
+ end_time = time.time()
64
+ logger.info(f"Aeon inference completed in {end_time - start_time:.2f} seconds.")
65
 
66
  # Step 4: Run Paladin to predict biomarkers
67
+ start_time = time.time()
68
  paladin_results = run_paladin(
69
  features=features,
70
  model_map_path="data/paladin_model_map.csv",
 
74
  num_workers=NUM_WORKERS,
75
  use_cpu=not use_gpu,
76
  )
77
+ end_time = time.time()
78
+ logger.info(f"Paladin inference completed in {end_time - start_time:.2f} seconds.")
79
 
80
  return aeon_results, paladin_results
81
 
82
 
83
  def analyze_slide(slide_path, site_type):
84
+ start_time = time.time()
85
  _, _, coords, attrs = segment_tissue(
86
  slide_path=slide_path,
87
  patch_size=224,
88
  segment_threshold=15,
89
+ seg_level=-1,
90
  median_blur_ksize=11,
91
  morphology_ex_kernel=2,
92
  tissue_area_threshold=2,
93
  hole_area_threshold=1,
94
+ max_num_holes=2,
95
+ )
96
+ end_time = time.time()
97
+ logger.info(
98
+ f"Tissue segmentation completed in {end_time - start_time:.2f} seconds."
99
+ )
100
+ logger.info(f"Number of tissue patches: {len(coords)}")
101
+ aeon_results, paladin_results = get_features_and_infer(
102
+ coords, slide_path, attrs, site_type
103
  )
 
104
  return aeon_results, paladin_results
105
 
106
 
paladin_inference.py CHANGED
@@ -37,7 +37,7 @@ def load_model_map(model_map_path: str) -> dict[Any, Any]:
37
  with Path(model_map_path).open() as fp:
38
  rdr = csv.DictReader(fp)
39
  for row in rdr:
40
- histology = row["histology"]
41
  target = row["target_name"]
42
  model = row["model_path"]
43
  models[histology][target] = model
@@ -113,15 +113,17 @@ def logits_to_point_estimates(logits):
113
  return logits[:, ::2] / (logits[:, ::2] + logits[:, 1::2])
114
 
115
 
116
- def run_paladin(features: np.ndarray,
117
- aeon_results: Optional[pd.DataFrame] = None,
118
- histology_codes: List[str] = None,
119
- model_map_path: str = None,
120
- model_path: str = None,
121
- metastatic: bool = False,
122
- batch_size: int = BATCH_SIZE,
123
- num_workers: int = NUM_WORKERS,
124
- use_cpu: bool = False):
 
 
125
  """Run Paladin inference on a single slide, using the given embeddings
126
  and either a single model or a table mapping histologies and targets to models.
127
  If histology_codes is given, it is a list of OncoTree codes for the slide.
@@ -137,7 +139,6 @@ def run_paladin(features: np.ndarray,
137
  else:
138
  target_histologies = histology_codes
139
 
140
-
141
  # Build a dataset to feed to the model
142
  site = SiteType.METASTASIS if metastatic else SiteType.PRIMARY
143
 
@@ -155,9 +156,7 @@ def run_paladin(features: np.ndarray,
155
  if model_path:
156
  histology, target = "None", "None"
157
  try:
158
- score = run_model(
159
- device, dataset, model_path, num_workers, batch_size
160
- )
161
  results.append((histology, target, score))
162
  logger.info(f"histology: {histology} target: {target} score: {score}")
163
  except Exception as exc:
@@ -172,9 +171,7 @@ def run_paladin(features: np.ndarray,
172
 
173
  for target, model in sorted(model_map[histology].items()):
174
  try:
175
- score = run_model(
176
- device, dataset, model, num_workers, batch_size
177
- )
178
  results.append((histology, target, score))
179
  logger.info(
180
  f"histology: {histology} target: {target} score: {score}"
@@ -247,6 +244,7 @@ def parse_args():
247
 
248
  return opt
249
 
 
250
  def main():
251
  opt = parse_args()
252
  features = torch.load(opt.features_path)
@@ -255,15 +253,17 @@ def main():
255
  if opt.aeon_predictions_path:
256
  aeon_results = pd.read_csv(opt.aeon_predictions_path)
257
  logger.info(f"Loaded Aeon results from {opt.aeon_predictions_path}")
258
- df = run_paladin(features=features,
259
- aeon_results=aeon_results,
260
- histology_codes=opt.histology_codes,
261
- model_map_path=opt.model_map_path,
262
- model_path=opt.model_path,
263
- metastatic=opt.metastatic,
264
- batch_size=opt.batch_size,
265
- num_workers=opt.num_workers,
266
- use_cpu=opt.use_cpu)
 
 
267
  df.to_csv(opt.output_path, index=False)
268
  logger.info(f"Wrote {opt.output_path}")
269
 
 
37
  with Path(model_map_path).open() as fp:
38
  rdr = csv.DictReader(fp)
39
  for row in rdr:
40
+ histology = row["cancer_subtype"]
41
  target = row["target_name"]
42
  model = row["model_path"]
43
  models[histology][target] = model
 
113
  return logits[:, ::2] / (logits[:, ::2] + logits[:, 1::2])
114
 
115
 
116
+ def run_paladin(
117
+ features: np.ndarray,
118
+ aeon_results: Optional[pd.DataFrame] = None,
119
+ histology_codes: List[str] = None,
120
+ model_map_path: str = None,
121
+ model_path: str = None,
122
+ metastatic: bool = False,
123
+ batch_size: int = BATCH_SIZE,
124
+ num_workers: int = NUM_WORKERS,
125
+ use_cpu: bool = False,
126
+ ):
127
  """Run Paladin inference on a single slide, using the given embeddings
128
  and either a single model or a table mapping histologies and targets to models.
129
  If histology_codes is given, it is a list of OncoTree codes for the slide.
 
139
  else:
140
  target_histologies = histology_codes
141
 
 
142
  # Build a dataset to feed to the model
143
  site = SiteType.METASTASIS if metastatic else SiteType.PRIMARY
144
 
 
156
  if model_path:
157
  histology, target = "None", "None"
158
  try:
159
+ score = run_model(device, dataset, model_path, num_workers, batch_size)
 
 
160
  results.append((histology, target, score))
161
  logger.info(f"histology: {histology} target: {target} score: {score}")
162
  except Exception as exc:
 
171
 
172
  for target, model in sorted(model_map[histology].items()):
173
  try:
174
+ score = run_model(device, dataset, model, num_workers, batch_size)
 
 
175
  results.append((histology, target, score))
176
  logger.info(
177
  f"histology: {histology} target: {target} score: {score}"
 
244
 
245
  return opt
246
 
247
+
248
  def main():
249
  opt = parse_args()
250
  features = torch.load(opt.features_path)
 
253
  if opt.aeon_predictions_path:
254
  aeon_results = pd.read_csv(opt.aeon_predictions_path)
255
  logger.info(f"Loaded Aeon results from {opt.aeon_predictions_path}")
256
+ df = run_paladin(
257
+ features=features,
258
+ aeon_results=aeon_results,
259
+ histology_codes=opt.histology_codes,
260
+ model_map_path=opt.model_map_path,
261
+ model_path=opt.model_path,
262
+ metastatic=opt.metastatic,
263
+ batch_size=opt.batch_size,
264
+ num_workers=opt.num_workers,
265
+ use_cpu=opt.use_cpu,
266
+ )
267
  df.to_csv(opt.output_path, index=False)
268
  logger.info(f"Wrote {opt.output_path}")
269