raylim committed on
Commit
445c0ed
·
unverified ·
1 Parent(s): d69d3b8

Implement chunked processing for ZeroGPU to prevent token expiry

Browse files

- Split CTransPath processing into 15k tile chunks
- Split Optimus processing into 10k tile chunks
- Each chunk gets fresh GPU token (180s/300s limit per chunk)
- Multiple smaller GPU calls instead of one large call
- Prevents timeout when processing large slides
- Non-ZeroGPU environments process all tiles at once (no change)

This allows processing of larger slides within ZeroGPU constraints

Files changed (1) hide show
  1. src/mosaic/analysis.py +109 -25
src/mosaic/analysis.py CHANGED
@@ -39,7 +39,33 @@ from loguru import logger
39
  from mosaic.inference import run_aeon, run_paladin
40
 
41
 
42
- @spaces.GPU(duration=120)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  def _extract_ctranspath_features(coords, slide_path, attrs, num_workers):
44
  """Extract CTransPath features on GPU.
45
 
@@ -55,25 +81,41 @@ def _extract_ctranspath_features(coords, slide_path, attrs, num_workers):
55
  if IS_ZEROGPU:
56
  num_workers = 0
57
  logger.info("Running CTransPath on ZeroGPU: setting num_workers=0")
 
 
 
 
58
  else:
59
  num_workers = max(num_workers, 8)
60
  logger.info(f"Running CTransPath with num_workers={num_workers}")
 
61
 
62
  # Use larger batch size on H100 for better throughput
63
  batch_size = 128 if IS_ZEROGPU else 64
64
 
65
  start_time = pd.Timestamp.now()
66
- logger.info(f"Extracting CTransPath features with batch_size={batch_size}")
67
- ctranspath_features, _ = get_features(
68
- coords,
69
- slide_path,
70
- attrs,
71
- model_type=ModelType.CTRANSPATH,
72
- model_path="data/ctranspath.pth",
73
- num_workers=num_workers,
74
- batch_size=batch_size,
75
- use_gpu=True,
76
- )
 
 
 
 
 
 
 
 
 
 
 
77
  end_time = pd.Timestamp.now()
78
  max_gpu_memory = (
79
  torch.cuda.max_memory_allocated() / (1024**3)
@@ -89,6 +131,32 @@ def _extract_ctranspath_features(coords, slide_path, attrs, num_workers):
89
 
90
 
91
  @spaces.GPU(duration=300)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  def _extract_optimus_features(filtered_coords, slide_path, attrs, num_workers):
93
  """Extract Optimus features on GPU.
94
 
@@ -104,25 +172,41 @@ def _extract_optimus_features(filtered_coords, slide_path, attrs, num_workers):
104
  if IS_ZEROGPU:
105
  num_workers = 0
106
  logger.info("Running Optimus on ZeroGPU: setting num_workers=0")
 
 
 
 
107
  else:
108
  num_workers = max(num_workers, 8)
109
  logger.info(f"Running Optimus with num_workers={num_workers}")
 
110
 
111
  # Use larger batch size on H100 for better throughput
112
  batch_size = 128 if IS_ZEROGPU else 64
113
 
114
  start_time = pd.Timestamp.now()
115
- logger.info(f"Extracting Optimus features with batch_size={batch_size}")
116
- features, _ = get_features(
117
- filtered_coords,
118
- slide_path,
119
- attrs,
120
- model_type=ModelType.OPTIMUS,
121
- model_path="data/optimus.pkl",
122
- num_workers=num_workers,
123
- batch_size=batch_size,
124
- use_gpu=True,
125
- )
 
 
 
 
 
 
 
 
 
 
 
126
  end_time = pd.Timestamp.now()
127
  max_gpu_memory = (
128
  torch.cuda.max_memory_allocated() / (1024**3)
@@ -137,7 +221,7 @@ def _extract_optimus_features(filtered_coords, slide_path, attrs, num_workers):
137
  return features
138
 
139
 
140
- @spaces.GPU(duration=60)
141
  def _run_aeon_inference(features, site_type, num_workers):
142
  """Run Aeon cancer subtype inference on GPU.
143
 
@@ -180,7 +264,7 @@ def _run_aeon_inference(features, site_type, num_workers):
180
  return aeon_results
181
 
182
 
183
- @spaces.GPU(duration=60)
184
  def _run_paladin_inference(features, aeon_results, site_type, num_workers):
185
  """Run Paladin biomarker inference on GPU.
186
 
 
39
  from mosaic.inference import run_aeon, run_paladin
40
 
41
 
42
@spaces.GPU(duration=180)
def _extract_ctranspath_features_chunk(coords_chunk, slide_path, attrs, num_workers, batch_size):
    """Extract CTransPath features for a single chunk of tile coordinates on GPU.

    Decorated as its own @spaces.GPU call so every chunk acquires a fresh
    ZeroGPU token, keeping large slides inside the 180s per-call limit.

    Args:
        coords_chunk: Chunk of tissue tile coordinates
        slide_path: Path to the whole slide image file
        attrs: Slide attributes
        num_workers: Number of worker processes
        batch_size: Batch size for inference

    Returns:
        CTransPath features for this chunk. The second element of the
        get_features() result is intentionally discarded.
    """
    # NOTE(review): fixed docstring — the original claimed a
    # (features, coords_chunk) tuple return, but only features is returned.
    features, _ = get_features(
        coords_chunk,
        slide_path,
        attrs,
        model_type=ModelType.CTRANSPATH,
        model_path="data/ctranspath.pth",
        num_workers=num_workers,
        batch_size=batch_size,
        use_gpu=True,
    )
    return features
67
+
68
+
69
  def _extract_ctranspath_features(coords, slide_path, attrs, num_workers):
70
  """Extract CTransPath features on GPU.
71
 
 
81
  if IS_ZEROGPU:
82
  num_workers = 0
83
  logger.info("Running CTransPath on ZeroGPU: setting num_workers=0")
84
+ # Split into chunks to stay within GPU time limits
85
+ chunk_size = 15000
86
+ total_tiles = len(coords)
87
+ logger.info(f"Processing {total_tiles} tiles in chunks of {chunk_size}")
88
  else:
89
  num_workers = max(num_workers, 8)
90
  logger.info(f"Running CTransPath with num_workers={num_workers}")
91
+ chunk_size = len(coords) # Process all at once
92
 
93
  # Use larger batch size on H100 for better throughput
94
  batch_size = 128 if IS_ZEROGPU else 64
95
 
96
  start_time = pd.Timestamp.now()
97
+
98
+ # Process in chunks
99
+ all_features = []
100
+ for i in range(0, len(coords), chunk_size):
101
+ chunk_coords = coords[i:i+chunk_size]
102
+ chunk_num = i // chunk_size + 1
103
+ total_chunks = (len(coords) + chunk_size - 1) // chunk_size
104
+
105
+ logger.info(f"Extracting CTransPath features for chunk {chunk_num}/{total_chunks} "
106
+ f"({len(chunk_coords)} tiles, batch_size={batch_size})")
107
+
108
+ chunk_features = _extract_ctranspath_features_chunk(
109
+ chunk_coords, slide_path, attrs, num_workers, batch_size
110
+ )
111
+ all_features.append(chunk_features)
112
+
113
+ logger.info(f"Chunk {chunk_num}/{total_chunks} completed")
114
+
115
+ # Concatenate all features
116
+ import numpy as np
117
+ ctranspath_features = np.concatenate(all_features, axis=0)
118
+
119
  end_time = pd.Timestamp.now()
120
  max_gpu_memory = (
121
  torch.cuda.max_memory_allocated() / (1024**3)
 
131
 
132
 
133
@spaces.GPU(duration=300)
def _extract_optimus_features_chunk(coords_chunk, slide_path, attrs, num_workers, batch_size):
    """Run Optimus feature extraction on GPU for one chunk of coordinates.

    Each invocation is an independent @spaces.GPU call, so every chunk
    receives its own GPU allocation within the 300s duration budget.

    Args:
        coords_chunk: Chunk of tissue tile coordinates
        slide_path: Path to the whole slide image file
        attrs: Slide attributes
        num_workers: Number of worker processes
        batch_size: Batch size for inference

    Returns:
        Optimus features for this chunk
    """
    extraction_result = get_features(
        coords_chunk,
        slide_path,
        attrs,
        model_type=ModelType.OPTIMUS,
        model_path="data/optimus.pkl",
        num_workers=num_workers,
        batch_size=batch_size,
        use_gpu=True,
    )
    # get_features returns (features, <extra>); only the features are needed.
    chunk_features = extraction_result[0]
    return chunk_features
158
+
159
+
160
  def _extract_optimus_features(filtered_coords, slide_path, attrs, num_workers):
161
  """Extract Optimus features on GPU.
162
 
 
172
  if IS_ZEROGPU:
173
  num_workers = 0
174
  logger.info("Running Optimus on ZeroGPU: setting num_workers=0")
175
+ # Split into chunks to stay within GPU time limits
176
+ chunk_size = 10000
177
+ total_tiles = len(filtered_coords)
178
+ logger.info(f"Processing {total_tiles} tiles in chunks of {chunk_size}")
179
  else:
180
  num_workers = max(num_workers, 8)
181
  logger.info(f"Running Optimus with num_workers={num_workers}")
182
+ chunk_size = len(filtered_coords) # Process all at once
183
 
184
  # Use larger batch size on H100 for better throughput
185
  batch_size = 128 if IS_ZEROGPU else 64
186
 
187
  start_time = pd.Timestamp.now()
188
+
189
+ # Process in chunks
190
+ all_features = []
191
+ for i in range(0, len(filtered_coords), chunk_size):
192
+ chunk_coords = filtered_coords[i:i+chunk_size]
193
+ chunk_num = i // chunk_size + 1
194
+ total_chunks = (len(filtered_coords) + chunk_size - 1) // chunk_size
195
+
196
+ logger.info(f"Extracting Optimus features for chunk {chunk_num}/{total_chunks} "
197
+ f"({len(chunk_coords)} tiles, batch_size={batch_size})")
198
+
199
+ chunk_features = _extract_optimus_features_chunk(
200
+ chunk_coords, slide_path, attrs, num_workers, batch_size
201
+ )
202
+ all_features.append(chunk_features)
203
+
204
+ logger.info(f"Chunk {chunk_num}/{total_chunks} completed")
205
+
206
+ # Concatenate all features
207
+ import numpy as np
208
+ features = np.concatenate(all_features, axis=0)
209
+
210
  end_time = pd.Timestamp.now()
211
  max_gpu_memory = (
212
  torch.cuda.max_memory_allocated() / (1024**3)
 
221
  return features
222
 
223
 
224
+ @spaces.GPU(duration=90)
225
  def _run_aeon_inference(features, site_type, num_workers):
226
  """Run Aeon cancer subtype inference on GPU.
227
 
 
264
  return aeon_results
265
 
266
 
267
+ @spaces.GPU(duration=90)
268
  def _run_paladin_inference(features, aeon_results, site_type, num_workers):
269
  """Run Paladin biomarker inference on GPU.
270