raylim committed on
Commit
aafc601
·
unverified ·
1 Parent(s): 445c0ed

Fix CUDA device error by checking availability before reset

Browse files

- Add torch.cuda.is_available() check before reset_peak_memory_stats()
- Prevents RuntimeError on ZeroGPU when GPU context is released
- Applied to all 4 GPU functions: CTransPath, Optimus, Aeon, Paladin

Files changed (1) hide show
  1. src/mosaic/analysis.py +8 -4
src/mosaic/analysis.py CHANGED
@@ -125,7 +125,8 @@ def _extract_ctranspath_features(coords, slide_path, attrs, num_workers):
125
  logger.info(
126
  f"CTransPath extraction took {end_time - start_time} and used {max_gpu_memory:.2f} GB GPU memory"
127
  )
128
- torch.cuda.reset_peak_memory_stats()
 
129
 
130
  return ctranspath_features, coords
131
 
@@ -216,7 +217,8 @@ def _extract_optimus_features(filtered_coords, slide_path, attrs, num_workers):
216
  logger.info(
217
  f"Optimus extraction took {end_time - start_time} and used {max_gpu_memory:.2f} GB GPU memory"
218
  )
219
- torch.cuda.reset_peak_memory_stats()
 
220
 
221
  return features
222
 
@@ -259,7 +261,8 @@ def _run_aeon_inference(features, site_type, num_workers):
259
  logger.info(
260
  f"Aeon inference took {end_time - start_time} and used {max_gpu_memory:.2f} GB GPU memory"
261
  )
262
- torch.cuda.reset_peak_memory_stats()
 
263
 
264
  return aeon_results
265
 
@@ -304,7 +307,8 @@ def _run_paladin_inference(features, aeon_results, site_type, num_workers):
304
  logger.info(
305
  f"Paladin inference took {end_time - start_time} and used {max_gpu_memory:.2f} GB GPU memory"
306
  )
307
- torch.cuda.reset_peak_memory_stats()
 
308
 
309
  return paladin_results
310
 
 
125
  logger.info(
126
  f"CTransPath extraction took {end_time - start_time} and used {max_gpu_memory:.2f} GB GPU memory"
127
  )
128
+ if torch.cuda.is_available():
129
+ torch.cuda.reset_peak_memory_stats()
130
 
131
  return ctranspath_features, coords
132
 
 
217
  logger.info(
218
  f"Optimus extraction took {end_time - start_time} and used {max_gpu_memory:.2f} GB GPU memory"
219
  )
220
+ if torch.cuda.is_available():
221
+ torch.cuda.reset_peak_memory_stats()
222
 
223
  return features
224
 
 
261
  logger.info(
262
  f"Aeon inference took {end_time - start_time} and used {max_gpu_memory:.2f} GB GPU memory"
263
  )
264
+ if torch.cuda.is_available():
265
+ torch.cuda.reset_peak_memory_stats()
266
 
267
  return aeon_results
268
 
 
307
  logger.info(
308
  f"Paladin inference took {end_time - start_time} and used {max_gpu_memory:.2f} GB GPU memory"
309
  )
310
+ if torch.cuda.is_available():
311
+ torch.cuda.reset_peak_memory_stats()
312
 
313
  return paladin_results
314