raylim committed on
Commit
5549e2b
·
unverified ·
1 Parent(s): c96b0f8

Add aggressive GPU memory cleanup for T4 instances

Browse files

- Force CUDA synchronization before cache clearing
- Add explicit garbage collection after model cleanup
- Log GPU memory usage after cleanup for debugging
- Prevents OOM crashes when running multiple slides consecutively
- Especially important for T4 GPUs with limited VRAM (~16GB)

Files changed (1) hide show
  1. src/mosaic/analysis.py +10 -0
src/mosaic/analysis.py CHANGED
@@ -54,6 +54,7 @@ else:
54
  GPU_TYPE = f"Standard GPU ({GPU_NAME})"
55
 
56
  import pickle
 
57
  import pandas as pd
58
  import gradio as gr
59
  from pathlib import Path
@@ -438,7 +439,16 @@ def _run_inference_pipeline_impl(
438
  return aeon_results, paladin_results
439
  finally:
440
  # Clean up models to free GPU memory
 
441
  model_cache.cleanup()
 
 
 
 
 
 
 
 
442
 
443
 
444
  # ============================================================================
 
54
  GPU_TYPE = f"Standard GPU ({GPU_NAME})"
55
 
56
  import pickle
57
+ import gc
58
  import pandas as pd
59
  import gradio as gr
60
  from pathlib import Path
 
439
  return aeon_results, paladin_results
440
  finally:
441
  # Clean up models to free GPU memory
442
+ logger.info("Cleaning up models after single-slide inference")
443
  model_cache.cleanup()
444
+
445
+ # Extra aggressive cleanup for T4 instances
446
+ if torch.cuda.is_available():
447
+ torch.cuda.synchronize()
448
+ torch.cuda.empty_cache()
449
+ gc.collect()
450
+ mem_allocated = torch.cuda.memory_allocated() / (1024**3)
451
+ logger.info(f"GPU memory after cleanup: {mem_allocated:.2f} GB")
452
 
453
 
454
  # ============================================================================