Spaces:
Sleeping
Sleeping
Add aggressive GPU memory cleanup for T4 instances
- Force CUDA synchronization before cache clearing
- Add explicit garbage collection after model cleanup
- Log GPU memory usage after cleanup for debugging
- Prevents OOM crashes when running multiple slides consecutively
- Especially important for T4 GPUs with limited VRAM (~16GB)
- src/mosaic/analysis.py +10 -0
src/mosaic/analysis.py
CHANGED
|
@@ -54,6 +54,7 @@ else:
|
|
| 54 |
GPU_TYPE = f"Standard GPU ({GPU_NAME})"
|
| 55 |
|
| 56 |
import pickle
|
|
|
|
| 57 |
import pandas as pd
|
| 58 |
import gradio as gr
|
| 59 |
from pathlib import Path
|
|
@@ -438,7 +439,16 @@ def _run_inference_pipeline_impl(
|
|
| 438 |
return aeon_results, paladin_results
|
| 439 |
finally:
|
| 440 |
# Clean up models to free GPU memory
|
|
|
|
| 441 |
model_cache.cleanup()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 442 |
|
| 443 |
|
| 444 |
# ============================================================================
|
|
|
|
| 54 |
GPU_TYPE = f"Standard GPU ({GPU_NAME})"
|
| 55 |
|
| 56 |
import pickle
|
| 57 |
+
import gc
|
| 58 |
import pandas as pd
|
| 59 |
import gradio as gr
|
| 60 |
from pathlib import Path
|
|
|
|
| 439 |
return aeon_results, paladin_results
|
| 440 |
finally:
|
| 441 |
# Clean up models to free GPU memory
|
| 442 |
+
logger.info("Cleaning up models after single-slide inference")
|
| 443 |
model_cache.cleanup()
|
| 444 |
+
|
| 445 |
+
# Extra aggressive cleanup for T4 instances
|
| 446 |
+
if torch.cuda.is_available():
|
| 447 |
+
torch.cuda.synchronize()
|
| 448 |
+
torch.cuda.empty_cache()
|
| 449 |
+
gc.collect()
|
| 450 |
+
mem_allocated = torch.cuda.memory_allocated() / (1024**3)
|
| 451 |
+
logger.info(f"GPU memory after cleanup: {mem_allocated:.2f} GB")
|
| 452 |
|
| 453 |
|
| 454 |
# ============================================================================
|