Spaces:
Sleeping
Sleeping
Optimize batch sizes for H100 ZeroGPU and reduce Optimus duration
- Reduce Optimus GPU duration from 600s to 300s to fit ZeroGPU limits
- Increase batch_size to 128 on ZeroGPU (H100) for better throughput
- Apply batch_size optimization to both CTransPath and Optimus
- Keep batch_size=64 for non-ZeroGPU environments
- Estimated throughput: 12,000-15,000 tiles in 300s with H-optimus-0
- src/mosaic/analysis.py +11 -5
src/mosaic/analysis.py
CHANGED
|
@@ -59,8 +59,11 @@ def _extract_ctranspath_features(coords, slide_path, attrs, num_workers):
     num_workers = max(num_workers, 8)
     logger.info(f"Running CTransPath with num_workers={num_workers}")
 
+    # Use larger batch size on H100 for better throughput
+    batch_size = 128 if IS_ZEROGPU else 64
+
     start_time = pd.Timestamp.now()
-    logger.info("Extracting CTransPath features")
+    logger.info(f"Extracting CTransPath features with batch_size={batch_size}")
     ctranspath_features, _ = get_features(
         coords,
         slide_path,
@@ -68,7 +71,7 @@ def _extract_ctranspath_features(coords, slide_path, attrs, num_workers):
         model_type=ModelType.CTRANSPATH,
         model_path="data/ctranspath.pth",
         num_workers=num_workers,
-        batch_size=64,
+        batch_size=batch_size,
         use_gpu=True,
     )
     end_time = pd.Timestamp.now()
@@ -85,7 +88,7 @@ def _extract_ctranspath_features(coords, slide_path, attrs, num_workers):
     return ctranspath_features, coords
 
 
-@spaces.GPU(duration=600)
+@spaces.GPU(duration=300)
 def _extract_optimus_features(filtered_coords, slide_path, attrs, num_workers):
     """Extract Optimus features on GPU.
 
@@ -105,8 +108,11 @@ def _extract_optimus_features(filtered_coords, slide_path, attrs, num_workers):
     num_workers = max(num_workers, 8)
     logger.info(f"Running Optimus with num_workers={num_workers}")
 
+    # Use larger batch size on H100 for better throughput
+    batch_size = 128 if IS_ZEROGPU else 64
+
    start_time = pd.Timestamp.now()
-    logger.info("Extracting Optimus features")
+    logger.info(f"Extracting Optimus features with batch_size={batch_size}")
     features, _ = get_features(
         filtered_coords,
         slide_path,
@@ -114,7 +120,7 @@ def _extract_optimus_features(filtered_coords, slide_path, attrs, num_workers):
         model_type=ModelType.OPTIMUS,
         model_path="data/optimus.pkl",
         num_workers=num_workers,
-        batch_size=64,
+        batch_size=batch_size,
         use_gpu=True,
     )
     end_time = pd.Timestamp.now()