open GPU pool + drop tee + warmup pass in __init__
Browse files
- chute_config: replace the include: pro_6000 allow-list with an exclude list ("5090", b200, h200, h20, mi300x) so a broader GPU pool is eligible; remove tee: true (lower compute_multiplier, wider scheduler pool)
- miner.py: run a dummy inference in Miner.__init__ to warm CUDA kernels before the first /predict (best-effort: a warmup failure is printed, not raised)
- chute_config.yml +6 -3
- miner.py +8 -0
chute_config.yml
CHANGED
|
@@ -9,11 +9,14 @@ NodeSelector:
|
|
| 9 |
gpu_count: 1
|
| 10 |
min_vram_gb_per_gpu: 16
|
| 11 |
max_hourly_price_per_gpu: 2
|
| 12 |
-
|
| 13 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
Chute:
|
| 16 |
-
tee: true
|
| 17 |
timeout_seconds: 900
|
| 18 |
shutdown_after_seconds: 86400
|
| 19 |
concurrency: 4
|
|
|
|
| 9 |
gpu_count: 1
|
| 10 |
min_vram_gb_per_gpu: 16
|
| 11 |
max_hourly_price_per_gpu: 2
|
| 12 |
+
exclude:
|
| 13 |
+
- "5090"
|
| 14 |
+
- b200
|
| 15 |
+
- h200
|
| 16 |
+
- h20
|
| 17 |
+
- mi300x
|
| 18 |
|
| 19 |
Chute:
|
|
|
|
| 20 |
timeout_seconds: 900
|
| 21 |
shutdown_after_seconds: 86400
|
| 22 |
concurrency: 4
|
miner.py
CHANGED
|
@@ -62,6 +62,14 @@ class Miner:
|
|
| 62 |
active = self.sess.get_providers()[0]
|
| 63 |
print(f"✅ ONNX beverage model loaded (provider={active})")
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
def __repr__(self) -> str:
|
| 66 |
return f"BeverageONNX(in={self.input_size}, cls={self.num_classes})"
|
| 67 |
|
|
|
|
| 62 |
active = self.sess.get_providers()[0]
|
| 63 |
print(f"✅ ONNX beverage model loaded (provider={active})")
|
| 64 |
|
| 65 |
+
# Warm CUDA kernels / ORT graph so the very first /predict isn't slow.
|
| 66 |
+
warm = np.zeros((64, 64, 3), dtype=np.uint8)
|
| 67 |
+
try:
|
| 68 |
+
self._infer(warm)
|
| 69 |
+
print("✅ ONNX warmup pass done")
|
| 70 |
+
except Exception as e:
|
| 71 |
+
print(f"⚠️ ONNX warmup pass failed: {e}")
|
| 72 |
+
|
| 73 |
def __repr__(self) -> str:
|
| 74 |
return f"BeverageONNX(in={self.input_size}, cls={self.num_classes})"
|
| 75 |
|