cold-start fix: GPU onnx runtime + shutdown_after_seconds=86400
Browse files
- chute_config: onnxruntime-gpu, shutdown_after_seconds 300 -> 86400, timeout_seconds 900, min_vram_gb_per_gpu 16, max_hourly_price_per_gpu 2
- miner.py: CUDA provider with CPU fallback
- chute_config.yml +5 -2
- miner.py +6 -2
chute_config.yml
CHANGED
|
@@ -2,17 +2,20 @@ Image:
|
|
| 2 |
from_base: parachutes/python:3.12
|
| 3 |
run_command:
|
| 4 |
- pip install --upgrade setuptools wheel
|
| 5 |
-
- pip install huggingface_hub=
|
| 6 |
set_workdir: /app
|
| 7 |
|
| 8 |
NodeSelector:
|
| 9 |
gpu_count: 1
|
|
|
|
|
|
|
| 10 |
include:
|
| 11 |
- pro_6000
|
| 12 |
|
| 13 |
Chute:
|
| 14 |
tee: true
|
| 15 |
-
|
|
|
|
| 16 |
concurrency: 4
|
| 17 |
max_instances: 5
|
| 18 |
scaling_threshold: 0.5
|
|
|
|
| 2 |
from_base: parachutes/python:3.12
|
| 3 |
run_command:
|
| 4 |
- pip install --upgrade setuptools wheel
|
| 5 |
+
- pip install 'huggingface_hub>=0.19.4' 'onnxruntime-gpu[cuda,cudnn]>=1.16' 'opencv-python-headless>=4.7' 'numpy>=1.23' 'pydantic>=2.0' 'pyyaml>=6.0' 'aiohttp>=3.9'
|
| 6 |
set_workdir: /app
|
| 7 |
|
| 8 |
NodeSelector:
|
| 9 |
gpu_count: 1
|
| 10 |
+
min_vram_gb_per_gpu: 16
|
| 11 |
+
max_hourly_price_per_gpu: 2
|
| 12 |
include:
|
| 13 |
- pro_6000
|
| 14 |
|
| 15 |
Chute:
|
| 16 |
tee: true
|
| 17 |
+
timeout_seconds: 900
|
| 18 |
+
shutdown_after_seconds: 86400
|
| 19 |
concurrency: 4
|
| 20 |
max_instances: 5
|
| 21 |
scaling_threshold: 0.5
|
miner.py
CHANGED
|
@@ -52,11 +52,15 @@ class Miner:
|
|
| 52 |
so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
|
| 53 |
self.sess = ort.InferenceSession(
|
| 54 |
str(Path(path_hf_repo) / self.weights_file),
|
| 55 |
-
providers=[
|
|
|
|
|
|
|
|
|
|
| 56 |
sess_options=so,
|
| 57 |
)
|
| 58 |
self.inp = self.sess.get_inputs()[0].name
|
| 59 |
-
|
|
|
|
| 60 |
|
| 61 |
def __repr__(self) -> str:
|
| 62 |
return f"BeverageONNX(in={self.input_size}, cls={self.num_classes})"
|
|
|
|
| 52 |
so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
|
| 53 |
self.sess = ort.InferenceSession(
|
| 54 |
str(Path(path_hf_repo) / self.weights_file),
|
| 55 |
+
providers=[
|
| 56 |
+
("CUDAExecutionProvider", {"device_id": 0}),
|
| 57 |
+
"CPUExecutionProvider",
|
| 58 |
+
],
|
| 59 |
sess_options=so,
|
| 60 |
)
|
| 61 |
self.inp = self.sess.get_inputs()[0].name
|
| 62 |
+
active = self.sess.get_providers()[0]
|
| 63 |
+
print(f"✅ ONNX beverage model loaded (provider={active})")
|
| 64 |
|
| 65 |
def __repr__(self) -> str:
|
| 66 |
return f"BeverageONNX(in={self.input_size}, cls={self.num_classes})"
|