Detect-Vehicle / chute_config.yml
meaculpitt's picture
v6-fp16: update chute_config.yml
651f424 verified
# Container image definition: base image, the shell commands run to build it,
# and the environment variables pointing at the baked-in artifacts.
Image:
  from_base: parachutes/python:3.12
  run_command:
    - pip install --upgrade setuptools wheel
    # Runtime dependencies (folded into one `pip install` invocation).
    - >-
      pip install 'numpy>=1.23' 'onnxruntime-gpu>=1.16'
      'opencv-python-headless>=4.7' 'pillow>=9.5'
      'huggingface_hub>=0.19.4' 'pydantic>=2.0'
      'pyyaml>=6.0' 'aiohttp>=3.9'
      'tensorrt' 'tensorrt-lean'
    # Register the `lib` directories of the nvidia.cudnn / nvidia.cublas
    # packages with the dynamic linker, then refresh its cache.
    # The Python code starts right after the opening quote: `>-` folds line
    # breaks into spaces, and `python3 -c` on 3.12 rejects a program that
    # begins with whitespace (IndentationError: unexpected indent).
    - >-
      python3 -c "import os, nvidia.cudnn, nvidia.cublas;
      cudnn=os.path.join(os.path.dirname(nvidia.cudnn.__file__),'lib');
      cublas=os.path.join(os.path.dirname(nvidia.cublas.__file__),'lib');
      open('/etc/ld.so.conf.d/nvidia-ort.conf','w').write(cudnn+chr(10)+cublas+chr(10))
      " && ldconfig
    # Bake model weights into image at build time — eliminates HF download on cold start.
    - >-
      python3 -c "import os; os.makedirs('/opt/model', exist_ok=True);
      from huggingface_hub import hf_hub_download;
      [hf_hub_download(repo_id='meaculpitt/Detect-Vehicle', filename=f, local_dir='/opt/model')
      for f in ['weights.onnx','class_names.txt','model_type.json','main.py','miner.py']];
      print('Model baked into image at /opt/model/')
      "
    # Attempt TRT engine pre-compilation (succeeds only if builder has GPU).
    # Loads the TensorRT shared libraries that exist under the hard-coded
    # tensorrt_libs path, opens the ONNX model with the TensorRT provider
    # (FP16, engine cache in /opt/trt_cache, 2 GiB workspace) with CUDA as
    # fallback, and runs one inference on a zero-filled 1x3x1280x1280 float32
    # input. Best-effort: ANY failure, not just a missing GPU, takes the
    # `|| echo` branch and the build continues.
    - >-
      python3 -c "import os, ctypes, numpy as np; os.makedirs('/opt/trt_cache', exist_ok=True);
      _TRT='/usr/local/lib/python3.12/dist-packages/tensorrt_libs';
      [ctypes.CDLL(os.path.join(_TRT,l),mode=ctypes.RTLD_GLOBAL)
      for l in ['libnvinfer.so.10','libnvinfer_plugin.so.10','libnvonnxparser.so.10']
      if os.path.exists(os.path.join(_TRT,l))];
      import onnxruntime as ort;
      sess=ort.InferenceSession('/opt/model/weights.onnx',
      providers=[('TensorrtExecutionProvider',
      {'device_id':0,'trt_fp16_enable':True,'trt_engine_cache_enable':True,
      'trt_engine_cache_path':'/opt/trt_cache','trt_max_workspace_size':2*1024**3}),
      'CUDAExecutionProvider']);
      sess.run(None, {sess.get_inputs()[0].name: np.zeros((1,3,1280,1280),dtype='float32')});
      print('TRT engine baked:', os.listdir('/opt/trt_cache'))
      " || echo 'TRT pre-warm skipped (no GPU at build time)'
  # Paths match the directories populated by the build commands above.
  # NOTE(review): nesting under Image is inferred from the lowercase key
  # naming shared with from_base/run_command — confirm against the consumer.
  environment:
    MODEL_PATH: /opt/model
    TRT_CACHE_PATH: /opt/trt_cache
# Hardware constraints for the nodes this deployment may be scheduled on:
# one GPU with at least 16 GB of VRAM, restricted to the listed GPU models.
NodeSelector:
  gpu_count: 1
  min_vram_gb_per_gpu: 16
  include:
    # Quoted on purpose: a bare 4090 resolves to a YAML integer, which would
    # make this the only non-string entry in a list of GPU identifiers.
    - '4090'
    - a100
    - a100_sxm
    - h100
    - h100_sxm
    - l40s
    - a40
# Runtime limits for the deployed service.
Chute:
  # Time limit in seconds, per the key name.
  timeout_seconds: 300
  # NOTE(review): presumably the number of requests one instance handles in
  # parallel — confirm against the platform documentation.
  concurrency: 4
  # Upper bound on the number of instances.
  max_instances: 5
  # NOTE(review): exact semantics (utilisation ratio?) not visible here —
  # confirm against the platform documentation.
  scaling_threshold: 0.5