| # Image build definition: names the base image and lists the commands used |
| # to build on top of it. |
| # NOTE(review): run_command entries are presumably executed in list order |
| # by the platform that consumes this file - confirm there. |
| Image: |
| from_base: parachutes/python:3.12 |
| run_command: |
| # Upgrade the packaging tools before the dependency install that follows. |
| - pip install --upgrade setuptools wheel |
| # Install the runtime dependencies in a single pip invocation (the folded |
| # scalar joins the lines below with spaces). |
| # TensorRT is held to the 10.x series because the pre-warm command in this |
| # file loads the libraries by their '.so.10' sonames and silently skips any |
| # that are missing; an unpinned major upgrade would disable that preload. |
| - >- |
| pip install 'numpy>=1.23' 'onnxruntime-gpu>=1.16' |
| 'opencv-python-headless>=4.7' 'pillow>=9.5' |
| 'huggingface_hub>=0.19.4' 'pydantic>=2.0' |
| 'pyyaml>=6.0' 'aiohttp>=3.9' |
| 'tensorrt>=10,<11' 'tensorrt-lean>=10,<11' |
| # Register the 'lib' directories of the nvidia.cudnn and nvidia.cublas |
| # Python packages in /etc/ld.so.conf.d/nvidia-ort.conf, then run ldconfig. |
| # The first Python statement sits on the same line as the opening quote: |
| # the folded scalar turns a line break into a space, and Python 3.12 rejects |
| # code that starts with a space as an unexpected indent. |
| # __path__ is used rather than __file__ because __file__ is None when a |
| # package has no __init__.py; write_text closes the file before ldconfig. |
| - >- |
| python3 -c "import os, pathlib, nvidia.cudnn, nvidia.cublas; |
| cudnn=os.path.join(nvidia.cudnn.__path__[0],'lib'); |
| cublas=os.path.join(nvidia.cublas.__path__[0],'lib'); |
| pathlib.Path('/etc/ld.so.conf.d/nvidia-ort.conf').write_text(cudnn+chr(10)+cublas+chr(10)) |
| " && ldconfig |
| |
| # Download the listed files from the meaculpitt/Detect-Vehicle Hugging Face |
| # repository into /opt/model so they are stored inside the image. |
| # The first Python statement sits on the same line as the opening quote: |
| # the folded scalar turns a line break into a space, and Python 3.12 rejects |
| # code that starts with a space as an unexpected indent. |
| # The list comprehension is used only for its side effects because a for |
| # statement cannot follow ';' in a one-line program. |
| - >- |
| python3 -c "import os; os.makedirs('/opt/model', exist_ok=True); |
| from huggingface_hub import hf_hub_download; |
| [hf_hub_download(repo_id='meaculpitt/Detect-Vehicle', filename=f, local_dir='/opt/model') |
| for f in ['weights.onnx','class_names.txt','model_type.json','main.py','miner.py']]; |
| print('Model baked into image at /opt/model/') |
| " |
| |
| # Best-effort TensorRT pre-warm: open /opt/model/weights.onnx with the |
| # TensorRT execution provider (engine cache enabled, cache directory |
| # /opt/trt_cache) and run one all-zero 1x3x1280x1280 float32 input so the |
| # engine cache is written at build time. If the command fails, the trailing |
| # echo fallback lets the build continue. |
| # The first Python statement sits on the same line as the opening quote: |
| # the folded scalar turns a line break into a space, and Python 3.12 rejects |
| # code that starts with a space as an unexpected indent, which the echo |
| # fallback would otherwise hide. |
| # The TensorRT libraries are preloaded from wherever the tensorrt_libs |
| # package is installed (found with importlib, without importing it); the |
| # previous fixed dist-packages path is kept as the fallback. |
| - >- |
| python3 -c "import os, ctypes, importlib.util, numpy as np; os.makedirs('/opt/trt_cache', exist_ok=True); |
| _spec=importlib.util.find_spec('tensorrt_libs'); |
| _dirs=list(_spec.submodule_search_locations or []) if _spec else []; |
| _TRT=_dirs[0] if _dirs else '/usr/local/lib/python3.12/dist-packages/tensorrt_libs'; |
| [ctypes.CDLL(os.path.join(_TRT,l),mode=ctypes.RTLD_GLOBAL) |
| for l in ['libnvinfer.so.10','libnvinfer_plugin.so.10','libnvonnxparser.so.10'] |
| if os.path.exists(os.path.join(_TRT,l))]; |
| import onnxruntime as ort; |
| sess=ort.InferenceSession('/opt/model/weights.onnx', |
| providers=[('TensorrtExecutionProvider', |
| {'device_id':0,'trt_fp16_enable':True,'trt_engine_cache_enable':True, |
| 'trt_engine_cache_path':'/opt/trt_cache','trt_max_workspace_size':2*1024**3}), |
| 'CUDAExecutionProvider']); |
| sess.run(None, {sess.get_inputs()[0].name: np.zeros((1,3,1280,1280),dtype='float32')}); |
| print('TRT engine baked:', os.listdir('/opt/trt_cache')) |
| " || echo 'TRT pre-warm skipped (no GPU at build time)' |
| # Paths published under the environment key. The values match the |
| # directories used by the build commands in this file: /opt/model receives |
| # the downloaded model files and /opt/trt_cache is the TensorRT engine |
| # cache directory. |
| # NOTE(review): presumably exported as environment variables to the running |
| # container - confirm with the platform that consumes this file. |
| environment: |
| MODEL_PATH: /opt/model |
| TRT_CACHE_PATH: /opt/trt_cache |
|
|
| # GPU requirements: one GPU with at least 16 GB of VRAM, restricted to the |
| # models listed under include. |
| # '4090' is quoted because a bare 4090 is resolved by YAML as an integer, |
| # while every other entry in the list is a string. |
| NodeSelector: |
| gpu_count: 1 |
| min_vram_gb_per_gpu: 16 |
| include: |
| - '4090' |
| - a100 |
| - a100_sxm |
| - h100 |
| - h100_sxm |
| - l40s |
| - a40 |
|
|
| # Deployment settings for the chute: request timeout, concurrency, instance |
| # cap and scaling threshold. |
| # NOTE(review): the exact meaning and units of these keys are defined by |
| # the platform that consumes this file - confirm there before changing. |
| Chute: |
| timeout_seconds: 300 |
| concurrency: 4 |
| max_instances: 5 |
| scaling_threshold: 0.5 |
|
|