---
# Build and deployment configuration for a chute.
#   Image        - base image, build-time shell commands, and environment.
#   NodeSelector - GPU requirements for the nodes that may run it.
#   Chute        - runtime timeout / concurrency / scaling settings.
#
# Every multi-line command uses a folded scalar (`>-`): the line breaks
# below become single spaces, so each list item is ONE shell command.
# Keep all lines of a command at the same indentation; a more-indented
# line would keep its newline and break the one-line Python snippets.
#
# In each `python3 -c "..."` the code starts directly after the opening
# quote. A line break there folds into a leading space, which Python
# before 3.14 rejects with "IndentationError: unexpected indent".

Image:
  from_base: parachutes/python:3.12
  run_command:
    - pip install --upgrade setuptools wheel

    # Runtime dependencies (lower bounds only; tensorrt is unpinned).
    - >-
      pip install
      'numpy>=1.23'
      'onnxruntime-gpu>=1.16'
      'opencv-python-headless>=4.7'
      'pillow>=9.5'
      'huggingface_hub>=0.19.4'
      'pydantic>=2.0'
      'pyyaml>=6.0'
      'aiohttp>=3.9'
      'tensorrt'
      'tensorrt-lean'

    # Register the pip-installed cuDNN and cuBLAS `lib` directories with
    # the dynamic linker: write both paths (newline-separated, chr(10))
    # to /etc/ld.so.conf.d/nvidia-ort.conf, then refresh the cache.
    - >-
      python3 -c "import os, nvidia.cudnn, nvidia.cublas;
      cudnn=os.path.join(os.path.dirname(nvidia.cudnn.__file__),'lib');
      cublas=os.path.join(os.path.dirname(nvidia.cublas.__file__),'lib');
      open('/etc/ld.so.conf.d/nvidia-ort.conf','w').write(cudnn+chr(10)+cublas+chr(10))
      " && ldconfig

    # Bake model weights into image at build time — eliminates HF download on cold start.
    - >-
      python3 -c "import os;
      os.makedirs('/opt/model', exist_ok=True);
      from huggingface_hub import hf_hub_download;
      [hf_hub_download(repo_id='meaculpitt/Detect-Vehicle',
      filename=f,
      local_dir='/opt/model')
      for f in
      ['weights.onnx','class_names.txt','model_type.json','main.py','miner.py']];
      print('Model baked into image at /opt/model/')
      "

    # Attempt TRT engine pre-compilation (succeeds only if builder has GPU).
    # Preloads the TensorRT shared libraries that exist under _TRT with
    # RTLD_GLOBAL, opens the model with the TensorRT provider (engine
    # cache in /opt/trt_cache, FP16, 2 GiB workspace) and runs one
    # all-zeros 1x3x1280x1280 float32 input. Any failure is deliberately
    # tolerated via `|| echo ...` so the build continues.
    # NOTE(review): _TRT hard-codes a python3.12 `dist-packages` path;
    # libraries missing there are skipped silently - confirm it matches
    # where pip installs packages in the base image.
    - >-
      python3 -c "import os, ctypes, numpy as np;
      os.makedirs('/opt/trt_cache', exist_ok=True);
      _TRT='/usr/local/lib/python3.12/dist-packages/tensorrt_libs';
      [ctypes.CDLL(os.path.join(_TRT,l),mode=ctypes.RTLD_GLOBAL)
      for l in
      ['libnvinfer.so.10','libnvinfer_plugin.so.10','libnvonnxparser.so.10']
      if os.path.exists(os.path.join(_TRT,l))];
      import onnxruntime as ort;
      sess=ort.InferenceSession('/opt/model/weights.onnx',
      providers=[('TensorrtExecutionProvider',
      {'device_id':0,'trt_fp16_enable':True,'trt_engine_cache_enable':True,
      'trt_engine_cache_path':'/opt/trt_cache','trt_max_workspace_size':2*1024**3}),
      'CUDAExecutionProvider']);
      sess.run(None,
      {sess.get_inputs()[0].name:
      np.zeros((1,3,1280,1280),dtype='float32')});
      print('TRT engine baked:', os.listdir('/opt/trt_cache'))
      " || echo 'TRT pre-warm skipped (no GPU at build time)'

  # Paths match the directories populated by the build steps above.
  # NOTE(review): nesting was reconstructed from a whitespace-collapsed
  # source - confirm `environment` belongs under Image.
  environment:
    MODEL_PATH: /opt/model
    TRT_CACHE_PATH: /opt/trt_cache

NodeSelector:
  gpu_count: 1
  min_vram_gb_per_gpu: 16
  include:
    # Quoted so the entry is a string like its siblings; a bare 4090
    # would be parsed as an integer.
    - '4090'
    - a100
    - a100_sxm
    - h100
    - h100_sxm
    - l40s
    - a40

Chute:
  timeout_seconds: 300
  concurrency: 4
  max_instances: 5
  scaling_threshold: 0.5