File size: 2,486 Bytes
93dd8b4
 
 
 
651f424
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93dd8b4
 
 
651f424
 
 
 
 
 
 
 
 
93dd8b4
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Container image recipe: base image, ordered build commands, runtime env.
#
# Every multi-line run_command entry is a folded scalar (>-) whose lines all
# share ONE indent, so each folds into a single shell line. Two rules keep the
# embedded Python valid:
#   * the code starts directly after the opening double quote — a line break
#     there would fold into a leading space, and Python 3.12 rejects `-c`
#     source that begins with whitespace (IndentationError: unexpected indent);
#   * no continuation line is indented deeper than its neighbours — folded
#     scalars keep the newlines around more-indented lines, which would split
#     the command across several lines.
Image:
  from_base: parachutes/python:3.12
  run_command:
    - pip install --upgrade setuptools wheel
    - >-
      pip install 'numpy>=1.23' 'onnxruntime-gpu>=1.16'
      'opencv-python-headless>=4.7' 'pillow>=9.5'
      'huggingface_hub>=0.19.4' 'pydantic>=2.0'
      'pyyaml>=6.0' 'aiohttp>=3.9'
      'tensorrt' 'tensorrt-lean'
    # Write the lib dirs of the nvidia.cudnn / nvidia.cublas Python packages
    # to an ld.so.conf.d file, then refresh the dynamic linker cache.
    - >-
      python3 -c "import os, nvidia.cudnn, nvidia.cublas;
      cudnn=os.path.join(os.path.dirname(nvidia.cudnn.__file__),'lib');
      cublas=os.path.join(os.path.dirname(nvidia.cublas.__file__),'lib');
      open('/etc/ld.so.conf.d/nvidia-ort.conf','w').write(cudnn+chr(10)+cublas+chr(10))"
      && ldconfig
    # Bake model weights into image at build time — eliminates HF download on cold start.
    - >-
      python3 -c "import os; os.makedirs('/opt/model', exist_ok=True);
      from huggingface_hub import hf_hub_download;
      [hf_hub_download(repo_id='meaculpitt/Detect-Vehicle', filename=f, local_dir='/opt/model')
      for f in ['weights.onnx','class_names.txt','model_type.json','main.py','miner.py']];
      print('Model baked into image at /opt/model/')"
    # Attempt TRT engine pre-compilation (succeeds only if builder has GPU).
    # The TensorRT shared-library directory is resolved from the installed
    # `tensorrt_libs` package (works for site-packages and dist-packages
    # layouts alike); the previously hard-coded path is kept as a fallback.
    # NOTE(review): `|| echo` masks every failure of this step, not only a
    # missing GPU — check the build log if the cache turns out empty.
    - >-
      python3 -c "import os, ctypes, importlib.util, numpy as np;
      os.makedirs('/opt/trt_cache', exist_ok=True);
      _spec=importlib.util.find_spec('tensorrt_libs');
      _TRT=(list(_spec.submodule_search_locations)[0]
      if _spec and _spec.submodule_search_locations
      else '/usr/local/lib/python3.12/dist-packages/tensorrt_libs');
      [ctypes.CDLL(os.path.join(_TRT,l),mode=ctypes.RTLD_GLOBAL)
      for l in ['libnvinfer.so.10','libnvinfer_plugin.so.10','libnvonnxparser.so.10']
      if os.path.exists(os.path.join(_TRT,l))];
      import onnxruntime as ort;
      sess=ort.InferenceSession('/opt/model/weights.onnx',
      providers=[('TensorrtExecutionProvider',
      {'device_id':0,'trt_fp16_enable':True,'trt_engine_cache_enable':True,
      'trt_engine_cache_path':'/opt/trt_cache','trt_max_workspace_size':2*1024**3}),
      'CUDAExecutionProvider']);
      sess.run(None, {sess.get_inputs()[0].name: np.zeros((1,3,1280,1280),dtype='float32')});
      print('TRT engine baked:', os.listdir('/opt/trt_cache'))"
      || echo 'TRT pre-warm skipped (no GPU at build time)'
  # Paths exported to the running container; they match the directories
  # populated by the build steps above.
  environment:
    MODEL_PATH: /opt/model
    TRT_CACHE_PATH: /opt/trt_cache

# Hardware constraints for nodes allowed to run this deployment.
NodeSelector:
  gpu_count: 1
  min_vram_gb_per_gpu: 16
  # GPU model identifiers. '4090' is quoted on purpose: as a bare scalar YAML
  # parses it as the integer 4090, leaving one int in a list of strings.
  include:
    - '4090'
    - a100
    - a100_sxm
    - h100
    - h100_sxm
    - l40s
    - a40

# Runtime and scaling settings for the deployed chute. Keys are listed
# alphabetically; mapping order carries no meaning.
# NOTE(review): exact semantics of each field are defined by the consuming
# deployment tool — confirm units/ranges against its schema.
Chute:
  concurrency: 4
  max_instances: 5
  scaling_threshold: 0.5
  timeout_seconds: 300