baxtos committed on
Commit
96ceb31
·
verified ·
1 Parent(s): f5bcfe5

cold-start fix: GPU onnx runtime + shutdown_after_seconds=86400

Browse files

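- chute_config.yml: switch onnxruntime -> onnxruntime-gpu, shutdown_after_seconds 300 -> 86400, add timeout_seconds 900, min_vram_gb_per_gpu 16, max_hourly_price_per_gpu 2
- miner.py: use CUDAExecutionProvider with CPUExecutionProvider fallback; log the active provider on load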

Files changed (2) hide show
  1. chute_config.yml +5 -2
  2. miner.py +6 -2
chute_config.yml CHANGED
@@ -2,17 +2,20 @@ Image:
2
  from_base: parachutes/python:3.12
3
  run_command:
4
  - pip install --upgrade setuptools wheel
5
- - pip install huggingface_hub==0.19.4 onnxruntime==1.* opencv-python-headless numpy pydantic
6
  set_workdir: /app
7
 
8
  NodeSelector:
9
  gpu_count: 1
 
 
10
  include:
11
  - pro_6000
12
 
13
  Chute:
14
  tee: true
15
- shutdown_after_seconds: 300
 
16
  concurrency: 4
17
  max_instances: 5
18
  scaling_threshold: 0.5
 
2
  from_base: parachutes/python:3.12
3
  run_command:
4
  - pip install --upgrade setuptools wheel
5
+ - pip install 'huggingface_hub>=0.19.4' 'onnxruntime-gpu[cuda,cudnn]>=1.16' 'opencv-python-headless>=4.7' 'numpy>=1.23' 'pydantic>=2.0' 'pyyaml>=6.0' 'aiohttp>=3.9'
6
  set_workdir: /app
7
 
8
  NodeSelector:
9
  gpu_count: 1
10
+ min_vram_gb_per_gpu: 16
11
+ max_hourly_price_per_gpu: 2
12
  include:
13
  - pro_6000
14
 
15
  Chute:
16
  tee: true
17
+ timeout_seconds: 900
18
+ shutdown_after_seconds: 86400
19
  concurrency: 4
20
  max_instances: 5
21
  scaling_threshold: 0.5
miner.py CHANGED
@@ -52,11 +52,15 @@ class Miner:
52
  so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
53
  self.sess = ort.InferenceSession(
54
  str(Path(path_hf_repo) / self.weights_file),
55
- providers=["CPUExecutionProvider"],
 
 
 
56
  sess_options=so,
57
  )
58
  self.inp = self.sess.get_inputs()[0].name
59
- print("✅ ONNX beverage model loaded")
 
60
 
61
  def __repr__(self) -> str:
62
  return f"BeverageONNX(in={self.input_size}, cls={self.num_classes})"
 
52
  so.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
53
  self.sess = ort.InferenceSession(
54
  str(Path(path_hf_repo) / self.weights_file),
55
+ providers=[
56
+ ("CUDAExecutionProvider", {"device_id": 0}),
57
+ "CPUExecutionProvider",
58
+ ],
59
  sess_options=so,
60
  )
61
  self.inp = self.sess.get_inputs()[0].name
62
+ active = self.sess.get_providers()[0]
63
+ print(f"✅ ONNX beverage model loaded (provider={active})")
64
 
65
  def __repr__(self) -> str:
66
  return f"BeverageONNX(in={self.input_size}, cls={self.num_classes})"