baxtos committed on
Commit
6337d33
·
verified ·
1 Parent(s): 96ceb31

open GPU pool + drop tee + warmup pass in __init__

Browse files

- chute_config: replace the include:pro_6000 allow-list with an exclude list (5090, b200, h200, h20, mi300x) and remove tee:true (lower compute_multiplier, wider scheduler pool)
- miner.py: dummy inference in Miner.__init__ to warm CUDA kernels before first /predict

Files changed (2) hide show
  1. chute_config.yml +6 -3
  2. miner.py +8 -0
chute_config.yml CHANGED
@@ -9,11 +9,14 @@ NodeSelector:
9
  gpu_count: 1
10
  min_vram_gb_per_gpu: 16
11
  max_hourly_price_per_gpu: 2
12
- include:
13
- - pro_6000
 
 
 
 
14
 
15
  Chute:
16
- tee: true
17
  timeout_seconds: 900
18
  shutdown_after_seconds: 86400
19
  concurrency: 4
 
9
  gpu_count: 1
10
  min_vram_gb_per_gpu: 16
11
  max_hourly_price_per_gpu: 2
12
+ exclude:
13
+ - "5090"
14
+ - b200
15
+ - h200
16
+ - h20
17
+ - mi300x
18
 
19
  Chute:
 
20
  timeout_seconds: 900
21
  shutdown_after_seconds: 86400
22
  concurrency: 4
miner.py CHANGED
@@ -62,6 +62,14 @@ class Miner:
62
  active = self.sess.get_providers()[0]
63
  print(f"✅ ONNX beverage model loaded (provider={active})")
64
 
 
 
 
 
 
 
 
 
65
  def __repr__(self) -> str:
66
  return f"BeverageONNX(in={self.input_size}, cls={self.num_classes})"
67
 
 
62
  active = self.sess.get_providers()[0]
63
  print(f"✅ ONNX beverage model loaded (provider={active})")
64
 
65
+ # Warm CUDA kernels / ORT graph so the very first /predict isn't slow.
66
+ warm = np.zeros((64, 64, 3), dtype=np.uint8)
67
+ try:
68
+ self._infer(warm)
69
+ print("✅ ONNX warmup pass done")
70
+ except Exception as e:
71
+ print(f"⚠️ ONNX warmup pass failed: {e}")
72
+
73
  def __repr__(self) -> str:
74
  return f"BeverageONNX(in={self.input_size}, cls={self.num_classes})"
75