licensy committed on
Commit
53ff481
·
verified ·
1 Parent(s): 308ca73

Fix CUDA: ldconfig + LD_LIBRARY_PATH, shutdown_after_seconds=14400

Browse files
Files changed (3) hide show
  1. __pycache__/miner.cpython-312.pyc +0 -0
  2. chute_config.yml +2 -0
  3. miner.py +17 -60
__pycache__/miner.cpython-312.pyc ADDED
Binary file (9.77 kB). View file
 
chute_config.yml CHANGED
@@ -3,6 +3,7 @@ Image:
3
  run_command:
4
  - pip install --upgrade setuptools wheel
5
  - pip install 'numpy>=1.23' 'onnxruntime-gpu[cuda,cudnn]>=1.16' 'opencv-python>=4.7' 'pillow>=9.5' 'huggingface_hub>=0.19.4' 'pydantic>=2.0' 'pyyaml>=6.0' 'aiohttp>=3.9' 'torch>=2.8'
 
6
 
7
  NodeSelector:
8
  gpu_count: 1
@@ -13,3 +14,4 @@ Chute:
13
  concurrency: 4
14
  max_instances: 5
15
  scaling_threshold: 0.5
 
 
3
  run_command:
4
  - pip install --upgrade setuptools wheel
5
  - pip install 'numpy>=1.23' 'onnxruntime-gpu[cuda,cudnn]>=1.16' 'opencv-python>=4.7' 'pillow>=9.5' 'huggingface_hub>=0.19.4' 'pydantic>=2.0' 'pyyaml>=6.0' 'aiohttp>=3.9' 'torch>=2.8'
6
+ - python3 -c "import os,glob;libs=[d for p in ['/usr/local/lib/python3.12/dist-packages','/usr/lib/python3/dist-packages'] for d in glob.glob(os.path.join(p,'nvidia','*','lib'))];open('/etc/ld.so.conf.d/nvidia-pip.conf','w').write('\n'.join(libs)+'\n');print('Registered CUDA libs:',libs)" && ldconfig
7
 
8
  NodeSelector:
9
  gpu_count: 1
 
14
  concurrency: 4
15
  max_instances: 5
16
  scaling_threshold: 0.5
17
+ shutdown_after_seconds: 14400
miner.py CHANGED
@@ -1,5 +1,13 @@
1
  from pathlib import Path
2
  import math
 
 
 
 
 
 
 
 
3
 
4
  import cv2
5
  import numpy as np
@@ -38,12 +46,10 @@ class Miner:
38
  )
39
  self.input_name = self.session.get_inputs()[0].name
40
  input_shape = self.session.get_inputs()[0].shape
41
- # expected [N, C, H, W]
42
  self.input_h = int(input_shape[2])
43
  self.input_w = int(input_shape[3])
44
- self.conf_threshold = 0.20
45
  self.iou_threshold = 0.3
46
- self.use_tta = True
47
 
48
  def __repr__(self) -> str:
49
  return f"ONNX Miner session={type(self.session).__name__} classes={len(self.class_names)}"
@@ -67,64 +73,48 @@ class Miner:
67
  def _nms(self, dets: list[tuple[float, float, float, float, float, int]]) -> list[tuple[float, float, float, float, float, int]]:
68
  if not dets:
69
  return []
70
-
71
  boxes = np.array([[d[0], d[1], d[2], d[3]] for d in dets], dtype=np.float32)
72
  scores = np.array([d[4] for d in dets], dtype=np.float32)
73
  order = scores.argsort()[::-1]
74
  keep = []
75
-
76
  while order.size > 0:
77
  i = order[0]
78
  keep.append(i)
79
-
80
  xx1 = np.maximum(boxes[i, 0], boxes[order[1:], 0])
81
  yy1 = np.maximum(boxes[i, 1], boxes[order[1:], 1])
82
  xx2 = np.minimum(boxes[i, 2], boxes[order[1:], 2])
83
  yy2 = np.minimum(boxes[i, 3], boxes[order[1:], 3])
84
-
85
  w = np.maximum(0.0, xx2 - xx1)
86
  h = np.maximum(0.0, yy2 - yy1)
87
  inter = w * h
88
-
89
  area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
90
  area_rest = (boxes[order[1:], 2] - boxes[order[1:], 0]) * (boxes[order[1:], 3] - boxes[order[1:], 1])
91
  union = np.maximum(area_i + area_rest - inter, 1e-6)
92
  iou = inter / union
93
-
94
  remaining = np.where(iou <= self.iou_threshold)[0]
95
  order = order[remaining + 1]
96
-
97
  return [dets[idx] for idx in keep]
98
 
99
- def _decode(self, image_bgr: ndarray) -> list[tuple[float, float, float, float, float, int]]:
100
- """Run model and return raw detections before NMS."""
101
  inp, (orig_h, orig_w) = self._preprocess(image_bgr)
102
  out = self.session.run(None, {self.input_name: inp})[0]
103
  pred = self._normalize_predictions(out)
104
-
105
  if pred.shape[1] < 5:
106
  return []
107
-
108
  boxes = pred[:, :4]
109
  cls_scores = pred[:, 4:]
110
-
111
  if cls_scores.shape[1] == 0:
112
  return []
113
-
114
  cls_ids = np.argmax(cls_scores, axis=1)
115
  confs = np.max(cls_scores, axis=1)
116
  keep = confs >= self.conf_threshold
117
-
118
  boxes = boxes[keep]
119
  confs = confs[keep]
120
  cls_ids = cls_ids[keep]
121
-
122
  if boxes.shape[0] == 0:
123
  return []
124
-
125
  sx = orig_w / float(self.input_w)
126
  sy = orig_h / float(self.input_h)
127
-
128
  dets: list[tuple[float, float, float, float, float, int]] = []
129
  for i in range(boxes.shape[0]):
130
  cx, cy, bw, bh = boxes[i].tolist()
@@ -133,58 +123,25 @@ class Miner:
133
  x2 = (cx + bw / 2.0) * sx
134
  y2 = (cy + bh / 2.0) * sy
135
  dets.append((x1, y1, x2, y2, float(confs[i]), int(cls_ids[i])))
136
-
137
- return dets
138
-
139
- def _infer_single(self, image_bgr: ndarray) -> list[BoundingBox]:
140
- orig_h, orig_w = image_bgr.shape[:2]
141
-
142
- # Original pass
143
- all_dets = self._decode(image_bgr)
144
-
145
- # TTA: horizontal flip pass
146
- if self.use_tta:
147
- flipped = cv2.flip(image_bgr, 1)
148
- flip_dets = self._decode(flipped)
149
- for x1, y1, x2, y2, conf, cls_id in flip_dets:
150
- all_dets.append((orig_w - x2, y1, orig_w - x1, y2, conf, cls_id))
151
-
152
- # NMS
153
- all_dets = self._nms(all_dets)
154
-
155
  out_boxes: list[BoundingBox] = []
156
- for x1, y1, x2, y2, conf, cls_id in all_dets:
157
  ix1 = max(0, min(orig_w, math.floor(x1)))
158
  iy1 = max(0, min(orig_h, math.floor(y1)))
159
  ix2 = max(0, min(orig_w, math.ceil(x2)))
160
  iy2 = max(0, min(orig_h, math.ceil(y2)))
161
  out_boxes.append(
162
- BoundingBox(
163
- x1=ix1,
164
- y1=iy1,
165
- x2=ix2,
166
- y2=iy2,
167
- cls_id=cls_id,
168
- conf=max(0.0, min(1.0, conf)),
169
- )
170
- )
171
  return out_boxes
172
 
173
  def predict_batch(
174
- self,
175
- batch_images: list[ndarray],
176
- offset: int,
177
- n_keypoints: int,
178
  ) -> list[TVFrameResult]:
179
  results: list[TVFrameResult] = []
180
  for idx, image in enumerate(batch_images):
181
  boxes = self._infer_single(image)
182
  keypoints = [(0, 0) for _ in range(max(0, int(n_keypoints)))]
183
- results.append(
184
- TVFrameResult(
185
- frame_id=offset + idx,
186
- boxes=boxes,
187
- keypoints=keypoints,
188
- )
189
- )
190
  return results
 
1
  from pathlib import Path
2
  import math
3
+ import os
4
+ import glob
5
+ import site
6
+
7
+ # Ensure pip-installed NVIDIA CUDA/cuDNN libraries are discoverable
8
+ for sp in site.getsitepackages():
9
+ for d in glob.glob(os.path.join(sp, 'nvidia', '*', 'lib')):
10
+ os.environ['LD_LIBRARY_PATH'] = d + ':' + os.environ.get('LD_LIBRARY_PATH', '')
11
 
12
  import cv2
13
  import numpy as np
 
46
  )
47
  self.input_name = self.session.get_inputs()[0].name
48
  input_shape = self.session.get_inputs()[0].shape
 
49
  self.input_h = int(input_shape[2])
50
  self.input_w = int(input_shape[3])
51
+ self.conf_threshold = 0.15
52
  self.iou_threshold = 0.3
 
53
 
54
  def __repr__(self) -> str:
55
  return f"ONNX Miner session={type(self.session).__name__} classes={len(self.class_names)}"
 
73
  def _nms(self, dets: list[tuple[float, float, float, float, float, int]]) -> list[tuple[float, float, float, float, float, int]]:
74
  if not dets:
75
  return []
 
76
  boxes = np.array([[d[0], d[1], d[2], d[3]] for d in dets], dtype=np.float32)
77
  scores = np.array([d[4] for d in dets], dtype=np.float32)
78
  order = scores.argsort()[::-1]
79
  keep = []
 
80
  while order.size > 0:
81
  i = order[0]
82
  keep.append(i)
 
83
  xx1 = np.maximum(boxes[i, 0], boxes[order[1:], 0])
84
  yy1 = np.maximum(boxes[i, 1], boxes[order[1:], 1])
85
  xx2 = np.minimum(boxes[i, 2], boxes[order[1:], 2])
86
  yy2 = np.minimum(boxes[i, 3], boxes[order[1:], 3])
 
87
  w = np.maximum(0.0, xx2 - xx1)
88
  h = np.maximum(0.0, yy2 - yy1)
89
  inter = w * h
 
90
  area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
91
  area_rest = (boxes[order[1:], 2] - boxes[order[1:], 0]) * (boxes[order[1:], 3] - boxes[order[1:], 1])
92
  union = np.maximum(area_i + area_rest - inter, 1e-6)
93
  iou = inter / union
 
94
  remaining = np.where(iou <= self.iou_threshold)[0]
95
  order = order[remaining + 1]
 
96
  return [dets[idx] for idx in keep]
97
 
98
+ def _infer_single(self, image_bgr: ndarray) -> list[BoundingBox]:
 
99
  inp, (orig_h, orig_w) = self._preprocess(image_bgr)
100
  out = self.session.run(None, {self.input_name: inp})[0]
101
  pred = self._normalize_predictions(out)
 
102
  if pred.shape[1] < 5:
103
  return []
 
104
  boxes = pred[:, :4]
105
  cls_scores = pred[:, 4:]
 
106
  if cls_scores.shape[1] == 0:
107
  return []
 
108
  cls_ids = np.argmax(cls_scores, axis=1)
109
  confs = np.max(cls_scores, axis=1)
110
  keep = confs >= self.conf_threshold
 
111
  boxes = boxes[keep]
112
  confs = confs[keep]
113
  cls_ids = cls_ids[keep]
 
114
  if boxes.shape[0] == 0:
115
  return []
 
116
  sx = orig_w / float(self.input_w)
117
  sy = orig_h / float(self.input_h)
 
118
  dets: list[tuple[float, float, float, float, float, int]] = []
119
  for i in range(boxes.shape[0]):
120
  cx, cy, bw, bh = boxes[i].tolist()
 
123
  x2 = (cx + bw / 2.0) * sx
124
  y2 = (cy + bh / 2.0) * sy
125
  dets.append((x1, y1, x2, y2, float(confs[i]), int(cls_ids[i])))
126
+ dets = self._nms(dets)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  out_boxes: list[BoundingBox] = []
128
+ for x1, y1, x2, y2, conf, cls_id in dets:
129
  ix1 = max(0, min(orig_w, math.floor(x1)))
130
  iy1 = max(0, min(orig_h, math.floor(y1)))
131
  ix2 = max(0, min(orig_w, math.ceil(x2)))
132
  iy2 = max(0, min(orig_h, math.ceil(y2)))
133
  out_boxes.append(
134
+ BoundingBox(x1=ix1, y1=iy1, x2=ix2, y2=iy2,
135
+ cls_id=cls_id, conf=max(0.0, min(1.0, conf))))
 
 
 
 
 
 
 
136
  return out_boxes
137
 
138
  def predict_batch(
139
+ self, batch_images: list[ndarray], offset: int, n_keypoints: int,
 
 
 
140
  ) -> list[TVFrameResult]:
141
  results: list[TVFrameResult] = []
142
  for idx, image in enumerate(batch_images):
143
  boxes = self._infer_single(image)
144
  keypoints = [(0, 0) for _ in range(max(0, int(n_keypoints)))]
145
+ results.append(TVFrameResult(
146
+ frame_id=offset + idx, boxes=boxes, keypoints=keypoints))
 
 
 
 
 
147
  return results