meaculpitt committed on
Commit
e1b0efc
·
verified ·
1 Parent(s): 520f0ed

scorevision: push artifact

Browse files
Files changed (1) hide show
  1. miner.py +170 -334
miner.py CHANGED
@@ -1,339 +1,175 @@
1
- """
2
- Score Vision SN44 — VehicleDetect miner endpoint.
3
-
4
- Class mapping (output indices):
5
- 0 = car (COCO class 2)
6
- 1 = bus (COCO class 5)
7
- 2 = truck (COCO class 7)
8
- 3 = motorcycle (COCO class 3)
9
-
10
- Accepts: base64-encoded image or raw image bytes via chutes cord.
11
- Returns: list of {bbox: [x1,y1,x2,y2], score: float, class_id: int, class_name: str}
12
-
13
- CUDA fix: onnxruntime-gpu finds cuDNN via ldconfig (registered during image build),
14
- with ctypes preload as belt-and-suspenders fallback.
15
- """
16
-
17
- from __future__ import annotations
18
-
19
- import base64
20
- import io
21
- import os
22
- import time
23
  from pathlib import Path
24
- from typing import Any
25
 
26
- import ctypes
27
  import cv2
28
  import numpy as np
29
- from PIL import Image
30
-
31
- # ── cuDNN preload (belt-and-suspenders fallback) ──────────────────────────────
32
- # Primary fix is ldconfig at image build time (see Image builder below).
33
- # This ctypes preload catches any edge cases where ld.so.cache isn't used.
34
- def _preload_cuda_libs() -> None:
35
- _NVIDIA = "/usr/local/lib/python3.12/dist-packages/nvidia"
36
- _LIBS = [
37
- "/usr/lib/x86_64-linux-gnu/libcuda.so.1", # driver stub — must be first
38
- f"{_NVIDIA}/cublas/lib/libcublasLt.so.12",
39
- f"{_NVIDIA}/cublas/lib/libcublas.so.12",
40
- f"{_NVIDIA}/cudnn/lib/libcudnn.so.9",
41
- ]
42
- for path in _LIBS:
43
- if os.path.exists(path):
44
- try:
45
- ctypes.CDLL(path, mode=ctypes.RTLD_GLOBAL)
46
- except OSError:
47
- pass
48
-
49
- _preload_cuda_libs()
50
-
51
- import onnxruntime as ort # noqa: E402 must come after preload
52
-
53
- # ── Constants ────────────────────────────────────────────────────────────────
54
# Model location: weights are expected next to this source file.
MODEL_DIR = Path(__file__).parent
WEIGHTS = MODEL_DIR.joinpath("weights.onnx")

# Inference hyper-parameters.
IMG_SIZE = 640
CONF_THRESH = 0.55  # sweep: max composite score (0.60×mAP + 0.40×FP_score) at conf=0.55
IOU_THRESH = 0.45

# COCO class index → submission class index
COCO_TO_OUT: dict[int, int] = {2: 0, 5: 1, 7: 2, 3: 3}
# Insertion order of the mapping above defines the per-class processing order.
COCO_VEHICLE_IDX = list(COCO_TO_OUT)
OUT_NAMES = ["car", "bus", "truck", "motorcycle"]

# ── Model loader (singleton) ─────────────────────────────────────────────────
# Populated lazily by get_session(); None until the first call.
_SESSION: ort.InferenceSession | None = None
67
-
68
-
69
def get_session() -> ort.InferenceSession:
    """Return the shared ONNX Runtime session, building it on first use.

    The session is cached in the module-level ``_SESSION``. It is created
    with the CUDA execution provider first and the CPU provider as fallback;
    the provider that ONNX Runtime actually selected is printed once at
    creation time.
    """
    global _SESSION
    if _SESSION is not None:
        return _SESSION

    session_options = ort.SessionOptions()
    session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    session_options.enable_mem_pattern = True
    session_options.enable_mem_reuse = True

    cuda_provider = (
        "CUDAExecutionProvider",
        {
            "device_id": 0,
            "arena_extend_strategy": "kNextPowerOfTwo",
            "gpu_mem_limit": 2 * 1024 ** 3,
            "cudnn_conv_algo_search": "EXHAUSTIVE",
            "do_copy_in_default_stream": True,
        },
    )
    _SESSION = ort.InferenceSession(
        str(WEIGHTS),
        sess_options=session_options,
        providers=[cuda_provider, "CPUExecutionProvider"],
    )

    active_provider = _SESSION.get_providers()[0]
    print(f"[miner] Model loaded. Provider: {active_provider}", flush=True)
    return _SESSION
94
-
95
-
96
- # ── Preprocessing ────────────────────────────────────────────────────────────
97
-
98
def letterbox(img: np.ndarray, size: int = IMG_SIZE) -> tuple[np.ndarray, float, int, int]:
    """Fit *img* inside a ``size`` x ``size`` canvas without distorting it.

    The image is scaled by a single ratio so that its longer side equals
    ``size``, then centred on a grey (114, 114, 114) canvas.

    Args:
        img: Image array whose first two axes are (height, width).
        size: Edge length of the square output canvas, in pixels.

    Returns:
        ``(padded, ratio, pad_left, pad_top)`` where ``ratio`` is the scale
        factor applied and the two pads are the offsets of the resized image
        inside the canvas. Callers need all three to map boxes back.
    """
    h, w = img.shape[:2]
    r = min(size / h, size / w)
    # Clamp to one pixel: for extreme aspect ratios the short side rounds to
    # 0, and cv2.resize rejects an empty destination size.
    new_w = max(1, int(round(w * r)))
    new_h = max(1, int(round(h * r)))
    img_r = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

    dw, dh = size - new_w, size - new_h
    pad_l, pad_t = dw // 2, dh // 2
    # Any odd leftover pixel goes to the right/bottom border.
    img_p = cv2.copyMakeBorder(
        img_r, pad_t, dh - pad_t, pad_l, dw - pad_l,
        cv2.BORDER_CONSTANT, value=(114, 114, 114),
    )
    return img_p, r, pad_l, pad_t
110
-
111
-
112
def preprocess(img_bgr: np.ndarray) -> tuple[np.ndarray, float, int, int]:
    """Turn a BGR image into the network input tensor.

    The image is letterboxed, converted to RGB, rearranged to channel-first
    layout, scaled to ``[0, 1]`` float32 and given a leading batch axis.

    Returns:
        ``(batch, ratio, pad_left, pad_top)`` — the contiguous input tensor
        plus the letterbox parameters needed to undo the transform.
    """
    padded, ratio, pad_l, pad_t = letterbox(img_bgr)
    rgb = cv2.cvtColor(padded, cv2.COLOR_BGR2RGB)
    chw = rgb.transpose(2, 0, 1).astype(np.float32) * (1.0 / 255.0)
    batch = np.ascontiguousarray(chw[np.newaxis])
    return batch, ratio, pad_l, pad_t
117
-
118
-
119
- # ── NMS ──────────────────────────────────────────────────────────────────────
120
-
121
def nms(boxes: np.ndarray, scores: np.ndarray, iou_thresh: float = IOU_THRESH) -> list[int]:
    """Greedy non-maximum suppression over ``[x1, y1, x2, y2]`` boxes.

    Args:
        boxes: Array indexed as ``boxes[:, 0..3]`` for x1, y1, x2, y2.
        scores: One score per box.
        iou_thresh: A box is discarded when its IoU with an already kept,
            higher-scoring box exceeds this value.

    Returns:
        Indices into *boxes* of the kept boxes, highest score first.
    """
    if len(boxes) == 0:
        return []

    left, top, right, bottom = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (right - left) * (bottom - top)
    ranking = scores.argsort()[::-1]

    selected: list[int] = []
    while len(ranking) > 0:
        best, rest = ranking[0], ranking[1:]
        selected.append(int(best))

        overlap_w = np.maximum(
            0, np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest])
        )
        overlap_h = np.maximum(
            0, np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest])
        )
        inter = overlap_w * overlap_h
        # The small epsilon keeps the division defined for zero-area boxes.
        iou = inter / (areas[best] + areas[rest] - inter + 1e-7)
        ranking = rest[iou <= iou_thresh]
    return selected
139
-
140
-
141
- # ── Postprocessing ───────────────────────────────────────────────────────────
142
-
143
def postprocess(
    raw: np.ndarray,
    ratio: float,
    pad_l: int,
    pad_t: int,
    orig_w: int,
    orig_h: int,
) -> list[dict[str, Any]]:
    """Convert one raw network output into per-class vehicle detections.

    Args:
        raw: Prediction matrix indexed as ``raw[row, anchor]``; rows 0-3 are
            read as cx, cy, w, h and row ``4 + c`` as the score of COCO class
            ``c`` (the caller notes the shape as ``[84, 8400]``).
        ratio: Letterbox scale factor used during preprocessing.
        pad_l: Left letterbox padding in pixels.
        pad_t: Top letterbox padding in pixels.
        orig_w: Width of the original image; boxes are clipped to it.
        orig_h: Height of the original image; boxes are clipped to it.

    Returns:
        A list of ``{"bbox", "score", "class_id", "class_name"}`` dicts with
        boxes in original-image pixel coordinates. NMS is run independently
        for each vehicle class, so one anchor can yield several classes.
    """
    vehicle_rows = np.array([4 + c for c in COCO_VEHICLE_IDX])
    candidate = raw[vehicle_rows].max(axis=0) > CONF_THRESH
    if not candidate.any():
        return []

    pred = raw[:, candidate]
    cx, cy, bw, bh = pred[0], pred[1], pred[2], pred[3]

    # Undo the letterbox (remove padding, then rescale) and clip to the image.
    x1 = np.clip((cx - bw / 2 - pad_l) / ratio, 0, orig_w)
    y1 = np.clip((cy - bh / 2 - pad_t) / ratio, 0, orig_h)
    x2 = np.clip((cx + bw / 2 - pad_l) / ratio, 0, orig_w)
    y2 = np.clip((cy + bh / 2 - pad_t) / ratio, 0, orig_h)
    boxes = np.stack([x1, y1, x2, y2], axis=1)

    results: list[dict[str, Any]] = []
    for coco_cls in COCO_VEHICLE_IDX:
        scores = pred[4 + coco_cls]
        cls_mask = scores > CONF_THRESH
        if not cls_mask.any():
            continue

        # Boolean indexing copies, so materialise the class subset once
        # instead of once per kept detection.
        cls_boxes = boxes[cls_mask]
        cls_scores = scores[cls_mask]
        out_cls = COCO_TO_OUT[coco_cls]
        for k in nms(cls_boxes, cls_scores):
            bx1, by1, bx2, by2 = cls_boxes[k]
            results.append({
                "bbox": [float(bx1), float(by1), float(bx2), float(by2)],
                "score": float(cls_scores[k]),
                "class_id": out_cls,
                "class_name": OUT_NAMES[out_cls],
            })
    return results
187
-
188
-
189
- # ── Image decoding helpers ───────────────────────────────────────────────────
190
-
191
- def decode_image(data: bytes | str) -> np.ndarray:
192
- if isinstance(data, str):
193
- data = base64.b64decode(data)
194
- elif isinstance(data, (bytes, bytearray)):
195
- try:
196
- data = base64.b64decode(data)
197
- except Exception:
198
- pass
199
- arr = np.frombuffer(data, dtype=np.uint8)
200
- img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
201
- if img is None:
202
- pil = Image.open(io.BytesIO(data)).convert("RGB")
203
- img = cv2.cvtColor(np.array(pil), cv2.COLOR_RGB2BGR)
204
- return img
205
-
206
-
207
- # ── Core predict function ────────────────────────────────────────────────────
208
-
209
def predict(image_data: bytes | str | np.ndarray) -> dict[str, Any]:
    """Run the detector on one image and report detections plus timing.

    Args:
        image_data: An already decoded BGR array, or anything accepted by
            ``decode_image`` (base64 text or image bytes).

    Returns:
        A dict with ``"detections"`` (list of detection dicts),
        ``"inference_ms"`` (time spent in ``session.run`` only, rounded to
        3 decimals) and ``"provider"`` (first active execution provider).
    """
    session = get_session()

    frame = image_data if isinstance(image_data, np.ndarray) else decode_image(image_data)
    height, width = frame.shape[:2]
    tensor, scale, pad_left, pad_top = preprocess(frame)

    # Only the forward pass is timed; pre- and post-processing are excluded.
    started = time.perf_counter()
    outputs = session.run(None, {"images": tensor})
    elapsed_ms = (time.perf_counter() - started) * 1000.0

    first_output = outputs[0][0]  # [84, 8400]
    detections = postprocess(first_output, scale, pad_left, pad_top, width, height)

    return {
        "detections": detections,
        "inference_ms": round(elapsed_ms, 3),
        "provider": session.get_providers()[0],
    }
232
-
233
-
234
- # ── Chutes cord wrapper ──────────────────────────────────────────────────────
235
-
236
# Everything below is optional: when the `chutes` SDK is not installed the
# ImportError is swallowed and the module still works as a plain library.
try:
    from chutes.chute import Chute
    from chutes.chute.node_selector import NodeSelector
    from chutes.image import Image as ChuteImage

    # Container image recipe: base image, Python dependencies, then the
    # library search path for the pip-installed NVIDIA wheels.
    chute_image = (
        ChuteImage(
            username="lculpitt",
            name="vehicle-detect-sn44",
            tag="v4-cuda",
            # NOTE(review): README.md must sit next to this file; a missing
            # file raises FileNotFoundError, which the except below does not catch.
            readme=(Path(__file__).parent / "README.md").read_text(),
        )
        .from_base("parachutes/python:3.12")
        .run_command("pip install --upgrade setuptools wheel")
        .run_command(
            "pip install 'numpy>=1.23' 'onnxruntime-gpu>=1.16' "
            "'opencv-python-headless>=4.7' 'pillow>=9.5' "
            "'huggingface_hub>=0.19.4' 'pydantic>=2.0' "
            "'pyyaml>=6.0' 'aiohttp>=3.9'"
        )
        # Bake cuDNN/cuBLAS paths into the image as Docker ENV so onnxruntime
        # CUDAExecutionProvider finds libcudnn.so.9 on every node at container start.
        .with_env(
            "LD_LIBRARY_PATH",
            "/usr/local/lib/python3.12/dist-packages/nvidia/cudnn/lib"
            ":/usr/local/lib/python3.12/dist-packages/nvidia/cublas/lib",
        )
    )

    # Deployment descriptor; the scaling/concurrency values are passed to the
    # SDK as-is (their exact semantics are defined by chutes — confirm there).
    chute = Chute(
        username="lculpitt",
        name="vehicle-detect-sn44",
        tagline="YOLO11n vehicle detector — car, bus, truck, motorcycle",
        readme=(Path(__file__).parent / "README.md").read_text(),
        image=chute_image,
        concurrency=4,
        max_instances=5,
        shutdown_after_seconds=300,
        scaling_threshold=0.5,
        node_selector=NodeSelector(
            gpu_count=1,
            min_vram_gb_per_gpu=16,
            # All CUDA 12.x, all $0.40–$0.85/hr (within 2.5× spread from cheapest)
            include=["4090", "a40", "a6000", "l40", "l40s"],
        ),
    )

    # HTTP entry point: forwards the base64 payload straight to predict().
    @chute.cord(path="/predict", method="POST")
    async def predict_cord(image_b64: str) -> dict:
        """
        POST /predict
        Body: {"image_b64": "<base64-encoded image>"}
        Returns detection JSON.
        """
        return predict(image_b64)

except ImportError:
    # chutes SDK not available (e.g. local testing) — skip cord registration.
    pass
294
-
295
-
296
- # ── Local test ───────────────────────────────────────────────────────────────
297
-
298
if __name__ == "__main__":
    # Local smoke test: one prediction on a real or synthetic image,
    # followed by a 50-run end-to-end latency benchmark.
    import sys

    banner = "=" * 55
    print(banner)
    print(" miner.py local smoke test")
    print(banner)

    # Grey 1280x720 canvas with a green rectangle, used when no image is given.
    frame = np.full((720, 1280, 3), 128, dtype=np.uint8)
    cv2.rectangle(frame, (100, 100), (400, 300), (0, 255, 0), 3)

    if len(sys.argv) <= 1:
        print(" Using synthetic 1280x720 dummy image.")
    else:
        from_disk = cv2.imread(sys.argv[1])
        if from_disk is None:
            print(f" Could not load {sys.argv[1]}, using dummy.")
        else:
            frame = from_disk
            print(f" Using image: {sys.argv[1]} ({from_disk.shape[1]}x{from_disk.shape[0]})")

    report = predict(frame)
    print(f"\n Provider : {report['provider']}")
    print(f" Inference : {report['inference_ms']:.2f} ms")
    print(f" Detections : {len(report['detections'])}")
    for det in report["detections"]:
        bx1, by1, bx2, by2 = (round(coord, 1) for coord in det["bbox"])
        print(f" [{det['class_id']}] {det['class_name']:12s} score={det['score']:.3f} "
              f"bbox=[{bx1},{by1},{bx2},{by2}]")

    print("\n Latency benchmark (50 runs)...")
    samples = []
    for _ in range(50):
        tick = time.perf_counter()
        predict(frame)
        samples.append((time.perf_counter() - tick) * 1000)
    samples.sort()
    # With 50 sorted samples, index 25 approximates the median and 47 the P95.
    p50 = samples[25]
    p95 = samples[47]
    fps = 1000.0 / p50
    print(f" P50={p50:.2f}ms P95={p95:.2f}ms FPS={fps:.1f}")
    print(f" Target >=30 FPS : {'PASS' if fps >= 30 else 'FAIL'}")
    print(f" Target P95<50ms : {'PASS' if p95 < 50 else 'FAIL'}")
    print(banner)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from pathlib import Path
2
+ import math
3
 
 
4
  import cv2
5
  import numpy as np
6
+ import onnxruntime as ort
7
+ from numpy import ndarray
8
+ from pydantic import BaseModel
9
+
10
+
11
class BoundingBox(BaseModel):
    # One detection as emitted by Miner._infer_single: integer corner
    # coordinates in original-frame pixels plus class index and confidence.
    x1: int  # left edge, clamped to [0, frame width]
    y1: int  # top edge, clamped to [0, frame height]
    x2: int  # right edge, clamped to [0, frame width]
    y2: int  # bottom edge, clamped to [0, frame height]
    cls_id: int  # index of the highest-scoring class column
    conf: float  # score of that class, clamped to [0.0, 1.0]
18
+
19
+
20
class TVFrameResult(BaseModel):
    # Per-frame output of Miner.predict_batch.
    frame_id: int  # batch offset + position of the frame inside the batch
    boxes: list[BoundingBox]  # detections for this frame (may be empty)
    keypoints: list[tuple[int, int]]  # (x, y) pairs; this miner fills them with (0, 0)
24
+
25
+
26
class Miner:
    """
    Auto-generated by subnet_bridge from a Manako element repo.
    This miner is intentionally self-contained for chute import restrictions.

    Wraps an ONNX detector stored as ``weights.onnx``: frames are stretched to
    the model input size (no letterboxing), predictions are thresholded,
    de-duplicated with class-agnostic NMS and returned as integer pixel boxes
    in the coordinate system of the original frame.
    """

    # Edge length assumed when the ONNX graph declares a dynamic height/width.
    # NOTE(review): 640 is the usual export size for this model family —
    # confirm it matches the size the weights were trained/exported with.
    _DEFAULT_INPUT_SIZE = 640

    def __init__(self, path_hf_repo: Path) -> None:
        """Load ``weights.onnx`` from *path_hf_repo* and read its input spec.

        Args:
            path_hf_repo: Directory that contains ``weights.onnx``.
        """
        self.path_hf_repo = path_hf_repo
        self.class_names = ['person']
        self.session = ort.InferenceSession(
            str(path_hf_repo / "weights.onnx"),
            providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
        )
        model_input = self.session.get_inputs()[0]
        self.input_name = model_input.name
        input_shape = model_input.shape
        # expected [N, C, H, W]; H/W are a str or None for dynamic exports,
        # which int() cannot convert, so fall back to the default size.
        self.input_h = self._static_dim(input_shape[2], self._DEFAULT_INPUT_SIZE)
        self.input_w = self._static_dim(input_shape[3], self._DEFAULT_INPUT_SIZE)
        self.conf_threshold = 0.70  # sweep-optimised: max composite 0.65×mAP+0.35×FP_score
        self.iou_threshold = 0.45

    def __repr__(self) -> str:
        return f"ONNX Miner session={type(self.session).__name__} classes={len(self.class_names)}"

    @staticmethod
    def _static_dim(dim: object, default: int) -> int:
        """Return *dim* as an int if it is a positive static size, else *default*."""
        if isinstance(dim, (int, np.integer)) and not isinstance(dim, bool) and dim > 0:
            return int(dim)
        return default

    def _preprocess(self, image_bgr: ndarray) -> tuple[np.ndarray, tuple[int, int]]:
        """Build the network input for one BGR frame.

        Returns:
            ``(tensor, (orig_h, orig_w))`` where ``tensor`` is a contiguous
            float32 array of shape ``[1, 3, input_h, input_w]`` scaled to
            ``[0, 1]``. The frame is stretched, not letterboxed.
        """
        h, w = image_bgr.shape[:2]
        rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
        resized = cv2.resize(rgb, (self.input_w, self.input_h))
        x = resized.astype(np.float32) / 255.0
        # The transpose only produces a strided view; hand the runtime a
        # C-contiguous buffer.
        x = np.ascontiguousarray(np.transpose(x, (2, 0, 1))[None, ...])
        return x, (h, w)

    def _normalize_predictions(self, raw: np.ndarray) -> np.ndarray:
        """Return predictions as ``[N, C]`` (one row per candidate box).

        Common ultralytics export shapes are ``[1, C, N]`` (C = 4 +
        num_classes) and ``[1, N, C]``. The layout is detected by assuming
        there are more candidates than channels.

        Raises:
            ValueError: If ``raw[0]`` is not two-dimensional.
        """
        pred = raw[0]
        if pred.ndim != 2:
            raise ValueError(f"Unexpected prediction shape: {raw.shape}")
        if pred.shape[0] < pred.shape[1]:
            pred = pred.transpose(1, 0)
        return pred

    def _nms(self, dets: list[tuple[float, float, float, float, float, int]]) -> list[tuple[float, float, float, float, float, int]]:
        """Greedy class-agnostic NMS over ``(x1, y1, x2, y2, conf, cls_id)`` tuples.

        A detection is dropped when its IoU with a higher-scoring kept one
        exceeds ``self.iou_threshold``; the class id is not consulted.

        Returns:
            The surviving tuples, highest confidence first.
        """
        if not dets:
            return []

        boxes = np.array([d[:4] for d in dets], dtype=np.float32)
        scores = np.array([d[4] for d in dets], dtype=np.float32)
        # Areas never change, so compute them once instead of per iteration.
        areas = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        order = scores.argsort()[::-1]
        keep: list[int] = []

        while order.size > 0:
            i, rest = order[0], order[1:]
            keep.append(int(i))

            xx1 = np.maximum(boxes[i, 0], boxes[rest, 0])
            yy1 = np.maximum(boxes[i, 1], boxes[rest, 1])
            xx2 = np.minimum(boxes[i, 2], boxes[rest, 2])
            yy2 = np.minimum(boxes[i, 3], boxes[rest, 3])
            inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)

            # The floor keeps the division defined for zero-area boxes.
            union = np.maximum(areas[i] + areas[rest] - inter, 1e-6)
            order = rest[inter / union <= self.iou_threshold]

        return [dets[idx] for idx in keep]

    def _decode_candidates(
        self, pred: np.ndarray, orig_h: int, orig_w: int
    ) -> list[tuple[float, float, float, float, float, int]]:
        """Threshold ``[N, 4 + classes]`` predictions and scale them to the frame.

        Columns 0-3 are read as ``cx, cy, w, h`` in model-input pixels and
        the remaining columns as per-class scores.

        Returns:
            ``(x1, y1, x2, y2, conf, cls_id)`` tuples in original-frame
            coordinates, not yet clamped or de-duplicated.
        """
        if pred.shape[1] < 5:
            return []

        cls_scores = pred[:, 4:]
        cls_ids = np.argmax(cls_scores, axis=1)
        confs = np.max(cls_scores, axis=1)
        keep = confs >= self.conf_threshold
        if not keep.any():
            return []

        # float64 keeps the arithmetic identical to plain Python floats.
        cx, cy, bw, bh = pred[keep, :4].astype(np.float64).T
        sx = orig_w / float(self.input_w)
        sy = orig_h / float(self.input_h)
        x1 = (cx - bw / 2.0) * sx
        y1 = (cy - bh / 2.0) * sy
        x2 = (cx + bw / 2.0) * sx
        y2 = (cy + bh / 2.0) * sy

        return [
            (float(a), float(b), float(c), float(d), float(s), int(k))
            for a, b, c, d, s, k in zip(x1, y1, x2, y2, confs[keep], cls_ids[keep])
        ]

    def _infer_single(self, image_bgr: ndarray) -> "list[BoundingBox]":
        """Detect objects in one BGR frame.

        Returns:
            Boxes in original-frame pixels; corners are rounded outwards
            (floor for x1/y1, ceil for x2/y2) and clamped to the frame.
        """
        inp, (orig_h, orig_w) = self._preprocess(image_bgr)
        out = self.session.run(None, {self.input_name: inp})[0]
        pred = self._normalize_predictions(out)

        dets = self._nms(self._decode_candidates(pred, orig_h, orig_w))

        out_boxes = []
        for x1, y1, x2, y2, conf, cls_id in dets:
            out_boxes.append(
                BoundingBox(
                    x1=max(0, min(orig_w, math.floor(x1))),
                    y1=max(0, min(orig_h, math.floor(y1))),
                    x2=max(0, min(orig_w, math.ceil(x2))),
                    y2=max(0, min(orig_h, math.ceil(y2))),
                    cls_id=cls_id,
                    conf=max(0.0, min(1.0, conf)),
                )
            )
        return out_boxes

    def predict_batch(
        self,
        batch_images: list[ndarray],
        offset: int,
        n_keypoints: int,
    ) -> "list[TVFrameResult]":
        """Run detection on every frame of a batch.

        Args:
            batch_images: BGR frames, processed one at a time.
            offset: Frame id of the first image; frame ``i`` gets
                ``offset + i``.
            n_keypoints: Number of placeholder ``(0, 0)`` keypoints to attach
                to each result (negative values are treated as 0). This miner
                does not predict keypoints.

        Returns:
            One ``TVFrameResult`` per input frame, in input order.
        """
        keypoint_count = max(0, int(n_keypoints))
        results = []
        for idx, image in enumerate(batch_images):
            results.append(
                TVFrameResult(
                    frame_id=offset + idx,
                    boxes=self._infer_single(image),
                    # A fresh list per frame so results never share state.
                    keypoints=[(0, 0)] * keypoint_count,
                )
            )
        return results