meaculpitt committed on
Commit
93dd8b4
Β·
verified Β·
1 Parent(s): 689e902

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. README.md +75 -0
  2. chute_config.yml +13 -0
  3. class_names.txt +4 -0
  4. miner.py +357 -0
  5. model_type.json +18 -0
  6. weights.onnx +3 -0
README.md ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Score Vision SN44 — VehicleDetect Miner
2
+
3
+ **Wallet:** LukeTao | **Hotkey:** default | **UID:** 128 | **Netuid:** 44
4
+
5
+ ## Model
6
+
7
+ | Property | Value |
8
+ |---|---|
9
+ | Architecture | YOLO11-nano |
10
+ | Input size | 640×640 |
11
+ | Model file | `weights.onnx` |
12
+ | Size | ~11 MB (well under 30 MB limit) |
13
+ | Framework | ONNX Runtime (CUDA EP) |
14
+ | mAP\@50 | **63.05%** (COCO val2017, vehicle classes) |
15
+
16
+ ## Classes
17
+
18
+ | Output ID | Class | COCO Index |
19
+ |---|---|---|
20
+ | 0 | car | 2 |
21
+ | 1 | bus | 5 |
22
+ | 2 | truck | 7 |
23
+ | 3 | motorcycle | 3 |
24
+
25
+ ## Performance
26
+
27
+ Measured on RTX 4090, COCO val2017 images (640×640 letterbox):
28
+
29
+ | Metric | Value | Target |
30
+ |---|---|---|
31
+ | Mean FPS (CUDA) | ~371 | ≥ 30 |
32
+ | Mean FPS (CPU) | ~34 | ≥ 30 |
33
+ | P95 latency (CUDA) | 2.83 ms | < 50 ms |
34
+ | Inference (GPU) | 2.70 ms | — |
35
+
36
+ ## API
37
+
38
+ ### `POST /predict`
39
+
40
+ **Request:**
41
+ ```json
42
+ {"image_b64": "<base64-encoded JPEG/PNG image>"}
43
+ ```
44
+
45
+ **Response:**
46
+ ```json
47
+ {
48
+ "detections": [
49
+ {
50
+ "bbox": [x1, y1, x2, y2],
51
+ "score": 0.91,
52
+ "class_id": 0,
53
+ "class_name": "car"
54
+ }
55
+ ],
56
+ "inference_ms": 2.3,
57
+ "provider": "CUDAExecutionProvider"
58
+ }
59
+ ```
60
+
61
+ ## Preprocessing
62
+
63
+ Standard YOLO letterbox: resize to fit within 640×640 while maintaining aspect ratio,
64
+ pad with grey (114, 114, 114), normalise to [0, 1], BGR→RGB, HWC→CHW.
65
+
66
+ ## Files
67
+
68
+ | File | Purpose |
69
+ |---|---|
70
+ | `weights.onnx` | ONNX model (YOLO11-nano, opset 12) |
71
+ | `miner.py` | Chutes endpoint + predict logic |
72
+ | `class_names.txt` | One class name per line |
73
+ | `model_type.json` | Model metadata |
74
+ | `chute_config.yml` | Chutes deployment config |
75
+ | `README.md` | This file |
chute_config.yml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Image:
2
+ from_base: parachutes/python:3.12
3
+ run_command:
4
+ - pip install --upgrade setuptools wheel
5
+ - pip install 'numpy>=1.23' 'onnxruntime-gpu>=1.16' 'opencv-python-headless>=4.7' 'pillow>=9.5' 'huggingface_hub>=0.19.4' 'pydantic>=2.0' 'pyyaml>=6.0' 'aiohttp>=3.9'
6
+ NodeSelector:
7
+ gpu_count: 1
8
+ min_vram_gb_per_gpu: 16
9
+ Chute:
10
+ timeout_seconds: 300
11
+ concurrency: 4
12
+ max_instances: 5
13
+ scaling_threshold: 0.5
class_names.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ car
2
+ bus
3
+ truck
4
+ motorcycle
miner.py ADDED
@@ -0,0 +1,357 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Score Vision SN44 — VehicleDetect miner endpoint.
3
+
4
+ Class mapping (output indices):
5
+ 0 = car (COCO class 2)
6
+ 1 = bus (COCO class 5)
7
+ 2 = truck (COCO class 7)
8
+ 3 = motorcycle (COCO class 3)
9
+
10
+ Accepts: base64-encoded image or raw image bytes via chutes cord.
11
+ Returns: list of {bbox: [x1,y1,x2,y2], score: float, class_id: int, class_name: str}
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import base64
17
+ import io
18
+ import os
19
+ import time
20
+ from pathlib import Path
21
+ from typing import Any
22
+
23
+ import ctypes
24
+ import cv2
25
+ import numpy as np
26
+ from PIL import Image
27
+
28
+ # ── cuDNN preload via ctypes (must happen before onnxruntime import) ──────────
29
+ # os.environ["LD_LIBRARY_PATH"] is too late — the dynamic linker has already
30
+ # resolved paths when this process started. ctypes.CDLL triggers an explicit
31
+ # dlopen() which works at any point before ort tries to open the CUDA provider.
32
def _preload_cuda_libs() -> None:
    """
    Best-effort dlopen of the CUDA driver, cuBLAS and cuDNN shared objects.

    Must run before ``onnxruntime`` is imported. The driver library
    (libcuda.so.1) is loaded first so that cuBLAS/cuDNN can resolve their
    dependency on it. Per the original author's note, skipping this step makes
    the onnxruntime CUDAExecutionProvider report 'no CUDA-capable device'.

    Libraries that are absent on disk are skipped, and a failing load is
    ignored, so the function never raises for a missing CUDA stack.
    """
    # Location of the pip-installed NVIDIA wheels inside the container image.
    nvidia_root = "/usr/local/lib/python3.12/dist-packages/nvidia"

    # Order matters: driver first, then cuBLAS (Lt before the main lib), then cuDNN.
    load_order = (
        "/usr/lib/x86_64-linux-gnu/libcuda.so.1",
        f"{nvidia_root}/cublas/lib/libcublasLt.so.12",
        f"{nvidia_root}/cublas/lib/libcublas.so.12",
        f"{nvidia_root}/cudnn/lib/libcudnn.so.9",
    )

    for lib_path in load_order:
        if not os.path.exists(lib_path):
            continue
        try:
            # RTLD_GLOBAL exposes the symbols to libraries loaded afterwards.
            ctypes.CDLL(lib_path, mode=ctypes.RTLD_GLOBAL)
        except OSError:
            # Already loaded or unusable; the session setup lists a CPU
            # provider as fallback, so this is deliberately non-fatal.
            continue


_preload_cuda_libs()
53
+
54
+ import onnxruntime as ort # noqa: E402 β€” must come after preload
55
+
56
+ # ── Constants ────────────────────────────────────────────────────────────────
57
# Model artefacts live next to this module.
MODEL_DIR = Path(__file__).parent
WEIGHTS = MODEL_DIR.joinpath("weights.onnx")

# Square network input edge, in pixels.
IMG_SIZE = 640
# Minimum class score for a candidate box to be considered.
CONF_THRESH = 0.25
# Boxes of the same class overlapping a kept box by more than this IoU are dropped.
IOU_THRESH = 0.45

# COCO class index -> submission class index
# car=2->0, bus=5->1, truck=7->2, motorcycle=3->3
COCO_TO_OUT: dict[int, int] = {
    2: 0,
    5: 1,
    7: 2,
    3: 3,
}
# COCO indices in submission order: [2, 5, 7, 3]
COCO_VEHICLE_IDX = [*COCO_TO_OUT]
# Human-readable names, indexed by submission class index.
OUT_NAMES = [
    "car",
    "bus",
    "truck",
    "motorcycle",
]
68
+
69
+ # ── Model loader (singleton) ─────────────────────────────────────────────────
70
# Lazily created, process-wide inference session (see get_session()).
_SESSION: ort.InferenceSession | None = None


def get_session() -> ort.InferenceSession:
    """
    Return the shared ONNX Runtime session, creating it on first call.

    The session is built from ``WEIGHTS`` with the CUDA execution provider
    listed first and the CPU provider as fallback. The first entry of the
    session's provider list is printed once, when the session is created.

    Returns:
        The cached ``ort.InferenceSession``.
    """
    global _SESSION
    if _SESSION is not None:
        return _SESSION

    session_options = ort.SessionOptions()
    session_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    session_options.enable_mem_pattern = True
    session_options.enable_mem_reuse = True

    cuda_provider = (
        "CUDAExecutionProvider",
        {
            "device_id": 0,
            "arena_extend_strategy": "kNextPowerOfTwo",
            "gpu_mem_limit": 2 * 1024 ** 3,  # 2 GiB
            "cudnn_conv_algo_search": "EXHAUSTIVE",
            "do_copy_in_default_stream": True,
        },
    )

    _SESSION = ort.InferenceSession(
        str(WEIGHTS),
        sess_options=session_options,
        providers=[cuda_provider, "CPUExecutionProvider"],
    )

    provider = _SESSION.get_providers()[0]
    print(f"[miner] Model loaded. Provider: {provider}", flush=True)
    return _SESSION
98
+
99
+
100
+ # ── Preprocessing ────────────────────────────────────────────────────────────
101
+
102
def letterbox(img: np.ndarray, size: int = IMG_SIZE) -> tuple[np.ndarray, float, int, int]:
    """
    Resize an image to fit inside a ``size`` x ``size`` square and pad the rest.

    The aspect ratio is preserved; the remaining area is filled with the
    constant colour (114, 114, 114), split as evenly as possible between
    opposite sides (the odd pixel, if any, goes to the right/bottom).

    Args:
        img: Image array whose first two axes are (height, width).
        size: Edge length of the square output, in pixels.

    Returns:
        ``(padded_img, scale_ratio, pad_left, pad_top)`` where ``scale_ratio``
        is the factor applied to the original dimensions and the pads are the
        number of pixels added on the left and top.
    """
    h, w = img.shape[:2]
    r = min(size / h, size / w)

    # Clamp to at least one pixel: for extreme aspect ratios the shorter side
    # can round down to 0, which cv2.resize rejects.
    new_w = max(1, int(round(w * r)))
    new_h = max(1, int(round(h * r)))
    img_r = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

    dw, dh = size - new_w, size - new_h
    pad_l, pad_t = dw // 2, dh // 2
    img_p = cv2.copyMakeBorder(
        img_r,
        pad_t, dh - pad_t,   # top, bottom
        pad_l, dw - pad_l,   # left, right
        cv2.BORDER_CONSTANT,
        value=(114, 114, 114),
    )
    return img_p, r, pad_l, pad_t
115
+
116
+
117
def preprocess(img_bgr: np.ndarray) -> tuple[np.ndarray, float, int, int]:
    """
    Turn a BGR image into the network input tensor.

    Steps: letterbox, BGR->RGB, HWC->CHW, cast to float32, scale by 1/255,
    then prepend a batch axis.

    Returns:
        ``(input_tensor, scale_ratio, pad_left, pad_top)``; the last three
        values are passed through unchanged from ``letterbox``.
    """
    padded, ratio, pad_l, pad_t = letterbox(img_bgr)
    rgb = cv2.cvtColor(padded, cv2.COLOR_BGR2RGB)

    chw = rgb.transpose(2, 0, 1).astype(np.float32)
    chw = chw * (1.0 / 255.0)

    batch = np.ascontiguousarray(chw[np.newaxis])
    return batch, ratio, pad_l, pad_t
122
+
123
+
124
+ # ── NMS ──────────────────────────────────────────────────────────────────────
125
+
126
def nms(boxes: np.ndarray, scores: np.ndarray, iou_thresh: float = IOU_THRESH) -> list[int]:
    """
    Greedy non-maximum suppression.

    Repeatedly keeps the highest-scoring remaining box and discards every
    other remaining box whose IoU with it is greater than ``iou_thresh``.

    Args:
        boxes: Array indexed as ``boxes[:, 0..3]`` = x1, y1, x2, y2.
        scores: One score per box.
        iou_thresh: Overlap above which a lower-scoring box is suppressed.

    Returns:
        Indices into ``boxes`` of the kept boxes, in descending score order.
    """
    if len(boxes) == 0:
        return []

    left, top, right, bottom = (boxes[:, col] for col in range(4))
    box_area = (right - left) * (bottom - top)

    remaining = scores.argsort()[::-1]
    kept: list[int] = []
    while remaining.size:
        best, rest = remaining[0], remaining[1:]
        kept.append(int(best))

        # Intersection rectangle of the best box with every other candidate.
        overlap_w = np.maximum(
            0, np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest])
        )
        overlap_h = np.maximum(
            0, np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest])
        )
        inter = overlap_w * overlap_h

        # The small epsilon guards against division by zero for empty boxes.
        iou = inter / (box_area[best] + box_area[rest] - inter + 1e-7)
        remaining = rest[iou <= iou_thresh]
    return kept
144
+
145
+
146
+ # ── Postprocessing ───────────────────────────────────────────────────────────
147
+
148
def postprocess(
    raw: np.ndarray,
    ratio: float,
    pad_l: int,
    pad_t: int,
    orig_w: int,
    orig_h: int,
) -> list[dict[str, Any]]:
    """
    Convert the raw network output into vehicle detections in image coordinates.

    Args:
        raw: Network output with the batch axis removed. Rows 0-3 are read as
            cx, cy, w, h and row ``4 + c`` as the score of COCO class ``c``
            (the original author documents the shape as [84, 8400]).
            NOTE(review): README.md / model_type.json advertise a 4-class
            model; this indexing needs at least 12 rows - confirm the
            exported head really is COCO-80.
        ratio: Scale factor that was applied by ``letterbox``.
        pad_l: Left padding added by ``letterbox``, in pixels.
        pad_t: Top padding added by ``letterbox``, in pixels.
        orig_w: Width of the original image; x coordinates are clipped to it.
        orig_h: Height of the original image; y coordinates are clipped to it.

    Returns:
        One dict per detection with keys ``bbox`` ([x1, y1, x2, y2] floats),
        ``score``, ``class_id`` (submission index) and ``class_name``.
        Detections are grouped by class in ``COCO_VEHICLE_IDX`` order, and
        NMS is applied independently per class.
    """
    pred = raw

    # Pre-filter: keep only columns where any vehicle class exceeds the threshold.
    veh_row_idx = np.array([4 + c for c in COCO_VEHICLE_IDX])
    mask = pred[veh_row_idx].max(axis=0) > CONF_THRESH
    if not mask.any():
        return []

    pred_f = pred[:, mask]
    cx, cy, bw, bh = pred_f[0], pred_f[1], pred_f[2], pred_f[3]

    # Centre/size -> corners, then undo the letterbox (remove pad, rescale)
    # and clip to the original image bounds.
    half_w, half_h = bw / 2, bh / 2
    x1 = np.clip((cx - half_w - pad_l) / ratio, 0, orig_w)
    y1 = np.clip((cy - half_h - pad_t) / ratio, 0, orig_h)
    x2 = np.clip((cx + half_w - pad_l) / ratio, 0, orig_w)
    y2 = np.clip((cy + half_h - pad_t) / ratio, 0, orig_h)
    boxes = np.stack([x1, y1, x2, y2], axis=1)

    results: list[dict[str, Any]] = []
    for coco_cls in COCO_VEHICLE_IDX:
        scores = pred_f[4 + coco_cls]
        cls_mask = scores > CONF_THRESH
        if not cls_mask.any():
            continue

        # Materialise the per-class subsets once instead of re-running the
        # boolean indexing for every kept detection.
        cls_boxes = boxes[cls_mask]
        cls_scores = scores[cls_mask]
        out_cls = COCO_TO_OUT[coco_cls]
        out_name = OUT_NAMES[out_cls]

        for k in nms(cls_boxes, cls_scores):
            box = cls_boxes[k]
            results.append(
                {
                    "bbox": [
                        float(box[0]), float(box[1]),
                        float(box[2]), float(box[3]),
                    ],
                    "score": float(cls_scores[k]),
                    "class_id": out_cls,
                    "class_name": out_name,
                }
            )
    return results
200
+
201
+
202
+ # ── Image decoding helpers ───────────────────────────────────────────────────
203
+
204
def decode_image(data: bytes | str) -> np.ndarray:
    """
    Decode an encoded image payload into a BGR array.

    Args:
        data: One of
            * ``str`` - base64 text, optionally carrying a
              ``data:<mime>;base64,`` URI prefix;
            * ``bytes`` / ``bytearray`` - either base64 text or the raw bytes
              of an encoded image file.

    Returns:
        The decoded image in BGR channel order.

    Raises:
        ValueError: if the payload is empty, or if a ``str`` payload is not
            valid base64 (``binascii.Error`` is a ``ValueError`` subclass).
        Exception: whatever ``PIL.Image.open`` raises when neither OpenCV nor
            PIL recognises the data.
    """
    if isinstance(data, str):
        # Tolerate "data:image/jpeg;base64,...." style payloads; without this
        # the header characters would be decoded as if they were image data.
        if data.startswith("data:"):
            data = data.partition(",")[2]
        data = base64.b64decode(data)
    elif isinstance(data, (bytes, bytearray)):
        raw = bytes(data)
        try:
            # Strict validation is essential here: the permissive default
            # silently drops non-alphabet bytes, so raw JPEG/PNG bytes could
            # "decode" into garbage instead of raising. ASCII whitespace is
            # stripped first so line-wrapped base64 is still accepted.
            data = base64.b64decode(b"".join(raw.split()), validate=True)
        except ValueError:
            # Not base64 -> treat as raw encoded image bytes.
            data = raw

    if not data:
        raise ValueError("empty image payload")

    arr = np.frombuffer(data, dtype=np.uint8)
    img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
    if img is None:
        # Fallback via PIL for formats OpenCV could not decode.
        pil = Image.open(io.BytesIO(data)).convert("RGB")
        img = cv2.cvtColor(np.array(pil), cv2.COLOR_RGB2BGR)
    return img
221
+
222
+
223
+ # ── Core predict function ────────────────────────────────────────────────────
224
+
225
def predict(image_data: bytes | str | np.ndarray) -> dict[str, Any]:
    """
    Run the full detection pipeline on one image.

    This is the entry point called by the Chutes cord.

    Args:
        image_data: Either a numpy array, which is used as-is and treated as
            a BGR image, or an encoded payload (raw image bytes, or base64 as
            bytes/str) that is handed to ``decode_image``.

    Returns:
        A dict with
            * ``"detections"`` - list of ``{"bbox": [x1, y1, x2, y2],
              "score": float, "class_id": int, "class_name": str}``;
            * ``"inference_ms"`` - wall-clock time of the ``run`` call only
              (pre/post-processing excluded), rounded to 3 decimals;
            * ``"provider"`` - first entry of the session's provider list.
    """
    session = get_session()

    # Decode (arrays bypass decoding entirely).
    frame = image_data if isinstance(image_data, np.ndarray) else decode_image(image_data)
    height, width = frame.shape[:2]

    # Preprocess
    tensor, ratio, pad_l, pad_t = preprocess(frame)

    # Inference - only this call is timed.
    started = time.perf_counter()
    outputs = session.run(None, {"images": tensor})
    elapsed_ms = (time.perf_counter() - started) * 1000.0

    # Postprocess: first output, first batch element.
    first_output = outputs[0][0]
    detections = postprocess(first_output, ratio, pad_l, pad_t, width, height)

    return {
        "detections": detections,
        "inference_ms": round(elapsed_ms, 3),
        "provider": session.get_providers()[0],
    }
267
+
268
+
269
+ # ── Chutes cord wrapper ──────────────────────────────────────────────────────
270
+ # The Chutes runtime calls the function decorated with @chute.cord().
271
+ # We guard the import so miner.py is also directly testable without chutes.
272
+
273
# Only the import itself is guarded: wrapping the whole setup in the try
# block would also swallow unrelated ImportErrors raised while the chute is
# being constructed.
try:
    from chutes.chute import Chute  # type: ignore
except ImportError:
    # Running locally without chutes installed - that's fine for testing.
    Chute = None

if Chute is not None:
    chute = Chute(
        username="LukeTao",
        name="vehicle-detect-sn44",
        # Architecture name matches README.md and model_type.json.
        tagline="YOLO11-nano vehicle detector — car, bus, truck, motorcycle",
        # The README contains non-ASCII characters, so do not rely on the
        # locale's default encoding.
        readme=(Path(__file__).parent / "README.md").read_text(encoding="utf-8"),
        image="parachutes/python:3.12",
    )

    @chute.cord("/predict", method="POST")
    async def predict_cord(image_b64: str) -> dict:
        """
        POST /predict
        Body: {"image_b64": "<base64-encoded image>"}
        Returns detection JSON.
        """
        # NOTE(review): predict() is synchronous, so it runs inline inside
        # this coroutine.
        return predict(image_b64)
296
+
297
+
298
+ # ── Local test ───────────────────────────────────────────────────────────────
299
+
300
if __name__ == "__main__":
    # Local smoke test: runs the numpy and base64 input paths once each, then
    # a short latency benchmark, and prints PASS/FAIL against the targets.
    import sys

    print("=" * 55)
    print(" miner.py — local smoke test")
    print("=" * 55)

    # Build a dummy 1280×720 BGR image (grey with a rectangle)
    dummy_bgr = np.full((720, 1280, 3), 128, dtype=np.uint8)
    cv2.rectangle(dummy_bgr, (100, 100), (400, 300), (0, 255, 0), 3)

    # Optionally test with a real image given as the first CLI argument
    if len(sys.argv) > 1:
        img_path = sys.argv[1]
        loaded = cv2.imread(img_path)
        if loaded is not None:
            dummy_bgr = loaded
            print(f" Using image : {img_path} ({loaded.shape[1]}x{loaded.shape[0]})")
        else:
            print(f" Could not load {img_path}, using dummy image.")
    else:
        print(" Using synthetic 1280×720 dummy image.")

    # Test via numpy path (also triggers the one-time model load)
    result = predict(dummy_bgr)
    print(f"\n Provider : {result['provider']}")
    print(f" Inference : {result['inference_ms']:.2f} ms")
    print(f" Detections : {len(result['detections'])}")
    for d in result["detections"]:
        x1, y1, x2, y2 = [round(v, 1) for v in d["bbox"]]
        print(f" [{d['class_id']}] {d['class_name']:12s} score={d['score']:.3f} "
              f"bbox=[{x1},{y1},{x2},{y2}]")

    # Test base64 round-trip
    print("\n Testing base64 round-trip...")
    _, buf = cv2.imencode(".jpg", dummy_bgr)
    b64 = base64.b64encode(buf.tobytes()).decode()
    result2 = predict(b64)
    print(f" Detections (base64 path): {len(result2['detections'])}")
    assert result2["provider"] == result["provider"]

    # End-to-end latency (decode-free numpy path) over a fixed number of runs
    n_runs = 50
    print(f"\n Latency benchmark ({n_runs} runs)...")
    times = []
    for _ in range(n_runs):
        t0 = time.perf_counter()
        predict(dummy_bgr)
        times.append((time.perf_counter() - t0) * 1000)
    times.sort()

    # Percentile indices derived from the run count (25 and 47 for 50 runs).
    p50 = times[n_runs // 2]
    p95 = times[int(n_runs * 0.95)]
    fps = 1000.0 / p50
    print(f" P50={p50:.2f}ms P95={p95:.2f}ms FPS={fps:.1f}")
    print(f"\n Target >=30 FPS : {'PASS' if fps >= 30 else 'FAIL'}")
    print(f" Target P95<50ms : {'PASS' if p95 < 50 else 'FAIL'}")
    print("=" * 55)
    print(" Smoke test complete.")
    print("=" * 55)
model_type.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "yolo11-nano",
3
+ "task": "detect",
4
+ "input_size": 640,
5
+ "num_classes": 4,
6
+ "class_names": ["car", "bus", "truck", "motorcycle"],
7
+ "coco_class_map": {
8
+ "2": 0,
9
+ "5": 1,
10
+ "7": 2,
11
+ "3": 3
12
+ },
13
+ "framework": "onnxruntime",
14
+ "opset": 12,
15
+ "model_file": "weights.onnx",
16
+ "conf_threshold": 0.25,
17
+ "iou_threshold": 0.45
18
+ }
weights.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8792356fd6366ccf69290191d321d928d670ed5226804c901a52aec523a1663
3
+ size 10741269