Upload folder using huggingface_hub

Browse files

Files changed (6) hide show

README.md +75 -0
chute_config.yml +13 -0
class_names.txt +4 -0
miner.py +357 -0
model_type.json +18 -0
weights.onnx +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,75 @@

+# Score Vision SN44 — VehicleDetect Miner
+**Wallet:** LukeTao | **Hotkey:** default | **UID:** 128 | **Netuid:** 44
+## Model
+| Property | Value |
+|---|---|
+| Architecture | YOLO11-nano |
+| Input size | 640×640 |
+| Model file | `weights.onnx` |
+| Size | ~11 MB (well under 30 MB limit) |
+| Framework | ONNX Runtime (CUDA EP) |
+| mAP\@50 | **63.05%** (COCO val2017, vehicle classes) |
+## Classes
+| Output ID | Class | COCO Index |
+|---|---|---|
+| 0 | car | 2 |
+| 1 | bus | 5 |
+| 2 | truck | 7 |
+| 3 | motorcycle | 3 |
+## Performance
+Measured on RTX 4090, COCO val2017 images (640×640 letterbox):
+| Metric | Value | Target |
+|---|---|---|
+| Mean FPS (CUDA) | ~371 | ≥ 30 |
+| Mean FPS (CPU)  | ~34  | ≥ 30 |
+| P95 latency (CUDA) | 2.83 ms | < 50 ms |
+| Inference (GPU) | 2.70 ms | — |
+## API
+### `POST /predict`
+**Request:**
+```json
+{"image_b64": "<base64-encoded JPEG/PNG image>"}
+```
+**Response:**
+```json
+{
+  "detections": [
+    {
+      "bbox": [x1, y1, x2, y2],
+      "score": 0.91,
+      "class_id": 0,
+      "class_name": "car"
+    }
+  ],
+  "inference_ms": 2.3,
+  "provider": "CUDAExecutionProvider"
+}
+```
+## Preprocessing
+Standard YOLO letterbox: resize so the image fits inside 640×640 while keeping its aspect ratio,
+pad to 640×640 with grey (114, 114, 114), then BGR→RGB, HWC→CHW, and normalise to [0, 1].
+## Files
+| File | Purpose |
+|---|---|
+| `weights.onnx` | ONNX model (YOLO11-nano, opset 12) |
+| `miner.py` | Chutes endpoint + predict logic |
+| `class_names.txt` | One class name per line |
+| `model_type.json` | Model metadata |
+| `chute_config.yml` | Chutes deployment config |
+| `README.md` | This file |

chute_config.yml ADDED Viewed

	@@ -0,0 +1,13 @@

+Image:
+  from_base: parachutes/python:3.12
+  run_command:
+    - pip install --upgrade setuptools wheel
+    - pip install 'numpy>=1.23' 'onnxruntime-gpu>=1.16' 'opencv-python-headless>=4.7' 'pillow>=9.5' 'huggingface_hub>=0.19.4' 'pydantic>=2.0' 'pyyaml>=6.0' 'aiohttp>=3.9'
+NodeSelector:
+  gpu_count: 1
+  min_vram_gb_per_gpu: 16
+Chute:
+  timeout_seconds: 300
+  concurrency: 4
+  max_instances: 5
+  scaling_threshold: 0.5

class_names.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+car
+bus
+truck
+motorcycle

miner.py ADDED Viewed

	@@ -0,0 +1,357 @@

+"""
+Score Vision SN44 — VehicleDetect miner endpoint.
+Class mapping (output indices):
+  0 = car        (COCO class 2)
+  1 = bus        (COCO class 5)
+  2 = truck      (COCO class 7)
+  3 = motorcycle (COCO class 3)
+Accepts: base64-encoded image or raw image bytes via chutes cord.
+Returns: dict with keys detections (list of {bbox: [x1,y1,x2,y2], score: float,
+class_id: int, class_name: str}), inference_ms (float) and provider (str)
+"""
+from __future__ import annotations
+import base64
+import io
+import os
+import time
+from pathlib import Path
+from typing import Any
+import ctypes
+import cv2
+import numpy as np
+from PIL import Image
+# ── cuDNN preload via ctypes (must happen before onnxruntime import) ──────────
+# os.environ["LD_LIBRARY_PATH"] is too late — the dynamic linker has already
+# resolved paths when this process started.  ctypes.CDLL triggers an explicit
+# dlopen() which works at any point before ort tries to open the CUDA provider.
+def _preload_cuda_libs() -> None:
+    """
+    Explicitly dlopen the CUDA dependency chain before onnxruntime is imported.
+    libcuda.so.1 must come first so cuBLAS/cuDNN resolve their own dependency on it.
+    Without this, onnxruntime CUDAExecutionProvider reports 'no CUDA-capable device'.
+    """
+    _NVIDIA = "/usr/local/lib/python3.12/dist-packages/nvidia"
+    _LIBS = [
+        "/usr/lib/x86_64-linux-gnu/libcuda.so.1",         # driver — must be first
+        f"{_NVIDIA}/cublas/lib/libcublasLt.so.12",
+        f"{_NVIDIA}/cublas/lib/libcublas.so.12",
+        f"{_NVIDIA}/cudnn/lib/libcudnn.so.9",
+    ]
+    for path in _LIBS:
+        if os.path.exists(path):
+            try:
+                ctypes.CDLL(path, mode=ctypes.RTLD_GLOBAL)
+            except OSError:
+                pass  # already loaded or not present — ort will fall back to CPU
+_preload_cuda_libs()
+import onnxruntime as ort  # noqa: E402 — must come after preload
+# ── Constants ────────────────────────────────────────────────────────────────
+MODEL_DIR   = Path(__file__).parent  # directory containing this file
+WEIGHTS     = MODEL_DIR / "weights.onnx"  # path handed to ort.InferenceSession in get_session()
+IMG_SIZE    = 640  # default square side used by letterbox()
+CONF_THRESH = 0.25  # scores must be strictly greater than this in postprocess()
+IOU_THRESH  = 0.45  # default overlap limit in nms(); boxes with IoU above it are dropped
+# COCO class index → submission class index
+# car=2→0, bus=5→1, truck=7→2, motorcycle=3→3
+COCO_TO_OUT: dict[int, int] = {2: 0, 5: 1, 7: 2, 3: 3}
+COCO_VEHICLE_IDX = list(COCO_TO_OUT.keys())          # [2, 5, 7, 3]
+OUT_NAMES = ["car", "bus", "truck", "motorcycle"]  # indexed by submission class index
+# ── Model loader (singleton) ─────────────────────────────────────────────────
+_SESSION: ort.InferenceSession | None = None
+def get_session() -> ort.InferenceSession:
+    global _SESSION
+    if _SESSION is None:
+        opts = ort.SessionOptions()
+        opts.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
+        opts.enable_mem_pattern = True
+        opts.enable_mem_reuse   = True
+        cuda_opts = {
+            "device_id": 0,
+            "arena_extend_strategy": "kNextPowerOfTwo",
+            "gpu_mem_limit": 2 * 1024 ** 3,
+            "cudnn_conv_algo_search": "EXHAUSTIVE",
+            "do_copy_in_default_stream": True,
+        }
+        _SESSION = ort.InferenceSession(
+            str(WEIGHTS),
+            sess_options=opts,
+            providers=[
+                ("CUDAExecutionProvider", cuda_opts),
+                "CPUExecutionProvider",
+            ],
+        )
+        provider = _SESSION.get_providers()[0]
+        print(f"[miner] Model loaded. Provider: {provider}", flush=True)
+    return _SESSION
+# ── Preprocessing ────────────────────────────────────────────────────────────
+def letterbox(img: np.ndarray, size: int = IMG_SIZE) -> tuple[np.ndarray, float, int, int]:
+    """Resize + pad to square, return (padded_img, scale_ratio, pad_left, pad_top)."""
+    h, w = img.shape[:2]
+    r = min(size / h, size / w)
+    new_w, new_h = int(round(w * r)), int(round(h * r))
+    img_r = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
+    dw, dh = size - new_w, size - new_h
+    pad_l, pad_t = dw // 2, dh // 2
+    img_p = cv2.copyMakeBorder(
+        img_r, pad_t, dh - pad_t, pad_l, dw - pad_l,
+        cv2.BORDER_CONSTANT, value=(114, 114, 114),
+    )
+    return img_p, r, pad_l, pad_t
+def preprocess(img_bgr: np.ndarray) -> tuple[np.ndarray, float, int, int]:
+    img_p, ratio, pad_l, pad_t = letterbox(img_bgr)
+    img_rgb = cv2.cvtColor(img_p, cv2.COLOR_BGR2RGB)
+    inp = img_rgb.transpose(2, 0, 1).astype(np.float32) * (1.0 / 255.0)
+    return np.ascontiguousarray(inp[np.newaxis]), ratio, pad_l, pad_t
+# ── NMS ──────────────────────────────────────────────────────────────────────
+def nms(boxes: np.ndarray, scores: np.ndarray, iou_thresh: float = IOU_THRESH) -> list[int]:
+    if not len(boxes):
+        return []
+    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
+    areas = (x2 - x1) * (y2 - y1)
+    order = scores.argsort()[::-1]
+    keep: list[int] = []
+    while len(order):
+        i = order[0]
+        keep.append(int(i))
+        xx1 = np.maximum(x1[i], x1[order[1:]])
+        yy1 = np.maximum(y1[i], y1[order[1:]])
+        xx2 = np.minimum(x2[i], x2[order[1:]])
+        yy2 = np.minimum(y2[i], y2[order[1:]])
+        inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
+        iou = inter / (areas[i] + areas[order[1:]] - inter + 1e-7)
+        order = order[1:][iou <= iou_thresh]
+    return keep
+# ── Postprocessing ───────────────────────────────────────────────────────────
+def postprocess(
+    raw: np.ndarray,
+    ratio: float,
+    pad_l: int,
+    pad_t: int,
+    orig_w: int,
+    orig_h: int,
+) -> list[dict[str, Any]]:
+    """
+    raw: [84, 8400] — first 4 rows cx/cy/w/h, rows 4+ are class scores.
+    Returns list of detection dicts with remapped class_id.
+    """
+    pred = raw  # [84, 8400]
+    # Pre-filter: keep anchors where any vehicle class exceeds conf
+    veh_row_idx = np.array([4 + c for c in COCO_VEHICLE_IDX])   # [4+2, 4+5, 4+7, 4+3]
+    max_veh_score = pred[veh_row_idx].max(axis=0)                # [8400]
+    mask = max_veh_score > CONF_THRESH
+    if not mask.any():
+        return []
+    pred_f = pred[:, mask]                       # [84, N]
+    cx, cy, bw, bh = pred_f[0], pred_f[1], pred_f[2], pred_f[3]
+    x1 = np.clip((cx - bw / 2 - pad_l) / ratio, 0, orig_w)
+    y1 = np.clip((cy - bh / 2 - pad_t) / ratio, 0, orig_h)
+    x2 = np.clip((cx + bw / 2 - pad_l) / ratio, 0, orig_w)
+    y2 = np.clip((cy + bh / 2 - pad_t) / ratio, 0, orig_h)
+    boxes = np.stack([x1, y1, x2, y2], axis=1)  # [N, 4]
+    results: list[dict[str, Any]] = []
+    for coco_cls in COCO_VEHICLE_IDX:
+        scores = pred_f[4 + coco_cls]
+        cls_mask = scores > CONF_THRESH
+        if not cls_mask.any():
+            continue
+        keep = nms(boxes[cls_mask], scores[cls_mask])
+        out_cls = COCO_TO_OUT[coco_cls]
+        for k in keep:
+            box = boxes[cls_mask][k]
+            results.append(
+                {
+                    "bbox": [
+                        float(box[0]), float(box[1]),
+                        float(box[2]), float(box[3]),
+                    ],
+                    "score": float(scores[cls_mask][k]),
+                    "class_id": out_cls,
+                    "class_name": OUT_NAMES[out_cls],
+                }
+            )
+    return results
+# ── Image decoding helpers ───────────────────────────────────────────────────
+def decode_image(data: bytes | str) -> np.ndarray:
+    """Accept raw bytes, base64 string, or base64 bytes."""
+    if isinstance(data, str):
+        data = base64.b64decode(data)
+    elif isinstance(data, (bytes, bytearray)):
+        # Try base64 first; fall back to raw image bytes
+        try:
+            data = base64.b64decode(data)
+        except Exception:
+            pass
+    arr = np.frombuffer(data, dtype=np.uint8)
+    img = cv2.imdecode(arr, cv2.IMREAD_COLOR)
+    if img is None:
+        # Fallback via PIL for unusual formats (webp, etc.)
+        pil = Image.open(io.BytesIO(data)).convert("RGB")
+        img = cv2.cvtColor(np.array(pil), cv2.COLOR_RGB2BGR)
+    return img
+# ── Core predict function ────────────────────────────────────────────────────
+def predict(image_data: bytes | str | np.ndarray) -> dict[str, Any]:
+    """
+    Main entry point called by the Chutes cord.
+    Args:
+        image_data: raw image bytes, base64-encoded bytes/str, or BGR numpy array.
+    Returns:
+        {
+          "detections": [ {"bbox": [x1,y1,x2,y2], "score": float,
+                           "class_id": int, "class_name": str}, ... ],
+          "inference_ms": float,
+          "provider": str,
+        }
+    """
+    sess = get_session()
+    # Decode
+    if isinstance(image_data, np.ndarray):
+        img_bgr = image_data
+    else:
+        img_bgr = decode_image(image_data)
+    orig_h, orig_w = img_bgr.shape[:2]
+    # Preprocess
+    inp, ratio, pad_l, pad_t = preprocess(img_bgr)
+    # Inference
+    t0 = time.perf_counter()
+    outputs = sess.run(None, {"images": inp})
+    infer_ms = (time.perf_counter() - t0) * 1000.0
+    # Postprocess
+    raw = outputs[0][0]   # [84, 8400]  (squeeze batch dim)
+    detections = postprocess(raw, ratio, pad_l, pad_t, orig_w, orig_h)
+    return {
+        "detections": detections,
+        "inference_ms": round(infer_ms, 3),
+        "provider": sess.get_providers()[0],
+    }
+# ── Chutes cord wrapper ──────────────────────────────────────────────────────
+# The Chutes runtime calls the function decorated with @chute.cord().
+# We guard the import so miner.py is also directly testable without chutes.
+try:
+    from chutes.chute import Chute  # type: ignore
+    chute = Chute(
+        username="LukeTao",
+        name="vehicle-detect-sn44",
+        tagline="YOLOv8n vehicle detector — car, bus, truck, motorcycle",
+        readme=(Path(__file__).parent / "README.md").read_text(),
+        image="parachutes/python:3.12",
+    )
+    @chute.cord("/predict", method="POST")
+    async def predict_cord(image_b64: str) -> dict:
+        """
+        POST /predict
+        Body: {"image_b64": "<base64-encoded image>"}
+        Returns detection JSON.
+        """
+        return predict(image_b64)
+except ImportError:
+    # Running locally without chutes installed — that's fine for testing.
+    pass
+# ── Local test ───────────────────────────────────────────────────────────────
+if __name__ == "__main__":
+    import sys
+    print("=" * 55)
+    print("  miner.py — local smoke test")
+    print("=" * 55)
+    # Build a dummy 1280×720 BGR image (grey with a rectangle)
+    dummy_bgr = np.full((720, 1280, 3), 128, dtype=np.uint8)
+    cv2.rectangle(dummy_bgr, (100, 100), (400, 300), (0, 255, 0), 3)
+    # Optionally test with a real image
+    if len(sys.argv) > 1:
+        img_path = sys.argv[1]
+        loaded = cv2.imread(img_path)
+        if loaded is not None:
+            dummy_bgr = loaded
+            print(f"  Using image : {img_path}  ({loaded.shape[1]}x{loaded.shape[0]})")
+        else:
+            print(f"  Could not load {img_path}, using dummy image.")
+    else:
+        print("  Using synthetic 1280×720 dummy image.")
+    # Test via numpy path
+    result = predict(dummy_bgr)
+    print(f"\n  Provider     : {result['provider']}")
+    print(f"  Inference    : {result['inference_ms']:.2f} ms")
+    print(f"  Detections   : {len(result['detections'])}")
+    for d in result["detections"]:
+        x1, y1, x2, y2 = [round(v, 1) for v in d["bbox"]]
+        print(f"    [{d['class_id']}] {d['class_name']:12s}  score={d['score']:.3f}  "
+              f"bbox=[{x1},{y1},{x2},{y2}]")
+    # Test base64 round-trip
+    print("\n  Testing base64 round-trip...")
+    _, buf = cv2.imencode(".jpg", dummy_bgr)
+    b64 = base64.b64encode(buf.tobytes()).decode()
+    result2 = predict(b64)
+    print(f"  Detections (base64 path): {len(result2['detections'])}")
+    assert result2["provider"] == result["provider"]
+    # Latency over 50 runs
+    print("\n  Latency benchmark (50 runs)...")
+    times = []
+    for _ in range(50):
+        t0 = time.perf_counter()
+        predict(dummy_bgr)
+        times.append((time.perf_counter() - t0) * 1000)
+    times.sort()
+    print(f"  P50={times[25]:.2f}ms  P95={times[47]:.2f}ms  "
+          f"FPS={1000/times[25]:.1f}")
+    p95 = times[47]
+    fps = 1000.0 / times[25]
+    print(f"\n  Target >=30 FPS  : {'PASS' if fps >= 30 else 'FAIL'}")
+    print(f"  Target P95<50ms  : {'PASS' if p95 < 50 else 'FAIL'}")
+    print("=" * 55)
+    print("  Smoke test complete.")
+    print("=" * 55)

model_type.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "model_type": "yolo11-nano",
+  "task": "detect",
+  "input_size": 640,
+  "num_classes": 4,
+  "class_names": ["car", "bus", "truck", "motorcycle"],
+  "coco_class_map": {
+    "2": 0,
+    "5": 1,
+    "7": 2,
+    "3": 3
+  },
+  "framework": "onnxruntime",
+  "opset": 12,
+  "model_file": "weights.onnx",
+  "conf_threshold": 0.25,
+  "iou_threshold": 0.45
+}

weights.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e8792356fd6366ccf69290191d321d928d670ed5226804c901a52aec523a1663
+size 10741269