Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

README.md +73 -0
inference.py +131 -0
yolov11_phd_s.onnx +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,73 @@

+# PHD Person + Head Detection — YOLOv11 ONNX Engine
+Dual-class detection model (Person Head Detection) based on YOLOv11, exported to ONNX and configured for DeepStream/ONNX Runtime inference. Detects both **persons** (class 0) and **heads** (class 1) simultaneously.
+## Files
+| File | Description |
+|---|---|
+| `yolov11_phd_s.onnx` | YOLOv11-small ONNX model weights |
+| `model.phd.cfg` | DeepStream nvinfer configuration |
+| `inference.py` | Standalone ONNX Runtime inference script |
+## Model Details
+| Property | Value |
+|---|---|
+| Architecture | YOLOv11-small |
+| Task | Dual-class detection (person + head) |
+| Classes | `0` — person, `1` — head |
+| Dataset | CrowdHuman |
+| Input format | RGB, NCHW |
+| Scale factor | 0.0039215697906911373 (≈ 1/255) |
+## Inference Configuration (`model.phd.cfg`)
+| Parameter | Value | Description |
+|---|---|---|
+| `gpu-id` | 0 | GPU device index |
+| `model-color-format` | 0 | RGB input (nvinfer: 0 = RGB, 1 = BGR, 2 = GRAY) |
+| `net-scale-factor` | 0.0039215697906911373 | Pixel normalization factor |
+| `pre-cluster-threshold` | 0.2 | Confidence threshold for detections |
+| `nms-iou-threshold` | 0.6 | IoU threshold for NMS |
+| `topk` | 300 | Maximum detections kept after NMS |
+| `labelfile-path` | `../models/crowd_human.names` | Class label file |
+## Running Standalone Inference
+### Requirements
+```bash
+pip install onnxruntime-gpu opencv-python numpy
+```
+For CPU-only:
+```bash
+pip install onnxruntime opencv-python numpy
+```
+### Usage
+Place a test image in the same directory, then:
+```bash
+python inference.py
+```
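+By default the script reads `test_image.jpg`, runs inference, writes `output.jpg` with bounding boxes drawn, and then shows the result in a window until a key is pressed. Class names are read from `../models/crowd_human.names` (`LABEL_PATH`).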
+To change the input image or thresholds, edit the config block at the top of `inference.py`:
+```python
+MODEL_PATH = "yolov11_phd_s.onnx"
+LABEL_PATH = "../models/crowd_human.names"
+IMAGE_PATH = "test_image.jpg"
+CONF_THRESHOLD = 0.2   # pre-cluster-threshold
+IOU_THRESHOLD  = 0.6   # nms-iou-threshold
+TOPK           = 300
+```
+### Output
+- Console: detection count, bounding boxes, and confidence scores
+- `output.jpg`: input image with green bounding boxes and labels overlaid

inference.py ADDED Viewed

	@@ -0,0 +1,131 @@

+import cv2
+import numpy as np
+import onnxruntime as ort
+# --- Config (from model.phd.cfg) ---
+# Values mirror the DeepStream nvinfer settings so this script matches the pipeline.
+MODEL_PATH = "yolov11_phd_s.onnx"               # ONNX weights, relative to the working directory
+LABEL_PATH = "../models/crowd_human.names"      # labelfile-path: one class name per line
+IMAGE_PATH = "test_image.jpg"                   # image the script runs inference on
+CONF_THRESHOLD = 0.2        # pre-cluster-threshold
+IOU_THRESHOLD = 0.6         # nms-iou-threshold
+NET_SCALE_FACTOR = 0.0039215697906911373  # net-scale-factor (≈1/255)
+# model-color-format: nvinfer defines 0 = RGB, 1 = BGR, 2 = GRAY.
+# NOTE(review): cv2.imread yields BGR, so format 0 needs a channel swap in preprocessing — confirm.
+MODEL_COLOR_FORMAT = 0
+TOPK = 300                  # topk
+def load_labels(label_path):
+    with open(label_path) as f:
+        return [line.strip() for line in f if line.strip()]
+def load_model(model_path):
+    session = ort.InferenceSession(
+        model_path,
+        providers=["CUDAExecutionProvider", "CPUExecutionProvider"]  # gpu-id=0, CPU fallback
+    )
+    input_meta = session.get_inputs()[0]
+    input_name = input_meta.name
+    _, _, h, w = input_meta.shape   # NCHW → extract H, W
+    return session, input_name, (h, w)
+def preprocess(image, input_size):
+    """Letterbox resize + normalize."""
+    h_in, w_in = input_size
+    h_orig, w_orig = image.shape[:2]
+    # Letterbox scaling (preserves aspect ratio)
+    scale = min(w_in / w_orig, h_in / h_orig)
+    new_w, new_h = int(w_orig * scale), int(h_orig * scale)
+    resized = cv2.resize(image, (new_w, new_h))
+    # Pad to input size
+    canvas = np.full((h_in, w_in, 3), 114, dtype=np.uint8)
+    pad_top = (h_in - new_h) // 2
+    pad_left = (w_in - new_w) // 2
+    canvas[pad_top:pad_top + new_h, pad_left:pad_left + new_w] = resized
+    # Normalize — model-color-format=0 means BGR input, no channel swap
+    img = canvas.astype(np.float32) * NET_SCALE_FACTOR    # scale by net-scale-factor
+    img = np.transpose(img, (2, 0, 1))                    # HWC → CHW
+    img = np.expand_dims(img, axis=0)                     # Add batch dim
+    return img, scale, pad_top, pad_left
+def postprocess(output, scale, pad_top, pad_left, conf_thresh, iou_thresh):
+    """
+    YOLOv11 output shape: (1, 4 + num_classes, num_anchors)
+    For dual-class (person + head): (1, 6, 8400)
+    """
+    preds = output[0]           # (1, 6, 8400)
+    preds = preds[0]            # (6, 8400)
+    preds = preds.T             # (8400, 6) → each row = one anchor
+    boxes_raw = preds[:, :4]    # cx, cy, w, h
+    class_scores = preds[:, 4:] # (8400, 2) — one score per class
+    # Best class per anchor
+    class_ids = np.argmax(class_scores, axis=1)
+    scores = class_scores[np.arange(len(class_scores)), class_ids]
+    # Filter by confidence
+    mask = scores >= conf_thresh
+    boxes_raw = boxes_raw[mask]
+    scores = scores[mask]
+    class_ids = class_ids[mask]
+    if len(scores) == 0:
+        return []
+    # Convert cx,cy,w,h → x1,y1,x2,y2 and undo letterbox
+    x1 = (boxes_raw[:, 0] - boxes_raw[:, 2] / 2 - pad_left) / scale
+    y1 = (boxes_raw[:, 1] - boxes_raw[:, 3] / 2 - pad_top)  / scale
+    x2 = (boxes_raw[:, 0] + boxes_raw[:, 2] / 2 - pad_left) / scale
+    y2 = (boxes_raw[:, 1] + boxes_raw[:, 3] / 2 - pad_top)  / scale
+    boxes_xyxy = np.stack([x1, y1, x2 - x1, y2 - y1], axis=1).astype(int)  # for NMS
+    # NMS with topk cap
+    indices = cv2.dnn.NMSBoxes(
+        boxes_xyxy.tolist(), scores.tolist(), conf_thresh, iou_thresh
+    )
+    results = []
+    for i in indices[:TOPK]:
+        idx = i[0] if isinstance(i, (list, np.ndarray)) else i
+        x, y, w, h = boxes_xyxy[idx]
+        results.append({
+            "bbox": (x, y, x + w, y + h),
+            "score": float(scores[idx]),
+            "class_id": int(class_ids[idx])
+        })
+    return results
+def draw(image, detections, labels):
+    for det in detections:
+        x1, y1, x2, y2 = det["bbox"]
+        label = labels[det["class_id"]] if labels and det["class_id"] < len(labels) else f"class{det['class_id']}"
+        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 200, 0), 2)
+        cv2.putText(image, f"{label} {det['score']:.2f}",
+                    (x1, max(y1 - 8, 0)),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 200, 0), 2)
+    return image
+# --- Main ---
+labels = load_labels(LABEL_PATH)
+session, input_name, input_size = load_model(MODEL_PATH)
+print(f"Model input size: {input_size}")
+image = cv2.imread(IMAGE_PATH)
+tensor, scale, pad_top, pad_left = preprocess(image, input_size)
+outputs = session.run(None, {input_name: tensor})
+detections = postprocess(outputs, scale, pad_top, pad_left,
+                         CONF_THRESHOLD, IOU_THRESHOLD)
+print(f"Detected {len(detections)} heads")
+for d in detections:
+    print(f"  BBox: {d['bbox']}, Score: {d['score']:.3f}")
+result = draw(image.copy(), detections, labels)
+cv2.imwrite("output.jpg", result)
+cv2.imshow("Detections", result)
+cv2.waitKey(0)

yolov11_phd_s.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3f0a061b37f1398be76fc344c116a8a6c42fb835583cabe33ddd80812ef4af10
+size 37850759