import cv2
import numpy as np
import onnxruntime as ort
# --- Config (from model.phd.cfg) ---
MODEL_PATH = "yolov11_phd_s.onnx"  # exported YOLOv11 detector (ONNX)
LABEL_PATH = "../models/crowd_human.names"  # class names, one per line
IMAGE_PATH = "test_image.jpg"  # input image for the demo run
CONF_THRESHOLD = 0.2  # pre-cluster-threshold: minimum class score kept
IOU_THRESHOLD = 0.6  # nms-iou-threshold: overlap above this is suppressed
NET_SCALE_FACTOR = 0.0039215697906911373  # net-scale-factor (approx. 1/255 → [0, 1] range)
MODEL_COLOR_FORMAT = 0  # 0 = BGR (no channel swap before inference)
TOPK = 300  # topk: cap on detections kept after NMS
def load_labels(label_path):
    """Load class names from a newline-delimited label file.

    Args:
        label_path: Path to the ``.names`` file, one label per line.

    Returns:
        List of non-empty, stripped label strings, in file order.
    """
    # Explicit encoding: the default is platform-dependent, which can
    # mis-decode label files written on another machine.
    with open(label_path, encoding="utf-8") as f:
        return [line.strip() for line in f if line.strip()]
def load_model(model_path):
    """Create an ONNX Runtime session and report its expected input.

    Args:
        model_path: Path to the ONNX model file.

    Returns:
        Tuple of (session, input tensor name, (height, width)).
    """
    # gpu-id=0 → try CUDA first, transparently fall back to CPU.
    providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
    session = ort.InferenceSession(model_path, providers=providers)
    meta = session.get_inputs()[0]
    # Layout is NCHW; pull the spatial dims out of the declared shape.
    _, _, height, width = meta.shape
    return session, meta.name, (height, width)
def preprocess(image, input_size):
    """Letterbox an image to the network size and build an NCHW tensor.

    Args:
        image: BGR image, HxWx3 uint8 array.
        input_size: (height, width) the network expects.

    Returns:
        (tensor, scale, pad_top, pad_left) — float32 tensor of shape
        (1, 3, H, W) plus the letterbox parameters needed to map
        detections back into original-image coordinates.
    """
    net_h, net_w = input_size
    img_h, img_w = image.shape[:2]

    # Single uniform factor so the aspect ratio is preserved.
    ratio = min(net_w / img_w, net_h / img_h)
    scaled_w = int(img_w * ratio)
    scaled_h = int(img_h * ratio)
    scaled = cv2.resize(image, (scaled_w, scaled_h))

    # Center the resized image on a gray (114) canvas of the network size.
    canvas = np.full((net_h, net_w, 3), 114, dtype=np.uint8)
    top = (net_h - scaled_h) // 2
    left = (net_w - scaled_w) // 2
    canvas[top:top + scaled_h, left:left + scaled_w] = scaled

    # model-color-format=0 → keep BGR order; only rescale to [0, 1].
    tensor = canvas.astype(np.float32) * NET_SCALE_FACTOR
    tensor = tensor.transpose(2, 0, 1)[np.newaxis, ...]  # HWC → 1xCxHxW
    return tensor, ratio, top, left
def postprocess(output, scale, pad_top, pad_left, conf_thresh, iou_thresh):
    """Decode raw YOLOv11 output into detections in original-image pixels.

    YOLOv11 output shape: (1, 4 + num_classes, num_anchors);
    for dual-class (person + head) that is (1, 6, 8400).

    Args:
        output: List of model outputs; output[0] is the prediction tensor.
        scale, pad_top, pad_left: Letterbox parameters from preprocess().
        conf_thresh: Minimum best-class score to keep an anchor.
        iou_thresh: IoU threshold for NMS suppression.

    Returns:
        List of dicts with "bbox" (x1, y1, x2, y2), "score", "class_id";
        at most TOPK entries.
    """
    preds = output[0][0].T            # (num_anchors, 4 + num_classes)
    boxes_cxcywh = preds[:, :4]       # cx, cy, w, h per anchor
    class_scores = preds[:, 4:]       # one score per class

    # Best class per anchor.
    class_ids = np.argmax(class_scores, axis=1)
    scores = class_scores[np.arange(len(class_scores)), class_ids]

    # Confidence filter (pre-cluster-threshold).
    keep = scores >= conf_thresh
    boxes_cxcywh = boxes_cxcywh[keep]
    scores = scores[keep]
    class_ids = class_ids[keep]
    if len(scores) == 0:
        return []

    # cx,cy,w,h → corner coords, then undo letterbox padding/scaling.
    x1 = (boxes_cxcywh[:, 0] - boxes_cxcywh[:, 2] / 2 - pad_left) / scale
    y1 = (boxes_cxcywh[:, 1] - boxes_cxcywh[:, 3] / 2 - pad_top) / scale
    x2 = (boxes_cxcywh[:, 0] + boxes_cxcywh[:, 2] / 2 - pad_left) / scale
    y2 = (boxes_cxcywh[:, 1] + boxes_cxcywh[:, 3] / 2 - pad_top) / scale
    # NMSBoxes expects (x, y, w, h) — this array is xywh, not xyxy.
    boxes_xywh = np.stack([x1, y1, x2 - x1, y2 - y1], axis=1).astype(int)

    indices = cv2.dnn.NMSBoxes(
        boxes_xywh.tolist(), scores.tolist(), conf_thresh, iou_thresh
    )
    # OpenCV returns (N, 1) arrays in some versions, flat arrays (or an
    # empty tuple) in others — normalize to a flat int index array instead
    # of per-element isinstance checks.
    indices = np.array(indices).flatten().astype(int)

    results = []
    for idx in indices[:TOPK]:  # NMSBoxes sorts by score; cap at topk
        x, y, w, h = boxes_xywh[idx]
        results.append({
            "bbox": (x, y, x + w, y + h),
            "score": float(scores[idx]),
            "class_id": int(class_ids[idx]),
        })
    return results
def draw(image, detections, labels):
    """Draw a box and a "<label> <score>" caption for each detection."""
    color = (0, 200, 0)
    for det in detections:
        x1, y1, x2, y2 = det["bbox"]
        cls = det["class_id"]
        if labels and cls < len(labels):
            name = labels[cls]
        else:
            name = f"class{cls}"
        cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
        # Clamp the caption to y=0 so it stays visible for boxes at the top.
        caption_org = (x1, max(y1 - 8, 0))
        cv2.putText(image, f"{name} {det['score']:.2f}", caption_org,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)
    return image
# --- Main ---
labels = load_labels(LABEL_PATH)
session, input_name, input_size = load_model(MODEL_PATH)
print(f"Model input size: {input_size}")

image = cv2.imread(IMAGE_PATH)
if image is None:
    # cv2.imread returns None (no exception) for missing/unreadable files;
    # fail early with a clear message instead of a cryptic shape error later.
    raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")

tensor, scale, pad_top, pad_left = preprocess(image, input_size)
outputs = session.run(None, {input_name: tensor})
detections = postprocess(outputs, scale, pad_top, pad_left,
                         CONF_THRESHOLD, IOU_THRESHOLD)

print(f"Detected {len(detections)} heads")
for d in detections:
    print(f"  BBox: {d['bbox']}, Score: {d['score']:.3f}")

result = draw(image.copy(), detections, labels)
cv2.imwrite("output.jpg", result)
cv2.imshow("Detections", result)
cv2.waitKey(0)
cv2.destroyAllWindows()  # release the HighGUI window before exit