"""Run a YOLOv11 person/head ONNX detector on a single image.

The constants below mirror the DeepStream nvinfer settings noted next to
them (taken from model.phd.cfg) so that this standalone script pre- and
post-processes the same way as the deployed pipeline.
"""

import cv2
import numpy as np
import onnxruntime as ort

# --- Config (from model.phd.cfg) ---
MODEL_PATH = "yolov11_phd_s.onnx"
LABEL_PATH = "../models/crowd_human.names"
IMAGE_PATH = "test_image.jpg"

CONF_THRESHOLD = 0.2                        # pre-cluster-threshold
IOU_THRESHOLD = 0.6                         # nms-iou-threshold
NET_SCALE_FACTOR = 0.0039215697906911373    # net-scale-factor (≈1/255)
MODEL_COLOR_FORMAT = 0                      # model-color-format: 0 = RGB, 1 = BGR
TOPK = 300                                  # topk


def load_labels(label_path):
    """Return the non-empty, stripped lines of *label_path* as class names."""
    with open(label_path, encoding="utf-8") as f:
        return [line.strip() for line in f if line.strip()]


def load_model(model_path):
    """Create an ONNX Runtime session and read the input name and size.

    Returns:
        (session, input_name, (height, width)) for the model's first input,
        which is unpacked as NCHW.

    Raises:
        ValueError: if the input height/width are not fixed integers
            (ONNX Runtime reports dynamic axes as strings or None).
    """
    session = ort.InferenceSession(
        model_path,
        providers=["CUDAExecutionProvider", "CPUExecutionProvider"],  # gpu-id=0, CPU fallback
    )
    input_meta = session.get_inputs()[0]
    input_name = input_meta.name
    _, _, h, w = input_meta.shape  # NCHW → extract H, W
    if not (isinstance(h, int) and isinstance(w, int)):
        raise ValueError(
            f"Model input '{input_name}' has a dynamic spatial shape "
            f"{input_meta.shape}; a fixed H and W are required."
        )
    return session, input_name, (h, w)


def preprocess(image, input_size):
    """Letterbox-resize and normalize a BGR image into an NCHW float tensor.

    Args:
        image: HxWx3 uint8 image in BGR order (as returned by cv2.imread).
        input_size: (height, width) expected by the network.

    Returns:
        (tensor, scale, pad_top, pad_left). *tensor* has shape (1, 3, H, W)
        and dtype float32; the other three values are what postprocess()
        needs to map boxes back to the original image.

    Raises:
        ValueError: if MODEL_COLOR_FORMAT is neither 0 (RGB) nor 1 (BGR).
    """
    h_in, w_in = input_size
    h_orig, w_orig = image.shape[:2]

    # Letterbox scaling (preserves aspect ratio). Clamp to 1 px so an
    # extremely thin image cannot produce a zero-sized resize target.
    scale = min(w_in / w_orig, h_in / h_orig)
    new_w = max(1, int(w_orig * scale))
    new_h = max(1, int(h_orig * scale))
    resized = cv2.resize(image, (new_w, new_h))

    # Pad to input size, centring the resized image on a grey canvas.
    canvas = np.full((h_in, w_in, 3), 114, dtype=np.uint8)
    pad_top = (h_in - new_h) // 2
    pad_left = (w_in - new_w) // 2
    canvas[pad_top:pad_top + new_h, pad_left:pad_left + new_w] = resized

    # OpenCV images are BGR; swap channels when the model wants RGB.
    if MODEL_COLOR_FORMAT == 0:
        canvas = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
    elif MODEL_COLOR_FORMAT != 1:
        raise ValueError(f"Unsupported MODEL_COLOR_FORMAT: {MODEL_COLOR_FORMAT}")

    img = canvas.astype(np.float32) * NET_SCALE_FACTOR  # scale by net-scale-factor
    img = np.transpose(img, (2, 0, 1))                  # HWC → CHW
    img = np.ascontiguousarray(img[np.newaxis])         # add batch dim
    return img, scale, pad_top, pad_left


def postprocess(output, scale, pad_top, pad_left, conf_thresh, iou_thresh):
    """Decode raw network output into a list of detections.

    YOLOv11 output shape: (1, 4 + num_classes, num_anchors)
    For dual-class (person + head): (1, 6, 8400)

    Args:
        output: list of output arrays from session.run(); only the first
            array is used.
        scale, pad_top, pad_left: letterbox parameters from preprocess().
        conf_thresh: minimum class score for a box to be kept.
        iou_thresh: IoU threshold for non-maximum suppression.

    Returns:
        A list of dicts with keys "bbox" (x1, y1, x2, y2 as Python ints in
        original-image pixels), "score" (float) and "class_id" (int),
        ordered by descending score and capped at TOPK entries.
    """
    preds = output[0][0].T          # (1, C, A) → (A, C): one row per anchor
    boxes_raw = preds[:, :4]        # cx, cy, w, h
    class_scores = preds[:, 4:]     # one score per class

    # Best class per anchor
    class_ids = np.argmax(class_scores, axis=1)
    scores = class_scores[np.arange(len(class_scores)), class_ids]

    # Filter by confidence
    mask = scores >= conf_thresh
    boxes_raw = boxes_raw[mask]
    scores = scores[mask]
    class_ids = class_ids[mask]

    if len(scores) == 0:
        return []

    # Convert cx,cy,w,h → corner coordinates and undo the letterbox
    x1 = (boxes_raw[:, 0] - boxes_raw[:, 2] / 2 - pad_left) / scale
    y1 = (boxes_raw[:, 1] - boxes_raw[:, 3] / 2 - pad_top) / scale
    x2 = (boxes_raw[:, 0] + boxes_raw[:, 2] / 2 - pad_left) / scale
    y2 = (boxes_raw[:, 1] + boxes_raw[:, 3] / 2 - pad_top) / scale
    # cv2.dnn.NMSBoxes expects (x, y, width, height) boxes.
    boxes_xywh = np.stack([x1, y1, x2 - x1, y2 - y1], axis=1).astype(int)

    # Run NMS separately for each class so that a box of one class can never
    # suppress a near-identical box of another class.
    keep = []
    for cls in np.unique(class_ids):
        cls_idx = np.flatnonzero(class_ids == cls)
        picked = cv2.dnn.NMSBoxes(
            boxes_xywh[cls_idx].tolist(),
            scores[cls_idx].tolist(),
            conf_thresh,
            iou_thresh,
        )
        # Depending on the OpenCV version the result is a flat array, an
        # (N, 1) array, or an empty tuple; normalize to a flat int array.
        picked = np.asarray(picked, dtype=int).reshape(-1)
        keep.extend(cls_idx[picked].tolist())

    # Merge the classes by score and apply the topk cap.
    keep.sort(key=lambda i: scores[i], reverse=True)

    results = []
    for idx in keep[:TOPK]:
        # Plain Python ints keep the bbox printable and safe to hand to cv2.
        x, y, w, h = (int(v) for v in boxes_xywh[idx])
        results.append({
            "bbox": (x, y, x + w, y + h),
            "score": float(scores[idx]),
            "class_id": int(class_ids[idx]),
        })
    return results


def draw(image, detections, labels):
    """Draw each detection's box and "<label> <score>" caption on *image*.

    The image is modified in place and also returned. A class id without a
    matching entry in *labels* is captioned as "class<id>".
    """
    for det in detections:
        x1, y1, x2, y2 = det["bbox"]
        class_id = det["class_id"]
        if labels and class_id < len(labels):
            label = labels[class_id]
        else:
            label = f"class{class_id}"
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 200, 0), 2)
        cv2.putText(image, f"{label} {det['score']:.2f}",
                    (x1, max(y1 - 8, 0)),  # keep the caption inside the image
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 200, 0), 2)
    return image


def main():
    """Run detection on IMAGE_PATH, print the results and save output.jpg."""
    labels = load_labels(LABEL_PATH)
    session, input_name, input_size = load_model(MODEL_PATH)
    print(f"Model input size: {input_size}")

    image = cv2.imread(IMAGE_PATH)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")

    tensor, scale, pad_top, pad_left = preprocess(image, input_size)
    outputs = session.run(None, {input_name: tensor})
    detections = postprocess(outputs, scale, pad_top, pad_left,
                             CONF_THRESHOLD, IOU_THRESHOLD)

    print(f"Detected {len(detections)} objects")
    for d in detections:
        print(f"  BBox: {d['bbox']}, Score: {d['score']:.3f}")

    result = draw(image.copy(), detections, labels)
    cv2.imwrite("output.jpg", result)

    # The preview is best-effort: headless OpenCV builds cannot open windows,
    # and the annotated image has already been written to disk.
    try:
        cv2.imshow("Detections", result)
        cv2.waitKey(0)
    except cv2.error as exc:
        print(f"Skipping preview window: {exc}")


# --- Main ---
if __name__ == "__main__":
    main()