Sharath33 commited on
Commit
45f16b6
·
verified ·
1 Parent(s): 1cd0b15

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +73 -0
  2. inference.py +131 -0
  3. yolov11_phd_s.onnx +3 -0
README.md ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PHD Person + Head Detection — YOLOv11 ONNX Engine
2
+
3
+ Dual-class detection model (PHD — Person + Head Detection) based on YOLOv11, exported to ONNX and configured for DeepStream/ONNX Runtime inference. Detects both **persons** (class 0) and **heads** (class 1) simultaneously.
4
+
5
+ ## Files
6
+
7
+ | File | Description |
8
+ |---|---|
9
+ | `yolov11_phd_s.onnx` | YOLOv11-small ONNX model weights |
10
+ | `model.phd.cfg` | DeepStream nvinfer configuration |
11
+ | `inference.py` | Standalone ONNX Runtime inference script |
12
+
13
+ ## Model Details
14
+
15
+ | Property | Value |
16
+ |---|---|
17
+ | Architecture | YOLOv11-small |
18
+ | Task | Dual-class detection (person + head) |
19
+ | Classes | `0` — person, `1` — head |
20
+ | Dataset | CrowdHuman |
21
+ | Input format | BGR, NCHW |
22
+ | Scale factor | 0.0039215697906911373 (≈ 1/255) |
23
+
24
+ ## Inference Configuration (`model.phd.cfg`)
25
+
26
+ | Parameter | Value | Description |
27
+ |---|---|---|
28
+ | `gpu-id` | 0 | GPU device index |
29
+ | `model-color-format` | 0 | BGR input (no channel swap) |
30
+ | `net-scale-factor` | 0.0039215697906911373 | Pixel normalization factor |
31
+ | `pre-cluster-threshold` | 0.2 | Confidence threshold for detections |
32
+ | `nms-iou-threshold` | 0.6 | IoU threshold for NMS |
33
+ | `topk` | 300 | Maximum detections kept after NMS |
34
+ | `labelfile-path` | `../models/crowd_human.names` | Class label file |
35
+
36
+ ## Running Standalone Inference
37
+
38
+ ### Requirements
39
+
40
+ ```bash
41
+ pip install onnxruntime-gpu opencv-python numpy
42
+ ```
43
+
44
+ For CPU-only:
45
+ ```bash
46
+ pip install onnxruntime opencv-python numpy
47
+ ```
48
+
49
+ ### Usage
50
+
51
+ Place a test image in the same directory, then:
52
+
53
+ ```bash
54
+ python inference.py
55
+ ```
56
+
57
+ By default the script reads `test_image.jpg`, runs inference, and writes `output.jpg` with bounding boxes drawn.
58
+
59
+ To change the input image or thresholds, edit the config block at the top of `inference.py`:
60
+
61
+ ```python
62
+ MODEL_PATH = "yolov11_phd_s.onnx"
63
+ LABEL_PATH = "../models/crowd_human.names"
64
+ IMAGE_PATH = "test_image.jpg"
65
+ CONF_THRESHOLD = 0.2 # pre-cluster-threshold
66
+ IOU_THRESHOLD = 0.6 # nms-iou-threshold
67
+ TOPK = 300
68
+ ```
69
+
70
+ ### Output
71
+
72
+ - Console: detection count, bounding boxes, and confidence scores
73
+ - `output.jpg`: input image with green bounding boxes and labels overlaid
inference.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import cv2
import numpy as np
import onnxruntime as ort

# --- Config (mirrors the DeepStream nvinfer settings in model.phd.cfg) ---
MODEL_PATH = "yolov11_phd_s.onnx"  # YOLOv11-small ONNX weights
LABEL_PATH = "../models/crowd_human.names"  # class names: 0 = person, 1 = head
IMAGE_PATH = "test_image.jpg"  # input image read by the script
CONF_THRESHOLD = 0.2  # pre-cluster-threshold: min per-class confidence
IOU_THRESHOLD = 0.6  # nms-iou-threshold: IoU cutoff for NMS
NET_SCALE_FACTOR = 0.0039215697906911373  # net-scale-factor (≈ 1/255 pixel normalization)
MODEL_COLOR_FORMAT = 0  # 0 = BGR input, no channel swap (model-color-format)
TOPK = 300  # topk: max detections kept after NMS
14
+
15
def load_labels(label_path):
    """Read class names from *label_path*, one per line, skipping blank lines."""
    names = []
    with open(label_path) as handle:
        for raw in handle:
            name = raw.strip()
            if name:
                names.append(name)
    return names
18
+
19
def load_model(model_path):
    """Create an ONNX Runtime session and return (session, input_name, (h, w)).

    Prefers CUDA (gpu-id=0) with automatic CPU fallback, matching the
    DeepStream config. Input size is taken from the model's NCHW input.
    """
    providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]  # GPU first, CPU fallback
    session = ort.InferenceSession(model_path, providers=providers)
    meta = session.get_inputs()[0]
    # NOTE(review): assumes a static NCHW input shape — dynamic axes would
    # yield non-integer dims here; confirm against the exported model.
    batch, channels, height, width = meta.shape
    return session, meta.name, (height, width)
28
+
29
def preprocess(image, input_size):
    """Letterbox-resize a BGR image to the network size and normalize.

    Returns (tensor, scale, pad_top, pad_left) where tensor is a float32
    NCHW array and the other values let postprocess() map boxes back to
    original-image coordinates.
    """
    net_h, net_w = input_size
    src_h, src_w = image.shape[:2]

    # One uniform scale so the whole image fits inside the network canvas
    # (aspect ratio preserved).
    ratio = min(net_w / src_w, net_h / src_h)
    scaled_w = int(src_w * ratio)
    scaled_h = int(src_h * ratio)
    scaled = cv2.resize(image, (scaled_w, scaled_h))

    # Grey (114) canvas with the resized image centered.
    canvas = np.full((net_h, net_w, 3), 114, dtype=np.uint8)
    top = (net_h - scaled_h) // 2
    left = (net_w - scaled_w) // 2
    canvas[top:top + scaled_h, left:left + scaled_w] = scaled

    # model-color-format=0 → keep BGR (no channel swap); scale pixels by
    # net-scale-factor, then HWC → CHW and add the batch dimension.
    tensor = (canvas.astype(np.float32) * NET_SCALE_FACTOR).transpose(2, 0, 1)[None, ...]
    return tensor, ratio, top, left
51
+
52
def postprocess(output, scale, pad_top, pad_left, conf_thresh, iou_thresh):
    """Decode raw YOLOv11 predictions into final detections.

    YOLOv11 output shape: (1, 4 + num_classes, num_anchors); for this
    dual-class model (person + head): (1, 6, 8400).

    Args:
        output: list of model outputs; output[0] is the prediction tensor.
        scale, pad_top, pad_left: letterbox parameters from preprocess(),
            used to map boxes back into original-image pixels.
        conf_thresh: minimum per-class confidence (pre-cluster-threshold).
        iou_thresh: IoU threshold for NMS (nms-iou-threshold).

    Returns:
        List of dicts with "bbox" = (x1, y1, x2, y2) in original-image
        pixels, "score", and "class_id".
    """
    preds = output[0][0].T  # (8400, 6) → each row = one anchor

    boxes_raw = preds[:, :4]     # cx, cy, w, h in network-input pixels
    class_scores = preds[:, 4:]  # (8400, num_classes) — one score per class

    # Best class per anchor.
    class_ids = np.argmax(class_scores, axis=1)
    scores = class_scores[np.arange(len(class_scores)), class_ids]

    # Filter by confidence.
    keep = scores >= conf_thresh
    boxes_raw = boxes_raw[keep]
    scores = scores[keep]
    class_ids = class_ids[keep]

    if len(scores) == 0:
        return []

    # Convert cx,cy,w,h → corner coordinates and undo the letterbox
    # (subtract padding, then divide by the resize scale).
    x1 = (boxes_raw[:, 0] - boxes_raw[:, 2] / 2 - pad_left) / scale
    y1 = (boxes_raw[:, 1] - boxes_raw[:, 3] / 2 - pad_top) / scale
    x2 = (boxes_raw[:, 0] + boxes_raw[:, 2] / 2 - pad_left) / scale
    y2 = (boxes_raw[:, 1] + boxes_raw[:, 3] / 2 - pad_top) / scale

    # cv2.dnn.NMSBoxes expects (x, y, w, h) boxes — the previous variable
    # was misleadingly named boxes_xyxy although it held exactly this.
    boxes_xywh = np.stack([x1, y1, x2 - x1, y2 - y1], axis=1).astype(int)

    indices = cv2.dnn.NMSBoxes(
        boxes_xywh.tolist(), scores.tolist(), conf_thresh, iou_thresh
    )
    if len(indices) == 0:  # NMSBoxes returns an empty tuple when nothing survives
        return []

    # Normalize OpenCV's index shape — (N, 1) on older builds, flat (N,) on
    # newer ones — then cap at TOPK survivors (topk).
    results = []
    for idx in np.asarray(indices).reshape(-1)[:TOPK]:
        x, y, w, h = boxes_xywh[idx]
        results.append({
            "bbox": (x, y, x + w, y + h),
            "score": float(scores[idx]),
            "class_id": int(class_ids[idx]),
        })
    return results
100
+
101
def draw(image, detections, labels):
    """Overlay green boxes and '<label> <score>' captions on *image* in place."""
    green = (0, 200, 0)
    for det in detections:
        x1, y1, x2, y2 = det["bbox"]
        cls = det["class_id"]
        # Fall back to a generic name when the label file is short or missing.
        if labels and cls < len(labels):
            caption = f"{labels[cls]} {det['score']:.2f}"
        else:
            caption = f"class{cls} {det['score']:.2f}"
        cv2.rectangle(image, (x1, y1), (x2, y2), green, 2)
        cv2.putText(image, caption,
                    (x1, max(y1 - 8, 0)),  # keep text inside the top edge
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, green, 2)
    return image
110
+
111
# --- Main ---
def main():
    """Run single-image inference and write/show the annotated result."""
    labels = load_labels(LABEL_PATH)
    session, input_name, input_size = load_model(MODEL_PATH)
    print(f"Model input size: {input_size}")

    image = cv2.imread(IMAGE_PATH)
    if image is None:
        # cv2.imread returns None instead of raising — fail loudly here
        # rather than with a cryptic shape error inside preprocess().
        raise FileNotFoundError(f"Could not read image: {IMAGE_PATH}")

    tensor, scale, pad_top, pad_left = preprocess(image, input_size)
    outputs = session.run(None, {input_name: tensor})
    detections = postprocess(outputs, scale, pad_top, pad_left,
                             CONF_THRESHOLD, IOU_THRESHOLD)

    # The model is dual-class (person + head); the old message wrongly
    # reported every detection as a head.
    print(f"Detected {len(detections)} objects")
    for d in detections:
        print(f"  BBox: {d['bbox']}, Score: {d['score']:.3f}, Class: {d['class_id']}")

    result = draw(image.copy(), detections, labels)
    cv2.imwrite("output.jpg", result)
    try:
        cv2.imshow("Detections", result)
        cv2.waitKey(0)
    except cv2.error:
        # Headless OpenCV builds raise here; output.jpg is already written.
        pass


if __name__ == "__main__":
    main()
yolov11_phd_s.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f0a061b37f1398be76fc344c116a8a6c42fb835583cabe33ddd80812ef4af10
3
+ size 37850759