meaculpitt committed on
Commit
80e163e
·
verified ·
1 Parent(s): db64d20

scorevision: push artifact

Browse files
README.md CHANGED
@@ -1,42 +1,10 @@
1
- ---
2
- tags:
3
- - element_type:detect
4
- - model:yolov11-nano
5
- - object:person
6
- manako:
7
- description: >
8
- YOLOv11-nano fine-tuned for ground-level CCTV person detection on SN44.
9
- Trained on CrowdHuman (15k, dense crowds) + BDD100K street pedestrians.
10
- Conf threshold raised to 0.35 to minimise false positives.
11
- source: meaculpitt/Detect-Person
12
- prompt_hints: null
13
- input_payload:
14
- - name: frame
15
- type: image
16
- description: RGB frame (ground-level CCTV)
17
- output_payload:
18
- - name: detections
19
- type: detections
20
- description: Bounding boxes for detected persons
21
- evaluation_score: 0.5563
22
- last_benchmark:
23
- type: coco_val2017
24
- ran_at: '2026-03-25T02:58:57+00:00'
25
- result_path: null
26
- ---
27
 
28
- # Detect-Person — SN44
 
29
 
30
- YOLOv11-nano fine-tuned for ground-level CCTV person detection.
31
-
32
- | Metric | Value |
33
- |--------|-------|
34
- | mAP@50 (COCO val2017) | 55.63% |
35
- | Precision (conf=0.35) | 56.86% |
36
- | Recall | 50.67% |
37
- | Baseline to beat | 37.55% |
38
- | Model size | 5.6 MB |
39
- | Input size | 1280×1280 |
40
-
41
- **Training data**: CrowdHuman (15k) + BDD100K (3.2k pedestrians)
42
- **Validation**: COCO val2017 persons (2,693 images)
 
1
+ # ScoreVision SN44 Unified Miner
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
+ Dual-model approach: vehicle (YOLO11s) + person (YOLO11s).
4
+ Runs both models on every image and merges all detections.
5
 
6
+ ## Classes
7
+ - cls_id 0: bus (vehicle eval) / person (person eval)
8
+ - cls_id 1: car
9
+ - cls_id 2: truck
10
+ - cls_id 3: motorcycle
 
 
 
 
 
 
 
 
__pycache__/miner.cpython-312.pyc CHANGED
Binary files a/__pycache__/miner.cpython-312.pyc and b/__pycache__/miner.cpython-312.pyc differ
 
class_names.txt CHANGED
@@ -1 +1,5 @@
 
 
 
 
1
  person
 
1
+ bus
2
+ car
3
+ truck
4
+ motorcycle
5
  person
miner.py CHANGED
@@ -1,7 +1,17 @@
1
  """
2
- Score Vision SN44 — DetectPerson miner v6 (2026-03-27).
3
- TTA (2-pass: original + hflip) + inline WBF. Stretch resize preprocessing.
4
- Single class: person (cls_id=0).
 
 
 
 
 
 
 
 
 
 
5
  """
6
 
7
  from pathlib import Path
@@ -13,75 +23,139 @@ import onnxruntime as ort
13
  from numpy import ndarray
14
  from pydantic import BaseModel
15
 
16
- CONF_THRESH = 0.35
17
- TTA_CONF_THRESH = 0.25
18
- IOU_THRESH = 0.45
19
- WBF_IOU_THR = 0.45
 
 
 
 
 
 
 
 
 
 
 
20
  WBF_SKIP_THR = 0.0001
21
 
22
 
23
- def _wbf(boxes_list: list[np.ndarray], scores_list: list[np.ndarray],
24
- iou_thr: float = 0.45, skip_box_thr: float = 0.0001
25
- ) -> tuple[np.ndarray, np.ndarray]:
26
- """Weighted Boxes Fusion for single-class detection. Boxes in [0,1] normalized coords."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  if not boxes_list:
28
  return np.empty((0, 4)), np.empty(0)
29
 
30
- all_boxes, all_scores = [], []
31
  for bx, sc in zip(boxes_list, scores_list):
32
  for i in range(len(bx)):
33
- if sc[i] < skip_box_thr:
34
  continue
35
- all_boxes.append(bx[i])
36
- all_scores.append(sc[i])
37
 
38
- if not all_boxes:
39
  return np.empty((0, 4)), np.empty(0)
40
 
41
- all_boxes = np.array(all_boxes)
42
- all_scores = np.array(all_scores)
43
-
44
- order = all_scores.argsort()[::-1]
45
- all_boxes = all_boxes[order]
46
- all_scores = all_scores[order]
47
-
48
- clusters: list[list[int]] = []
49
- cluster_boxes: list[np.ndarray] = []
50
-
51
- for i in range(len(all_boxes)):
52
- matched = -1
53
- best_iou = iou_thr
54
- for c_idx, c_box in enumerate(cluster_boxes):
55
- xx1 = max(all_boxes[i, 0], c_box[0])
56
- yy1 = max(all_boxes[i, 1], c_box[1])
57
- xx2 = min(all_boxes[i, 2], c_box[2])
58
- yy2 = min(all_boxes[i, 3], c_box[3])
59
  inter = max(0, xx2 - xx1) * max(0, yy2 - yy1)
60
- a1 = (all_boxes[i, 2] - all_boxes[i, 0]) * (all_boxes[i, 3] - all_boxes[i, 1])
61
- a2 = (c_box[2] - c_box[0]) * (c_box[3] - c_box[1])
62
  iou = inter / (a1 + a2 - inter + 1e-9)
63
  if iou > best_iou:
64
  best_iou = iou
65
- matched = c_idx
66
  if matched >= 0:
67
  clusters[matched].append(i)
68
  idxs = clusters[matched]
69
- weights = all_scores[idxs]
70
- w_sum = weights.sum()
71
- cluster_boxes[matched] = (all_boxes[idxs] * weights[:, None]).sum(0) / w_sum
72
  else:
73
  clusters.append([i])
74
- cluster_boxes.append(all_boxes[i].copy())
75
 
76
- fused_boxes, fused_scores = [], []
77
- for c_idx, idxs in enumerate(clusters):
78
- weights = all_scores[idxs]
79
- fused_boxes.append(cluster_boxes[c_idx])
80
- fused_scores.append(weights.mean())
81
 
82
- if not fused_boxes:
83
  return np.empty((0, 4)), np.empty(0)
84
- return np.array(fused_boxes), np.array(fused_scores)
85
 
86
 
87
  class BoundingBox(BaseModel):
@@ -102,127 +176,222 @@ class TVFrameResult(BaseModel):
102
  class Miner:
103
  def __init__(self, path_hf_repo: Path) -> None:
104
  self.path_hf_repo = path_hf_repo
105
- self.class_names = ['person']
106
- self.session = ort.InferenceSession(
107
- str(path_hf_repo / "weights.onnx"),
 
 
 
 
 
 
 
 
108
  providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
109
  )
110
- self.input_name = self.session.get_inputs()[0].name
111
- input_shape = self.session.get_inputs()[0].shape
112
- self.input_h = int(input_shape[2])
113
- self.input_w = int(input_shape[3])
114
- self.conf_threshold = CONF_THRESH
115
- self.tta_conf_threshold = TTA_CONF_THRESH
116
- self.iou_threshold = IOU_THRESH
117
 
118
  def __repr__(self) -> str:
119
- return f"DetectPerson Miner v6 2-pass TTA + WBF iou={WBF_IOU_THR}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
- def _preprocess(self, image_bgr: ndarray) -> tuple[np.ndarray, tuple[int, int]]:
122
- h, w = image_bgr.shape[:2]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
124
- resized = cv2.resize(rgb, (self.input_w, self.input_h))
125
  x = resized.astype(np.float32) / 255.0
126
  x = np.transpose(x, (2, 0, 1))[None, ...]
127
- return x, (h, w)
128
 
129
- def _decode_raw(self, raw: np.ndarray, orig_h: int, orig_w: int,
130
- conf_thresh: float | None = None) -> tuple[np.ndarray, np.ndarray]:
131
  pred = raw[0]
132
  if pred.ndim != 2:
133
  return np.empty((0, 4)), np.empty(0)
134
  if pred.shape[0] < pred.shape[1]:
135
- pred = pred.transpose(1, 0)
136
  if pred.shape[1] < 5:
137
  return np.empty((0, 4)), np.empty(0)
138
-
139
- boxes = pred[:, :4]
140
  cls_scores = pred[:, 4:]
141
- if cls_scores.shape[1] == 0:
142
- return np.empty((0, 4)), np.empty(0)
143
-
144
  confs = np.max(cls_scores, axis=1)
145
- thresh = conf_thresh if conf_thresh is not None else self.conf_threshold
146
- keep = confs >= thresh
147
- boxes, confs = boxes[keep], confs[keep]
148
- if boxes.shape[0] == 0:
149
  return np.empty((0, 4)), np.empty(0)
150
-
151
- sx = orig_w / float(self.input_w)
152
- sy = orig_h / float(self.input_h)
153
  cx, cy, bw, bh = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
154
- x1 = np.clip((cx - bw / 2) * sx, 0, orig_w)
155
- y1 = np.clip((cy - bh / 2) * sy, 0, orig_h)
156
- x2 = np.clip((cx + bw / 2) * sx, 0, orig_w)
157
- y2 = np.clip((cy + bh / 2) * sy, 0, orig_h)
158
  return np.stack([x1, y1, x2, y2], axis=1), confs
159
 
160
- def _run_single_pass(self, image_bgr: ndarray, conf_thresh: float | None = None
161
- ) -> tuple[np.ndarray, np.ndarray]:
162
- orig_h, orig_w = image_bgr.shape[:2]
163
- inp, _ = self._preprocess(image_bgr)
164
- raw = self.session.run(None, {self.input_name: inp})[0]
165
- return self._decode_raw(raw, orig_h, orig_w, conf_thresh)
166
-
167
- def _infer_single(self, image_bgr: ndarray) -> list[BoundingBox]:
168
- orig_h, orig_w = image_bgr.shape[:2]
169
 
170
- all_boxes, all_scores = [], []
 
 
171
 
172
  def _collect(boxes, confs):
173
  if len(boxes) == 0:
174
  return
175
  norm = boxes.copy()
176
- norm[:, [0, 2]] /= orig_w
177
- norm[:, [1, 3]] /= orig_h
178
  norm = np.clip(norm, 0, 1)
179
- all_boxes.append(norm)
180
- all_scores.append(confs)
181
 
182
- # Pass 1: original (low threshold for TTA)
183
- _collect(*self._run_single_pass(image_bgr, self.tta_conf_threshold))
184
-
185
- # Pass 2: horizontal flip
186
  flipped = cv2.flip(image_bgr, 1)
187
- boxes_f, confs_f = self._run_single_pass(flipped, self.tta_conf_threshold)
188
- if len(boxes_f):
189
- boxes_f[:, 0], boxes_f[:, 2] = orig_w - boxes_f[:, 2], orig_w - boxes_f[:, 0]
190
- _collect(boxes_f, confs_f)
191
-
192
- # (1.2x crop pass REMOVED — adds more FPs than TPs)
193
 
194
- if not all_boxes:
195
  return []
196
 
197
- fused_boxes, fused_scores = _wbf(
198
- all_boxes, all_scores,
199
- iou_thr=WBF_IOU_THR, skip_box_thr=WBF_SKIP_THR,
200
- )
201
- if len(fused_boxes) == 0:
202
  return []
203
 
204
- # Denormalize
205
- fused_boxes[:, [0, 2]] *= orig_w
206
- fused_boxes[:, [1, 3]] *= orig_h
207
 
208
- # Apply final conf threshold after WBF
209
- keep = fused_scores >= self.conf_threshold
210
- fused_boxes = fused_boxes[keep]
211
- fused_scores = fused_scores[keep]
212
 
213
- out: list[BoundingBox] = []
214
- for i in range(len(fused_boxes)):
215
- b = fused_boxes[i]
216
  out.append(BoundingBox(
217
- x1=max(0, min(orig_w, math.floor(b[0]))),
218
- y1=max(0, min(orig_h, math.floor(b[1]))),
219
- x2=max(0, min(orig_w, math.ceil(b[2]))),
220
- y2=max(0, min(orig_h, math.ceil(b[3]))),
221
  cls_id=0,
222
- conf=max(0.0, min(1.0, float(fused_scores[i]))),
223
  ))
224
  return out
225
 
 
 
 
 
 
 
 
226
  def predict_batch(
227
  self,
228
  batch_images: list[ndarray],
 
1
  """
2
+ Score Vision SN44 — Unified miner v1 (2026-03-27).
3
+ Dual-model: vehicle (YOLO11s) + person (YOLO11s).
4
+
5
+ Vehicle model (vehicle_weights.onnx):
6
+ Trained classes: 0=car, 1=bus, 2=truck, 3=motorcycle
7
+ Remapped to manifest: 0=bus, 1=car, 2=truck, 3=motorcycle
8
+
9
+ Person model (person_weights.onnx):
10
+ Single class: 0=person
11
+
12
+ Both models run on every image. All detections merged.
13
+ cls_id 0 is shared: "bus" for vehicle eval, "person" for person eval.
14
+ Vehicle eval uses cls_id 0-3. Person eval uses cls_id 0 only.
15
  """
16
 
17
  from pathlib import Path
 
23
  from numpy import ndarray
24
  from pydantic import BaseModel
25
 
26
+ # ── Vehicle config ──────────────────────────────────────────────────────────
27
+ VEH_MODEL_TO_OUT: dict[int, int] = {0: 1, 1: 0, 2: 2, 3: 3}
28
+ VEH_NUM_CLASSES = 4
29
+ VEH_IMG_SIZE = 1280
30
+ VEH_CONF_PER_CLASS = {0: 0.33, 1: 0.50, 2: 0.40, 3: 0.36}
31
+ VEH_CONF_DEFAULT = 0.35
32
+ VEH_TTA_CONF = 0.25
33
+ VEH_WBF_IOU = 0.55
34
+
35
+ # ── Person config ───────────────────────────────────────────────────────────
36
+ PER_CONF = 0.35
37
+ PER_TTA_CONF = 0.25
38
+ PER_WBF_IOU = 0.45
39
+
40
+ # ── Shared ──────────────────────────────────────────────────────────────────
41
  WBF_SKIP_THR = 0.0001
42
 
43
 
44
+ def _wbf_multi(boxes_list, scores_list, labels_list, iou_thr=0.55, skip_thr=0.0001):
45
+ """Weighted Boxes Fusion (multi-class). Boxes in [0,1] normalized coords."""
46
+ if not boxes_list:
47
+ return np.empty((0, 4)), np.empty(0), np.empty(0)
48
+
49
+ all_b, all_s, all_l = [], [], []
50
+ for bx, sc, lb in zip(boxes_list, scores_list, labels_list):
51
+ for i in range(len(bx)):
52
+ if sc[i] < skip_thr:
53
+ continue
54
+ all_b.append(bx[i])
55
+ all_s.append(sc[i])
56
+ all_l.append(int(lb[i]))
57
+
58
+ if not all_b:
59
+ return np.empty((0, 4)), np.empty(0), np.empty(0)
60
+
61
+ all_b = np.array(all_b)
62
+ all_s = np.array(all_s)
63
+ all_l = np.array(all_l, dtype=int)
64
+
65
+ fused_b, fused_s, fused_l = [], [], []
66
+ for cls in np.unique(all_l):
67
+ m = all_l == cls
68
+ cb, cs = all_b[m], all_s[m]
69
+ order = cs.argsort()[::-1]
70
+ cb, cs = cb[order], cs[order]
71
+
72
+ clusters, cboxes = [], []
73
+ for i in range(len(cb)):
74
+ matched, best_iou = -1, iou_thr
75
+ for ci, cbox in enumerate(cboxes):
76
+ xx1 = max(cb[i, 0], cbox[0])
77
+ yy1 = max(cb[i, 1], cbox[1])
78
+ xx2 = min(cb[i, 2], cbox[2])
79
+ yy2 = min(cb[i, 3], cbox[3])
80
+ inter = max(0, xx2 - xx1) * max(0, yy2 - yy1)
81
+ a1 = (cb[i, 2] - cb[i, 0]) * (cb[i, 3] - cb[i, 1])
82
+ a2 = (cbox[2] - cbox[0]) * (cbox[3] - cbox[1])
83
+ iou = inter / (a1 + a2 - inter + 1e-9)
84
+ if iou > best_iou:
85
+ best_iou = iou
86
+ matched = ci
87
+ if matched >= 0:
88
+ clusters[matched].append(i)
89
+ idxs = clusters[matched]
90
+ w = cs[idxs]
91
+ cboxes[matched] = (cb[idxs] * w[:, None]).sum(0) / w.sum()
92
+ else:
93
+ clusters.append([i])
94
+ cboxes.append(cb[i].copy())
95
+
96
+ for ci, idxs in enumerate(clusters):
97
+ fused_b.append(cboxes[ci])
98
+ fused_s.append(cs[idxs].mean())
99
+ fused_l.append(cls)
100
+
101
+ if not fused_b:
102
+ return np.empty((0, 4)), np.empty(0), np.empty(0)
103
+ return np.array(fused_b), np.array(fused_s), np.array(fused_l)
104
+
105
+
106
+ def _wbf_single(boxes_list, scores_list, iou_thr=0.45, skip_thr=0.0001):
107
+ """Weighted Boxes Fusion (single-class). Boxes in [0,1] normalized coords."""
108
  if not boxes_list:
109
  return np.empty((0, 4)), np.empty(0)
110
 
111
+ all_b, all_s = [], []
112
  for bx, sc in zip(boxes_list, scores_list):
113
  for i in range(len(bx)):
114
+ if sc[i] < skip_thr:
115
  continue
116
+ all_b.append(bx[i])
117
+ all_s.append(sc[i])
118
 
119
+ if not all_b:
120
  return np.empty((0, 4)), np.empty(0)
121
 
122
+ all_b = np.array(all_b)
123
+ all_s = np.array(all_s)
124
+ order = all_s.argsort()[::-1]
125
+ all_b, all_s = all_b[order], all_s[order]
126
+
127
+ clusters, cboxes = [], []
128
+ for i in range(len(all_b)):
129
+ matched, best_iou = -1, iou_thr
130
+ for ci, cbox in enumerate(cboxes):
131
+ xx1 = max(all_b[i, 0], cbox[0])
132
+ yy1 = max(all_b[i, 1], cbox[1])
133
+ xx2 = min(all_b[i, 2], cbox[2])
134
+ yy2 = min(all_b[i, 3], cbox[3])
 
 
 
 
 
135
  inter = max(0, xx2 - xx1) * max(0, yy2 - yy1)
136
+ a1 = (all_b[i, 2] - all_b[i, 0]) * (all_b[i, 3] - all_b[i, 1])
137
+ a2 = (cbox[2] - cbox[0]) * (cbox[3] - cbox[1])
138
  iou = inter / (a1 + a2 - inter + 1e-9)
139
  if iou > best_iou:
140
  best_iou = iou
141
+ matched = ci
142
  if matched >= 0:
143
  clusters[matched].append(i)
144
  idxs = clusters[matched]
145
+ w = all_s[idxs]
146
+ cboxes[matched] = (all_b[idxs] * w[:, None]).sum(0) / w.sum()
 
147
  else:
148
  clusters.append([i])
149
+ cboxes.append(all_b[i].copy())
150
 
151
+ fused_b, fused_s = [], []
152
+ for ci, idxs in enumerate(clusters):
153
+ fused_b.append(cboxes[ci])
154
+ fused_s.append(all_s[idxs].mean())
 
155
 
156
+ if not fused_b:
157
  return np.empty((0, 4)), np.empty(0)
158
+ return np.array(fused_b), np.array(fused_s)
159
 
160
 
161
  class BoundingBox(BaseModel):
 
176
  class Miner:
177
  def __init__(self, path_hf_repo: Path) -> None:
178
  self.path_hf_repo = path_hf_repo
179
+
180
+ # Vehicle model (YOLO11s, 4 classes)
181
+ self.veh_session = ort.InferenceSession(
182
+ str(path_hf_repo / "vehicle_weights.onnx"),
183
+ providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
184
+ )
185
+ self.veh_input_name = self.veh_session.get_inputs()[0].name
186
+
187
+ # Person model (YOLO11s, 1 class)
188
+ self.per_session = ort.InferenceSession(
189
+ str(path_hf_repo / "person_weights.onnx"),
190
  providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
191
  )
192
+ self.per_input_name = self.per_session.get_inputs()[0].name
193
+ per_shape = self.per_session.get_inputs()[0].shape
194
+ self.per_h = int(per_shape[2])
195
+ self.per_w = int(per_shape[3])
 
 
 
196
 
197
  def __repr__(self) -> str:
198
+ return "Unified Miner v1 β€” dual-model vehicle+person"
199
+
200
+ # ── Vehicle preprocessing (letterbox) ───────────────────────────────────
201
+
202
+ def _veh_letterbox(self, img):
203
+ h, w = img.shape[:2]
204
+ r = min(VEH_IMG_SIZE / h, VEH_IMG_SIZE / w)
205
+ nw, nh = int(round(w * r)), int(round(h * r))
206
+ img_r = cv2.resize(img, (nw, nh), interpolation=cv2.INTER_LINEAR)
207
+ dw, dh = VEH_IMG_SIZE - nw, VEH_IMG_SIZE - nh
208
+ pl, pt = dw // 2, dh // 2
209
+ img_p = cv2.copyMakeBorder(
210
+ img_r, pt, dh - pt, pl, dw - pl,
211
+ cv2.BORDER_CONSTANT, value=(114, 114, 114),
212
+ )
213
+ return img_p, r, pl, pt
214
 
215
+ def _veh_preprocess(self, image_bgr):
216
+ img_p, ratio, pl, pt = self._veh_letterbox(image_bgr)
217
+ rgb = cv2.cvtColor(img_p, cv2.COLOR_BGR2RGB)
218
+ inp = rgb.astype(np.float32) / 255.0
219
+ inp = np.ascontiguousarray(inp.transpose(2, 0, 1)[np.newaxis])
220
+ return inp, ratio, pl, pt
221
+
222
+ def _veh_decode(self, raw, ratio, pl, pt, ow, oh, conf_thresh):
223
+ pred = raw[0]
224
+ if pred.shape[0] < pred.shape[1]:
225
+ pred = pred.T
226
+ cls_scores = pred[:, 4:]
227
+ cls_ids = np.argmax(cls_scores, axis=1)
228
+ confs = np.max(cls_scores, axis=1)
229
+ mask = confs >= conf_thresh
230
+ if not mask.any():
231
+ return np.empty((0, 4)), np.empty(0), np.empty(0, dtype=int)
232
+ bx, confs, cls_ids = pred[mask, :4], confs[mask], cls_ids[mask]
233
+ cx, cy, bw, bh = bx[:, 0], bx[:, 1], bx[:, 2], bx[:, 3]
234
+ x1 = np.clip((cx - bw / 2 - pl) / ratio, 0, ow)
235
+ y1 = np.clip((cy - bh / 2 - pt) / ratio, 0, oh)
236
+ x2 = np.clip((cx + bw / 2 - pl) / ratio, 0, ow)
237
+ y2 = np.clip((cy + bh / 2 - pt) / ratio, 0, oh)
238
+ return np.stack([x1, y1, x2, y2], axis=1), confs, cls_ids
239
+
240
+ def _veh_run_pass(self, image_bgr, conf_thresh):
241
+ oh, ow = image_bgr.shape[:2]
242
+ inp, ratio, pl, pt = self._veh_preprocess(image_bgr)
243
+ raw = self.veh_session.run(None, {self.veh_input_name: inp})[0]
244
+ return self._veh_decode(raw, ratio, pl, pt, ow, oh, conf_thresh)
245
+
246
+ def _infer_vehicle(self, image_bgr):
247
+ oh, ow = image_bgr.shape[:2]
248
+ all_b, all_s, all_l = [], [], []
249
+
250
+ def _collect(boxes, confs, cls_ids):
251
+ if len(boxes) == 0:
252
+ return
253
+ out_cls = np.array([VEH_MODEL_TO_OUT[int(c)] for c in cls_ids])
254
+ norm = boxes.copy()
255
+ norm[:, [0, 2]] /= ow
256
+ norm[:, [1, 3]] /= oh
257
+ norm = np.clip(norm, 0, 1)
258
+ all_b.append(norm)
259
+ all_s.append(confs)
260
+ all_l.append(out_cls)
261
+
262
+ # Pass 1: original
263
+ _collect(*self._veh_run_pass(image_bgr, VEH_TTA_CONF))
264
+ # Pass 2: hflip
265
+ flipped = cv2.flip(image_bgr, 1)
266
+ bx, sc, cl = self._veh_run_pass(flipped, VEH_TTA_CONF)
267
+ if len(bx):
268
+ bx[:, 0], bx[:, 2] = ow - bx[:, 2], ow - bx[:, 0]
269
+ _collect(bx, sc, cl)
270
+
271
+ if not all_b:
272
+ return []
273
+
274
+ fb, fs, fl = _wbf_multi(all_b, all_s, all_l, iou_thr=VEH_WBF_IOU, skip_thr=WBF_SKIP_THR)
275
+ if len(fb) == 0:
276
+ return []
277
+
278
+ fb[:, [0, 2]] *= ow
279
+ fb[:, [1, 3]] *= oh
280
+
281
+ keep = np.array([
282
+ fs[i] >= VEH_CONF_PER_CLASS.get(int(fl[i]), VEH_CONF_DEFAULT)
283
+ for i in range(len(fs))
284
+ ])
285
+ if not keep.any():
286
+ return []
287
+ fb, fs, fl = fb[keep], fs[keep], fl[keep]
288
+
289
+ out = []
290
+ for i in range(len(fb)):
291
+ b = fb[i]
292
+ out.append(BoundingBox(
293
+ x1=max(0, min(ow, math.floor(b[0]))),
294
+ y1=max(0, min(oh, math.floor(b[1]))),
295
+ x2=max(0, min(ow, math.ceil(b[2]))),
296
+ y2=max(0, min(oh, math.ceil(b[3]))),
297
+ cls_id=int(fl[i]),
298
+ conf=max(0.0, min(1.0, float(fs[i]))),
299
+ ))
300
+ return out
301
+
302
+ # ── Person preprocessing (stretch resize) ──────────────────────────────
303
+
304
+ def _per_preprocess(self, image_bgr):
305
  rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
306
+ resized = cv2.resize(rgb, (self.per_w, self.per_h))
307
  x = resized.astype(np.float32) / 255.0
308
  x = np.transpose(x, (2, 0, 1))[None, ...]
309
+ return x
310
 
311
+ def _per_decode(self, raw, oh, ow, conf_thresh):
 
312
  pred = raw[0]
313
  if pred.ndim != 2:
314
  return np.empty((0, 4)), np.empty(0)
315
  if pred.shape[0] < pred.shape[1]:
316
+ pred = pred.T
317
  if pred.shape[1] < 5:
318
  return np.empty((0, 4)), np.empty(0)
 
 
319
  cls_scores = pred[:, 4:]
 
 
 
320
  confs = np.max(cls_scores, axis=1)
321
+ keep = confs >= conf_thresh
322
+ boxes, confs = pred[keep, :4], confs[keep]
323
+ if len(boxes) == 0:
 
324
  return np.empty((0, 4)), np.empty(0)
325
+ sx, sy = ow / float(self.per_w), oh / float(self.per_h)
 
 
326
  cx, cy, bw, bh = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
327
+ x1 = np.clip((cx - bw / 2) * sx, 0, ow)
328
+ y1 = np.clip((cy - bh / 2) * sy, 0, oh)
329
+ x2 = np.clip((cx + bw / 2) * sx, 0, ow)
330
+ y2 = np.clip((cy + bh / 2) * sy, 0, oh)
331
  return np.stack([x1, y1, x2, y2], axis=1), confs
332
 
333
+ def _per_run_pass(self, image_bgr, conf_thresh):
334
+ oh, ow = image_bgr.shape[:2]
335
+ inp = self._per_preprocess(image_bgr)
336
+ raw = self.per_session.run(None, {self.per_input_name: inp})[0]
337
+ return self._per_decode(raw, oh, ow, conf_thresh)
 
 
 
 
338
 
339
+ def _infer_person(self, image_bgr):
340
+ oh, ow = image_bgr.shape[:2]
341
+ all_b, all_s = [], []
342
 
343
  def _collect(boxes, confs):
344
  if len(boxes) == 0:
345
  return
346
  norm = boxes.copy()
347
+ norm[:, [0, 2]] /= ow
348
+ norm[:, [1, 3]] /= oh
349
  norm = np.clip(norm, 0, 1)
350
+ all_b.append(norm)
351
+ all_s.append(confs)
352
 
353
+ # Pass 1: original
354
+ _collect(*self._per_run_pass(image_bgr, PER_TTA_CONF))
355
+ # Pass 2: hflip
 
356
  flipped = cv2.flip(image_bgr, 1)
357
+ bx, sc = self._per_run_pass(flipped, PER_TTA_CONF)
358
+ if len(bx):
359
+ bx[:, 0], bx[:, 2] = ow - bx[:, 2], ow - bx[:, 0]
360
+ _collect(bx, sc)
 
 
361
 
362
+ if not all_b:
363
  return []
364
 
365
+ fb, fs = _wbf_single(all_b, all_s, iou_thr=PER_WBF_IOU, skip_thr=WBF_SKIP_THR)
366
+ if len(fb) == 0:
 
 
 
367
  return []
368
 
369
+ fb[:, [0, 2]] *= ow
370
+ fb[:, [1, 3]] *= oh
 
371
 
372
+ keep = fs >= PER_CONF
373
+ fb, fs = fb[keep], fs[keep]
 
 
374
 
375
+ out = []
376
+ for i in range(len(fb)):
377
+ b = fb[i]
378
  out.append(BoundingBox(
379
+ x1=max(0, min(ow, math.floor(b[0]))),
380
+ y1=max(0, min(oh, math.floor(b[1]))),
381
+ x2=max(0, min(ow, math.ceil(b[2]))),
382
+ y2=max(0, min(oh, math.ceil(b[3]))),
383
  cls_id=0,
384
+ conf=max(0.0, min(1.0, float(fs[i]))),
385
  ))
386
  return out
387
 
388
+ # ── Unified inference ───────────────────────────────────────────────────
389
+
390
+ def _infer_single(self, image_bgr: ndarray) -> list[BoundingBox]:
391
+ vehicle_boxes = self._infer_vehicle(image_bgr)
392
+ person_boxes = self._infer_person(image_bgr)
393
+ return vehicle_boxes + person_boxes
394
+
395
  def predict_batch(
396
  self,
397
  batch_images: list[ndarray],
model_type.json CHANGED
@@ -1 +1 @@
1
- {"task_type": "object-detection", "model_type": "yolov11-nano", "deploy": "2026-03-26T07:46Z"}
 
1
+ {"task_type": "object-detection", "model_type": "yolov11-small-dual", "deploy": "2026-03-27T09:00Z"}
person_weights.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f32ed65b9024a69693f675d494c7fc813a964766c54b241464a463377342da60
3
+ size 5607862
vehicle_weights.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3916408ec21f8c94358c18914f922814770b78557e52fe17ff7a9ee74339a5a
3
+ size 19272252