licensy committed on
Commit
34d4058
·
verified ·
1 Parent(s): 15ea483

Update miner.py

Browse files
Files changed (1) hide show
  1. miner.py +60 -9
miner.py CHANGED
@@ -38,10 +38,12 @@ class Miner:
38
  )
39
  self.input_name = self.session.get_inputs()[0].name
40
  input_shape = self.session.get_inputs()[0].shape
 
41
  self.input_h = int(input_shape[2])
42
  self.input_w = int(input_shape[3])
43
- self.conf_threshold = 0.15
44
  self.iou_threshold = 0.3
 
45
 
46
  def __repr__(self) -> str:
47
  return f"ONNX Miner session={type(self.session).__name__} classes={len(self.class_names)}"
@@ -65,48 +67,64 @@ class Miner:
65
  def _nms(self, dets: list[tuple[float, float, float, float, float, int]]) -> list[tuple[float, float, float, float, float, int]]:
66
  if not dets:
67
  return []
 
68
  boxes = np.array([[d[0], d[1], d[2], d[3]] for d in dets], dtype=np.float32)
69
  scores = np.array([d[4] for d in dets], dtype=np.float32)
70
  order = scores.argsort()[::-1]
71
  keep = []
 
72
  while order.size > 0:
73
  i = order[0]
74
  keep.append(i)
 
75
  xx1 = np.maximum(boxes[i, 0], boxes[order[1:], 0])
76
  yy1 = np.maximum(boxes[i, 1], boxes[order[1:], 1])
77
  xx2 = np.minimum(boxes[i, 2], boxes[order[1:], 2])
78
  yy2 = np.minimum(boxes[i, 3], boxes[order[1:], 3])
 
79
  w = np.maximum(0.0, xx2 - xx1)
80
  h = np.maximum(0.0, yy2 - yy1)
81
  inter = w * h
 
82
  area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
83
  area_rest = (boxes[order[1:], 2] - boxes[order[1:], 0]) * (boxes[order[1:], 3] - boxes[order[1:], 1])
84
  union = np.maximum(area_i + area_rest - inter, 1e-6)
85
  iou = inter / union
 
86
  remaining = np.where(iou <= self.iou_threshold)[0]
87
  order = order[remaining + 1]
 
88
  return [dets[idx] for idx in keep]
89
 
90
- def _infer_single(self, image_bgr: ndarray) -> list[BoundingBox]:
 
91
  inp, (orig_h, orig_w) = self._preprocess(image_bgr)
92
  out = self.session.run(None, {self.input_name: inp})[0]
93
  pred = self._normalize_predictions(out)
 
94
  if pred.shape[1] < 5:
95
  return []
 
96
  boxes = pred[:, :4]
97
  cls_scores = pred[:, 4:]
 
98
  if cls_scores.shape[1] == 0:
99
  return []
 
100
  cls_ids = np.argmax(cls_scores, axis=1)
101
  confs = np.max(cls_scores, axis=1)
102
  keep = confs >= self.conf_threshold
 
103
  boxes = boxes[keep]
104
  confs = confs[keep]
105
  cls_ids = cls_ids[keep]
 
106
  if boxes.shape[0] == 0:
107
  return []
 
108
  sx = orig_w / float(self.input_w)
109
  sy = orig_h / float(self.input_h)
 
110
  dets: list[tuple[float, float, float, float, float, int]] = []
111
  for i in range(boxes.shape[0]):
112
  cx, cy, bw, bh = boxes[i].tolist()
@@ -115,25 +133,58 @@ class Miner:
115
  x2 = (cx + bw / 2.0) * sx
116
  y2 = (cy + bh / 2.0) * sy
117
  dets.append((x1, y1, x2, y2, float(confs[i]), int(cls_ids[i])))
118
- dets = self._nms(dets)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  out_boxes: list[BoundingBox] = []
120
- for x1, y1, x2, y2, conf, cls_id in dets:
121
  ix1 = max(0, min(orig_w, math.floor(x1)))
122
  iy1 = max(0, min(orig_h, math.floor(y1)))
123
  ix2 = max(0, min(orig_w, math.ceil(x2)))
124
  iy2 = max(0, min(orig_h, math.ceil(y2)))
125
  out_boxes.append(
126
- BoundingBox(x1=ix1, y1=iy1, x2=ix2, y2=iy2,
127
- cls_id=cls_id, conf=max(0.0, min(1.0, conf))))
 
 
 
 
 
 
 
128
  return out_boxes
129
 
130
  def predict_batch(
131
- self, batch_images: list[ndarray], offset: int, n_keypoints: int,
 
 
 
132
  ) -> list[TVFrameResult]:
133
  results: list[TVFrameResult] = []
134
  for idx, image in enumerate(batch_images):
135
  boxes = self._infer_single(image)
136
  keypoints = [(0, 0) for _ in range(max(0, int(n_keypoints)))]
137
- results.append(TVFrameResult(
138
- frame_id=offset + idx, boxes=boxes, keypoints=keypoints))
 
 
 
 
 
139
  return results
 
38
  )
39
  self.input_name = self.session.get_inputs()[0].name
40
  input_shape = self.session.get_inputs()[0].shape
41
+ # expected [N, C, H, W]
42
  self.input_h = int(input_shape[2])
43
  self.input_w = int(input_shape[3])
44
+ self.conf_threshold = 0.20
45
  self.iou_threshold = 0.3
46
+ self.use_tta = True
47
 
48
  def __repr__(self) -> str:
49
  return f"ONNX Miner session={type(self.session).__name__} classes={len(self.class_names)}"
 
67
  def _nms(self, dets: list[tuple[float, float, float, float, float, int]]) -> list[tuple[float, float, float, float, float, int]]:
68
  if not dets:
69
  return []
70
+
71
  boxes = np.array([[d[0], d[1], d[2], d[3]] for d in dets], dtype=np.float32)
72
  scores = np.array([d[4] for d in dets], dtype=np.float32)
73
  order = scores.argsort()[::-1]
74
  keep = []
75
+
76
  while order.size > 0:
77
  i = order[0]
78
  keep.append(i)
79
+
80
  xx1 = np.maximum(boxes[i, 0], boxes[order[1:], 0])
81
  yy1 = np.maximum(boxes[i, 1], boxes[order[1:], 1])
82
  xx2 = np.minimum(boxes[i, 2], boxes[order[1:], 2])
83
  yy2 = np.minimum(boxes[i, 3], boxes[order[1:], 3])
84
+
85
  w = np.maximum(0.0, xx2 - xx1)
86
  h = np.maximum(0.0, yy2 - yy1)
87
  inter = w * h
88
+
89
  area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
90
  area_rest = (boxes[order[1:], 2] - boxes[order[1:], 0]) * (boxes[order[1:], 3] - boxes[order[1:], 1])
91
  union = np.maximum(area_i + area_rest - inter, 1e-6)
92
  iou = inter / union
93
+
94
  remaining = np.where(iou <= self.iou_threshold)[0]
95
  order = order[remaining + 1]
96
+
97
  return [dets[idx] for idx in keep]
98
 
99
+ def _decode(self, image_bgr: ndarray) -> list[tuple[float, float, float, float, float, int]]:
100
+ """Run model and return raw detections before NMS."""
101
  inp, (orig_h, orig_w) = self._preprocess(image_bgr)
102
  out = self.session.run(None, {self.input_name: inp})[0]
103
  pred = self._normalize_predictions(out)
104
+
105
  if pred.shape[1] < 5:
106
  return []
107
+
108
  boxes = pred[:, :4]
109
  cls_scores = pred[:, 4:]
110
+
111
  if cls_scores.shape[1] == 0:
112
  return []
113
+
114
  cls_ids = np.argmax(cls_scores, axis=1)
115
  confs = np.max(cls_scores, axis=1)
116
  keep = confs >= self.conf_threshold
117
+
118
  boxes = boxes[keep]
119
  confs = confs[keep]
120
  cls_ids = cls_ids[keep]
121
+
122
  if boxes.shape[0] == 0:
123
  return []
124
+
125
  sx = orig_w / float(self.input_w)
126
  sy = orig_h / float(self.input_h)
127
+
128
  dets: list[tuple[float, float, float, float, float, int]] = []
129
  for i in range(boxes.shape[0]):
130
  cx, cy, bw, bh = boxes[i].tolist()
 
133
  x2 = (cx + bw / 2.0) * sx
134
  y2 = (cy + bh / 2.0) * sy
135
  dets.append((x1, y1, x2, y2, float(confs[i]), int(cls_ids[i])))
136
+
137
+ return dets
138
+
139
+ def _infer_single(self, image_bgr: ndarray) -> list[BoundingBox]:
140
+ orig_h, orig_w = image_bgr.shape[:2]
141
+
142
+ # Original pass
143
+ all_dets = self._decode(image_bgr)
144
+
145
+ # TTA: horizontal flip pass
146
+ if self.use_tta:
147
+ flipped = cv2.flip(image_bgr, 1)
148
+ flip_dets = self._decode(flipped)
149
+ for x1, y1, x2, y2, conf, cls_id in flip_dets:
150
+ all_dets.append((orig_w - x2, y1, orig_w - x1, y2, conf, cls_id))
151
+
152
+ # NMS
153
+ all_dets = self._nms(all_dets)
154
+
155
  out_boxes: list[BoundingBox] = []
156
+ for x1, y1, x2, y2, conf, cls_id in all_dets:
157
  ix1 = max(0, min(orig_w, math.floor(x1)))
158
  iy1 = max(0, min(orig_h, math.floor(y1)))
159
  ix2 = max(0, min(orig_w, math.ceil(x2)))
160
  iy2 = max(0, min(orig_h, math.ceil(y2)))
161
  out_boxes.append(
162
+ BoundingBox(
163
+ x1=ix1,
164
+ y1=iy1,
165
+ x2=ix2,
166
+ y2=iy2,
167
+ cls_id=cls_id,
168
+ conf=max(0.0, min(1.0, conf)),
169
+ )
170
+ )
171
  return out_boxes
172
 
173
  def predict_batch(
174
+ self,
175
+ batch_images: list[ndarray],
176
+ offset: int,
177
+ n_keypoints: int,
178
  ) -> list[TVFrameResult]:
179
  results: list[TVFrameResult] = []
180
  for idx, image in enumerate(batch_images):
181
  boxes = self._infer_single(image)
182
  keypoints = [(0, 0) for _ in range(max(0, int(n_keypoints)))]
183
+ results.append(
184
+ TVFrameResult(
185
+ frame_id=offset + idx,
186
+ boxes=boxes,
187
+ keypoints=keypoints,
188
+ )
189
+ )
190
  return results