MTerryJack committed on
Commit
f77dea5
·
verified ·
1 Parent(s): dea2cf4

subnet_bridge: copy winning miner repo into library

Browse files
Files changed (6) hide show
  1. README.md +12 -3
  2. chute_config.yml +2 -3
  3. miner.py +248 -80
  4. pyproject.toml +4 -3
  5. readme.md +0 -17
  6. weights.onnx +2 -2
README.md CHANGED
@@ -10,8 +10,17 @@ tags:
10
  manako:
11
  source: winner_fetch
12
  manifest_element_name: manak0/Detect-fire
13
- winner_repo_id: navierstocks/wash
14
- winner_revision: f0cb290789b770cdd918dc49c0c219c3c01bc70b
15
  ---
16
 
17
- ## YOLO26 ONNX detector
 
 
 
 
 
 
 
 
 
 
10
  manako:
11
  source: winner_fetch
12
  manifest_element_name: manak0/Detect-fire
13
+ winner_repo_id: SuperBitDev/fire1
14
+ winner_revision: 0fbc341ae743ebec0d2a1d48bdaaaaa0d6ad9338
15
  ---
16
 
17
+ # Detect-fire-winner
18
+
19
+ Published winning miner converted into a library element.
20
+
21
+ - Source winner repo: `SuperBitDev/fire1`
22
+ - Source revision: `0fbc341ae743ebec0d2a1d48bdaaaaa0d6ad9338`
23
+ - Manifest element: `manak0/Detect-fire`
24
+ - Element type: `Detect`
25
+ - Objects: fire, smoke, fire extinguisher
26
+ - Runtime model type: `onnxruntime`
chute_config.yml CHANGED
@@ -2,13 +2,12 @@ Image:
2
  from_base: parachutes/python:3.12
3
  run_command:
4
  - pip install --upgrade setuptools wheel
5
- - pip install 'numpy>=1.23' 'onnxruntime-gpu[cuda,cudnn]>=1.16' 'opencv-python>=4.7' 'pillow>=9.5' 'huggingface_hub>=0.19.4' 'pydantic>=2.0' 'pyyaml>=6.0' 'aiohttp>=3.9'
6
- - pip install torch torchvision
7
 
8
  NodeSelector:
9
  gpu_count: 1
10
  min_vram_gb_per_gpu: 16
11
- max_hourly_price_per_gpu: 2
12
  include:
13
  - pro_6000
14
 
 
2
  from_base: parachutes/python:3.12
3
  run_command:
4
  - pip install --upgrade setuptools wheel
5
+ - pip install 'numpy>=1.23' 'onnxruntime-gpu>=1.16' 'opencv-python>=4.7' 'pillow>=9.5' 'huggingface_hub>=0.19.4' 'pydantic>=2.0' 'pyyaml>=6.0' 'aiohttp>=3.9'
6
+ - pip install torch==2.8.0 torchvision==0.23.0 torchaudio==2.8.0 --index-url https://download.pytorch.org/whl/cu128
7
 
8
  NodeSelector:
9
  gpu_count: 1
10
  min_vram_gb_per_gpu: 16
 
11
  include:
12
  - pro_6000
13
 
miner.py CHANGED
@@ -24,25 +24,56 @@ class TVFrameResult(BaseModel):
24
 
25
 
26
  class Miner:
27
- """ONNX Runtime miner. Hard per-class NMS + cross-class dedup + flip TTA, with per-class conf bonus rescue."""
28
 
29
- class_names = ["broom", "drainage gate", "nozzle", "track"]
30
- input_size = 1280
31
- iou_thres = 0.4
 
 
 
 
 
 
 
 
 
32
  cross_iou_thresh = 0.8
33
- max_det = 300
34
- _conf_thres_array = np.array([0.55, 0.6, 0.7, 0.5], dtype=np.float32)
35
- _bonus_array = np.array([0.05, 0.5, 0.0, 0.2], dtype=np.float32)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  def __init__(self, path_hf_repo: Path) -> None:
38
  model_path = path_hf_repo / "weights.onnx"
 
 
 
 
39
  print("ORT version:", ort.__version__)
40
 
41
  try:
42
  ort.preload_dlls()
43
- print("preload_dlls success")
44
  except Exception as e:
45
- print(f"preload_dlls failed: {e}")
46
 
47
  print("ORT available providers BEFORE session:", ort.get_available_providers())
48
 
@@ -55,9 +86,9 @@ class Miner:
55
  sess_options=sess_options,
56
  providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
57
  )
58
- print("Created ORT session with preferred CUDA provider list")
59
  except Exception as e:
60
- print(f"CUDA session creation failed, falling back to CPU: {e}")
61
  self.session = ort.InferenceSession(
62
  str(model_path),
63
  sess_options=sess_options,
@@ -75,15 +106,19 @@ class Miner:
75
  self.output_names = [output.name for output in self.session.get_outputs()]
76
  self.input_shape = self.session.get_inputs()[0].shape
77
 
78
- self.input_height = self._safe_dim(self.input_shape[2], default=self.input_size)
79
- self.input_width = self._safe_dim(self.input_shape[3], default=self.input_size)
80
 
81
- print(f"ONNX model loaded from: {model_path}")
82
- print(f"ONNX providers: {self.session.get_providers()}")
83
- print(f"ONNX input: name={self.input_name}, shape={self.input_shape}")
 
 
84
  print("per-class conf: " + ", ".join(
85
- f"{n}={t:.3f}" for n, t in zip(self.class_names,
86
- self._conf_thres_array.tolist())))
 
 
87
 
88
  def __repr__(self) -> str:
89
  return (
@@ -95,32 +130,44 @@ class Miner:
95
  def _safe_dim(value, default: int) -> int:
96
  return value if isinstance(value, int) and value > 0 else default
97
 
98
- def _letterbox(self, image: ndarray, new_shape: tuple[int, int],
99
- color=(114, 114, 114)
100
- ) -> tuple[ndarray, float, tuple[float, float]]:
 
 
 
101
  h, w = image.shape[:2]
102
  new_w, new_h = new_shape
 
103
  ratio = min(new_w / w, new_h / h)
104
  resized_w = int(round(w * ratio))
105
  resized_h = int(round(h * ratio))
 
106
  if (resized_w, resized_h) != (w, h):
107
  interp = cv2.INTER_CUBIC if ratio > 1.0 else cv2.INTER_LINEAR
108
  image = cv2.resize(image, (resized_w, resized_h), interpolation=interp)
 
109
  dw = (new_w - resized_w) / 2.0
110
  dh = (new_h - resized_h) / 2.0
 
111
  left = int(round(dw - 0.1))
112
  right = int(round(dw + 0.1))
113
  top = int(round(dh - 0.1))
114
  bottom = int(round(dh + 0.1))
115
- padded = cv2.copyMakeBorder(image, top, bottom, left, right,
116
- borderType=cv2.BORDER_CONSTANT, value=color)
 
 
 
117
  return padded, ratio, (dw, dh)
118
 
119
- def _preprocess(self, image: ndarray
120
- ) -> tuple[np.ndarray, float, tuple[float, float],
121
- tuple[int, int]]:
122
  orig_h, orig_w = image.shape[:2]
123
- img, ratio, pad = self._letterbox(image, (self.input_width, self.input_height))
 
 
124
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
125
  img = img.astype(np.float32) / 255.0
126
  img = np.transpose(img, (2, 0, 1))[None, ...]
@@ -146,8 +193,9 @@ class Miner:
146
  return out
147
 
148
  @staticmethod
149
- def _hard_nms(boxes: np.ndarray, scores: np.ndarray,
150
- iou_thresh: float) -> np.ndarray:
 
151
  n = len(boxes)
152
  if n == 0:
153
  return np.array([], dtype=np.intp)
@@ -172,9 +220,13 @@ class Miner:
172
  order = rest[iou <= iou_thresh]
173
  return np.array(keep, dtype=np.intp)
174
 
175
- def _per_class_hard_nms(self, boxes: np.ndarray, scores: np.ndarray,
176
- cls_ids: np.ndarray, iou_thresh: float
177
- ) -> np.ndarray:
 
 
 
 
178
  if len(boxes) == 0:
179
  return np.array([], dtype=np.intp)
180
  all_keep: list[int] = []
@@ -186,9 +238,20 @@ class Miner:
186
  all_keep.sort()
187
  return np.array(all_keep, dtype=np.intp)
188
 
189
- def _cross_class_dedup_op(self, boxes: np.ndarray, scores: np.ndarray,
190
- cls_ids: np.ndarray, iou_thresh: float
191
- ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
 
 
 
 
 
 
 
 
 
 
 
192
  n = len(boxes)
193
  if n <= 1:
194
  return boxes, scores, cls_ids
@@ -220,12 +283,20 @@ class Miner:
220
  return boxes[keep_idx], scores[keep_idx], cls_ids[keep_idx]
221
 
222
  @staticmethod
223
- def _max_score_per_cluster(post_boxes: np.ndarray,
224
- post_cls: np.ndarray,
225
- full_boxes: np.ndarray,
226
- full_scores: np.ndarray,
227
- full_cls: np.ndarray,
228
- iou_thresh: float) -> np.ndarray:
 
 
 
 
 
 
 
 
229
  n = len(post_boxes)
230
  if n == 0:
231
  return np.empty(0, dtype=np.float32)
@@ -245,10 +316,11 @@ class Miner:
245
  out[i] = float(np.max(full_scores[cluster])) if np.any(cluster) else 0.0
246
  return out
247
 
248
- def _conf_filter_mask(self, scores: np.ndarray,
249
- cls_ids: np.ndarray) -> np.ndarray:
 
250
  """Boolean keep-mask: score >= per-class threshold, with a per-class
251
- rescue if a class has zero boxes passing, admit its top-1 candidate
252
  when its score >= (per-class threshold - per-class bonus)."""
253
  if len(scores) == 0:
254
  return np.zeros(0, dtype=bool)
@@ -267,9 +339,52 @@ class Miner:
267
  keep[top] = True
268
  return keep
269
 
270
- def _per_view_pipeline(self, boxes: np.ndarray, scores: np.ndarray,
271
- cls_ids: np.ndarray
272
- ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  if len(boxes) > 1:
274
  keep = self._per_class_hard_nms(boxes, scores, cls_ids, self.iou_thres)
275
  boxes, scores, cls_ids = boxes[keep], scores[keep], cls_ids[keep]
@@ -282,9 +397,35 @@ class Miner:
282
  )
283
  return boxes, scores, cls_ids
284
 
285
- def _decode_final_dets(self, preds: np.ndarray, ratio: float,
286
- pad: tuple[float, float],
287
- orig_size: tuple[int, int]) -> list[BoundingBox]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
288
  if preds.ndim == 3 and preds.shape[0] == 1:
289
  preds = preds[0]
290
  if preds.ndim != 2 or preds.shape[1] < 6:
@@ -293,6 +434,7 @@ class Miner:
293
  boxes = preds[:, :4].astype(np.float32)
294
  scores = preds[:, 4].astype(np.float32)
295
  cls_ids = preds[:, 5].astype(np.int32)
 
296
 
297
  keep = self._conf_filter_mask(scores, cls_ids)
298
  boxes = boxes[keep]
@@ -307,12 +449,23 @@ class Miner:
307
  boxes /= ratio
308
  boxes = self._clip_boxes(boxes, orig_size)
309
 
 
 
 
 
 
 
310
  boxes, scores, cls_ids = self._per_view_pipeline(boxes, scores, cls_ids)
311
  return self._build_results(boxes, scores, cls_ids)
312
 
313
- def _decode_raw_yolo(self, preds: np.ndarray, ratio: float,
314
- pad: tuple[float, float],
315
- orig_size: tuple[int, int]) -> list[BoundingBox]:
 
 
 
 
 
316
  if preds.ndim != 3 or preds.shape[0] != 1:
317
  raise ValueError(f"Unexpected raw ONNX output shape: {preds.shape}")
318
  preds = preds[0]
@@ -329,6 +482,7 @@ class Miner:
329
  else:
330
  cls_ids = np.argmax(cls_part, axis=1).astype(np.int32)
331
  scores = cls_part[np.arange(len(cls_part)), cls_ids]
 
332
 
333
  keep = self._conf_filter_mask(scores, cls_ids)
334
  boxes_xywh = boxes_xywh[keep]
@@ -344,32 +498,22 @@ class Miner:
344
  boxes /= ratio
345
  boxes = self._clip_boxes(boxes, orig_size)
346
 
 
 
 
 
 
 
347
  boxes, scores, cls_ids = self._per_view_pipeline(boxes, scores, cls_ids)
348
  return self._build_results(boxes, scores, cls_ids)
349
 
350
- @staticmethod
351
- def _build_results(boxes: np.ndarray, scores: np.ndarray,
352
- cls_ids: np.ndarray) -> list[BoundingBox]:
353
- results: list[BoundingBox] = []
354
- for box, conf, cls_id in zip(boxes, scores, cls_ids):
355
- x1, y1, x2, y2 = box.tolist()
356
- if x2 <= x1 or y2 <= y1:
357
- continue
358
- results.append(
359
- BoundingBox(
360
- x1=int(math.floor(x1)),
361
- y1=int(math.floor(y1)),
362
- x2=int(math.ceil(x2)),
363
- y2=int(math.ceil(y2)),
364
- cls_id=int(cls_id),
365
- conf=float(conf),
366
- )
367
- )
368
- return results
369
-
370
- def _postprocess(self, output: np.ndarray, ratio: float,
371
- pad: tuple[float, float],
372
- orig_size: tuple[int, int]) -> list[BoundingBox]:
373
  if output.ndim == 2 and output.shape[1] >= 6:
374
  return self._decode_final_dets(output, ratio, pad, orig_size)
375
  if output.ndim == 3 and output.shape[0] == 1 and output.shape[2] == 6:
@@ -383,6 +527,8 @@ class Miner:
383
  raise TypeError(f"Input is not numpy array: {type(image)}")
384
  if image.ndim != 3:
385
  raise ValueError(f"Expected HWC image, got shape={image.shape}")
 
 
386
  if image.shape[2] != 3:
387
  raise ValueError(f"Expected 3 channels, got shape={image.shape}")
388
  if image.dtype != np.uint8:
@@ -399,6 +545,17 @@ class Miner:
399
  return self._postprocess(outputs[0], ratio, pad, orig_size)
400
 
401
  def _predict_tta(self, image: np.ndarray) -> list[BoundingBox]:
 
 
 
 
 
 
 
 
 
 
 
402
  boxes_orig = self._predict_single(image)
403
  flipped = cv2.flip(image, 1)
404
  boxes_flip = self._predict_single(flipped)
@@ -426,6 +583,7 @@ class Miner:
426
  if len(hard_keep) > self.max_det:
427
  top = np.argsort(-scores[hard_keep])[: self.max_det]
428
  hard_keep = hard_keep[top]
 
429
  boosted = self._max_score_per_cluster(
430
  coords[hard_keep], cls_ids[hard_keep],
431
  coords, scores, cls_ids, self.iou_thres,
@@ -450,14 +608,24 @@ class Miner:
450
  for j in range(len(kept_coords))
451
  ]
452
 
453
- def predict_batch(self, batch_images: list[ndarray], offset: int,
454
- n_keypoints: int) -> list[TVFrameResult]:
 
 
 
 
455
  results: list[TVFrameResult] = []
456
  for frame_number_in_batch, image in enumerate(batch_images):
457
  try:
458
- boxes = self._predict_tta(image)
 
 
 
459
  except Exception as e:
460
- print(f"Inference failed for frame {offset + frame_number_in_batch}: {e}")
 
 
 
461
  boxes = []
462
  results.append(
463
  TVFrameResult(
 
24
 
25
 
26
  class Miner:
27
+ """ONNX Runtime miner for fire / smoke / fire_extinguisher detection.
28
 
29
+ Strategy (ported from offense miner):
30
+ - per-class confidence threshold with per-class rescue bonus
31
+ - per-class hard NMS, then cross-class dedup
32
+ - horizontal-flip TTA with full-set cluster score boost
33
+ Plus fire001 specifics: class remap, sanity-box filter, TTA toggle.
34
+ """
35
+
36
+ class_names = ["fire", "smoke", "fire extinguisher"]
37
+ # Order the model emits classes in -- remapped to `class_names` index.
38
+ _model_class_order = ["fire", "fire extinguisher", "smoke"]
39
+
40
+ iou_thres = 0.55
41
  cross_iou_thresh = 0.8
42
+ max_det = 150
43
+
44
+ # Per-class confidence thresholds. Higher = fewer FP for that class.
45
+ # Indexed by class_names order: [fire, smoke, fire_extinguisher].
46
+ _conf_thres_array = np.array(
47
+ [0.6, 0.4, 0.3], dtype=np.float32
48
+ )
49
+ # Per-class rescue bonus. If a class has ZERO boxes passing the threshold
50
+ # in a frame, its top-1 candidate is admitted when its score is at least
51
+ # (threshold - bonus). Indexed by class_names order: fire gets no bonus
52
+ # (its FP control stays strict); smoke and fire extinguisher get small
53
+ # bonuses (0.10 and 0.15) so a lone borderline detection is still admitted.
54
+ _bonus_array = np.array(
55
+ [0, 0.1, 0.15], dtype=np.float32
56
+ )
57
+
58
+ # Box sanity filter (fire001-specific FP reduction): drop tiny / degenerate
59
+ # / image-spanning / extreme aspect ratio boxes.
60
+ min_box_area = 14 * 14
61
+ min_side = 8
62
+ max_aspect_ratio = 8.0
63
 
64
  def __init__(self, path_hf_repo: Path) -> None:
65
  model_path = path_hf_repo / "weights.onnx"
66
+ self.cls_remap = np.array(
67
+ [self.class_names.index(n) for n in self._model_class_order],
68
+ dtype=np.int32,
69
+ )
70
  print("ORT version:", ort.__version__)
71
 
72
  try:
73
  ort.preload_dlls()
74
+ print("✅ onnxruntime.preload_dlls() success")
75
  except Exception as e:
76
+ print(f"⚠️ preload_dlls failed: {e}")
77
 
78
  print("ORT available providers BEFORE session:", ort.get_available_providers())
79
 
 
86
  sess_options=sess_options,
87
  providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
88
  )
89
+ print("Created ORT session with preferred CUDA provider list")
90
  except Exception as e:
91
+ print(f"⚠️ CUDA session creation failed, falling back to CPU: {e}")
92
  self.session = ort.InferenceSession(
93
  str(model_path),
94
  sess_options=sess_options,
 
106
  self.output_names = [output.name for output in self.session.get_outputs()]
107
  self.input_shape = self.session.get_inputs()[0].shape
108
 
109
+ self.input_height = self._safe_dim(self.input_shape[2], default=1280)
110
+ self.input_width = self._safe_dim(self.input_shape[3], default=1280)
111
 
112
+ self.use_tta = True
113
+
114
+ print(f"ONNX model loaded from: {model_path}")
115
+ print(f"✅ ONNX providers: {self.session.get_providers()}")
116
+ print(f"✅ ONNX input: name={self.input_name}, shape={self.input_shape}")
117
  print("per-class conf: " + ", ".join(
118
+ f"{n}={t:.3f}" for n, t in zip(
119
+ self.class_names, self._conf_thres_array.tolist()
120
+ )
121
+ ))
122
 
123
  def __repr__(self) -> str:
124
  return (
 
130
  def _safe_dim(value, default: int) -> int:
131
  return value if isinstance(value, int) and value > 0 else default
132
 
133
def _letterbox(
    self,
    image: ndarray,
    new_shape: tuple[int, int],
    color=(114, 114, 114),
) -> tuple[ndarray, float, tuple[float, float]]:
    """Scale ``image`` to fit ``new_shape`` with one ratio, then pad it out.

    Args:
        image: Array whose first two dimensions are (height, width).
        new_shape: Target canvas, unpacked as ``(width, height)``.
        color: Constant border value handed to ``cv2.copyMakeBorder``.

    Returns:
        ``(padded, ratio, (dw, dh))``: the padded image, the uniform scale
        factor that was applied, and the (possibly fractional) half-padding
        along the horizontal and vertical axes.
    """
    src_h, src_w = image.shape[:2]
    target_w, target_h = new_shape

    # A single scale for both axes: the tighter of the two fits wins.
    ratio = min(target_w / src_w, target_h / src_h)
    scaled_w = int(round(src_w * ratio))
    scaled_h = int(round(src_h * ratio))

    if (scaled_w, scaled_h) != (src_w, src_h):
        # Cubic interpolation when enlarging, linear otherwise.
        if ratio > 1.0:
            interp = cv2.INTER_CUBIC
        else:
            interp = cv2.INTER_LINEAR
        image = cv2.resize(image, (scaled_w, scaled_h), interpolation=interp)

    dw = (target_w - scaled_w) / 2.0
    dh = (target_h - scaled_h) / 2.0

    # The -/+0.1 nudge resolves a x.5 half-padding deterministically: the
    # extra pixel of an odd remainder lands on the bottom/right side.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))

    padded = cv2.copyMakeBorder(
        image,
        top,
        bottom,
        left,
        right,
        borderType=cv2.BORDER_CONSTANT,
        value=color,
    )
    return padded, ratio, (dw, dh)
163
 
164
+ def _preprocess(
165
+ self, image: ndarray
166
+ ) -> tuple[np.ndarray, float, tuple[float, float], tuple[int, int]]:
167
  orig_h, orig_w = image.shape[:2]
168
+ img, ratio, pad = self._letterbox(
169
+ image, (self.input_width, self.input_height)
170
+ )
171
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
172
  img = img.astype(np.float32) / 255.0
173
  img = np.transpose(img, (2, 0, 1))[None, ...]
 
193
  return out
194
 
195
  @staticmethod
196
+ def _hard_nms(
197
+ boxes: np.ndarray, scores: np.ndarray, iou_thresh: float
198
+ ) -> np.ndarray:
199
  n = len(boxes)
200
  if n == 0:
201
  return np.array([], dtype=np.intp)
 
220
  order = rest[iou <= iou_thresh]
221
  return np.array(keep, dtype=np.intp)
222
 
223
+ def _per_class_hard_nms(
224
+ self,
225
+ boxes: np.ndarray,
226
+ scores: np.ndarray,
227
+ cls_ids: np.ndarray,
228
+ iou_thresh: float,
229
+ ) -> np.ndarray:
230
  if len(boxes) == 0:
231
  return np.array([], dtype=np.intp)
232
  all_keep: list[int] = []
 
238
  all_keep.sort()
239
  return np.array(all_keep, dtype=np.intp)
240
 
241
+ def _cross_class_dedup_op(
242
+ self,
243
+ boxes: np.ndarray,
244
+ scores: np.ndarray,
245
+ cls_ids: np.ndarray,
246
+ iou_thresh: float,
247
+ ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
248
+ """Remove near-duplicate boxes across classes.
249
+
250
+ Order candidates by (score - per_class_threshold) margin, then by area;
251
+ keep the highest, suppress every other box with IoU > iou_thresh.
252
+ This suppresses the case where the same physical object is detected
253
+ as multiple classes (e.g. fire vs smoke on the same flames).
254
+ """
255
  n = len(boxes)
256
  if n <= 1:
257
  return boxes, scores, cls_ids
 
283
  return boxes[keep_idx], scores[keep_idx], cls_ids[keep_idx]
284
 
285
  @staticmethod
286
+ def _max_score_per_cluster(
287
+ post_boxes: np.ndarray,
288
+ post_cls: np.ndarray,
289
+ full_boxes: np.ndarray,
290
+ full_scores: np.ndarray,
291
+ full_cls: np.ndarray,
292
+ iou_thresh: float,
293
+ ) -> np.ndarray:
294
+ """For each kept (post-NMS) box, return the max score over the FULL
295
+ candidate set among same-class boxes with IoU >= iou_thresh.
296
+
297
+ Used after horizontal-flip TTA: a high-confidence flipped detection
298
+ can raise the score of the corresponding original detection.
299
+ """
300
  n = len(post_boxes)
301
  if n == 0:
302
  return np.empty(0, dtype=np.float32)
 
316
  out[i] = float(np.max(full_scores[cluster])) if np.any(cluster) else 0.0
317
  return out
318
 
319
+ def _conf_filter_mask(
320
+ self, scores: np.ndarray, cls_ids: np.ndarray
321
+ ) -> np.ndarray:
322
  """Boolean keep-mask: score >= per-class threshold, with a per-class
323
+ rescue -- if a class has zero boxes passing, admit its top-1 candidate
324
  when its score >= (per-class threshold - per-class bonus)."""
325
  if len(scores) == 0:
326
  return np.zeros(0, dtype=bool)
 
339
  keep[top] = True
340
  return keep
341
 
342
+ def _filter_sane_boxes(
343
+ self,
344
+ boxes: np.ndarray,
345
+ scores: np.ndarray,
346
+ cls_ids: np.ndarray,
347
+ orig_size: tuple[int, int],
348
+ ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
349
+ """Drop tiny / degenerate / image-spanning / extreme-AR boxes (FP)."""
350
+ if len(boxes) == 0:
351
+ return boxes, scores, cls_ids
352
+ orig_w, orig_h = orig_size
353
+ image_area = float(orig_w * orig_h)
354
+ keep = []
355
+ for i, box in enumerate(boxes):
356
+ x1, y1, x2, y2 = box.tolist()
357
+ bw = x2 - x1
358
+ bh = y2 - y1
359
+ if bw <= 0 or bh <= 0:
360
+ continue
361
+ if bw < self.min_side or bh < self.min_side:
362
+ continue
363
+ area = bw * bh
364
+ if area < self.min_box_area:
365
+ continue
366
+ if area > 0.95 * image_area:
367
+ continue
368
+ ar = max(bw / max(bh, 1e-6), bh / max(bw, 1e-6))
369
+ if ar > self.max_aspect_ratio:
370
+ continue
371
+ keep.append(i)
372
+ if not keep:
373
+ return (
374
+ np.empty((0, 4), dtype=np.float32),
375
+ np.empty((0,), dtype=np.float32),
376
+ np.empty((0,), dtype=np.int32),
377
+ )
378
+ k = np.array(keep, dtype=np.intp)
379
+ return boxes[k], scores[k], cls_ids[k]
380
+
381
+ def _per_view_pipeline(
382
+ self,
383
+ boxes: np.ndarray,
384
+ scores: np.ndarray,
385
+ cls_ids: np.ndarray,
386
+ ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
387
+ """Per-view post-processing pipeline: per-class NMS -> cap -> cross-class dedup."""
388
  if len(boxes) > 1:
389
  keep = self._per_class_hard_nms(boxes, scores, cls_ids, self.iou_thres)
390
  boxes, scores, cls_ids = boxes[keep], scores[keep], cls_ids[keep]
 
397
  )
398
  return boxes, scores, cls_ids
399
 
400
@staticmethod
def _build_results(
    boxes: np.ndarray, scores: np.ndarray, cls_ids: np.ndarray
) -> list[BoundingBox]:
    """Convert aligned box / score / class arrays into ``BoundingBox`` items.

    Boxes whose right edge is not strictly right of the left edge, or whose
    bottom edge is not strictly below the top edge, are skipped. The
    remaining boxes are snapped outward to integers: ``floor`` for
    ``x1``/``y1`` and ``ceil`` for ``x2``/``y2``.

    Args:
        boxes: Rows of ``[x1, y1, x2, y2]``.
        scores: Per-box confidence, aligned with ``boxes``.
        cls_ids: Per-box class id, aligned with ``boxes``.

    Returns:
        One ``BoundingBox`` per non-degenerate input box, in input order.
    """

    def _unpacked_rows():
        # Yield plain-Python coordinates alongside each score and label.
        for coords, score, label in zip(boxes, scores, cls_ids):
            yield coords.tolist(), score, label

    return [
        BoundingBox(
            x1=int(math.floor(left)),
            y1=int(math.floor(top)),
            x2=int(math.ceil(right)),
            y2=int(math.ceil(bottom)),
            cls_id=int(label),
            conf=float(score),
        )
        for (left, top, right, bottom), score, label in _unpacked_rows()
        if not (right <= left or bottom <= top)
    ]
420
+
421
+ def _decode_final_dets(
422
+ self,
423
+ preds: np.ndarray,
424
+ ratio: float,
425
+ pad: tuple[float, float],
426
+ orig_size: tuple[int, int],
427
+ ) -> list[BoundingBox]:
428
+ """Final-detection output path: rows shaped [x1, y1, x2, y2, conf, cls_id]."""
429
  if preds.ndim == 3 and preds.shape[0] == 1:
430
  preds = preds[0]
431
  if preds.ndim != 2 or preds.shape[1] < 6:
 
434
  boxes = preds[:, :4].astype(np.float32)
435
  scores = preds[:, 4].astype(np.float32)
436
  cls_ids = preds[:, 5].astype(np.int32)
437
+ cls_ids = self.cls_remap[cls_ids]
438
 
439
  keep = self._conf_filter_mask(scores, cls_ids)
440
  boxes = boxes[keep]
 
449
  boxes /= ratio
450
  boxes = self._clip_boxes(boxes, orig_size)
451
 
452
+ boxes, scores, cls_ids = self._filter_sane_boxes(
453
+ boxes, scores, cls_ids, orig_size
454
+ )
455
+ if len(boxes) == 0:
456
+ return []
457
+
458
  boxes, scores, cls_ids = self._per_view_pipeline(boxes, scores, cls_ids)
459
  return self._build_results(boxes, scores, cls_ids)
460
 
461
+ def _decode_raw_yolo(
462
+ self,
463
+ preds: np.ndarray,
464
+ ratio: float,
465
+ pad: tuple[float, float],
466
+ orig_size: tuple[int, int],
467
+ ) -> list[BoundingBox]:
468
+ """Fallback raw-YOLO output path: per-anchor class logits."""
469
  if preds.ndim != 3 or preds.shape[0] != 1:
470
  raise ValueError(f"Unexpected raw ONNX output shape: {preds.shape}")
471
  preds = preds[0]
 
482
  else:
483
  cls_ids = np.argmax(cls_part, axis=1).astype(np.int32)
484
  scores = cls_part[np.arange(len(cls_part)), cls_ids]
485
+ cls_ids = self.cls_remap[cls_ids]
486
 
487
  keep = self._conf_filter_mask(scores, cls_ids)
488
  boxes_xywh = boxes_xywh[keep]
 
498
  boxes /= ratio
499
  boxes = self._clip_boxes(boxes, orig_size)
500
 
501
+ boxes, scores, cls_ids = self._filter_sane_boxes(
502
+ boxes, scores, cls_ids, orig_size
503
+ )
504
+ if len(boxes) == 0:
505
+ return []
506
+
507
  boxes, scores, cls_ids = self._per_view_pipeline(boxes, scores, cls_ids)
508
  return self._build_results(boxes, scores, cls_ids)
509
 
510
+ def _postprocess(
511
+ self,
512
+ output: np.ndarray,
513
+ ratio: float,
514
+ pad: tuple[float, float],
515
+ orig_size: tuple[int, int],
516
+ ) -> list[BoundingBox]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
517
  if output.ndim == 2 and output.shape[1] >= 6:
518
  return self._decode_final_dets(output, ratio, pad, orig_size)
519
  if output.ndim == 3 and output.shape[0] == 1 and output.shape[2] == 6:
 
527
  raise TypeError(f"Input is not numpy array: {type(image)}")
528
  if image.ndim != 3:
529
  raise ValueError(f"Expected HWC image, got shape={image.shape}")
530
+ if image.shape[0] <= 0 or image.shape[1] <= 0:
531
+ raise ValueError(f"Invalid image shape={image.shape}")
532
  if image.shape[2] != 3:
533
  raise ValueError(f"Expected 3 channels, got shape={image.shape}")
534
  if image.dtype != np.uint8:
 
545
  return self._postprocess(outputs[0], ratio, pad, orig_size)
546
 
547
  def _predict_tta(self, image: np.ndarray) -> list[BoundingBox]:
548
+ """Horizontal-flip TTA.
549
+
550
+ Strategy:
551
+ 1. Predict on original and on flipped image.
552
+ 2. Map flipped boxes back to original coordinates.
553
+ 3. Per-class hard NMS on the union.
554
+ 4. For each kept box, compute the max same-class score across the
555
+ FULL union (not just the post-NMS subset) -- this lets a high-
556
+ confidence flipped detection raise a borderline original one.
557
+ 5. Cross-class dedup to suppress same-physical-object multi-class.
558
+ """
559
  boxes_orig = self._predict_single(image)
560
  flipped = cv2.flip(image, 1)
561
  boxes_flip = self._predict_single(flipped)
 
583
  if len(hard_keep) > self.max_det:
584
  top = np.argsort(-scores[hard_keep])[: self.max_det]
585
  hard_keep = hard_keep[top]
586
+
587
  boosted = self._max_score_per_cluster(
588
  coords[hard_keep], cls_ids[hard_keep],
589
  coords, scores, cls_ids, self.iou_thres,
 
608
  for j in range(len(kept_coords))
609
  ]
610
 
611
+ def predict_batch(
612
+ self,
613
+ batch_images: list[ndarray],
614
+ offset: int,
615
+ n_keypoints: int,
616
+ ) -> list[TVFrameResult]:
617
  results: list[TVFrameResult] = []
618
  for frame_number_in_batch, image in enumerate(batch_images):
619
  try:
620
+ if self.use_tta:
621
+ boxes = self._predict_tta(image)
622
+ else:
623
+ boxes = self._predict_single(image)
624
  except Exception as e:
625
+ print(
626
+ f"⚠️ Inference failed for frame "
627
+ f"{offset + frame_number_in_batch}: {e}"
628
+ )
629
  boxes = []
630
  results.append(
631
  TVFrameResult(
pyproject.toml CHANGED
@@ -5,13 +5,14 @@ requires-python = ">=3.9"
5
 
6
  dependencies = [
7
  "numpy>=1.23",
8
- "onnxruntime[cuda,cudnn]>=1.16",
9
  "opencv-python>=4.7",
10
  "pillow>=9.5",
11
  "huggingface_hub>=0.19.4",
12
  "pydantic>=2.0",
13
  "pyyaml>=6.0",
14
  "aiohttp>=3.9",
15
- "torch",
16
- "torchvision",
 
17
  ]
 
5
 
6
  dependencies = [
7
  "numpy>=1.23",
8
+ "onnxruntime>=1.16",
9
  "opencv-python>=4.7",
10
  "pillow>=9.5",
11
  "huggingface_hub>=0.19.4",
12
  "pydantic>=2.0",
13
  "pyyaml>=6.0",
14
  "aiohttp>=3.9",
15
+ "torch==2.8.0",
16
+ "torchvision==0.23.0",
17
+ "torchaudio==2.8.0",
18
  ]
readme.md DELETED
@@ -1,17 +0,0 @@
1
- ---
2
- tags:
3
- - element_type:detect
4
- - model:onnxruntime
5
- - subnet:winner
6
- - object:fire
7
- - object:smoke
8
- - object:fire extinguisher
9
-
10
- manako:
11
- source: winner_fetch
12
- manifest_element_name: manak0/Detect-fire
13
- winner_repo_id: navierstocks/wash
14
- winner_revision: f0cb290789b770cdd918dc49c0c219c3c01bc70b
15
- ---
16
-
17
- ## YOLO26 ONNX detector
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
weights.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e576f1b4805a0b6f026626e781859a256293f14a0faec0f81b26a550919d2911
3
- size 19300989
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df7c65835fbffb2923b4b76531d5a343544ffd6018cdca67859f830f27d74b09
3
+ size 19407317