MTerryJack committed on
Commit
22f17e6
·
verified ·
1 Parent(s): f77dea5

subnet_bridge: copy winning miner repo into library

Browse files
Files changed (6) hide show
  1. README.md +3 -12
  2. chute_config.yml +3 -2
  3. miner.py +99 -327
  4. pyproject.toml +3 -4
  5. readme.md +17 -0
  6. weights.onnx +2 -2
README.md CHANGED
@@ -10,17 +10,8 @@ tags:
10
  manako:
11
  source: winner_fetch
12
  manifest_element_name: manak0/Detect-fire
13
- winner_repo_id: SuperBitDev/fire1
14
- winner_revision: 0fbc341ae743ebec0d2a1d48bdaaaaa0d6ad9338
15
  ---
16
 
17
- # Detect-fire-winner
18
-
19
- Published winning miner converted into a library element.
20
-
21
- - Source winner repo: `SuperBitDev/fire1`
22
- - Source revision: `0fbc341ae743ebec0d2a1d48bdaaaaa0d6ad9338`
23
- - Manifest element: `manak0/Detect-fire`
24
- - Element type: `Detect`
25
- - Objects: fire, smoke, fire extinguisher
26
- - Runtime model type: `onnxruntime`
 
10
  manako:
11
  source: winner_fetch
12
  manifest_element_name: manak0/Detect-fire
13
+ winner_repo_id: navierstocks/flame
14
+ winner_revision: ab3805fb6c7189ecdd1f0e8ed07aaf02a6b5b7b1
15
  ---
16
 
17
+ ## YOLO26 ONNX detector
 
 
 
 
 
 
 
 
 
chute_config.yml CHANGED
@@ -2,12 +2,13 @@ Image:
2
  from_base: parachutes/python:3.12
3
  run_command:
4
  - pip install --upgrade setuptools wheel
5
- - pip install 'numpy>=1.23' 'onnxruntime-gpu>=1.16' 'opencv-python>=4.7' 'pillow>=9.5' 'huggingface_hub>=0.19.4' 'pydantic>=2.0' 'pyyaml>=6.0' 'aiohttp>=3.9'
6
- - pip install torch==2.8.0 torchvision==0.23.0 torchaudio==2.8.0 --index-url https://download.pytorch.org/whl/cu128
7
 
8
  NodeSelector:
9
  gpu_count: 1
10
  min_vram_gb_per_gpu: 16
 
11
  include:
12
  - pro_6000
13
 
 
2
  from_base: parachutes/python:3.12
3
  run_command:
4
  - pip install --upgrade setuptools wheel
5
+ - pip install 'numpy>=1.23' 'onnxruntime-gpu[cuda,cudnn]>=1.16' 'opencv-python>=4.7' 'pillow>=9.5' 'huggingface_hub>=0.19.4' 'pydantic>=2.0' 'pyyaml>=6.0' 'aiohttp>=3.9'
6
+ - pip install torch torchvision
7
 
8
  NodeSelector:
9
  gpu_count: 1
10
  min_vram_gb_per_gpu: 16
11
+ max_hourly_price_per_gpu: 2
12
  include:
13
  - pro_6000
14
 
miner.py CHANGED
@@ -24,56 +24,27 @@ class TVFrameResult(BaseModel):
24
 
25
 
26
  class Miner:
27
- """ONNX Runtime miner for fire / smoke / fire_extinguisher detection.
28
-
29
- Strategy (ported from offense miner):
30
- - per-class confidence threshold with per-class rescue bonus
31
- - per-class hard NMS, then cross-class dedup
32
- - horizontal-flip TTA with full-set cluster score boost
33
- Plus fire001 specifics: class remap, sanity-box filter, TTA toggle.
34
- """
35
 
36
  class_names = ["fire", "smoke", "fire extinguisher"]
37
- # Order the model emits classes in -- remapped to `class_names` index.
38
- _model_class_order = ["fire", "fire extinguisher", "smoke"]
39
-
40
- iou_thres = 0.55
41
  cross_iou_thresh = 0.8
42
- max_det = 150
43
-
44
- # Per-class confidence thresholds. Higher = fewer FP for that class.
45
- # Indexed by class_names order: [fire, smoke, fire_extinguisher].
46
- _conf_thres_array = np.array(
47
- [0.6, 0.4, 0.3], dtype=np.float32
48
- )
49
- # Per-class rescue bonus. If a class has ZERO boxes passing the threshold
50
- # in a frame, its top-1 candidate is admitted when its score is at least
51
- # (threshold - bonus). Fire and smoke get a small bonus (variable
52
- # appearance); fire extinguisher does not (distinctive object, leave FP
53
- # control strict).
54
- _bonus_array = np.array(
55
- [0, 0.1, 0.15], dtype=np.float32
56
- )
57
-
58
- # Box sanity filter (fire001-specific FP reduction): drop tiny / degenerate
59
- # / image-spanning / extreme aspect ratio boxes.
60
- min_box_area = 14 * 14
61
- min_side = 8
62
  max_aspect_ratio = 8.0
 
 
 
63
 
64
  def __init__(self, path_hf_repo: Path) -> None:
65
  model_path = path_hf_repo / "weights.onnx"
66
- self.cls_remap = np.array(
67
- [self.class_names.index(n) for n in self._model_class_order],
68
- dtype=np.int32,
69
- )
70
  print("ORT version:", ort.__version__)
71
 
72
  try:
73
  ort.preload_dlls()
74
- print("✅ onnxruntime.preload_dlls() success")
75
  except Exception as e:
76
- print(f"⚠️ preload_dlls failed: {e}")
77
 
78
  print("ORT available providers BEFORE session:", ort.get_available_providers())
79
 
@@ -86,9 +57,9 @@ class Miner:
86
  sess_options=sess_options,
87
  providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
88
  )
89
- print("Created ORT session with preferred CUDA provider list")
90
  except Exception as e:
91
- print(f"⚠️ CUDA session creation failed, falling back to CPU: {e}")
92
  self.session = ort.InferenceSession(
93
  str(model_path),
94
  sess_options=sess_options,
@@ -106,19 +77,14 @@ class Miner:
106
  self.output_names = [output.name for output in self.session.get_outputs()]
107
  self.input_shape = self.session.get_inputs()[0].shape
108
 
109
- self.input_height = self._safe_dim(self.input_shape[2], default=1280)
110
- self.input_width = self._safe_dim(self.input_shape[3], default=1280)
111
 
112
- self.use_tta = True
113
-
114
- print(f"✅ ONNX model loaded from: {model_path}")
115
- print(f"✅ ONNX providers: {self.session.get_providers()}")
116
- print(f"✅ ONNX input: name={self.input_name}, shape={self.input_shape}")
117
  print("per-class conf: " + ", ".join(
118
- f"{n}={t:.3f}" for n, t in zip(
119
- self.class_names, self._conf_thres_array.tolist()
120
- )
121
- ))
122
 
123
  def __repr__(self) -> str:
124
  return (
@@ -130,44 +96,32 @@ class Miner:
130
  def _safe_dim(value, default: int) -> int:
131
  return value if isinstance(value, int) and value > 0 else default
132
 
133
- def _letterbox(
134
- self,
135
- image: ndarray,
136
- new_shape: tuple[int, int],
137
- color=(114, 114, 114),
138
- ) -> tuple[ndarray, float, tuple[float, float]]:
139
  h, w = image.shape[:2]
140
  new_w, new_h = new_shape
141
-
142
  ratio = min(new_w / w, new_h / h)
143
  resized_w = int(round(w * ratio))
144
  resized_h = int(round(h * ratio))
145
-
146
  if (resized_w, resized_h) != (w, h):
147
  interp = cv2.INTER_CUBIC if ratio > 1.0 else cv2.INTER_LINEAR
148
  image = cv2.resize(image, (resized_w, resized_h), interpolation=interp)
149
-
150
  dw = (new_w - resized_w) / 2.0
151
  dh = (new_h - resized_h) / 2.0
152
-
153
  left = int(round(dw - 0.1))
154
  right = int(round(dw + 0.1))
155
  top = int(round(dh - 0.1))
156
  bottom = int(round(dh + 0.1))
157
-
158
- padded = cv2.copyMakeBorder(
159
- image, top, bottom, left, right,
160
- borderType=cv2.BORDER_CONSTANT, value=color,
161
- )
162
  return padded, ratio, (dw, dh)
163
 
164
- def _preprocess(
165
- self, image: ndarray
166
- ) -> tuple[np.ndarray, float, tuple[float, float], tuple[int, int]]:
167
  orig_h, orig_w = image.shape[:2]
168
- img, ratio, pad = self._letterbox(
169
- image, (self.input_width, self.input_height)
170
- )
171
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
172
  img = img.astype(np.float32) / 255.0
173
  img = np.transpose(img, (2, 0, 1))[None, ...]
@@ -193,9 +147,8 @@ class Miner:
193
  return out
194
 
195
  @staticmethod
196
- def _hard_nms(
197
- boxes: np.ndarray, scores: np.ndarray, iou_thresh: float
198
- ) -> np.ndarray:
199
  n = len(boxes)
200
  if n == 0:
201
  return np.array([], dtype=np.intp)
@@ -220,38 +173,9 @@ class Miner:
220
  order = rest[iou <= iou_thresh]
221
  return np.array(keep, dtype=np.intp)
222
 
223
- def _per_class_hard_nms(
224
- self,
225
- boxes: np.ndarray,
226
- scores: np.ndarray,
227
- cls_ids: np.ndarray,
228
- iou_thresh: float,
229
- ) -> np.ndarray:
230
- if len(boxes) == 0:
231
- return np.array([], dtype=np.intp)
232
- all_keep: list[int] = []
233
- for c in np.unique(cls_ids):
234
- mask = cls_ids == c
235
- indices = np.where(mask)[0]
236
- keep = self._hard_nms(boxes[mask], scores[mask], iou_thresh)
237
- all_keep.extend(indices[keep].tolist())
238
- all_keep.sort()
239
- return np.array(all_keep, dtype=np.intp)
240
-
241
- def _cross_class_dedup_op(
242
- self,
243
- boxes: np.ndarray,
244
- scores: np.ndarray,
245
- cls_ids: np.ndarray,
246
- iou_thresh: float,
247
- ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
248
- """Remove near-duplicate boxes across classes.
249
-
250
- Order candidates by (score - per_class_threshold) margin, then by area;
251
- keep the highest, suppress every other box with IoU > iou_thresh.
252
- This suppresses the case where the same physical object is detected
253
- as multiple classes (e.g. fire vs smoke on the same flames).
254
- """
255
  n = len(boxes)
256
  if n <= 1:
257
  return boxes, scores, cls_ids
@@ -282,46 +206,33 @@ class Miner:
282
  keep_idx = np.array(keep, dtype=np.intp)
283
  return boxes[keep_idx], scores[keep_idx], cls_ids[keep_idx]
284
 
285
- @staticmethod
286
- def _max_score_per_cluster(
287
- post_boxes: np.ndarray,
288
- post_cls: np.ndarray,
289
- full_boxes: np.ndarray,
290
- full_scores: np.ndarray,
291
- full_cls: np.ndarray,
292
- iou_thresh: float,
293
- ) -> np.ndarray:
294
- """For each kept (post-NMS) box, return the max score over the FULL
295
- candidate set among same-class boxes with IoU >= iou_thresh.
296
-
297
- Used after horizontal-flip TTA: a high-confidence flipped detection
298
- can raise the score of the corresponding original detection.
299
- """
300
- n = len(post_boxes)
301
- if n == 0:
302
- return np.empty(0, dtype=np.float32)
303
- full_areas = (np.maximum(0.0, full_boxes[:, 2] - full_boxes[:, 0]) *
304
- np.maximum(0.0, full_boxes[:, 3] - full_boxes[:, 1]))
305
- out = np.empty(n, dtype=np.float32)
306
- for i in range(n):
307
- bi = post_boxes[i]
308
- xx1 = np.maximum(bi[0], full_boxes[:, 0])
309
- yy1 = np.maximum(bi[1], full_boxes[:, 1])
310
- xx2 = np.minimum(bi[2], full_boxes[:, 2])
311
- yy2 = np.minimum(bi[3], full_boxes[:, 3])
312
- inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
313
- a_i = max(0.0, float((bi[2] - bi[0]) * (bi[3] - bi[1])))
314
- iou = inter / (a_i + full_areas - inter + 1e-7)
315
- cluster = (iou >= iou_thresh) & (full_cls == post_cls[i])
316
- out[i] = float(np.max(full_scores[cluster])) if np.any(cluster) else 0.0
317
- return out
318
 
319
- def _conf_filter_mask(
320
- self, scores: np.ndarray, cls_ids: np.ndarray
321
- ) -> np.ndarray:
322
- """Boolean keep-mask: score >= per-class threshold, with a per-class
323
- rescue -- if a class has zero boxes passing, admit its top-1 candidate
324
- when its score >= (per-class threshold - per-class bonus)."""
325
  if len(scores) == 0:
326
  return np.zeros(0, dtype=bool)
327
  thr = self._conf_thres_array[cls_ids]
@@ -339,54 +250,16 @@ class Miner:
339
  keep[top] = True
340
  return keep
341
 
342
- def _filter_sane_boxes(
343
- self,
344
- boxes: np.ndarray,
345
- scores: np.ndarray,
346
- cls_ids: np.ndarray,
347
- orig_size: tuple[int, int],
348
- ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
349
- """Drop tiny / degenerate / image-spanning / extreme-AR boxes (FP)."""
350
  if len(boxes) == 0:
351
  return boxes, scores, cls_ids
352
- orig_w, orig_h = orig_size
353
- image_area = float(orig_w * orig_h)
354
- keep = []
355
- for i, box in enumerate(boxes):
356
- x1, y1, x2, y2 = box.tolist()
357
- bw = x2 - x1
358
- bh = y2 - y1
359
- if bw <= 0 or bh <= 0:
360
- continue
361
- if bw < self.min_side or bh < self.min_side:
362
- continue
363
- area = bw * bh
364
- if area < self.min_box_area:
365
- continue
366
- if area > 0.95 * image_area:
367
- continue
368
- ar = max(bw / max(bh, 1e-6), bh / max(bw, 1e-6))
369
- if ar > self.max_aspect_ratio:
370
- continue
371
- keep.append(i)
372
- if not keep:
373
- return (
374
- np.empty((0, 4), dtype=np.float32),
375
- np.empty((0,), dtype=np.float32),
376
- np.empty((0,), dtype=np.int32),
377
- )
378
- k = np.array(keep, dtype=np.intp)
379
- return boxes[k], scores[k], cls_ids[k]
380
-
381
- def _per_view_pipeline(
382
- self,
383
- boxes: np.ndarray,
384
- scores: np.ndarray,
385
- cls_ids: np.ndarray,
386
- ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
387
- """Per-view post-processing pipeline: per-class NMS -> cap -> cross-class dedup."""
388
  if len(boxes) > 1:
389
- keep = self._per_class_hard_nms(boxes, scores, cls_ids, self.iou_thres)
390
  boxes, scores, cls_ids = boxes[keep], scores[keep], cls_ids[keep]
391
  if len(scores) > self.max_det:
392
  top = np.argsort(-scores)[: self.max_det]
@@ -397,35 +270,9 @@ class Miner:
397
  )
398
  return boxes, scores, cls_ids
399
 
400
- @staticmethod
401
- def _build_results(
402
- boxes: np.ndarray, scores: np.ndarray, cls_ids: np.ndarray
403
- ) -> list[BoundingBox]:
404
- results: list[BoundingBox] = []
405
- for box, conf, cls_id in zip(boxes, scores, cls_ids):
406
- x1, y1, x2, y2 = box.tolist()
407
- if x2 <= x1 or y2 <= y1:
408
- continue
409
- results.append(
410
- BoundingBox(
411
- x1=int(math.floor(x1)),
412
- y1=int(math.floor(y1)),
413
- x2=int(math.ceil(x2)),
414
- y2=int(math.ceil(y2)),
415
- cls_id=int(cls_id),
416
- conf=float(conf),
417
- )
418
- )
419
- return results
420
-
421
- def _decode_final_dets(
422
- self,
423
- preds: np.ndarray,
424
- ratio: float,
425
- pad: tuple[float, float],
426
- orig_size: tuple[int, int],
427
- ) -> list[BoundingBox]:
428
- """Final-detection output path: rows shaped [x1, y1, x2, y2, conf, cls_id]."""
429
  if preds.ndim == 3 and preds.shape[0] == 1:
430
  preds = preds[0]
431
  if preds.ndim != 2 or preds.shape[1] < 6:
@@ -434,7 +281,6 @@ class Miner:
434
  boxes = preds[:, :4].astype(np.float32)
435
  scores = preds[:, 4].astype(np.float32)
436
  cls_ids = preds[:, 5].astype(np.int32)
437
- cls_ids = self.cls_remap[cls_ids]
438
 
439
  keep = self._conf_filter_mask(scores, cls_ids)
440
  boxes = boxes[keep]
@@ -449,23 +295,14 @@ class Miner:
449
  boxes /= ratio
450
  boxes = self._clip_boxes(boxes, orig_size)
451
 
452
- boxes, scores, cls_ids = self._filter_sane_boxes(
453
  boxes, scores, cls_ids, orig_size
454
  )
455
- if len(boxes) == 0:
456
- return []
457
-
458
- boxes, scores, cls_ids = self._per_view_pipeline(boxes, scores, cls_ids)
459
  return self._build_results(boxes, scores, cls_ids)
460
 
461
- def _decode_raw_yolo(
462
- self,
463
- preds: np.ndarray,
464
- ratio: float,
465
- pad: tuple[float, float],
466
- orig_size: tuple[int, int],
467
- ) -> list[BoundingBox]:
468
- """Fallback raw-YOLO output path: per-anchor class logits."""
469
  if preds.ndim != 3 or preds.shape[0] != 1:
470
  raise ValueError(f"Unexpected raw ONNX output shape: {preds.shape}")
471
  preds = preds[0]
@@ -482,7 +319,6 @@ class Miner:
482
  else:
483
  cls_ids = np.argmax(cls_part, axis=1).astype(np.int32)
484
  scores = cls_part[np.arange(len(cls_part)), cls_ids]
485
- cls_ids = self.cls_remap[cls_ids]
486
 
487
  keep = self._conf_filter_mask(scores, cls_ids)
488
  boxes_xywh = boxes_xywh[keep]
@@ -498,22 +334,34 @@ class Miner:
498
  boxes /= ratio
499
  boxes = self._clip_boxes(boxes, orig_size)
500
 
501
- boxes, scores, cls_ids = self._filter_sane_boxes(
502
  boxes, scores, cls_ids, orig_size
503
  )
504
- if len(boxes) == 0:
505
- return []
506
-
507
- boxes, scores, cls_ids = self._per_view_pipeline(boxes, scores, cls_ids)
508
  return self._build_results(boxes, scores, cls_ids)
509
 
510
- def _postprocess(
511
- self,
512
- output: np.ndarray,
513
- ratio: float,
514
- pad: tuple[float, float],
515
- orig_size: tuple[int, int],
516
- ) -> list[BoundingBox]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
517
  if output.ndim == 2 and output.shape[1] >= 6:
518
  return self._decode_final_dets(output, ratio, pad, orig_size)
519
  if output.ndim == 3 and output.shape[0] == 1 and output.shape[2] == 6:
@@ -527,8 +375,6 @@ class Miner:
527
  raise TypeError(f"Input is not numpy array: {type(image)}")
528
  if image.ndim != 3:
529
  raise ValueError(f"Expected HWC image, got shape={image.shape}")
530
- if image.shape[0] <= 0 or image.shape[1] <= 0:
531
- raise ValueError(f"Invalid image shape={image.shape}")
532
  if image.shape[2] != 3:
533
  raise ValueError(f"Expected 3 channels, got shape={image.shape}")
534
  if image.dtype != np.uint8:
@@ -544,88 +390,14 @@ class Miner:
544
  outputs = self.session.run(self.output_names, {self.input_name: input_tensor})
545
  return self._postprocess(outputs[0], ratio, pad, orig_size)
546
 
547
- def _predict_tta(self, image: np.ndarray) -> list[BoundingBox]:
548
- """Horizontal-flip TTA.
549
-
550
- Strategy:
551
- 1. Predict on original and on flipped image.
552
- 2. Map flipped boxes back to original coordinates.
553
- 3. Per-class hard NMS on the union.
554
- 4. For each kept box, compute the max same-class score across the
555
- FULL union (not just the post-NMS subset) -- this lets a high-
556
- confidence flipped detection raise a borderline original one.
557
- 5. Cross-class dedup to suppress same-physical-object multi-class.
558
- """
559
- boxes_orig = self._predict_single(image)
560
- flipped = cv2.flip(image, 1)
561
- boxes_flip = self._predict_single(flipped)
562
- w = image.shape[1]
563
- boxes_flip = [
564
- BoundingBox(
565
- x1=w - b.x2, y1=b.y1, x2=w - b.x1, y2=b.y2,
566
- cls_id=b.cls_id, conf=b.conf,
567
- )
568
- for b in boxes_flip
569
- ]
570
- all_boxes = boxes_orig + boxes_flip
571
- if not all_boxes:
572
- return []
573
-
574
- coords = np.array(
575
- [[b.x1, b.y1, b.x2, b.y2] for b in all_boxes], dtype=np.float32
576
- )
577
- scores = np.array([b.conf for b in all_boxes], dtype=np.float32)
578
- cls_ids = np.array([b.cls_id for b in all_boxes], dtype=np.int32)
579
-
580
- hard_keep = self._per_class_hard_nms(coords, scores, cls_ids, self.iou_thres)
581
- if len(hard_keep) == 0:
582
- return []
583
- if len(hard_keep) > self.max_det:
584
- top = np.argsort(-scores[hard_keep])[: self.max_det]
585
- hard_keep = hard_keep[top]
586
-
587
- boosted = self._max_score_per_cluster(
588
- coords[hard_keep], cls_ids[hard_keep],
589
- coords, scores, cls_ids, self.iou_thres,
590
- )
591
-
592
- kept_coords = coords[hard_keep]
593
- kept_cls = cls_ids[hard_keep]
594
- if len(kept_coords) > 1:
595
- kept_coords, boosted, kept_cls = self._cross_class_dedup_op(
596
- kept_coords, boosted, kept_cls, self.cross_iou_thresh
597
- )
598
-
599
- return [
600
- BoundingBox(
601
- x1=int(math.floor(kept_coords[j, 0])),
602
- y1=int(math.floor(kept_coords[j, 1])),
603
- x2=int(math.ceil(kept_coords[j, 2])),
604
- y2=int(math.ceil(kept_coords[j, 3])),
605
- cls_id=int(kept_cls[j]),
606
- conf=float(boosted[j]),
607
- )
608
- for j in range(len(kept_coords))
609
- ]
610
-
611
- def predict_batch(
612
- self,
613
- batch_images: list[ndarray],
614
- offset: int,
615
- n_keypoints: int,
616
- ) -> list[TVFrameResult]:
617
  results: list[TVFrameResult] = []
618
  for frame_number_in_batch, image in enumerate(batch_images):
619
  try:
620
- if self.use_tta:
621
- boxes = self._predict_tta(image)
622
- else:
623
- boxes = self._predict_single(image)
624
  except Exception as e:
625
- print(
626
- f"⚠️ Inference failed for frame "
627
- f"{offset + frame_number_in_batch}: {e}"
628
- )
629
  boxes = []
630
  results.append(
631
  TVFrameResult(
 
24
 
25
 
26
  class Miner:
27
+ """ONNX Runtime miner. Hard global NMS + sanity + cross-class dedup, no TTA."""
 
 
 
 
 
 
 
28
 
29
  class_names = ["fire", "smoke", "fire extinguisher"]
30
+ input_size = 1280
31
+ iou_thres = 0.5
 
 
32
  cross_iou_thresh = 0.8
33
+ min_box_area = 144.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  max_aspect_ratio = 8.0
35
+ max_det = 300
36
+ _conf_thres_array = np.array([0.25, 0.2, 0.1], dtype=np.float32)
37
+ _bonus_array = np.array([0.1, 0.1, 0.05], dtype=np.float32)
38
 
39
  def __init__(self, path_hf_repo: Path) -> None:
40
  model_path = path_hf_repo / "weights.onnx"
 
 
 
 
41
  print("ORT version:", ort.__version__)
42
 
43
  try:
44
  ort.preload_dlls()
45
+ print("preload_dlls success")
46
  except Exception as e:
47
+ print(f"preload_dlls failed: {e}")
48
 
49
  print("ORT available providers BEFORE session:", ort.get_available_providers())
50
 
 
57
  sess_options=sess_options,
58
  providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
59
  )
60
+ print("Created ORT session with preferred CUDA provider list")
61
  except Exception as e:
62
+ print(f"CUDA session creation failed, falling back to CPU: {e}")
63
  self.session = ort.InferenceSession(
64
  str(model_path),
65
  sess_options=sess_options,
 
77
  self.output_names = [output.name for output in self.session.get_outputs()]
78
  self.input_shape = self.session.get_inputs()[0].shape
79
 
80
+ self.input_height = self._safe_dim(self.input_shape[2], default=self.input_size)
81
+ self.input_width = self._safe_dim(self.input_shape[3], default=self.input_size)
82
 
83
+ print(f"ONNX model loaded from: {model_path}")
84
+ print(f"ONNX input: name={self.input_name}, shape={self.input_shape}")
 
 
 
85
  print("per-class conf: " + ", ".join(
86
+ f"{n}={t:.3f}" for n, t in zip(self.class_names,
87
+ self._conf_thres_array.tolist())))
 
 
88
 
89
  def __repr__(self) -> str:
90
  return (
 
96
  def _safe_dim(value, default: int) -> int:
97
  return value if isinstance(value, int) and value > 0 else default
98
 
99
+ def _letterbox(self, image: ndarray, new_shape: tuple[int, int],
100
+ color=(114, 114, 114)
101
+ ) -> tuple[ndarray, float, tuple[float, float]]:
 
 
 
102
  h, w = image.shape[:2]
103
  new_w, new_h = new_shape
 
104
  ratio = min(new_w / w, new_h / h)
105
  resized_w = int(round(w * ratio))
106
  resized_h = int(round(h * ratio))
 
107
  if (resized_w, resized_h) != (w, h):
108
  interp = cv2.INTER_CUBIC if ratio > 1.0 else cv2.INTER_LINEAR
109
  image = cv2.resize(image, (resized_w, resized_h), interpolation=interp)
 
110
  dw = (new_w - resized_w) / 2.0
111
  dh = (new_h - resized_h) / 2.0
 
112
  left = int(round(dw - 0.1))
113
  right = int(round(dw + 0.1))
114
  top = int(round(dh - 0.1))
115
  bottom = int(round(dh + 0.1))
116
+ padded = cv2.copyMakeBorder(image, top, bottom, left, right,
117
+ borderType=cv2.BORDER_CONSTANT, value=color)
 
 
 
118
  return padded, ratio, (dw, dh)
119
 
120
+ def _preprocess(self, image: ndarray
121
+ ) -> tuple[np.ndarray, float, tuple[float, float],
122
+ tuple[int, int]]:
123
  orig_h, orig_w = image.shape[:2]
124
+ img, ratio, pad = self._letterbox(image, (self.input_width, self.input_height))
 
 
125
  img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
126
  img = img.astype(np.float32) / 255.0
127
  img = np.transpose(img, (2, 0, 1))[None, ...]
 
147
  return out
148
 
149
  @staticmethod
150
+ def _hard_nms(boxes: np.ndarray, scores: np.ndarray,
151
+ iou_thresh: float) -> np.ndarray:
 
152
  n = len(boxes)
153
  if n == 0:
154
  return np.array([], dtype=np.intp)
 
173
  order = rest[iou <= iou_thresh]
174
  return np.array(keep, dtype=np.intp)
175
 
176
+ def _cross_class_dedup_op(self, boxes: np.ndarray, scores: np.ndarray,
177
+ cls_ids: np.ndarray, iou_thresh: float
178
+ ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  n = len(boxes)
180
  if n <= 1:
181
  return boxes, scores, cls_ids
 
206
  keep_idx = np.array(keep, dtype=np.intp)
207
  return boxes[keep_idx], scores[keep_idx], cls_ids[keep_idx]
208
 
209
+ def _filter_sane_boxes(self, boxes: np.ndarray, scores: np.ndarray,
210
+ cls_ids: np.ndarray, orig_size: tuple[int, int]
211
+ ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
212
+ if len(boxes) == 0:
213
+ return boxes, scores, cls_ids
214
+ orig_w, orig_h = orig_size
215
+ image_area = float(orig_w * orig_h)
216
+ bw = np.maximum(0.0, boxes[:, 2] - boxes[:, 0])
217
+ bh = np.maximum(0.0, boxes[:, 3] - boxes[:, 1])
218
+ area = bw * bh
219
+ ar = np.where(
220
+ (bw > 0) & (bh > 0),
221
+ np.maximum(bw / np.maximum(bh, 1e-6), bh / np.maximum(bw, 1e-6)),
222
+ np.inf,
223
+ )
224
+ keep = (
225
+ (area >= self.min_box_area) &
226
+ (area <= 0.95 * image_area) &
227
+ (ar <= self.max_aspect_ratio)
228
+ )
229
+ return boxes[keep], scores[keep], cls_ids[keep]
 
 
 
 
 
 
 
 
 
 
 
 
230
 
231
+ def _conf_filter_mask(self, scores: np.ndarray,
232
+ cls_ids: np.ndarray) -> np.ndarray:
233
+ """score >= per-class threshold, with per-class rescue: if a class has
234
+ zero passing boxes, admit its top-1 candidate when its score >=
235
+ (per-class threshold - per-class bonus)."""
 
236
  if len(scores) == 0:
237
  return np.zeros(0, dtype=bool)
238
  thr = self._conf_thres_array[cls_ids]
 
250
  keep[top] = True
251
  return keep
252
 
253
+ def _per_view_pipeline(self, boxes: np.ndarray, scores: np.ndarray,
254
+ cls_ids: np.ndarray, orig_size: tuple[int, int]
255
+ ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
256
+ boxes, scores, cls_ids = self._filter_sane_boxes(
257
+ boxes, scores, cls_ids, orig_size
258
+ )
 
 
259
  if len(boxes) == 0:
260
  return boxes, scores, cls_ids
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
  if len(boxes) > 1:
262
+ keep = self._hard_nms(boxes, scores, self.iou_thres)
263
  boxes, scores, cls_ids = boxes[keep], scores[keep], cls_ids[keep]
264
  if len(scores) > self.max_det:
265
  top = np.argsort(-scores)[: self.max_det]
 
270
  )
271
  return boxes, scores, cls_ids
272
 
273
+ def _decode_final_dets(self, preds: np.ndarray, ratio: float,
274
+ pad: tuple[float, float],
275
+ orig_size: tuple[int, int]) -> list[BoundingBox]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  if preds.ndim == 3 and preds.shape[0] == 1:
277
  preds = preds[0]
278
  if preds.ndim != 2 or preds.shape[1] < 6:
 
281
  boxes = preds[:, :4].astype(np.float32)
282
  scores = preds[:, 4].astype(np.float32)
283
  cls_ids = preds[:, 5].astype(np.int32)
 
284
 
285
  keep = self._conf_filter_mask(scores, cls_ids)
286
  boxes = boxes[keep]
 
295
  boxes /= ratio
296
  boxes = self._clip_boxes(boxes, orig_size)
297
 
298
+ boxes, scores, cls_ids = self._per_view_pipeline(
299
  boxes, scores, cls_ids, orig_size
300
  )
 
 
 
 
301
  return self._build_results(boxes, scores, cls_ids)
302
 
303
+ def _decode_raw_yolo(self, preds: np.ndarray, ratio: float,
304
+ pad: tuple[float, float],
305
+ orig_size: tuple[int, int]) -> list[BoundingBox]:
 
 
 
 
 
306
  if preds.ndim != 3 or preds.shape[0] != 1:
307
  raise ValueError(f"Unexpected raw ONNX output shape: {preds.shape}")
308
  preds = preds[0]
 
319
  else:
320
  cls_ids = np.argmax(cls_part, axis=1).astype(np.int32)
321
  scores = cls_part[np.arange(len(cls_part)), cls_ids]
 
322
 
323
  keep = self._conf_filter_mask(scores, cls_ids)
324
  boxes_xywh = boxes_xywh[keep]
 
334
  boxes /= ratio
335
  boxes = self._clip_boxes(boxes, orig_size)
336
 
337
+ boxes, scores, cls_ids = self._per_view_pipeline(
338
  boxes, scores, cls_ids, orig_size
339
  )
 
 
 
 
340
  return self._build_results(boxes, scores, cls_ids)
341
 
342
+ @staticmethod
343
+ def _build_results(boxes: np.ndarray, scores: np.ndarray,
344
+ cls_ids: np.ndarray) -> list[BoundingBox]:
345
+ results: list[BoundingBox] = []
346
+ for box, conf, cls_id in zip(boxes, scores, cls_ids):
347
+ x1, y1, x2, y2 = box.tolist()
348
+ if x2 <= x1 or y2 <= y1:
349
+ continue
350
+ results.append(
351
+ BoundingBox(
352
+ x1=int(math.floor(x1)),
353
+ y1=int(math.floor(y1)),
354
+ x2=int(math.ceil(x2)),
355
+ y2=int(math.ceil(y2)),
356
+ cls_id=int(cls_id),
357
+ conf=float(conf),
358
+ )
359
+ )
360
+ return results
361
+
362
+ def _postprocess(self, output: np.ndarray, ratio: float,
363
+ pad: tuple[float, float],
364
+ orig_size: tuple[int, int]) -> list[BoundingBox]:
365
  if output.ndim == 2 and output.shape[1] >= 6:
366
  return self._decode_final_dets(output, ratio, pad, orig_size)
367
  if output.ndim == 3 and output.shape[0] == 1 and output.shape[2] == 6:
 
375
  raise TypeError(f"Input is not numpy array: {type(image)}")
376
  if image.ndim != 3:
377
  raise ValueError(f"Expected HWC image, got shape={image.shape}")
 
 
378
  if image.shape[2] != 3:
379
  raise ValueError(f"Expected 3 channels, got shape={image.shape}")
380
  if image.dtype != np.uint8:
 
390
  outputs = self.session.run(self.output_names, {self.input_name: input_tensor})
391
  return self._postprocess(outputs[0], ratio, pad, orig_size)
392
 
393
+ def predict_batch(self, batch_images: list[ndarray], offset: int,
394
+ n_keypoints: int) -> list[TVFrameResult]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
395
  results: list[TVFrameResult] = []
396
  for frame_number_in_batch, image in enumerate(batch_images):
397
  try:
398
+ boxes = self._predict_single(image)
 
 
 
399
  except Exception as e:
400
+ print(f"Inference failed for frame {offset + frame_number_in_batch}: {e}")
 
 
 
401
  boxes = []
402
  results.append(
403
  TVFrameResult(
pyproject.toml CHANGED
@@ -5,14 +5,13 @@ requires-python = ">=3.9"
5
 
6
  dependencies = [
7
  "numpy>=1.23",
8
- "onnxruntime>=1.16",
9
  "opencv-python>=4.7",
10
  "pillow>=9.5",
11
  "huggingface_hub>=0.19.4",
12
  "pydantic>=2.0",
13
  "pyyaml>=6.0",
14
  "aiohttp>=3.9",
15
- "torch==2.8.0",
16
- "torchvision==0.23.0",
17
- "torchaudio==2.8.0",
18
  ]
 
5
 
6
  dependencies = [
7
  "numpy>=1.23",
8
+ "onnxruntime[cuda,cudnn]>=1.16",
9
  "opencv-python>=4.7",
10
  "pillow>=9.5",
11
  "huggingface_hub>=0.19.4",
12
  "pydantic>=2.0",
13
  "pyyaml>=6.0",
14
  "aiohttp>=3.9",
15
+ "torch",
16
+ "torchvision",
 
17
  ]
readme.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - element_type:detect
4
+ - model:onnxruntime
5
+ - subnet:winner
6
+ - object:fire
7
+ - object:smoke
8
+ - object:fire extinguisher
9
+
10
+ manako:
11
+ source: winner_fetch
12
+ manifest_element_name: manak0/Detect-fire
13
+ winner_repo_id: navierstocks/flame
14
+ winner_revision: ab3805fb6c7189ecdd1f0e8ed07aaf02a6b5b7b1
15
+ ---
16
+
17
+ ## YOLO26 ONNX detector
weights.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df7c65835fbffb2923b4b76531d5a343544ffd6018cdca67859f830f27d74b09
3
- size 19407317
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2a7d34f742b87de31e77f775f001de473a07f86723234b34f1bbcf3197a16f2
3
+ size 19407447