aitask1024 committed on
Commit
82aba8a
·
verified ·
1 Parent(s): 1d49ff0
README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ---
2
+ license: mit
3
+ ---
__pycache__/miner.cpython-310.pyc DELETED
Binary file (17.2 kB)
 
chute_config.yml CHANGED
@@ -2,20 +2,16 @@ Image:
2
  from_base: parachutes/python:3.12
3
  run_command:
4
  - pip install --upgrade setuptools wheel
5
- - pip install 'numpy>=1.23' 'onnxruntime-gpu>=1.16' 'opencv-python>=4.7' 'pillow>=9.5' 'huggingface_hub>=0.19.4' 'pydantic>=2.0' 'pyyaml>=6.0' 'aiohttp>=3.9'
6
- - pip install torch==2.8.0 torchvision==0.23.0 torchaudio==2.8.0 --index-url https://download.pytorch.org/whl/cu128
 
7
 
8
  NodeSelector:
9
  gpu_count: 1
10
- min_vram_gb_per_gpu: 16
11
- max_hourly_price_per_gpu: 0.5
12
-
13
- # Required for integrated SN44 chutes (TEE policy enforced as of 2026-04-27).
14
  include:
15
  - pro_6000
16
 
17
  Chute:
18
- # Required for integrated SN44 chutes (TEE policy enforced as of 2026-04-27).
19
  tee: true
20
  timeout_seconds: 900
21
  concurrency: 4
 
2
  from_base: parachutes/python:3.12
3
  run_command:
4
  - pip install --upgrade setuptools wheel
5
+ - pip install 'numpy>=1.23' 'onnxruntime-gpu[cuda,cudnn]>=1.16' 'opencv-python>=4.7' 'pillow>=9.5' 'huggingface_hub>=0.19.4' 'pydantic>=2.0' 'pyyaml>=6.0' 'aiohttp>=3.9'
6
+ - pip install torch torchvision
7
+ set_workdir: /app
8
 
9
  NodeSelector:
10
  gpu_count: 1
 
 
 
 
11
  include:
12
  - pro_6000
13
 
14
  Chute:
 
15
  tee: true
16
  timeout_seconds: 900
17
  concurrency: 4
class_names.txt CHANGED
@@ -1,4 +1,4 @@
1
  petrol hose
2
  petrol pump
3
  price board
4
- roof canopy
 
1
  petrol hose
2
  petrol pump
3
  price board
4
+ roof canopy
miner.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from pathlib import Path
2
  import math
3
 
@@ -22,32 +23,18 @@ class TVFrameResult(BaseModel):
22
  boxes: list[BoundingBox]
23
  keypoints: list[tuple[int, int]]
24
 
25
- SIZE = 1280
26
- TARGET_CLASS_NAMES = ["petrol hose", "petrol pump", "price board", "roof canopy"]
27
-
28
 
29
  class Miner:
 
 
 
 
30
  def __init__(self, path_hf_repo: Path) -> None:
31
- model_path = path_hf_repo / "weights.onnx"
32
- cn_path = model_path.with_name("class_names.txt")
33
- self.class_names = TARGET_CLASS_NAMES.copy()
34
- if cn_path.is_file():
35
- lines = cn_path.read_text(encoding="utf-8").splitlines()
36
- model_class_order = [
37
- ln.strip()
38
- for ln in lines
39
- if ln.strip() and not ln.strip().startswith("#")
40
- ]
41
- if len(model_class_order) == len(self.class_names) and set(model_class_order) == set(self.class_names):
42
- self.cls_remap = np.array(
43
- [self.class_names.index(n) for n in model_class_order], dtype=np.int32
44
- )
45
- else:
46
- # If class_names.txt is missing/invalid for this target order, keep identity mapping.
47
- self.cls_remap = np.arange(len(self.class_names), dtype=np.int32)
48
- else:
49
- # Fallback when no class_names.txt is present: assume ONNX class order == target order.
50
- self.cls_remap = np.arange(len(self.class_names), dtype=np.int32)
51
  print("ORT version:", ort.__version__)
52
 
53
  try:
@@ -88,21 +75,26 @@ class Miner:
88
  self.output_names = [output.name for output in self.session.get_outputs()]
89
  self.input_shape = self.session.get_inputs()[0].shape
90
 
91
- self.input_height = self._safe_dim(self.input_shape[2], default=SIZE)
92
- self.input_width = self._safe_dim(self.input_shape[3], default=SIZE)
 
 
 
 
 
93
 
94
- self.conf_thres = 0.5
95
- self.iou_thres = 0.45
96
- self.max_det = 30
97
- self.use_tta = True
98
 
99
- print(f"✅ ONNX model loaded from: {model_path}")
100
  print(f"✅ ONNX providers: {self.session.get_providers()}")
101
  print(f"✅ ONNX input: name={self.input_name}, shape={self.input_shape}")
 
102
 
103
  def __repr__(self) -> str:
104
  return (
105
- f"ONNXRuntime(session={type(self.session).__name__}, "
106
  f"providers={self.session.get_providers()})"
107
  )
108
 
@@ -116,13 +108,6 @@ class Miner:
116
  new_shape: tuple[int, int],
117
  color=(114, 114, 114),
118
  ) -> tuple[ndarray, float, tuple[float, float]]:
119
- """
120
- Resize with unchanged aspect ratio and pad to target shape.
121
- Returns:
122
- padded_image,
123
- ratio,
124
- (pad_w, pad_h) # half-padding
125
- """
126
  h, w = image.shape[:2]
127
  new_w, new_h = new_shape
128
 
@@ -158,14 +143,6 @@ class Miner:
158
  def _preprocess(
159
  self, image: ndarray
160
  ) -> tuple[np.ndarray, float, tuple[float, float], tuple[int, int]]:
161
- """
162
- Preprocess for fixed-size ONNX export:
163
- - enhance image quality (CLAHE, denoise, sharpen)
164
- - letterbox to model input size
165
- - BGR -> RGB
166
- - normalize to [0,1]
167
- - HWC -> NCHW float32
168
- """
169
  orig_h, orig_w = image.shape[:2]
170
 
171
  img, ratio, pad = self._letterbox(
@@ -196,230 +173,195 @@ class Miner:
196
  out[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.0
197
  return out
198
 
199
- def _soft_nms(
200
- self,
201
  boxes: np.ndarray,
202
  scores: np.ndarray,
203
- sigma: float = 0.5,
204
- score_thresh: float = 0.01,
205
- ) -> tuple[np.ndarray, np.ndarray]:
206
- """
207
- Soft-NMS: Gaussian decay of overlapping scores instead of hard removal.
208
- Returns (kept_original_indices, updated_scores).
209
- """
210
- N = len(boxes)
211
- if N == 0:
212
- return np.array([], dtype=np.intp), np.array([], dtype=np.float32)
213
-
214
- boxes = boxes.astype(np.float32, copy=True)
215
- scores = scores.astype(np.float32, copy=True)
216
- order = np.arange(N)
217
 
218
- for i in range(N):
219
- max_pos = i + int(np.argmax(scores[i:]))
220
- boxes[[i, max_pos]] = boxes[[max_pos, i]]
221
- scores[[i, max_pos]] = scores[[max_pos, i]]
222
- order[[i, max_pos]] = order[[max_pos, i]]
223
 
224
- if i + 1 >= N:
 
 
 
225
  break
226
 
227
- xx1 = np.maximum(boxes[i, 0], boxes[i + 1:, 0])
228
- yy1 = np.maximum(boxes[i, 1], boxes[i + 1:, 1])
229
- xx2 = np.minimum(boxes[i, 2], boxes[i + 1:, 2])
230
- yy2 = np.minimum(boxes[i, 3], boxes[i + 1:, 3])
 
 
 
231
  inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
232
 
233
- area_i = max(0.0, float(
234
- (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
235
- ))
236
- areas_j = (
237
- np.maximum(0.0, boxes[i + 1:, 2] - boxes[i + 1:, 0])
238
- * np.maximum(0.0, boxes[i + 1:, 3] - boxes[i + 1:, 1])
239
- )
240
- iou = inter / (area_i + areas_j - inter + 1e-7)
241
- scores[i + 1:] *= np.exp(-(iou ** 2) / sigma)
242
 
243
- mask = scores > score_thresh
244
- return order[mask], scores[mask]
245
 
246
- @staticmethod
247
- def _hard_nms(
 
 
 
248
  boxes: np.ndarray,
249
  scores: np.ndarray,
 
250
  iou_thresh: float,
 
251
  ) -> np.ndarray:
252
- """
253
- Standard NMS: keep one box per overlapping cluster (the one with highest score).
254
- Returns indices of kept boxes (into the boxes/scores arrays).
255
- """
256
- N = len(boxes)
257
- if N == 0:
258
  return np.array([], dtype=np.intp)
259
- boxes = np.asarray(boxes, dtype=np.float32)
260
- scores = np.asarray(scores, dtype=np.float32)
261
- order = np.argsort(scores)[::-1]
262
- keep: list[int] = []
263
- suppressed = np.zeros(N, dtype=bool)
264
- for i in range(N):
265
- idx = order[i]
266
- if suppressed[idx]:
267
  continue
268
- keep.append(idx)
269
- bi = boxes[idx]
270
- for k in range(i + 1, N):
271
- jdx = order[k]
272
- if suppressed[jdx]:
273
- continue
274
- bj = boxes[jdx]
275
- xx1 = max(bi[0], bj[0])
276
- yy1 = max(bi[1], bj[1])
277
- xx2 = min(bi[2], bj[2])
278
- yy2 = min(bi[3], bj[3])
279
- inter = max(0.0, xx2 - xx1) * max(0.0, yy2 - yy1)
280
- area_i = (bi[2] - bi[0]) * (bi[3] - bi[1])
281
- area_j = (bj[2] - bj[0]) * (bj[3] - bj[1])
282
- iou = inter / (area_i + area_j - inter + 1e-7)
283
- if iou > iou_thresh:
284
- suppressed[jdx] = True
285
- return np.array(keep)
286
 
287
  @staticmethod
288
- def _max_score_per_cluster(
289
- coords: np.ndarray,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
  scores: np.ndarray,
291
- keep_indices: np.ndarray,
292
- iou_thresh: float,
293
- ) -> np.ndarray:
 
 
 
 
 
 
294
  """
295
- For each kept box, return the max original score among itself and any
296
- box that overlaps it with IOU >= iou_thresh (so TTA cluster keeps best conf).
297
- """
298
- n_keep = len(keep_indices)
299
- if n_keep == 0:
300
- return np.array([], dtype=np.float32)
301
- out = np.empty(n_keep, dtype=np.float32)
302
- coords = np.asarray(coords, dtype=np.float32)
303
- scores = np.asarray(scores, dtype=np.float32)
304
- for i in range(n_keep):
305
- idx = keep_indices[i]
306
- bi = coords[idx]
307
- xx1 = np.maximum(bi[0], coords[:, 0])
308
- yy1 = np.maximum(bi[1], coords[:, 1])
309
- xx2 = np.minimum(bi[2], coords[:, 2])
310
- yy2 = np.minimum(bi[3], coords[:, 3])
311
- inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
312
- area_i = (bi[2] - bi[0]) * (bi[3] - bi[1])
313
- areas_j = (coords[:, 2] - coords[:, 0]) * (coords[:, 3] - coords[:, 1])
314
- iou = inter / (area_i + areas_j - inter + 1e-7)
315
- in_cluster = iou >= iou_thresh
316
- out[i] = float(np.max(scores[in_cluster]))
317
- return out
318
-
319
- def _decode_final_dets(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
320
  self,
321
  preds: np.ndarray,
322
  ratio: float,
323
  pad: tuple[float, float],
324
  orig_size: tuple[int, int],
325
- apply_optional_dedup: bool = False,
326
  ) -> list[BoundingBox]:
327
  """
328
- Primary path:
329
- expected output rows like [x1, y1, x2, y2, conf, cls_id]
330
- in letterboxed input coordinates.
331
- """
332
- if preds.ndim == 3 and preds.shape[0] == 1:
333
- preds = preds[0]
334
-
335
- if preds.ndim != 2 or preds.shape[1] < 6:
336
- raise ValueError(f"Unexpected ONNX final-det output shape: {preds.shape}")
337
-
338
- boxes = preds[:, :4].astype(np.float32)
339
- scores = preds[:, 4].astype(np.float32)
340
- cls_ids = preds[:, 5].astype(np.int32)
341
- cls_ids = self.cls_remap[np.clip(cls_ids, 0, len(self.cls_remap) - 1)]
342
-
343
- keep = scores >= self.conf_thres
344
- boxes = boxes[keep]
345
- scores = scores[keep]
346
- cls_ids = cls_ids[keep]
347
-
348
- if len(boxes) == 0:
349
- return []
350
-
351
- pad_w, pad_h = pad
352
- orig_w, orig_h = orig_size
353
-
354
- # reverse letterbox
355
- boxes[:, [0, 2]] -= pad_w
356
- boxes[:, [1, 3]] -= pad_h
357
- boxes /= ratio
358
- boxes = self._clip_boxes(boxes, (orig_w, orig_h))
359
-
360
- if apply_optional_dedup and len(boxes) > 1:
361
- keep_idx, scores = self._soft_nms(boxes, scores)
362
- boxes = boxes[keep_idx]
363
- cls_ids = cls_ids[keep_idx]
364
-
365
- results: list[BoundingBox] = []
366
- for box, conf, cls_id in zip(boxes, scores, cls_ids):
367
- x1, y1, x2, y2 = box.tolist()
368
-
369
- if x2 <= x1 or y2 <= y1:
370
- continue
371
-
372
- results.append(
373
- BoundingBox(
374
- x1=int(math.floor(x1)),
375
- y1=int(math.floor(y1)),
376
- x2=int(math.ceil(x2)),
377
- y2=int(math.ceil(y2)),
378
- cls_id=int(cls_id),
379
- conf=float(conf),
380
- )
381
- )
382
-
383
- return results
384
 
385
- def _decode_raw_yolo(
386
- self,
387
- preds: np.ndarray,
388
- ratio: float,
389
- pad: tuple[float, float],
390
- orig_size: tuple[int, int],
391
- ) -> list[BoundingBox]:
392
- """
393
- Fallback path for raw YOLO predictions.
394
- Supports common layouts:
395
- - [1, C, N]
396
- - [1, N, C]
397
  """
398
- if preds.ndim != 3:
399
- raise ValueError(f"Unexpected raw ONNX output shape: {preds.shape}")
400
-
401
- if preds.shape[0] != 1:
402
- raise ValueError(f"Unexpected batch dimension in raw output: {preds.shape}")
403
 
404
  preds = preds[0]
405
 
406
- # Normalize to [N, C]
407
- if preds.shape[0] <= 16 and preds.shape[1] > preds.shape[0]:
 
 
408
  preds = preds.T
 
 
 
 
409
 
410
  if preds.ndim != 2 or preds.shape[1] < 5:
411
- raise ValueError(f"Unexpected normalized raw output shape: {preds.shape}")
412
 
413
  boxes_xywh = preds[:, :4].astype(np.float32)
414
- cls_part = preds[:, 4:].astype(np.float32)
415
 
416
- if cls_part.shape[1] == 1:
417
- scores = cls_part[:, 0]
418
- cls_ids = np.zeros(len(scores), dtype=np.int32)
419
- else:
420
- cls_ids = np.argmax(cls_part, axis=1).astype(np.int32)
421
- scores = cls_part[np.arange(len(cls_part)), cls_ids]
422
- cls_ids = self.cls_remap[np.clip(cls_ids, 0, len(self.cls_remap) - 1)]
423
 
424
  keep = scores >= self.conf_thres
425
  boxes_xywh = boxes_xywh[keep]
@@ -430,12 +372,6 @@ class Miner:
430
  return []
431
 
432
  boxes = self._xywh_to_xyxy(boxes_xywh)
433
- keep_idx, scores = self._soft_nms(boxes, scores)
434
- keep_idx = keep_idx[: self.max_det]
435
- scores = scores[: self.max_det]
436
-
437
- boxes = boxes[keep_idx]
438
- cls_ids = cls_ids[keep_idx]
439
 
440
  pad_w, pad_h = pad
441
  orig_w, orig_h = orig_size
@@ -445,47 +381,33 @@ class Miner:
445
  boxes /= ratio
446
  boxes = self._clip_boxes(boxes, (orig_w, orig_h))
447
 
448
- results: list[BoundingBox] = []
449
- for box, conf, cls_id in zip(boxes, scores, cls_ids):
450
- x1, y1, x2, y2 = box.tolist()
451
-
452
- if x2 <= x1 or y2 <= y1:
453
- continue
454
-
455
- results.append(
456
- BoundingBox(
457
- x1=int(math.floor(x1)),
458
- y1=int(math.floor(y1)),
459
- x2=int(math.ceil(x2)),
460
- y2=int(math.ceil(y2)),
461
- cls_id=int(cls_id),
462
- conf=float(conf),
463
- )
464
- )
465
-
466
- return results
467
 
468
- def _postprocess(
469
- self,
470
- output: np.ndarray,
471
- ratio: float,
472
- pad: tuple[float, float],
473
- orig_size: tuple[int, int],
474
- ) -> list[BoundingBox]:
475
- """
476
- Prefer final detections first.
477
- Fallback to raw decode only if needed.
478
- """
479
- # final detections: [N,6]
480
- if output.ndim == 2 and output.shape[1] >= 6:
481
- return self._decode_final_dets(output, ratio, pad, orig_size)
482
 
483
- # final detections: [1,N,6]
484
- if output.ndim == 3 and output.shape[0] == 1 and output.shape[2] == 6:
485
- return self._decode_final_dets(output, ratio, pad, orig_size)
 
 
 
486
 
487
- # fallback raw decode
488
- return self._decode_raw_yolo(output, ratio, pad, orig_size)
 
 
 
 
 
 
 
 
 
 
489
 
490
  def _predict_single(self, image: np.ndarray) -> list[BoundingBox]:
491
  if image is None:
@@ -512,51 +434,7 @@ class Miner:
512
 
513
  outputs = self.session.run(self.output_names, {self.input_name: input_tensor})
514
  det_output = outputs[0]
515
- return self._postprocess(det_output, ratio, pad, orig_size)
516
-
517
- def _predict_tta(self, image: np.ndarray) -> list[BoundingBox]:
518
- """Horizontal-flip TTA: merge original + flipped via hard NMS."""
519
- boxes_orig = self._predict_single(image)
520
-
521
- flipped = cv2.flip(image, 1)
522
- boxes_flip = self._predict_single(flipped)
523
-
524
- w = image.shape[1]
525
- boxes_flip = [
526
- BoundingBox(
527
- x1=w - b.x2, y1=b.y1, x2=w - b.x1, y2=b.y2,
528
- cls_id=b.cls_id, conf=b.conf,
529
- )
530
- for b in boxes_flip
531
- ]
532
-
533
- all_boxes = boxes_orig + boxes_flip
534
- if len(all_boxes) == 0:
535
- return []
536
-
537
- coords = np.array(
538
- [[b.x1, b.y1, b.x2, b.y2] for b in all_boxes], dtype=np.float32
539
- )
540
- scores = np.array([b.conf for b in all_boxes], dtype=np.float32)
541
-
542
- hard_keep = self._hard_nms(coords, scores, self.iou_thres)
543
- if len(hard_keep) == 0:
544
- return []
545
-
546
- # _hard_nms already orders kept indices by descending score.
547
- hard_keep = hard_keep[: self.max_det]
548
-
549
- return [
550
- BoundingBox(
551
- x1=all_boxes[i].x1,
552
- y1=all_boxes[i].y1,
553
- x2=all_boxes[i].x2,
554
- y2=all_boxes[i].y2,
555
- cls_id=all_boxes[i].cls_id,
556
- conf=float(scores[i]),
557
- )
558
- for i in hard_keep
559
- ]
560
 
561
  def predict_batch(
562
  self,
@@ -564,81 +442,113 @@ class Miner:
564
  offset: int,
565
  n_keypoints: int,
566
  ) -> list[TVFrameResult]:
 
 
 
 
 
 
 
567
  results: list[TVFrameResult] = []
 
568
 
569
  for frame_number_in_batch, image in enumerate(batch_images):
 
570
  try:
571
- if self.use_tta:
572
- boxes = self._predict_tta(image)
573
- else:
574
- boxes = self._predict_single(image)
575
  except Exception as e:
576
- print(f"⚠️ Inference failed for frame {offset + frame_number_in_batch}: {e}")
577
  boxes = []
578
- # for box in boxes:
579
- # if box.cls_id == 2:
580
- # box.cls_id = 3
581
- # elif box.cls_id == 3:
582
- # box.cls_id = 2
583
-
584
-
585
-
586
  results.append(
587
  TVFrameResult(
588
- frame_id=offset + frame_number_in_batch,
589
  boxes=boxes,
590
- keypoints=[(0, 0) for _ in range(max(0, int(n_keypoints)))],
591
  )
592
  )
593
 
 
594
  return results
595
-
596
 
597
- if __name__ == "__main__":
598
- # Simple manual test: load weights.onnx, run on 1.png, and draw bboxes
599
- repo_dir = Path(__file__).parent
600
- miner = Miner(repo_dir)
601
-
602
- image_path = repo_dir / "car1.png"
603
- if not image_path.exists():
604
- raise FileNotFoundError(f"Test image not found: {image_path}")
605
-
606
- image = cv2.imread(str(image_path), cv2.IMREAD_COLOR)
607
- if image is None:
608
- raise RuntimeError(f"Failed to read image: {image_path}")
609
-
610
- results = miner.predict_batch([image], offset=0, n_keypoints=0)
611
- # Draw bounding boxes on a copy of the image
612
- vis = image.copy()
613
- colors = [(0, 255, 0), (0, 0, 255), (255, 0, 0)]
614
- for frame in results:
615
- print(f"Frame {frame.frame_id}:")
616
- for i, box in enumerate(frame.boxes):
617
- color = colors[i % len(colors)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
618
  cv2.rectangle(
619
- vis,
620
- (box.x1, box.y1),
621
- (box.x2, box.y2),
622
- color,
623
- 2,
624
  )
625
- label = f"{box.cls_id }_{miner.class_names[box.cls_id] if box.cls_id < len(miner.class_names) else box.cls_id}:{box.conf:.2f}"
626
  cv2.putText(
627
- vis,
628
- label,
629
- (box.x1, max(0, box.y1 - 5)),
630
- cv2.FONT_HERSHEY_SIMPLEX,
631
- box.conf,
632
- color,
633
- 1,
634
- cv2.LINE_AA,
635
  )
636
- print(
637
- f" cls={box.cls_id} conf={box.conf:.3f} "
638
- f"box=({box.x1},{box.y1},{box.x2},{box.y2})"
639
- )
640
- print(len(frame.boxes))
641
 
642
- out_path = repo_dir / f"1_out_iou{miner.iou_thres:.2f}.png"
643
- cv2.imwrite(str(out_path), vis)
644
- print(f"Saved visualization to: {out_path}")
 
 
 
 
 
 
 
 
 
 
1
+
2
  from pathlib import Path
3
  import math
4
 
 
23
  boxes: list[BoundingBox]
24
  keypoints: list[tuple[int, int]]
25
 
 
 
 
26
 
27
  class Miner:
28
+ """ONNX-backed petrol-tracking miner with canopy union-merge post-process."""
29
+
30
+ CANOPY_CLS = 3
31
+
32
  def __init__(self, path_hf_repo: Path) -> None:
33
+ model_path = path_hf_repo / "petrol.onnx"
34
+
35
+ # Class order as exported from the training pt: must match model.names
36
+ self.class_names = ["petrol hose", "petrol pump", "price board", "roof canopy"]
37
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  print("ORT version:", ort.__version__)
39
 
40
  try:
 
75
  self.output_names = [output.name for output in self.session.get_outputs()]
76
  self.input_shape = self.session.get_inputs()[0].shape
77
 
78
+ self.input_height = self._safe_dim(self.input_shape[2], default=640)
79
+ self.input_width = self._safe_dim(self.input_shape[3], default=640)
80
+
81
+ # Thresholds
82
+ self.conf_thres = 0.38
83
+ self.iou_thres = 0.50
84
+ self.max_det = 300
85
 
86
+ # Canopy union-merge: same-class IoU above this triggers a union merge
87
+ # for class 3 only (roof canopy). Set to 0 to disable.
88
+ self.canopy_merge_iou = 0.30
 
89
 
90
+ print(f"✅ Petrol ONNX model loaded from: {model_path}")
91
  print(f"✅ ONNX providers: {self.session.get_providers()}")
92
  print(f"✅ ONNX input: name={self.input_name}, shape={self.input_shape}")
93
+ print(f"✅ Canopy merge IoU: {self.canopy_merge_iou}")
94
 
95
  def __repr__(self) -> str:
96
  return (
97
+ f"Petrol ONNXRuntime(session={type(self.session).__name__}, "
98
  f"providers={self.session.get_providers()})"
99
  )
100
 
 
108
  new_shape: tuple[int, int],
109
  color=(114, 114, 114),
110
  ) -> tuple[ndarray, float, tuple[float, float]]:
 
 
 
 
 
 
 
111
  h, w = image.shape[:2]
112
  new_w, new_h = new_shape
113
 
 
143
  def _preprocess(
144
  self, image: ndarray
145
  ) -> tuple[np.ndarray, float, tuple[float, float], tuple[int, int]]:
 
 
 
 
 
 
 
 
146
  orig_h, orig_w = image.shape[:2]
147
 
148
  img, ratio, pad = self._letterbox(
 
173
  out[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.0
174
  return out
175
 
176
+ @staticmethod
177
+ def _hard_nms(
178
  boxes: np.ndarray,
179
  scores: np.ndarray,
180
+ iou_thresh: float,
181
+ ) -> np.ndarray:
182
+ if len(boxes) == 0:
183
+ return np.array([], dtype=np.intp)
 
 
 
 
 
 
 
 
 
 
184
 
185
+ boxes = np.asarray(boxes, dtype=np.float32)
186
+ scores = np.asarray(scores, dtype=np.float32)
187
+ order = np.argsort(scores)[::-1]
188
+ keep = []
 
189
 
190
+ while len(order) > 0:
191
+ i = order[0]
192
+ keep.append(i)
193
+ if len(order) == 1:
194
  break
195
 
196
+ rest = order[1:]
197
+
198
+ xx1 = np.maximum(boxes[i, 0], boxes[rest, 0])
199
+ yy1 = np.maximum(boxes[i, 1], boxes[rest, 1])
200
+ xx2 = np.minimum(boxes[i, 2], boxes[rest, 2])
201
+ yy2 = np.minimum(boxes[i, 3], boxes[rest, 3])
202
+
203
  inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
204
 
205
+ area_i = max(0.0, (boxes[i, 2] - boxes[i, 0])) * max(0.0, (boxes[i, 3] - boxes[i, 1]))
206
+ area_r = np.maximum(0.0, boxes[rest, 2] - boxes[rest, 0]) * np.maximum(0.0, boxes[rest, 3] - boxes[rest, 1])
 
 
 
 
 
 
 
207
 
208
+ iou = inter / (area_i + area_r - inter + 1e-7)
209
+ order = rest[iou <= iou_thresh]
210
 
211
+ return np.array(keep, dtype=np.intp)
212
+
213
+ @classmethod
214
+ def _nms_per_class(
215
+ cls,
216
  boxes: np.ndarray,
217
  scores: np.ndarray,
218
+ cls_ids: np.ndarray,
219
  iou_thresh: float,
220
+ max_det: int,
221
  ) -> np.ndarray:
222
+ if len(boxes) == 0:
 
 
 
 
 
223
  return np.array([], dtype=np.intp)
224
+ keep_all: list[int] = []
225
+ for c in np.unique(cls_ids):
226
+ idxs = np.nonzero(cls_ids == c)[0]
227
+ if len(idxs) == 0:
 
 
 
 
228
  continue
229
+ local_keep = cls._hard_nms(boxes[idxs], scores[idxs], iou_thresh)
230
+ keep_all.extend(idxs[local_keep].tolist())
231
+ keep_all_arr = np.array(keep_all, dtype=np.intp)
232
+ order = np.argsort(scores[keep_all_arr])[::-1]
233
+ return keep_all_arr[order[:max_det]]
 
 
 
 
 
 
 
 
 
 
 
 
 
234
 
235
  @staticmethod
236
+ def _pairwise_iou(boxes: np.ndarray) -> np.ndarray:
237
+ """N×N IoU matrix for an [N,4] xyxy array."""
238
+ n = len(boxes)
239
+ if n == 0:
240
+ return np.zeros((0, 0), dtype=np.float32)
241
+ x1 = boxes[:, 0]; y1 = boxes[:, 1]
242
+ x2 = boxes[:, 2]; y2 = boxes[:, 3]
243
+ area = np.maximum(0.0, x2 - x1) * np.maximum(0.0, y2 - y1)
244
+
245
+ ix1 = np.maximum(x1[:, None], x1[None, :])
246
+ iy1 = np.maximum(y1[:, None], y1[None, :])
247
+ ix2 = np.minimum(x2[:, None], x2[None, :])
248
+ iy2 = np.minimum(y2[:, None], y2[None, :])
249
+ iw = np.maximum(0.0, ix2 - ix1)
250
+ ih = np.maximum(0.0, iy2 - iy1)
251
+ inter = iw * ih
252
+ union = area[:, None] + area[None, :] - inter
253
+ with np.errstate(divide="ignore", invalid="ignore"):
254
+ iou = np.where(union > 0, inter / union, 0.0)
255
+ np.fill_diagonal(iou, 0.0)
256
+ return iou.astype(np.float32)
257
+
258
+ def _union_merge_class(
259
+ self,
260
+ boxes: np.ndarray,
261
  scores: np.ndarray,
262
+ cls_ids: np.ndarray,
263
+ target_cls: int,
264
+ merge_iou: float,
265
+ ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
266
+ """Greedy union-merge for one class.
267
+
268
+ For boxes whose cls == target_cls, repeatedly fuse pairs whose IoU
269
+ exceeds `merge_iou`: replace them with the bounding-rectangle union
270
+ (max conf). Other classes are passed through unchanged.
271
  """
272
+ if merge_iou <= 0 or len(boxes) == 0:
273
+ return boxes, scores, cls_ids
274
+
275
+ mask = cls_ids == target_cls
276
+ if mask.sum() < 2:
277
+ return boxes, scores, cls_ids
278
+
279
+ tgt_boxes = boxes[mask].astype(np.float32).copy()
280
+ tgt_scores = scores[mask].astype(np.float32).copy()
281
+
282
+ # Greedy merge: highest-conf box anchors each round; absorb all
283
+ # others above the IoU threshold; repeat until stable.
284
+ changed = True
285
+ while changed and len(tgt_boxes) > 1:
286
+ changed = False
287
+ order = np.argsort(tgt_scores)[::-1]
288
+ tgt_boxes = tgt_boxes[order]
289
+ tgt_scores = tgt_scores[order]
290
+
291
+ iou = self._pairwise_iou(tgt_boxes)
292
+ consumed = np.zeros(len(tgt_boxes), dtype=bool)
293
+ new_boxes: list[np.ndarray] = []
294
+ new_scores: list[float] = []
295
+ for i in range(len(tgt_boxes)):
296
+ if consumed[i]:
297
+ continue
298
+ cur = tgt_boxes[i].copy()
299
+ cur_s = float(tgt_scores[i])
300
+ for j in range(i + 1, len(tgt_boxes)):
301
+ if consumed[j]:
302
+ continue
303
+ if iou[i, j] > merge_iou:
304
+ cur = np.array([
305
+ min(cur[0], tgt_boxes[j, 0]),
306
+ min(cur[1], tgt_boxes[j, 1]),
307
+ max(cur[2], tgt_boxes[j, 2]),
308
+ max(cur[3], tgt_boxes[j, 3]),
309
+ ], dtype=np.float32)
310
+ cur_s = max(cur_s, float(tgt_scores[j]))
311
+ consumed[j] = True
312
+ changed = True
313
+ new_boxes.append(cur)
314
+ new_scores.append(cur_s)
315
+ tgt_boxes = np.stack(new_boxes, axis=0)
316
+ tgt_scores = np.array(new_scores, dtype=np.float32)
317
+
318
+ # Stitch results back together with non-target classes
319
+ other_boxes = boxes[~mask]
320
+ other_scores = scores[~mask]
321
+ other_cls = cls_ids[~mask]
322
+
323
+ merged_cls = np.full(len(tgt_boxes), target_cls, dtype=cls_ids.dtype)
324
+ out_boxes = np.concatenate([other_boxes, tgt_boxes], axis=0)
325
+ out_scores = np.concatenate([other_scores, tgt_scores], axis=0)
326
+ out_cls = np.concatenate([other_cls, merged_cls], axis=0)
327
+ return out_boxes, out_scores, out_cls
328
+
329
+ def _decode_yolov8(
330
  self,
331
  preds: np.ndarray,
332
  ratio: float,
333
  pad: tuple[float, float],
334
  orig_size: tuple[int, int],
 
335
  ) -> list[BoundingBox]:
336
  """
337
+ Decode a raw YOLOv8-style ONNX detection output.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
 
339
+ Expected shape: [1, 4 + nc, num_boxes] (no objectness channel).
340
+ Some exporters emit [1, num_boxes, 4 + nc]; both are handled.
 
 
 
 
 
 
 
 
 
 
341
  """
342
+ if preds.ndim != 3 or preds.shape[0] != 1:
343
+ raise ValueError(f"Unexpected ONNX output shape: {preds.shape}")
 
 
 
344
 
345
  preds = preds[0]
346
 
347
+ # Normalize to [N, C] where C = 4 + nc
348
+ nc = len(self.class_names)
349
+ expected_c = 4 + nc
350
+ if preds.shape[0] == expected_c:
351
  preds = preds.T
352
+ elif preds.shape[1] != expected_c:
353
+ # Fall back: treat smaller dim as channels
354
+ if preds.shape[0] < preds.shape[1]:
355
+ preds = preds.T
356
 
357
  if preds.ndim != 2 or preds.shape[1] < 5:
358
+ raise ValueError(f"Unexpected normalized output shape: {preds.shape}")
359
 
360
  boxes_xywh = preds[:, :4].astype(np.float32)
361
+ class_probs = preds[:, 4:].astype(np.float32)
362
 
363
+ cls_ids = np.argmax(class_probs, axis=1).astype(np.int32)
364
+ scores = class_probs[np.arange(len(class_probs)), cls_ids]
 
 
 
 
 
365
 
366
  keep = scores >= self.conf_thres
367
  boxes_xywh = boxes_xywh[keep]
 
372
  return []
373
 
374
  boxes = self._xywh_to_xyxy(boxes_xywh)
 
 
 
 
 
 
375
 
376
  pad_w, pad_h = pad
377
  orig_w, orig_h = orig_size
 
381
  boxes /= ratio
382
  boxes = self._clip_boxes(boxes, (orig_w, orig_h))
383
 
384
+ keep_idx = self._nms_per_class(
385
+ boxes, scores, cls_ids, self.iou_thres, self.max_det
386
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
387
 
388
+ boxes = boxes[keep_idx]
389
+ scores = scores[keep_idx]
390
+ cls_ids = cls_ids[keep_idx]
 
 
 
 
 
 
 
 
 
 
 
391
 
392
+ # Class-3 union-merge: rejoin half-canopy splits into one box.
393
+ boxes, scores, cls_ids = self._union_merge_class(
394
+ boxes, scores, cls_ids,
395
+ target_cls=self.CANOPY_CLS,
396
+ merge_iou=self.canopy_merge_iou,
397
+ )
398
 
399
+ return [
400
+ BoundingBox(
401
+ x1=int(math.floor(box[0])),
402
+ y1=int(math.floor(box[1])),
403
+ x2=int(math.ceil(box[2])),
404
+ y2=int(math.ceil(box[3])),
405
+ cls_id=int(cls_id),
406
+ conf=float(conf),
407
+ )
408
+ for box, conf, cls_id in zip(boxes, scores, cls_ids)
409
+ if box[2] > box[0] and box[3] > box[1]
410
+ ]
411
 
412
  def _predict_single(self, image: np.ndarray) -> list[BoundingBox]:
413
  if image is None:
 
434
 
435
  outputs = self.session.run(self.output_names, {self.input_name: input_tensor})
436
  det_output = outputs[0]
437
+ return self._decode_yolov8(det_output, ratio, pad, orig_size)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
438
 
439
  def predict_batch(
440
  self,
 
442
  offset: int,
443
  n_keypoints: int,
444
  ) -> list[TVFrameResult]:
445
+ """
446
+ Miner prediction for a batch of images using ONNX Runtime.
447
+
448
+ The petrol detector is a plain object-detection model (no pose),
449
+ so keypoints are returned as `n_keypoints` padding entries of (0, 0)
450
+ to keep the TVFrameResult schema stable across challenge types.
451
+ """
452
  results: list[TVFrameResult] = []
453
+ n_kp = max(0, int(n_keypoints))
454
 
455
  for frame_number_in_batch, image in enumerate(batch_images):
456
+ frame_idx = offset + frame_number_in_batch
457
  try:
458
+ boxes = self._predict_single(image)
 
 
 
459
  except Exception as e:
460
+ print(f"⚠️ Inference failed for frame {frame_idx}: {e}")
461
  boxes = []
462
+
 
 
 
 
 
 
 
463
  results.append(
464
  TVFrameResult(
465
+ frame_id=frame_idx,
466
  boxes=boxes,
467
+ keypoints=[(0, 0) for _ in range(n_kp)],
468
  )
469
  )
470
 
471
+ print("✅ Petrol ONNX predictions complete")
472
  return results
 
473
 
474
+
475
+ def main() -> None:
476
+ """Example runner — same CLI as miner.py for direct A/B comparison."""
477
+ import sys
478
+
479
+ repo_path = Path(__file__).parent
480
+ print(f"Loading miner_v2 from: {repo_path}")
481
+ miner = Miner(path_hf_repo=repo_path)
482
+ print(repr(miner))
483
+
484
+ batch_images: list[np.ndarray] = []
485
+
486
+ if len(sys.argv) > 1:
487
+ for image_path in sys.argv[1:]:
488
+ image = cv2.imread(image_path)
489
+ if image is None:
490
+ raise ValueError(f"Cannot read image: {image_path}")
491
+ batch_images.append(image)
492
+ print(f"Loaded {len(batch_images)} image(s)")
493
+ else:
494
+ batch_images = [np.zeros((640, 640, 3), dtype=np.uint8)]
495
+ print("No image provided — running on a single blank dummy frame")
496
+
497
+ results = miner.predict_batch(
498
+ batch_images=batch_images,
499
+ offset=0,
500
+ n_keypoints=32,
501
+ )
502
+
503
+ output_dir = repo_path / "predictions_v2"
504
+ output_dir.mkdir(exist_ok=True)
505
+
506
+ class_names = {i: n for i, n in enumerate(miner.class_names)}
507
+
508
+ def color_for_class(cls_id: int) -> tuple[int, int, int]:
509
+ hue = (cls_id * 47) % 180
510
+ hsv = np.uint8([[[hue, 220, 255]]])
511
+ bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)[0, 0]
512
+ return int(bgr[0]), int(bgr[1]), int(bgr[2])
513
+
514
+ for image, r in zip(batch_images, results):
515
+ print(
516
+ f"frame={r.frame_id} "
517
+ f"boxes={len(r.boxes)} "
518
+ f"keypoints={len(r.keypoints)}"
519
+ )
520
+
521
+ vis = image.copy()
522
+ for box in r.boxes:
523
+ name = class_names.get(box.cls_id, str(box.cls_id))
524
+ color = color_for_class(box.cls_id)
525
+ print(
526
+ f" box cls={box.cls_id}({name}) conf={box.conf:.2f} "
527
+ f"[{box.x1},{box.y1},{box.x2},{box.y2}]"
528
+ )
529
+ cv2.rectangle(vis, (box.x1, box.y1), (box.x2, box.y2), color, 2)
530
+ label = f"{name} {box.conf:.2f}"
531
+ (tw, th), baseline = cv2.getTextSize(
532
+ label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1
533
+ )
534
+ top = max(box.y1 - th - baseline, 0)
535
  cv2.rectangle(
536
+ vis, (box.x1, top), (box.x1 + tw, top + th + baseline), color, -1
 
 
 
 
537
  )
 
538
  cv2.putText(
539
+ vis, label, (box.x1, top + th),
540
+ cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1, cv2.LINE_AA,
 
 
 
 
 
 
541
  )
 
 
 
 
 
542
 
543
+ for x, y in r.keypoints:
544
+ if x == 0 and y == 0:
545
+ continue
546
+ cv2.circle(vis, (x, y), 3, (0, 0, 255), -1)
547
+
548
+ out_path = output_dir / f"frame_{r.frame_id:04d}.jpg"
549
+ cv2.imwrite(str(out_path), vis)
550
+ print(f" saved: {out_path}")
551
+
552
+
553
+ if __name__ == "__main__":
554
+ main()
weights.onnx → petrol.onnx RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bcb98181cc317a8a83ffc13a390cc9b1ba37ef27c6cb55c05eade472ebdc0df2
3
- size 10264218
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb8ff9dbe935b06f64e6049b0604c2c871386b633b36ef9d320d0e02e5f35c36
3
+ size 22664875