nexu02 committed on
Commit
375e376
·
verified ·
1 Parent(s): 807bfe0

R17 miner.py docstring + logging

Browse files
Files changed (1) hide show
  1. miner.py +30 -49
miner.py CHANGED
@@ -1,25 +1,25 @@
1
- """miner.py — uploaded to nexu02/ScoreVision HF repo (R16 public).
2
-
3
- Round 16 (R16): YOLO11s fine-tuned on dataset_v11 = batches 1+2 manually-reviewed
4
- + batch 3 raw (587 SAM3-anchored images, validator-distribution).
5
-
6
- Training (on RTX PRO 6000 Blackwell, 150 epochs, AdamW auto-lr, batch=32):
7
- - val mAP50 = 0.878 (cup 0.890 / bottle 0.855 / can 0.890)
8
- - val mAP50-95 = 0.644
9
- - precision 0.886 (cup 0.856 / bottle 0.915 / can 0.887)
10
- - recall 0.786
11
- - close_mosaic=15 final epochs for sharper boxes
12
- - patience=30 (didn't trigger; ran full 150)
13
-
14
- Inference (kept identical to R5/R6 to preserve calibration):
 
 
 
 
 
 
15
  - imgsz=1280, conf=0.50, iou=0.45, augment=True (hflip TTA)
16
  - cross-class NMS at IoU 0.6 (validator counts cross-class overlaps as FP)
17
-
18
- Required by the chute template:
19
- - class Miner with __init__(self, path_hf_repo: Path)
20
- - predict_batch(batch_images, offset, n_keypoints) -> list[TVFrameResult]
21
- - BoundingBox + TVFrameResult pydantic models defined in this file
22
- - No imports from anywhere except stdlib + site-packages
23
  """
24
  from pathlib import Path
25
 
@@ -29,8 +29,6 @@ from pydantic import BaseModel
29
  from ultralytics import YOLO
30
 
31
 
32
- # Class index follows the manifest objects order for Detect-beverage-detect:
33
- # [cup, bottle, can] → cls_id 0, 1, 2.
34
  CLASS_NAMES = ["cup", "bottle", "can"]
35
 
36
 
@@ -50,10 +48,8 @@ class TVFrameResult(BaseModel):
50
 
51
 
52
  def _iou(a: BoundingBox, b: BoundingBox) -> float:
53
- x1 = max(a.x1, b.x1)
54
- y1 = max(a.y1, b.y1)
55
- x2 = min(a.x2, b.x2)
56
- y2 = min(a.y2, b.y2)
57
  if x2 <= x1 or y2 <= y1:
58
  return 0.0
59
  inter = (x2 - x1) * (y2 - y1)
@@ -64,23 +60,15 @@ def _iou(a: BoundingBox, b: BoundingBox) -> float:
64
 
65
 
66
  def _cross_class_nms(boxes: list[BoundingBox], iou_thresh: float = 0.6) -> list[BoundingBox]:
67
- """Suppress cross-class overlapping boxes; keep highest-conf when IoU≥thresh.
68
-
69
- Ultralytics' default NMS only dedupes WITHIN a class. SN44 counts cross-class
70
- overlap as a false positive (only one class can be right per object).
71
- """
72
  if len(boxes) <= 1:
73
  return boxes
74
  sorted_boxes = sorted(boxes, key=lambda b: -b.conf)
75
  kept: list[BoundingBox] = []
76
  for b in sorted_boxes:
77
- suppressed = False
78
- for k in kept:
79
- if _iou(b, k) >= iou_thresh:
80
- suppressed = True
81
- break
82
- if not suppressed:
83
- kept.append(b)
84
  return kept
85
 
86
 
@@ -99,19 +87,15 @@ class Miner:
99
  dummy = np.zeros((640, 640, 3), dtype=np.uint8)
100
  _ = self.model.predict(dummy, imgsz=self.IMAGE_SIZE, conf=self.CONF_THRESH,
101
  iou=self.IOU_THRESH, augment=self.USE_TTA, verbose=False)
102
- print(f"✅ YOLO11s R16 loaded from {weights_path}")
103
 
104
  def __repr__(self) -> str:
105
- return (f"YOLO11s_R16(imgsz={self.IMAGE_SIZE}, "
106
  f"conf={self.CONF_THRESH}, iou={self.IOU_THRESH}, "
107
  f"tta={self.USE_TTA})")
108
 
109
- def predict_batch(
110
- self,
111
- batch_images: list[ndarray],
112
- offset: int,
113
- n_keypoints: int,
114
- ) -> list[TVFrameResult]:
115
  results = self.model.predict(
116
  batch_images,
117
  imgsz=self.IMAGE_SIZE,
@@ -120,10 +104,8 @@ class Miner:
120
  augment=self.USE_TTA,
121
  verbose=False,
122
  )
123
-
124
  out: list[TVFrameResult] = []
125
  kp_zeros = [(0, 0) for _ in range(max(0, int(n_keypoints)))]
126
-
127
  for i, r in enumerate(results):
128
  frame_id = offset + i
129
  boxes: list[BoundingBox] = []
@@ -142,5 +124,4 @@ class Miner:
142
  ))
143
  boxes = _cross_class_nms(boxes, iou_thresh=self.CROSS_CLASS_IOU)
144
  out.append(TVFrameResult(frame_id=frame_id, boxes=boxes, keypoints=kp_zeros))
145
-
146
  return out
 
1
+ """miner.py — uploaded to nexu02/ScoreVision HF repo (R17 public).
2
+
3
+ Round 17 (R17): YOLO11s retrained at imgsz=1280 with class-aware augmentation
4
+ to fix cup/bottle/can class confusion that R16 exhibited at CCTV scale.
5
+
6
+ Training (RTX PRO 6000 Blackwell, 120 epochs, batch=32, cos_lr, AdamW):
7
+ - imgsz=1280 (was 640) finer features at validator scale
8
+ - copy_paste=0.40 (was 0.00) → break class–context association
9
+ - mixup=0.20 (was 0.05) softer decision boundaries
10
+ - label_smoothing=0.10 → prevent over-confident wrong-class predictions
11
+ - cls loss weight 0.8 (was 0.5) → push classification harder
12
+ - lr0=0.005 (was 0.01) gentler steps at higher resolution
13
+ - close_mosaic=20 (was 15)
14
+
15
+ Val results:
16
+ - mAP50 = 0.928 (R16 0.876)
17
+ - mAP50-95 = 0.764 (R16 0.636)
18
+ - per-class P: cup 0.885, bottle 0.914, can 0.902 (uniform — bias removed)
19
+
20
+ Inference (kept identical to R16 chute):
21
  - imgsz=1280, conf=0.50, iou=0.45, augment=True (hflip TTA)
22
  - cross-class NMS at IoU 0.6 (validator counts cross-class overlaps as FP)
 
 
 
 
 
 
23
  """
24
  from pathlib import Path
25
 
 
29
  from ultralytics import YOLO
30
 
31
 
 
 
32
  CLASS_NAMES = ["cup", "bottle", "can"]
33
 
34
 
 
48
 
49
 
50
  def _iou(a: BoundingBox, b: BoundingBox) -> float:
51
+ x1 = max(a.x1, b.x1); y1 = max(a.y1, b.y1)
52
+ x2 = min(a.x2, b.x2); y2 = min(a.y2, b.y2)
 
 
53
  if x2 <= x1 or y2 <= y1:
54
  return 0.0
55
  inter = (x2 - x1) * (y2 - y1)
 
60
 
61
 
62
  def _cross_class_nms(boxes: list[BoundingBox], iou_thresh: float = 0.6) -> list[BoundingBox]:
63
+ """Suppress cross-class overlapping boxes; keep highest-conf when IoU≥thresh."""
 
 
 
 
64
  if len(boxes) <= 1:
65
  return boxes
66
  sorted_boxes = sorted(boxes, key=lambda b: -b.conf)
67
  kept: list[BoundingBox] = []
68
  for b in sorted_boxes:
69
+ if any(_iou(b, k) >= iou_thresh for k in kept):
70
+ continue
71
+ kept.append(b)
 
 
 
 
72
  return kept
73
 
74
 
 
87
  dummy = np.zeros((640, 640, 3), dtype=np.uint8)
88
  _ = self.model.predict(dummy, imgsz=self.IMAGE_SIZE, conf=self.CONF_THRESH,
89
  iou=self.IOU_THRESH, augment=self.USE_TTA, verbose=False)
90
+ print(f"✅ YOLO11s R17 loaded from {weights_path}")
91
 
92
  def __repr__(self) -> str:
93
+ return (f"YOLO11s_R17(imgsz={self.IMAGE_SIZE}, "
94
  f"conf={self.CONF_THRESH}, iou={self.IOU_THRESH}, "
95
  f"tta={self.USE_TTA})")
96
 
97
+ def predict_batch(self, batch_images: list[ndarray], offset: int,
98
+ n_keypoints: int) -> list[TVFrameResult]:
 
 
 
 
99
  results = self.model.predict(
100
  batch_images,
101
  imgsz=self.IMAGE_SIZE,
 
104
  augment=self.USE_TTA,
105
  verbose=False,
106
  )
 
107
  out: list[TVFrameResult] = []
108
  kp_zeros = [(0, 0) for _ in range(max(0, int(n_keypoints)))]
 
109
  for i, r in enumerate(results):
110
  frame_id = offset + i
111
  boxes: list[BoundingBox] = []
 
124
  ))
125
  boxes = _cross_class_nms(boxes, iou_thresh=self.CROSS_CLASS_IOU)
126
  out.append(TVFrameResult(frame_id=frame_id, boxes=boxes, keypoints=kp_zeros))
 
127
  return out