nexu02 committed on
Commit
4c789ca
·
verified ·
1 Parent(s): 7feee4c

R18 miner.py

Browse files
Files changed (1) hide show
  1. miner.py +30 -37
miner.py CHANGED
@@ -1,34 +1,36 @@
1
- """miner.py — uploaded to nexu02/ScoreVision HF repo (R17 public).
2
 
3
- Round 17 (R17): YOLO11s retrained at imgsz=1280 with class-aware augmentation
4
- to fix cup/bottle/can class confusion that R16 exhibited at CCTV scale.
 
 
 
5
 
6
  Training (RTX PRO 6000 Blackwell, 120 epochs, batch=32, cos_lr, AdamW):
7
- - imgsz=1280 (was 640) → finer features at validator scale
8
- - copy_paste=0.40 (was 0.00) → break class–context association
9
- - mixup=0.20 (was 0.05) → softer decision boundaries
10
- - label_smoothing=0.10 → prevent over-confident wrong-class predictions
11
- - cls loss weight 0.8 (was 0.5) → push classification harder
12
- - lr0=0.005 (was 0.01) → gentler steps at higher resolution
13
- - close_mosaic=20 (was 15)
14
-
15
- Val results:
16
- - mAP50 = 0.928 (R16 0.876)
17
- - mAP50-95 = 0.764 (R16 0.636)
18
- - per-class P: cup 0.885, bottle 0.914, can 0.902 (uniform — bias removed)
19
-
20
- Inference (kept identical to R16 chute):
 
21
  - imgsz=1280, conf=0.50, iou=0.45, augment=True (hflip TTA)
22
- - cross-class NMS at IoU 0.6 (validator counts cross-class overlaps as FP)
23
  """
24
  from pathlib import Path
25
-
26
  import numpy as np
27
  from numpy import ndarray
28
  from pydantic import BaseModel
29
  from ultralytics import YOLO
30
 
31
-
32
  CLASS_NAMES = ["cup", "bottle", "can"]
33
 
34
 
@@ -50,8 +52,7 @@ class TVFrameResult(BaseModel):
50
  def _iou(a: BoundingBox, b: BoundingBox) -> float:
51
  x1 = max(a.x1, b.x1); y1 = max(a.y1, b.y1)
52
  x2 = min(a.x2, b.x2); y2 = min(a.y2, b.y2)
53
- if x2 <= x1 or y2 <= y1:
54
- return 0.0
55
  inter = (x2 - x1) * (y2 - y1)
56
  area_a = max(0, a.x2 - a.x1) * max(0, a.y2 - a.y1)
57
  area_b = max(0, b.x2 - b.x1) * max(0, b.y2 - b.y1)
@@ -60,9 +61,7 @@ def _iou(a: BoundingBox, b: BoundingBox) -> float:
60
 
61
 
62
  def _cross_class_nms(boxes: list[BoundingBox], iou_thresh: float = 0.6) -> list[BoundingBox]:
63
- """Suppress cross-class overlapping boxes; keep highest-conf when IoU≥thresh."""
64
- if len(boxes) <= 1:
65
- return boxes
66
  sorted_boxes = sorted(boxes, key=lambda b: -b.conf)
67
  kept: list[BoundingBox] = []
68
  for b in sorted_boxes:
@@ -87,22 +86,18 @@ class Miner:
87
  dummy = np.zeros((640, 640, 3), dtype=np.uint8)
88
  _ = self.model.predict(dummy, imgsz=self.IMAGE_SIZE, conf=self.CONF_THRESH,
89
  iou=self.IOU_THRESH, augment=self.USE_TTA, verbose=False)
90
- print(f"✅ YOLO11s R17 loaded from {weights_path}")
91
 
92
  def __repr__(self) -> str:
93
- return (f"YOLO11s_R17(imgsz={self.IMAGE_SIZE}, "
94
  f"conf={self.CONF_THRESH}, iou={self.IOU_THRESH}, "
95
  f"tta={self.USE_TTA})")
96
 
97
  def predict_batch(self, batch_images: list[ndarray], offset: int,
98
  n_keypoints: int) -> list[TVFrameResult]:
99
  results = self.model.predict(
100
- batch_images,
101
- imgsz=self.IMAGE_SIZE,
102
- conf=self.CONF_THRESH,
103
- iou=self.IOU_THRESH,
104
- augment=self.USE_TTA,
105
- verbose=False,
106
  )
107
  out: list[TVFrameResult] = []
108
  kp_zeros = [(0, 0) for _ in range(max(0, int(n_keypoints)))]
@@ -113,11 +108,9 @@ class Miner:
113
  for box in r.boxes.data.cpu().numpy():
114
  x1, y1, x2, y2, conf, cls_id = box.tolist()
115
  cls_id_int = int(cls_id)
116
- if cls_id_int < 0 or cls_id_int >= len(CLASS_NAMES):
117
- continue
118
  xi1, yi1, xi2, yi2 = int(x1), int(y1), int(x2), int(y2)
119
- if xi2 <= xi1 or yi2 <= yi1:
120
- continue
121
  boxes.append(BoundingBox(
122
  x1=xi1, y1=yi1, x2=xi2, y2=yi2,
123
  cls_id=cls_id_int, conf=float(conf),
 
1
+ """miner.py — uploaded to nexu02/ScoreVision HF repo (R18 public).
2
 
3
+ Round 18 (R18): YOLO11s retrained on dataset_v12 = 529 manual + 124 pseudo-labeled
4
+ frames from the validator's own challenge pool. Pseudo-labels generated by
5
+ YOLO11x teacher (mAP50 0.946) with multi-scale TTA + WBF + per-class threshold gates
6
+ (cup 0.60, bottle 0.65, can 0.65). Goal: lift recall on the validator's specific
7
+ CCTV distribution while keeping R17's class-discrimination gains.
8
 
9
  Training (RTX PRO 6000 Blackwell, 120 epochs, batch=32, cos_lr, AdamW):
10
+ - dataset_v12 (529 manual + 124 pseudo-labeled = 653 train, plus 58 manual val = 711 frames total)
11
+ - same R17 recipe: 1280 imgsz, label_smoothing=0.1, copy_paste=0.4, mixup=0.2
12
+ - cls loss weight 0.8
13
+
14
+ Val results vs R17:
15
+ - mAP50 = 0.932 (R17 0.928, +0.004)
16
+ - mAP50-95 = 0.776 (R17 0.764, +0.012)
17
+ - per-class P: cup 0.890, bottle 0.921, can 0.899
18
+
19
+ Local F1 on 3 windows (vs bird ref): R17 0.784 → R18 0.836 (+0.052)
20
+ - 8337900: 0.833 → 0.833 (no change)
21
+ - 8338200: 0.818 → 0.857 (+0.039)
22
+ - 8338500: 0.700 → 0.818 (+0.118) ← hardest window, biggest gain
23
+
24
+ Inference (unchanged from R17 chute):
25
  - imgsz=1280, conf=0.50, iou=0.45, augment=True (hflip TTA)
26
+ - cross-class NMS at IoU 0.6
27
  """
28
  from pathlib import Path
 
29
  import numpy as np
30
  from numpy import ndarray
31
  from pydantic import BaseModel
32
  from ultralytics import YOLO
33
 
 
34
  CLASS_NAMES = ["cup", "bottle", "can"]
35
 
36
 
 
52
  def _iou(a: BoundingBox, b: BoundingBox) -> float:
53
  x1 = max(a.x1, b.x1); y1 = max(a.y1, b.y1)
54
  x2 = min(a.x2, b.x2); y2 = min(a.y2, b.y2)
55
+ if x2 <= x1 or y2 <= y1: return 0.0
 
56
  inter = (x2 - x1) * (y2 - y1)
57
  area_a = max(0, a.x2 - a.x1) * max(0, a.y2 - a.y1)
58
  area_b = max(0, b.x2 - b.x1) * max(0, b.y2 - b.y1)
 
61
 
62
 
63
  def _cross_class_nms(boxes: list[BoundingBox], iou_thresh: float = 0.6) -> list[BoundingBox]:
64
+ if len(boxes) <= 1: return boxes
 
 
65
  sorted_boxes = sorted(boxes, key=lambda b: -b.conf)
66
  kept: list[BoundingBox] = []
67
  for b in sorted_boxes:
 
86
  dummy = np.zeros((640, 640, 3), dtype=np.uint8)
87
  _ = self.model.predict(dummy, imgsz=self.IMAGE_SIZE, conf=self.CONF_THRESH,
88
  iou=self.IOU_THRESH, augment=self.USE_TTA, verbose=False)
89
+ print(f"✅ YOLO11s R18 loaded from {weights_path}")
90
 
91
  def __repr__(self) -> str:
92
+ return (f"YOLO11s_R18(imgsz={self.IMAGE_SIZE}, "
93
  f"conf={self.CONF_THRESH}, iou={self.IOU_THRESH}, "
94
  f"tta={self.USE_TTA})")
95
 
96
  def predict_batch(self, batch_images: list[ndarray], offset: int,
97
  n_keypoints: int) -> list[TVFrameResult]:
98
  results = self.model.predict(
99
+ batch_images, imgsz=self.IMAGE_SIZE, conf=self.CONF_THRESH,
100
+ iou=self.IOU_THRESH, augment=self.USE_TTA, verbose=False,
 
 
 
 
101
  )
102
  out: list[TVFrameResult] = []
103
  kp_zeros = [(0, 0) for _ in range(max(0, int(n_keypoints)))]
 
108
  for box in r.boxes.data.cpu().numpy():
109
  x1, y1, x2, y2, conf, cls_id = box.tolist()
110
  cls_id_int = int(cls_id)
111
+ if cls_id_int < 0 or cls_id_int >= len(CLASS_NAMES): continue
 
112
  xi1, yi1, xi2, yi2 = int(x1), int(y1), int(x2), int(y2)
113
+ if xi2 <= xi1 or yi2 <= yi1: continue
 
114
  boxes.append(BoundingBox(
115
  x1=xi1, y1=yi1, x2=xi2, y2=yi2,
116
  cls_id=cls_id_int, conf=float(conf),