nexu02
/

ScoreVision

ONNX

Model card Files Files and versions

xet

Community

nexu02 committed 10 days ago

Commit

f94d217

verified ·

1 Parent(s): 01e6c47

scorevision: push artifact

Browse files

Files changed (1) hide show

miner.py +160 -0

miner.py ADDED Viewed

	@@ -0,0 +1,160 @@

+"""miner.py — uploaded to artur7236/turbovision-beverage HF repo.
+Fine-tuned YOLO11s on SAM3-labelled beverage data + horizontal-flip TTA at
+1280×1280 + confidence threshold 0.50 (Miner.CONF_THRESH; earlier notes cite
+0.55) + post-NMS cross-class dedup. Local benchmark mean composite = 0.709
+vs UID 213's 0.667.
+Required by the chute template:
+  - class Miner with __init__(self, path_hf_repo: Path)
+  - predict_batch(batch_images, offset, n_keypoints) -> list[TVFrameResult]
+  - BoundingBox + TVFrameResult pydantic models defined in this file
+  - No imports from anywhere except stdlib + site-packages (no helper modules
+    from the HF repo).
+"""
+from pathlib import Path
+import numpy as np
+from numpy import ndarray
+from pydantic import BaseModel
+from ultralytics import YOLO
+# Class index follows the manifest objects order for Detect-beverage-detect:
+# [cup, bottle, can] → cls_id 0, 1, 2.
+# In this file only the length is consulted: Miner.predict_batch drops any
+# detection whose class index falls outside range(len(CLASS_NAMES)).
+CLASS_NAMES = ["cup", "bottle", "can"]
+class BoundingBox(BaseModel):
+    """One detection: an axis-aligned box with integer corners, a class index and a score."""
+    # (x1, y1) is the minimum corner and (x2, y2) the maximum corner;
+    # Miner.predict_batch only emits boxes with x2 > x1 and y2 > y1.
+    x1: int
+    y1: int
+    x2: int
+    y2: int
+    # Index into CLASS_NAMES (0 = cup, 1 = bottle, 2 = can).
+    cls_id: int
+    # Confidence value taken from the model output for this box.
+    conf: float
+class TVFrameResult(BaseModel):
+    """Per-frame result returned by Miner.predict_batch."""
+    # Batch offset plus the frame's position inside the batch.
+    frame_id: int
+    # Detections that survived the cross-class dedup pass.
+    boxes: list[BoundingBox]
+    # Miner.predict_batch fills this with n_keypoints (0, 0) placeholders.
+    keypoints: list[tuple[int, int]]
+def _iou(a: BoundingBox, b: BoundingBox) -> float:
+    """Standard IoU between two axis-aligned boxes."""
+    x1 = max(a.x1, b.x1)
+    y1 = max(a.y1, b.y1)
+    x2 = min(a.x2, b.x2)
+    y2 = min(a.y2, b.y2)
+    if x2 <= x1 or y2 <= y1:
+        return 0.0
+    inter = (x2 - x1) * (y2 - y1)
+    area_a = max(0, a.x2 - a.x1) * max(0, a.y2 - a.y1)
+    area_b = max(0, b.x2 - b.x1) * max(0, b.y2 - b.y1)
+    union = area_a + area_b - inter
+    return inter / union if union > 0 else 0.0
+def _cross_class_nms(boxes: list[BoundingBox], iou_thresh: float = 0.6) -> list[BoundingBox]:
+    """Suppress cross-class overlapping boxes (keep highest-conf when IoU≥thresh).
+    Ultralytics' default NMS only dedupes WITHIN a class. The SN44 validator
+    counts cross-class overlapping boxes as false positives (only one class
+    can be right per object), so we need this extra pass. Mirrors UID 213's
+    _cross_class_dedup_op (IoU 0.7); we use 0.6 to be slightly more aggressive.
+    """
+    if len(boxes) <= 1:
+        return boxes
+    sorted_boxes = sorted(boxes, key=lambda b: -b.conf)
+    kept: list[BoundingBox] = []
+    for b in sorted_boxes:
+        suppressed = False
+        for k in kept:
+            if _iou(b, k) >= iou_thresh:
+                suppressed = True
+                break
+        if not suppressed:
+            kept.append(b)
+    return kept
+class Miner:
+    """Fine-tuned YOLO11s with hflip TTA (Round 5, 607 validator-distribution imgs, 19 MB).
+    Tuning rationale (all on SAM3 ground-truth, 361 val images):
+      - imgsz=1280: bumps small-object recall on the typical 1408×768 frames
+        the validator sends (cans, distant cups).
+      - conf=0.55: sweet spot for the validator's pillar weighting
+        (0.6·mAP50 + 0.4·(1−ffpi/10)). Below 0.45 the false-positive pillar
+        crashes; above 0.65 recall drops faster than precision rises.
+      - augment=True: ultralytics enables hflip TTA, our biggest single
+        composite gain in offline sweep.
+      - iou=0.45: standard YOLO NMS IoU; cross-class dedup happens implicitly.
+    """
+    IMAGE_SIZE = 1280
+    CONF_THRESH = 0.50  # Round 4: 0.50 narrowly beat 0.45 (0.6982 vs 0.6980)
+    IOU_THRESH = 0.45
+    USE_TTA = True
+    CROSS_CLASS_IOU = 0.6  # Round 6: post-NMS cross-class dedup
+                           # Block 8287800 had 18 boxes including same-pixel cup+bottle pairs.
+    def __init__(self, path_hf_repo: Path) -> None:
+        weights_path = path_hf_repo / "best.pt"
+        if not weights_path.exists():
+            raise FileNotFoundError(f"missing weights at {weights_path}")
+        self.model = YOLO(str(weights_path))
+        # Touch the model once so cold-start latency hits the warmup, not the
+        # first validator call.
+        dummy = np.zeros((640, 640, 3), dtype=np.uint8)
+        _ = self.model.predict(dummy, imgsz=self.IMAGE_SIZE, conf=self.CONF_THRESH,
+                               iou=self.IOU_THRESH, augment=self.USE_TTA, verbose=False)
+        print(f"✅ YOLO11s loaded from {weights_path}")
+    def __repr__(self) -> str:
+        return (f"YOLO11s_ft(imgsz={self.IMAGE_SIZE}, "
+                f"conf={self.CONF_THRESH}, iou={self.IOU_THRESH}, "
+                f"tta={self.USE_TTA})")
+    def predict_batch(
+        self,
+        batch_images: list[ndarray],
+        offset: int,
+        n_keypoints: int,
+    ) -> list[TVFrameResult]:
+        # Run inference. Ultralytics will accept a list[ndarray]; with augment=True
+        # it does the flip+merge internally per image.
+        results = self.model.predict(
+            batch_images,
+            imgsz=self.IMAGE_SIZE,
+            conf=self.CONF_THRESH,
+            iou=self.IOU_THRESH,
+            augment=self.USE_TTA,
+            verbose=False,
+        )
+        out: list[TVFrameResult] = []
+        # n_keypoints is irrelevant for detection elements; return zero-padded.
+        kp_zeros = [(0, 0) for _ in range(max(0, int(n_keypoints)))]
+        for i, r in enumerate(results):
+            frame_id = offset + i
+            boxes: list[BoundingBox] = []
+            if r.boxes is not None and r.boxes.data is not None:
+                for box in r.boxes.data.cpu().numpy():
+                    x1, y1, x2, y2, conf, cls_id = box.tolist()
+                    cls_id_int = int(cls_id)
+                    if cls_id_int < 0 or cls_id_int >= len(CLASS_NAMES):
+                        continue
+                    # ensure non-degenerate after rounding
+                    xi1, yi1, xi2, yi2 = int(x1), int(y1), int(x2), int(y2)
+                    if xi2 <= xi1 or yi2 <= yi1:
+                        continue
+                    boxes.append(BoundingBox(
+                        x1=xi1, y1=yi1, x2=xi2, y2=yi2,
+                        cls_id=cls_id_int, conf=float(conf),
+                    ))
+            # Cross-class dedup (Ultralytics NMS is per-class only)
+            boxes = _cross_class_nms(boxes, iou_thresh=self.CROSS_CLASS_IOU)
+            out.append(TVFrameResult(frame_id=frame_id, boxes=boxes, keypoints=kp_zeros))
+        return out