Ichiro1007
/

ScoreVision

ONNX

Model card Files Files and versions

xet

Community

Ichiro1007 committed on Apr 30

Commit

1c90073

verified ·

1 Parent(s): 49e996b

scorevision: push artifact

Browse files

Files changed (1) hide show

miner.py +297 -0

miner.py ADDED Viewed

	@@ -0,0 +1,297 @@

+"""
+Production Miner for YOLOv9s 4-Class Beverage Detection
+TurboVision Subnet 44 - Bittensor
+This miner implements the required interface for TurboVision validators.
+Model: YOLOv9s trained for 100 epochs on 4,840 images
+Classes: bottle, wine_glass, cup, can
+Performance: 89.59% mAP50, 100% can detection
+"""
+from pathlib import Path
+from typing import Optional
+import cv2
+import numpy as np
+import onnxruntime as ort
+from pydantic import BaseModel
+class BoundingBox(BaseModel):
+    """Bounding box with class and confidence."""
+    # Corner coordinates as integers. Miner.postprocess derives them from the
+    # model's (x_center, y_center, width, height) output and rescales them
+    # to the original image size before truncating with int().
+    x1: int  # x_center - width / 2
+    y1: int  # y_center - height / 2
+    x2: int  # x_center + width / 2
+    y2: int  # y_center + height / 2
+    # Index of the highest-scoring class (argmax over the class scores);
+    # Miner.class_names lists the names in index order.
+    cls_id: int
+    # Score of that class (max over the class scores).
+    conf: float
+class TVFrameResult(BaseModel):
+    """Result for a single frame."""
+    # Caller-supplied frame id; Miner.__call__ falls back to the position of
+    # the image in the batch when no ids are given.
+    frame_id: int
+    # Detections kept for this frame (may be empty).
+    boxes: list[BoundingBox]
+    # Miner.__call__ always passes an empty list here.
+    keypoints: list[tuple[int, int]]  # Empty for detection tasks
+class Miner:
+    """
+    YOLOv9s 4-Class Beverage Detection Miner
+    Optimized for TurboVision beverage detection competition.
+    Achieves 89.59% mAP50 validation accuracy with 100% can detection.
+    """
+    def __init__(self, path_hf_repo: Path) -> None:
+        """
+        Initialize the miner with model from Hugging Face repo.
+        Args:
+            path_hf_repo: Path to the Hugging Face repository containing weights.onnx
+        """
+        self.path_hf_repo = path_hf_repo
+        self.class_names = ['bottle', 'wine_glass', 'cup', 'can']
+        self.num_classes = len(self.class_names)
+        # Model input size
+        self.input_size = 640
+        # Initialize ONNX session with optimizations
+        sess_options = ort.SessionOptions()
+        sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
+        sess_options.intra_op_num_threads = 4
+        sess_options.inter_op_num_threads = 4
+        # Load model
+        model_path = path_hf_repo / "weights.onnx"
+        self.session = ort.InferenceSession(
+            str(model_path),
+            sess_options=sess_options,
+            providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
+        )
+        self.input_name = self.session.get_inputs()[0].name
+        self.output_names = [output.name for output in self.session.get_outputs()]
+        # Detection thresholds
+        self.conf_threshold = 0.25  # Confidence threshold
+        self.iou_threshold = 0.45   # NMS IoU threshold
+        print(f"✓ YOLOv9s model loaded from {model_path}")
+        print(f"✓ Input: {self.input_name}, Outputs: {self.output_names}")
+        print(f"✓ Classes: {self.class_names}")
+    def __repr__(self) -> str:
+        return (
+            f"YOLOv9s 4-Class Beverage Miner\n"
+            f"Model: {self.path_hf_repo / 'weights.onnx'}\n"
+            f"Classes: {self.class_names}\n"
+            f"Performance: 89.59% mAP50\n"
+        )
+    def preprocess(self, image: np.ndarray) -> np.ndarray:
+        """
+        Preprocess image for YOLO model.
+        Args:
+            image: BGR image (H, W, 3)
+        Returns:
+            Preprocessed tensor (1, 3, 640, 640)
+        """
+        # Resize to 640x640
+        img_resized = cv2.resize(image, (self.input_size, self.input_size))
+        # Convert BGR to RGB
+        img_rgb = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB)
+        # Normalize to [0, 1]
+        img_normalized = img_rgb.astype(np.float32) / 255.0
+        # Transpose to CHW format
+        img_transposed = np.transpose(img_normalized, (2, 0, 1))
+        # Add batch dimension
+        img_batch = np.expand_dims(img_transposed, axis=0)
+        return img_batch
+    def postprocess(
+        self,
+        outputs: list[np.ndarray],
+        orig_shape: tuple[int, int]
+    ) -> list[BoundingBox]:
+        """
+        Post-process YOLO outputs to extract bounding boxes.
+        Args:
+            outputs: Raw YOLO outputs
+            orig_shape: Original image shape (height, width)
+        Returns:
+            List of detected bounding boxes
+        """
+        predictions = outputs[0]  # Shape: (1, N, 4+num_classes)
+        predictions = predictions[0]  # Remove batch dimension: (N, 4+num_classes)
+        # Extract boxes and scores
+        boxes = predictions[:, :4]  # (N, 4) - x_center, y_center, width, height
+        scores = predictions[:, 4:]  # (N, num_classes)
+        # Get max class score and index for each detection
+        class_ids = np.argmax(scores, axis=1)  # (N,)
+        confidences = np.max(scores, axis=1)  # (N,)
+        # Filter by confidence threshold
+        mask = confidences > self.conf_threshold
+        boxes = boxes[mask]
+        class_ids = class_ids[mask]
+        confidences = confidences[mask]
+        if len(boxes) == 0:
+            return []
+        # Convert from xywh to xyxy format
+        boxes_xyxy = np.zeros_like(boxes)
+        boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2  # x1
+        boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2  # y1
+        boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2  # x2
+        boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2  # y2
+        # Scale boxes to original image size
+        scale_x = orig_shape[1] / self.input_size
+        scale_y = orig_shape[0] / self.input_size
+        boxes_xyxy[:, [0, 2]] *= scale_x
+        boxes_xyxy[:, [1, 3]] *= scale_y
+        # Apply NMS
+        indices = self.nms(boxes_xyxy, confidences, self.iou_threshold)
+        # Create BoundingBox objects
+        detections = []
+        for idx in indices:
+            box = boxes_xyxy[idx]
+            detections.append(BoundingBox(
+                x1=int(box[0]),
+                y1=int(box[1]),
+                x2=int(box[2]),
+                y2=int(box[3]),
+                cls_id=int(class_ids[idx]),
+                conf=float(confidences[idx])
+            ))
+        return detections
+    def nms(
+        self,
+        boxes: np.ndarray,
+        scores: np.ndarray,
+        iou_threshold: float
+    ) -> list[int]:
+        """
+        Non-Maximum Suppression.
+        Args:
+            boxes: Bounding boxes in xyxy format (N, 4)
+            scores: Confidence scores (N,)
+            iou_threshold: IoU threshold for NMS
+        Returns:
+            Indices of boxes to keep
+        """
+        # Sort by confidence (descending)
+        indices = np.argsort(scores)[::-1]
+        keep = []
+        while len(indices) > 0:
+            # Pick the box with highest confidence
+            current = indices[0]
+            keep.append(current)
+            if len(indices) == 1:
+                break
+            # Compute IoU with remaining boxes
+            current_box = boxes[current]
+            other_boxes = boxes[indices[1:]]
+            ious = self.compute_iou(current_box, other_boxes)
+            # Keep boxes with IoU below threshold
+            mask = ious < iou_threshold
+            indices = indices[1:][mask]
+        return keep
+    def compute_iou(
+        self,
+        box: np.ndarray,
+        boxes: np.ndarray
+    ) -> np.ndarray:
+        """
+        Compute IoU between one box and multiple boxes.
+        Args:
+            box: Single box (4,)
+            boxes: Multiple boxes (N, 4)
+        Returns:
+            IoU values (N,)
+        """
+        # Compute intersection
+        x1 = np.maximum(box[0], boxes[:, 0])
+        y1 = np.maximum(box[1], boxes[:, 1])
+        x2 = np.minimum(box[2], boxes[:, 2])
+        y2 = np.minimum(box[3], boxes[:, 3])
+        intersection = np.maximum(0, x2 - x1) * np.maximum(0, y2 - y1)
+        # Compute union
+        box_area = (box[2] - box[0]) * (box[3] - box[1])
+        boxes_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
+        union = box_area + boxes_area - intersection
+        # Compute IoU
+        iou = intersection / (union + 1e-6)
+        return iou
+    def __call__(
+        self,
+        images: list[np.ndarray],
+        frame_ids: Optional[list[int]] = None,
+    ) -> list[TVFrameResult]:
+        """
+        Run detection on a batch of images.
+        Args:
+            images: List of BGR images
+            frame_ids: Optional frame IDs
+        Returns:
+            List of detection results
+        """
+        if frame_ids is None:
+            frame_ids = list(range(len(images)))
+        results = []
+        for image, frame_id in zip(images, frame_ids):
+            # Preprocess
+            input_tensor = self.preprocess(image)
+            # Run inference
+            outputs = self.session.run(
+                self.output_names,
+                {self.input_name: input_tensor}
+            )
+            # Post-process
+            boxes = self.postprocess(outputs, image.shape[:2])
+            # Create result
+            result = TVFrameResult(
+                frame_id=frame_id,
+                boxes=boxes,
+                keypoints=[]  # Empty for detection tasks
+            )
+            results.append(result)
+        return results