Gege24
/

ScoreVision

ONNX

Model card Files Files and versions

xet

Community

Gege24 committed on Dec 19, 2025

Commit

c46342c

verified ·

1 Parent(s): fc9d334

scorevision: push artifact

Browse files

Files changed (1) hide show

miner.py +126 -0

miner.py ADDED Viewed

	@@ -0,0 +1,126 @@

+from pathlib import Path
+from ultralytics import YOLO
+from numpy import ndarray
+from pydantic import BaseModel
+class BoundingBox(BaseModel):
+    """A single detected box with its class id and confidence score."""
+    # Box coordinates stored as integers.
+    # NOTE(review): presumably (x1, y1) and (x2, y2) are opposite corners in
+    # pixel coordinates of the input image — confirm against the consumer.
+    x1: int
+    y1: int
+    x2: int
+    y2: int
+    # Integer class id reported by the detection model.
+    cls_id: int
+    # Confidence score reported by the detection model.
+    conf: float
+class TVFrameResult(BaseModel):
+    """Prediction result for one frame: its id, its boxes and its keypoints."""
+    # Identifier of the frame this result belongs to.
+    frame_id: int
+    # All boxes detected in the frame (may be empty).
+    boxes: list[BoundingBox]
+    # Keypoints of the frame as (x, y) integer pairs.
+    keypoints: list[tuple[int, int]]
+class Miner:
+    """
+    This class is responsible for:
+    - Loading ML models.
+    - Running batched predictions on images.
+    - Parsing ML model outputs into structured results (TVFrameResult).
+    MODIFIED FOR TESTING: Uses standard yolov8n.pt and yolov8n-pose.pt
+    """
+    def __init__(self, path_hf_repo: Path) -> None:
+        """
+        Loads all ML models.
+        """
+        # Using standard YOLOv8 nano models that will be automatically downloaded
+        # if not present. This avoids the need for custom .pt files for testing.
+        self.bbox_model = YOLO("yolov8n.pt")
+        print(f"✅ BBox Model Loaded (yolov8n)")
+        self.keypoints_model = YOLO("yolov8n-pose.pt")
+        print(f"✅ Keypoints Model Loaded (yolov8n-pose)")
+    def __repr__(self) -> str:
+        return f"BBox Model: {type(self.bbox_model).__name__}\nKeypoints Model: {type(self.keypoints_model).__name__}"
+    def predict_batch(
+        self,
+        batch_images: list[ndarray],
+        offset: int,
+        n_keypoints: int,
+    ) -> list[TVFrameResult]:
+        """
+        Miner prediction for a batch of images.
+        """
+        bboxes: dict[int, list[BoundingBox]] = {}
+        # Run BBox prediction
+        bbox_model_results = self.bbox_model.predict(batch_images, verbose=False)
+        if bbox_model_results is not None:
+            for frame_number_in_batch, detection in enumerate(bbox_model_results):
+                if not hasattr(detection, "boxes") or detection.boxes is None:
+                    continue
+                boxes = []
+                for box in detection.boxes.data:
+                    # YOLOv8 standard output: x1, y1, x2, y2, conf, cls
+                    x1, y1, x2, y2, conf, cls_id = box.tolist()
+                    boxes.append(
+                        BoundingBox(
+                            x1=int(x1),
+                            y1=int(y1),
+                            x2=int(x2),
+                            y2=int(y2),
+                            cls_id=int(cls_id),
+                            conf=float(conf),
+                        )
+                    )
+                bboxes[offset + frame_number_in_batch] = boxes
+        print("✅ BBoxes predicted")
+        keypoints: dict[int, tuple[int, int]] = {}
+        # Run Pose/Keypoints prediction
+        keypoints_model_results = self.keypoints_model.predict(batch_images, verbose=False)
+        if keypoints_model_results is not None:
+            for frame_number_in_batch, detection in enumerate(keypoints_model_results):
+                if not hasattr(detection, "keypoints") or detection.keypoints is None:
+                    continue
+                frame_keypoints: list[tuple[int, int]] = []
+                # Check if keypoints data exists and has the expected shape/content
+                if detection.keypoints.data is not None and len(detection.keypoints.data) > 0:
+                     # Taking the first person detected for keypoints (simplification for testing)
+                     # YOLO pose output is typically [num_people, num_kpts, 3] (x, y, conf)
+                    first_person_kpts = detection.keypoints.data[0]
+                    for kpt in first_person_kpts:
+                       x, y = kpt[0], kpt[1] # extracting x, y
+                       frame_keypoints.append((int(x), int(y)))
+                # Padding or truncating to match expected n_keypoints
+                if len(frame_keypoints) < n_keypoints:
+                    frame_keypoints.extend(
+                        [(0, 0)] * (n_keypoints - len(frame_keypoints))
+                    )
+                else:
+                    frame_keypoints = frame_keypoints[:n_keypoints]
+                keypoints[offset + frame_number_in_batch] = frame_keypoints
+        print("✅ Keypoints predicted")
+        results: list[TVFrameResult] = []
+        for frame_number in range(offset, offset + len(batch_images)):
+            results.append(
+                TVFrameResult(
+                    frame_id=frame_number,
+                    boxes=bboxes.get(frame_number, []),
+                    keypoints=keypoints.get(
+                        frame_number, [(0, 0) for _ in range(n_keypoints)]
+                    ),
+                )
+            )
+        print("✅ Combined results as TVFrameResult")
+        return results