Gege24
/

ScoreVision

ONNX

Model card Files Files and versions

xet

Community

Gege24 committed on Dec 19, 2025

Commit

2e8cd2d

verified ·

1 Parent(s): a6e2f77

scorevision: push artifact

Browse files

Files changed (1) hide show

miner.py +29 -42

miner.py CHANGED Viewed

@@ -27,35 +27,22 @@ class Miner:
     - Running batched predictions on images.
     - Parsing ML model outputs into structured results (TVFrameResult).
-    This class can be modified, but it must have the following to be compatible with the chute:
-        - be named `Miner`
-        - have a `predict_batch` function with the inputs and outputs specified
-        - be stored in a file called `miner.py` which lives in the root of the HFHub repo
     """
     def __init__(self, path_hf_repo: Path) -> None:
         """
-        Loads all ML models from the repository.
-        -----(Adjust as needed)----
-        Args:
-            path_hf_repo (Path):
-                Path to the downloaded HuggingFace Hub repository
-        Returns:
-            None
         """
-        self.bbox_model = YOLO(path_hf_repo / "football-player-detection.pt")
-        print(f"✅ BBox Model Loaded")
-        self.keypoints_model = YOLO(path_hf_repo / "football-pitch-detection.pt")
-        print(f"✅ Keypoints Model Loaded")
     def __repr__(self) -> str:
-        """
-        Information about miner returned in the health endpoint
-        to inspect the loaded ML models (and their types)
-        -----(Adjust as needed)----
-        """
         return f"BBox Model: {type(self.bbox_model).__name__}\nKeypoints Model: {type(self.keypoints_model).__name__}"
     def predict_batch(
@@ -66,31 +53,19 @@ class Miner:
     ) -> list[TVFrameResult]:
         """
         Miner prediction for a batch of images.
-        Handles the orchestration of ML models and any preprocessing and postprocessing
-        -----(Adjust as needed)----
-        Args:
-            batch_images (list[np.ndarray]):
-                A list of images (as NumPy arrays) to process in this batch.
-            offset (int):
-                The frame number corresponding to the first image in the batch.
-                Used to correctly index frames in the output results.
-            n_keypoints (int):
-                The number of keypoints expected for each frame in this challenge type.
-        Returns:
-            list[TVFrameResult]:
-                A list of predictions for each image in the batch
         """
         bboxes: dict[int, list[BoundingBox]] = {}
-        bbox_model_results = self.bbox_model.predict(batch_images)
         if bbox_model_results is not None:
             for frame_number_in_batch, detection in enumerate(bbox_model_results):
                 if not hasattr(detection, "boxes") or detection.boxes is None:
                     continue
                 boxes = []
                 for box in detection.boxes.data:
                     x1, y1, x2, y2, conf, cls_id = box.tolist()
                     boxes.append(
                         BoundingBox(
@@ -106,21 +81,33 @@ class Miner:
         print("✅ BBoxes predicted")
         keypoints: dict[int, tuple[int, int]] = {}
-        keypoints_model_results = self.keypoints_model.predict(batch_images)
         if keypoints_model_results is not None:
             for frame_number_in_batch, detection in enumerate(keypoints_model_results):
                 if not hasattr(detection, "keypoints") or detection.keypoints is None:
                     continue
                 frame_keypoints: list[tuple[int, int]] = []
-                for part_points in detection.keypoints.data:
-                    for x, y, _ in part_points:
-                        frame_keypoints.append((int(x), int(y)))
                 if len(frame_keypoints) < n_keypoints:
                     frame_keypoints.extend(
                         [(0, 0)] * (n_keypoints - len(frame_keypoints))
                     )
                 else:
                     frame_keypoints = frame_keypoints[:n_keypoints]
                 keypoints[offset + frame_number_in_batch] = frame_keypoints
         print("✅ Keypoints predicted")

     - Running batched predictions on images.
     - Parsing ML model outputs into structured results (TVFrameResult).
+    MODIFIED FOR TESTING: Uses standard yolov8n.pt and yolov8n-pose.pt
     """
     def __init__(self, path_hf_repo: Path) -> None:
         """
+        Loads all ML models.
         """
+        # Using standard YOLOv8 nano models that will be automatically downloaded
+        # if not present. This avoids the need for custom .pt files for testing.
+        self.bbox_model = YOLO("yolov8n.pt")
+        print(f"✅ BBox Model Loaded (yolov8n)")
+        self.keypoints_model = YOLO("yolov8n-pose.pt")
+        print(f"✅ Keypoints Model Loaded (yolov8n-pose)")
     def __repr__(self) -> str:
         return f"BBox Model: {type(self.bbox_model).__name__}\nKeypoints Model: {type(self.keypoints_model).__name__}"
     def predict_batch(
     ) -> list[TVFrameResult]:
         """
         Miner prediction for a batch of images.
         """
         bboxes: dict[int, list[BoundingBox]] = {}
+        # Run BBox prediction
+        bbox_model_results = self.bbox_model.predict(batch_images, verbose=False)
         if bbox_model_results is not None:
             for frame_number_in_batch, detection in enumerate(bbox_model_results):
                 if not hasattr(detection, "boxes") or detection.boxes is None:
                     continue
                 boxes = []
                 for box in detection.boxes.data:
+                    # YOLOv8 standard output: x1, y1, x2, y2, conf, cls
                     x1, y1, x2, y2, conf, cls_id = box.tolist()
                     boxes.append(
                         BoundingBox(
         print("✅ BBoxes predicted")
         keypoints: dict[int, tuple[int, int]] = {}
+        # Run Pose/Keypoints prediction
+        keypoints_model_results = self.keypoints_model.predict(batch_images, verbose=False)
         if keypoints_model_results is not None:
             for frame_number_in_batch, detection in enumerate(keypoints_model_results):
                 if not hasattr(detection, "keypoints") or detection.keypoints is None:
                     continue
                 frame_keypoints: list[tuple[int, int]] = []
+                # Check if keypoints data exists and has the expected shape/content
+                if detection.keypoints.data is not None and len(detection.keypoints.data) > 0:
+                     # Taking the first person detected for keypoints (simplification for testing)
+                     # YOLO pose output is typically [num_people, num_kpts, 3] (x, y, conf)
+                    first_person_kpts = detection.keypoints.data[0]
+                    for kpt in first_person_kpts:
+                       x, y = kpt[0], kpt[1] # extracting x, y
+                       frame_keypoints.append((int(x), int(y)))
+                # Padding or truncating to match expected n_keypoints
                 if len(frame_keypoints) < n_keypoints:
                     frame_keypoints.extend(
                         [(0, 0)] * (n_keypoints - len(frame_keypoints))
                     )
                 else:
                     frame_keypoints = frame_keypoints[:n_keypoints]
                 keypoints[offset + frame_number_in_batch] = frame_keypoints
         print("✅ Keypoints predicted")