from pathlib import Path

from numpy import ndarray
from pydantic import BaseModel
from ultralytics import YOLO


class BoundingBox(BaseModel):
    x1: int
    y1: int
    x2: int
    y2: int
    cls_id: int
    conf: float


class TVFrameResult(BaseModel):
    frame_id: int
    boxes: list[BoundingBox]
    keypoints: list[tuple[int, int]]


class Miner:
    """
    This class is responsible for:
    - Loading ML models.
    - Running batched predictions on images.
    - Parsing ML model outputs into structured results (TVFrameResult).

    This class can be modified, but it must have the following to be compatible
    with the chute:
    - be named `Miner`
    - have a `predict_batch` function with the inputs and outputs specified
    - be stored in a file called `miner.py` which lives in the root of the
      HFHub repo
    """

    def __init__(self, path_hf_repo: Path) -> None:
        """
        Loads all ML models from the repository.

        -----(Adjust as needed)----

        Args:
            path_hf_repo (Path): Path to the downloaded HuggingFace Hub repository

        Returns:
            None
        """
        self.bbox_model = YOLO(path_hf_repo / "football-player-detection.pt")
        print("✅ BBox Model Loaded")

        self.keypoints_model = YOLO(path_hf_repo / "football-pitch-detection.pt")
        print("✅ Keypoints Model Loaded")

    def __repr__(self) -> str:
        """
        Information about the miner, returned in the health endpoint to inspect
        the loaded ML models (and their types).

        -----(Adjust as needed)----
        """
        return (
            f"BBox Model: {type(self.bbox_model).__name__}\n"
            f"Keypoints Model: {type(self.keypoints_model).__name__}"
        )

    def predict_batch(
        self,
        batch_images: list[ndarray],
        offset: int,
        n_keypoints: int,
    ) -> list[TVFrameResult]:
        """
        Miner prediction for a batch of images. Handles the orchestration of the
        ML models and any preprocessing and postprocessing.

        -----(Adjust as needed)----

        Args:
            batch_images (list[np.ndarray]): A list of images (as NumPy arrays)
                to process in this batch.
            offset (int): The frame number corresponding to the first image in
                the batch. Used to correctly index frames in the output results.
            n_keypoints (int): The number of keypoints expected for each frame
                in this challenge type.
        Returns:
            list[TVFrameResult]: A list of predictions for each image in the batch
        """
        # Detect bounding boxes for every frame in the batch.
        bboxes: dict[int, list[BoundingBox]] = {}
        bbox_model_results = self.bbox_model.predict(batch_images)
        if bbox_model_results is not None:
            for frame_number_in_batch, detection in enumerate(bbox_model_results):
                if not hasattr(detection, "boxes") or detection.boxes is None:
                    continue
                boxes: list[BoundingBox] = []
                for box in detection.boxes.data:
                    x1, y1, x2, y2, conf, cls_id = box.tolist()
                    boxes.append(
                        BoundingBox(
                            x1=int(x1),
                            y1=int(y1),
                            x2=int(x2),
                            y2=int(y2),
                            cls_id=int(cls_id),
                            conf=float(conf),
                        )
                    )
                bboxes[offset + frame_number_in_batch] = boxes
        print("✅ BBoxes predicted")

        # Detect pitch keypoints, padding or truncating each frame's list to
        # exactly n_keypoints entries.
        keypoints: dict[int, list[tuple[int, int]]] = {}
        keypoints_model_results = self.keypoints_model.predict(batch_images)
        if keypoints_model_results is not None:
            for frame_number_in_batch, detection in enumerate(keypoints_model_results):
                if not hasattr(detection, "keypoints") or detection.keypoints is None:
                    continue
                frame_keypoints: list[tuple[int, int]] = []
                for part_points in detection.keypoints.data:
                    for x, y, _ in part_points:
                        frame_keypoints.append((int(x), int(y)))
                if len(frame_keypoints) < n_keypoints:
                    frame_keypoints.extend(
                        [(0, 0)] * (n_keypoints - len(frame_keypoints))
                    )
                else:
                    frame_keypoints = frame_keypoints[:n_keypoints]
                keypoints[offset + frame_number_in_batch] = frame_keypoints
        print("✅ Keypoints predicted")

        # Merge both model outputs into one TVFrameResult per frame, falling
        # back to empty boxes / zeroed keypoints for frames with no detections.
        results: list[TVFrameResult] = []
        for frame_number in range(offset, offset + len(batch_images)):
            results.append(
                TVFrameResult(
                    frame_id=frame_number,
                    boxes=bboxes.get(frame_number, []),
                    keypoints=keypoints.get(
                        frame_number, [(0, 0) for _ in range(n_keypoints)]
                    ),
                )
            )
        print("✅ Combined results as TVFrameResult")

        return results
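

# ---------------------------------------------------------------------------
# Minimal local smoke-test sketch (an assumption, not part of the chute
# contract described in the class docstring). The repo path, batch contents,
# and n_keypoints value below are hypothetical placeholders — point them at a
# real downloaded HFHub repo and real frames before running.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import numpy as np

    # Hypothetical local path to a downloaded repo containing the two .pt
    # weight files loaded in __init__.
    miner = Miner(Path("./hf_repo"))
    print(miner)

    # Two dummy 720p RGB frames stand in for real video frames.
    dummy_batch = [np.zeros((720, 1280, 3), dtype=np.uint8) for _ in range(2)]
    for result in miner.predict_batch(dummy_batch, offset=0, n_keypoints=32):
        print(result.frame_id, len(result.boxes), len(result.keypoints))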