EiMon724 committed on
Commit
1365e57
·
verified ·
1 Parent(s): 45f7cd2

scorevision: push artifact

Browse files
Files changed (1) hide show
  1. miner.py +175 -0
miner.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ # NOTE:
4
+ # - This is copied from `example_miner/miner.py` as a starting point.
5
+ # - This version shows how to use a SAM-style segmentation model as your detector.
6
+ # - SAM gives masks (segmentation). This subnet expects boxes, so we convert masks -> boxes.
7
+ # - SAM does NOT give 32 pitch keypoints; you likely need a separate keypoint model.
8
+
9
+ import os
10
+ from typing import Any
11
+
12
+ import cv2
13
+ import numpy as np
14
+ from numpy import ndarray
15
+ from pydantic import BaseModel
16
+
17
+
18
class BoundingBox(BaseModel):
    """Axis-aligned detection box in pixel coordinates (x1, y1) top-left, (x2, y2) bottom-right."""

    x1: int
    y1: int
    x2: int
    y2: int
    # Detection class id; predict_batch currently emits 2 ("player") for every mask.
    cls_id: int
    # Confidence in [0, 1]; predict_batch fills this from SAM's predicted_iou (0.5 fallback).
    conf: float
25
+
26
+
27
class TVFrameResult(BaseModel):
    """Per-frame prediction payload: detection boxes plus pitch keypoints."""

    # Absolute frame index (batch offset + position within the batch).
    frame_id: int
    # All detections kept for this frame after area filtering.
    boxes: list[BoundingBox]
    # (x, y) pixel coordinates; predict_batch emits exactly n_keypoints entries.
    keypoints: list[tuple[int, int]]
31
+
32
+
33
class Miner:
    """
    Your miner engine.

    Requirements (must keep):
    - file name: `miner.py` (repo root)
    - class name: `Miner`
    - method: `predict_batch(batch_images, offset, n_keypoints)`
    """

    def __init__(self, path_hf_repo: Path) -> None:
        """
        Load your models from the HuggingFace repo snapshot directory.

        For SAM-based detection:
        - Put your SAM checkpoint file in this repo folder (same folder as miner.py)
        - Set SAM_CHECKPOINT env var (optional) to choose the filename.

        Raises:
            FileNotFoundError: if the SAM checkpoint is not present in the repo folder.
            ImportError: if the segment-anything package is not installed.
        """
        self.path_hf_repo = path_hf_repo

        # ---------------- SAM setup ----------------
        # IMPORTANT: "SAM 3" can mean different things. This skeleton uses the common
        # Segment Anything API shape (sam_model_registry + SamAutomaticMaskGenerator).
        # If your SAM3 is different, keep the structure and replace the loading/inference.
        ckpt_name = os.getenv("SAM_CHECKPOINT", "sam_vit_h_4b8939.pth")
        ckpt_path = (path_hf_repo / ckpt_name).resolve()
        if not ckpt_path.is_file():
            raise FileNotFoundError(
                f"SAM checkpoint not found: {ckpt_path}. Put the checkpoint in your HF repo "
                f"and/or set SAM_CHECKPOINT to the correct filename."
            )

        model_type = os.getenv("SAM_MODEL_TYPE", "vit_h")  # vit_h / vit_l / vit_b (depends on checkpoint)
        try:
            from segment_anything import sam_model_registry, SamAutomaticMaskGenerator
        except Exception as e:
            raise ImportError(
                "segment-anything is not installed in the Chutes image. "
                "Add it to chute_config.yml (pip install segment-anything)."
            ) from e

        # FIX: checking CUDA_VISIBLE_DEVICES alone is unreliable — the variable is often
        # unset on GPU machines (we would wrongly pick cpu) and may be set to "" precisely
        # to *disable* GPUs. Ask torch directly; torch is guaranteed importable here
        # because segment-anything depends on it.
        import torch

        device = "cuda" if torch.cuda.is_available() else "cpu"
        self.sam = sam_model_registry[model_type](checkpoint=str(ckpt_path))
        self.sam.to(device=device)

        # Tunables: lower points_per_side => faster, fewer masks.
        self.mask_generator = SamAutomaticMaskGenerator(
            self.sam,
            points_per_side=int(os.getenv("SAM_POINTS_PER_SIDE", "16")),
            pred_iou_thresh=float(os.getenv("SAM_PRED_IOU_THRESH", "0.88")),
            stability_score_thresh=float(os.getenv("SAM_STABILITY_THRESH", "0.90")),
            min_mask_region_area=int(os.getenv("SAM_MIN_REGION_AREA", "200")),
        )

        # ---------------- Keypoints ----------------
        # Placeholder: output all zeros unless you add a keypoint detector.
        self.enable_keypoints = os.getenv("ENABLE_KEYPOINTS", "0").lower() in ("1", "true", "yes")
        self._kp_model: Any | None = None
        # If you have a keypoint model, load it here from path_hf_repo.

    def __repr__(self) -> str:
        return (
            f"SAM: {type(self.sam).__name__}\n"
            f"Keypoints enabled: {self.enable_keypoints}"
        )

    def predict_batch(
        self,
        batch_images: list[ndarray],
        offset: int,
        n_keypoints: int,
    ) -> list[TVFrameResult]:
        """
        Run SAM-based detection on a batch of frames.

        Args:
            batch_images: BGR (OpenCV-style) frames; a None entry yields an empty box list.
            offset: frame id of the first image in the batch.
            n_keypoints: number of pitch keypoints expected per frame (placeholder zeros).

        Returns:
            One TVFrameResult per input image, frame ids offset .. offset+len(batch_images)-1.
        """
        # ------------------ Boxes (SAM masks -> boxes) ------------------
        # SAM returns masks for "things" but does not label them (player/ref/ball).
        # For a first working miner, we mark everything as "player" (cls_id=2).
        # To score well, you will later need classification (ball/ref/goalkeeper/team).

        # FIX: hoist the env-var reads + float conversions out of the per-mask loop —
        # they are invariant for the whole batch.
        min_box_area = float(os.getenv("MIN_BOX_AREA", "250"))
        max_box_area_frac = float(os.getenv("MAX_BOX_AREA_FRAC", "0.25"))

        bboxes: dict[int, list[BoundingBox]] = {}

        for i, img in enumerate(batch_images):
            frame_id = offset + i

            if img is None:
                bboxes[frame_id] = []
                continue
            # Convert BGR(OpenCV) -> RGB(SAM)
            rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            masks = self.mask_generator.generate(rgb)  # list[dict]

            # Filter out giant masks (often the grass/background) and tiny noise.
            H, W = rgb.shape[:2]
            area_frame = float(H * W)
            out_boxes: list[BoundingBox] = []
            for m in masks:
                # segment-anything returns bbox as [x, y, w, h]
                x, y, w, h = m.get("bbox") or (0, 0, 0, 0)
                x1, y1 = int(x), int(y)
                x2, y2 = int(x + w), int(y + h)
                if x2 <= x1 or y2 <= y1:
                    continue  # degenerate box

                box_area = float((x2 - x1) * (y2 - y1))
                if box_area < min_box_area:
                    continue  # tiny noise
                if box_area / area_frame > max_box_area_frac:
                    continue  # likely background (grass, stands)

                conf = float(m.get("predicted_iou") or 0.5)
                out_boxes.append(
                    BoundingBox(
                        x1=x1,
                        y1=y1,
                        x2=x2,
                        y2=y2,
                        cls_id=2,  # default: player
                        conf=conf,
                    )
                )

            bboxes[frame_id] = out_boxes

        # ---------------- Keypoints (length = n_keypoints) ----------------
        keypoints: dict[int, list[tuple[int, int]]] = {}
        # Placeholder (zeros). Replace with your own keypoint detector when ready.
        for i in range(len(batch_images)):
            frame_id = offset + i
            keypoints[frame_id] = [(0, 0) for _ in range(n_keypoints)]

        # ---------------- Combine ------------------
        results: list[TVFrameResult] = []
        for frame_number in range(offset, offset + len(batch_images)):
            results.append(
                TVFrameResult(
                    frame_id=frame_number,
                    boxes=bboxes.get(frame_number, []),
                    keypoints=keypoints.get(
                        frame_number, [(0, 0) for _ in range(n_keypoints)]
                    ),
                )
            )
        return results
174
+
175
+