tarto2 commited on
Commit
424a89a
·
verified ·
1 Parent(s): 43eec27

Update miner.py

Browse files
Files changed (1) hide show
  1. miner.py +433 -433
miner.py CHANGED
@@ -1,434 +1,434 @@
1
- from pathlib import Path
2
- from typing import List, Tuple, Dict
3
- import sys
4
- import os
5
-
6
- from numpy import ndarray
7
- from pydantic import BaseModel
8
- sys.path.append(os.path.dirname(os.path.abspath(__file__)))
9
- from keypoint_helper import run_keypoints_post_processing
10
-
11
- from ultralytics import YOLO
12
- from team_cluster import TeamClassifier
13
- from utils import (
14
- BoundingBox,
15
- Constants,
16
- )
17
-
18
- import time
19
- import torch
20
- import gc
21
- from pitch import process_batch_input, get_cls_net
22
- import yaml
23
-
24
-
25
class BoundingBox(BaseModel):
    # NOTE(review): this redefinition shadows the BoundingBox imported from
    # `utils` above — confirm which definition callers actually expect.
    # Axis-aligned box in pixel coordinates: (x1, y1) top-left, (x2, y2) bottom-right.
    x1: int
    y1: int
    x2: int
    y2: int
    cls_id: int  # detection class id; players are re-mapped to team ids 6/7 downstream
    conf: float  # detector confidence score
32
-
33
-
34
class TVFrameResult(BaseModel):
    # Per-frame pipeline output: detections plus pitch keypoints for one video frame.
    frame_id: int
    boxes: List[BoundingBox]
    keypoints: List[Tuple[int, int]]  # (x, y) pixel coords; (0, 0) marks "not detected"
38
-
39
-
40
class Miner:
    """Football video miner: object detection, team classification and pitch keypoints."""

    # Tunables sourced from the shared Constants namespace in `utils`.
    SMALL_CONTAINED_IOA = Constants.SMALL_CONTAINED_IOA
    SMALL_RATIO_MAX = Constants.SMALL_RATIO_MAX
    SINGLE_PLAYER_HUE_PIVOT = Constants.SINGLE_PLAYER_HUE_PIVOT
    CORNER_INDICES = Constants.CORNER_INDICES
    KEYPOINTS_CONFIDENCE = Constants.KEYPOINTS_CONFIDENCE
    CORNER_CONFIDENCE = Constants.CORNER_CONFIDENCE
    GOALKEEPER_POSITION_MARGIN = Constants.GOALKEEPER_POSITION_MARGIN
    MIN_SAMPLES_FOR_FIT = 16  # Minimum player crops needed before fitting TeamClassifier
    MAX_SAMPLES_FOR_FIT = 1000  # Maximum samples to avoid overfitting
50
-
51
    def __init__(self, path_hf_repo: Path) -> None:
        """Load detector, team classifier and keypoint model from *path_hf_repo*.

        Never raises: any failure is captured in ``self.health`` so callers can
        inspect the error message instead of handling an exception.
        """
        try:
            device = "cuda" if torch.cuda.is_available() else "cpu"
            model_path = path_hf_repo / "football_object_detection.onnx"
            self.bbox_model = YOLO(model_path)

            print("BBox Model Loaded")

            team_model_path = path_hf_repo / "osnet_model.pth.tar-100"
            self.team_classifier = TeamClassifier(
                device=device,
                batch_size=32,
                model_name=str(team_model_path)
            )
            print("Team Classifier Loaded")

            # Team classification state
            self.team_classifier_fitted = False
            self.player_crops_for_fit = []

            model_kp_path = path_hf_repo / 'keypoint'
            config_kp_path = path_hf_repo / 'hrnetv2_w48.yaml'
            # NOTE(review): the file handle opened here is never closed — a
            # `with open(...)` block would be safer.
            cfg_kp = yaml.safe_load(open(config_kp_path, 'r'))

            loaded_state_kp = torch.load(model_kp_path, map_location=device)
            model = get_cls_net(cfg_kp)
            model.load_state_dict(loaded_state_kp)
            model.to(device)
            model.eval()

            self.keypoints_model = model
            self.kp_threshold = 0.1       # keypoint confidence threshold
            self.pitch_batch_size = 4     # frames per keypoint-model forward pass
            self.health = "healthy"
            print("✅ Keypoints Model Loaded")
        except Exception as e:
            # Record the failure so __repr__ can report it.
            self.health = "❌ Miner initialization failed: " + str(e)
            print(self.health)
89
-
90
    def __repr__(self) -> str:
        """Summarise loaded models when healthy; otherwise return the failure message."""
        if self.health == 'healthy':
            return (
                f"health: {self.health}\n"
                f"BBox Model: {type(self.bbox_model).__name__}\n"
                f"Keypoints Model: {type(self.keypoints_model).__name__}"
            )
        else:
            return self.health
99
-
100
    def _calculate_iou(self, box1: Tuple[float, float, float, float],
                       box2: Tuple[float, float, float, float]) -> float:
        """
        Calculate Intersection over Union (IoU) between two bounding boxes.
        Args:
            box1: (x1, y1, x2, y2)
            box2: (x1, y1, x2, y2)
        Returns:
            IoU score (0-1); 0.0 for disjoint or degenerate (zero-area) boxes.
        """
        x1_1, y1_1, x2_1, y2_1 = box1
        x1_2, y1_2, x2_2, y2_2 = box2

        # Calculate intersection area
        x_left = max(x1_1, x1_2)
        y_top = max(y1_1, y1_2)
        x_right = min(x2_1, x2_2)
        y_bottom = min(y2_1, y2_2)

        # Disjoint boxes: the overlap rectangle is empty.
        if x_right < x_left or y_bottom < y_top:
            return 0.0

        intersection_area = (x_right - x_left) * (y_bottom - y_top)

        # Calculate union area
        box1_area = (x2_1 - x1_1) * (y2_1 - y1_1)
        box2_area = (x2_2 - x1_2) * (y2_2 - y1_2)
        union_area = box1_area + box2_area - intersection_area

        # Guard against division by zero for degenerate boxes.
        if union_area == 0:
            return 0.0

        return intersection_area / union_area
133
-
134
    def _detect_objects_batch(self, decoded_images: List[ndarray]) -> List:
        """Run the YOLO detector over *decoded_images* in chunks of 16 frames.

        Returns one ultralytics result object per input frame, in input order.
        (Annotation corrected: the original declared Dict[int, List[BoundingBox]]
        but a flat list is returned.)
        """
        batch_size = 16
        detection_results = []
        n_frames = len(decoded_images)
        for frame_number in range(0, n_frames, batch_size):
            batch_images = decoded_images[frame_number: frame_number + batch_size]
            detections = self.bbox_model(batch_images, verbose=False, save=False)
            detection_results.extend(detections)

        return detection_results
144
-
145
    def _team_classify(self, detection_results, decoded_images, offset):
        """Assign team ids to player detections and build per-frame box lists.

        Fits the TeamClassifier on player crops harvested from this batch, then
        predicts a team (cls_id 6 or 7) per player box.  Returns a dict mapping
        absolute frame index (offset + local index) -> list[BoundingBox].

        NOTE(review): an empty *detection_results* raises ZeroDivisionError in
        the stats print below — confirm callers never pass an empty batch.
        """
        self.team_classifier_fitted = False
        start = time.time()
        # Collect player crops from first batch for fitting.
        # NOTE(review): duplicates the class constant MAX_SAMPLES_FOR_FIT.
        fit_sample_size = 1000
        player_crops_for_fit = []

        for frame_id in range(len(detection_results)):
            detection_box = detection_results[frame_id].boxes.data
            if len(detection_box) < 4:
                continue  # too few detections to be a useful fitting frame
            # Collect player boxes for team classification fitting (first batch only)
            if len(player_crops_for_fit) < fit_sample_size:
                frame_image = decoded_images[frame_id]
                for box in detection_box:
                    x1, y1, x2, y2, conf, cls_id = box.tolist()
                    if conf < 0.5:
                        continue
                    mapped_cls_id = str(int(cls_id))
                    # Only collect player crops (cls_id = 2)
                    if mapped_cls_id == '2':
                        crop = frame_image[int(y1):int(y2), int(x1):int(x2)]
                        if crop.size > 0:
                            player_crops_for_fit.append(crop)

            # Fit team classifier as soon as a full sample has been collected.
            if self.team_classifier and not self.team_classifier_fitted and len(player_crops_for_fit) >= fit_sample_size:
                print(f"Fitting TeamClassifier with {len(player_crops_for_fit)} player crops")
                self.team_classifier.fit(player_crops_for_fit)
                self.team_classifier_fitted = True
                break
        # Fallback: fit on whatever was harvested, if above the minimum (16
        # duplicates the class constant MIN_SAMPLES_FOR_FIT).
        if not self.team_classifier_fitted and len(player_crops_for_fit) >= 16:
            print(f"Fallback: Fitting TeamClassifier with {len(player_crops_for_fit)} player crops")
            self.team_classifier.fit(player_crops_for_fit)
            self.team_classifier_fitted = True
        end = time.time()
        print(f"Fitting Kmeans time: {end - start}")

        # Second pass: predict teams with configurable frame skipping optimization
        start = time.time()

        # Frame-skipping configuration: 1 = predict every frame (no skipping).
        prediction_interval = 1
        iou_threshold = 0.3  # min IoU to carry a team id onto a skipped frame

        print(f"Team classification - prediction_interval: {prediction_interval}, iou_threshold: {iou_threshold}")

        # Storage for predicted frame results: {frame_id: {box_idx: (bbox, team_id)}}
        predicted_frame_data = {}

        # Step 1: Predict for frames at prediction_interval only
        frames_to_predict = []
        for frame_id in range(len(detection_results)):
            if frame_id % prediction_interval == 0:
                frames_to_predict.append(frame_id)

        print(f"Predicting teams for {len(frames_to_predict)}/{len(detection_results)} frames "
              f"(saving {100 - (len(frames_to_predict) * 100 // len(detection_results))}% compute)")

        for frame_id in frames_to_predict:
            detection_box = detection_results[frame_id].boxes.data
            frame_image = decoded_images[frame_id]

            # Collect player crops for this frame
            frame_player_crops = []
            frame_player_indices = []
            frame_player_boxes = []

            for idx, box in enumerate(detection_box):
                x1, y1, x2, y2, conf, cls_id = box.tolist()
                if cls_id == 2 and conf < 0.6:
                    continue  # drop low-confidence players
                mapped_cls_id = str(int(cls_id))

                # Collect player crops for prediction
                if self.team_classifier and self.team_classifier_fitted and mapped_cls_id == '2':
                    crop = frame_image[int(y1):int(y2), int(x1):int(x2)]
                    if crop.size > 0:
                        frame_player_crops.append(crop)
                        frame_player_indices.append(idx)
                        frame_player_boxes.append((x1, y1, x2, y2))

            # Predict teams for all players in this frame
            if len(frame_player_crops) > 0:
                team_ids = self.team_classifier.predict(frame_player_crops)
                predicted_frame_data[frame_id] = {}
                for idx, bbox, team_id in zip(frame_player_indices, frame_player_boxes, team_ids):
                    # Map team_id (0,1) to cls_id (6,7)
                    team_cls_id = str(6 + int(team_id))
                    predicted_frame_data[frame_id][idx] = (bbox, team_cls_id)

        # Step 2: Process all frames (interpolate skipped frames)
        fallback_count = 0
        interpolated_count = 0
        bboxes: dict[int, list[BoundingBox]] = {}
        for frame_id in range(len(detection_results)):
            detection_box = detection_results[frame_id].boxes.data
            frame_image = decoded_images[frame_id]
            boxes = []

            team_predictions = {}

            if frame_id % prediction_interval == 0:
                # Predicted frame: use pre-computed predictions
                if frame_id in predicted_frame_data:
                    for idx, (bbox, team_cls_id) in predicted_frame_data[frame_id].items():
                        team_predictions[idx] = team_cls_id
            else:
                # Skipped frame: interpolate from neighboring predicted frames.
                # NOTE(review): self._find_best_match is not defined in this
                # file; this branch is unreachable while prediction_interval
                # is 1 — confirm the helper exists before raising the interval.
                prev_predicted_frame = (frame_id // prediction_interval) * prediction_interval
                next_predicted_frame = prev_predicted_frame + prediction_interval

                # Collect current frame player boxes
                for idx, box in enumerate(detection_box):
                    x1, y1, x2, y2, conf, cls_id = box.tolist()
                    if cls_id == 2 and conf < 0.6:
                        continue
                    mapped_cls_id = str(int(cls_id))

                    if self.team_classifier and self.team_classifier_fitted and mapped_cls_id == '2':
                        target_box = (x1, y1, x2, y2)

                        # Try to match with previous predicted frame
                        best_team_id = None
                        best_iou = 0.0

                        if prev_predicted_frame in predicted_frame_data:
                            team_id, iou = self._find_best_match(
                                target_box,
                                predicted_frame_data[prev_predicted_frame],
                                iou_threshold
                            )
                            if team_id is not None:
                                best_team_id = team_id
                                best_iou = iou

                        # Try to match with next predicted frame if available and no good match yet
                        if best_team_id is None and next_predicted_frame < len(detection_results):
                            if next_predicted_frame in predicted_frame_data:
                                team_id, iou = self._find_best_match(
                                    target_box,
                                    predicted_frame_data[next_predicted_frame],
                                    iou_threshold
                                )
                                if team_id is not None and iou > best_iou:
                                    best_team_id = team_id
                                    best_iou = iou

                        # Track interpolation success
                        if best_team_id is not None:
                            interpolated_count += 1
                        else:
                            # Fallback: if no match found, predict individually
                            crop = frame_image[int(y1):int(y2), int(x1):int(x2)]
                            if crop.size > 0:
                                team_id = self.team_classifier.predict([crop])[0]
                                best_team_id = str(6 + int(team_id))
                                fallback_count += 1

                        if best_team_id is not None:
                            team_predictions[idx] = best_team_id

            # Parse boxes with team classification
            for idx, box in enumerate(detection_box):
                x1, y1, x2, y2, conf, cls_id = box.tolist()
                if cls_id == 2 and conf < 0.6:
                    continue

                # Drop players almost fully overlapped by a staff box (cls 4).
                overlap_staff = False
                for idy, boxy in enumerate(detection_box):
                    s_x1, s_y1, s_x2, s_y2, s_conf, s_cls_id = boxy.tolist()
                    if cls_id == 2 and s_cls_id == 4:
                        staff_iou = self._calculate_iou(box[:4], boxy[:4])
                        if staff_iou >= 0.8:
                            overlap_staff = True
                            break
                if overlap_staff:
                    continue

                mapped_cls_id = str(int(cls_id))

                # Override cls_id for players with team prediction
                if idx in team_predictions:
                    mapped_cls_id = team_predictions[idx]
                # Staff boxes (cls 4) are excluded from the output entirely.
                if mapped_cls_id != '4':
                    # Class 3 detections need a higher bar than the generic one.
                    if int(mapped_cls_id) == 3 and conf < 0.5:
                        continue
                    boxes.append(
                        BoundingBox(
                            x1=int(x1),
                            y1=int(y1),
                            x2=int(x2),
                            y2=int(y2),
                            cls_id=int(mapped_cls_id),
                            conf=float(conf),
                        )
                    )
            # Handle footballs - keep only the highest-confidence one (cls 0).
            footballs = [bb for bb in boxes if int(bb.cls_id) == 0]
            if len(footballs) > 1:
                best_ball = max(footballs, key=lambda b: b.conf)
                boxes = [bb for bb in boxes if int(bb.cls_id) != 0]
                boxes.append(best_ball)

            bboxes[offset + frame_id] = boxes
        return bboxes
353
-
354
-
355
    def predict_batch(self, batch_images: List[ndarray], offset: int, n_keypoints: int) -> List[TVFrameResult]:
        """Detect objects, classify teams and extract pitch keypoints for a batch.

        Args:
            batch_images: decoded frames (arrays with .shape[:2] == (H, W)), in order.
            offset: absolute index of the first frame in this batch.
            n_keypoints: number of (x, y) keypoints each result must contain.

        Returns:
            One TVFrameResult per input frame; absent keypoints are (0, 0).
        """
        start = time.time()
        detection_results = self._detect_objects_batch(batch_images)
        end = time.time()
        print(f"Detection time: {end - start}")
        start = time.time()
        bboxes = self._team_classify(detection_results, batch_images, offset)
        end = time.time()
        print(f"Team classify time: {end - start}")

        pitch_batch_size = min(self.pitch_batch_size, len(batch_images))
        keypoints: Dict[int, List[Tuple[int, int]]] = {}

        start = time.time()
        # NOTE(review): this loop runs exactly once (unconditional break below).
        while True:
            # Free as much GPU memory as possible before the keypoint pass.
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
                torch.cuda.synchronize()
            # NOTE(review): "cuda" is passed even when CUDA is unavailable —
            # confirm process_batch_input tolerates CPU-only hosts.
            device_str = "cuda"
            keypoints_result = process_batch_input(
                batch_images,
                self.keypoints_model,
                self.kp_threshold,
                device_str,
                batch_size=pitch_batch_size,
            )
            if keypoints_result is not None and len(keypoints_result) > 0:
                for frame_number_in_batch, kp_dict in enumerate(keypoints_result):
                    if frame_number_in_batch >= len(batch_images):
                        break
                    frame_keypoints: List[Tuple[int, int]] = []
                    try:
                        height, width = batch_images[frame_number_in_batch].shape[:2]
                        if kp_dict is not None and isinstance(kp_dict, dict):
                            # Keypoint ids are 1-based; emit 32 slots, (0, 0) when absent.
                            for idx in range(32):
                                x, y = 0, 0
                                kp_idx = idx + 1
                                if kp_idx in kp_dict:
                                    try:
                                        kp_data = kp_dict[kp_idx]
                                        if isinstance(kp_data, dict) and "x" in kp_data and "y" in kp_data:
                                            # Normalised coords -> pixel coords.
                                            x = int(kp_data["x"] * width)
                                            y = int(kp_data["y"] * height)
                                    except (KeyError, TypeError, ValueError):
                                        pass
                                frame_keypoints.append((x, y))
                    except (IndexError, ValueError, AttributeError):
                        frame_keypoints = [(0, 0)] * 32
                    # Pad or trim to exactly n_keypoints entries.
                    if len(frame_keypoints) < n_keypoints:
                        frame_keypoints.extend([(0, 0)] * (n_keypoints - len(frame_keypoints)))
                    else:
                        frame_keypoints = frame_keypoints[:n_keypoints]
                    keypoints[offset + frame_number_in_batch] = frame_keypoints
            break
        end = time.time()
        print(f"Keypoint time: {end - start}")

        results: List[TVFrameResult] = []
        for frame_number in range(offset, offset + len(batch_images)):
            frame_boxes = bboxes.get(frame_number, [])
            frame_keypoints = keypoints.get(frame_number, [(0, 0) for _ in range(n_keypoints)])
            result = TVFrameResult(
                frame_id=frame_number,
                boxes=frame_boxes,
                keypoints=frame_keypoints,
            )
            results.append(result)

        if len(batch_images) > 0:
            h, w = batch_images[0].shape[:2]
            results = run_keypoints_post_processing(results, w, h)

        # Release GPU memory before returning to the caller.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            torch.cuda.synchronize()

        return results
 
1
+ from pathlib import Path
2
+ from typing import List, Tuple, Dict
3
+ import sys
4
+ import os
5
+
6
+ from numpy import ndarray
7
+ from pydantic import BaseModel
8
+ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
9
+ from keypoint_helper import run_keypoints_post_processing
10
+
11
+ from ultralytics import YOLO
12
+ from team_cluster import TeamClassifier
13
+ from utils import (
14
+ BoundingBox,
15
+ Constants,
16
+ )
17
+
18
+ import time
19
+ import torch
20
+ import gc
21
+ from pitch import process_batch_input, get_cls_net
22
+ import yaml
23
+
24
+
25
class BoundingBox(BaseModel):
    # NOTE(review): this redefinition shadows the BoundingBox imported from
    # `utils` above — confirm which definition callers actually expect.
    # Axis-aligned box in pixel coordinates: (x1, y1) top-left, (x2, y2) bottom-right.
    x1: int
    y1: int
    x2: int
    y2: int
    cls_id: int  # detection class id; players are re-mapped to team ids 6/7 downstream
    conf: float  # detector confidence score
32
+
33
+
34
class TVFrameResult(BaseModel):
    # Per-frame pipeline output: detections plus pitch keypoints for one video frame.
    frame_id: int
    boxes: List[BoundingBox]
    keypoints: List[Tuple[int, int]]  # (x, y) pixel coords; (0, 0) marks "not detected"
38
+
39
+
40
class Miner:
    """Football video miner: object detection, team classification and pitch keypoints."""

    # Tunables sourced from the shared Constants namespace in `utils`.
    SMALL_CONTAINED_IOA = Constants.SMALL_CONTAINED_IOA
    SMALL_RATIO_MAX = Constants.SMALL_RATIO_MAX
    SINGLE_PLAYER_HUE_PIVOT = Constants.SINGLE_PLAYER_HUE_PIVOT
    CORNER_INDICES = Constants.CORNER_INDICES
    KEYPOINTS_CONFIDENCE = Constants.KEYPOINTS_CONFIDENCE
    CORNER_CONFIDENCE = Constants.CORNER_CONFIDENCE
    GOALKEEPER_POSITION_MARGIN = Constants.GOALKEEPER_POSITION_MARGIN
    MIN_SAMPLES_FOR_FIT = 16  # Minimum player crops needed before fitting TeamClassifier
    MAX_SAMPLES_FOR_FIT = 700  # Maximum samples to avoid overfitting
50
+
51
+ def __init__(self, path_hf_repo: Path) -> None:
52
+ try:
53
+ device = "cuda" if torch.cuda.is_available() else "cpu"
54
+ model_path = path_hf_repo / "football_object_detection.onnx"
55
+ self.bbox_model = YOLO(model_path)
56
+
57
+ print("BBox Model Loaded")
58
+
59
+ team_model_path = path_hf_repo / "osnet_model.pth.tar-100"
60
+ self.team_classifier = TeamClassifier(
61
+ device=device,
62
+ batch_size=32,
63
+ model_name=str(team_model_path)
64
+ )
65
+ print("Team Classifier Loaded")
66
+
67
+ # Team classification state
68
+ self.team_classifier_fitted = False
69
+ self.player_crops_for_fit = []
70
+
71
+ model_kp_path = path_hf_repo / 'keypoint'
72
+ config_kp_path = path_hf_repo / 'hrnetv2_w48.yaml'
73
+ cfg_kp = yaml.safe_load(open(config_kp_path, 'r'))
74
+
75
+ loaded_state_kp = torch.load(model_kp_path, map_location=device)
76
+ model = get_cls_net(cfg_kp)
77
+ model.load_state_dict(loaded_state_kp)
78
+ model.to(device)
79
+ model.eval()
80
+
81
+ self.keypoints_model = model
82
+ self.kp_threshold = 0.1
83
+ self.pitch_batch_size = 4
84
+ self.health = "healthy"
85
+ print("✅ Keypoints Model Loaded")
86
+ except Exception as e:
87
+ self.health = "❌ Miner initialization failed: " + str(e)
88
+ print(self.health)
89
+
90
+ def __repr__(self) -> str:
91
+ if self.health == 'healthy':
92
+ return (
93
+ f"health: {self.health}\n"
94
+ f"BBox Model: {type(self.bbox_model).__name__}\n"
95
+ f"Keypoints Model: {type(self.keypoints_model).__name__}"
96
+ )
97
+ else:
98
+ return self.health
99
+
100
+ def _calculate_iou(self, box1: Tuple[float, float, float, float],
101
+ box2: Tuple[float, float, float, float]) -> float:
102
+ """
103
+ Calculate Intersection over Union (IoU) between two bounding boxes.
104
+ Args:
105
+ box1: (x1, y1, x2, y2)
106
+ box2: (x1, y1, x2, y2)
107
+ Returns:
108
+ IoU score (0-1)
109
+ """
110
+ x1_1, y1_1, x2_1, y2_1 = box1
111
+ x1_2, y1_2, x2_2, y2_2 = box2
112
+
113
+ # Calculate intersection area
114
+ x_left = max(x1_1, x1_2)
115
+ y_top = max(y1_1, y1_2)
116
+ x_right = min(x2_1, x2_2)
117
+ y_bottom = min(y2_1, y2_2)
118
+
119
+ if x_right < x_left or y_bottom < y_top:
120
+ return 0.0
121
+
122
+ intersection_area = (x_right - x_left) * (y_bottom - y_top)
123
+
124
+ # Calculate union area
125
+ box1_area = (x2_1 - x1_1) * (y2_1 - y1_1)
126
+ box2_area = (x2_2 - x1_2) * (y2_2 - y1_2)
127
+ union_area = box1_area + box2_area - intersection_area
128
+
129
+ if union_area == 0:
130
+ return 0.0
131
+
132
+ return intersection_area / union_area
133
+
134
+ def _detect_objects_batch(self, decoded_images: List[ndarray]) -> Dict[int, List[BoundingBox]]:
135
+ batch_size = 16
136
+ detection_results = []
137
+ n_frames = len(decoded_images)
138
+ for frame_number in range(0, n_frames, batch_size):
139
+ batch_images = decoded_images[frame_number: frame_number + batch_size]
140
+ detections = self.bbox_model(batch_images, verbose=False, save=False)
141
+ detection_results.extend(detections)
142
+
143
+ return detection_results
144
+
145
+ def _team_classify(self, detection_results, decoded_images, offset):
146
+ self.team_classifier_fitted = False
147
+ start = time.time()
148
+ # Collect player crops from first batch for fitting
149
+ fit_sample_size = 700
150
+ player_crops_for_fit = []
151
+
152
+ for frame_id in range(len(detection_results)):
153
+ detection_box = detection_results[frame_id].boxes.data
154
+ if len(detection_box) < 4:
155
+ continue
156
+ # Collect player boxes for team classification fitting (first batch only)
157
+ if len(player_crops_for_fit) < fit_sample_size:
158
+ frame_image = decoded_images[frame_id]
159
+ for box in detection_box:
160
+ x1, y1, x2, y2, conf, cls_id = box.tolist()
161
+ if conf < 0.5:
162
+ continue
163
+ mapped_cls_id = str(int(cls_id))
164
+ # Only collect player crops (cls_id = 2)
165
+ if mapped_cls_id == '2':
166
+ crop = frame_image[int(y1):int(y2), int(x1):int(x2)]
167
+ if crop.size > 0:
168
+ player_crops_for_fit.append(crop)
169
+
170
+ # Fit team classifier after collecting samples
171
+ if self.team_classifier and not self.team_classifier_fitted and len(player_crops_for_fit) >= fit_sample_size:
172
+ print(f"Fitting TeamClassifier with {len(player_crops_for_fit)} player crops")
173
+ self.team_classifier.fit(player_crops_for_fit)
174
+ self.team_classifier_fitted = True
175
+ break
176
+ if not self.team_classifier_fitted and len(player_crops_for_fit) >= 16:
177
+ print(f"Fallback: Fitting TeamClassifier with {len(player_crops_for_fit)} player crops")
178
+ self.team_classifier.fit(player_crops_for_fit)
179
+ self.team_classifier_fitted = True
180
+ end = time.time()
181
+ print(f"Fitting Kmeans time: {end - start}")
182
+
183
+ # Second pass: predict teams with configurable frame skipping optimization
184
+ start = time.time()
185
+
186
+ # Get configuration for frame skipping
187
+ prediction_interval = 1 # Default: predict every 2 frames
188
+ iou_threshold = 0.3
189
+
190
+ print(f"Team classification - prediction_interval: {prediction_interval}, iou_threshold: {iou_threshold}")
191
+
192
+ # Storage for predicted frame results: {frame_id: {box_idx: (bbox, team_id)}}
193
+ predicted_frame_data = {}
194
+
195
+ # Step 1: Predict for frames at prediction_interval only
196
+ frames_to_predict = []
197
+ for frame_id in range(len(detection_results)):
198
+ if frame_id % prediction_interval == 0:
199
+ frames_to_predict.append(frame_id)
200
+
201
+ print(f"Predicting teams for {len(frames_to_predict)}/{len(detection_results)} frames "
202
+ f"(saving {100 - (len(frames_to_predict) * 100 // len(detection_results))}% compute)")
203
+
204
+ for frame_id in frames_to_predict:
205
+ detection_box = detection_results[frame_id].boxes.data
206
+ frame_image = decoded_images[frame_id]
207
+
208
+ # Collect player crops for this frame
209
+ frame_player_crops = []
210
+ frame_player_indices = []
211
+ frame_player_boxes = []
212
+
213
+ for idx, box in enumerate(detection_box):
214
+ x1, y1, x2, y2, conf, cls_id = box.tolist()
215
+ if cls_id == 2 and conf < 0.6:
216
+ continue
217
+ mapped_cls_id = str(int(cls_id))
218
+
219
+ # Collect player crops for prediction
220
+ if self.team_classifier and self.team_classifier_fitted and mapped_cls_id == '2':
221
+ crop = frame_image[int(y1):int(y2), int(x1):int(x2)]
222
+ if crop.size > 0:
223
+ frame_player_crops.append(crop)
224
+ frame_player_indices.append(idx)
225
+ frame_player_boxes.append((x1, y1, x2, y2))
226
+
227
+ # Predict teams for all players in this frame
228
+ if len(frame_player_crops) > 0:
229
+ team_ids = self.team_classifier.predict(frame_player_crops)
230
+ predicted_frame_data[frame_id] = {}
231
+ for idx, bbox, team_id in zip(frame_player_indices, frame_player_boxes, team_ids):
232
+ # Map team_id (0,1) to cls_id (6,7)
233
+ team_cls_id = str(6 + int(team_id))
234
+ predicted_frame_data[frame_id][idx] = (bbox, team_cls_id)
235
+
236
+ # Step 2: Process all frames (interpolate skipped frames)
237
+ fallback_count = 0
238
+ interpolated_count = 0
239
+ bboxes: dict[int, list[BoundingBox]] = {}
240
+ for frame_id in range(len(detection_results)):
241
+ detection_box = detection_results[frame_id].boxes.data
242
+ frame_image = decoded_images[frame_id]
243
+ boxes = []
244
+
245
+ team_predictions = {}
246
+
247
+ if frame_id % prediction_interval == 0:
248
+ # Predicted frame: use pre-computed predictions
249
+ if frame_id in predicted_frame_data:
250
+ for idx, (bbox, team_cls_id) in predicted_frame_data[frame_id].items():
251
+ team_predictions[idx] = team_cls_id
252
+ else:
253
+ # Skipped frame: interpolate from neighboring predicted frames
254
+ # Find nearest predicted frames
255
+ prev_predicted_frame = (frame_id // prediction_interval) * prediction_interval
256
+ next_predicted_frame = prev_predicted_frame + prediction_interval
257
+
258
+ # Collect current frame player boxes
259
+ for idx, box in enumerate(detection_box):
260
+ x1, y1, x2, y2, conf, cls_id = box.tolist()
261
+ if cls_id == 2 and conf < 0.6:
262
+ continue
263
+ mapped_cls_id = str(int(cls_id))
264
+
265
+ if self.team_classifier and self.team_classifier_fitted and mapped_cls_id == '2':
266
+ target_box = (x1, y1, x2, y2)
267
+
268
+ # Try to match with previous predicted frame
269
+ best_team_id = None
270
+ best_iou = 0.0
271
+
272
+ if prev_predicted_frame in predicted_frame_data:
273
+ team_id, iou = self._find_best_match(
274
+ target_box,
275
+ predicted_frame_data[prev_predicted_frame],
276
+ iou_threshold
277
+ )
278
+ if team_id is not None:
279
+ best_team_id = team_id
280
+ best_iou = iou
281
+
282
+ # Try to match with next predicted frame if available and no good match yet
283
+ if best_team_id is None and next_predicted_frame < len(detection_results):
284
+ if next_predicted_frame in predicted_frame_data:
285
+ team_id, iou = self._find_best_match(
286
+ target_box,
287
+ predicted_frame_data[next_predicted_frame],
288
+ iou_threshold
289
+ )
290
+ if team_id is not None and iou > best_iou:
291
+ best_team_id = team_id
292
+ best_iou = iou
293
+
294
+ # Track interpolation success
295
+ if best_team_id is not None:
296
+ interpolated_count += 1
297
+ else:
298
+ # Fallback: if no match found, predict individually
299
+ crop = frame_image[int(y1):int(y2), int(x1):int(x2)]
300
+ if crop.size > 0:
301
+ team_id = self.team_classifier.predict([crop])[0]
302
+ best_team_id = str(6 + int(team_id))
303
+ fallback_count += 1
304
+
305
+ if best_team_id is not None:
306
+ team_predictions[idx] = best_team_id
307
+
308
+ # Parse boxes with team classification
309
+ for idx, box in enumerate(detection_box):
310
+ x1, y1, x2, y2, conf, cls_id = box.tolist()
311
+ if cls_id == 2 and conf < 0.6:
312
+ continue
313
+
314
+ # Check overlap with staff box
315
+ overlap_staff = False
316
+ for idy, boxy in enumerate(detection_box):
317
+ s_x1, s_y1, s_x2, s_y2, s_conf, s_cls_id = boxy.tolist()
318
+ if cls_id == 2 and s_cls_id == 4:
319
+ staff_iou = self._calculate_iou(box[:4], boxy[:4])
320
+ if staff_iou >= 0.8:
321
+ overlap_staff = True
322
+ break
323
+ if overlap_staff:
324
+ continue
325
+
326
+ mapped_cls_id = str(int(cls_id))
327
+
328
+ # Override cls_id for players with team prediction
329
+ if idx in team_predictions:
330
+ mapped_cls_id = team_predictions[idx]
331
+ if mapped_cls_id != '4':
332
+ if int(mapped_cls_id) == 3 and conf < 0.5:
333
+ continue
334
+ boxes.append(
335
+ BoundingBox(
336
+ x1=int(x1),
337
+ y1=int(y1),
338
+ x2=int(x2),
339
+ y2=int(y2),
340
+ cls_id=int(mapped_cls_id),
341
+ conf=float(conf),
342
+ )
343
+ )
344
+ # Handle footballs - keep only the best one
345
+ footballs = [bb for bb in boxes if int(bb.cls_id) == 0]
346
+ if len(footballs) > 1:
347
+ best_ball = max(footballs, key=lambda b: b.conf)
348
+ boxes = [bb for bb in boxes if int(bb.cls_id) != 0]
349
+ boxes.append(best_ball)
350
+
351
+ bboxes[offset + frame_id] = boxes
352
+ return bboxes
353
+
354
+
355
+ def predict_batch(self, batch_images: List[ndarray], offset: int, n_keypoints: int) -> List[TVFrameResult]:
356
+ start = time.time()
357
+ detection_results = self._detect_objects_batch(batch_images)
358
+ end = time.time()
359
+ print(f"Detection time: {end - start}")
360
+ start = time.time()
361
+ bboxes = self._team_classify(detection_results, batch_images, offset)
362
+ end = time.time()
363
+ print(f"Team classify time: {end - start}")
364
+
365
+ pitch_batch_size = min(self.pitch_batch_size, len(batch_images))
366
+ keypoints: Dict[int, List[Tuple[int, int]]] = {}
367
+
368
+ start = time.time()
369
+ while True:
370
+ gc.collect()
371
+ if torch.cuda.is_available():
372
+ torch.cuda.empty_cache()
373
+ torch.cuda.synchronize()
374
+ device_str = "cuda"
375
+ keypoints_result = process_batch_input(
376
+ batch_images,
377
+ self.keypoints_model,
378
+ self.kp_threshold,
379
+ device_str,
380
+ batch_size=pitch_batch_size,
381
+ )
382
+ if keypoints_result is not None and len(keypoints_result) > 0:
383
+ for frame_number_in_batch, kp_dict in enumerate(keypoints_result):
384
+ if frame_number_in_batch >= len(batch_images):
385
+ break
386
+ frame_keypoints: List[Tuple[int, int]] = []
387
+ try:
388
+ height, width = batch_images[frame_number_in_batch].shape[:2]
389
+ if kp_dict is not None and isinstance(kp_dict, dict):
390
+ for idx in range(32):
391
+ x, y = 0, 0
392
+ kp_idx = idx + 1
393
+ if kp_idx in kp_dict:
394
+ try:
395
+ kp_data = kp_dict[kp_idx]
396
+ if isinstance(kp_data, dict) and "x" in kp_data and "y" in kp_data:
397
+ x = int(kp_data["x"] * width)
398
+ y = int(kp_data["y"] * height)
399
+ except (KeyError, TypeError, ValueError):
400
+ pass
401
+ frame_keypoints.append((x, y))
402
+ except (IndexError, ValueError, AttributeError):
403
+ frame_keypoints = [(0, 0)] * 32
404
+ if len(frame_keypoints) < n_keypoints:
405
+ frame_keypoints.extend([(0, 0)] * (n_keypoints - len(frame_keypoints)))
406
+ else:
407
+ frame_keypoints = frame_keypoints[:n_keypoints]
408
+ keypoints[offset + frame_number_in_batch] = frame_keypoints
409
+ break
410
+ end = time.time()
411
+ print(f"Keypoint time: {end - start}")
412
+
413
+
414
+ results: List[TVFrameResult] = []
415
+ for frame_number in range(offset, offset + len(batch_images)):
416
+ frame_boxes = bboxes.get(frame_number, [])
417
+ frame_keypoints = keypoints.get(frame_number, [(0, 0) for _ in range(n_keypoints)])
418
+ result = TVFrameResult(
419
+ frame_id=frame_number,
420
+ boxes=frame_boxes,
421
+ keypoints=frame_keypoints,
422
+ )
423
+ results.append(result)
424
+
425
+ if len(batch_images) > 0:
426
+ h, w = batch_images[0].shape[:2]
427
+ results = run_keypoints_post_processing(results, w, h)
428
+
429
+ gc.collect()
430
+ if torch.cuda.is_available():
431
+ torch.cuda.empty_cache()
432
+ torch.cuda.synchronize()
433
+
434
  return results