diff --git "a/miner.py" "b/miner.py" new file mode 100644--- /dev/null +++ "b/miner.py" @@ -0,0 +1,4126 @@ +from __future__ import annotations + +import gc +import math +import os +import threading +import time +from itertools import combinations +from pathlib import Path +from concurrent.futures import ThreadPoolExecutor +from collections import OrderedDict, defaultdict +from typing import Any, Dict, Iterable, List, Optional + +import cv2 +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +import yaml +from numpy import ndarray +from PIL import Image +import torchvision.transforms as T +from sklearn.cluster import KMeans +from pydantic import BaseModel +from ultralytics import YOLO + +try: + from scipy.optimize import linear_sum_assignment as _linear_sum_assignment +except ImportError: + _linear_sum_assignment = None + +_f0 = True +BatchNorm2d = nn.BatchNorm2d +_v0 = 0.1 + + +def _c0(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d: + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False) + + +class _B0(nn.Module): + expansion = 1 + + def __init__(self, inplanes: int, planes: int, stride: int = 1, downsample: Any = None): + super().__init__() + self.conv1 = _c0(inplanes, planes, stride) + self.bn1 = BatchNorm2d(planes, momentum=_v0) + self.relu = nn.ReLU(inplace=True) + self.conv2 = _c0(planes, planes) + self.bn2 = BatchNorm2d(planes, momentum=_v0) + self.downsample = downsample + self.stride = stride + + def forward(self, x: torch.Tensor) -> torch.Tensor: + residual = x + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + out = self.conv2(out) + out = self.bn2(out) + if self.downsample is not None: + residual = self.downsample(x) + out += residual + out = self.relu(out) + return out + + +class _B1(nn.Module): + expansion = 4 + + def __init__(self, inplanes: int, planes: int, stride: int = 1, downsample: Any = None): + super().__init__() + self.conv1 = 
nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = BatchNorm2d(planes, momentum=_v0) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn2 = BatchNorm2d(planes, momentum=_v0) + self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False) + self.bn3 = BatchNorm2d(planes * self.expansion, momentum=_v0) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x: torch.Tensor) -> torch.Tensor: + residual = x + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + out = self.conv3(out) + out = self.bn3(out) + if self.downsample is not None: + residual = self.downsample(x) + out += residual + out = self.relu(out) + return out + + +_d0 = {"BASIC": _B0, "BOTTLENECK": _B1} + + +def _block_from_cfg(block_key: Any) -> type: + if isinstance(block_key, bool): + return _d0["BOTTLENECK"] if block_key else _d0["BASIC"] + key = str(block_key).upper() if block_key else "BASIC" + if key not in _d0: + key = "BASIC" + return _d0[key] + + +class _H0(nn.Module): + def __init__(self, num_branches: int, blocks: type, num_blocks: list, num_inchannels: list, num_channels: list, fuse_method: str, multi_scale_output: bool = True): + super().__init__() + if isinstance(blocks, bool): + blocks = _d0["BOTTLENECK"] if blocks else _d0["BASIC"] + self._check_branches(num_branches, blocks, num_blocks, num_inchannels, num_channels) + self.num_inchannels = num_inchannels + self.fuse_method = fuse_method + self.num_branches = num_branches + self.multi_scale_output = multi_scale_output + self.branches = self._make_branches(num_branches, blocks, num_blocks, num_channels) + self.fuse_layers = self._make_fuse_layers() + self.relu = nn.ReLU(inplace=True) + + def _check_branches(self, num_branches: int, blocks: type, num_blocks: list, num_inchannels: list, num_channels: list) -> None: + 
if num_branches != len(num_blocks): + raise ValueError("NUM_BRANCHES <> NUM_BLOCKS") + if num_branches != len(num_channels): + raise ValueError("NUM_BRANCHES <> NUM_CHANNELS") + if num_branches != len(num_inchannels): + raise ValueError("NUM_BRANCHES <> NUM_INCHANNELS") + + def _make_one_branch(self, branch_index: int, block: type, num_blocks: list, num_channels: list, stride: int = 1) -> nn.Sequential: + if isinstance(block, bool): + block = _d0["BOTTLENECK"] if block else _d0["BASIC"] + downsample = None + if stride != 1 or self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.num_inchannels[branch_index], num_channels[branch_index] * block.expansion, kernel_size=1, stride=stride, bias=False), + BatchNorm2d(num_channels[branch_index] * block.expansion, momentum=_v0), + ) + layers = [block(self.num_inchannels[branch_index], num_channels[branch_index], stride, downsample)] + self.num_inchannels[branch_index] = num_channels[branch_index] * block.expansion + for _ in range(1, num_blocks[branch_index]): + layers.append(block(self.num_inchannels[branch_index], num_channels[branch_index])) + return nn.Sequential(*layers) + + def _make_branches(self, num_branches: int, block: type, num_blocks: list, num_channels: list) -> nn.ModuleList: + return nn.ModuleList([self._make_one_branch(i, block, num_blocks, num_channels) for i in range(num_branches)]) + + def _make_fuse_layers(self) -> nn.ModuleList | None: + if self.num_branches == 1: + return None + num_branches = self.num_branches + num_inchannels = self.num_inchannels + fuse_layers = [] + for i in range(num_branches if self.multi_scale_output else 1): + fuse_layer = [] + for j in range(num_branches): + if j > i: + fuse_layer.append(nn.Sequential(nn.Conv2d(num_inchannels[j], num_inchannels[i], 1, 1, 0, bias=False), BatchNorm2d(num_inchannels[i], momentum=_v0))) + elif j == i: + fuse_layer.append(None) + else: + conv3x3s = [] + for k in range(i - 
j): + if k == i - j - 1: + conv3x3s.append(nn.Sequential(nn.Conv2d(num_inchannels[j], num_inchannels[i], 3, 2, 1, bias=False), BatchNorm2d(num_inchannels[i], momentum=_v0))) + else: + conv3x3s.append(nn.Sequential(nn.Conv2d(num_inchannels[j], num_inchannels[j], 3, 2, 1, bias=False), BatchNorm2d(num_inchannels[j], momentum=_v0), nn.ReLU(inplace=True))) + fuse_layer.append(nn.Sequential(*conv3x3s)) + fuse_layers.append(nn.ModuleList(fuse_layer)) + return nn.ModuleList(fuse_layers) + + def get_num_inchannels(self) -> list: + return self.num_inchannels + + def forward(self, x: list) -> list: + if self.num_branches == 1: + return [self.branches[0](x[0])] + for i in range(self.num_branches): + x[i] = self.branches[i](x[i]) + x_fuse = [] + for i in range(len(self.fuse_layers)): + y = x[0] if i == 0 else self.fuse_layers[i][0](x[0]) + for j in range(1, self.num_branches): + if i == j: + y = y + x[j] + elif j > i: + y = y + F.interpolate(self.fuse_layers[i][j](x[j]), size=[x[i].shape[2], x[i].shape[3]], mode="bilinear") + else: + y = y + self.fuse_layers[i][j](x[j]) + x_fuse.append(self.relu(y)) + return x_fuse + + +class _H1(nn.Module): + def __init__(self, config: dict, lines: bool = False, **kwargs: Any) -> None: + self.inplanes = 64 + self.lines = lines + extra = config["MODEL"]["EXTRA"] + super().__init__() + self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=2, padding=1, bias=False) + self.bn1 = BatchNorm2d(self.inplanes, momentum=_v0) + self.conv2 = nn.Conv2d(self.inplanes, self.inplanes, kernel_size=3, stride=2, padding=1, bias=False) + self.bn2 = BatchNorm2d(self.inplanes, momentum=_v0) + self.relu = nn.ReLU(inplace=True) + self.layer1 = self._make_layer(_B1, 64, 64, 4) + self.stage2_cfg = extra["STAGE2"] + num_channels = [extra["STAGE2"]["NUM_CHANNELS"][i] * _block_from_cfg(extra["STAGE2"]["BLOCK"]).expansion for i in range(len(extra["STAGE2"]["NUM_CHANNELS"]))] + self.transition1 = self._make_transition_layer([256], num_channels) + self.stage2, 
pre_stage_channels = self._make_stage(self.stage2_cfg, num_channels) + self.stage3_cfg = extra["STAGE3"] + num_channels = [extra["STAGE3"]["NUM_CHANNELS"][i] * _block_from_cfg(extra["STAGE3"]["BLOCK"]).expansion for i in range(len(extra["STAGE3"]["NUM_CHANNELS"]))] + self.transition2 = self._make_transition_layer(pre_stage_channels, num_channels) + self.stage3, pre_stage_channels = self._make_stage(self.stage3_cfg, num_channels) + self.stage4_cfg = extra["STAGE4"] + num_channels = [extra["STAGE4"]["NUM_CHANNELS"][i] * _block_from_cfg(extra["STAGE4"]["BLOCK"]).expansion for i in range(len(extra["STAGE4"]["NUM_CHANNELS"]))] + self.transition3 = self._make_transition_layer(pre_stage_channels, num_channels) + self.stage4, pre_stage_channels = self._make_stage(self.stage4_cfg, num_channels, multi_scale_output=True) + self.upsample = nn.Upsample(scale_factor=2, mode="nearest") + final_inp_channels = sum(pre_stage_channels) + self.inplanes + self.head = nn.Sequential( + nn.Conv2d(final_inp_channels, final_inp_channels, kernel_size=1), + BatchNorm2d(final_inp_channels, momentum=_v0), + nn.ReLU(inplace=True), + nn.Conv2d(final_inp_channels, config["MODEL"]["NUM_JOINTS"], kernel_size=extra["FINAL_CONV_KERNEL"]), + nn.Softmax(dim=1) if not self.lines else nn.Sigmoid(), + ) + + def _make_head(self, x: torch.Tensor, x_skip: torch.Tensor) -> torch.Tensor: + x = self.upsample(x) + x = torch.cat([x, x_skip], dim=1) + return self.head(x) + + def _make_transition_layer(self, num_channels_pre_layer: list, num_channels_cur_layer: list) -> nn.ModuleList: + num_branches_cur = len(num_channels_cur_layer) + num_branches_pre = len(num_channels_pre_layer) + transition_layers = [] + for i in range(num_branches_cur): + if i < num_branches_pre: + if num_channels_cur_layer[i] != num_channels_pre_layer[i]: + transition_layers.append(nn.Sequential( + nn.Conv2d(num_channels_pre_layer[i], num_channels_cur_layer[i], 3, 1, 1, bias=False), + BatchNorm2d(num_channels_cur_layer[i], momentum=_v0), + 
nn.ReLU(inplace=True), + )) + else: + transition_layers.append(None) + else: + conv3x3s = [] + for j in range(i + 1 - num_branches_pre): + inchannels = num_channels_pre_layer[-1] + outchannels = num_channels_cur_layer[i] if j == i - num_branches_pre else inchannels + conv3x3s.append(nn.Sequential( + nn.Conv2d(inchannels, outchannels, 3, 2, 1, bias=False), + BatchNorm2d(outchannels, momentum=_v0), + nn.ReLU(inplace=True), + )) + transition_layers.append(nn.Sequential(*conv3x3s)) + return nn.ModuleList(transition_layers) + + def _make_layer(self, block: type, inplanes: int, planes: int, blocks: int, stride: int = 1) -> nn.Sequential: + if isinstance(block, bool): + block = _d0["BOTTLENECK"] if block else _d0["BASIC"] + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), + BatchNorm2d(planes * block.expansion, momentum=_v0), + ) + layers = [block(inplanes, planes, stride, downsample)] + inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(inplanes, planes)) + return nn.Sequential(*layers) + + def _make_stage(self, layer_config: dict, num_inchannels: list, multi_scale_output: bool = True) -> tuple: + num_modules = layer_config["NUM_MODULES"] + num_blocks = layer_config["NUM_BLOCKS"] + num_channels = layer_config["NUM_CHANNELS"] + block = _block_from_cfg(layer_config["BLOCK"]) + fuse_method = layer_config["FUSE_METHOD"] + modules = [] + for i in range(num_modules): + reset_multi_scale_output = False if (not multi_scale_output and i == num_modules - 1) else True + modules.append(_H0( + layer_config["NUM_BRANCHES"], block, num_blocks, num_inchannels, num_channels, + fuse_method, reset_multi_scale_output, + )) + num_inchannels = modules[-1].get_num_inchannels() + return nn.Sequential(*modules), num_inchannels + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.conv1(x) + x_skip = 
x.clone() + x = self.bn1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.bn2(x) + x = self.relu(x) + x = self.layer1(x) + x_list = [self.transition1[i](x) if self.transition1[i] is not None else x for i in range(self.stage2_cfg["NUM_BRANCHES"])] + y_list = self.stage2(x_list) + x_list = [self.transition2[i](y_list[-1]) if self.transition2[i] is not None else y_list[i] for i in range(self.stage3_cfg["NUM_BRANCHES"])] + y_list = self.stage3(x_list) + x_list = [self.transition3[i](y_list[-1]) if self.transition3[i] is not None else y_list[i] for i in range(self.stage4_cfg["NUM_BRANCHES"])] + x = self.stage4(x_list) + height, width = x[0].size(2), x[0].size(3) + x1 = F.interpolate(x[1], size=(height, width), mode="bilinear", align_corners=False) + x2 = F.interpolate(x[2], size=(height, width), mode="bilinear", align_corners=False) + x3 = F.interpolate(x[3], size=(height, width), mode="bilinear", align_corners=False) + x = torch.cat([x[0], x1, x2, x3], 1) + return self._make_head(x, x_skip) + + def init_weights(self, pretrained: str = "") -> None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + if pretrained and os.path.isfile(pretrained): + w = torch.load(pretrained, map_location="cpu", weights_only=False) + self.load_state_dict({k: v for k, v in w.items() if k in self.state_dict()}, strict=False) + + +def _g0(config: dict, pretrained: str = "", **kwargs: Any) -> _H1: + model = _H1(config, **kwargs) + model.init_weights(pretrained) + return model + + +_K0 = { + 1: 1, 2: 14, 3: 25, 4: 2, 5: 10, 6: 18, 7: 26, 8: 3, 9: 7, 10: 23, + 11: 27, 20: 4, 21: 8, 22: 24, 23: 28, 24: 5, 25: 13, 26: 21, 27: 29, + 28: 6, 29: 17, 30: 30, 31: 11, 32: 15, 33: 19, 34: 12, 35: 16, 36: 20, + 45: 9, 50: 31, 52: 32, 57: 22, +} + +# ── Keypoint mapping & inference helpers ───────────────────────── + 
+map_keypoints = { + 1: 1, 2: 14, 3: 25, 4: 2, 5: 10, 6: 18, 7: 26, 8: 3, 9: 7, 10: 23, + 11: 27, 20: 4, 21: 8, 22: 24, 23: 28, 24: 5, 25: 13, 26: 21, 27: 29, + 28: 6, 29: 17, 30: 30, 31: 11, 32: 15, 33: 19, 34: 12, 35: 16, 36: 20, + 45: 9, 50: 31, 52: 32, 57: 22 +} + +# Template keypoints for homography refinement (new-5 style) +TEMPLATE_F0: List[Tuple[float, float]] = [ + (5, 5), (5, 140), (5, 250), (5, 430), (5, 540), (5, 675), (55, 250), (55, 430), + (110, 340), (165, 140), (165, 270), (165, 410), (165, 540), (527, 5), (527, 253), + (527, 433), (527, 675), (888, 140), (888, 270), (888, 410), (888, 540), (940, 340), + (998, 250), (998, 430), (1045, 5), (1045, 140), (1045, 250), (1045, 430), (1045, 540), + (1045, 675), (435, 340), (615, 340), +] +TEMPLATE_F1: List[Tuple[float, float]] = [ + (2.5, 2.5), (2.5, 139.5), (2.5, 249.5), (2.5, 430.5), (2.5, 540.5), (2.5, 678), + (54.5, 249.5), (54.5, 430.5), (110.5, 340.5), (164.5, 139.5), (164.5, 269), (164.5, 411), + (164.5, 540.5), (525, 2.5), (525, 249.5), (525, 430.5), (525, 678), (886.5, 139.5), + (886.5, 269), (886.5, 411), (886.5, 540.5), (940.5, 340.5), (998, 249.5), (998, 430.5), + (1048, 2.5), (1048, 139.5), (1048, 249.5), (1048, 430.5), (1048, 540.5), (1048, 678), + (434.5, 340), (615.5, 340), +] + +HOMOGRAPHY_FILL_ONLY_VALID = True +# Step8 (example_miner-style): homography + project template + fill; when True, skip _apply_homography_refinement and use step8 only +STEP8_ENABLED = True +STEP8_FILL_MISSING = True # True = fill all in-frame warped points; False = only detected indices +KP_THRESHOLD = 0.2 # new-5 style (was 0.3) + +# HRNet keypoint input size; smaller = faster, less accurate (540×960 = full) +_KP_H, _KP_W = 540, 960 +# _KP_H, _KP_W = 432, 768 + +def _p0(frames: list) -> torch.Tensor: + target_size = (_KP_H, _KP_W) + batch = [] + for frame in frames: + frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) + img = cv2.resize(frame_rgb, (target_size[1], target_size[0])) + img = img.astype(np.float32) 
/ 255.0 + img = np.transpose(img, (2, 0, 1)) + batch.append(img) + return torch.from_numpy(np.stack(batch)).float() + + +def _e0(heatmap: torch.Tensor, scale: int = 2, max_keypoints: int = 1) -> torch.Tensor: + batch_size, n_channels, height, width = heatmap.shape + max_pooled = F.max_pool2d(heatmap, 3, stride=1, padding=1) + local_maxima = max_pooled == heatmap + masked_heatmap = heatmap * local_maxima + flat_heatmap = masked_heatmap.view(batch_size, n_channels, -1) + scores, indices = torch.topk(flat_heatmap, max_keypoints, dim=-1, sorted=False) + y_coords = torch.div(indices, width, rounding_mode="floor") * scale + x_coords = (indices % width) * scale + return torch.stack([x_coords.float(), y_coords.float(), scores], dim=-1) + + +def _p1(kp_coords: torch.Tensor, kp_threshold: float, w: int, h: int, batch_size: int) -> list: + kp_np = kp_coords.cpu().numpy() + batch_results = [] + for batch_idx in range(batch_size): + kp_dict = {} + valid_kps = kp_np[batch_idx, :, 0, 2] > kp_threshold + for ch_idx in np.where(valid_kps)[0]: + x = float(kp_np[batch_idx, ch_idx, 0, 0]) / w + y = float(kp_np[batch_idx, ch_idx, 0, 1]) / h + p = float(kp_np[batch_idx, ch_idx, 0, 2]) + kp_dict[int(ch_idx) + 1] = {"x": x, "y": y, "p": p} + batch_results.append(kp_dict) + return batch_results + + +def _g1(kp_points: dict) -> dict: + return {_K0[k]: v for k, v in kp_points.items() if k in _K0} + + +def _i0(frames: list, model: nn.Module, kp_threshold: float, device: str, batch_size: int = 2) -> list: + results = [] + model_device = next(model.parameters()).device + use_amp = model_device.type == "cuda" + for i in range(0, len(frames), batch_size): + current_batch_size = min(batch_size, len(frames) - i) + batch_frames = frames[i : i + current_batch_size] + batch = _p0(batch_frames).to(model_device, non_blocking=True) + with torch.no_grad(): + with torch.amp.autocast("cuda", enabled=use_amp): + heatmaps = model(batch) + kp_coords = _e0(heatmaps[:, :-1, :, :], scale=2, max_keypoints=1) + 
batch_results = _p1(kp_coords, kp_threshold, _KP_W, _KP_H, current_batch_size) + results.extend([_g1(kp) for kp in batch_results]) + del heatmaps, kp_coords, batch + gc.collect() + if model_device.type == "cuda": + torch.cuda.empty_cache() + return results + + +def _x0(frames: list, model: nn.Module, kp_threshold: float, device: str = "cpu", batch_size: int = 2) -> list: + return _i0(frames, model, kp_threshold, device, batch_size) + + +def _normalize_keypoints_xyp(kp_results: list | None, frames: list, n_keypoints: int) -> list: + """Produce [(x, y, p), ...] per frame for fix_keypoints_pri thresholding.""" + if not kp_results: + return [] + keypoints = [] + for i in range(min(len(kp_results), len(frames))): + kp_dict = kp_results[i] + h, w = frames[i].shape[:2] + frame_kps = [] + for idx in range(n_keypoints): + kp_idx = idx + 1 + x, y, p = 0, 0, 0.0 + if kp_dict and isinstance(kp_dict, dict) and kp_idx in kp_dict: + d = kp_dict[kp_idx] + if isinstance(d, dict) and "x" in d: + x = int(d["x"] * w) + y = int(d["y"] * h) + p = float(d.get("p", 0.0)) + frame_kps.append((x, y, p)) + keypoints.append(frame_kps) + return keypoints + + +def _n0(keypoints_result: list | None, batch_images: list, n_keypoints: int) -> list: + keypoints = [] + if not keypoints_result: + return [] + for frame_number_in_batch, kp_dict in enumerate(keypoints_result): + if frame_number_in_batch >= len(batch_images): + break + frame_keypoints = [] + try: + height, width = batch_images[frame_number_in_batch].shape[:2] + if kp_dict and isinstance(kp_dict, dict): + for idx in range(32): + x, y = 0, 0 + kp_idx = idx + 1 + if kp_idx in kp_dict: + kp_data = kp_dict[kp_idx] + if isinstance(kp_data, dict) and "x" in kp_data and "y" in kp_data: + x, y = int(kp_data["x"] * width), int(kp_data["y"] * height) + frame_keypoints.append((x, y)) + else: + frame_keypoints = [(0, 0)] * 32 + except (IndexError, ValueError, AttributeError): + frame_keypoints = [(0, 0)] * 32 + if len(frame_keypoints) < n_keypoints: + 
frame_keypoints.extend([(0, 0)] * (n_keypoints - len(frame_keypoints))) + else: + frame_keypoints = frame_keypoints[:n_keypoints] + keypoints.append(frame_keypoints) + return keypoints + + +def _fix_keypoints(kps: list, n: int) -> list: + if len(kps) < n: + kps += [(0, 0)] * (n - len(kps)) + elif len(kps) > n: + kps = kps[:n] + + if kps[2] != (0,0) and kps[4] != (0,0) and kps[3] == (0,0): + kps[3] = kps[4]; kps[4] = (0,0) + if kps[0] != (0,0) and kps[4] != (0,0) and kps[1] == (0,0): + kps[1] = kps[4]; kps[4] = (0,0) + if kps[2] != (0,0) and kps[3] != (0,0) and kps[1] == (0,0) and kps[3][0] > kps[2][0]: + kps[1] = kps[3]; kps[3] = (0,0) + if kps[28] != (0,0) and kps[25] == (0,0) and kps[26] != (0,0) and kps[26][0] > kps[28][0]: + kps[25] = kps[28]; kps[28] = (0,0) + if kps[24] != (0,0) and kps[28] != (0,0) and kps[25] == (0,0): + kps[25] = kps[28]; kps[28] = (0,0) + if kps[24] != (0,0) and kps[27] != (0,0) and kps[26] == (0,0): + kps[26] = kps[27]; kps[27] = (0,0) + if kps[28] != (0,0) and kps[23] == (0,0) and kps[20] != (0,0) and kps[20][1] > kps[23][1]: + kps[23] = kps[20]; kps[20] = (0,0) + return kps + + +def _keypoints_to_float(keypoints: list) -> List[List[float]]: + """Convert keypoints to [[x, y], ...] float format for homography.""" + return [[float(x), float(y)] for x, y in keypoints] + + +def _keypoints_to_int(keypoints: list) -> List[Tuple[int, int]]: + """Convert keypoints to [(x, y), ...] 
integer format.""" + return [(int(round(float(kp[0]))), int(round(float(kp[1])))) for kp in keypoints] + + +# --- fix_keypoints_pri: select best keypoint config per frame from multiple candidates --- +_FKP_KEYPOINTS: List[Tuple[int, int]] = [ + (5, 5), (5, 140), (5, 250), (5, 430), (5, 540), (5, 675), + (55, 250), (55, 430), (110, 340), (165, 140), (165, 270), (165, 410), (165, 540), + (527, 5), (527, 253), (527, 433), (527, 675), + (888, 140), (888, 270), (888, 410), (888, 540), (940, 340), + (998, 250), (998, 430), (1045, 5), (1045, 140), (1045, 250), (1045, 430), (1045, 540), (1045, 675), + (435, 340), (615, 340), +] +_FKP_KEYPOINTS_NP = np.asarray(_FKP_KEYPOINTS, dtype=np.float32) +_FKP_GROUPS = { + 1: [2, 3, 7, 10], 2: [1, 3, 7, 10], 3: [2, 4, 7, 8], 4: [3, 5, 8, 7], 5: [4, 8, 6, 3], 6: [5, 4, 8, 13], + 7: [3, 8, 9, 10], 8: [4, 7, 9, 13], 9: [7, 8, 11, 12], 10: [9, 11, 7, 2], 11: [9, 10, 12, 31], 12: [9, 11, 13, 31], + 13: [9, 12, 8, 5], 14: [15, 31, 32, 16], 15: [31, 16, 32, 14], 16: [31, 15, 32, 17], 17: [31, 16, 32, 15], + 18: [19, 22, 23, 26], 19: [18, 22, 20, 32], 20: [19, 22, 21, 32], 21: [20, 22, 24, 29], 22: [23, 24, 19, 20], + 23: [27, 24, 22, 28], 24: [28, 23, 22, 27], 25: [26, 27, 23, 18], 26: [25, 27, 23, 18], 27: [26, 23, 28, 24], + 28: [27, 24, 29, 23], 29: [28, 30, 24, 21], 30: [29, 28, 24, 21], 31: [15, 16, 32, 14], 32: [15, 31, 16, 14], +} +_FKP_GROUPS_ARRAY = [np.asarray(_FKP_GROUPS[i], dtype=np.int32) - 1 for i in range(1, 33)] +_FKP_BLACKLISTS = [ + [23, 24, 27, 28], [7, 8, 3, 4], [2, 10, 1, 14], [18, 26, 14, 25], [5, 13, 6, 17], [21, 29, 17, 30], + [10, 11, 2, 3], [10, 11, 2, 7], [12, 13, 4, 5], [12, 13, 5, 8], [18, 19, 26, 27], [18, 19, 26, 23], + [20, 21, 24, 29], [20, 21, 28, 29], [8, 4, 5, 13], [3, 7, 2, 10], [23, 27, 18, 26], [24, 28, 21, 29], +] +_FKP_PREPARED_BLACKLISTS = [(set(bl), bl[0] - 1, bl[1] - 1) for bl in _FKP_BLACKLISTS] +_FKP_DILATE_KERNEL = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3)) +_FKP_KERNEL_31 = 
cv2.getStructuringElement(cv2.MORPH_RECT, (31, 31)) +_FKP_TEMPLATE_GRAY: Optional[ndarray] = None +_FKP_SHARED_EXECUTOR: Optional[ThreadPoolExecutor] = None +_FKP_PER_KEY_LOCKS: Dict[Any, threading.Lock] = defaultdict(threading.Lock) + + +class _FKP_MaxSizeCache(OrderedDict): + def __init__(self, maxlen: int = 500): + super().__init__() + self.maxlen = maxlen + self._lock = threading.Lock() + + def set(self, k: Any, v: Any) -> None: + with self._lock: + if k in self: + self.move_to_end(k) + self[k] = v + if len(self) > self.maxlen: + self.popitem(last=False) + + def get(self, k: Any) -> Any: + with self._lock: + return super().get(k) + + def exists(self, k: Any) -> bool: + with self._lock: + return k in self + + +_FKP_CACHED = _FKP_MaxSizeCache() + + +def _fkp_load_template_gray() -> ndarray: + global _FKP_TEMPLATE_GRAY + if _FKP_TEMPLATE_GRAY is None: + template_path = Path(__file__).parent / "football_pitch_template.png" + img = cv2.imread(str(template_path), cv2.IMREAD_COLOR) + if img is not None: + _FKP_TEMPLATE_GRAY = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + else: + _FKP_TEMPLATE_GRAY = np.zeros((680, 1050), dtype=np.uint8) + return _FKP_TEMPLATE_GRAY + + +def _fkp_get_or_compute_masks(key: Any, compute_fn: Any) -> Any: + lock = _FKP_PER_KEY_LOCKS[key] + with lock: + if _FKP_CACHED.exists(key): + return _FKP_CACHED.get(key) + masks = compute_fn() + _FKP_CACHED.set(key, masks) + return masks + + +def _fkp_canonical(obj: Any) -> Any: + if isinstance(obj, np.ndarray): + return _fkp_canonical(obj.tolist()) + if isinstance(obj, (list, tuple)): + return tuple(_fkp_canonical(x) for x in obj) + if isinstance(obj, set): + return tuple(sorted(_fkp_canonical(x) for x in obj)) + if isinstance(obj, dict): + return tuple((k, _fkp_canonical(v)) for k, v in sorted(obj.items())) + return obj + + +def _fkp_are_collinear(pts: Any, eps: float = 1e-9) -> bool: + pts = np.asarray(pts) + if len(pts) < 3: + return True + a, b, c = pts[:3] + area = np.abs(np.cross(b - a, c - a)) + 
return bool(area < eps) + + +def _fkp_unique_points(src: Any, dst: Any) -> Any: + src, dst = np.asarray(src, float), np.asarray(dst, float) + src_nonzero = ~np.all(np.abs(src) < 1e-9, axis=1) + dst_nonzero = ~np.all(np.abs(dst) < 1e-9, axis=1) + valid_mask = src_nonzero & dst_nonzero + if not valid_mask.any(): + return np.array([]), np.array([]) + src_valid = src[valid_mask] + dst_valid = dst[valid_mask] + _, unique_idx = np.unique(src_valid, axis=0, return_index=True) + unique_idx.sort() + return src_valid[unique_idx], dst_valid[unique_idx] + + +def _fkp_apply_transform(M: ndarray, P: Any) -> Tuple[int, int]: + x, y = P[0], P[1] + return (int(M[0, 0] * x + M[0, 1] * y + M[0, 2]), int(M[1, 0] * x + M[1, 1] * y + M[1, 2])) + + +def _fkp_apply_homo_transform(M: ndarray, P: Any) -> Tuple[int, int]: + x, y = P[0], P[1] + w = M[2, 0] * x + M[2, 1] * y + M[2, 2] + x_new = (M[0, 0] * x + M[0, 1] * y + M[0, 2]) / w + y_new = (M[1, 0] * x + M[1, 1] * y + M[1, 2]) / w + return (int(x_new), int(y_new)) + + +def _fkp_affine_from_4_points(src_pts: Any, dst_pts: Any) -> ndarray: + P, Q = np.array(src_pts, dtype=np.float64), np.array(dst_pts, dtype=np.float64) + x, y = P[:, 0], P[:, 1] + u, v = Q[:, 0], Q[:, 1] + A = np.zeros((8, 6), dtype=np.float64) + A[0::2, 0], A[0::2, 1], A[0::2, 2] = x, y, 1 + A[1::2, 3], A[1::2, 4], A[1::2, 5] = x, y, 1 + b = np.empty(8, dtype=np.float64) + b[0::2], b[1::2] = u, v + params, _, _, _ = np.linalg.lstsq(A, b, rcond=None) + a, b_, e, c, d, f = params + return np.array([[a, b_, e], [c, d, f], [0, 0, 1]], dtype=np.float64) + + +def _fkp_four_point_homography(src_pts: Any, dst_pts: Any) -> ndarray: + src, dst = np.array(src_pts, dtype=np.float64), np.array(dst_pts, dtype=np.float64) + x, y = src[:, 0], src[:, 1] + u, v = dst[:, 0], dst[:, 1] + A = np.zeros((8, 9), dtype=np.float64) + A[0::2, 0], A[0::2, 1], A[0::2, 2] = -x, -y, -1 + A[0::2, 6], A[0::2, 7], A[0::2, 8] = x * u, y * u, u + A[1::2, 3], A[1::2, 4], A[1::2, 5] = -x, -y, -1 + A[1::2, 6], 
A[1::2, 7], A[1::2, 8] = x * v, y * v, v + _, _, Vt = np.linalg.svd(A) + h = Vt[-1, :] + return (h.reshape(3, 3) / h[8]).astype(np.float64) + + +def _fkp_three_point_affine(P: Any, Q: Any) -> ndarray: + P, Q = np.array(P, dtype=np.float64), np.array(Q, dtype=np.float64) + x, y = P[:, 0], P[:, 1] + u, v = Q[:, 0], Q[:, 1] + n = P.shape[0] + A = np.zeros((2 * n, 6), dtype=np.float64) + A[0::2, 0], A[0::2, 1], A[0::2, 2] = x, y, 1 + A[1::2, 3], A[1::2, 4], A[1::2, 5] = x, y, 1 + b = np.empty(2 * n, dtype=np.float64) + b[0::2], b[1::2] = u, v + params, _, _, _ = np.linalg.lstsq(A, b, rcond=None) + a, b_, e, c, d, f = params + return np.array([[a, b_, e], [c, d, f], [0, 0, 1]], dtype=np.float64) + + +def _fkp_line_to_line_transform(P1: Any, P2: Any, Q1: Any, Q2: Any) -> ndarray: + P1, P2 = np.asarray(P1, dtype=np.float64), np.asarray(P2, dtype=np.float64) + Q1, Q2 = np.asarray(Q1, dtype=np.float64), np.asarray(Q2, dtype=np.float64) + v_s, v_t = P2 - P1, Q2 - Q1 + s = np.hypot(v_t[0], v_t[1]) / (np.hypot(v_s[0], v_s[1]) + 1e-12) + theta = np.arctan2(v_t[1], v_t[0]) - np.arctan2(v_s[1], v_s[0]) + c, s_ = np.cos(theta), np.sin(theta) + return np.array([ + [s * c, -s * s_, Q1[0] - (s * c * P1[0] - s * s_ * P1[1])], + [s * s_, s * c, Q1[1] - (s * s_ * P1[0] + s * c * P1[1])], + [0, 0, 1] + ], dtype=np.float64) + + +def _fkp_robust_transform(src_pts: Any, dst_pts: Any) -> Any: + src, dst = _fkp_unique_points(src_pts, dst_pts) + n = len(src) + if n >= 4: + if _fkp_are_collinear(src) or _fkp_are_collinear(dst): + H = _fkp_affine_from_4_points(src, dst) + return lambda pt: _fkp_apply_transform(H, pt) + H = _fkp_four_point_homography(src, dst) + return lambda pt: _fkp_apply_homo_transform(H, pt) + elif n == 3: + H = _fkp_three_point_affine(src, dst) + return lambda pt: _fkp_apply_transform(H, pt) + elif n == 2: + H = _fkp_line_to_line_transform(src[0], src[1], dst[0], dst[1]) + return lambda pt: _fkp_apply_transform(H, pt) + elif n == 1: + H = np.eye(3) + H[:2, 2] = dst[0] - 
src[0] + return lambda pt: _fkp_apply_transform(H, pt) + return lambda pt: _fkp_apply_transform(np.eye(3), pt) + + +def _fkp_pick_pt(points: Any) -> List[int]: + if not points: + return [] + pts_arr = np.asarray(points, dtype=np.int32) + seen = np.zeros(32, dtype=bool) + valid_mask = (pts_arr >= 0) & (pts_arr < 32) + seen[pts_arr[valid_mask]] = True + out_seen = np.zeros(32, dtype=bool) + out: List[int] = [] + for p in pts_arr[valid_mask]: + neigh = _FKP_GROUPS_ARRAY[p] + candidates = neigh[~seen[neigh] & ~out_seen[neigh]] + out_seen[candidates] = True + out.extend(candidates.tolist()) + return out + + +def _fkp_is_include(kp: Any, all_kps: Any) -> bool: + for kps in all_kps: + if np.sum(np.abs(np.array(kps) - np.array(kp))) <= 2: + return True + return False + + +def _fkp_get_edge_mask(x: float, y: float, W: int, H: int, t: int = 100) -> int: + mask = 0 + if x <= t: + mask |= 1 + if x >= W - t: + mask |= 2 + if y <= t: + mask |= 4 + if y >= H - t: + mask |= 8 + return mask + + +def _fkp_both_points_same_direction_fast(A: Any, B: Any, W: int, H: int, t: int = 100) -> bool: + mask_a = _fkp_get_edge_mask(A[0], A[1], W, H, t) + if mask_a == 0: + return False + mask_b = _fkp_get_edge_mask(B[0], B[1], W, H, t) + return (mask_a & mask_b) != 0 + + +def _fkp_project_image(image: ndarray, src_kps: Any, dst_kps: Any, w: int, h: int) -> ndarray: + src_arr = np.array(src_kps, dtype=np.float32) + dst_arr = np.array(dst_kps, dtype=np.float32) + valid_mask = ~((dst_arr[:, 0] == 0) & (dst_arr[:, 1] == 0)) + H, _ = cv2.findHomography(src_arr[valid_mask], dst_arr[valid_mask]) + if H is None: + raise ValueError("Homography not found") + return cv2.warpPerspective(image, H, (w, h)) + + +def _fkp_extract_masks(image: ndarray) -> tuple: + gray = image if image.ndim == 2 else cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + _, mask_ground = cv2.threshold(gray, 10, 1, cv2.THRESH_BINARY) + _, mask_lines = cv2.threshold(gray, 200, 1, cv2.THRESH_BINARY) + return mask_ground, mask_lines + + +def 
def _fkp_evaluate_keypoints_for_frame(
    frame_keypoints: Any, frame_index: int, h: int, w: int, check_frame_list: List[ndarray], precomputed_key: Any = None
) -> float:
    """Score one keypoint candidate against one frame of ``check_frame_list``.

    The template returned by ``_fkp_load_template_gray`` is warped into the
    frame using ``frame_keypoints``; the score is the number of pixels that are
    set both in the projected line mask and in the (ground-masked, dilated)
    check frame, divided by the number of projected line pixels.

    Args:
        frame_keypoints: Candidate keypoints; ``(0, 0)`` entries are treated as missing.
        frame_index: Index into ``check_frame_list``.
        h: Frame height in pixels.
        w: Frame width in pixels.
        check_frame_list: Per-frame images the projection is compared against
            (presumably single-channel uint8 edge maps — TODO confirm against caller).
        precomputed_key: Optional cache key; when falsy a key is derived with
            ``_fkp_canonical``.

    Returns:
        The overlap ratio, or 0.0 when the candidate is rejected, the frame
        index is out of range, or any exception is raised along the way.
    """
    key = precomputed_key or _fkp_canonical((frame_keypoints, w, h))
    floor_markings = _fkp_load_template_gray()

    def compute_masks(fkp: Any, ww: int, hh: int) -> Any:
        # Returns (line_mask, line_pixel_count, ground_mask), or (None, 0, None)
        # when the candidate is rejected or anything fails.
        try:
            # 1-based indices of the keypoints that are actually present.
            non_idxs_set = {i + 1 for i, kpt in enumerate(fkp) if kpt[0] != 0 or kpt[1] != 0}
            for bl_set, idx0, idx1 in _FKP_PREPARED_BLACKLISTS:
                if non_idxs_set.issubset(bl_set):
                    # Reject when the two referenced points both sit near the same frame border.
                    if _fkp_both_points_same_direction_fast(fkp[idx0], fkp[idx1], ww, hh):
                        return None, 0, None
            warped = _fkp_project_image(floor_markings, _FKP_KEYPOINTS, fkp, ww, hh)
            mask_ground, mask_lines = _fkp_extract_masks(warped)
            ys, xs = np.where(mask_lines == 1)
            if len(xs) == 0:
                bbox = None
            else:
                bbox = (xs.min(), ys.min(), xs.max(), ys.max())
            # With no line pixels the area falls back to 1, which fails the 20% test below.
            bbox_area = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) if bbox else 1
            # Projected lines must span at least 20% of the frame area.
            if (bbox_area / (hh * ww)) < 0.2:
                return None, 0, None
            return mask_lines, int(cv2.countNonZero(mask_lines)), mask_ground
        except Exception:
            return None, 0, None

    try:
        mask_exp, pixels_on_lines, mask_ground = _fkp_get_or_compute_masks(
            key, lambda: compute_masks(frame_keypoints, w, h)
        )
        if mask_exp is None or pixels_on_lines == 0 or mask_ground is None:
            return 0.0
        if frame_index >= len(check_frame_list):
            return 0.0
        scale = max(1, _FKP_EVAL_DOWNSCALE)
        if scale > 1 and h > scale and w > scale:
            # Downscaled path: resize frame and masks, then recount the line pixels at that size.
            h_s, w_s = h // scale, w // scale
            frame_s = cv2.resize(check_frame_list[frame_index], (w_s, h_s), interpolation=cv2.INTER_AREA)
            mask_ground_s = cv2.resize(mask_ground, (w_s, h_s), interpolation=cv2.INTER_NEAREST)
            mask_exp_s = cv2.resize(mask_exp, (w_s, h_s), interpolation=cv2.INTER_NEAREST)
            pixels_on_lines = cv2.countNonZero(mask_exp_s)
            if pixels_on_lines == 0:
                return 0.0
            # One scratch buffer is reused in place for every step below.
            work_buffer = np.zeros((h_s, w_s), dtype=np.uint8)
            cv2.bitwise_and(frame_s, frame_s, dst=work_buffer, mask=mask_ground_s)
            # NOTE: 2 dilation iterations here versus 3 in the full-resolution branch.
            cv2.dilate(work_buffer, _FKP_DILATE_KERNEL, dst=work_buffer, iterations=2)
            cv2.threshold(work_buffer, 0, 255, cv2.THRESH_BINARY, dst=work_buffer)
            pixels_predicted = cv2.countNonZero(work_buffer)
            cv2.bitwise_and(work_buffer, mask_exp_s, dst=work_buffer)
            pixels_overlapping = cv2.countNonZero(work_buffer)
        else:
            # Full-resolution path: same pipeline without resizing.
            work_buffer = np.zeros((h, w), dtype=np.uint8)
            cv2.bitwise_and(check_frame_list[frame_index], check_frame_list[frame_index], dst=work_buffer, mask=mask_ground)
            cv2.dilate(work_buffer, _FKP_DILATE_KERNEL, dst=work_buffer, iterations=3)
            cv2.threshold(work_buffer, 0, 255, cv2.THRESH_BINARY, dst=work_buffer)
            pixels_predicted = cv2.countNonZero(work_buffer)
            cv2.bitwise_and(work_buffer, mask_exp, dst=work_buffer)
            pixels_overlapping = cv2.countNonZero(work_buffer)
        pixels_rest = pixels_predicted - pixels_overlapping
        total_pixels = pixels_predicted + pixels_on_lines - pixels_overlapping
        # Reject when more than 90% of the union of both masks is frame content off the projected lines.
        if total_pixels > 0 and (pixels_rest / total_pixels) > 0.9:
            return 0.0
        return pixels_overlapping / (pixels_on_lines + 1e-8)
    except Exception:
        # Best effort: any failure scores the candidate as 0.0.
        pass
    return 0.0
def _fkp_get_executor(max_workers: int) -> ThreadPoolExecutor:
    """Return the module-level shared thread pool, creating it on first use.

    ``max_workers`` only takes effect on the call that actually creates the
    pool; later calls return the existing executor unchanged.
    """
    global _FKP_SHARED_EXECUTOR
    executor = _FKP_SHARED_EXECUTOR
    if executor is not None:
        return executor
    executor = ThreadPoolExecutor(max_workers=max_workers)
    _FKP_SHARED_EXECUTOR = executor
    return executor
def _fkp_normalize_results(frame_results: Any, threshold: float) -> List[Any]:
    """Convert per-frame ``(x, y, confidence)`` rows into 32 integer ``(x, y)`` pairs.

    Every frame is padded or truncated to exactly 32 rows. A row keeps its
    coordinates (truncated to ``int``) only when its confidence is strictly
    greater than ``threshold``; otherwise it becomes ``(0, 0)``.

    A frame that cannot be interpreted as an ``(N, >=3)`` numeric table
    (``None``, ragged rows, rows with fewer than three columns) yields 32
    ``(0, 0)`` pairs instead of raising, so one malformed frame does not abort
    normalization for the whole batch.

    Args:
        frame_results: Sequence of per-frame keypoint rows; falsy input gives ``[]``.
        threshold: Minimum confidence (exclusive) for a keypoint to be kept.

    Returns:
        One list of 32 ``(int, int)`` tuples per input frame.
    """
    if not frame_results:
        return []
    n_kp = 32
    results_array: List[Any] = []
    for result in frame_results:
        rows = [] if result is None else list(result)[:n_kp]
        rows.extend([(0, 0, 0.0)] * (n_kp - len(rows)))
        try:
            arr = np.array(rows, dtype=np.float32)
        except (TypeError, ValueError):
            # Ragged or non-numeric rows: treat the frame as having no keypoints.
            arr = None
        if arr is None or arr.ndim != 2 or arr.shape[1] < 3:
            results_array.append([(0, 0)] * n_kp)
            continue
        confident = arr[:, 2] > threshold
        xy = np.where(confident[:, None], arr[:, :2], 0)
        results_array.append([(int(x), int(y)) for x, y in xy])
    return results_array
def _apply_homography_refinement(
    keypoints: List[List[float]],
    frame: np.ndarray,
    n_keypoints: int,
) -> List[List[float]]:
    """Refine keypoints by re-projecting the template through a fitted homography.

    A homography is estimated from ``TEMPLATE_F1`` positions to the detected
    keypoints that are non-zero and inside the frame. All ``TEMPLATE_F0`` points
    are then projected with it, and projections that land inside the frame
    replace the keypoints.

    Args:
        keypoints: Detected keypoints, indexed like the template; ``(0, 0)`` means missing.
        frame: Image whose ``shape[:2]`` gives ``(height, width)``.
        n_keypoints: Expected keypoint count; refinement only runs for 32.

    Returns:
        A new list of 32 ``[x, y]`` pairs, or the unchanged ``keypoints`` object
        when refinement is not applicable (wrong count, fewer than 4 usable
        points, or no homography found).
    """
    if n_keypoints != 32 or len(TEMPLATE_F0) != 32 or len(TEMPLATE_F1) != 32:
        return keypoints
    frame_height, frame_width = frame.shape[:2]
    n_template = len(TEMPLATE_F1)
    valid_src: list[tuple[float, float]] = []
    valid_dst: list[tuple[float, float]] = []
    valid_indices: list[int] = []
    for kp_idx, kp in enumerate(keypoints):
        if kp_idx >= n_template:
            # Entries beyond the template have no source point; indexing
            # TEMPLATE_F1 with them would raise IndexError.
            break
        if not kp or len(kp) < 2:
            continue
        x, y = float(kp[0]), float(kp[1])
        is_missing = abs(x) < 1e-6 and abs(y) < 1e-6
        if is_missing or not (0 <= x < frame_width and 0 <= y < frame_height):
            continue
        valid_src.append(TEMPLATE_F1[kp_idx])
        valid_dst.append((x, y))
        valid_indices.append(kp_idx)
    # A homography needs at least four correspondences.
    if len(valid_src) < 4:
        return keypoints
    src_pts = np.array(valid_src, dtype=np.float32)
    dst_pts = np.array(valid_dst, dtype=np.float32)
    H, _ = cv2.findHomography(src_pts, dst_pts)
    if H is None:
        return keypoints
    all_template_points = np.array(TEMPLATE_F0, dtype=np.float32).reshape(-1, 1, 2)
    adjusted_points = cv2.perspectiveTransform(all_template_points, H).reshape(-1, 2)
    adj_x = adjusted_points[:32, 0]
    adj_y = adjusted_points[:32, 1]
    in_frame = (adj_x >= 0) & (adj_y >= 0) & (adj_x < frame_width) & (adj_y < frame_height)
    valid_indices_set = set(valid_indices)
    adjusted_kps: List[List[float]] = [[0.0, 0.0] for _ in range(32)]
    for i in np.where(in_frame)[0]:
        # With HOMOGRAPHY_FILL_ONLY_VALID set, only originally detected indices are written back.
        if not HOMOGRAPHY_FILL_ONLY_VALID or i in valid_indices_set:
            adjusted_kps[i] = [float(adj_x[i]), float(adj_y[i])]
    return adjusted_kps
# Class ids and their mapping (key: class id, value: mapped index).
_C0 = 0
_C1 = 1
_C2 = 2
_C3 = 3
_CLS_TO_VALIDATOR: dict[int, int] = {_C2: 0, _C3: 1, _C1: 2, _C0: 3}

# NOTE(review): `_B0` and `_B1` are also the names of the residual-block
# classes defined near the top of this file. These assignments rebind the
# module-level names; the `_d0` lookup table built earlier keeps its references
# to the classes, but any later direct use of `_B0`/`_B1` sees these values.
_B0: float = 0.25
_B1: bool = True
_B2: bool = False
_B3: bool = False
_B4: bool = False
_B5: bool = True

_D0 = 640
_D0_PERSON = 640
_TRACK_IOU_THRESH = 0.3
_TRACK_IOU_HIGH = 0.4
_TRACK_IOU_LOW = 0.2
_TRACK_MAX_AGE = 3
_TRACK_USE_VELOCITY = True
_D1 = 0.3
_T0 = 0.5
_R0 = 5
_R1 = 0.10
_R2 = 0.70
_q0 = 0.0
_q1 = 0.0

_P0 = True

_E0: bool = True
_E1: bool = True

# Annotation corrected: the value is an integer, not a boolean.
_BX_BS: int = 16
_KP_BS: int = 16

_A0: bool = False
_S0 = 8

_G0: bool = True
_G1 = 5
_G2 = 4
_G3 = 3
_G6: bool = False
_G7: bool = True
_G5: bool = True
_G8: bool = True

ENABLE_KEYPOINT_CONVERT: bool = False
_U0 = ENABLE_KEYPOINT_CONVERT  # alias of the public flag above
_J0 = True
_J1 = True
_J2: list[float] = [0.3, 0.5]
_J3: int = 20
_J4 = True
_J5: float = 50.0
_J6: int = 2
# Keypoint index groups used to build the per-index weight maps (_y7/_y8/_y9).
# _W3.._W5 currently hold the same values as _W0.._W2.
_W0: list[int] = [4, 9, 10, 11, 12, 17, 18, 19, 20, 28]
_W1: list[int] = [13, 14, 15]
_W2: list[int] = [5, 16, 29]
_W3: list[int] = [4, 9, 10, 11, 12, 17, 18, 19, 20, 28]
_W4: list[int] = [13, 14, 15]
_W5: list[int] = [5, 16, 29]
_KP16_WEIGHT: int = 8  # weight assigned to index 16 in _y8/_y9
_INDICES_H3_VS_H1: set[int] = {5, 13, 14, 15, 16, 29}
_INDICES_H3_VS_H2: set[int] = {4, 9, 10, 11, 12, 17, 18, 19, 20, 28}
_ALWAYS_INCLUDE_INDICES: tuple[int, ...] = (5, 16, 29)
# Mask-validation messages (raised by _y2) for which a retry with a nudged keypoint is attempted.
_MASK_RETRY_ERRORS: tuple[str, ...] = ("A projected line is too wide", "Projected ground should not be rectangular")
# Keypoint refinement speed/quality
_FKP_FAST_MODE: bool = True
_FKP_THRESHOLDS: tuple[float, ...] = (0.2, 0.4, 0.6, 0.8)
_FKP_SINGLE_THRESHOLD: float = 0.4
_FKP_MAX_CANDIDATES_PER_FRAME: int = 2
_FKP_TIME_BUDGET_SEC: float = 2.5
_FKP_EVAL_DOWNSCALE: int = 2
_Z8_MIN_BATCH_FRAMES: int = 6
_Z8_MAX_PROBLEMATIC_PER_BATCH: int = 8
_STEP0_ENABLED: bool = True
_STEP0_PROXIMITY_PX: float = 30.0
_STEP5_2_RIGHT_QUAD_HALFLENGTH: float = 200.0
_STEP5_2_8PX_COARSE_STEP: int = 10
_STEP5_2_8PX_REFINE_WINDOW: int = 10
_STEP5_2_ROI_MARGIN: int = 10
_STEP5_2_LONGEST_SEGMENT_MAX_PTS: int = 28
_STEP5_2_8PX_HALFRES: bool = True
_STEP5_2_8PX_REFINE_PASS: bool = True
_STEP5_2_HEAVY_SEARCH_FLAG: bool = True
# Template keypoint coordinates as (x, y) pairs: 32 entries, indexed by keypoint id.
_F0: list[tuple[float, float]] = [
    (5, 5), (5, 140), (5, 250), (5, 430), (5, 540), (5, 675),
    (55, 250), (55, 430), (110, 340), (165, 140), (165, 270),
    (165, 410), (165, 540), (527, 5), (527, 253), (527, 433),
    (527, 675), (888, 140), (888, 270), (888, 410), (888, 540),
    (940, 340), (998, 250), (998, 430), (1045, 5), (1045, 140),
    (1045, 250), (1045, 430), (1045, 540), (1045, 675),
    (435, 340), (615, 340),
]
# Second set of 32 template coordinates with slightly different values, same indexing as _F0.
_F1: list[tuple[float, float]] = [
    (2.5, 2.5), (2.5, 139.5), (2.5, 249.5), (2.5, 430.5), (2.5, 540.5), (2.5, 678.0),
    (54.5, 249.5), (54.5, 430.5), (110.5, 340.5), (164.5, 139.5), (164.5, 269.0),
    (164.5, 411.0), (164.5, 540.5), (525.0, 2.5), (525.0, 249.5), (525.0, 430.5),
    (525.0, 678.0), (886.5, 139.5), (886.5, 269.0), (886.5, 411.0), (886.5, 540.5),
    (940.5, 340.5), (998.0, 249.5), (998.0, 430.5), (1048.0, 2.5), (1048.0, 139.5),
    (1048.0, 249.5), (1048.0, 430.5), (1048.0, 540.5), (1048.0, 678.0),
    (434.5, 340.0), (615.5, 340.0),
]
# Keypoint indices used as the four projected corners in the twist check (_y4).
_I0 = 5
_I1 = 29
_I2 = 0
_I3 = 24
_N0 = len(_F0)
def _step0_remove_close_keypoints(kps: list[list[float]], proximity_px: float = 30.0) -> int:
    """Zero out, in place, every keypoint lying within ``proximity_px`` of another.

    Only entries that are lists/tuples with at least two items and are not
    ``(0, 0)`` take part. Whenever two such points are at most ``proximity_px``
    apart (Euclidean distance), both are replaced by ``[0.0, 0.0]``.

    Returns:
        The number of keypoints that were reset.
    """
    if len(kps) == 0:
        return 0

    # index -> (x, y) for every usable keypoint, in index order
    located: dict = {}
    for idx, kp in enumerate(kps):
        if isinstance(kp, (list, tuple)) and len(kp) >= 2:
            x, y = float(kp[0]), float(kp[1])
            if x != 0.0 or y != 0.0:
                located[idx] = (x, y)
    if len(located) < 2:
        return 0

    doomed: set = set()
    for (idx_a, (ax, ay)), (idx_b, (bx, by)) in combinations(located.items(), 2):
        if math.hypot(ax - bx, ay - by) <= proximity_px:
            doomed.update((idx_a, idx_b))

    # Removal happens after all pairs are compared, so chains are fully cleared.
    for idx in doomed:
        kps[idx] = [0.0, 0.0]
    return len(doomed)
def _y3(pts: ndarray) -> bool:
    """Report whether the quadrilateral formed by the first four points is twisted.

    The points are read in order as corners 0-1-2-3. The quadrilateral counts
    as twisted when a pair of opposite edges (0-1 vs 2-3, or 1-2 vs 3-0)
    intersect. Fewer than four points always gives False.
    """
    quad = pts.reshape(-1, 2)
    if len(quad) < 4:
        return False

    def _orient(a: tuple, b: tuple, c: tuple) -> bool:
        # Orientation test for the ordered triple (a, b, c).
        return (c[1] - a[1]) * (b[0] - a[0]) > (b[1] - a[1]) * (c[0] - a[0])

    def _segments_cross(p1: tuple, p2: tuple, q1: tuple, q2: tuple) -> bool:
        # Segments cross when each one separates the endpoints of the other.
        return (_orient(p1, q1, q2) != _orient(p2, q1, q2)) and (_orient(p1, p2, q1) != _orient(p1, p2, q2))

    c0, c1, c2, c3 = quad[0], quad[1], quad[2], quad[3]
    return _segments_cross(c0, c1, c2, c3) or _segments_cross(c1, c2, c3, c0)
def _fit_line_to_points(points: list[tuple[float, float]]) -> tuple[float, float, float] | None:
    """Fit a line ``a*x + b*y + c = 0`` through ``points`` by least squares.

    ``(a, b)`` is the unit eigenvector of the points' 2x2 covariance matrix
    belonging to its smaller eigenvalue (the line normal), and ``c`` makes the
    line pass through the centroid. When that eigenvector degenerates to zero
    length, the normal falls back to ``(1, 0)``.

    Returns:
        ``(a, b, c)``, or None when fewer than two points are given.
    """
    if len(points) < 2:
        return None
    coords = np.array(points, dtype=np.float64)
    xs, ys = coords[:, 0], coords[:, 1]
    mean_x = float(xs.mean())
    mean_y = float(ys.mean())
    dx = xs - mean_x
    dy = ys - mean_y
    count = len(coords)
    var_x = (dx * dx).sum() / count
    cov_xy = (dx * dy).sum() / count
    var_y = (dy * dy).sum() / count
    total = var_x + var_y
    spread = var_x - var_y
    # Smaller eigenvalue of [[var_x, cov_xy], [cov_xy, var_y]].
    smallest = (total - np.sqrt(spread * spread + 4.0 * cov_xy * cov_xy)) * 0.5
    a = float(cov_xy)
    b = float(smallest - var_x)
    length = np.sqrt(a * a + b * b)
    if length < 1e-12:
        a, b = 1.0, 0.0
    else:
        a, b = a / length, b / length
    return (a, b, -(a * mean_x + b * mean_y))
def _clip_segment_to_rect(
    x1: float, y1: float, x2: float, y2: float,
    w: int, h: int,
) -> tuple[tuple[float, float], tuple[float, float]] | None:
    """Clip the segment ``(x1, y1)-(x2, y2)`` to the frame rectangle.

    Candidate points are the endpoints lying inside the frame plus the
    intersections of the segment with the borders ``x = 0``, ``x = w - 1``,
    ``y = 0`` and ``y = h - 1``. The result is the pair of candidates with the
    smallest and largest x.

    Duplicate candidates are removed before the extremes are picked. Without
    that, a vertical segment with an endpoint on the top or bottom border
    yields the same point twice, and an x-only ordering can return that point
    as both ends (a zero-length segment).

    Returns:
        ``(left_point, right_point)``; the same point twice when only one
        distinct candidate exists; None when the segment misses the frame.
    """
    dx, dy = x2 - x1, y2 - y1
    pts: list[tuple[float, float]] = []
    # NOTE(review): endpoints are accepted up to x == w / y == h, while the
    # border intersections below use w - 1 / h - 1. Kept as is; confirm intent.
    if 0 <= x1 <= w and 0 <= y1 <= h:
        pts.append((x1, y1))
    if 0 <= x2 <= w and 0 <= y2 <= h:
        pts.append((x2, y2))
    if abs(dx) >= 1e-12:
        for x_edge in (0.0, float(w - 1)):
            t = (x_edge - x1) / dx
            if 0 <= t <= 1:
                y = y1 + t * dy
                if 0 <= y <= h - 1:
                    pts.append((x_edge, y))
    if abs(dy) >= 1e-12:
        for y_edge in (0.0, float(h - 1)):
            t = (y_edge - y1) / dy
            if 0 <= t <= 1:
                x = x1 + t * dx
                if 0 <= x <= w - 1:
                    pts.append((x, y_edge))
    if not pts:
        return None
    # Order-preserving de-duplication keeps the original tie-breaking for
    # distinct points while preventing a repeated point from hiding the far end.
    unique = list(dict.fromkeys(pts))
    if len(unique) == 1:
        return (unique[0], unique[0])
    unique.sort(key=lambda p: p[0])
    return (unique[0], unique[-1])
def _line_segment_for_drawing(
    a: float, b: float, c: float, w: int, h: int,
) -> tuple[tuple[float, float], tuple[float, float]] | None:
    """Return two points of the line ``a*x + b*y + c = 0`` that span the frame.

    The line is intersected with the four frame borders (``x = 0``,
    ``x = w - 1``, ``y = 0``, ``y = h - 1``); intersections up to 50 px outside
    the frame are accepted. Points that coincide after rounding to two
    decimals are merged, and the first and last of the remaining points in
    ``(x, y)`` order are returned.

    Returns:
        ``(first_point, last_point)``, or None when fewer than two distinct
        intersections exist.
    """
    hits: list[tuple[float, float]] = []
    if abs(b) >= 1e-12:
        # Intersections with the left and right borders.
        for x in (0.0, float(w - 1)):
            y = -(a * x + c) / b
            if -50 <= y <= h + 50:
                hits.append((x, y))
    if abs(a) >= 1e-12:
        # Intersections with the top and bottom borders.
        for y in (0.0, float(h - 1)):
            x = -(b * y + c) / a
            if -50 <= x <= w + 50:
                hits.append((x, y))
    if len(hits) < 2:
        return None

    # Keep the first point seen for each rounded position.
    distinct: dict = {}
    for point in hits:
        distinct.setdefault((round(point[0], 2), round(point[1], 2)), point)
    if len(distinct) < 2:
        return None
    ordered = sorted(distinct.values(), key=lambda p: (p[0], p[1]))
    return (ordered[0], ordered[-1])
def _is_kp_valid(kp: Any) -> bool:
    """Tell whether ``kp`` is a usable keypoint.

    A keypoint is usable when it is a list or tuple with at least two items,
    its first two items convert to ``float``, and they are not both zero
    (``(0, 0)`` marks a missing keypoint).
    """
    if isinstance(kp, (list, tuple)) and len(kp) >= 2:
        try:
            coords = (float(kp[0]), float(kp[1]))
        except (TypeError, ValueError):
            return False
        return coords != (0.0, 0.0)
    return False
ndarray, + template_image: ndarray, + *, + precomputed_dilate_uint8: ndarray | None = None, + precomputed_warped: ndarray | None = None, + precomputed_ground_mask: ndarray | None = None, +) -> tuple[bool, str | None]: + n_valid_5_16_29 = sum(1 for i in (5, 16, 29) if i < len(kps) and _is_kp_valid(kps[i])) + if n_valid_5_16_29 >= 2: + return (False, None) + h, w = video_frame.shape[:2] + kp16_valid_input = _is_kp_valid(kps[16]) if len(kps) > 16 else False + left_set = [0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12] + right_set = [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29] + middle_set = [9, 13, 14, 15, 16, 17, 30, 31] + decision: str | None = None + if any(i < len(kps) and _is_kp_valid(kps[i]) for i in left_set): + decision = "left" + elif any(i < len(kps) and _is_kp_valid(kps[i]) for i in right_set): + decision = "right" + elif any(i < len(kps) and _is_kp_valid(kps[i]) for i in middle_set): + decision = "middle" + else: + decision = "other" + src_pts = np.array([_F1[i] for i in (5, 16, 29)], dtype=np.float32).reshape(1, 3, 2) + projected = cv2.perspectiveTransform(src_pts, H)[0] + for idx, i in enumerate((5, 16, 29)): + if i < len(kps) and not _is_kp_valid(kps[i]): + kps[i] = [float(projected[idx][0]), float(projected[idx][1])] + tkp_5 = (float(kps[5][0]), float(kps[5][1])) + tkp_16 = (float(kps[16][0]), float(kps[16][1])) + tkp_29 = (float(kps[29][0]), float(kps[29][1])) + clip = _clip_segment_to_rect(tkp_5[0], tkp_5[1], tkp_29[0], tkp_29[1], w, h) + if clip is None: + return (False, None) + (ax, ay), (bx, by) = clip + + if decision == "right": + clip_r = _clip_segment_to_rect(tkp_16[0], tkp_16[1], tkp_29[0], tkp_29[1], w, h) + if clip_r is None: + return (False, None) + (Ax, Ay), (Bx, By) = clip_r + valid_indices_52 = [] + valid_src_52 = [] + valid_dst_52 = [] + for idx, kp in enumerate(kps): + if not _is_kp_valid(kp): + continue + x, y = float(kp[0]), float(kp[1]) + valid_indices_52.append(idx) + valid_src_52.append(_F1[idx] if idx < len(_F1) else (0.0, 0.0)) + 
valid_dst_52.append((x, y)) + warped_r = precomputed_warped + ground_mask_r = precomputed_ground_mask + H_use_r = H + if warped_r is None or ground_mask_r is None: + try: + warped_r = cv2.warpPerspective(template_image, H_use_r, (w, h)) + ground_mask_r, _ = _y5(warped_r) + except _Xe as e: + err_msg = e.args[0] if e.args else "" + if err_msg in _MASK_RETRY_ERRORS and len(valid_indices_52) >= 4 and len(valid_dst_52) >= 4: + idx_smallest_y = min(range(len(valid_dst_52)), key=lambda i: valid_dst_52[i][1]) + x0, y0 = valid_dst_52[idx_smallest_y] + for dx, dy in [(0, -1), (0, 1), (-1, 0), (1, 0)]: + new_dst = list(valid_dst_52) + new_dst[idx_smallest_y] = (x0 + dx, y0 + dy) + H_retry = _y10(valid_indices_52, valid_src_52, new_dst, {}) + if H_retry is None: + continue + try: + warped_r = cv2.warpPerspective(template_image, H_retry, (w, h)) + ground_mask_r, _ = _y5(warped_r) + H_use_r = H_retry + break + except _Xe: + continue + if warped_r is None or ground_mask_r is None: + return (False, None) + except Exception: + return (False, None) + if warped_r is None or ground_mask_r is None: + return (False, None) + dilate_uint8_r = precomputed_dilate_uint8 if precomputed_dilate_uint8 is not None else _dilate_uint8_full_frame(video_frame) + pts_right = [(float(kps[i][0]), float(kps[i][1])) for i in [24, 25, 26, 27, 28, 29] if i < len(kps) and _is_kp_valid(kps[i])] + if len(pts_right) >= 2: + line3 = _fit_line_to_points(pts_right) + else: + src_24_29 = np.array([[_F1[i] for i in [24, 25, 26, 27, 28, 29]]], dtype=np.float32) + tkp_24_29 = cv2.perspectiveTransform(src_24_29, H_use_r)[0] + pts_right = [(float(tkp_24_29[i][0]), float(tkp_24_29[i][1])) for i in range(6)] + line3 = _fit_line_to_points(pts_right) + if line3 is None: + return (False, None) + a3, b3, c3 = line3 + norm_u = math.hypot(b3, -a3) + if norm_u < 1e-12: + return (False, None) + ux, uy = b3 / norm_u, -a3 / norm_u + d = _STEP5_2_RIGHT_QUAD_HALFLENGTH + A1 = (Ax - d * ux, Ay - d * uy) + A2 = (Ax + d * ux, Ay + d * 
uy) + B1 = (Bx - d * ux, By - d * uy) + B2 = (Bx + d * ux, By + d * uy) + pts_poly = np.array([[A1[0], A1[1]], [A2[0], A2[1]], [B2[0], B2[1]], [B1[0], B1[1]]], dtype=np.int32) + mask_poly = np.zeros((h, w), dtype=np.uint8) + cv2.fillConvexPoly(mask_poly, pts_poly, 255) + dilate_in_roi = cv2.bitwise_and(dilate_uint8_r, mask_poly) + px = pts_poly[:, 0] + py = pts_poly[:, 1] + x_min = max(0, int(px.min()) - _STEP5_2_ROI_MARGIN) + y_min = max(0, int(py.min()) - _STEP5_2_ROI_MARGIN) + x_max = min(w, int(px.max()) + 1 + _STEP5_2_ROI_MARGIN) + y_max = min(h, int(py.max()) + 1 + _STEP5_2_ROI_MARGIN) + roi_w = x_max - x_min + roi_h = y_max - y_min + dilate_roi = dilate_in_roi[y_min:y_max, x_min:x_max] + num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(dilate_roi, connectivity=8) + best_label = 0 + best_area = 0 + for i in range(1, num_labels): + area = stats[i, cv2.CC_STAT_AREA] + if area > best_area: + best_area = area + best_label = i + longest_mask_roi = ((labels == best_label).astype(np.uint8)) * 255 + longest_mask = np.zeros((h, w), dtype=np.uint8) + longest_mask[y_min:y_max, x_min:x_max] = longest_mask_roi + p1, p2 = None, None + A3, B3 = None, None + contours, _ = cv2.findContours(longest_mask_roi, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + if contours: + contour = max(contours, key=cv2.contourArea) + pts_contour = contour.reshape(-1, 2) + n_c = len(pts_contour) + max_pts = _STEP5_2_LONGEST_SEGMENT_MAX_PTS + if n_c > max_pts: + step = max(1, n_c // max_pts) + pts_subsample = pts_contour[np.arange(0, n_c, step)] + else: + pts_subsample = pts_contour + if _STEP5_2_HEAVY_SEARCH_FLAG: + result = _longest_segment_fully_inside_mask(longest_mask_roi, pts_subsample) + if result is not None: + p1_roi, p2_roi = result + p1 = (p1_roi[0] + x_min, p1_roi[1] + y_min) + p2 = (p2_roi[0] + x_min, p2_roi[1] + y_min) + else: + best_len_sq = -1.0 + best_p1_roi, best_p2_roi = None, None + for i in range(len(pts_subsample)): + for j in range(i + 1, len(pts_subsample)): + 
d_sq = (pts_subsample[i][0] - pts_subsample[j][0]) ** 2 + (pts_subsample[i][1] - pts_subsample[j][1]) ** 2 + if d_sq > best_len_sq: + best_len_sq = d_sq + best_p1_roi = (int(pts_subsample[i][0]), int(pts_subsample[i][1])) + best_p2_roi = (int(pts_subsample[j][0]), int(pts_subsample[j][1])) + if best_p1_roi is not None and best_p2_roi is not None: + p1 = (best_p1_roi[0] + x_min, best_p1_roi[1] + y_min) + p2 = (best_p2_roi[0] + x_min, best_p2_roi[1] + y_min) + if p1 is not None and p2 is not None: + a_long, b_long, c_long = _line_through_two_points(float(p1[0]), float(p1[1]), float(p2[0]), float(p2[1])) + a2, b2, c2 = _line_through_two_points(B1[0], B1[1], B2[0], B2[1]) + B3 = _line_intersection(a_long, b_long, c_long, a2, b2, c2) + seg_border = _line_segment_for_drawing(a_long, b_long, c_long, w, h) + if seg_border is not None: + A3 = seg_border[0] + if A3 is not None and B3 is not None: + c4 = -a3 * A3[0] - b3 * A3[1] + A3x, A3y = A3[0], A3[1] + B3x, B3y = B3[0], B3[1] + A3x_roi = A3x - x_min + A3y_roi = A3y - y_min + B3x_roi = B3x - x_min + B3y_roi = B3y - y_min + if _STEP5_2_8PX_HALFRES and roi_w >= 4 and roi_h >= 4: + dilate_8px = cv2.resize(dilate_roi, (roi_w // 2, roi_h // 2), interpolation=cv2.INTER_NEAREST) + roi_w_8, roi_h_8 = roi_w // 2, roi_h // 2 + scale_8, seg_width_8 = 0.5, 4 + else: + dilate_8px = dilate_roi + roi_w_8, roi_h_8 = roi_w, roi_h + scale_8, seg_width_8 = 1.0, 8 + mask_8_roi = np.zeros((roi_h_8, roi_w_8), dtype=np.uint8) + overlap_roi = np.empty((roi_h_8, roi_w_8), dtype=np.uint8) + best_count_8 = -1 + best_s, best_t = 0, 0 + for s in range(-30, 31, _STEP5_2_8PX_COARSE_STEP): + for t in range(-30, 31, _STEP5_2_8PX_COARSE_STEP): + A4x_roi = A3x_roi + s * ux + A4y_roi = A3y_roi + s * uy + B4x_roi = B3x_roi + t * ux + B4y_roi = B3y_roi + t * uy + ax_d = int(round(A4x_roi * scale_8)) + ay_d = int(round(A4y_roi * scale_8)) + bx_d = int(round(B4x_roi * scale_8)) + by_d = int(round(B4y_roi * scale_8)) + mask_8_roi.fill(0) + cv2.line(mask_8_roi, 
(ax_d, ay_d), (bx_d, by_d), 255, seg_width_8) + cv2.bitwise_and(dilate_8px, mask_8_roi, overlap_roi) + count = cv2.countNonZero(overlap_roi) + if count > best_count_8: + best_count_8 = count + best_s, best_t = s, t + if _STEP5_2_8PX_REFINE_PASS: + s_lo = max(-30, best_s - _STEP5_2_8PX_REFINE_WINDOW) + s_hi = min(31, best_s + _STEP5_2_8PX_REFINE_WINDOW + 1) + t_lo = max(-30, best_t - _STEP5_2_8PX_REFINE_WINDOW) + t_hi = min(31, best_t + _STEP5_2_8PX_REFINE_WINDOW + 1) + for s in range(s_lo, s_hi, 5): + for t in range(t_lo, t_hi, 5): + A4x_roi = A3x_roi + s * ux + A4y_roi = A3y_roi + s * uy + B4x_roi = B3x_roi + t * ux + B4y_roi = B3y_roi + t * uy + ax_d = int(round(A4x_roi * scale_8)) + ay_d = int(round(A4y_roi * scale_8)) + bx_d = int(round(B4x_roi * scale_8)) + by_d = int(round(B4y_roi * scale_8)) + mask_8_roi.fill(0) + cv2.line(mask_8_roi, (ax_d, ay_d), (bx_d, by_d), 255, seg_width_8) + cv2.bitwise_and(dilate_8px, mask_8_roi, overlap_roi) + count = cv2.countNonZero(overlap_roi) + if count > best_count_8: + best_count_8 = count + best_s, best_t = s, t + A4 = (A3x + best_s * ux, A3y + best_s * uy) + B4 = (B3x + best_t * ux, B3y + best_t * uy) + a_ab, b_ab, c_ab = _line_through_two_points(A4[0], A4[1], B4[0], B4[1]) + kkp29 = _line_intersection(a_ab, b_ab, c_ab, a3, b3, c3) + center_pts = [(float(kps[i][0]), float(kps[i][1])) for i in [13, 14, 15, 16] if i < len(kps) and _is_kp_valid(kps[i])] + if len(center_pts) >= 2: + line_13_16 = _fit_line_to_points(center_pts) + else: + src_13_16 = np.array([[_F1[i] for i in [13, 14, 15, 16]]], dtype=np.float32) + tkp_13_16 = cv2.perspectiveTransform(src_13_16, H_use_r)[0] + center_pts = [(float(tkp_13_16[i][0]), float(tkp_13_16[i][1])) for i in range(4)] + line_13_16 = _fit_line_to_points(center_pts) + kkp16 = _line_intersection(a_ab, b_ab, c_ab, line_13_16[0], line_13_16[1], line_13_16[2]) if line_13_16 is not None else None + if kkp29 is not None: + kps[29] = [float(kkp29[0]), float(kkp29[1])] + if kkp16 is not None: + 
kps[16] = [float(kkp16[0]), float(kkp16[1])] + if kkp16 is not None and kkp16[0] > 0: + pts_0_5_r = [(float(kps[i][0]), float(kps[i][1])) for i in [0, 1, 2, 3, 4, 5] if i < len(kps) and _is_kp_valid(kps[i])] + if len(pts_0_5_r) >= 2: + line_0_5_r = _fit_line_to_points(pts_0_5_r) + else: + src_0_5_r = np.array([[_F1[i] for i in [0, 1, 2, 3, 4, 5]]], dtype=np.float32) + tkp_0_5_r = cv2.perspectiveTransform(src_0_5_r, H_use_r)[0] + pts_0_5_r = [(float(tkp_0_5_r[i][0]), float(tkp_0_5_r[i][1])) for i in range(6)] + line_0_5_r = _fit_line_to_points(pts_0_5_r) + kkp5_r = _line_intersection(a_ab, b_ab, c_ab, line_0_5_r[0], line_0_5_r[1], line_0_5_r[2]) if line_0_5_r is not None else None + if kkp5_r is not None: + kps[5] = [float(kkp5_r[0]), float(kkp5_r[1])] + return (True, "right") + + if decision == "left": + clip_l = _clip_segment_to_rect(tkp_5[0], tkp_5[1], tkp_16[0], tkp_16[1], w, h) + if clip_l is None: + return (False, None) + (Bx, By), (Ax, Ay) = clip_l + valid_indices_52 = [] + valid_src_52 = [] + valid_dst_52 = [] + for idx, kp in enumerate(kps): + if not _is_kp_valid(kp): + continue + x, y = float(kp[0]), float(kp[1]) + valid_indices_52.append(idx) + valid_src_52.append(_F1[idx] if idx < len(_F1) else (0.0, 0.0)) + valid_dst_52.append((x, y)) + warped_l = precomputed_warped + ground_mask_l = precomputed_ground_mask + H_use_l = H + if warped_l is None or ground_mask_l is None: + try: + warped_l = cv2.warpPerspective(template_image, H_use_l, (w, h)) + ground_mask_l, _ = _y5(warped_l) + except _Xe as e: + err_msg = e.args[0] if e.args else "" + if err_msg in _MASK_RETRY_ERRORS and len(valid_indices_52) >= 4 and len(valid_dst_52) >= 4: + idx_smallest_y = min(range(len(valid_dst_52)), key=lambda i: valid_dst_52[i][1]) + x0, y0 = valid_dst_52[idx_smallest_y] + for dx, dy in [(0, -1), (0, 1), (-1, 0), (1, 0)]: + new_dst = list(valid_dst_52) + new_dst[idx_smallest_y] = (x0 + dx, y0 + dy) + H_retry = _y10(valid_indices_52, valid_src_52, new_dst, {}) + if H_retry is 
None: + continue + try: + warped_l = cv2.warpPerspective(template_image, H_retry, (w, h)) + ground_mask_l, _ = _y5(warped_l) + H_use_l = H_retry + break + except _Xe: + continue + if warped_l is None or ground_mask_l is None: + return (False, None) + except Exception: + return (False, None) + if warped_l is None or ground_mask_l is None: + return (False, None) + dilate_uint8_l = precomputed_dilate_uint8 if precomputed_dilate_uint8 is not None else _dilate_uint8_full_frame(video_frame) + pts_left = [(float(kps[i][0]), float(kps[i][1])) for i in [0, 1, 2, 3, 4, 5] if i < len(kps) and _is_kp_valid(kps[i])] + if len(pts_left) >= 2: + line3_l = _fit_line_to_points(pts_left) + else: + src_0_5 = np.array([[_F1[i] for i in [0, 1, 2, 3, 4, 5]]], dtype=np.float32) + tkp_0_5 = cv2.perspectiveTransform(src_0_5, H_use_l)[0] + pts_left = [(float(tkp_0_5[i][0]), float(tkp_0_5[i][1])) for i in range(6)] + line3_l = _fit_line_to_points(pts_left) + if line3_l is None: + return (False, None) + a3_l, b3_l, c3_l = line3_l + norm_u_l = math.hypot(b3_l, -a3_l) + if norm_u_l < 1e-12: + return (False, None) + ux_l, uy_l = b3_l / norm_u_l, -a3_l / norm_u_l + d_l = _STEP5_2_RIGHT_QUAD_HALFLENGTH + A1_l = (Ax - d_l * ux_l, Ay - d_l * uy_l) + A2_l = (Ax + d_l * ux_l, Ay + d_l * uy_l) + B1_l = (Bx - d_l * ux_l, By - d_l * uy_l) + B2_l = (Bx + d_l * ux_l, By + d_l * uy_l) + pts_poly_l = np.array([[A1_l[0], A1_l[1]], [A2_l[0], A2_l[1]], [B2_l[0], B2_l[1]], [B1_l[0], B1_l[1]]], dtype=np.int32) + mask_poly_l = np.zeros((h, w), dtype=np.uint8) + cv2.fillConvexPoly(mask_poly_l, pts_poly_l, 255) + dilate_in_roi_l = cv2.bitwise_and(dilate_uint8_l, mask_poly_l) + px_l = pts_poly_l[:, 0] + py_l = pts_poly_l[:, 1] + x_min_l = max(0, int(px_l.min()) - _STEP5_2_ROI_MARGIN) + y_min_l = max(0, int(py_l.min()) - _STEP5_2_ROI_MARGIN) + x_max_l = min(w, int(px_l.max()) + 1 + _STEP5_2_ROI_MARGIN) + y_max_l = min(h, int(py_l.max()) + 1 + _STEP5_2_ROI_MARGIN) + roi_w_l = x_max_l - x_min_l + roi_h_l = y_max_l - 
y_min_l + dilate_roi_l = dilate_in_roi_l[y_min_l:y_max_l, x_min_l:x_max_l] + num_labels_l, labels_l, stats_l, _ = cv2.connectedComponentsWithStats(dilate_roi_l, connectivity=8) + best_label_l = 0 + best_area_l = 0 + for i in range(1, num_labels_l): + area = stats_l[i, cv2.CC_STAT_AREA] + if area > best_area_l: + best_area_l = area + best_label_l = i + longest_mask_roi_l = ((labels_l == best_label_l).astype(np.uint8)) * 255 + longest_mask_l = np.zeros((h, w), dtype=np.uint8) + longest_mask_l[y_min_l:y_max_l, x_min_l:x_max_l] = longest_mask_roi_l + p1_l, p2_l = None, None + A3_l, B3_l = None, None + contours_l, _ = cv2.findContours(longest_mask_roi_l, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) + if contours_l: + contour_l = max(contours_l, key=cv2.contourArea) + pts_contour_l = contour_l.reshape(-1, 2) + n_c_l = len(pts_contour_l) + max_pts_l = _STEP5_2_LONGEST_SEGMENT_MAX_PTS + if n_c_l > max_pts_l: + step_l = max(1, n_c_l // max_pts_l) + pts_subsample_l = pts_contour_l[np.arange(0, n_c_l, step_l)] + else: + pts_subsample_l = pts_contour_l + if _STEP5_2_HEAVY_SEARCH_FLAG: + result_l = _longest_segment_fully_inside_mask(longest_mask_roi_l, pts_subsample_l) + if result_l is not None: + p1_roi_l, p2_roi_l = result_l + p1_l = (p1_roi_l[0] + x_min_l, p1_roi_l[1] + y_min_l) + p2_l = (p2_roi_l[0] + x_min_l, p2_roi_l[1] + y_min_l) + else: + best_len_sq_l = -1.0 + best_p1_roi_l, best_p2_roi_l = None, None + for i in range(len(pts_subsample_l)): + for j in range(i + 1, len(pts_subsample_l)): + d_sq = (pts_subsample_l[i][0] - pts_subsample_l[j][0]) ** 2 + (pts_subsample_l[i][1] - pts_subsample_l[j][1]) ** 2 + if d_sq > best_len_sq_l: + best_len_sq_l = d_sq + best_p1_roi_l = (int(pts_subsample_l[i][0]), int(pts_subsample_l[i][1])) + best_p2_roi_l = (int(pts_subsample_l[j][0]), int(pts_subsample_l[j][1])) + if best_p1_roi_l is not None and best_p2_roi_l is not None: + p1_l = (best_p1_roi_l[0] + x_min_l, best_p1_roi_l[1] + y_min_l) + p2_l = (best_p2_roi_l[0] + x_min_l, 
best_p2_roi_l[1] + y_min_l) + if p1_l is not None and p2_l is not None: + a_long_l, b_long_l, c_long_l = _line_through_two_points(float(p1_l[0]), float(p1_l[1]), float(p2_l[0]), float(p2_l[1])) + a2_l, b2_l, c2_l = _line_through_two_points(B1_l[0], B1_l[1], B2_l[0], B2_l[1]) + B3_l = _line_intersection(a_long_l, b_long_l, c_long_l, a2_l, b2_l, c2_l) + seg_border_l = _line_segment_for_drawing(a_long_l, b_long_l, c_long_l, w, h) + if seg_border_l is not None: + A3_l = seg_border_l[1] + if A3_l is not None and B3_l is not None: + A3x_l, A3y_l = A3_l[0], A3_l[1] + B3x_l, B3y_l = B3_l[0], B3_l[1] + A3x_roi_l = A3x_l - x_min_l + A3y_roi_l = A3y_l - y_min_l + B3x_roi_l = B3x_l - x_min_l + B3y_roi_l = B3y_l - y_min_l + if _STEP5_2_8PX_HALFRES and roi_w_l >= 4 and roi_h_l >= 4: + dilate_8px_l = cv2.resize(dilate_roi_l, (roi_w_l // 2, roi_h_l // 2), interpolation=cv2.INTER_NEAREST) + roi_w_8_l, roi_h_8_l = roi_w_l // 2, roi_h_l // 2 + scale_8_l, seg_width_8_l = 0.5, 4 + else: + dilate_8px_l = dilate_roi_l + roi_w_8_l, roi_h_8_l = roi_w_l, roi_h_l + scale_8_l, seg_width_8_l = 1.0, 8 + mask_8_roi_l = np.zeros((roi_h_8_l, roi_w_8_l), dtype=np.uint8) + overlap_roi_l = np.empty((roi_h_8_l, roi_w_8_l), dtype=np.uint8) + best_count_8_l = -1 + best_s_l, best_t_l = 0, 0 + for s in range(-30, 31, _STEP5_2_8PX_COARSE_STEP): + for t in range(-30, 31, _STEP5_2_8PX_COARSE_STEP): + A4x_roi_l = A3x_roi_l + s * ux_l + A4y_roi_l = A3y_roi_l + s * uy_l + B4x_roi_l = B3x_roi_l + t * ux_l + B4y_roi_l = B3y_roi_l + t * uy_l + ax_d_l = int(round(A4x_roi_l * scale_8_l)) + ay_d_l = int(round(A4y_roi_l * scale_8_l)) + bx_d_l = int(round(B4x_roi_l * scale_8_l)) + by_d_l = int(round(B4y_roi_l * scale_8_l)) + mask_8_roi_l.fill(0) + cv2.line(mask_8_roi_l, (ax_d_l, ay_d_l), (bx_d_l, by_d_l), 255, seg_width_8_l) + cv2.bitwise_and(dilate_8px_l, mask_8_roi_l, overlap_roi_l) + count = cv2.countNonZero(overlap_roi_l) + if count > best_count_8_l: + best_count_8_l = count + best_s_l, best_t_l = s, t + if 
_STEP5_2_8PX_REFINE_PASS: + s_lo_l = max(-30, best_s_l - _STEP5_2_8PX_REFINE_WINDOW) + s_hi_l = min(31, best_s_l + _STEP5_2_8PX_REFINE_WINDOW + 1) + t_lo_l = max(-30, best_t_l - _STEP5_2_8PX_REFINE_WINDOW) + t_hi_l = min(31, best_t_l + _STEP5_2_8PX_REFINE_WINDOW + 1) + for s in range(s_lo_l, s_hi_l, 5): + for t in range(t_lo_l, t_hi_l, 5): + A4x_roi_l = A3x_roi_l + s * ux_l + A4y_roi_l = A3y_roi_l + s * uy_l + B4x_roi_l = B3x_roi_l + t * ux_l + B4y_roi_l = B3y_roi_l + t * uy_l + ax_d_l = int(round(A4x_roi_l * scale_8_l)) + ay_d_l = int(round(A4y_roi_l * scale_8_l)) + bx_d_l = int(round(B4x_roi_l * scale_8_l)) + by_d_l = int(round(B4y_roi_l * scale_8_l)) + mask_8_roi_l.fill(0) + cv2.line(mask_8_roi_l, (ax_d_l, ay_d_l), (bx_d_l, by_d_l), 255, seg_width_8_l) + cv2.bitwise_and(dilate_8px_l, mask_8_roi_l, overlap_roi_l) + count = cv2.countNonZero(overlap_roi_l) + if count > best_count_8_l: + best_count_8_l = count + best_s_l, best_t_l = s, t + A4_l = (A3x_l + best_s_l * ux_l, A3y_l + best_s_l * uy_l) + B4_l = (B3x_l + best_t_l * ux_l, B3y_l + best_t_l * uy_l) + a_ab_l, b_ab_l, c_ab_l = _line_through_two_points(A4_l[0], A4_l[1], B4_l[0], B4_l[1]) + kkp5_l = _line_intersection(a_ab_l, b_ab_l, c_ab_l, a3_l, b3_l, c3_l) + center_pts_l = [(float(kps[i][0]), float(kps[i][1])) for i in [13, 14, 15, 16] if i < len(kps) and _is_kp_valid(kps[i])] + if len(center_pts_l) >= 2: + line_13_16_l = _fit_line_to_points(center_pts_l) + else: + src_13_16_l = np.array([[_F1[i] for i in [13, 14, 15, 16]]], dtype=np.float32) + tkp_13_16_l = cv2.perspectiveTransform(src_13_16_l, H_use_l)[0] + center_pts_l = [(float(tkp_13_16_l[i][0]), float(tkp_13_16_l[i][1])) for i in range(4)] + line_13_16_l = _fit_line_to_points(center_pts_l) + kkp16_l = _line_intersection(a_ab_l, b_ab_l, c_ab_l, line_13_16_l[0], line_13_16_l[1], line_13_16_l[2]) if line_13_16_l is not None else None + if kkp5_l is not None: + kps[5] = [float(kkp5_l[0]), float(kkp5_l[1])] + if kkp16_l is not None: + kps[16] = 
[float(kkp16_l[0]), float(kkp16_l[1])] + if kkp16_l is not None and kkp16_l[0] < w: + pts_24_29_l = [(float(kps[i][0]), float(kps[i][1])) for i in [24, 25, 26, 27, 28, 29] if i < len(kps) and _is_kp_valid(kps[i])] + if len(pts_24_29_l) >= 2: + line_24_29_l = _fit_line_to_points(pts_24_29_l) + else: + src_24_29_l = np.array([[_F1[i] for i in [24, 25, 26, 27, 28, 29]]], dtype=np.float32) + tkp_24_29_l = cv2.perspectiveTransform(src_24_29_l, H_use_l)[0] + pts_24_29_l = [(float(tkp_24_29_l[i][0]), float(tkp_24_29_l[i][1])) for i in range(6)] + line_24_29_l = _fit_line_to_points(pts_24_29_l) + kkp29_l = _line_intersection(a_ab_l, b_ab_l, c_ab_l, line_24_29_l[0], line_24_29_l[1], line_24_29_l[2]) if line_24_29_l is not None else None + if kkp29_l is not None: + kps[29] = [float(kkp29_l[0]), float(kkp29_l[1])] + return (True, "left") + + if not kp16_valid_input: + return (False, None) + x16, y16 = tkp_16[0], tkp_16[1] + valid_indices_52 = [] + valid_src_52 = [] + valid_dst_52 = [] + for idx, kp in enumerate(kps): + if not _is_kp_valid(kp): + continue + x, y = float(kp[0]), float(kp[1]) + valid_indices_52.append(idx) + valid_src_52.append(_F1[idx] if idx < len(_F1) else (0.0, 0.0)) + valid_dst_52.append((x, y)) + warped = None + ground_mask = None + H_use = H + try: + warped = cv2.warpPerspective(template_image, H_use, (w, h)) + ground_mask, _ = _y5(warped) + except _Xe as e: + err_msg = e.args[0] if e.args else "" + if err_msg in _MASK_RETRY_ERRORS and len(valid_indices_52) >= 4 and len(valid_dst_52) >= 4: + idx_smallest_y = min(range(len(valid_dst_52)), key=lambda i: valid_dst_52[i][1]) + x0, y0 = valid_dst_52[idx_smallest_y] + for dx, dy in [(0, -1), (0, 1), (-1, 0), (1, 0)]: + new_dst = list(valid_dst_52) + new_dst[idx_smallest_y] = (x0 + dx, y0 + dy) + H_retry = _y10(valid_indices_52, valid_src_52, new_dst, {}) + if H_retry is None: + continue + try: + warped = cv2.warpPerspective(template_image, H_retry, (w, h)) + ground_mask, _ = _y5(warped) + H_use = H_retry + 
break + except _Xe: + continue + else: + warped = None + ground_mask = None + if warped is None or ground_mask is None: + return (False, None) + except Exception: + return (False, None) + if warped is None or ground_mask is None: + return (False, None) + dilate_uint8 = _dilate_uint8_full_frame(video_frame) + seg_width = 8 + mask = np.zeros((h, w), dtype=np.uint8) + overlap_buf = np.empty((h, w), dtype=np.uint8) + best_count = -1 + best_ay, best_by = ay, by + step = 5 + for t in range(-100, 101, step): + ay_new = ay + t + if abs(bx - ax) < 1e-12: + by_new = ay_new + else: + by_new = ay_new + (y16 - ay_new) * (bx - ax) / (x16 - ax) if abs(x16 - ax) >= 1e-12 else ay_new + a_pt = (int(round(ax)), int(round(ay_new))) + b_pt = (int(round(bx)), int(round(by_new))) + mask.fill(0) + cv2.line(mask, a_pt, b_pt, 255, seg_width) + cv2.bitwise_and(dilate_uint8, mask, overlap_buf) + count = cv2.countNonZero(overlap_buf) + if count > best_count: + best_count = count + best_ay, best_by = ay_new, by_new + for shift in range(-20, 21, 5): + ay_shift = best_ay + shift + by_shift = best_by + shift + a_pt = (int(round(ax)), int(round(ay_shift))) + b_pt = (int(round(bx)), int(round(by_shift))) + mask.fill(0) + cv2.line(mask, a_pt, b_pt, 255, seg_width) + cv2.bitwise_and(dilate_uint8, mask, overlap_buf) + count = cv2.countNonZero(overlap_buf) + if count > best_count: + best_count = count + best_ay, best_by = ay_shift, by_shift + a_final = (ax, best_ay) + b_final = (bx, best_by) + center_pts = [] + for i in [13, 14, 15, 16]: + if i < len(kps) and _is_kp_valid(kps[i]): + center_pts.append((float(kps[i][0]), float(kps[i][1]))) + line_center = _fit_line_to_points(center_pts) if len(center_pts) >= 2 else None + a_ab, b_ab, c_ab = _line_through_two_points(a_final[0], a_final[1], b_final[0], b_final[1]) + if line_center is not None: + a_c, b_c, c_c = line_center + inter = _line_intersection(a_c, b_c, c_c, a_ab, b_ab, c_ab) + if inter is not None: + x16, y16 = inter[0], inter[1] + d5 = 
def _refine_kp_pair_along_line(
    kps: list[list[float]],
    H: ndarray,
    video_frame: ndarray,
    template_image: ndarray,
    *,
    missing_idx: int,
    anchor_idx: int,
    missing_line_indices: tuple[int, ...],
    anchor_line_indices: tuple[int, ...],
    search_radius: int = 50,
    step: int = 5,
    seg_width: int = 5,
) -> bool:
    """Shared implementation behind ``_refine_kp4_kp12`` and ``_refine_kp20_kp28``.

    Runs only when ``kps[anchor_idx]`` is valid and ``kps[missing_idx]`` is
    not (as judged by ``_is_kp_valid``). The steps are:

    1. Fit one line through the valid keypoints in ``missing_line_indices``
       and one through those in ``anchor_line_indices`` (each needs at least
       two valid points).
    2. Project template point ``_F1[missing_idx]`` through ``H`` to get a
       first estimate of the missing keypoint.
    3. Build a 0/255 ``uint8`` mask from ``_y6(video_frame, ground_mask)``,
       where ``ground_mask`` comes from ``_y5`` applied to the warped template.
    4. Grid-search the y coordinate of both endpoints (x fixed, within
       ``search_radius`` pixels in ``step`` increments, clamped to the frame)
       and keep the pair whose ``seg_width``-thick segment covers the most
       non-zero mask pixels.
    5. Replace both keypoints with the intersections of the winning
       segment's line with the two fitted lines.

    Args:
        kps: Keypoint list; modified in place on success.
        H: Homography passed to ``cv2.perspectiveTransform`` /
            ``cv2.warpPerspective`` (template -> frame).
        video_frame: Frame whose first two shape entries are (height, width).
        template_image: Template image warped with ``H``.
        missing_idx: Index of the keypoint to infer.
        anchor_idx: Index of the already-valid keypoint paired with it.
        missing_line_indices: Keypoint indices used to fit the line on which
            the missing keypoint is placed.
        anchor_line_indices: Keypoint indices used to fit the line on which
            the anchor keypoint is re-placed.
        search_radius: Half-height in pixels of the vertical search window.
        step: Vertical search stride in pixels.
        seg_width: Thickness passed to ``cv2.line`` for the probe segment.

    Returns:
        ``True`` when both keypoints were rewritten, ``False`` otherwise
        (``kps`` is left untouched in that case).
    """
    if len(kps) <= max(missing_idx, anchor_idx):
        return False
    if not _is_kp_valid(kps[anchor_idx]) or _is_kp_valid(kps[missing_idx]):
        return False

    def _fit_valid(indices: tuple[int, ...]) -> Any:
        # Fit a line through whichever of the listed keypoints are valid.
        pts = [
            (float(kps[i][0]), float(kps[i][1]))
            for i in indices
            if i < len(kps) and _is_kp_valid(kps[i])
        ]
        return _fit_line_to_points(pts) if len(pts) >= 2 else None

    # Cheap feasibility checks first: without both fitted lines the result is
    # always False, so there is no point paying for the warp and the 2-D
    # search below.
    line_missing = _fit_valid(missing_line_indices)
    if line_missing is None:
        return False
    line_anchor = _fit_valid(anchor_line_indices)
    if line_anchor is None:
        return False

    src_pt = np.array([_F1[missing_idx]], dtype=np.float32).reshape(1, 1, 2)
    projected = cv2.perspectiveTransform(src_pt, H)[0, 0]
    miss_x, miss_y = float(projected[0]), float(projected[1])
    anchor_x = float(kps[anchor_idx][0])
    anchor_y = float(kps[anchor_idx][1])
    # A degenerate homography can project to inf/nan; int() on such values
    # would raise, so treat it as "cannot refine".
    if not all(math.isfinite(v) for v in (miss_x, miss_y, anchor_x, anchor_y)):
        return False

    h, w = video_frame.shape[:2]
    try:
        warped = cv2.warpPerspective(template_image, H, (w, h))
        ground_mask, _ = _y5(warped)
    except _Xe:
        return False
    dilate_image = _y6(video_frame, ground_mask)
    # assumes _y6 returns a boolean / 0-1 array, scaled here to 0/255 - TODO confirm
    dilate_uint8 = (dilate_image.astype(np.uint8)) * 255

    miss_lo = max(0, int(miss_y) - search_radius)
    miss_hi = min(h - 1, int(miss_y) + search_radius)
    anchor_lo = max(0, int(anchor_y) - search_radius)
    anchor_hi = min(h - 1, int(anchor_y) + search_radius)
    miss_px = int(round(miss_x))
    anchor_px = int(round(anchor_x))

    best_count = -1
    # Fallbacks used when a search window is empty (estimate outside the frame).
    best_miss_y = int(miss_y)
    best_anchor_y = int(anchor_y)
    # Buffers are allocated once and reused for every probe segment.
    mask = np.zeros((h, w), dtype=np.uint8)
    overlap_buf = np.empty((h, w), dtype=np.uint8)
    for y_miss in range(miss_lo, miss_hi + 1, step):
        for y_anchor in range(anchor_lo, anchor_hi + 1, step):
            mask.fill(0)
            cv2.line(mask, (miss_px, y_miss), (anchor_px, y_anchor), 255, seg_width)
            cv2.bitwise_and(dilate_uint8, mask, overlap_buf)
            count = cv2.countNonZero(overlap_buf)
            if count > best_count:
                best_count = count
                best_miss_y = y_miss
                best_anchor_y = y_anchor

    line_ext = _line_through_two_points(
        miss_x, float(best_miss_y), anchor_x, float(best_anchor_y)
    )
    a1, b1, c1 = line_missing
    a2, b2, c2 = line_anchor
    inter_missing = _line_intersection(a1, b1, c1, line_ext[0], line_ext[1], line_ext[2])
    inter_anchor = _line_intersection(a2, b2, c2, line_ext[0], line_ext[1], line_ext[2])
    if inter_missing is None or inter_anchor is None:
        return False
    kps[missing_idx] = [inter_missing[0], inter_missing[1]]
    kps[anchor_idx] = [inter_anchor[0], inter_anchor[1]]
    return True


def _refine_kp4_kp12(
    kps: list[list[float]],
    H: ndarray,
    video_frame: ndarray,
    template_image: ndarray,
) -> bool:
    """Infer keypoint 4 from a valid keypoint 12 and re-place both.

    Keypoint 4 is put on the line fitted through keypoints 0-4 and keypoint
    12 on the line fitted through keypoints 9-12. Returns ``True`` when
    ``kps`` was updated in place, ``False`` when the preconditions are not
    met or any step fails.
    """
    return _refine_kp_pair_along_line(
        kps,
        H,
        video_frame,
        template_image,
        missing_idx=4,
        anchor_idx=12,
        missing_line_indices=(0, 1, 2, 3, 4),
        anchor_line_indices=(9, 10, 11, 12),
    )


def _refine_kp20_kp28(
    kps: list[list[float]],
    H: ndarray,
    video_frame: ndarray,
    template_image: ndarray,
) -> bool:
    """Infer keypoint 28 from a valid keypoint 20 and re-place both.

    Keypoint 28 is put on the line fitted through keypoints 24-28 and
    keypoint 20 on the line fitted through keypoints 17-20. Returns ``True``
    when ``kps`` was updated in place, ``False`` when the preconditions are
    not met or any step fails.
    """
    return _refine_kp_pair_along_line(
        kps,
        H,
        video_frame,
        template_image,
        missing_idx=28,
        anchor_idx=20,
        missing_line_indices=(24, 25, 26, 27, 28),
        anchor_line_indices=(17, 18, 19, 20),
    )
for i, idx in enumerate(valid_indices): + if idx < len(kps): + kps[idx] = [float(dst_retry[i][0]), float(dst_retry[i][1])] + valid_indices, valid_src, valid_dst = _collect_valid(kps, None) + H0 = H0_used + else: + score0 = 0.0 + + refined = False + step52_decision: str | None = None + if H0 is not None: + refined = _refine_kp4_kp12(kps, H0, video_frame, template_image) or refined + refined = _refine_kp20_kp28(kps, H0, video_frame, template_image) or refined + dilate_uint8 = _dilate_uint8_full_frame(video_frame) + warp_52: ndarray | None = None + ground_mask_52: ndarray | None = None + try: + warp_52 = cv2.warpPerspective(template_image, H0, (frame_width, frame_height)) + ground_mask_52, _ = _y5(warp_52) + except _Xe: + pass + step52_refined, step52_decision = _refine_kp5_kp16_kp29( + kps, H0, video_frame, template_image, + precomputed_dilate_uint8=dilate_uint8, + precomputed_warped=warp_52, + precomputed_ground_mask=ground_mask_52, + ) + refined = refined or step52_refined + + if refined: + valid_indices, valid_src, valid_dst = _collect_valid(kps, step52_decision) + if len(valid_src) < 4: + valid_indices, valid_src, valid_dst = _collect_valid(kps, None) + + if len(valid_src) < 4: + if H0 is not None: + src_all = np.array(_F1, dtype=np.float32).reshape(1, -1, 2) + projected = cv2.perspectiveTransform(src_all, H0)[0] + return [[float(projected[i][0]), float(projected[i][1])] for i in range(_N0)] + return None + + w1, w2, w3 = _y7(), _y8(), _y9() + H1 = _y10(valid_indices, valid_src, valid_dst, w1) + H2 = _y10(valid_indices, valid_src, valid_dst, w2) + valid_set = set(valid_indices) + if valid_set.isdisjoint(_INDICES_H3_VS_H1): + H3 = H1 + elif valid_set.isdisjoint(_INDICES_H3_VS_H2): + H3 = H2 + else: + H3 = _y10(valid_indices, valid_src, valid_dst, w3) + score1 = _y11(H1, template_image, video_frame)[0] if H1 is not None else 0.0 + score2 = _y11(H2, template_image, video_frame)[0] if H2 is not None else 0.0 + score3 = _y11(H3, template_image, video_frame)[0] if H3 
def _z1(
    kps: list[Any],
    frame_width: int,
    frame_height: int,
    fill_missing: bool,
) -> list[list[float]] | None:
    """Re-project keypoints through a homography fitted to the usable ones.

    Every entry of ``kps`` that parses as a non-(0, 0) point and has a
    matching row in ``_F1`` contributes one correspondence to
    ``cv2.findHomography``. The template table ``_F0`` is then projected
    with that homography.

    Args:
        kps: Exactly ``_N0`` keypoint entries; malformed ones are ignored.
        frame_width: Frame width in pixels; must be positive.
        frame_height: Frame height in pixels; must be positive.
        fill_missing: When true, every in-frame projected point is written;
            otherwise only indices that were usable in the input are.

    Returns:
        ``_N0`` ``[x, y]`` pairs (``[0.0, 0.0]`` where nothing was written),
        or ``None`` when the input is rejected, fewer than four
        correspondences exist, or no homography could be estimated.
    """
    if not isinstance(kps, list) or len(kps) != _N0:
        return None
    if frame_width <= 0 or frame_height <= 0:
        return None

    # index -> observed (x, y); dict insertion order keeps src/dst aligned.
    observed: dict[int, tuple[float, float]] = {}
    for pos, entry in enumerate(kps):
        if not isinstance(entry, (list, tuple)) or len(entry) < 2:
            continue
        try:
            px, py = float(entry[0]), float(entry[1])
        except (TypeError, ValueError):
            continue
        if px == 0.0 and py == 0.0:
            continue
        if pos >= len(_F1):
            continue
        observed[pos] = (px, py)

    if len(observed) < 4:
        return None

    template_pts = np.array([_F1[pos] for pos in observed], dtype=np.float32)
    frame_pts = np.array(list(observed.values()), dtype=np.float32)
    H_corrected, _ = cv2.findHomography(template_pts, frame_pts)
    if H_corrected is None:
        return None

    # NOTE(review): the fit uses _F1 but the projection uses _F0 - presumably
    # two variants of the template table; confirm this is intentional.
    all_template = np.array(_F0, dtype=np.float32).reshape(1, -1, 2)
    projected = cv2.perspectiveTransform(all_template, H_corrected)[0]

    result: list[list[float]] = [[0.0, 0.0] for _ in range(_N0)]
    for pos in range(_N0):
        px = float(projected[pos][0])
        py = float(projected[pos][1])
        in_frame = 0 <= px < frame_width and 0 <= py < frame_height
        if in_frame and (fill_missing or pos in observed):
            result[pos] = [px, py]
    return result
video_frame: ndarray, + template_image: ndarray, +) -> float: + score, _ = _z2_score_and_kps(keypoints, video_frame, template_image) + return score + + +def _z2_score_and_kps( + keypoints: list[list[float]], + video_frame: ndarray, + template_image: ndarray, +) -> tuple[float, list[list[float]] | None]: + if not isinstance(keypoints, list) or len(keypoints) != _N0: + return (0.0, None) + valid_indices: list[int] = [] + valid_src: list[tuple[float, float]] = [] + valid_dst: list[tuple[float, float]] = [] + for idx, kp in enumerate(keypoints): + if not isinstance(kp, (list, tuple)) or len(kp) < 2: + continue + try: + x, y = float(kp[0]), float(kp[1]) + except (TypeError, ValueError): + continue + if x == 0.0 and y == 0.0: + continue + if idx >= len(_F1): + continue + valid_indices.append(idx) + valid_src.append(_F1[idx]) + valid_dst.append((x, y)) + if len(valid_src) < 4: + return (0.0, None) + H = _y10(valid_indices, valid_src, valid_dst, {}) + if H is None: + return (0.0, None) + score, H_used, new_dst = _y11( + H, template_image, video_frame, + valid_indices, valid_src, valid_dst, {}, + ) + if new_dst is not None and H_used is not None: + new_keypoints = [list(kp) if isinstance(kp, (list, tuple)) else [0.0, 0.0] for kp in keypoints] + if len(new_keypoints) != _N0: + new_keypoints = (new_keypoints + [[0.0, 0.0]] * _N0)[:_N0] + for i, idx in enumerate(valid_indices): + if idx < len(new_keypoints) and i < len(new_dst): + new_keypoints[idx] = [float(new_dst[i][0]), float(new_dst[i][1])] + return (score, new_keypoints) + return (score, None) + + +def _z3(kps: list[Any]) -> dict[int, tuple[float, float]]: + out: dict[int, tuple[float, float]] = {} + for idx, kp in enumerate(kps): + if not isinstance(kp, (list, tuple)) or len(kp) < 2: + continue + try: + x, y = float(kp[0]), float(kp[1]) + except (TypeError, ValueError): + continue + if x != 0.0 or y != 0.0: + out[idx] = (x, y) + return out + + +def _z4( + a: dict[int, tuple[float, float]], + b: dict[int, tuple[float, 
float]], + threshold: float, +) -> int: + count = 0 + for idx, (ax, ay) in a.items(): + if idx not in b: + continue + bx, by = b[idx] + if ((ax - bx) ** 2 + (ay - by) ** 2) ** 0.5 <= threshold: + count += 1 + return count + + +def _z5(a: list[Any], b: list[Any]) -> list[int]: + out: list[int] = [] + for i in range(min(len(a), len(b))): + ka, kb = a[i], b[i] + if not (isinstance(ka, (list, tuple)) and len(ka) >= 2): + continue + if not (isinstance(kb, (list, tuple)) and len(kb) >= 2): + continue + if float(ka[0]) == 0.0 and float(ka[1]) == 0.0: + continue + if float(kb[0]) == 0.0 and float(kb[1]) == 0.0: + continue + out.append(i) + return out + + +def _z6( + a: list[Any], + b: list[Any], + frame_width: int, + frame_height: int, +) -> list[int]: + out: list[int] = [] + for i in range(min(len(a), len(b))): + ka, kb = a[i], b[i] + if not (isinstance(ka, (list, tuple)) and len(ka) >= 2): + continue + if not (isinstance(kb, (list, tuple)) and len(kb) >= 2): + continue + xa, ya = float(ka[0]), float(ka[1]) + xb, yb = float(kb[0]), float(kb[1]) + if xa == 0.0 and ya == 0.0: + continue + if xb == 0.0 and yb == 0.0: + continue + if not (0 <= xa < frame_width and 0 <= ya < frame_height): + continue + if not (0 <= xb < frame_width and 0 <= yb < frame_height): + continue + out.append(i) + return out + + +def _z7( + batch_frame_ids: list[int], + keypoints_by_frame: dict[int, list[list[float]]], +) -> list[list[int]]: + id_kps: list[tuple[int, list[list[float]]]] = [] + for fid in batch_frame_ids: + kps = keypoints_by_frame.get(fid) + if not kps: + continue + vkps = _z3(kps) + if vkps: + id_kps.append((fid, kps)) + id_kps.sort(key=lambda t: t[0]) + segments: list[list[int]] = [] + if not id_kps: + return segments + current_segment: list[int] = [id_kps[0][0]] + prev_vkps = _z3(id_kps[0][1]) + for i in range(1, len(id_kps)): + fid, kps = id_kps[i] + cur_vkps = _z3(kps) + common = _z4(prev_vkps, cur_vkps, _J5) + if common >= _J6: + current_segment.append(fid) + else: + 
segments.append(current_segment) + current_segment = [fid] + prev_vkps = cur_vkps + segments.append(current_segment) + return segments + + +def _z8( + keypoints_by_frame: dict[int, list[list[float]]], + images: list[ndarray], + offset: int, + template_image: ndarray, +) -> int: + if not _J1 or not images or len(images) < _Z8_MIN_BATCH_FRAMES: + return 0 + batch_frame_ids = [offset + i for i in range(len(images))] + score_map: dict[int, float] = {} + for i, fid in enumerate(batch_frame_ids): + kps = keypoints_by_frame.get(fid) + if not kps or len(kps) != _N0: + score_map[fid] = 0.0 + continue + score_map[fid] = _z2(kps, images[i], template_image) + sorted_ids = sorted(score_map.keys()) + if not sorted_ids: + return 0 + segments = _z7(batch_frame_ids, keypoints_by_frame) + frame_to_seg: dict[int, int] = {} + for seg_idx, seg in enumerate(segments): + for fid in seg: + frame_to_seg[fid] = seg_idx + frame_width = images[0].shape[1] if images else 0 + frame_height = images[0].shape[0] if images else 0 + total_updated = 0 + for threshold in _J2: + problematic = [fid for fid in sorted_ids if score_map[fid] < threshold] + if not problematic: + continue + problematic = problematic[:_Z8_MAX_PROBLEMATIC_PER_BATCH] + segments_seen: dict[tuple[int, int], tuple[list[Any], list[Any], set[int]]] = {} + for problem_id in problematic: + backward_id: int | None = None + for fid in reversed(sorted_ids): + if fid < problem_id and score_map[fid] >= threshold: + backward_id = fid + break + forward_id: int | None = None + for fid in sorted_ids: + if fid > problem_id and score_map[fid] >= threshold: + forward_id = fid + break + if backward_id is None or forward_id is None: + continue + if frame_to_seg.get(backward_id) != frame_to_seg.get(forward_id): + continue + if forward_id - backward_id > _J3: + continue + bwd_kps = keypoints_by_frame.get(backward_id) or [] + fwd_kps = keypoints_by_frame.get(forward_id) or [] + if frame_width > 0 and frame_height > 0: + common_set = set(_z6(bwd_kps, 
class _Cfg:
    """Tunable thresholds for bounding-box post-processing.

    Attributes are plain instance attributes so callers can read or override
    them after construction.
    """

    def __init__(self, min_area: int = 1300, overlap_iou: float = 0.91):
        """Store the configuration values.

        Args:
            min_area: Minimum box area setting.  Previously accepted but
                dropped; it is now kept on the instance so that a value
                passed by a caller is not silently lost.
            overlap_iou: IoU at or above which two non-ball boxes are
                treated as duplicates by the de-duplication step.
        """
        # Kept for callers that configure it; nothing in the visible code
        # reads it yet.
        self.min_area = min_area
        self.overlap_iou = overlap_iou
def _d1(bb: _Bx, cy: float) -> float:
    """Return the squared vertical distance between a box centre and ``cy``."""
    centre_y = 0.5 * (float(bb.y1) + float(bb.y2))
    offset = centre_y - cy
    return offset ** 2


def _i1(a: _Bx, b: _Bx) -> float:
    """Intersection-over-union of two boxes using integer pixel coordinates.

    Returns ``0.0`` when the boxes do not overlap or the union is not positive.
    """
    a_left, a_top, a_right, a_bottom = int(a.x1), int(a.y1), int(a.x2), int(a.y2)
    b_left, b_top, b_right, b_bottom = int(b.x1), int(b.y1), int(b.x2), int(b.y2)

    overlap_w = max(0, min(a_right, b_right) - max(a_left, b_left))
    overlap_h = max(0, min(a_bottom, b_bottom) - max(a_top, b_top))
    inter = overlap_w * overlap_h
    if inter <= 0:
        return 0.0

    union = (
        (a_right - a_left) * (a_bottom - a_top)
        + (b_right - b_left) * (b_bottom - b_top)
        - inter
    )
    if union > 0:
        return inter / union
    return 0.0


def _iou_box4(a: tuple[float, float, float, float], b: tuple[float, float, float, float]) -> float:
    """Intersection-over-union of two ``(x1, y1, x2, y2)`` float boxes.

    Returns ``0.0`` when the boxes do not overlap or the union is not positive.
    """
    a_left, a_top, a_right, a_bottom = a
    b_left, b_top, b_right, b_bottom = b

    overlap_w = max(0.0, min(a_right, b_right) - max(a_left, b_left))
    overlap_h = max(0.0, min(a_bottom, b_bottom) - max(a_top, b_top))
    inter = overlap_w * overlap_h
    if inter <= 0:
        return 0.0

    union = (
        (a_right - a_left) * (a_bottom - a_top)
        + (b_right - b_left) * (b_bottom - b_top)
        - inter
    )
    if union > 0:
        return inter / union
    return 0.0


def _match_tracks_detections(
    prev_list: list[tuple[int, tuple[float, float, float, float]]],
    curr_boxes: list[tuple[float, float, float, float]],
    iou_thresh: float,
    exclude_prev: set[int],
    exclude_curr: set[int],
) -> list[tuple[int, int]]:
    """Match existing tracks to current detections by IoU.

    Uses the Hungarian solver when ``_linear_sum_assignment`` is available and
    a greedy best-IoU-first assignment otherwise.

    Args:
        prev_list: ``(track_id, box)`` pairs for the existing tracks.
        curr_boxes: Detection boxes of the current frame.
        iou_thresh: Minimum IoU for a pair to be eligible.
        exclude_prev: Positions in ``prev_list`` to leave out.
        exclude_curr: Positions in ``curr_boxes`` to leave out.

    Returns:
        ``(prev_position, curr_position)`` pairs, indexing the *unfiltered*
        input lists.  Empty when either side has no candidates.
    """
    tracks = [
        (pos, box) for pos, (_tid, box) in enumerate(prev_list) if pos not in exclude_prev
    ]
    detections = [
        (pos, box) for pos, box in enumerate(curr_boxes) if pos not in exclude_curr
    ]
    if not (tracks and detections):
        return []

    iou_mat = np.array(
        [[_iou_box4(t_box, d_box) for _, d_box in detections] for _, t_box in tracks],
        dtype=np.float64,
    )

    if _linear_sum_assignment is not None:
        # Pairs below the threshold get a prohibitive cost and are dropped
        # again after solving (anything with cost >= 1.0 is rejected).
        cost = np.where(iou_mat < iou_thresh, 1e9, 1.0 - iou_mat)
        rows, cols = _linear_sum_assignment(cost)
        return [
            (tracks[r][0], detections[c][0])
            for r, c in zip(rows, cols)
            if cost[r, c] < 1.0
        ]

    # Greedy fallback: take eligible pairs from highest IoU downwards.
    eligible = [
        (iou_mat[r, c], r, c)
        for r in range(len(tracks))
        for c in range(len(detections))
        if iou_mat[r, c] >= iou_thresh
    ]
    eligible.sort(key=lambda entry: -entry[0])
    taken_prev: set[int] = set()
    taken_curr: set[int] = set()
    pairs: list[tuple[int, int]] = []
    for _iou, r, c in eligible:
        prev_pos, curr_pos = tracks[r][0], detections[c][0]
        if prev_pos in taken_prev or curr_pos in taken_curr:
            continue
        pairs.append((prev_pos, curr_pos))
        taken_prev.add(prev_pos)
        taken_curr.add(curr_pos)
    return pairs


def _predict_box(prev: tuple[float, float, float, float], last: tuple[float, float, float, float]) -> tuple[float, float, float, float]:
    """Extrapolate the next box assuming constant centre velocity.

    The centre moves by the same displacement it moved between ``prev`` and
    ``last``; the size is taken from ``last``.
    """
    px1, py1, px2, py2 = prev
    lx1, ly1, lx2, ly2 = last
    prev_cx, prev_cy = 0.5 * (px1 + px2), 0.5 * (py1 + py2)
    last_cx, last_cy = 0.5 * (lx1 + lx2), 0.5 * (ly1 + ly2)
    half_w = (lx2 - lx1) * 0.5
    half_h = (ly2 - ly1) * 0.5
    next_cx = 2.0 * last_cx - prev_cx
    next_cy = 2.0 * last_cy - prev_cy
    return (next_cx - half_w, next_cy - half_h, next_cx + half_w, next_cy + half_h)
def _s0(
    results: list[_FRes],
    window: int = _S0,
    tids_by_frame: dict[int, list[int | None]] | None = None,
) -> list[_FRes]:
    """Smooth tracked boxes with a centred moving average along each track.

    For every track id, the box centre and size are averaged over up to
    ``window // 2`` neighbouring observations on each side (fewer at the ends
    of the track) and the box is rebuilt from the averaged values, rounded to
    integers.  Boxes without a non-negative track id keep their coordinates.

    Args:
        results: Per-frame results; boxes may be dicts or ``_Bx`` instances.
        window: Smoothing window length.  Values ``<= 1`` disable smoothing.
        tids_by_frame: Track ids per frame id, index-aligned with that
            frame's boxes.  When missing or empty, no box is smoothed.

    Returns:
        ``results`` itself when smoothing is disabled or ``results`` is
        empty; otherwise new ``_FRes`` objects whose boxes are dicts with
        ``conf`` rounded to two decimals and the original keypoints.
    """
    if not results or window <= 1:
        return results

    def _as_models(frame_result: _FRes) -> list[_Bx]:
        # Boxes may arrive as plain dicts; normalise to _Bx for attribute access.
        return [_Bx(**raw) if isinstance(raw, dict) else raw for raw in frame_result.boxes]

    index_of_frame = {res.frame_id: pos for pos, res in enumerate(results)}

    # track id -> [(frame_id, box position within the frame, box), ...]
    tracks: dict[int, list[tuple[int, int, _Bx]]] = {}
    for res in results:
        for box_pos, box in enumerate(_as_models(res)):
            if tids_by_frame:
                raw_tid = tids_by_frame.get(res.frame_id, [None] * len(res.boxes))[box_pos]
            else:
                raw_tid = None
            if raw_tid is not None and raw_tid >= 0:
                tracks.setdefault(int(raw_tid), []).append((res.frame_id, box_pos, box))

    reach = window // 2
    replacement: dict[tuple[int, int], tuple[int, int, int, int]] = {}
    for samples in tracks.values():
        samples.sort(key=lambda sample: sample[0])
        total = len(samples)
        for pos, (frame_id, box_pos, _box) in enumerate(samples):
            neighbours = [
                sample[2] for sample in samples[max(0, pos - reach):min(total, pos + reach + 1)]
            ]
            count = len(neighbours)
            cx = sum(0.5 * (nb.x1 + nb.x2) for nb in neighbours) / count
            cy = sum(0.5 * (nb.y1 + nb.y2) for nb in neighbours) / count
            width = sum(nb.x2 - nb.x1 for nb in neighbours) / count
            height = sum(nb.y2 - nb.y1 for nb in neighbours) / count
            replacement[(index_of_frame[frame_id], box_pos)] = (
                int(round(cx - width / 2)),
                int(round(cy - height / 2)),
                int(round(cx + width / 2)),
                int(round(cy + height / 2)),
            )

    smoothed_results: list[_FRes] = []
    for res_pos, res in enumerate(results):
        rebuilt: list[_Bx] = []
        for box_pos, box in enumerate(_as_models(res)):
            coords = replacement.get((res_pos, box_pos))
            if coords is None:
                coords = (int(box.x1), int(box.y1), int(box.x2), int(box.y2))
            x1, y1, x2, y2 = coords
            rebuilt.append(
                _Bx(
                    x1=x1,
                    y1=y1,
                    x2=x2,
                    y2=y2,
                    cls_id=int(box.cls_id),
                    conf=float(box.conf),
                    team_id=box.team_id,
                )
            )
        payload = [
            {
                "x1": b.x1,
                "y1": b.y1,
                "x2": b.x2,
                "y2": b.y2,
                "cls_id": b.cls_id,
                "conf": round(float(b.conf), 2),
                "team_id": b.team_id,
            }
            for b in rebuilt
        ]
        smoothed_results.append(
            _FRes(frame_id=res.frame_id, boxes=payload, keypoints=res.keypoints)
        )
    return smoothed_results
Iterable[_Bx], + *, + frame_width: int, + frame_height: int, + cfg: _Cfg | None = None, + do_goalkeeper_dedup: bool = True, + do_referee_disambiguation: bool = False, + do_ball_dedup: bool = True, +) -> list[_Bx]: + cfg = cfg or _Cfg() + W, H = int(frame_width), int(frame_height) + cy = 0.5 * float(H) + kept: list[_Bx] = list(bboxes or []) + if cfg.overlap_iou > 0 and len(kept) > 1: + balls = [bb for bb in kept if int(bb.cls_id) == _C0] + non_balls = [bb for bb in kept if int(bb.cls_id) != _C0] + if len(non_balls) > 1: + non_balls_sorted = sorted(non_balls, key=lambda bb: float(bb.conf), reverse=True) + kept_nb = [] + for cand in non_balls_sorted: + skip = False + for k in kept_nb: + iou = _i1(cand, k) + if iou >= cfg.overlap_iou: + skip = True + break + if ( + abs(int(cand.x1) - int(k.x1)) <= 3 + and abs(int(cand.y1) - int(k.y1)) <= 3 + and abs(int(cand.x2) - int(k.x2)) <= 3 + and abs(int(cand.y2) - int(k.y2)) <= 3 + and iou > 0.85 + ): + skip = True + break + if not skip: + kept_nb.append(cand) + kept = kept_nb + balls + if do_goalkeeper_dedup: + gks = [bb for bb in kept if int(bb.cls_id) == _C1] + if len(gks) > 1: + best_gk = max(gks, key=lambda bb: float(bb.conf)) + best_gk_conf = float(best_gk.conf) + deduped = [] + for bb in kept: + if int(bb.cls_id) == _C1: + if float(bb.conf) < best_gk_conf or (float(bb.conf) == best_gk_conf and bb is not best_gk): + deduped.append(_Bx(x1=bb.x1, y1=bb.y1, x2=bb.x2, y2=bb.y2, cls_id=_C2, conf=float(bb.conf), team_id="1")) + else: + deduped.append(bb) + else: + deduped.append(bb) + kept = deduped + if do_referee_disambiguation: + refs = [bb for bb in kept if int(bb.cls_id) == _C3] + if len(refs) > 1: + best_ref = min(refs, key=lambda bb: _d1(bb, cy)) + kept = [bb for bb in kept if int(bb.cls_id) != _C3 or bb is best_ref] + if do_ball_dedup: + balls = [bb for bb in kept if int(bb.cls_id) == _C0] + if len(balls) > 1: + best_ball = max(balls, key=lambda bb: float(bb.conf)) + kept = [bb for bb in kept if int(bb.cls_id) != _C0] + 
def _k0(feats: np.ndarray, iters: int = 20) -> tuple[np.ndarray, np.ndarray]:
    """Cluster feature rows into two groups with a small Lloyd-style loop.

    Seeding is deterministic: start from row 0, take the row farthest from
    it as the second seed, then replace the first seed with the row farthest
    from the second.

    Args:
        feats: 2-D array with one feature vector per row.
        iters: Number of assign/update rounds (always run in full).

    Returns:
        ``(centroids, labels)``: ``centroids`` has two rows, ``labels`` holds
        0/1 per input row.  With no rows the centroids are zeros; with one
        row both centroids equal that row.
    """
    count, dim = feats.shape
    if count <= 0:
        return np.zeros((2, dim), dtype=np.float32), np.zeros(0, dtype=np.int64)
    if count == 1:
        only = feats[0]
        return np.stack([only, only], axis=0), np.zeros(1, dtype=np.int64)

    anchor = feats[0]
    seed_b = feats[int(np.argmax(np.linalg.norm(feats - anchor[None, :], axis=1)))]
    seed_a = feats[int(np.argmax(np.linalg.norm(feats - seed_b[None, :], axis=1)))]
    centroids = np.stack([seed_a, seed_b], axis=0).astype(np.float32)

    labels = np.zeros(count, dtype=np.int64)
    for _round in range(iters):
        sq_dist = ((feats[:, None, :] - centroids[None, :, :]) ** 2).sum(axis=2)
        labels = sq_dist.argmin(axis=1)
        for cluster in (0, 1):
            members = feats[labels == cluster]
            # An empty cluster keeps its previous centroid.
            if len(members) > 0:
                centroids[cluster] = members.mean(axis=0)
    return centroids, labels


def _m0(prev: np.ndarray, new: np.ndarray) -> np.ndarray:
    """Order the two ``new`` centroids so they line up with ``prev``.

    Returns ``new`` unchanged when the straight pairing is at least as close
    (in summed squared distance) as the swapped pairing; otherwise returns a
    new array with the two rows swapped.
    """

    def _sq(u: np.ndarray, v: np.ndarray):
        return np.sum((u - v) ** 2)

    straight = _sq(prev[0], new[0]) + _sq(prev[1], new[1])
    swapped = _sq(prev[0], new[1]) + _sq(prev[1], new[0])
    if straight <= swapped:
        return new
    return np.stack([new[1], new[0]], axis=0)
class _ConvLayer(nn.Module):
    """Bias-free convolution followed by normalisation and ReLU.

    ``IN=True`` selects an affine ``InstanceNorm2d`` instead of
    ``BatchNorm2d``; the normalisation layer is stored as ``bn`` either way.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, groups=1, IN=False):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            bias=False,
            groups=groups,
        )
        if IN:
            self.bn = nn.InstanceNorm2d(out_channels, affine=True)
        else:
            self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()

    def forward(self, x):
        features = self.conv(x)
        features = self.bn(features)
        return self.relu(features)


class _Conv1x1(nn.Module):
    """1x1 bias-free convolution followed by batch norm and ReLU."""

    def __init__(self, in_channels, out_channels, stride=1, groups=1):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels, out_channels, 1, stride=stride, padding=0, bias=False, groups=groups
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()

    def forward(self, x):
        features = self.conv(x)
        features = self.bn(features)
        return self.relu(features)


class _Conv1x1Linear(nn.Module):
    """1x1 bias-free convolution with optional batch norm and no activation."""

    def __init__(self, in_channels, out_channels, stride=1, bn=True):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, 1, stride=stride, padding=0, bias=False)
        # ``bn`` stays ``None`` when normalisation is disabled.
        self.bn = None
        if bn:
            self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        projected = self.conv(x)
        if self.bn is None:
            return projected
        return self.bn(projected)


class _Conv3x3(nn.Module):
    """3x3 bias-free convolution (padding 1) followed by batch norm and ReLU."""

    def __init__(self, in_channels, out_channels, stride=1, groups=1):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels, out_channels, 3, stride=stride, padding=1, bias=False, groups=groups
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()

    def forward(self, x):
        features = self.conv(x)
        features = self.bn(features)
        return self.relu(features)


class _LightConv3x3(nn.Module):
    """Pointwise 1x1 convolution, then depthwise 3x3, then batch norm and ReLU."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, 1, stride=1, padding=0, bias=False)
        # groups == channels makes the 3x3 convolution depthwise.
        self.conv2 = nn.Conv2d(
            out_channels, out_channels, 3, stride=1, padding=1, bias=False, groups=out_channels
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()

    def forward(self, x):
        pointwise = self.conv1(x)
        depthwise = self.conv2(pointwise)
        return self.relu(self.bn(depthwise))


class _LightConvStream(nn.Module):
    """A chain of ``_LightConv3x3`` layers; at least one layer is always built."""

    def __init__(self, in_channels, out_channels, depth):
        super().__init__()
        stages = [_LightConv3x3(in_channels, out_channels)]
        stages.extend(_LightConv3x3(out_channels, out_channels) for _ in range(depth - 1))
        self.layers = nn.Sequential(*stages)

    def forward(self, x):
        return self.layers(x)


class _ChannelGate(nn.Module):
    """Channel attention: global pool -> squeeze -> expand -> gate activation.

    The forward pass returns the input scaled by the gates, or the gates
    themselves when ``return_gates`` is true.  Any ``gate_activation`` other
    than ``"sigmoid"`` selects ReLU.
    """

    def __init__(self, in_channels, num_gates=None, return_gates=False, gate_activation="sigmoid", reduction=16, layer_norm=False):
        super().__init__()
        gate_count = in_channels if num_gates is None else num_gates
        squeezed = in_channels // reduction
        self.return_gates = return_gates
        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Conv2d(in_channels, squeezed, kernel_size=1, bias=True, padding=0)
        self.norm1 = nn.LayerNorm((squeezed, 1, 1)) if layer_norm else None
        self.relu = nn.ReLU()
        self.fc2 = nn.Conv2d(squeezed, gate_count, kernel_size=1, bias=True, padding=0)
        if gate_activation == "sigmoid":
            self.gate_activation = nn.Sigmoid()
        else:
            self.gate_activation = nn.ReLU()

    def forward(self, x):
        gates = self.global_avgpool(x)
        gates = self.fc1(gates)
        if self.norm1 is not None:
            gates = self.norm1(gates)
        gates = self.fc2(self.relu(gates))
        if self.gate_activation is not None:
            gates = self.gate_activation(gates)
        if self.return_gates:
            return gates
        return x * gates
nn.Sequential(_LightConv3x3(mid_channels, mid_channels), _LightConv3x3(mid_channels, mid_channels), _LightConv3x3(mid_channels, mid_channels), _LightConv3x3(mid_channels, mid_channels)) + self.gate = _ChannelGate(mid_channels) + self.conv3 = _Conv1x1Linear(mid_channels, out_channels) + self.downsample = _Conv1x1Linear(in_channels, out_channels) if in_channels != out_channels else None + self.IN = nn.InstanceNorm2d(out_channels, affine=True) if IN else None + + def forward(self, x): + identity = x + x1 = self.conv1(x) + x2 = self.gate(self.conv2a(x1)) + self.gate(self.conv2b(x1)) + self.gate(self.conv2c(x1)) + self.gate(self.conv2d(x1)) + x3 = self.conv3(x2) + if self.downsample is not None: + identity = self.downsample(identity) + out = x3 + identity + if self.IN is not None: + out = self.IN(out) + return F.relu(out) + + +class _OSNetX1(nn.Module): + def __init__(self, num_classes, blocks, layers, channels, feature_dim=512, loss="softmax", IN=False): + super().__init__() + self.loss = loss + self.feature_dim = feature_dim + self.conv1 = _ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=IN) + self.maxpool = nn.MaxPool2d(3, stride=2, padding=1) + self.conv2 = self._make_layer(blocks[0], layers[0], channels[0], channels[1], reduce_spatial_size=True, IN=IN) + self.conv3 = self._make_layer(blocks[1], layers[1], channels[1], channels[2], reduce_spatial_size=True) + self.conv4 = self._make_layer(blocks[2], layers[2], channels[2], channels[3], reduce_spatial_size=False) + self.conv5 = _Conv1x1(channels[3], channels[3]) + self.global_avgpool = nn.AdaptiveAvgPool2d(1) + self.fc = self._construct_fc_layer(feature_dim, channels[3], dropout_p=None) + self.classifier = nn.Linear(self.feature_dim, num_classes) + self._init_params() + + def _make_layer(self, block, layer, in_channels, out_channels, reduce_spatial_size, IN=False): + layers_list = [block(in_channels, out_channels, IN=IN)] + for _ in range(1, layer): + layers_list.append(block(out_channels, out_channels, IN=IN)) 
+ if reduce_spatial_size: + layers_list.append(nn.Sequential(_Conv1x1(out_channels, out_channels), nn.AvgPool2d(2, stride=2))) + return nn.Sequential(*layers_list) + + def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None): + if fc_dims is None or fc_dims < 0: + self.feature_dim = input_dim + return None + if isinstance(fc_dims, int): + fc_dims = [fc_dims] + layers_list = [] + for dim in fc_dims: + layers_list.append(nn.Linear(input_dim, dim)) + layers_list.append(nn.BatchNorm1d(dim)) + layers_list.append(nn.ReLU(inplace=True)) + if dropout_p is not None: + layers_list.append(nn.Dropout(p=dropout_p)) + input_dim = dim + self.feature_dim = fc_dims[-1] + return nn.Sequential(*layers_list) + + def _init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, (nn.BatchNorm2d, nn.BatchNorm1d)): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.InstanceNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + if m.bias is not None: + nn.init.constant_(m.bias, 0) + + def forward(self, x, return_featuremaps=False): + x = self.conv1(x) + x = self.maxpool(x) + x = self.conv2(x) + x = self.conv3(x) + x = self.conv4(x) + x = self.conv5(x) + if return_featuremaps: + return x + v = self.global_avgpool(x) + v = v.view(v.size(0), -1) + if self.fc is not None: + v = self.fc(v) + if not self.training: + return v + y = self.classifier(v) + if self.loss == "softmax": + return y + elif self.loss == "triplet": + return y, v + raise KeyError(f"Unsupported loss: {self.loss}") + + +def _osnet_x1_0(num_classes=1000, pretrained=True, loss="softmax", **kwargs): + return _OSNetX1( + num_classes, + blocks=[_OSBlockX1, _OSBlockX1, _OSBlockX1], + layers=[2, 2, 2], + channels=[64, 256, 384, 512], + 
def _load_checkpoint_osnet(fpath: str):
    """Load a checkpoint file and return the unpickled object.

    ``~`` is expanded and the path made absolute.  Tensors are mapped to the
    CPU when CUDA is unavailable and left on their saved device otherwise.
    """
    resolved = os.path.abspath(os.path.expanduser(fpath))
    target = None if torch.cuda.is_available() else "cpu"
    # NOTE(security): weights_only=False lets torch.load unpickle arbitrary
    # objects, which can execute code.  Only point this at trusted checkpoint
    # files; left unchanged here because existing checkpoints may need it.
    return torch.load(resolved, map_location=target, weights_only=False)


def _load_pretrained_weights_osnet(model: nn.Module, weight_path: str) -> None:
    """Copy every compatible tensor from a checkpoint into ``model``.

    The checkpoint may be a raw state dict or a dict with a ``"state_dict"``
    entry.  A leading ``"module."`` (as written by ``DataParallel``) is
    stripped from keys.  An entry is used only when the model has a parameter
    or buffer of the same name and shape; everything else is ignored.

    When nothing matches, the model is left untouched and a
    ``RuntimeWarning`` is issued so that a wrong or incompatible checkpoint
    does not silently leave the network with its initial weights.

    Args:
        model: Network to update in place.
        weight_path: Path of the checkpoint file.
    """
    import warnings

    checkpoint = _load_checkpoint_osnet(weight_path)
    source_state = checkpoint.get("state_dict", checkpoint)
    target_state = model.state_dict()

    prefix = "module."
    matched = OrderedDict()
    for name, value in source_state.items():
        if name.startswith(prefix):
            name = name[len(prefix):]
        if name not in target_state:
            continue
        # Checkpoints can carry non-tensor bookkeeping entries; skip them
        # instead of failing on ``.size()``.
        if not torch.is_tensor(value) or target_state[name].size() != value.size():
            continue
        matched[name] = value

    if not matched:
        warnings.warn(
            f"No layer in checkpoint '{weight_path}' matches the model; "
            "keeping the model's current weights.",
            RuntimeWarning,
            stacklevel=2,
        )
        return

    target_state.update(matched)
    model.load_state_dict(target_state)


def _load_osnet(device: str = "cuda", weight_path: Optional[Path] = None) -> Optional[nn.Module]:
    """Build the OSNet model in eval mode on ``device``.

    Weights are loaded only when ``weight_path`` is given and exists;
    otherwise the model keeps its initial parameters.

    Args:
        device: Device the model is moved to.
        weight_path: Optional checkpoint path.

    Returns:
        The constructed model (this function itself never returns ``None``).
    """
    model = _osnet_x1_0(num_classes=1, loss="softmax", pretrained=False)
    if weight_path and Path(weight_path).exists():
        _load_pretrained_weights_osnet(model, str(weight_path))
    model.eval()
    model.to(device)
    return model
def _aggregate_by_track_osnet(
    embeddings: ndarray,
    meta: list[tuple[int, int, int | None]],
) -> tuple[ndarray, list[tuple[int, int, int | None]]]:
    """Average embeddings per track and L2-normalise the result.

    Detections sharing a track id are averaged into one vector.  A detection
    without a track id forms its own group.  The previous implementation
    keyed such detections by ``id()`` of a temporary tuple; that address can
    be reused between iterations, so unrelated untracked detections could be
    merged (or collide with a real track id).  A structural key is used
    instead.

    Args:
        embeddings: One embedding per row, aligned with ``meta``.
        meta: ``(frame_idx, box_idx, track_id)`` for each embedding row.

    Returns:
        ``(agg_embeddings, agg_meta)``: one row per group, in first-seen
        order, and for each group the meta tuple of its last detection.
    """
    rows_by_group: dict[object, list[int]] = {}
    meta_by_group: dict[object, tuple[int, int, int | None]] = {}
    for row, (fi, bi, tid) in enumerate(meta):
        # A tuple key cannot clash with an integer track id and is unique
        # per (frame, box) position.
        group = tid if tid is not None else ("untracked", fi, bi)
        rows_by_group.setdefault(group, []).append(row)
        meta_by_group[group] = (fi, bi, tid)

    agg_embeddings = []
    agg_meta = []
    for group, rows in rows_by_group.items():
        mean_emb = np.mean(embeddings[rows], axis=0)
        norm = np.linalg.norm(mean_emb)
        # Leave (near-)zero vectors as they are to avoid dividing by ~0.
        if norm > 1e-12:
            mean_emb = mean_emb / norm
        agg_embeddings.append(mean_emb)
        agg_meta.append(meta_by_group[group])
    return np.array(agg_embeddings), agg_meta


def _classify_teams_osnet(
    agg_embeddings: ndarray,
    agg_meta: list[tuple[int, int, int | None]],
) -> dict[int | None, str]:
    """Split aggregated embeddings into two teams with 2-means clustering.

    Args:
        agg_embeddings: One aggregated embedding per row.
        agg_meta: ``(frame_idx, box_idx, track_id)`` per row.

    Returns:
        Mapping ``track_id -> "1" | "2"``.  Empty for no input; a single
        entry is always team ``"1"``.  When the two cluster centroids have
        cosine similarity above 0.95 everything is assigned to team ``"1"``.
        Otherwise the cluster whose centroid has the larger norm becomes
        team ``"1"`` (cluster 1 wins ties), which fixes the label order
        independently of KMeans' arbitrary cluster numbering.  Entries that
        share a track id (including ``None``) share one mapping slot; the
        last one wins.
    """
    track_to_team: dict[int | None, str] = {}
    count = len(agg_embeddings)
    if count == 0:
        return track_to_team
    if count == 1:
        track_to_team[agg_meta[0][2]] = "1"
        return track_to_team

    kmeans = KMeans(n_clusters=2, n_init=2, random_state=42)
    kmeans.fit(agg_embeddings)
    centre_a, centre_b = kmeans.cluster_centers_[0], kmeans.cluster_centers_[1]
    norm_a = np.linalg.norm(centre_a)
    norm_b = np.linalg.norm(centre_b)

    similarity = np.dot(centre_a, centre_b) / (norm_a * norm_b + 1e-12)
    if similarity > 0.95:
        # Clusters are practically identical: treat everyone as one team.
        for _fi, _bi, tid in agg_meta:
            track_to_team[tid] = "1"
        return track_to_team

    # Work on a local copy of the labels rather than rewriting the fitted
    # estimator's attribute.
    labels = kmeans.labels_
    if norm_a <= norm_b:
        labels = 1 - labels
    for (_fi, _bi, tid), label in zip(agg_meta, labels):
        track_to_team[tid] = "1" if label == 0 else "2"
    return track_to_team
n_keypoints: int, + ) -> dict[int, list[list[float]]]: + _kp_timings: list[tuple[str, float]] = [] + t_total = time.perf_counter() + default_kps = [[0.0, 0.0] for _ in range(n_keypoints)] + if not _f0 or self._keypoint_model_hrnet is None: + self._current_batch_kp_timings = [] + return {offset + i: list(default_kps) for i in range(len(images))} + device = "cuda" if next(self._keypoint_model_hrnet.parameters()).is_cuda else "cpu" + kp_threshold = 0.2 + _t = time.perf_counter() + kp_result = _x0( + images, self._keypoint_model_hrnet, kp_threshold, device, batch_size=_KP_BS + ) + _kp_timings.append(("kp_hrnet", time.perf_counter() - _t)) + _t = time.perf_counter() + h, w = images[0].shape[:2] + if n_keypoints == 32: + keypoints_xyp = _normalize_keypoints_xyp(kp_result, images, n_keypoints) + if _FKP_FAST_MODE: + job = _fkp_normalize_results(keypoints_xyp, _FKP_SINGLE_THRESHOLD) + keypoints = [] + for idx in range(len(images)): + kps = _fix_keypoints(job[idx] if idx < len(job) else [(0, 0)] * 32, n_keypoints) + adjusted = _step8_one_frame_kp(kps, w, h, False, n_keypoints) + keypoints.append(_keypoints_to_float(adjusted if adjusted is not None else kps)) + else: + job = _fkp_normalize_results(keypoints_xyp, _FKP_SINGLE_THRESHOLD) + keypoints = [] + for idx in range(len(images)): + kps = _fix_keypoints(job[idx] if idx < len(job) else [(0, 0)] * 32, n_keypoints) + kps_float = _keypoints_to_float(kps) + try: + refined = _apply_homography_refinement(kps_float, images[idx], n_keypoints) + keypoints.append(refined) + except Exception: + keypoints.append(kps_float) + else: + keypoints = _n0(kp_result, images, n_keypoints) + keypoints = [_fix_keypoints(kps, n_keypoints) for kps in keypoints] + keypoints = [_keypoints_to_float(kps) for kps in keypoints] + _kp_timings.append(("kp_normalize", time.perf_counter() - _t)) + _t = time.perf_counter() + out: dict[int, list[list[float]]] = {} + for i, kpts in enumerate(keypoints): + out[offset + i] = _c1(kpts) + 
_kp_timings.append(("kp_to_output", time.perf_counter() - _t)) + _kp_timings.append(("kp_total", time.perf_counter() - t_total)) + self._current_batch_kp_timings = _kp_timings + return out + + def _bbox_task( + self, + images: list[ndarray], + offset: int, + imgsz: int, + conf: float, + onnx_batch_size: int, + ) -> dict[int, list[_Bx]]: + _bbox_timings: list[tuple[str, float]] = [] + _t0 = time.perf_counter() + + ball_res: list = [] + if _B2 and self.ball_model is not None: + _t = time.perf_counter() + for start in range(0, len(images), onnx_batch_size): + chunk = images[start : start + onnx_batch_size] + batch_res = self.ball_model.predict(chunk, imgsz=imgsz, conf=conf, verbose=False) + ball_res.extend(batch_res if batch_res else []) + _bbox_timings.append(("bbox_ball_detect", time.perf_counter() - _t)) + _t = time.perf_counter() + batch_res = self.person_model(images, imgsz=_D0_PERSON, conf=conf, iou=0.5, agnostic_nms=True, verbose=False) + if not isinstance(batch_res, list): + batch_res = [batch_res] if batch_res is not None else [] + self._person_tracker_state, self._person_tracker_next_id, person_track_ids = _assign_person_track_ids( + self._person_tracker_state, self._person_tracker_next_id, batch_res, _TRACK_IOU_THRESH + ) + person_res = batch_res + _bbox_timings.append(("bbox_person_track", time.perf_counter() - _t)) + + bboxes_by_frame: dict[int, list[_Bx]] = {} + track_ids_by_frame: dict[int, list[int | None]] = {} + boxes_raw_list: list[list[_Bx]] = [] + track_ids_raw_list: list[list[int | None]] = [] + bbox_to_track_list: list[dict[tuple[int, int, int, int], int]] = [] + _t = time.perf_counter() + for i, frame in enumerate(images): + frame_id = offset + i + boxes_raw = [] + track_ids_raw: list[int | None] = [] + bbox_to_track: dict[tuple[int, int, int, int], int] = {} + if _B2: + det_ball = ball_res[i] if i < len(ball_res) else None + if det_ball is not None and getattr(det_ball, "boxes", None) is not None and len(det_ball.boxes) > 0: + b = 
det_ball.boxes + xyxy = b.xyxy.cpu().numpy() + confs = b.conf.cpu().numpy() if b.conf is not None else np.ones(len(xyxy), dtype=np.float32) + clss = b.cls.cpu().numpy().astype(int) if b.cls is not None else np.zeros(len(xyxy), dtype=np.int32) + for (x1, y1, x2, y2), c, cf in zip(xyxy, clss, confs): + if int(c) == 0: + boxes_raw.append(_Bx(x1=int(round(x1)), y1=int(round(y1)), x2=int(round(x2)), y2=int(round(y2)), cls_id=_C0, conf=float(cf))) + track_ids_raw.append(None) + det_p = person_res[i] if i < len(person_res) else None + if det_p is not None and getattr(det_p, "boxes", None) is not None and len(det_p.boxes) > 0: + b = det_p.boxes + xyxy = b.xyxy.cpu().numpy() + confs = b.conf.cpu().numpy() if b.conf is not None else np.ones(len(xyxy), dtype=np.float32) + clss = b.cls.cpu().numpy().astype(int) if b.cls is not None else np.zeros(len(xyxy), dtype=np.int32) + if i < len(person_track_ids) and len(person_track_ids[i]) == len(clss): + track_ids = np.array(person_track_ids[i], dtype=np.int32) + else: + track_ids = np.full(len(clss), -1, dtype=np.int32) + for (x1, y1, x2, y2), c, cf, tid in zip(xyxy, clss, confs, track_ids): + c = int(c) + tid = int(tid) + x1r, y1r, x2r, y2r = int(round(x1)), int(round(y1)), int(round(x2)), int(round(y2)) + if tid >= 0: + bbox_to_track[(x1r, y1r, x2r, y2r)] = tid + tid_out = tid if tid >= 0 else None + if c == 0: + boxes_raw.append(_Bx(x1=x1r, y1=y1r, x2=x2r, y2=y2r, cls_id=_C2, conf=float(cf))) + track_ids_raw.append(tid_out) + elif c == 1: + boxes_raw.append(_Bx(x1=x1r, y1=y1r, x2=x2r, y2=y2r, cls_id=_C3, conf=float(cf))) + track_ids_raw.append(tid_out) + elif c == 2: + boxes_raw.append(_Bx(x1=x1r, y1=y1r, x2=x2r, y2=y2r, cls_id=_C1, conf=float(cf))) + track_ids_raw.append(tid_out) + boxes_raw_list.append(boxes_raw) + track_ids_raw_list.append(track_ids_raw) + bbox_to_track_list.append(bbox_to_track) + _bbox_timings.append(("bbox_parse_ball_person", time.perf_counter() - _t)) + for i in range(len(images)): + bboxes_by_frame[offset 
+ i] = boxes_raw_list[i] + track_ids_by_frame[offset + i] = track_ids_raw_list[i] if i < len(track_ids_raw_list) else [None] * len(boxes_raw_list[i]) + if _G0 and len(images) > _G2: + _t = time.perf_counter() + tid_counts: dict[int, int] = {} + tid_first_frame: dict[int, int] = {} + for fid in range(offset, offset + len(images)): + tids = track_ids_by_frame.get(fid, []) + for tid in tids: + if tid is not None and tid >= 0: + t = int(tid) + tid_counts[t] = tid_counts.get(t, 0) + 1 + if t not in tid_first_frame or fid < tid_first_frame[t]: + tid_first_frame[t] = fid + for t, prev_count in self._prev_batch_tail_tid_counts.items(): + tid_counts[t] = tid_counts.get(t, 0) + prev_count + if prev_count > 0: + tid_first_frame[t] = offset + len(images) + boundary = offset + len(images) - _G2 + noise_tids = { + t for t, count in tid_counts.items() + if count < _G1 and tid_first_frame[t] < boundary + } + for fid in range(offset, offset + len(images)): + boxes = bboxes_by_frame.get(fid, []) + tids = track_ids_by_frame.get(fid, [None] * len(boxes)) + if len(tids) != len(boxes): + tids = tids + [None] * (len(boxes) - len(tids)) + keep = [ + i for i in range(len(boxes)) + if tids[i] is None or int(tids[i]) not in noise_tids + ] + bboxes_by_frame[fid] = [boxes[i] for i in keep] + track_ids_by_frame[fid] = [tids[i] for i in keep] + tail_start = offset + len(images) - _G2 + self._prev_batch_tail_tid_counts = {} + for fid in range(tail_start, offset + len(images)): + tids = track_ids_by_frame.get(fid, []) + for tid in tids: + if tid is not None and tid >= 0: + t = int(tid) + self._prev_batch_tail_tid_counts[t] = self._prev_batch_tail_tid_counts.get(t, 0) + 1 + _bbox_timings.append(("bbox_noise_filter", time.perf_counter() - _t)) + _t = time.perf_counter() + for i, frame in enumerate(images): + frame_id = offset + i + boxes_raw = bboxes_by_frame[frame_id] + track_ids_raw = track_ids_by_frame[frame_id] + bbox_to_track = {(int(bb.x1), int(bb.y1), int(bb.x2), int(bb.y2)): int(tid) for bb, 
tid in zip(boxes_raw, track_ids_raw) if tid is not None and int(tid) >= 0} + boxes_stabilized = [] + track_ids_stabilized: list[int | None] = [] + for idx, bb in enumerate(boxes_raw): + best_tid = -1 + best_iou = 0.0 + for (bx1, by1, bx2, by2), tid in bbox_to_track.items(): + iou = _i1(_Bx(x1=bb.x1, y1=bb.y1, x2=bb.x2, y2=bb.y2, cls_id=0, conf=0.0), _Bx(x1=bx1, y1=by1, x2=bx2, y2=by2, cls_id=0, conf=0.0)) + if iou > best_iou and iou > 0.5: + best_iou, best_tid = iou, tid + tid_out = best_tid if best_tid >= 0 else (track_ids_raw[idx] if idx < len(track_ids_raw) else None) + if best_tid >= 0: + if _G5: + if best_tid not in self._track_id_to_class_votes: + self._track_id_to_class_votes[best_tid] = {} + cls_key = int(bb.cls_id) + self._track_id_to_class_votes[best_tid][cls_key] = self._track_id_to_class_votes[best_tid].get(cls_key, 0) + 1 + boxes_stabilized.append(_Bx(x1=bb.x1, y1=bb.y1, x2=bb.x2, y2=bb.y2, cls_id=bb.cls_id, conf=bb.conf, team_id=None)) + track_ids_stabilized.append(tid_out) + else: + boxes_stabilized.append(_Bx(x1=bb.x1, y1=bb.y1, x2=bb.x2, y2=bb.y2, cls_id=bb.cls_id, conf=bb.conf, team_id=None)) + track_ids_stabilized.append(tid_out) + bboxes_by_frame[frame_id] = boxes_stabilized + track_ids_by_frame[frame_id] = track_ids_stabilized + _bbox_timings.append(("bbox_stabilize_track_ids", time.perf_counter() - _t)) + _t = time.perf_counter() + for fid in range(offset, offset + len(images)): + new_boxes = [] + tids_fid = track_ids_by_frame.get(fid, [None] * len(bboxes_by_frame[fid])) + for box_idx, box in enumerate(bboxes_by_frame[fid]): + tid = tids_fid[box_idx] if box_idx < len(tids_fid) else None + if _G5 and tid is not None and tid >= 0 and tid in self._track_id_to_class_votes: + votes = self._track_id_to_class_votes[tid] + ref_votes = votes.get(_C3, 0) + gk_votes = votes.get(_C1, 0) + if _G6 and ref_votes > _G3: + majority_cls = _C3 + elif _G7 and gk_votes > _G3: + majority_cls = _C1 + else: + majority_cls = max(votes.items(), key=lambda x: x[1])[0] + 
new_boxes.append(_Bx(x1=box.x1, y1=box.y1, x2=box.x2, y2=box.y2, cls_id=majority_cls, conf=box.conf, team_id=None)) + else: + new_boxes.append(box) + bboxes_by_frame[fid] = new_boxes + track_ids_by_frame[fid] = tids_fid + _bbox_timings.append(("bbox_class_votes", time.perf_counter() - _t)) + if _B5 and len(images) > 1: + _t = time.perf_counter() + track_to_frames: dict[int, list[tuple[int, _Bx]]] = {} + for fid in range(offset, offset + len(images)): + boxes = bboxes_by_frame.get(fid, []) + tids = track_ids_by_frame.get(fid, [None] * len(boxes)) + for bb, tid in zip(boxes, tids): + if tid is not None and int(tid) >= 0: + t = int(tid) + track_to_frames.setdefault(t, []).append((fid, bb)) + to_add: dict[int, list[tuple[_Bx, int]]] = {} + for t, pairs in track_to_frames.items(): + pairs.sort(key=lambda p: p[0]) + for i in range(len(pairs) - 1): + f1, b1 = pairs[i] + f2, b2 = pairs[i + 1] + if f2 - f1 <= 1: + continue + for g in range(f1 + 1, f2): + w = (g - f1) / (f2 - f1) + x1 = int(round((1 - w) * b1.x1 + w * b2.x1)) + y1 = int(round((1 - w) * b1.y1 + w * b2.y1)) + x2 = int(round((1 - w) * b1.x2 + w * b2.x2)) + y2 = int(round((1 - w) * b1.y2 + w * b2.y2)) + interp = _Bx(x1=x1, y1=y1, x2=x2, y2=y2, cls_id=b2.cls_id, conf=b2.conf, team_id=b2.team_id) + to_add.setdefault(g, []).append((interp, t)) + for g, add_list in to_add.items(): + bboxes_by_frame[g] = list(bboxes_by_frame.get(g, [])) + track_ids_by_frame[g] = list(track_ids_by_frame.get(g, [])) + for interp_box, tid in add_list: + bboxes_by_frame[g].append(interp_box) + track_ids_by_frame[g].append(tid) + _bbox_timings.append(("bbox_interp_gaps", time.perf_counter() - _t)) + reid_team_per_frame: list[list[Optional[str]]] = [[None] * len(bboxes_by_frame[offset + fi]) for fi in range(len(images))] + if self._osnet_model is not None: + _t_reid_total = time.perf_counter() + emb, meta = _extract_osnet_embeddings( + self._osnet_model, images, bboxes_by_frame, track_ids_by_frame, offset, self._osnet_device + ) + if emb 
is not None and meta is not None: + agg_emb, agg_meta = _aggregate_by_track_osnet(emb, meta) + track_to_team = _classify_teams_osnet(agg_emb, agg_meta) + for fi in range(len(images)): + frame_id = offset + fi + boxes_f = bboxes_by_frame.get(frame_id, []) + tids_f = track_ids_by_frame.get(frame_id, []) + for bi in range(len(boxes_f)): + tid = tids_f[bi] if bi < len(tids_f) else None + if tid in track_to_team and bi < len(reid_team_per_frame[fi]): + reid_team_per_frame[fi][bi] = track_to_team[tid] + _bbox_timings.append(("bbox_reid_team", time.perf_counter() - _t_reid_total)) + _t = time.perf_counter() + for i in range(len(images)): + frame_id = offset + i + boxes = bboxes_by_frame[frame_id] + tids_fid = track_ids_by_frame[frame_id] + for box_idx, bb in enumerate(boxes): + tid = tids_fid[box_idx] if box_idx < len(tids_fid) else None + team_from_reid = reid_team_per_frame[i][box_idx] if box_idx < len(reid_team_per_frame[i]) else None + if _G8 and tid is not None and tid >= 0 and team_from_reid: + if tid not in self._track_id_to_team_votes: + self._track_id_to_team_votes[tid] = {} + team_key = team_from_reid.strip() + self._track_id_to_team_votes[tid][team_key] = self._track_id_to_team_votes[tid].get(team_key, 0) + 1 + for fid in range(offset, offset + len(images)): + new_boxes = [] + tids_fid = track_ids_by_frame.get(fid, [None] * len(bboxes_by_frame[fid])) + fi = fid - offset + for box_idx, box in enumerate(bboxes_by_frame[fid]): + tid = tids_fid[box_idx] if box_idx < len(tids_fid) else None + team_from_reid = reid_team_per_frame[fi][box_idx] if fi < len(reid_team_per_frame) and box_idx < len(reid_team_per_frame[fi]) else None + default_team = team_from_reid or box.team_id + if _G8 and tid is not None and tid >= 0 and tid in self._track_id_to_team_votes and self._track_id_to_team_votes[tid]: + majority_team = max(self._track_id_to_team_votes[tid].items(), key=lambda x: x[1])[0] + else: + majority_team = default_team + new_boxes.append(_Bx(x1=box.x1, y1=box.y1, 
x2=box.x2, y2=box.y2, cls_id=box.cls_id, conf=box.conf, team_id=majority_team)) + bboxes_by_frame[fid] = new_boxes + track_ids_by_frame[fid] = tids_fid + _bbox_timings.append(("bbox_team_votes", time.perf_counter() - _t)) + if len(images) > 0: + _t = time.perf_counter() + H, W = images[0].shape[:2] + for fid in range(offset, offset + len(images)): + orig_boxes = bboxes_by_frame[fid] + orig_tids = track_ids_by_frame.get(fid, [None] * len(orig_boxes)) + adjusted = _a0( + orig_boxes, + frame_width=W, + frame_height=H, + do_goalkeeper_dedup=_B3, + do_referee_disambiguation=_B4, + do_ball_dedup=_B1, + ) + adjusted_tids: list[int | None] = [] + used_orig = set() + for ab in adjusted: + matched = None + for oi, ob in enumerate(orig_boxes): + if oi in used_orig: + continue + if ob.x1 == ab.x1 and ob.y1 == ab.y1 and ob.x2 == ab.x2 and ob.y2 == ab.y2: + matched = orig_tids[oi] if oi < len(orig_tids) else None + used_orig.add(oi) + break + adjusted_tids.append(matched) + if _B0 > 0: + new_adjusted = [] + new_adjusted_tids = [] + for ab, tid in zip(adjusted, adjusted_tids): + if int(ab.cls_id) == _C0 and float(ab.conf) < _B0: + continue + new_adjusted.append(ab) + new_adjusted_tids.append(tid) + adjusted = new_adjusted + adjusted_tids = new_adjusted_tids + if _q0 != 0.0 or _q1 != 0.0: + boxes_offset = [] + offset_tids = [] + for ab_idx, bb in enumerate(adjusted): + cx = 0.5 * (bb.x1 + bb.x2) + cy = 0.5 * (bb.y1 + bb.y2) + w = bb.x2 - bb.x1 + h = bb.y2 - bb.y1 + cx *= 1.0 + _q0 + cy *= 1.0 + _q1 + boxes_offset.append(_Bx(x1=int(round(cx - w/2)), y1=int(round(cy - h/2)), x2=int(round(cx + w/2)), y2=int(round(cy + h/2)), cls_id=bb.cls_id, conf=bb.conf, team_id=bb.team_id)) + offset_tids.append(adjusted_tids[ab_idx] if ab_idx < len(adjusted_tids) else None) + adjusted = boxes_offset + adjusted_tids = offset_tids + bboxes_by_frame[fid] = adjusted + track_ids_by_frame[fid] = adjusted_tids + _bbox_timings.append(("bbox_adjust_boxes", time.perf_counter() - _t)) + if _A0 and _S0 > 1 
def predict_batch(
    self,
    batch_images: list[ndarray],
    offset: int,
    n_keypoints: int,
) -> list[_FRes]:
    """Run box detection and keypoint inference on one batch of frames.

    Args:
        batch_images: Frames of the batch; an empty batch yields ``[]``.
        offset: Frame id of the first image. ``offset == 0`` triggers
            ``self.reset_for_new_video()`` before anything else runs.
        n_keypoints: Number of keypoints every returned frame must carry;
            shorter lists are zero-padded, longer ones truncated.

    Returns:
        One ``_FRes`` per input frame, in input order, with
        ``frame_id = offset + index``. Keypoints are rounded to one decimal,
        box confidences to two, and box class ids are translated through
        ``_CLS_TO_VALIDATOR`` (unknown ids pass through unchanged).
    """
    if not batch_images:
        return []
    if offset == 0:
        self.reset_for_new_video()

    gc.collect()
    try:
        # ``torch`` is imported at module level; no local import needed.
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    except Exception:
        # Deliberately best effort: failing to release cached GPU memory
        # must never abort inference.
        pass

    images = list(batch_images)
    n_frames = len(images)
    frame_ids = range(offset, offset + n_frames)
    bbox_args = (images, offset, _D0, _D1, _BX_BS)
    kp_args = (images, offset, n_keypoints)

    if _E0 and _E1 and _P0:
        # Both stages enabled and parallel mode on: overlap them on the
        # shared executor and wait for both results.
        future_bbox = self._executor.submit(self._bbox_task, *bbox_args)
        future_kp = self._executor.submit(self._keypoint_hrnet_task, *kp_args)
        bboxes_by_frame = future_bbox.result()
        keypoints_by_frame = future_kp.result()
    else:
        # Sequential path; a disabled stage yields neutral output and an
        # empty timing list so later ``append`` calls still work.
        if _E0:
            bboxes_by_frame = self._bbox_task(*bbox_args)
        else:
            bboxes_by_frame = {fid: [] for fid in frame_ids}
            self._current_batch_bbox_timings = []
        if _E1:
            keypoints_by_frame = self._keypoint_hrnet_task(*kp_args)
        else:
            # Fresh inner lists per frame so in-place edits by later steps
            # cannot leak between frames.
            keypoints_by_frame = {
                fid: [[0.0, 0.0] for _ in range(n_keypoints)] for fid in frame_ids
            }
            self._current_batch_kp_timings = []

    if _STEP0_ENABLED and keypoints_by_frame:
        _t = time.perf_counter()
        for kps in keypoints_by_frame.values():
            if isinstance(kps, list) and len(kps) == _N0:
                # Return value is ignored; the helper is relied on to edit
                # ``kps`` in place.
                _step0_remove_close_keypoints(kps, _STEP0_PROXIMITY_PX)
        self._current_batch_kp_timings.append(("kp_step0_remove_close", time.perf_counter() - _t))

    if _U0 and _E1 and keypoints_by_frame and n_keypoints == 32 and _N0 == 32:
        template_img: ndarray | None = getattr(self, "_kp_template_cache", None)
        if template_img is None:
            loaded = _y0()
            # Cache only a usable (non-empty, non-black) template.
            if loaded is not None and loaded.size > 0 and loaded.sum() > 0:
                self._kp_template_cache = loaded
                template_img = loaded
        _t = time.perf_counter()
        if template_img is not None:
            for idx, frame in enumerate(images):
                frame_id = offset + idx
                kps = keypoints_by_frame.get(frame_id)
                if not kps or len(kps) != 32:
                    continue
                step5_out = _z0(kps, frame, template_img)
                if step5_out is not None:
                    keypoints_by_frame[frame_id] = step5_out
            if _J1:
                _z8(keypoints_by_frame, images, offset, template_img)
        self._current_batch_kp_timings.append(("kp_homography", time.perf_counter() - _t))
        # NOTE(review): the nesting of this step under the homography gate is
        # inferred (it has no guards of its own) — confirm against the
        # original file's indentation.
        if _J4:
            _t = time.perf_counter()
            for idx, frame in enumerate(images):
                frame_id = offset + idx
                kps = keypoints_by_frame.get(frame_id)
                if not kps or len(kps) != 32:
                    continue
                frame_height, frame_width = frame.shape[:2]
                adjusted = _z1(kps, frame_width, frame_height, _J0)
                if adjusted is not None:
                    keypoints_by_frame[frame_id] = adjusted
            self._current_batch_kp_timings.append(("kp_adjust", time.perf_counter() - _t))

    results: list[_FRes] = []
    for frame_number in frame_ids:
        kps = keypoints_by_frame.get(frame_number)
        if kps is None:
            # Missing or empty entry: fall through to zero padding below.
            kps = []
        if len(kps) != n_keypoints:
            kps = list(kps[:n_keypoints])
            kps.extend([0.0, 0.0] for _ in range(n_keypoints - len(kps)))
        kps = [[round(float(kp[0]), 1), round(float(kp[1]), 1)] for kp in kps]
        boxes_for_result = [
            {
                "x1": int(b.x1),
                "y1": int(b.y1),
                "x2": int(b.x2),
                "y2": int(b.y2),
                "cls_id": _CLS_TO_VALIDATOR.get(int(b.cls_id), int(b.cls_id)),
                "conf": round(float(b.conf), 2),
                "team_id": b.team_id,
            }
            for b in bboxes_by_frame.get(frame_number, [])
        ]
        results.append(_FRes(frame_id=frame_number, boxes=boxes_for_result, keypoints=kps))
    return results
class _M:
    """Thin wrapper, exported as ``Miner``, around a lazily built ``_Pl``.

    The pipeline is not created in the constructor; it is built on the first
    ``predict_batch`` call and reused afterwards.
    """

    def __init__(self, path_hf_repo: Path) -> None:
        """Store the repository root; no pipeline is built yet."""
        self.health = "Okay!!!"
        self.path_hf_repo = Path(path_hf_repo)
        self.pipeline: _Pl | None = None

    def __repr__(self) -> str:
        """Return the health string as the object's representation."""
        return self.health

    def predict_batch(
        self,
        batch_images: list[ndarray],
        offset: int,
        n_keypoints: int,
    ) -> list[_FRes]:
        """Delegate to the pipeline, constructing it on first use."""
        pipeline = self.pipeline
        if pipeline is None:
            pipeline = _Pl(repo_root=self.path_hf_repo)
            self.pipeline = pipeline
        return pipeline.predict_batch(batch_images, offset, n_keypoints)


Miner = _M