Spaces:
Sleeping
Sleeping
Commit ·
c90b51c
1
Parent(s): 220e5fb
better clock reading, faster
Browse files
- scripts/benchmark_ocr.py +540 -0
- scripts/detect_plays.py +2 -1
- scripts/diagnose_play_clock.py +210 -0
- scripts/visualize_detections.py +164 -2
- src/detectors/play_clock_reader.py +20 -11
- src/detectors/play_state_machine.py +49 -28
- src/detectors/scorebug_detector.py +159 -25
- src/pipeline/play_detector.py +60 -11
scripts/benchmark_ocr.py
ADDED
|
@@ -0,0 +1,540 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Benchmark different OCR methods for play clock reading.
|
| 4 |
+
|
| 5 |
+
This script compares:
|
| 6 |
+
1. Tesseract (current method)
|
| 7 |
+
2. EasyOCR (deep learning based)
|
| 8 |
+
3. Template matching (custom digit templates)
|
| 9 |
+
|
| 10 |
+
Usage:
|
| 11 |
+
python scripts/benchmark_ocr.py
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import logging
|
| 15 |
+
import sys
|
| 16 |
+
import time
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
from typing import List, Tuple, Optional, Dict, Any
|
| 19 |
+
|
| 20 |
+
import cv2
|
| 21 |
+
import numpy as np
|
| 22 |
+
|
| 23 |
+
# Add src to path for imports
|
| 24 |
+
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
| 25 |
+
|
| 26 |
+
# pylint: disable=wrong-import-position
|
| 27 |
+
from detectors import ScorebugDetector
|
| 28 |
+
|
| 29 |
+
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
|
| 30 |
+
logger = logging.getLogger(__name__)
|
| 31 |
+
|
| 32 |
+
# Constants
|
| 33 |
+
VIDEO_PATH = Path(__file__).parent.parent / "full_videos" / "OSU vs Tenn 12.21.24.mkv"
|
| 34 |
+
TEMPLATE_PATH = Path(__file__).parent.parent / "data" / "templates" / "scorebug_template_main.png"
|
| 35 |
+
CONFIG_PATH = Path(__file__).parent.parent / "data" / "config" / "play_clock_region.json"
|
| 36 |
+
DIGIT_TEMPLATES_DIR = Path(__file__).parent.parent / "data" / "templates" / "digits"
|
| 37 |
+
|
| 38 |
+
# Test segment - sample frames with known clock values (30 frames)
|
| 39 |
+
TEST_TIMESTAMPS = [2320.0 + i for i in range(30)]
|
| 40 |
+
# Expected values based on countdown pattern: 18->17->...->12->40->40->40->39->...
|
| 41 |
+
# This is approximate - the real test will use Tesseract as ground truth
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def load_play_clock_config() -> Tuple[int, int, int, int]:
    """Load the play clock region config from CONFIG_PATH.

    Returns:
        Tuple of (x_offset, y_offset, width, height) in pixels; the offsets
        are relative to the detected scorebug's top-left corner.

    Raises:
        FileNotFoundError: if CONFIG_PATH does not exist.
        KeyError: if the JSON is missing any of the expected keys.
    """
    import json

    # read_text with an explicit encoding avoids platform-default encodings
    # and scopes the file handle to a single call.
    data = json.loads(CONFIG_PATH.read_text(encoding="utf-8"))
    return (data["x_offset"], data["y_offset"], data["width"], data["height"])
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def extract_test_frames(
    video_path: Path, detector: ScorebugDetector, timestamps: List[float]
) -> List[Tuple[float, np.ndarray, Tuple[int, int, int, int]]]:
    """Extract frames with a detected scorebug for testing.

    Args:
        video_path: Path to the source video.
        detector: Scorebug detector used to filter frames.
        timestamps: Timestamps in seconds at which to sample frames.

    Returns:
        List of (timestamp, frame, scorebug_bbox) tuples — one entry per
        timestamp where a frame could be read AND a scorebug was detected.

    Raises:
        ValueError: if the video cannot be opened.
    """
    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        raise ValueError(f"Could not open video: {video_path}")

    frames = []
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)

        for ts in timestamps:
            # Seek by frame index for deterministic sampling.
            frame_number = int(ts * fps)
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
            ret, frame = cap.read()
            if not ret:
                continue

            detection = detector.detect(frame)
            if detection.detected and detection.bbox:
                frames.append((ts, frame, detection.bbox))
    finally:
        # Release the capture even if detection raises, so the underlying
        # video handle is never leaked.
        cap.release()

    return frames
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def extract_play_clock_region(frame: np.ndarray, scorebug_bbox: Tuple[int, int, int, int], config: Tuple[int, int, int, int]) -> np.ndarray:
    """Return a copy of the play clock sub-image of *frame*.

    The config offsets position the play clock relative to the scorebug's
    top-left corner; the scorebug's own width/height are not used.
    """
    bug_x, bug_y, _bug_w, _bug_h = scorebug_bbox
    dx, dy, region_w, region_h = config

    left = bug_x + dx
    top = bug_y + dy

    return frame[top : top + region_h, left : left + region_w].copy()
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def preprocess_for_ocr(region: np.ndarray) -> np.ndarray:
    """Grayscale, upscale, and binarize *region* for digit OCR.

    Produces a binary image normalized to dark glyphs on a light background.
    """
    grayscale = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)

    # 4x upscaling gives the OCR engines enough pixels per stroke.
    upscaled = cv2.resize(grayscale, None, fx=4, fy=4, interpolation=cv2.INTER_LINEAR)

    # Otsu picks the binarization threshold automatically from the histogram.
    _, binary = cv2.threshold(upscaled, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Normalize polarity: a mostly-dark result means light-on-dark text,
    # so flip it so the background is light.
    if np.mean(binary) < 128:
        return cv2.bitwise_not(binary)
    return binary
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
# ============================================================
|
| 110 |
+
# OCR Method 1: Tesseract (current baseline)
|
| 111 |
+
# ============================================================
|
| 112 |
+
def ocr_tesseract(region: np.ndarray) -> Tuple[Optional[int], float]:
    """Read the play clock digits with Tesseract.

    Returns:
        (value, confidence) where value is an int in [0, 40] and confidence
        is in [0, 1]; (None, 0.0) when nothing plausible was read.
    """
    import pytesseract

    image = preprocess_for_ocr(region)

    # A white margin around the glyphs improves Tesseract's segmentation.
    pad = 10
    image = cv2.copyMakeBorder(image, pad, pad, pad, pad, cv2.BORDER_CONSTANT, value=255)

    config = "--psm 7 -c tessedit_char_whitelist=0123456789"

    try:
        data = pytesseract.image_to_data(image, config=config, output_type=pytesseract.Output.DICT)

        # Keep the highest-confidence non-empty token.
        best_text, best_conf = "", 0.0
        for candidate, raw_conf in zip(data["text"], data["conf"]):
            token = candidate.strip()
            conf = float(raw_conf)
            if token and conf > best_conf:
                best_text, best_conf = token, conf

        if best_text.isdigit():
            value = int(best_text)
            if 0 <= value <= 40:
                # Tesseract reports confidence on a 0-100 scale.
                return value, best_conf / 100.0

    except Exception as e:
        logger.debug(f"Tesseract error: {e}")

    return None, 0.0
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
# ============================================================
|
| 148 |
+
# OCR Method 2: EasyOCR
|
| 149 |
+
# ============================================================
|
| 150 |
+
# Module-level singleton: EasyOCR model loading is expensive, so the reader
# is created once and reused.
_easyocr_reader = None


def get_easyocr_reader():
    """Return the shared EasyOCR reader, creating it on first use.

    Returns None when the easyocr package is not installed.
    """
    global _easyocr_reader

    if _easyocr_reader is not None:
        return _easyocr_reader

    try:
        import easyocr
    except ImportError:
        logger.warning("EasyOCR not installed. Install with: pip install easyocr")
        return None

    _easyocr_reader = easyocr.Reader(["en"], gpu=False)  # CPU mode for fair comparison
    logger.info("EasyOCR reader initialized")
    return _easyocr_reader
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
def ocr_easyocr(region: np.ndarray) -> Tuple[Optional[int], float]:
    """Read the play clock digits with EasyOCR.

    Returns:
        (value, confidence) for a value in [0, 40]; (None, 0.0) on failure
        or when EasyOCR is unavailable.
    """
    reader = get_easyocr_reader()
    if reader is None:
        return None, 0.0

    image = preprocess_for_ocr(region)

    try:
        # detail=1 yields (bbox, text, confidence) triples; EasyOCR accepts
        # BGR or grayscale input.
        detections = reader.readtext(image, allowlist="0123456789", detail=1)

        if detections:
            # Take the highest-confidence detection.
            _bbox, raw_text, conf = max(detections, key=lambda d: d[2])
            text = raw_text.strip()

            if text.isdigit():
                value = int(text)
                if 0 <= value <= 40:
                    return value, conf

    except Exception as e:
        logger.debug(f"EasyOCR error: {e}")

    return None, 0.0
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
# ============================================================
|
| 198 |
+
# OCR Method 3: Template Matching for Digits
|
| 199 |
+
# ============================================================
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
class DigitTemplateMatcher:
    """Fast digit recognition using template matching.

    Workflow: call calibrate_from_tesseract() once with a handful of regions
    (Tesseract supplies the labels), then read() runs pure template matching
    with no OCR engine in the loop. Confidence here is a pixel-similarity
    score in [0, 1], not an OCR confidence.
    """

    def __init__(self) -> None:
        # Map of digit character ("0".."9") -> binarized template image.
        self.digit_templates: Dict[str, np.ndarray] = {}
        # Set by calibrate_from_tesseract(); read() refuses to run until True.
        self._calibrated = False

    def calibrate_from_tesseract(self, regions: List[np.ndarray]) -> bool:
        """
        Calibrate digit templates using Tesseract as ground truth on first few frames.

        This extracts individual digit images from frames where Tesseract successfully reads values.

        Args:
            regions: Raw (un-preprocessed) play clock crops.

        Returns:
            True when at least 5 distinct digit templates were collected.
        """
        logger.info("Calibrating digit templates from Tesseract readings...")

        for region in regions:
            # Get Tesseract reading as ground truth; only trust confident reads.
            value, conf = ocr_tesseract(region)
            if value is None or conf < 0.7:
                continue

            # Preprocess and extract digit regions
            preprocessed = preprocess_for_ocr(region)
            h, w = preprocessed.shape

            # Find digit contours. preprocess_for_ocr yields dark-on-light,
            # so invert first: findContours expects white shapes on black.
            contours, _ = cv2.findContours(cv2.bitwise_not(preprocessed), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            if not contours:
                continue

            # Get bounding boxes sorted left-to-right
            boxes = [cv2.boundingRect(c) for c in contours]
            boxes = [(x, y, bw, bh) for x, y, bw, bh in boxes if bh > h * 0.3]  # Filter small noise
            boxes.sort(key=lambda b: b[0])  # Sort by x position

            # Extract digits based on value. Pair each box with a character of
            # the ground-truth reading; a count mismatch means segmentation
            # failed, so the frame is unusable for labeling.
            value_str = str(value)
            if len(boxes) != len(value_str):
                continue  # Mismatch, skip

            for i, (x, y, bw, bh) in enumerate(boxes):
                digit = value_str[i]
                # Add padding around digit
                pad = 4
                x1 = max(0, x - pad)
                y1 = max(0, y - pad)
                x2 = min(w, x + bw + pad)
                y2 = min(h, y + bh + pad)

                digit_img = preprocessed[y1:y2, x1:x2]

                # Store template (keep best quality one per digit) —
                # "best" here means largest pixel area.
                if digit not in self.digit_templates or digit_img.shape[0] * digit_img.shape[1] > self.digit_templates[digit].shape[0] * self.digit_templates[digit].shape[1]:
                    self.digit_templates[digit] = digit_img.copy()

            # Check if we have all digits we need (0-4 for tens, 0-9 for ones)
            if all(str(d) in self.digit_templates for d in range(10)):
                break

        logger.info(f" Calibrated templates for digits: {sorted(self.digit_templates.keys())}")
        self._calibrated = len(self.digit_templates) >= 5  # At least 0-4 for play clock

        return self._calibrated

    def read(self, region: np.ndarray) -> Tuple[Optional[int], float]:
        """Read digits using template matching.

        Args:
            region: Raw play clock crop (same form as the calibration inputs).

        Returns:
            (value, avg_confidence) for a value in [0, 40], or (None, 0.0)
            when uncalibrated or no digit matched above threshold.
        """
        if not self._calibrated:
            return None, 0.0

        preprocessed = preprocess_for_ocr(region)
        h, w = preprocessed.shape

        # Find digit contours (inverted: findContours wants white-on-black).
        contours, _ = cv2.findContours(cv2.bitwise_not(preprocessed), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        if not contours:
            return None, 0.0

        # Get bounding boxes sorted left-to-right
        boxes = [cv2.boundingRect(c) for c in contours]
        boxes = [(x, y, bw, bh) for x, y, bw, bh in boxes if bh > h * 0.3]  # Filter noise
        boxes.sort(key=lambda b: b[0])

        if not boxes:
            return None, 0.0

        # Match each digit region to templates
        digits = []
        total_conf = 0.0

        for x, y, bw, bh in boxes:
            # Extract digit with padding (same pad as calibration so the
            # crops are comparable).
            pad = 4
            x1 = max(0, x - pad)
            y1 = max(0, y - pad)
            x2 = min(w, x + bw + pad)
            y2 = min(h, y + bh + pad)

            digit_img = preprocessed[y1:y2, x1:x2]

            # Match against all templates
            best_digit = None
            best_conf = 0.0

            for digit, template in self.digit_templates.items():
                # Resize template to match digit height
                if template.shape[0] == 0 or digit_img.shape[0] == 0:
                    continue

                scale = digit_img.shape[0] / template.shape[0]
                new_w = max(1, int(template.shape[1] * scale))
                resized = cv2.resize(template, (new_w, digit_img.shape[0]), interpolation=cv2.INTER_LINEAR)

                # Pad smaller image to match sizes for comparison
                # (value=255 pads with background, not ink).
                digit_img_padded = digit_img
                if resized.shape[1] < digit_img.shape[1]:
                    diff = digit_img.shape[1] - resized.shape[1]
                    resized = cv2.copyMakeBorder(resized, 0, 0, diff // 2, diff - diff // 2, cv2.BORDER_CONSTANT, value=255)
                elif digit_img.shape[1] < resized.shape[1]:
                    diff = resized.shape[1] - digit_img.shape[1]
                    digit_img_padded = cv2.copyMakeBorder(digit_img, 0, 0, diff // 2, diff - diff // 2, cv2.BORDER_CONSTANT, value=255)

                # Ensure same size (crop both to the common area).
                min_h = min(resized.shape[0], digit_img_padded.shape[0])
                min_w = min(resized.shape[1], digit_img_padded.shape[1])
                resized = resized[:min_h, :min_w]
                digit_img_padded = digit_img_padded[:min_h, :min_w]

                if resized.size == 0 or digit_img_padded.size == 0:
                    continue

                # Simple pixel difference score: 1.0 = identical images,
                # 0.0 = maximally different.
                diff = np.abs(resized.astype(float) - digit_img_padded.astype(float))
                score = 1.0 - (np.mean(diff) / 255.0)

                if score > best_conf:
                    best_conf = score
                    best_digit = digit

            # 0.5 threshold rejects non-digit blobs that matched nothing well.
            if best_digit is not None and best_conf > 0.5:
                digits.append(best_digit)
                total_conf += best_conf

        if not digits:
            return None, 0.0

        # Combine digits into number
        try:
            value = int("".join(digits))
            avg_conf = total_conf / len(digits)
            if 0 <= value <= 40:
                return value, avg_conf
        except ValueError:
            pass

        return None, 0.0
|
| 360 |
+
|
| 361 |
+
|
| 362 |
+
# Shared matcher instance so calibration survives across calls.
_digit_matcher = None


def get_digit_matcher() -> DigitTemplateMatcher:
    """Return the shared DigitTemplateMatcher, creating it on first use."""
    global _digit_matcher

    if _digit_matcher is not None:
        return _digit_matcher

    _digit_matcher = DigitTemplateMatcher()
    return _digit_matcher
|
| 371 |
+
|
| 372 |
+
|
| 373 |
+
def ocr_template_matching(region: np.ndarray) -> Tuple[Optional[int], float]:
    """Read digits using the shared (pre-calibrated) template matcher."""
    return get_digit_matcher().read(region)
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
# ============================================================
|
| 380 |
+
# Benchmark Runner
|
| 381 |
+
# ============================================================
|
| 382 |
+
def run_benchmark(frames: List[Tuple[float, np.ndarray, Tuple[int, int, int, int]]], config: Tuple[int, int, int, int]) -> None:
    """Run benchmark comparing OCR methods.

    Compares Tesseract (baseline), EasyOCR, and template matching on the
    same play clock regions, then logs per-method timing, success rate,
    accuracy vs the Tesseract baseline, and a summary table.

    Args:
        frames: (timestamp, frame, scorebug_bbox) tuples; must be non-empty
            (the caller checks this before calling).
        config: (x_offset, y_offset, width, height) play clock region config.
    """
    logger.info("=" * 60)
    logger.info("OCR BENCHMARK")
    logger.info("=" * 60)
    logger.info(f"Testing {len(frames)} frames")

    # Extract play clock regions once, up front, so region extraction
    # cost is excluded from every method's timing.
    regions = []
    for ts, frame, scorebug_bbox in frames:
        region = extract_play_clock_region(frame, scorebug_bbox, config)
        regions.append((ts, region))

    # Method 1: Tesseract (baseline - also used for ground truth)
    logger.info("")
    logger.info("-" * 60)
    logger.info("Method 1: Tesseract (baseline)")
    logger.info("-" * 60)

    tesseract_results = []
    t_start = time.perf_counter()
    for ts, region in regions:
        value, conf = ocr_tesseract(region)
        tesseract_results.append((ts, value, conf))
    tesseract_time = time.perf_counter() - t_start

    tesseract_success = sum(1 for _, v, _ in tesseract_results if v is not None)
    logger.info(f" Success rate: {tesseract_success}/{len(regions)} ({100*tesseract_success/len(regions):.1f}%)")
    logger.info(f" Total time: {tesseract_time:.3f}s")
    logger.info(f" Per-frame: {1000*tesseract_time/len(regions):.1f}ms")
    logger.info(f" Values: {[v for _, v, _ in tesseract_results]}")

    # Use Tesseract results as ground truth for accuracy comparison.
    # NOTE(review): "accuracy" below therefore means agreement with
    # Tesseract, not against hand-labeled truth.
    ground_truth = {ts: v for ts, v, _ in tesseract_results if v is not None}

    # Method 2: EasyOCR
    logger.info("")
    logger.info("-" * 60)
    logger.info("Method 2: EasyOCR")
    logger.info("-" * 60)

    reader = get_easyocr_reader()
    # Defaults used when EasyOCR is unavailable (summary skips the row).
    easyocr_time = 0
    easyocr_success = 0
    easyocr_accuracy = 0

    if reader:
        easyocr_results = []
        t_start = time.perf_counter()
        for ts, region in regions:
            value, conf = ocr_easyocr(region)
            easyocr_results.append((ts, value, conf))
        easyocr_time = time.perf_counter() - t_start

        easyocr_success = sum(1 for _, v, _ in easyocr_results if v is not None)
        # Calculate accuracy vs ground truth
        easyocr_correct = sum(1 for ts, v, _ in easyocr_results if ts in ground_truth and v == ground_truth[ts])
        easyocr_accuracy = easyocr_correct / len(ground_truth) * 100 if ground_truth else 0

        logger.info(f" Success rate: {easyocr_success}/{len(regions)} ({100*easyocr_success/len(regions):.1f}%)")
        logger.info(f" Accuracy vs Tesseract: {easyocr_correct}/{len(ground_truth)} ({easyocr_accuracy:.1f}%)")
        logger.info(f" Total time: {easyocr_time:.3f}s")
        logger.info(f" Per-frame: {1000*easyocr_time/len(regions):.1f}ms")
        logger.info(f" Speedup vs Tesseract: {tesseract_time/easyocr_time:.2f}x")
        logger.info(f" Values: {[v for _, v, _ in easyocr_results]}")
    else:
        logger.info(" SKIPPED (EasyOCR not installed)")

    # Method 3: Template Matching
    logger.info("")
    logger.info("-" * 60)
    logger.info("Method 3: Template Matching")
    logger.info("-" * 60)

    matcher = get_digit_matcher()

    # Calibrate using first 10 regions (not counted in benchmark time)
    calibration_regions = [r for _, r in regions[:10]]
    if matcher.calibrate_from_tesseract(calibration_regions):
        template_results = []
        t_start = time.perf_counter()
        for ts, region in regions:
            value, conf = ocr_template_matching(region)
            template_results.append((ts, value, conf))
        template_time = time.perf_counter() - t_start

        template_success = sum(1 for _, v, _ in template_results if v is not None)
        template_correct = sum(1 for ts, v, _ in template_results if ts in ground_truth and v == ground_truth[ts])
        template_accuracy = template_correct / len(ground_truth) * 100 if ground_truth else 0

        logger.info(f" Success rate: {template_success}/{len(regions)} ({100*template_success/len(regions):.1f}%)")
        logger.info(f" Accuracy vs Tesseract: {template_correct}/{len(ground_truth)} ({template_accuracy:.1f}%)")
        logger.info(f" Total time: {template_time:.3f}s")
        logger.info(f" Per-frame: {1000*template_time/len(regions):.1f}ms")
        logger.info(f" Speedup vs Tesseract: {tesseract_time/template_time:.2f}x")
        logger.info(f" Values: {[v for _, v, _ in template_results]}")
    else:
        logger.info(" SKIPPED (calibration failed)")
        # Zero values make the summary skip this row (template_time > 0 guard).
        template_time = 0
        template_success = 0
        template_accuracy = 0

    # Summary
    logger.info("")
    logger.info("=" * 60)
    logger.info("SUMMARY")
    logger.info("=" * 60)
    logger.info(f"{'Method':<20} {'Time/frame':<12} {'Success':<12} {'Accuracy':<12} {'Speedup':<10}")
    logger.info("-" * 66)
    logger.info(f"{'Tesseract':<20} {f'{1000*tesseract_time/len(regions):.1f}ms':<12} {f'{tesseract_success}/{len(regions)}':<12} {'(baseline)':<12} {'1.00x':<10}")
    if reader and easyocr_time > 0:
        logger.info(f"{'EasyOCR':<20} {f'{1000*easyocr_time/len(regions):.1f}ms':<12} {f'{easyocr_success}/{len(regions)}':<12} {f'{easyocr_accuracy:.1f}%':<12} {f'{tesseract_time/easyocr_time:.2f}x':<10}")
    if template_time > 0:
        logger.info(f"{'Template Matching':<20} {f'{1000*template_time/len(regions):.1f}ms':<12} {f'{template_success}/{len(regions)}':<12} {f'{template_accuracy:.1f}%':<12} {f'{tesseract_time/template_time:.2f}x':<10}")
|
| 496 |
+
|
| 497 |
+
|
| 498 |
+
def main() -> int:
    """Main entry point.

    Returns:
        Process exit code: 0 on success, 1 when a required input is missing
        or no usable frames were found.
    """
    logger.info("OCR Benchmark Tool")
    logger.info("=" * 60)

    # Verify required inputs exist before doing any heavy work.
    # (Data-driven check replaces three copy-pasted if-blocks; the log
    # messages are unchanged.)
    required_paths = (
        ("Video", VIDEO_PATH),
        ("Template", TEMPLATE_PATH),
        ("Config", CONFIG_PATH),
    )
    for label, path in required_paths:
        if not path.exists():
            logger.error(f"{label} not found: {path}")
            return 1

    # Load config
    config = load_play_clock_config()
    logger.info(f"Play clock config: {config}")

    # Initialize scorebug detector
    detector = ScorebugDetector(template_path=str(TEMPLATE_PATH))

    # Extract test frames
    logger.info(f"Extracting {len(TEST_TIMESTAMPS)} test frames...")
    frames = extract_test_frames(VIDEO_PATH, detector, TEST_TIMESTAMPS)
    logger.info(f"Extracted {len(frames)} frames with scorebug")

    if not frames:
        logger.error("No frames with scorebug found!")
        return 1

    # Run benchmark
    run_benchmark(frames, config)

    return 0
|
| 536 |
+
|
| 537 |
+
|
| 538 |
+
if __name__ == "__main__":
    # SystemExit propagates the return code just like sys.exit().
    raise SystemExit(main())
|
| 540 |
+
|
scripts/detect_plays.py
CHANGED
|
@@ -95,7 +95,8 @@ def main():
|
|
| 95 |
parser.add_argument("--output", type=str, help="Output JSON file path")
|
| 96 |
|
| 97 |
# Processing options
|
| 98 |
-
|
|
|
|
| 99 |
parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging")
|
| 100 |
|
| 101 |
args = parser.parse_args()
|
|
|
|
| 95 |
parser.add_argument("--output", type=str, help="Output JSON file path")
|
| 96 |
|
| 97 |
# Processing options
|
| 98 |
+
# Play clock only changes once per second, so 0.5s (2 fps) is sufficient and much faster
|
| 99 |
+
parser.add_argument("--interval", type=float, default=0.5, help="Frame sampling interval in seconds (default: 0.5)")
|
| 100 |
parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging")
|
| 101 |
|
| 102 |
args = parser.parse_args()
|
scripts/diagnose_play_clock.py
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Diagnostic script to visualize play clock region extraction and preprocessing.
|
| 4 |
+
|
| 5 |
+
This script extracts a few frames and saves debug images showing:
|
| 6 |
+
1. The full frame with scorebug and play clock region highlighted
|
| 7 |
+
2. The extracted play clock region (raw)
|
| 8 |
+
3. The preprocessed play clock region (what OCR sees)
|
| 9 |
+
|
| 10 |
+
Usage:
|
| 11 |
+
python scripts/diagnose_play_clock.py
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
import logging
|
| 15 |
+
import sys
|
| 16 |
+
from pathlib import Path
|
| 17 |
+
|
| 18 |
+
import cv2
|
| 19 |
+
import numpy as np
|
| 20 |
+
|
| 21 |
+
# Add src to path for imports
|
| 22 |
+
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
| 23 |
+
|
| 24 |
+
# pylint: disable=wrong-import-position
|
| 25 |
+
from detectors import ScorebugDetector, PlayClockReader
|
| 26 |
+
|
| 27 |
+
logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
|
| 28 |
+
logger = logging.getLogger(__name__)
|
| 29 |
+
|
| 30 |
+
# Constants
|
| 31 |
+
VIDEO_PATH = Path(__file__).parent.parent / "full_videos" / "OSU vs Tenn 12.21.24.mkv"
|
| 32 |
+
TEMPLATE_PATH = Path(__file__).parent.parent / "data" / "templates" / "scorebug_template_main.png"
|
| 33 |
+
CONFIG_PATH = Path(__file__).parent.parent / "data" / "config" / "play_clock_region.json"
|
| 34 |
+
OUTPUT_DIR = Path(__file__).parent.parent / "output" / "debug"
|
| 35 |
+
|
| 36 |
+
# Test at 38:40 - a known segment with plays
|
| 37 |
+
TEST_TIMESTAMPS = [2320.0, 2321.0, 2322.0, 2325.0, 2328.0] # Sample timestamps in seconds
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def extract_debug_info(video_path: Path, detector: ScorebugDetector, reader: PlayClockReader, timestamps: list) -> None:
    """
    Extract frames and save debug visualizations.

    For each timestamp: seeks the video, detects the scorebug, derives the
    play-clock sub-region from the reader's offset config, runs OCR, and
    writes five debug images (annotated full frame, scorebug crop, raw
    play-clock crop, 4x-scaled crop, and the preprocessed OCR input) into
    OUTPUT_DIR. Timestamps that fail any step are logged and skipped.

    Args:
        video_path: Path to video file
        detector: ScorebugDetector instance
        reader: PlayClockReader instance
        timestamps: List of timestamps to analyze (seconds, floats)

    Raises:
        ValueError: If the video file cannot be opened.
    """
    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        raise ValueError("Could not open video: %s" % video_path)

    fps = cap.get(cv2.CAP_PROP_FPS)
    logger.info("Video FPS: %.2f", fps)

    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    for timestamp in timestamps:
        # Seek to timestamp (frame-accurate seek; slow on long videos but fine for diagnostics)
        frame_number = int(timestamp * fps)
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)

        ret, frame = cap.read()
        if not ret:
            logger.warning("Could not read frame at %.1fs", timestamp)
            continue

        # Detect scorebug
        detection = detector.detect(frame)
        if not detection.detected or not detection.bbox:
            logger.warning("No scorebug at %.1fs", timestamp)
            continue

        sb_x, sb_y, sb_w, sb_h = detection.bbox
        logger.info("Frame %.1fs: Scorebug at (%d, %d, %d, %d) conf=%.2f", timestamp, sb_x, sb_y, sb_w, sb_h, detection.confidence)

        # Get play clock config (offsets are relative to the scorebug bbox)
        config = reader.config
        if config is None:
            logger.error("No play clock config loaded")
            continue

        # Calculate play clock region in absolute coordinates
        pc_x = sb_x + config.x_offset
        pc_y = sb_y + config.y_offset
        pc_w = config.width
        pc_h = config.height
        logger.info("Play clock region: (%d, %d, %d, %d)", pc_x, pc_y, pc_w, pc_h)

        # Extract play clock region
        # NOTE(review): assumes the region lies fully inside the frame; numpy slicing
        # silently clips out-of-range bounds, which would yield a smaller/empty crop.
        play_clock_region = frame[pc_y : pc_y + pc_h, pc_x : pc_x + pc_w].copy()

        # Preprocess for OCR (same as PlayClockReader) - local copy so we can save the intermediate
        preprocessed = preprocess_for_debug(play_clock_region)

        # Run OCR and get result (reader re-extracts/preprocesses internally)
        reading = reader.read(frame, detection.bbox)

        # Create debug visualization
        debug_frame = frame.copy()

        # Draw scorebug bbox (blue)
        cv2.rectangle(debug_frame, (sb_x, sb_y), (sb_x + sb_w, sb_y + sb_h), (255, 0, 0), 2)
        cv2.putText(debug_frame, "Scorebug", (sb_x, sb_y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)

        # Draw play clock region (green if detected, red otherwise)
        pc_color = (0, 255, 0) if reading.detected else (0, 0, 255)
        cv2.rectangle(debug_frame, (pc_x, pc_y), (pc_x + pc_w, pc_y + pc_h), pc_color, 2)

        # Add text showing OCR result
        if reading.detected:
            text = "Clock: %d (%.0f%%)" % (reading.value, reading.confidence * 100)
        else:
            text = "FAILED: '%s'" % reading.raw_text
        cv2.putText(debug_frame, text, (pc_x, pc_y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, pc_color, 2)

        # Save outputs (filenames keyed by whole-second timestamp)
        ts_str = "%.0f" % timestamp

        # Save full debug frame
        cv2.imwrite(str(OUTPUT_DIR / ("frame_%s_full.png" % ts_str)), debug_frame)

        # Save cropped scorebug region
        scorebug_crop = frame[sb_y : sb_y + sb_h, sb_x : sb_x + sb_w].copy()
        cv2.imwrite(str(OUTPUT_DIR / ("frame_%s_scorebug.png" % ts_str)), scorebug_crop)

        # Save play clock region (raw and scaled)
        cv2.imwrite(str(OUTPUT_DIR / ("frame_%s_playclock_raw.png" % ts_str)), play_clock_region)

        # Scale up raw for easier viewing (nearest-neighbor preserves original pixels)
        scaled_raw = cv2.resize(play_clock_region, None, fx=4, fy=4, interpolation=cv2.INTER_NEAREST)
        cv2.imwrite(str(OUTPUT_DIR / ("frame_%s_playclock_scaled.png" % ts_str)), scaled_raw)

        # Save preprocessed (what OCR sees)
        cv2.imwrite(str(OUTPUT_DIR / ("frame_%s_playclock_preprocessed.png" % ts_str)), preprocessed)

        logger.info("Saved debug images for frame %.1fs", timestamp)
        logger.info("  OCR Result: detected=%s, value=%s, conf=%.2f, raw='%s'", reading.detected, reading.value, reading.confidence, reading.raw_text)

    cap.release()
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def preprocess_for_debug(region: np.ndarray) -> np.ndarray:
    """
    Preprocess the play clock region for OCR (same as PlayClockReader).
    Returns the preprocessed image for debugging.

    Pipeline: grayscale -> 4x upscale -> Otsu binarize -> invert if needed
    -> morphological clean-up -> white border padding.
    """
    # Grayscale, then upsample 4x so the tiny digits carry enough pixels for OCR
    upscaled = cv2.resize(
        cv2.cvtColor(region, cv2.COLOR_BGR2GRAY),
        None,
        fx=4,
        fy=4,
        interpolation=cv2.INTER_LINEAR,
    )

    # Otsu picks the threshold automatically - well suited to the high-contrast scorebug
    binary = cv2.threshold(upscaled, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    # Tesseract expects dark glyphs on a light field; flip when mostly dark
    if float(np.mean(binary)) < 128:
        binary = cv2.bitwise_not(binary)

    # Close (fill pinholes) then open (drop speckles) with a tiny kernel,
    # preserving the digit strokes
    speck = np.ones((2, 2), np.uint8)
    for morph_op in (cv2.MORPH_CLOSE, cv2.MORPH_OPEN):
        binary = cv2.morphologyEx(binary, morph_op, speck)

    # White border keeps the digits clear of the image edge for Tesseract
    return cv2.copyMakeBorder(binary, 10, 10, 10, 10, cv2.BORDER_CONSTANT, value=255)
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def main():
    """Main entry point for play clock diagnostic."""
    logger.info("Play Clock Diagnostic Tool")
    logger.info("=" * 50)

    # Verify paths up front; bail on the first missing input
    required_inputs = (
        (VIDEO_PATH, "Video not found: %s"),
        (TEMPLATE_PATH, "Template not found: %s"),
        (CONFIG_PATH, "Config not found: %s"),
    )
    for required_path, missing_msg in required_inputs:
        if not required_path.exists():
            logger.error(missing_msg, required_path)
            return 1

    # Initialize
    logger.info("Initializing detectors...")
    detector = ScorebugDetector(template_path=str(TEMPLATE_PATH))
    reader = PlayClockReader(region_config_path=str(CONFIG_PATH))

    # Run diagnostic
    logger.info("Extracting debug info for %d timestamps...", len(TEST_TIMESTAMPS))
    extract_debug_info(VIDEO_PATH, detector, reader, TEST_TIMESTAMPS)

    logger.info("Debug images saved to: %s", OUTPUT_DIR)
    logger.info("Diagnostic complete!")
    return 0
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
# Propagate main()'s return value (0 ok / 1 missing input) as the process exit code
if __name__ == "__main__":
    sys.exit(main())
|
scripts/visualize_detections.py
CHANGED
|
@@ -271,9 +271,130 @@ def create_timeline_image(plays: List[Dict], segment_start: float, segment_end:
|
|
| 271 |
logger.info("Timeline saved to: %s", output_path)
|
| 272 |
|
| 273 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
def generate_play_clips(results: Dict[str, Any], video_path: str, output_dir: str, padding: float = 2.0) -> None:
|
| 275 |
"""
|
| 276 |
-
Generate video clips for each detected play.
|
| 277 |
|
| 278 |
Args:
|
| 279 |
results: Detection results
|
|
@@ -351,6 +472,37 @@ def generate_play_clips(results: Dict[str, Any], video_path: str, output_dir: st
|
|
| 351 |
logger.info("Clip generation complete!")
|
| 352 |
|
| 353 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
def main():
|
| 355 |
"""Main entry point."""
|
| 356 |
parser = argparse.ArgumentParser(description="Visualize play detection results")
|
|
@@ -359,6 +511,8 @@ def main():
|
|
| 359 |
parser.add_argument("--ground-truth", type=str, help="Path to ground truth JSON file")
|
| 360 |
parser.add_argument("--video", type=str, help="Path to video file (for clip generation)")
|
| 361 |
parser.add_argument("--generate-clips", action="store_true", help="Generate video clips for each play")
|
|
|
|
|
|
|
| 362 |
parser.add_argument("--output-dir", type=str, help="Output directory for visualizations")
|
| 363 |
|
| 364 |
args = parser.parse_args()
|
|
@@ -396,6 +550,7 @@ def main():
|
|
| 396 |
create_timeline_image(results.get("plays", []), segment.get("start", 0), segment.get("end", 0), timeline_path)
|
| 397 |
|
| 398 |
# Generate clips if requested
|
|
|
|
| 399 |
if args.generate_clips:
|
| 400 |
video_path = args.video or str(DEFAULT_VIDEO_PATH)
|
| 401 |
if not Path(video_path).exists():
|
|
@@ -403,7 +558,14 @@ def main():
|
|
| 403 |
return 1
|
| 404 |
|
| 405 |
clips_dir = str(Path(output_dir) / "clips")
|
| 406 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 407 |
|
| 408 |
return 0
|
| 409 |
|
|
|
|
| 271 |
logger.info("Timeline saved to: %s", output_path)
|
| 272 |
|
| 273 |
|
| 274 |
+
def generate_play_clips_ffmpeg(results: Dict[str, Any], video_path: str, output_dir: str, padding: float = 2.0) -> Dict[str, float]:
    """
    Generate video clips for each detected play using ffmpeg (much faster than OpenCV).

    Each play is cut to its own play_NN.mp4 (re-encoded with libx264/aac for
    compatibility). When two or more clips succeed they are also concatenated
    into all_plays.mp4 via the ffmpeg concat demuxer (stream copy, no re-encode).

    Args:
        results: Detection results (expects a "plays" list of dicts with
            "play_number", "start_time", "end_time")
        video_path: Path to source video
        output_dir: Directory to save clips
        padding: Seconds of padding before/after play

    Returns:
        Dictionary with timing information ("clip_extraction", "concatenation" in seconds)
    """
    import subprocess
    import time

    timing = {"clip_extraction": 0.0, "concatenation": 0.0}

    plays = results.get("plays", [])
    if not plays:
        logger.warning("No plays to generate clips for")
        return timing

    # Create output directory
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    logger.info("Generating %d play clips with ffmpeg...", len(plays))
    # Only clips that ffmpeg actually wrote; failed extractions are excluded so
    # they cannot poison the concat list below.
    clip_paths = []

    t_start = time.perf_counter()

    for play in plays:
        play_num = play.get("play_number", 0)
        start_time = max(0, play.get("start_time", 0) - padding)
        end_time = play.get("end_time", 0) + padding
        duration = end_time - start_time

        # Create output file
        clip_path = output_path / ("play_%02d.mp4" % play_num)

        # Use ffmpeg for fast extraction
        # -ss before -i for fast seeking, -t for duration
        cmd = [
            "ffmpeg",
            "-y",  # Overwrite output
            "-ss",
            str(start_time),
            "-i",
            video_path,
            "-t",
            str(duration),
            "-c:v",
            "libx264",  # Re-encode for compatibility
            "-preset",
            "fast",
            "-crf",
            "23",
            "-c:a",
            "aac",
            "-b:a",
            "128k",
            "-loglevel",
            "error",
            str(clip_path),
        ]

        try:
            subprocess.run(cmd, check=True, capture_output=True)
            # Record the clip only after a successful run (bug fix: previously a
            # failed clip was still listed and broke concatenation).
            clip_paths.append(clip_path)
            logger.info("  Created: %s (%.1fs - %.1fs, duration: %.1fs)", clip_path.name, start_time, end_time, duration)
        except subprocess.CalledProcessError as e:
            logger.error("  Failed to create %s: %s", clip_path.name, e.stderr.decode() if e.stderr else str(e))

    timing["clip_extraction"] = time.perf_counter() - t_start
    logger.info("Clip extraction complete! (%.2fs)", timing["clip_extraction"])

    # Concatenate all clips into a single highlight video
    if len(clip_paths) > 1:
        t_start = time.perf_counter()
        concat_path = output_path / "all_plays.mp4"
        logger.info("Concatenating %d clips into %s...", len(clip_paths), concat_path.name)

        # Create concat file list (relative names; ffmpeg runs with cwd=output_path)
        concat_list_path = output_path / "concat_list.txt"
        with open(concat_list_path, "w") as f:
            for clip_path in clip_paths:
                f.write("file '%s'\n" % clip_path.name)

        # Use ffmpeg concat demuxer
        cmd = [
            "ffmpeg",
            "-y",
            "-f",
            "concat",
            "-safe",
            "0",
            "-i",
            str(concat_list_path),
            "-c",
            "copy",  # No re-encoding needed
            "-loglevel",
            "error",
            str(concat_path),
        ]

        try:
            subprocess.run(cmd, check=True, capture_output=True, cwd=str(output_path))
            logger.info("  Created: %s", concat_path.name)
        except subprocess.CalledProcessError as e:
            logger.error("  Failed to concatenate: %s", e.stderr.decode() if e.stderr else str(e))

        # Clean up concat list
        concat_list_path.unlink(missing_ok=True)

        timing["concatenation"] = time.perf_counter() - t_start
        logger.info("Concatenation complete! (%.2fs)", timing["concatenation"])

    return timing
|
| 393 |
+
|
| 394 |
+
|
| 395 |
def generate_play_clips(results: Dict[str, Any], video_path: str, output_dir: str, padding: float = 2.0) -> None:
|
| 396 |
"""
|
| 397 |
+
Generate video clips for each detected play (legacy OpenCV version - slow).
|
| 398 |
|
| 399 |
Args:
|
| 400 |
results: Detection results
|
|
|
|
| 472 |
logger.info("Clip generation complete!")
|
| 473 |
|
| 474 |
|
| 475 |
+
def print_timing_summary(results: Dict[str, Any], clip_timing: Optional[Dict[str, float]] = None) -> None:
    """Print timing breakdown from detection and clip generation."""
    detection_timing = results.get("timing", {})

    # Nothing recorded in either phase -> nothing to print
    if not detection_timing and not clip_timing:
        return

    separator = "=" * 60
    logger.info("")
    logger.info(separator)
    logger.info("TIMING BREAKDOWN")
    logger.info(separator)

    if detection_timing:
        logger.info("Detection Phase:")
        for section, duration in detection_timing.items():
            logger.info("  %s: %.2fs", section, duration)
        logger.info("  DETECTION TOTAL: %.2fs", sum(detection_timing.values()))

    if clip_timing:
        logger.info("Clip Generation Phase:")
        for section, duration in clip_timing.items():
            logger.info("  %s: %.2fs", section, duration)
        logger.info("  CLIP TOTAL: %.2fs", sum(clip_timing.values()))

    logger.info(separator)
|
| 504 |
+
|
| 505 |
+
|
| 506 |
def main():
|
| 507 |
"""Main entry point."""
|
| 508 |
parser = argparse.ArgumentParser(description="Visualize play detection results")
|
|
|
|
| 511 |
parser.add_argument("--ground-truth", type=str, help="Path to ground truth JSON file")
|
| 512 |
parser.add_argument("--video", type=str, help="Path to video file (for clip generation)")
|
| 513 |
parser.add_argument("--generate-clips", action="store_true", help="Generate video clips for each play")
|
| 514 |
+
parser.add_argument("--use-opencv", action="store_true", help="Use OpenCV instead of ffmpeg for clip generation (slower)")
|
| 515 |
+
parser.add_argument("--padding", type=float, default=2.0, help="Seconds of padding before/after each play (default: 2.0)")
|
| 516 |
parser.add_argument("--output-dir", type=str, help="Output directory for visualizations")
|
| 517 |
|
| 518 |
args = parser.parse_args()
|
|
|
|
| 550 |
create_timeline_image(results.get("plays", []), segment.get("start", 0), segment.get("end", 0), timeline_path)
|
| 551 |
|
| 552 |
# Generate clips if requested
|
| 553 |
+
clip_timing = None
|
| 554 |
if args.generate_clips:
|
| 555 |
video_path = args.video or str(DEFAULT_VIDEO_PATH)
|
| 556 |
if not Path(video_path).exists():
|
|
|
|
| 558 |
return 1
|
| 559 |
|
| 560 |
clips_dir = str(Path(output_dir) / "clips")
|
| 561 |
+
|
| 562 |
+
if args.use_opencv:
|
| 563 |
+
generate_play_clips(results, video_path, clips_dir, padding=args.padding)
|
| 564 |
+
else:
|
| 565 |
+
clip_timing = generate_play_clips_ffmpeg(results, video_path, clips_dir, padding=args.padding)
|
| 566 |
+
|
| 567 |
+
# Print timing summary
|
| 568 |
+
print_timing_summary(results, clip_timing)
|
| 569 |
|
| 570 |
return 0
|
| 571 |
|
src/detectors/play_clock_reader.py
CHANGED
|
@@ -184,8 +184,9 @@ class PlayClockReader:
|
|
| 184 |
Preprocessing steps:
|
| 185 |
1. Convert to grayscale
|
| 186 |
2. Scale up for better digit recognition
|
| 187 |
-
3. Apply
|
| 188 |
-
4. Invert
|
|
|
|
| 189 |
|
| 190 |
Args:
|
| 191 |
region: Play clock region (BGR format)
|
|
@@ -196,23 +197,31 @@ class PlayClockReader:
|
|
| 196 |
# Convert to grayscale
|
| 197 |
gray = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)
|
| 198 |
|
| 199 |
-
# Scale up by
|
| 200 |
-
scale_factor =
|
| 201 |
-
scaled = cv2.resize(gray, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.
|
| 202 |
|
| 203 |
-
#
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
# Apply adaptive thresholding for better handling of varying lighting
|
| 207 |
-
binary = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
|
| 208 |
|
| 209 |
# Determine if we need to invert (Tesseract prefers dark text on light background)
|
| 210 |
# Check if the image is predominantly dark (likely light digits on dark background)
|
| 211 |
mean_intensity = np.mean(binary)
|
| 212 |
if mean_intensity < 128:
|
| 213 |
-
#
|
| 214 |
binary = cv2.bitwise_not(binary)
|
| 215 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
return binary
|
| 217 |
|
| 218 |
def _run_ocr(self, image: np.ndarray) -> Tuple[str, float]:
|
|
|
|
| 184 |
Preprocessing steps:
|
| 185 |
1. Convert to grayscale
|
| 186 |
2. Scale up for better digit recognition
|
| 187 |
+
3. Apply Otsu's thresholding (better for high-contrast scorebug displays)
|
| 188 |
+
4. Invert to get dark text on light background (Tesseract preference)
|
| 189 |
+
5. Apply morphological operations to clean up noise
|
| 190 |
|
| 191 |
Args:
|
| 192 |
region: Play clock region (BGR format)
|
|
|
|
| 197 |
# Convert to grayscale
|
| 198 |
gray = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)
|
| 199 |
|
| 200 |
+
# Scale up by 4x for better OCR accuracy on small digits
|
| 201 |
+
scale_factor = 4
|
| 202 |
+
scaled = cv2.resize(gray, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_LINEAR)
|
| 203 |
|
| 204 |
+
# Use Otsu's thresholding - works better for high-contrast scorebug displays
|
| 205 |
+
# The play clock has white digits on a dark background with good contrast
|
| 206 |
+
_, binary = cv2.threshold(scaled, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
|
|
|
|
|
|
|
| 207 |
|
| 208 |
# Determine if we need to invert (Tesseract prefers dark text on light background)
|
| 209 |
# Check if the image is predominantly dark (likely light digits on dark background)
|
| 210 |
mean_intensity = np.mean(binary)
|
| 211 |
if mean_intensity < 128:
|
| 212 |
+
# Image is mostly dark (light digits on dark background) - invert for Tesseract
|
| 213 |
binary = cv2.bitwise_not(binary)
|
| 214 |
|
| 215 |
+
# Apply morphological operations to clean up noise
|
| 216 |
+
# Use a small kernel to remove small noise while preserving digit shapes
|
| 217 |
+
kernel = np.ones((2, 2), np.uint8)
|
| 218 |
+
binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel) # Fill small holes
|
| 219 |
+
binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel) # Remove small noise
|
| 220 |
+
|
| 221 |
+
# Add padding around the image - helps Tesseract with edge detection
|
| 222 |
+
padding = 10
|
| 223 |
+
binary = cv2.copyMakeBorder(binary, padding, padding, padding, padding, cv2.BORDER_CONSTANT, value=255)
|
| 224 |
+
|
| 225 |
return binary
|
| 226 |
|
| 227 |
def _run_ocr(self, image: np.ndarray) -> Tuple[str, float]:
|
src/detectors/play_state_machine.py
CHANGED
|
@@ -50,6 +50,7 @@ class PlayStateMachine:
|
|
| 50 |
Detection Strategy:
|
| 51 |
- Play START: Detected when play clock resets to 40 (or potentially freezes - needs validation)
|
| 52 |
- Play END: **Always use backward counting** - calculate from next observed clock value after play
|
|
|
|
| 53 |
|
| 54 |
Backward Counting:
|
| 55 |
When the play clock reappears showing value X (where X < 40), the play end time is:
|
|
@@ -62,6 +63,7 @@ class PlayStateMachine:
|
|
| 62 |
clock_stable_frames: int = 3 # Frames with same clock value to consider it "stable"
|
| 63 |
max_play_duration: float = 15.0 # Maximum expected play duration in seconds
|
| 64 |
scorebug_lost_timeout: float = 30.0 # Seconds before resetting state when scorebug lost
|
|
|
|
| 65 |
|
| 66 |
# Internal state
|
| 67 |
state: PlayState = field(default=PlayState.IDLE)
|
|
@@ -77,6 +79,7 @@ class PlayStateMachine:
|
|
| 77 |
_current_play_start_clock: Optional[int] = field(default=None)
|
| 78 |
_last_scorebug_timestamp: Optional[float] = field(default=None)
|
| 79 |
_direct_end_time: Optional[float] = field(default=None)
|
|
|
|
| 80 |
|
| 81 |
def update(self, timestamp: float, scorebug: ScorebugDetection, clock: PlayClockReading) -> Optional[PlayEvent]:
|
| 82 |
"""
|
|
@@ -188,20 +191,14 @@ class PlayStateMachine:
|
|
| 188 |
self._start_play(timestamp, "clock_reset", self._last_clock_value)
|
| 189 |
return None
|
| 190 |
|
| 191 |
-
#
|
| 192 |
if clock_value == self._last_clock_value:
|
| 193 |
self._clock_stable_count += 1
|
| 194 |
-
# If clock has been stable for a while and value is low, might be a freeze
|
| 195 |
-
if self._clock_stable_count >= self.clock_stable_frames and clock_value <= 5:
|
| 196 |
-
# Calculate time the clock has been at this value
|
| 197 |
-
time_at_value = timestamp - (self._last_clock_timestamp or timestamp)
|
| 198 |
-
if time_at_value > 1.0: # More than 1 second at same low value
|
| 199 |
-
logger.info("Play START detected at %.1fs (clock frozen at %d for %.1fs)", timestamp, clock_value, time_at_value)
|
| 200 |
-
self._start_play(timestamp - time_at_value, "clock_freeze", clock_value)
|
| 201 |
-
return None
|
| 202 |
else:
|
| 203 |
self._clock_stable_count = 1
|
| 204 |
|
|
|
|
|
|
|
| 205 |
return None
|
| 206 |
|
| 207 |
def _handle_play_in_progress(self, timestamp: float, clock_value: int) -> Optional[PlayEvent]:
|
|
@@ -213,30 +210,52 @@ class PlayStateMachine:
|
|
| 213 |
play_duration = timestamp - self._current_play_start_time
|
| 214 |
if play_duration > self.max_play_duration:
|
| 215 |
logger.warning("Play duration (%.1fs) exceeded max (%.1fs), forcing end", play_duration, self.max_play_duration)
|
| 216 |
-
# Use current timestamp as direct end, but will recalculate with backward counting
|
| 217 |
self._direct_end_time = timestamp
|
|
|
|
| 218 |
return self._end_play(timestamp, clock_value, "direct_detect")
|
| 219 |
|
| 220 |
-
# If
|
|
|
|
| 221 |
if clock_value == 40:
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
return
|
| 226 |
|
| 227 |
-
#
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
)
|
| 238 |
-
|
| 239 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
|
| 241 |
return None
|
| 242 |
|
|
@@ -276,6 +295,7 @@ class PlayStateMachine:
|
|
| 276 |
self._current_play_start_time = timestamp
|
| 277 |
self._current_play_start_method = method
|
| 278 |
self._current_play_start_clock = clock_value
|
|
|
|
| 279 |
self.state = PlayState.PLAY_IN_PROGRESS
|
| 280 |
logger.debug("Play started: time=%.1fs, method=%s, clock=%s", timestamp, method, clock_value)
|
| 281 |
|
|
@@ -351,6 +371,7 @@ class PlayStateMachine:
|
|
| 351 |
self._current_play_start_clock = None
|
| 352 |
self._direct_end_time = None
|
| 353 |
self._clock_stable_count = 0
|
|
|
|
| 354 |
|
| 355 |
def _reset_state(self) -> None:
|
| 356 |
"""Fully reset state machine."""
|
|
|
|
| 50 |
Detection Strategy:
|
| 51 |
- Play START: Detected when play clock resets to 40 (or potentially freezes - needs validation)
|
| 52 |
- Play END: **Always use backward counting** - calculate from next observed clock value after play
|
| 53 |
+
Requires K consecutive descending clock ticks to confirm (avoids false positives)
|
| 54 |
|
| 55 |
Backward Counting:
|
| 56 |
When the play clock reappears showing value X (where X < 40), the play end time is:
|
|
|
|
| 63 |
clock_stable_frames: int = 3 # Frames with same clock value to consider it "stable"
|
| 64 |
max_play_duration: float = 15.0 # Maximum expected play duration in seconds
|
| 65 |
scorebug_lost_timeout: float = 30.0 # Seconds before resetting state when scorebug lost
|
| 66 |
+
required_countdown_ticks: int = 3 # Number of consecutive descending ticks required to confirm play end
|
| 67 |
|
| 68 |
# Internal state
|
| 69 |
state: PlayState = field(default=PlayState.IDLE)
|
|
|
|
| 79 |
_current_play_start_clock: Optional[int] = field(default=None)
|
| 80 |
_last_scorebug_timestamp: Optional[float] = field(default=None)
|
| 81 |
_direct_end_time: Optional[float] = field(default=None)
|
| 82 |
+
_countdown_history: List[tuple] = field(default_factory=list) # List of (timestamp, clock_value) for countdown tracking
|
| 83 |
|
| 84 |
def update(self, timestamp: float, scorebug: ScorebugDetection, clock: PlayClockReading) -> Optional[PlayEvent]:
|
| 85 |
"""
|
|
|
|
| 191 |
self._start_play(timestamp, "clock_reset", self._last_clock_value)
|
| 192 |
return None
|
| 193 |
|
| 194 |
+
# Track clock stability (for potential future use)
|
| 195 |
if clock_value == self._last_clock_value:
|
| 196 |
self._clock_stable_count += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
else:
|
| 198 |
self._clock_stable_count = 1
|
| 199 |
|
| 200 |
+
# Note: "clock_freeze" detection disabled - was causing false positives
|
| 201 |
+
# The clock_reset detection (going to 40) is the reliable method
|
| 202 |
return None
|
| 203 |
|
| 204 |
def _handle_play_in_progress(self, timestamp: float, clock_value: int) -> Optional[PlayEvent]:
|
|
|
|
| 210 |
play_duration = timestamp - self._current_play_start_time
|
| 211 |
if play_duration > self.max_play_duration:
|
| 212 |
logger.warning("Play duration (%.1fs) exceeded max (%.1fs), forcing end", play_duration, self.max_play_duration)
|
|
|
|
| 213 |
self._direct_end_time = timestamp
|
| 214 |
+
self._countdown_history = [] # Reset countdown tracking
|
| 215 |
return self._end_play(timestamp, clock_value, "direct_detect")
|
| 216 |
|
| 217 |
+
# If clock is still at 40, the play just started and clock hasn't begun countdown yet
|
| 218 |
+
# We need to wait for the clock to drop below 40 before we can detect play end
|
| 219 |
if clock_value == 40:
|
| 220 |
+
# Clock is still at 40 after reset - waiting for countdown to begin
|
| 221 |
+
logger.debug("Play in progress at %.1fs, clock still at 40", timestamp)
|
| 222 |
+
self._countdown_history = [] # Reset countdown tracking
|
| 223 |
+
return None
|
| 224 |
|
| 225 |
+
# Track countdown history for confirming play end
|
| 226 |
+
# We require K consecutive descending ticks to confirm
|
| 227 |
+
self._countdown_history.append((timestamp, clock_value))
|
| 228 |
+
|
| 229 |
+
# Check if we have enough consecutive descending values
|
| 230 |
+
if len(self._countdown_history) >= self.required_countdown_ticks:
|
| 231 |
+
# Get last K readings
|
| 232 |
+
recent = self._countdown_history[-self.required_countdown_ticks :]
|
| 233 |
+
values = [v for _, v in recent]
|
| 234 |
+
|
| 235 |
+
# Check if values are strictly descending (or stable which means same second)
|
| 236 |
+
is_valid_countdown = True
|
| 237 |
+
for i in range(1, len(values)):
|
| 238 |
+
# Allow same value (within same second) or descending
|
| 239 |
+
if values[i] > values[i - 1]:
|
| 240 |
+
is_valid_countdown = False
|
| 241 |
+
break
|
| 242 |
+
|
| 243 |
+
if is_valid_countdown:
|
| 244 |
+
# Use the first reading in our confirmed sequence for backward calculation
|
| 245 |
+
first_timestamp, first_value = recent[0]
|
| 246 |
+
calculated_end_time = first_timestamp - (40 - first_value)
|
| 247 |
+
logger.info(
|
| 248 |
+
"Play END confirmed via %d-tick countdown: %.1fs (clock=%d→%d, observed %.1fs-%.1fs)",
|
| 249 |
+
self.required_countdown_ticks,
|
| 250 |
+
calculated_end_time,
|
| 251 |
+
values[0],
|
| 252 |
+
values[-1],
|
| 253 |
+
recent[0][0],
|
| 254 |
+
recent[-1][0],
|
| 255 |
+
)
|
| 256 |
+
self._direct_end_time = timestamp # When we confirmed the countdown
|
| 257 |
+
self._countdown_history = [] # Reset for next play
|
| 258 |
+
return self._end_play_with_backward_calc(timestamp, first_value, calculated_end_time)
|
| 259 |
|
| 260 |
return None
|
| 261 |
|
|
|
|
| 295 |
self._current_play_start_time = timestamp
|
| 296 |
self._current_play_start_method = method
|
| 297 |
self._current_play_start_clock = clock_value
|
| 298 |
+
self._countdown_history = [] # Reset countdown tracking for new play
|
| 299 |
self.state = PlayState.PLAY_IN_PROGRESS
|
| 300 |
logger.debug("Play started: time=%.1fs, method=%s, clock=%s", timestamp, method, clock_value)
|
| 301 |
|
|
|
|
| 371 |
self._current_play_start_clock = None
|
| 372 |
self._direct_end_time = None
|
| 373 |
self._clock_stable_count = 0
|
| 374 |
+
self._countdown_history = []
|
| 375 |
|
| 376 |
def _reset_state(self) -> None:
|
| 377 |
"""Fully reset state machine."""
|
src/detectors/scorebug_detector.py
CHANGED
|
@@ -5,9 +5,11 @@ This module provides functions to detect the presence and location of the scoreb
|
|
| 5 |
(score overlay) in video frames.
|
| 6 |
"""
|
| 7 |
|
|
|
|
| 8 |
import cv2
|
| 9 |
import numpy as np
|
| 10 |
import logging
|
|
|
|
| 11 |
from typing import Optional, Tuple, Dict
|
| 12 |
from dataclasses import dataclass
|
| 13 |
|
|
@@ -28,28 +30,59 @@ class ScorebugDetector:
|
|
| 28 |
"""
|
| 29 |
Detects the scorebug in video frames.
|
| 30 |
|
| 31 |
-
The detector
|
| 32 |
-
1. Template matching
|
| 33 |
-
2.
|
| 34 |
-
|
|
|
|
| 35 |
"""
|
| 36 |
|
| 37 |
-
def __init__(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
"""
|
| 39 |
Initialize the scorebug detector.
|
| 40 |
|
| 41 |
Args:
|
| 42 |
template_path: Path to a template image of the scorebug (optional)
|
| 43 |
-
|
|
|
|
| 44 |
"""
|
| 45 |
self.template = None
|
| 46 |
self.template_path = template_path
|
| 47 |
-
self.
|
|
|
|
| 48 |
|
| 49 |
if template_path:
|
| 50 |
self.load_template(template_path)
|
| 51 |
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
def load_template(self, template_path: str) -> None:
|
| 55 |
"""
|
|
@@ -69,29 +102,75 @@ class ScorebugDetector:
|
|
| 69 |
"""
|
| 70 |
Detect scorebug in a frame.
|
| 71 |
|
|
|
|
|
|
|
| 72 |
Args:
|
| 73 |
frame: Input frame (BGR format)
|
| 74 |
|
| 75 |
Returns:
|
| 76 |
ScorebugDetection object with detection results
|
| 77 |
"""
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
if detection.detected:
|
| 85 |
-
logger.debug(f"Scorebug detected with confidence {detection.confidence:.2f} using {detection.method}")
|
| 86 |
-
return detection
|
| 87 |
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
return ScorebugDetection(detected=False, confidence=0.0, method="none")
|
| 91 |
|
| 92 |
-
|
|
|
|
| 93 |
"""
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
Args:
|
| 97 |
frame: Input frame
|
|
@@ -100,9 +179,9 @@ class ScorebugDetector:
|
|
| 100 |
Detection result
|
| 101 |
"""
|
| 102 |
if self.template is None:
|
| 103 |
-
return ScorebugDetection(detected=False, confidence=0.0, method="
|
| 104 |
|
| 105 |
-
# Perform template matching
|
| 106 |
result = cv2.matchTemplate(frame, self.template, cv2.TM_CCOEFF_NORMED)
|
| 107 |
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
|
| 108 |
|
|
@@ -115,9 +194,64 @@ class ScorebugDetector:
|
|
| 115 |
h, w = self.template.shape[:2]
|
| 116 |
bbox = (max_loc[0], max_loc[1], w, h)
|
| 117 |
|
| 118 |
-
return ScorebugDetection(detected=True, confidence=float(max_val), bbox=bbox, method="
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
else:
|
| 120 |
-
|
|
|
|
| 121 |
|
| 122 |
def _detect_by_position(self, frame: np.ndarray) -> ScorebugDetection:
|
| 123 |
"""
|
|
|
|
| 5 |
(score overlay) in video frames.
|
| 6 |
"""
|
| 7 |
|
| 8 |
+
import json
|
| 9 |
import cv2
|
| 10 |
import numpy as np
|
| 11 |
import logging
|
| 12 |
+
from pathlib import Path
|
| 13 |
from typing import Optional, Tuple, Dict
|
| 14 |
from dataclasses import dataclass
|
| 15 |
|
|
|
|
| 30 |
"""
|
| 31 |
Detects the scorebug in video frames.
|
| 32 |
|
| 33 |
+
The detector supports two modes:
|
| 34 |
+
1. Full-frame search: Template matching across entire frame (slower, use for initial detection)
|
| 35 |
+
2. Fixed-region check: Only check known location for presence (much faster)
|
| 36 |
+
|
| 37 |
+
For optimal performance, use fixed_region mode after determining scorebug location once.
|
| 38 |
"""
|
| 39 |
|
| 40 |
+
def __init__(
|
| 41 |
+
self,
|
| 42 |
+
template_path: Optional[str] = None,
|
| 43 |
+
fixed_region: Optional[Tuple[int, int, int, int]] = None,
|
| 44 |
+
fixed_region_config_path: Optional[str] = None,
|
| 45 |
+
):
|
| 46 |
"""
|
| 47 |
Initialize the scorebug detector.
|
| 48 |
|
| 49 |
Args:
|
| 50 |
template_path: Path to a template image of the scorebug (optional)
|
| 51 |
+
fixed_region: Fixed region where scorebug appears (x, y, w, h) - enables fast mode
|
| 52 |
+
fixed_region_config_path: Path to JSON config with fixed region (alternative to fixed_region)
|
| 53 |
"""
|
| 54 |
self.template = None
|
| 55 |
self.template_path = template_path
|
| 56 |
+
self.fixed_region = fixed_region
|
| 57 |
+
self._use_fixed_region = fixed_region is not None
|
| 58 |
|
| 59 |
if template_path:
|
| 60 |
self.load_template(template_path)
|
| 61 |
|
| 62 |
+
# Load fixed region from config file if provided
|
| 63 |
+
if fixed_region_config_path and not fixed_region:
|
| 64 |
+
self._load_fixed_region_config(fixed_region_config_path)
|
| 65 |
+
|
| 66 |
+
mode = "fixed_region" if self._use_fixed_region else "full_search"
|
| 67 |
+
logger.info("ScorebugDetector initialized (template: %s, mode: %s)", template_path is not None, mode)
|
| 68 |
+
if self._use_fixed_region:
|
| 69 |
+
logger.info(" Fixed region: %s", self.fixed_region)
|
| 70 |
+
|
| 71 |
+
def _load_fixed_region_config(self, config_path: str) -> None:
|
| 72 |
+
"""Load fixed region from a JSON config file."""
|
| 73 |
+
path = Path(config_path)
|
| 74 |
+
if not path.exists():
|
| 75 |
+
logger.warning("Fixed region config not found: %s", config_path)
|
| 76 |
+
return
|
| 77 |
+
|
| 78 |
+
with open(path, "r", encoding="utf-8") as f:
|
| 79 |
+
data = json.load(f)
|
| 80 |
+
|
| 81 |
+
if "scorebug_region" in data:
|
| 82 |
+
region = data["scorebug_region"]
|
| 83 |
+
self.fixed_region = (region["x"], region["y"], region["width"], region["height"])
|
| 84 |
+
self._use_fixed_region = True
|
| 85 |
+
logger.info("Loaded fixed region from config: %s", self.fixed_region)
|
| 86 |
|
| 87 |
def load_template(self, template_path: str) -> None:
|
| 88 |
"""
|
|
|
|
| 102 |
"""
|
| 103 |
Detect scorebug in a frame.
|
| 104 |
|
| 105 |
+
Uses fixed-region mode if configured (much faster), otherwise searches entire frame.
|
| 106 |
+
|
| 107 |
Args:
|
| 108 |
frame: Input frame (BGR format)
|
| 109 |
|
| 110 |
Returns:
|
| 111 |
ScorebugDetection object with detection results
|
| 112 |
"""
|
| 113 |
+
if self.template is None:
|
| 114 |
+
logger.debug("No template loaded, cannot detect scorebug")
|
| 115 |
+
return ScorebugDetection(detected=False, confidence=0.0, method="none")
|
| 116 |
+
|
| 117 |
+
# Use fixed-region mode if configured (much faster - only checks known location)
|
| 118 |
+
if self._use_fixed_region and self.fixed_region is not None:
|
| 119 |
+
detection = self._detect_in_fixed_region(frame)
|
| 120 |
+
else:
|
| 121 |
+
# Full-frame template matching (slower, searches entire frame)
|
| 122 |
+
detection = self._detect_by_template_fullsearch(frame)
|
| 123 |
+
|
| 124 |
+
if detection.detected:
|
| 125 |
+
logger.debug("Scorebug detected with confidence %.2f using %s", detection.confidence, detection.method)
|
| 126 |
+
else:
|
| 127 |
+
logger.debug("No scorebug detected (confidence: %.2f)", detection.confidence)
|
| 128 |
+
|
| 129 |
+
return detection
|
| 130 |
+
|
| 131 |
+
def _detect_in_fixed_region(self, frame: np.ndarray) -> ScorebugDetection:
|
| 132 |
+
"""
|
| 133 |
+
Detect scorebug by checking only the fixed known location.
|
| 134 |
|
| 135 |
+
This is MUCH faster than full-frame search since we only compare
|
| 136 |
+
the template against a single position.
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
+
Args:
|
| 139 |
+
frame: Input frame
|
|
|
|
| 140 |
|
| 141 |
+
Returns:
|
| 142 |
+
Detection result
|
| 143 |
"""
|
| 144 |
+
x, y, w, h = self.fixed_region
|
| 145 |
+
th, tw = self.template.shape[:2]
|
| 146 |
+
|
| 147 |
+
# Validate region bounds
|
| 148 |
+
frame_h, frame_w = frame.shape[:2]
|
| 149 |
+
if x < 0 or y < 0 or x + tw > frame_w or y + th > frame_h:
|
| 150 |
+
logger.warning("Fixed region out of frame bounds")
|
| 151 |
+
return ScorebugDetection(detected=False, confidence=0.0, bbox=self.fixed_region, method="fixed_region")
|
| 152 |
+
|
| 153 |
+
# Extract the region where scorebug should be
|
| 154 |
+
region = frame[y : y + th, x : x + tw]
|
| 155 |
+
|
| 156 |
+
# Compare template to region using normalized cross-correlation
|
| 157 |
+
# This is much faster than matchTemplate on full frame since we're only comparing one location
|
| 158 |
+
result = cv2.matchTemplate(region, self.template, cv2.TM_CCOEFF_NORMED)
|
| 159 |
+
confidence = float(result[0, 0]) # Single value since region matches template size
|
| 160 |
+
|
| 161 |
+
# Use threshold to determine if scorebug is present
|
| 162 |
+
threshold = 0.8
|
| 163 |
+
if confidence >= threshold:
|
| 164 |
+
return ScorebugDetection(detected=True, confidence=confidence, bbox=(x, y, tw, th), method="fixed_region")
|
| 165 |
+
else:
|
| 166 |
+
return ScorebugDetection(detected=False, confidence=confidence, bbox=(x, y, tw, th), method="fixed_region")
|
| 167 |
+
|
| 168 |
+
def _detect_by_template_fullsearch(self, frame: np.ndarray) -> ScorebugDetection:
|
| 169 |
+
"""
|
| 170 |
+
Detect scorebug using full-frame template matching.
|
| 171 |
+
|
| 172 |
+
This searches the entire frame for the template - slower but works
|
| 173 |
+
when scorebug position is unknown.
|
| 174 |
|
| 175 |
Args:
|
| 176 |
frame: Input frame
|
|
|
|
| 179 |
Detection result
|
| 180 |
"""
|
| 181 |
if self.template is None:
|
| 182 |
+
return ScorebugDetection(detected=False, confidence=0.0, method="full_search")
|
| 183 |
|
| 184 |
+
# Perform template matching across entire frame
|
| 185 |
result = cv2.matchTemplate(frame, self.template, cv2.TM_CCOEFF_NORMED)
|
| 186 |
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
|
| 187 |
|
|
|
|
| 194 |
h, w = self.template.shape[:2]
|
| 195 |
bbox = (max_loc[0], max_loc[1], w, h)
|
| 196 |
|
| 197 |
+
return ScorebugDetection(detected=True, confidence=float(max_val), bbox=bbox, method="full_search")
|
| 198 |
+
else:
|
| 199 |
+
return ScorebugDetection(detected=False, confidence=float(max_val), method="full_search")
|
| 200 |
+
|
| 201 |
+
def set_fixed_region(self, region: Tuple[int, int, int, int]) -> None:
|
| 202 |
+
"""
|
| 203 |
+
Set a fixed region for fast detection mode.
|
| 204 |
+
|
| 205 |
+
Call this after discovering the scorebug location to switch to fast mode.
|
| 206 |
+
|
| 207 |
+
Args:
|
| 208 |
+
region: (x, y, width, height) of the scorebug location
|
| 209 |
+
"""
|
| 210 |
+
self.fixed_region = region
|
| 211 |
+
self._use_fixed_region = True
|
| 212 |
+
logger.info("Fixed region set: %s - now using fast detection mode", region)
|
| 213 |
+
|
| 214 |
+
def save_fixed_region_config(self, config_path: str) -> None:
|
| 215 |
+
"""Save the fixed region to a config file for reuse."""
|
| 216 |
+
if self.fixed_region is None:
|
| 217 |
+
logger.warning("No fixed region to save")
|
| 218 |
+
return
|
| 219 |
+
|
| 220 |
+
x, y, w, h = self.fixed_region
|
| 221 |
+
data = {"scorebug_region": {"x": x, "y": y, "width": w, "height": h}}
|
| 222 |
+
|
| 223 |
+
path = Path(config_path)
|
| 224 |
+
path.parent.mkdir(parents=True, exist_ok=True)
|
| 225 |
+
with open(path, "w", encoding="utf-8") as f:
|
| 226 |
+
json.dump(data, f, indent=2)
|
| 227 |
+
|
| 228 |
+
logger.info("Saved fixed region config to: %s", config_path)
|
| 229 |
+
|
| 230 |
+
def discover_and_lock_region(self, frame: np.ndarray) -> bool:
|
| 231 |
+
"""
|
| 232 |
+
Discover scorebug location using full search, then lock to fixed region mode.
|
| 233 |
+
|
| 234 |
+
This is useful for the first frame - find the scorebug once, then use
|
| 235 |
+
fast fixed-region mode for all subsequent frames.
|
| 236 |
+
|
| 237 |
+
Args:
|
| 238 |
+
frame: Frame to search
|
| 239 |
+
|
| 240 |
+
Returns:
|
| 241 |
+
True if scorebug was found and region was locked, False otherwise
|
| 242 |
+
"""
|
| 243 |
+
# Temporarily disable fixed region to do full search
|
| 244 |
+
old_use_fixed = self._use_fixed_region
|
| 245 |
+
self._use_fixed_region = False
|
| 246 |
+
|
| 247 |
+
detection = self._detect_by_template_fullsearch(frame)
|
| 248 |
+
|
| 249 |
+
if detection.detected and detection.bbox:
|
| 250 |
+
self.set_fixed_region(detection.bbox)
|
| 251 |
+
return True
|
| 252 |
else:
|
| 253 |
+
self._use_fixed_region = old_use_fixed
|
| 254 |
+
return False
|
| 255 |
|
| 256 |
def _detect_by_position(self, frame: np.ndarray) -> ScorebugDetection:
|
| 257 |
"""
|
src/pipeline/play_detector.py
CHANGED
|
@@ -10,13 +10,14 @@ This module orchestrates the complete play detection pipeline:
|
|
| 10 |
|
| 11 |
import json
|
| 12 |
import logging
|
|
|
|
| 13 |
from dataclasses import dataclass, field
|
| 14 |
from pathlib import Path
|
| 15 |
from typing import Optional, List, Dict, Any
|
| 16 |
|
| 17 |
import cv2
|
| 18 |
|
| 19 |
-
from
|
| 20 |
|
| 21 |
logger = logging.getLogger(__name__)
|
| 22 |
|
|
@@ -30,7 +31,7 @@ class DetectionConfig:
|
|
| 30 |
clock_region_config_path: str # Path to play clock region config
|
| 31 |
start_time: float = 0.0 # Start time in seconds
|
| 32 |
end_time: Optional[float] = None # End time in seconds (None = full video)
|
| 33 |
-
frame_interval: float = 0.
|
| 34 |
|
| 35 |
|
| 36 |
@dataclass
|
|
@@ -45,6 +46,7 @@ class DetectionResult:
|
|
| 45 |
frames_with_clock: int # Frames where clock was read successfully
|
| 46 |
plays: List[Dict[str, Any]] = field(default_factory=list) # Detected plays as dicts
|
| 47 |
stats: Dict[str, Any] = field(default_factory=dict) # Summary statistics
|
|
|
|
| 48 |
|
| 49 |
|
| 50 |
class PlayDetector:
|
|
@@ -128,16 +130,23 @@ class PlayDetector:
|
|
| 128 |
start_time = self.config.start_time
|
| 129 |
end_time = self.config.end_time if self.config.end_time else duration
|
| 130 |
|
| 131 |
-
# Process frames
|
| 132 |
stats = {"total_frames": 0, "frames_with_scorebug": 0, "frames_with_clock": 0}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
|
| 134 |
current_time = start_time
|
| 135 |
while current_time < end_time:
|
| 136 |
-
# Seek to current time
|
|
|
|
| 137 |
frame_number = int(current_time * fps)
|
| 138 |
cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
|
| 139 |
|
| 140 |
ret, frame = cap.read()
|
|
|
|
|
|
|
| 141 |
if not ret:
|
| 142 |
logger.warning("Could not read frame at %.1fs", current_time)
|
| 143 |
current_time += self.config.frame_interval
|
|
@@ -145,17 +154,44 @@ class PlayDetector:
|
|
| 145 |
|
| 146 |
stats["total_frames"] += 1
|
| 147 |
|
| 148 |
-
#
|
| 149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
|
| 151 |
# Progress logging every 30 seconds
|
| 152 |
if stats["total_frames"] % int(30 / self.config.frame_interval) == 0:
|
| 153 |
-
logger.info(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
|
| 155 |
current_time += self.config.frame_interval
|
| 156 |
|
| 157 |
cap.release()
|
| 158 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
# Build result
|
| 160 |
plays = self.state_machine.get_plays()
|
| 161 |
play_stats = self.state_machine.get_stats()
|
|
@@ -169,6 +205,7 @@ class PlayDetector:
|
|
| 169 |
frames_with_clock=stats["frames_with_clock"],
|
| 170 |
plays=[self._play_to_dict(p) for p in plays],
|
| 171 |
stats=play_stats,
|
|
|
|
| 172 |
)
|
| 173 |
|
| 174 |
logger.info("Detection complete!")
|
|
@@ -179,7 +216,7 @@ class PlayDetector:
|
|
| 179 |
|
| 180 |
return result
|
| 181 |
|
| 182 |
-
def _process_frame(self, frame, timestamp: float, stats: Dict[str, int]) -> None:
|
| 183 |
"""
|
| 184 |
Process a single frame through the detection pipeline.
|
| 185 |
|
|
@@ -187,24 +224,35 @@ class PlayDetector:
|
|
| 187 |
frame: Video frame (BGR)
|
| 188 |
timestamp: Current timestamp in seconds
|
| 189 |
stats: Statistics dictionary to update
|
|
|
|
|
|
|
| 190 |
"""
|
| 191 |
# Detect scorebug
|
|
|
|
| 192 |
scorebug = self.scorebug_detector.detect(frame)
|
|
|
|
|
|
|
| 193 |
|
| 194 |
if scorebug.detected:
|
| 195 |
stats["frames_with_scorebug"] += 1
|
| 196 |
|
| 197 |
-
# Read play clock
|
|
|
|
| 198 |
clock = self.clock_reader.read(frame, scorebug.bbox)
|
|
|
|
| 199 |
|
| 200 |
if clock.detected:
|
| 201 |
stats["frames_with_clock"] += 1
|
| 202 |
else:
|
| 203 |
-
# No scorebug - create empty clock reading
|
| 204 |
-
|
|
|
|
|
|
|
| 205 |
|
| 206 |
# Update state machine
|
|
|
|
| 207 |
self.state_machine.update(timestamp, scorebug, clock)
|
|
|
|
| 208 |
|
| 209 |
def _play_to_dict(self, play: PlayEvent) -> Dict[str, Any]:
|
| 210 |
"""Convert PlayEvent to dictionary for JSON serialization."""
|
|
@@ -240,6 +288,7 @@ class PlayDetector:
|
|
| 240 |
"frames_with_scorebug": result.frames_with_scorebug,
|
| 241 |
"frames_with_clock": result.frames_with_clock,
|
| 242 |
},
|
|
|
|
| 243 |
"plays": result.plays,
|
| 244 |
"stats": result.stats,
|
| 245 |
}
|
|
|
|
| 10 |
|
| 11 |
import json
|
| 12 |
import logging
|
| 13 |
+
import time
|
| 14 |
from dataclasses import dataclass, field
|
| 15 |
from pathlib import Path
|
| 16 |
from typing import Optional, List, Dict, Any
|
| 17 |
|
| 18 |
import cv2
|
| 19 |
|
| 20 |
+
from detectors import ScorebugDetector, PlayClockReader, PlayStateMachine, PlayEvent
|
| 21 |
|
| 22 |
logger = logging.getLogger(__name__)
|
| 23 |
|
|
|
|
| 31 |
clock_region_config_path: str # Path to play clock region config
|
| 32 |
start_time: float = 0.0 # Start time in seconds
|
| 33 |
end_time: Optional[float] = None # End time in seconds (None = full video)
|
| 34 |
+
frame_interval: float = 0.5 # Interval between frame samples (seconds) - 2 fps is sufficient since play clock changes once/sec
|
| 35 |
|
| 36 |
|
| 37 |
@dataclass
|
|
|
|
| 46 |
frames_with_clock: int # Frames where clock was read successfully
|
| 47 |
plays: List[Dict[str, Any]] = field(default_factory=list) # Detected plays as dicts
|
| 48 |
stats: Dict[str, Any] = field(default_factory=dict) # Summary statistics
|
| 49 |
+
timing: Dict[str, float] = field(default_factory=dict) # Timing breakdown by section
|
| 50 |
|
| 51 |
|
| 52 |
class PlayDetector:
|
|
|
|
| 130 |
start_time = self.config.start_time
|
| 131 |
end_time = self.config.end_time if self.config.end_time else duration
|
| 132 |
|
| 133 |
+
# Process frames with timing tracking
|
| 134 |
stats = {"total_frames": 0, "frames_with_scorebug": 0, "frames_with_clock": 0}
|
| 135 |
+
timing = {"scorebug_detection": 0.0, "playclock_ocr": 0.0, "state_machine": 0.0, "video_io": 0.0}
|
| 136 |
+
|
| 137 |
+
# Flag to track if we've locked the scorebug region
|
| 138 |
+
scorebug_region_locked = False
|
| 139 |
|
| 140 |
current_time = start_time
|
| 141 |
while current_time < end_time:
|
| 142 |
+
# Seek to current time (video I/O)
|
| 143 |
+
t_io_start = time.perf_counter()
|
| 144 |
frame_number = int(current_time * fps)
|
| 145 |
cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
|
| 146 |
|
| 147 |
ret, frame = cap.read()
|
| 148 |
+
timing["video_io"] += time.perf_counter() - t_io_start
|
| 149 |
+
|
| 150 |
if not ret:
|
| 151 |
logger.warning("Could not read frame at %.1fs", current_time)
|
| 152 |
current_time += self.config.frame_interval
|
|
|
|
| 154 |
|
| 155 |
stats["total_frames"] += 1
|
| 156 |
|
| 157 |
+
# On first successful scorebug detection, lock to fixed region for speed
|
| 158 |
+
if not scorebug_region_locked:
|
| 159 |
+
t_start = time.perf_counter()
|
| 160 |
+
if self.scorebug_detector.discover_and_lock_region(frame):
|
| 161 |
+
scorebug_region_locked = True
|
| 162 |
+
logger.info("Scorebug region locked at %s - using fast detection mode", self.scorebug_detector.fixed_region)
|
| 163 |
+
timing["scorebug_detection"] += time.perf_counter() - t_start
|
| 164 |
+
# Still process this frame normally
|
| 165 |
+
self._process_frame(frame, current_time, stats, timing, skip_scorebug_timing=True)
|
| 166 |
+
else:
|
| 167 |
+
# Run detection pipeline with timing
|
| 168 |
+
self._process_frame(frame, current_time, stats, timing)
|
| 169 |
|
| 170 |
# Progress logging every 30 seconds
|
| 171 |
if stats["total_frames"] % int(30 / self.config.frame_interval) == 0:
|
| 172 |
+
logger.info(
|
| 173 |
+
"Progress: %.1fs / %.1fs (%.0f%%), %d plays detected",
|
| 174 |
+
current_time,
|
| 175 |
+
end_time,
|
| 176 |
+
100 * (current_time - start_time) / (end_time - start_time),
|
| 177 |
+
len(self.state_machine.get_plays()),
|
| 178 |
+
)
|
| 179 |
|
| 180 |
current_time += self.config.frame_interval
|
| 181 |
|
| 182 |
cap.release()
|
| 183 |
|
| 184 |
+
# Log timing breakdown
|
| 185 |
+
total_time = sum(timing.values())
|
| 186 |
+
logger.info("=" * 50)
|
| 187 |
+
logger.info("TIMING BREAKDOWN")
|
| 188 |
+
logger.info("=" * 50)
|
| 189 |
+
for section, duration in timing.items():
|
| 190 |
+
pct = 100 * duration / total_time if total_time > 0 else 0
|
| 191 |
+
logger.info(" %s: %.2fs (%.1f%%)", section, duration, pct)
|
| 192 |
+
logger.info(" TOTAL: %.2fs", total_time)
|
| 193 |
+
logger.info("=" * 50)
|
| 194 |
+
|
| 195 |
# Build result
|
| 196 |
plays = self.state_machine.get_plays()
|
| 197 |
play_stats = self.state_machine.get_stats()
|
|
|
|
| 205 |
frames_with_clock=stats["frames_with_clock"],
|
| 206 |
plays=[self._play_to_dict(p) for p in plays],
|
| 207 |
stats=play_stats,
|
| 208 |
+
timing=timing,
|
| 209 |
)
|
| 210 |
|
| 211 |
logger.info("Detection complete!")
|
|
|
|
| 216 |
|
| 217 |
return result
|
| 218 |
|
| 219 |
+
def _process_frame(self, frame, timestamp: float, stats: Dict[str, int], timing: Dict[str, float], skip_scorebug_timing: bool = False) -> None:
|
| 220 |
"""
|
| 221 |
Process a single frame through the detection pipeline.
|
| 222 |
|
|
|
|
| 224 |
frame: Video frame (BGR)
|
| 225 |
timestamp: Current timestamp in seconds
|
| 226 |
stats: Statistics dictionary to update
|
| 227 |
+
timing: Timing dictionary to update
|
| 228 |
+
skip_scorebug_timing: If True, don't add to scorebug timing (already counted in region discovery)
|
| 229 |
"""
|
| 230 |
# Detect scorebug
|
| 231 |
+
t_start = time.perf_counter()
|
| 232 |
scorebug = self.scorebug_detector.detect(frame)
|
| 233 |
+
if not skip_scorebug_timing:
|
| 234 |
+
timing["scorebug_detection"] += time.perf_counter() - t_start
|
| 235 |
|
| 236 |
if scorebug.detected:
|
| 237 |
stats["frames_with_scorebug"] += 1
|
| 238 |
|
| 239 |
+
# Read play clock (OCR - most expensive operation)
|
| 240 |
+
t_start = time.perf_counter()
|
| 241 |
clock = self.clock_reader.read(frame, scorebug.bbox)
|
| 242 |
+
timing["playclock_ocr"] += time.perf_counter() - t_start
|
| 243 |
|
| 244 |
if clock.detected:
|
| 245 |
stats["frames_with_clock"] += 1
|
| 246 |
else:
|
| 247 |
+
# No scorebug - create empty clock reading (no OCR needed)
|
| 248 |
+
from detectors import PlayClockReading
|
| 249 |
+
|
| 250 |
+
clock = PlayClockReading(detected=False, value=None, confidence=0.0, raw_text="NO_SCOREBUG")
|
| 251 |
|
| 252 |
# Update state machine
|
| 253 |
+
t_start = time.perf_counter()
|
| 254 |
self.state_machine.update(timestamp, scorebug, clock)
|
| 255 |
+
timing["state_machine"] += time.perf_counter() - t_start
|
| 256 |
|
| 257 |
def _play_to_dict(self, play: PlayEvent) -> Dict[str, Any]:
|
| 258 |
"""Convert PlayEvent to dictionary for JSON serialization."""
|
|
|
|
| 288 |
"frames_with_scorebug": result.frames_with_scorebug,
|
| 289 |
"frames_with_clock": result.frames_with_clock,
|
| 290 |
},
|
| 291 |
+
"timing": result.timing,
|
| 292 |
"plays": result.plays,
|
| 293 |
"stats": result.stats,
|
| 294 |
}
|