andytaylor-smg commited on
Commit
c90b51c
·
1 Parent(s): 220e5fb

better clock reading, faster

Browse files
scripts/benchmark_ocr.py ADDED
@@ -0,0 +1,540 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Benchmark different OCR methods for play clock reading.
4
+
5
+ This script compares:
6
+ 1. Tesseract (current method)
7
+ 2. EasyOCR (deep learning based)
8
+ 3. Template matching (custom digit templates)
9
+
10
+ Usage:
11
+ python scripts/benchmark_ocr.py
12
+ """
13
+
14
+ import logging
15
+ import sys
16
+ import time
17
+ from pathlib import Path
18
+ from typing import List, Tuple, Optional, Dict, Any
19
+
20
+ import cv2
21
+ import numpy as np
22
+
23
+ # Add src to path for imports
24
+ sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
25
+
26
+ # pylint: disable=wrong-import-position
27
+ from detectors import ScorebugDetector
28
+
29
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
30
+ logger = logging.getLogger(__name__)
31
+
32
+ # Constants
33
+ VIDEO_PATH = Path(__file__).parent.parent / "full_videos" / "OSU vs Tenn 12.21.24.mkv"
34
+ TEMPLATE_PATH = Path(__file__).parent.parent / "data" / "templates" / "scorebug_template_main.png"
35
+ CONFIG_PATH = Path(__file__).parent.parent / "data" / "config" / "play_clock_region.json"
36
+ DIGIT_TEMPLATES_DIR = Path(__file__).parent.parent / "data" / "templates" / "digits"
37
+
38
+ # Test segment - sample frames with known clock values (30 frames)
39
+ TEST_TIMESTAMPS = [2320.0 + i for i in range(30)]
40
+ # Expected values based on countdown pattern: 18->17->...->12->40->40->40->39->...
41
+ # This is approximate - the real test will use Tesseract as ground truth
42
+
43
+
44
def load_play_clock_config(config_path: Optional[Path] = None) -> Tuple[int, int, int, int]:
    """Load the play clock region configuration from JSON.

    Args:
        config_path: Optional explicit path to the JSON config file.
            Defaults to the module-level CONFIG_PATH, preserving the
            original zero-argument call signature for existing callers.

    Returns:
        Tuple of (x_offset, y_offset, width, height) in pixels, relative
        to the detected scorebug bounding box.

    Raises:
        FileNotFoundError: If the config file does not exist.
        KeyError: If a required key is missing from the JSON document.
    """
    import json

    path = config_path if config_path is not None else CONFIG_PATH
    # Explicit encoding avoids platform-dependent default-codec surprises.
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)
    return (data["x_offset"], data["y_offset"], data["width"], data["height"])
51
+
52
+
53
def extract_test_frames(
    video_path: Path, detector: ScorebugDetector, timestamps: List[float]
) -> List[Tuple[float, np.ndarray, Tuple[int, int, int, int]]]:
    """Extract frames that contain a detected scorebug.

    Args:
        video_path: Path to the source video file.
        detector: ScorebugDetector used to locate the scorebug per frame.
        timestamps: Timestamps (seconds) at which to sample frames.

    Returns:
        List of (timestamp, frame, scorebug_bbox) tuples, one per timestamp
        where the frame could be read and a scorebug was detected.

    Raises:
        ValueError: If the video cannot be opened.
    """
    cap = cv2.VideoCapture(str(video_path))
    if not cap.isOpened():
        raise ValueError(f"Could not open video: {video_path}")

    frames = []
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)

        for ts in timestamps:
            # Seek by frame index; CAP_PROP_POS_FRAMES seeking is approximate
            # for some codecs but adequate for coarse sampling.
            frame_number = int(ts * fps)
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
            ret, frame = cap.read()
            if not ret:
                continue

            detection = detector.detect(frame)
            if detection.detected and detection.bbox:
                frames.append((ts, frame, detection.bbox))
    finally:
        # Release the capture even if detection raises, so the video handle
        # and decoder resources are not leaked.
        cap.release()

    return frames
77
+
78
+
79
def extract_play_clock_region(frame: np.ndarray, scorebug_bbox: Tuple[int, int, int, int], config: Tuple[int, int, int, int]) -> np.ndarray:
    """Extract the play clock sub-image from a full frame.

    Args:
        frame: Full BGR frame (H x W x C numpy array).
        scorebug_bbox: (x, y, w, h) of the detected scorebug.
        config: (x_offset, y_offset, width, height) of the play clock,
            relative to the scorebug's top-left corner.

    Returns:
        A copy of the play clock region. Coordinates are clamped to be
        non-negative: a negative computed index would otherwise silently
        slice from the opposite edge of the frame via numpy's negative
        indexing. The result may be smaller than (height, width) if the
        region extends past the frame edge.
    """
    sb_x, sb_y, _sb_w, _sb_h = scorebug_bbox
    x_offset, y_offset, width, height = config

    # Clamp to >= 0 to prevent negative-index wraparound.
    pc_x = max(0, sb_x + x_offset)
    pc_y = max(0, sb_y + y_offset)

    return frame[pc_y : pc_y + height, pc_x : pc_x + width].copy()
88
+
89
+
90
def preprocess_for_ocr(region: np.ndarray) -> np.ndarray:
    """Standard preprocessing for OCR.

    Pipeline: grayscale -> 4x upscale -> Otsu binarization -> ensure dark
    text on a light background (what Tesseract prefers).
    """
    # Grayscale, then upscale 4x so small scorebug digits have enough pixels.
    grayscale = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)
    upscaled = cv2.resize(grayscale, None, fx=4, fy=4, interpolation=cv2.INTER_LINEAR)

    # Otsu picks the threshold automatically from the intensity histogram.
    _, thresholded = cv2.threshold(upscaled, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # A mostly-dark result means light digits on a dark background; flip it
    # so the OCR engine sees dark text on a light background.
    if np.mean(thresholded) < 128:
        thresholded = cv2.bitwise_not(thresholded)

    return thresholded
107
+
108
+
109
+ # ============================================================
110
+ # OCR Method 1: Tesseract (current baseline)
111
+ # ============================================================
112
def ocr_tesseract(region: np.ndarray) -> Tuple[Optional[int], float]:
    """Read digits using Tesseract.

    Returns (value, confidence in [0, 1]) when a digit string in the valid
    play clock range 0..40 is read; (None, 0.0) otherwise.
    """
    import pytesseract

    binary = preprocess_for_ocr(region)

    # Surround the digits with a white border so Tesseract has some margin.
    pad = 10
    binary = cv2.copyMakeBorder(binary, pad, pad, pad, pad, cv2.BORDER_CONSTANT, value=255)

    tess_config = "--psm 7 -c tessedit_char_whitelist=0123456789"

    try:
        data = pytesseract.image_to_data(binary, config=tess_config, output_type=pytesseract.Output.DICT)

        # Keep non-empty tokens with positive confidence; take the highest
        # confidence (max() keeps the first on ties, matching a strict scan).
        candidates = [
            (float(conf), token.strip())
            for token, conf in zip(data["text"], data["conf"])
            if token.strip() and float(conf) > 0.0
        ]
        if candidates:
            best_conf, best_text = max(candidates, key=lambda item: item[0])
            if best_text.isdigit():
                reading = int(best_text)
                if 0 <= reading <= 40:
                    return reading, best_conf / 100.0

    except Exception as e:  # pylint: disable=broad-except
        logger.debug(f"Tesseract error: {e}")

    return None, 0.0
145
+
146
+
147
+ # ============================================================
148
+ # OCR Method 2: EasyOCR
149
+ # ============================================================
150
_easyocr_reader = None


def get_easyocr_reader():
    """Return the module-wide EasyOCR reader, creating it on first use.

    Returns None (after logging a warning) when easyocr is not installed.
    """
    global _easyocr_reader

    if _easyocr_reader is not None:
        return _easyocr_reader

    try:
        import easyocr
    except ImportError:
        logger.warning("EasyOCR not installed. Install with: pip install easyocr")
        return None

    _easyocr_reader = easyocr.Reader(["en"], gpu=False)  # CPU mode for fair comparison
    logger.info("EasyOCR reader initialized")
    return _easyocr_reader
166
+
167
+
168
def ocr_easyocr(region: np.ndarray) -> Tuple[Optional[int], float]:
    """Read digits using EasyOCR.

    Returns (value, confidence) for a valid 0..40 reading, else (None, 0.0).
    """
    reader = get_easyocr_reader()
    if reader is None:
        return None, 0.0

    binary = preprocess_for_ocr(region)

    try:
        # EasyOCR expects BGR or grayscale
        detections = reader.readtext(binary, allowlist="0123456789", detail=1)

        if detections:
            # Each detection is (bbox, text, confidence); keep the most confident.
            _bbox, raw_text, conf = max(detections, key=lambda d: d[2])
            text = raw_text.strip()

            if text.isdigit():
                reading = int(text)
                if 0 <= reading <= 40:
                    return reading, conf

    except Exception as e:  # pylint: disable=broad-except
        logger.debug(f"EasyOCR error: {e}")

    return None, 0.0
195
+
196
+
197
+ # ============================================================
198
+ # OCR Method 3: Template Matching for Digits
199
+ # ============================================================
200
+
201
+
202
class DigitTemplateMatcher:
    """Fast digit recognition using template matching.

    Templates are not shipped with the repo; they are bootstrapped at runtime
    by calibrate_from_tesseract(), which uses high-confidence Tesseract reads
    as ground truth to crop per-digit template images. read() then matches
    contours against those templates with a simple pixel-difference score.
    """

    def __init__(self) -> None:
        # Maps digit character ("0".."9") -> binarized template image.
        self.digit_templates: Dict[str, np.ndarray] = {}
        # Set True by calibrate_from_tesseract() once enough templates exist.
        self._calibrated = False

    def calibrate_from_tesseract(self, regions: List[np.ndarray]) -> bool:
        """
        Calibrate digit templates using Tesseract as ground truth on first few frames.

        This extracts individual digit images from frames where Tesseract successfully reads values.

        Args:
            regions: Raw (un-preprocessed) play clock region images.

        Returns:
            True if enough distinct digit templates were collected.
        """
        logger.info("Calibrating digit templates from Tesseract readings...")

        for region in regions:
            # Get Tesseract reading as ground truth; skip low-confidence reads
            # so bad labels don't poison the templates.
            value, conf = ocr_tesseract(region)
            if value is None or conf < 0.7:
                continue

            # Preprocess and extract digit regions
            preprocessed = preprocess_for_ocr(region)
            h, w = preprocessed.shape

            # Find digit contours. preprocess_for_ocr yields dark digits on a
            # light background, so invert before findContours (which expects
            # white foreground on black).
            contours, _ = cv2.findContours(cv2.bitwise_not(preprocessed), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            if not contours:
                continue

            # Get bounding boxes sorted left-to-right
            boxes = [cv2.boundingRect(c) for c in contours]
            boxes = [(x, y, bw, bh) for x, y, bw, bh in boxes if bh > h * 0.3]  # Filter small noise
            boxes.sort(key=lambda b: b[0])  # Sort by x position

            # Extract digits based on value. The count of contour boxes must
            # match the digit count of the Tesseract reading, otherwise we
            # can't know which box is which digit.
            value_str = str(value)
            if len(boxes) != len(value_str):
                continue  # Mismatch, skip

            for i, (x, y, bw, bh) in enumerate(boxes):
                digit = value_str[i]
                # Add padding around digit
                pad = 4
                x1 = max(0, x - pad)
                y1 = max(0, y - pad)
                x2 = min(w, x + bw + pad)
                y2 = min(h, y + bh + pad)

                digit_img = preprocessed[y1:y2, x1:x2]

                # Store template (keep the largest-area crop per digit as a
                # proxy for best quality)
                if digit not in self.digit_templates or digit_img.shape[0] * digit_img.shape[1] > self.digit_templates[digit].shape[0] * self.digit_templates[digit].shape[1]:
                    self.digit_templates[digit] = digit_img.copy()

            # Stop early once all ten digits 0-9 have a template.
            if all(str(d) in self.digit_templates for d in range(10)):
                break

        logger.info(f" Calibrated templates for digits: {sorted(self.digit_templates.keys())}")
        # Consider calibration good enough with at least 5 distinct digits
        # (the play clock counts down, so low digits dominate).
        self._calibrated = len(self.digit_templates) >= 5  # At least 0-4 for play clock

        return self._calibrated

    def read(self, region: np.ndarray) -> Tuple[Optional[int], float]:
        """Read digits using template matching.

        Args:
            region: Raw play clock region image.

        Returns:
            (value, average per-digit confidence) for a valid 0..40 reading,
            else (None, 0.0). Requires prior successful calibration.
        """
        if not self._calibrated:
            return None, 0.0

        preprocessed = preprocess_for_ocr(region)
        h, w = preprocessed.shape

        # Find digit contours (inverted: white foreground for findContours)
        contours, _ = cv2.findContours(cv2.bitwise_not(preprocessed), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        if not contours:
            return None, 0.0

        # Get bounding boxes sorted left-to-right
        boxes = [cv2.boundingRect(c) for c in contours]
        boxes = [(x, y, bw, bh) for x, y, bw, bh in boxes if bh > h * 0.3]  # Filter noise
        boxes.sort(key=lambda b: b[0])

        if not boxes:
            return None, 0.0

        # Match each digit region to templates
        digits = []
        total_conf = 0.0

        for x, y, bw, bh in boxes:
            # Extract digit with padding (same pad as calibration so crops
            # are comparable to the stored templates)
            pad = 4
            x1 = max(0, x - pad)
            y1 = max(0, y - pad)
            x2 = min(w, x + bw + pad)
            y2 = min(h, y + bh + pad)

            digit_img = preprocessed[y1:y2, x1:x2]

            # Match against all templates
            best_digit = None
            best_conf = 0.0

            for digit, template in self.digit_templates.items():
                # Resize template to match digit height
                if template.shape[0] == 0 or digit_img.shape[0] == 0:
                    continue

                scale = digit_img.shape[0] / template.shape[0]
                new_w = max(1, int(template.shape[1] * scale))
                resized = cv2.resize(template, (new_w, digit_img.shape[0]), interpolation=cv2.INTER_LINEAR)

                # Pad the narrower image with white (background) so both
                # have the same width for pixelwise comparison
                digit_img_padded = digit_img
                if resized.shape[1] < digit_img.shape[1]:
                    diff = digit_img.shape[1] - resized.shape[1]
                    resized = cv2.copyMakeBorder(resized, 0, 0, diff // 2, diff - diff // 2, cv2.BORDER_CONSTANT, value=255)
                elif digit_img.shape[1] < resized.shape[1]:
                    diff = resized.shape[1] - digit_img.shape[1]
                    digit_img_padded = cv2.copyMakeBorder(digit_img, 0, 0, diff // 2, diff - diff // 2, cv2.BORDER_CONSTANT, value=255)

                # Ensure same size (crop any 1px rounding mismatch)
                min_h = min(resized.shape[0], digit_img_padded.shape[0])
                min_w = min(resized.shape[1], digit_img_padded.shape[1])
                resized = resized[:min_h, :min_w]
                digit_img_padded = digit_img_padded[:min_h, :min_w]

                if resized.size == 0 or digit_img_padded.size == 0:
                    continue

                # Simple mean absolute pixel difference, mapped to a
                # similarity score in [0, 1] (1.0 = identical images)
                diff = np.abs(resized.astype(float) - digit_img_padded.astype(float))
                score = 1.0 - (np.mean(diff) / 255.0)

                if score > best_conf:
                    best_conf = score
                    best_digit = digit

            # Reject weak matches so noise blobs don't become digits
            if best_digit is not None and best_conf > 0.5:
                digits.append(best_digit)
                total_conf += best_conf

        if not digits:
            return None, 0.0

        # Combine digits into number
        try:
            value = int("".join(digits))
            avg_conf = total_conf / len(digits)
            if 0 <= value <= 40:
                return value, avg_conf
        except ValueError:
            pass

        return None, 0.0
360
+
361
+
362
_digit_matcher = None


def get_digit_matcher() -> DigitTemplateMatcher:
    """Return the shared DigitTemplateMatcher instance, creating it lazily."""
    global _digit_matcher

    # Lazily construct the singleton on first access.
    if _digit_matcher is None:
        _digit_matcher = DigitTemplateMatcher()

    return _digit_matcher
371
+
372
+
373
def ocr_template_matching(region: np.ndarray) -> Tuple[Optional[int], float]:
    """Read digits using template matching (delegates to the shared matcher)."""
    return get_digit_matcher().read(region)
377
+
378
+
379
+ # ============================================================
380
+ # Benchmark Runner
381
+ # ============================================================
382
def run_benchmark(frames: List[Tuple[float, np.ndarray, Tuple[int, int, int, int]]], config: Tuple[int, int, int, int]) -> None:
    """Run benchmark comparing OCR methods.

    Compares Tesseract (baseline), EasyOCR, and digit template matching on
    the same extracted play clock regions, logging per-method success rate,
    timing, and accuracy. Tesseract's readings serve as the ground truth for
    the other two methods' accuracy figures, so "accuracy" here means
    agreement with Tesseract, not true accuracy.

    Args:
        frames: (timestamp, frame, scorebug_bbox) tuples; must be non-empty
            (several statistics divide by len(regions)).
        config: (x_offset, y_offset, width, height) play clock region config.
    """
    logger.info("=" * 60)
    logger.info("OCR BENCHMARK")
    logger.info("=" * 60)
    logger.info(f"Testing {len(frames)} frames")

    # Extract play clock regions once up front so every method sees the
    # exact same inputs and region extraction isn't counted in timings.
    regions = []
    for ts, frame, scorebug_bbox in frames:
        region = extract_play_clock_region(frame, scorebug_bbox, config)
        regions.append((ts, region))

    # Method 1: Tesseract (baseline - also used for ground truth)
    logger.info("")
    logger.info("-" * 60)
    logger.info("Method 1: Tesseract (baseline)")
    logger.info("-" * 60)

    tesseract_results = []
    t_start = time.perf_counter()
    for ts, region in regions:
        value, conf = ocr_tesseract(region)
        tesseract_results.append((ts, value, conf))
    tesseract_time = time.perf_counter() - t_start

    tesseract_success = sum(1 for _, v, _ in tesseract_results if v is not None)
    logger.info(f" Success rate: {tesseract_success}/{len(regions)} ({100*tesseract_success/len(regions):.1f}%)")
    logger.info(f" Total time: {tesseract_time:.3f}s")
    logger.info(f" Per-frame: {1000*tesseract_time/len(regions):.1f}ms")
    logger.info(f" Values: {[v for _, v, _ in tesseract_results]}")

    # Use Tesseract results as ground truth for accuracy comparison
    ground_truth = {ts: v for ts, v, _ in tesseract_results if v is not None}

    # Method 2: EasyOCR
    logger.info("")
    logger.info("-" * 60)
    logger.info("Method 2: EasyOCR")
    logger.info("-" * 60)

    reader = get_easyocr_reader()
    # Defaults used by the summary section when EasyOCR is unavailable.
    easyocr_time = 0
    easyocr_success = 0
    easyocr_accuracy = 0

    if reader:
        easyocr_results = []
        t_start = time.perf_counter()
        for ts, region in regions:
            value, conf = ocr_easyocr(region)
            easyocr_results.append((ts, value, conf))
        easyocr_time = time.perf_counter() - t_start

        easyocr_success = sum(1 for _, v, _ in easyocr_results if v is not None)
        # Calculate accuracy vs ground truth
        easyocr_correct = sum(1 for ts, v, _ in easyocr_results if ts in ground_truth and v == ground_truth[ts])
        easyocr_accuracy = easyocr_correct / len(ground_truth) * 100 if ground_truth else 0

        logger.info(f" Success rate: {easyocr_success}/{len(regions)} ({100*easyocr_success/len(regions):.1f}%)")
        logger.info(f" Accuracy vs Tesseract: {easyocr_correct}/{len(ground_truth)} ({easyocr_accuracy:.1f}%)")
        logger.info(f" Total time: {easyocr_time:.3f}s")
        logger.info(f" Per-frame: {1000*easyocr_time/len(regions):.1f}ms")
        logger.info(f" Speedup vs Tesseract: {tesseract_time/easyocr_time:.2f}x")
        logger.info(f" Values: {[v for _, v, _ in easyocr_results]}")
    else:
        logger.info(" SKIPPED (EasyOCR not installed)")

    # Method 3: Template Matching
    logger.info("")
    logger.info("-" * 60)
    logger.info("Method 3: Template Matching")
    logger.info("-" * 60)

    matcher = get_digit_matcher()

    # Calibrate using first 10 regions (not counted in benchmark time)
    calibration_regions = [r for _, r in regions[:10]]
    if matcher.calibrate_from_tesseract(calibration_regions):
        template_results = []
        t_start = time.perf_counter()
        for ts, region in regions:
            value, conf = ocr_template_matching(region)
            template_results.append((ts, value, conf))
        template_time = time.perf_counter() - t_start

        template_success = sum(1 for _, v, _ in template_results if v is not None)
        template_correct = sum(1 for ts, v, _ in template_results if ts in ground_truth and v == ground_truth[ts])
        template_accuracy = template_correct / len(ground_truth) * 100 if ground_truth else 0

        logger.info(f" Success rate: {template_success}/{len(regions)} ({100*template_success/len(regions):.1f}%)")
        logger.info(f" Accuracy vs Tesseract: {template_correct}/{len(ground_truth)} ({template_accuracy:.1f}%)")
        logger.info(f" Total time: {template_time:.3f}s")
        logger.info(f" Per-frame: {1000*template_time/len(regions):.1f}ms")
        logger.info(f" Speedup vs Tesseract: {tesseract_time/template_time:.2f}x")
        logger.info(f" Values: {[v for _, v, _ in template_results]}")
    else:
        logger.info(" SKIPPED (calibration failed)")
        # Defaults so the summary section can run unconditionally.
        template_time = 0
        template_success = 0
        template_accuracy = 0

    # Summary
    logger.info("")
    logger.info("=" * 60)
    logger.info("SUMMARY")
    logger.info("=" * 60)
    logger.info(f"{'Method':<20} {'Time/frame':<12} {'Success':<12} {'Accuracy':<12} {'Speedup':<10}")
    logger.info("-" * 66)
    logger.info(f"{'Tesseract':<20} {f'{1000*tesseract_time/len(regions):.1f}ms':<12} {f'{tesseract_success}/{len(regions)}':<12} {'(baseline)':<12} {'1.00x':<10}")
    # Skipped methods (time == 0) are omitted from the summary table.
    if reader and easyocr_time > 0:
        logger.info(f"{'EasyOCR':<20} {f'{1000*easyocr_time/len(regions):.1f}ms':<12} {f'{easyocr_success}/{len(regions)}':<12} {f'{easyocr_accuracy:.1f}%':<12} {f'{tesseract_time/easyocr_time:.2f}x':<10}")
    if template_time > 0:
        logger.info(f"{'Template Matching':<20} {f'{1000*template_time/len(regions):.1f}ms':<12} {f'{template_success}/{len(regions)}':<12} {f'{template_accuracy:.1f}%':<12} {f'{tesseract_time/template_time:.2f}x':<10}")
496
+
497
+
498
def main():
    """Main entry point.

    Returns 0 on success, 1 when a required file is missing or no frames
    with a scorebug could be extracted.
    """
    logger.info("OCR Benchmark Tool")
    logger.info("=" * 60)

    # Verify that every required input file exists before doing any work.
    required_files = (
        (VIDEO_PATH, "Video"),
        (TEMPLATE_PATH, "Template"),
        (CONFIG_PATH, "Config"),
    )
    for path, label in required_files:
        if not path.exists():
            logger.error(f"{label} not found: {path}")
            return 1

    # Load config
    config = load_play_clock_config()
    logger.info(f"Play clock config: {config}")

    # Initialize scorebug detector
    detector = ScorebugDetector(template_path=str(TEMPLATE_PATH))

    # Extract test frames
    logger.info(f"Extracting {len(TEST_TIMESTAMPS)} test frames...")
    frames = extract_test_frames(VIDEO_PATH, detector, TEST_TIMESTAMPS)
    logger.info(f"Extracted {len(frames)} frames with scorebug")

    if not frames:
        logger.error("No frames with scorebug found!")
        return 1

    # Run benchmark
    run_benchmark(frames, config)

    return 0
536
+
537
+
538
+ if __name__ == "__main__":
539
+ sys.exit(main())
540
+
scripts/detect_plays.py CHANGED
@@ -95,7 +95,8 @@ def main():
95
  parser.add_argument("--output", type=str, help="Output JSON file path")
96
 
97
  # Processing options
98
- parser.add_argument("--interval", type=float, default=0.1, help="Frame sampling interval in seconds (default: 0.1)")
 
99
  parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging")
100
 
101
  args = parser.parse_args()
 
95
  parser.add_argument("--output", type=str, help="Output JSON file path")
96
 
97
  # Processing options
98
+ # Play clock only changes once per second, so 0.5s (2 fps) is sufficient and much faster
99
+ parser.add_argument("--interval", type=float, default=0.5, help="Frame sampling interval in seconds (default: 0.5)")
100
  parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging")
101
 
102
  args = parser.parse_args()
scripts/diagnose_play_clock.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Diagnostic script to visualize play clock region extraction and preprocessing.
4
+
5
+ This script extracts a few frames and saves debug images showing:
6
+ 1. The full frame with scorebug and play clock region highlighted
7
+ 2. The extracted play clock region (raw)
8
+ 3. The preprocessed play clock region (what OCR sees)
9
+
10
+ Usage:
11
+ python scripts/diagnose_play_clock.py
12
+ """
13
+
14
+ import logging
15
+ import sys
16
+ from pathlib import Path
17
+
18
+ import cv2
19
+ import numpy as np
20
+
21
+ # Add src to path for imports
22
+ sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
23
+
24
+ # pylint: disable=wrong-import-position
25
+ from detectors import ScorebugDetector, PlayClockReader
26
+
27
+ logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
28
+ logger = logging.getLogger(__name__)
29
+
30
+ # Constants
31
+ VIDEO_PATH = Path(__file__).parent.parent / "full_videos" / "OSU vs Tenn 12.21.24.mkv"
32
+ TEMPLATE_PATH = Path(__file__).parent.parent / "data" / "templates" / "scorebug_template_main.png"
33
+ CONFIG_PATH = Path(__file__).parent.parent / "data" / "config" / "play_clock_region.json"
34
+ OUTPUT_DIR = Path(__file__).parent.parent / "output" / "debug"
35
+
36
+ # Test at 38:40 - a known segment with plays
37
+ TEST_TIMESTAMPS = [2320.0, 2321.0, 2322.0, 2325.0, 2328.0] # Sample timestamps in seconds
38
+
39
+
40
+ def extract_debug_info(video_path: Path, detector: ScorebugDetector, reader: PlayClockReader, timestamps: list) -> None:
41
+ """
42
+ Extract frames and save debug visualizations.
43
+
44
+ Args:
45
+ video_path: Path to video file
46
+ detector: ScorebugDetector instance
47
+ reader: PlayClockReader instance
48
+ timestamps: List of timestamps to analyze
49
+ """
50
+ cap = cv2.VideoCapture(str(video_path))
51
+ if not cap.isOpened():
52
+ raise ValueError("Could not open video: %s" % video_path)
53
+
54
+ fps = cap.get(cv2.CAP_PROP_FPS)
55
+ logger.info("Video FPS: %.2f", fps)
56
+
57
+ OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
58
+
59
+ for timestamp in timestamps:
60
+ # Seek to timestamp
61
+ frame_number = int(timestamp * fps)
62
+ cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
63
+
64
+ ret, frame = cap.read()
65
+ if not ret:
66
+ logger.warning("Could not read frame at %.1fs", timestamp)
67
+ continue
68
+
69
+ # Detect scorebug
70
+ detection = detector.detect(frame)
71
+ if not detection.detected or not detection.bbox:
72
+ logger.warning("No scorebug at %.1fs", timestamp)
73
+ continue
74
+
75
+ sb_x, sb_y, sb_w, sb_h = detection.bbox
76
+ logger.info("Frame %.1fs: Scorebug at (%d, %d, %d, %d) conf=%.2f", timestamp, sb_x, sb_y, sb_w, sb_h, detection.confidence)
77
+
78
+ # Get play clock config
79
+ config = reader.config
80
+ if config is None:
81
+ logger.error("No play clock config loaded")
82
+ continue
83
+
84
+ # Calculate play clock region in absolute coordinates
85
+ pc_x = sb_x + config.x_offset
86
+ pc_y = sb_y + config.y_offset
87
+ pc_w = config.width
88
+ pc_h = config.height
89
+ logger.info("Play clock region: (%d, %d, %d, %d)", pc_x, pc_y, pc_w, pc_h)
90
+
91
+ # Extract play clock region
92
+ play_clock_region = frame[pc_y : pc_y + pc_h, pc_x : pc_x + pc_w].copy()
93
+
94
+ # Preprocess for OCR (same as PlayClockReader)
95
+ preprocessed = preprocess_for_debug(play_clock_region)
96
+
97
+ # Run OCR and get result
98
+ reading = reader.read(frame, detection.bbox)
99
+
100
+ # Create debug visualization
101
+ debug_frame = frame.copy()
102
+
103
+ # Draw scorebug bbox (blue)
104
+ cv2.rectangle(debug_frame, (sb_x, sb_y), (sb_x + sb_w, sb_y + sb_h), (255, 0, 0), 2)
105
+ cv2.putText(debug_frame, "Scorebug", (sb_x, sb_y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0), 2)
106
+
107
+ # Draw play clock region (green if detected, red otherwise)
108
+ pc_color = (0, 255, 0) if reading.detected else (0, 0, 255)
109
+ cv2.rectangle(debug_frame, (pc_x, pc_y), (pc_x + pc_w, pc_y + pc_h), pc_color, 2)
110
+
111
+ # Add text showing OCR result
112
+ if reading.detected:
113
+ text = "Clock: %d (%.0f%%)" % (reading.value, reading.confidence * 100)
114
+ else:
115
+ text = "FAILED: '%s'" % reading.raw_text
116
+ cv2.putText(debug_frame, text, (pc_x, pc_y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, pc_color, 2)
117
+
118
+ # Save outputs
119
+ ts_str = "%.0f" % timestamp
120
+
121
+ # Save full debug frame
122
+ cv2.imwrite(str(OUTPUT_DIR / ("frame_%s_full.png" % ts_str)), debug_frame)
123
+
124
+ # Save cropped scorebug region
125
+ scorebug_crop = frame[sb_y : sb_y + sb_h, sb_x : sb_x + sb_w].copy()
126
+ cv2.imwrite(str(OUTPUT_DIR / ("frame_%s_scorebug.png" % ts_str)), scorebug_crop)
127
+
128
+ # Save play clock region (raw and scaled)
129
+ cv2.imwrite(str(OUTPUT_DIR / ("frame_%s_playclock_raw.png" % ts_str)), play_clock_region)
130
+
131
+ # Scale up raw for easier viewing
132
+ scaled_raw = cv2.resize(play_clock_region, None, fx=4, fy=4, interpolation=cv2.INTER_NEAREST)
133
+ cv2.imwrite(str(OUTPUT_DIR / ("frame_%s_playclock_scaled.png" % ts_str)), scaled_raw)
134
+
135
+ # Save preprocessed (what OCR sees)
136
+ cv2.imwrite(str(OUTPUT_DIR / ("frame_%s_playclock_preprocessed.png" % ts_str)), preprocessed)
137
+
138
+ logger.info("Saved debug images for frame %.1fs", timestamp)
139
+ logger.info(" OCR Result: detected=%s, value=%s, conf=%.2f, raw='%s'", reading.detected, reading.value, reading.confidence, reading.raw_text)
140
+
141
+ cap.release()
142
+
143
+
144
+ def preprocess_for_debug(region: np.ndarray) -> np.ndarray:
145
+ """
146
+ Preprocess the play clock region for OCR (same as PlayClockReader).
147
+ Returns the preprocessed image for debugging.
148
+ """
149
+ # Convert to grayscale
150
+ gray = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)
151
+
152
+ # Scale up by 4x for better OCR accuracy on small digits
153
+ scale_factor = 4
154
+ scaled = cv2.resize(gray, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_LINEAR)
155
+
156
+ # Use Otsu's thresholding - works better for high-contrast scorebug displays
157
+ _, binary = cv2.threshold(scaled, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
158
+
159
+ # Determine if we need to invert (Tesseract prefers dark text on light background)
160
+ mean_intensity = np.mean(binary)
161
+ if mean_intensity < 128:
162
+ # Image is mostly dark (light digits on dark background) - invert for Tesseract
163
+ binary = cv2.bitwise_not(binary)
164
+
165
+ # Apply morphological operations to clean up noise
166
+ kernel = np.ones((2, 2), np.uint8)
167
+ binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
168
+ binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
169
+
170
+ # Add padding
171
+ padding = 10
172
+ binary = cv2.copyMakeBorder(binary, padding, padding, padding, padding, cv2.BORDER_CONSTANT, value=255)
173
+
174
+ return binary
175
+
176
+
177
+ def main():
178
+ """Main entry point for play clock diagnostic."""
179
+ logger.info("Play Clock Diagnostic Tool")
180
+ logger.info("=" * 50)
181
+
182
+ # Verify paths
183
+ if not VIDEO_PATH.exists():
184
+ logger.error("Video not found: %s", VIDEO_PATH)
185
+ return 1
186
+
187
+ if not TEMPLATE_PATH.exists():
188
+ logger.error("Template not found: %s", TEMPLATE_PATH)
189
+ return 1
190
+
191
+ if not CONFIG_PATH.exists():
192
+ logger.error("Config not found: %s", CONFIG_PATH)
193
+ return 1
194
+
195
+ # Initialize
196
+ logger.info("Initializing detectors...")
197
+ detector = ScorebugDetector(template_path=str(TEMPLATE_PATH))
198
+ reader = PlayClockReader(region_config_path=str(CONFIG_PATH))
199
+
200
+ # Run diagnostic
201
+ logger.info("Extracting debug info for %d timestamps...", len(TEST_TIMESTAMPS))
202
+ extract_debug_info(VIDEO_PATH, detector, reader, TEST_TIMESTAMPS)
203
+
204
+ logger.info("Debug images saved to: %s", OUTPUT_DIR)
205
+ logger.info("Diagnostic complete!")
206
+ return 0
207
+
208
+
209
+ if __name__ == "__main__":
210
+ sys.exit(main())
scripts/visualize_detections.py CHANGED
@@ -271,9 +271,130 @@ def create_timeline_image(plays: List[Dict], segment_start: float, segment_end:
271
  logger.info("Timeline saved to: %s", output_path)
272
 
273
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
  def generate_play_clips(results: Dict[str, Any], video_path: str, output_dir: str, padding: float = 2.0) -> None:
275
  """
276
- Generate video clips for each detected play.
277
 
278
  Args:
279
  results: Detection results
@@ -351,6 +472,37 @@ def generate_play_clips(results: Dict[str, Any], video_path: str, output_dir: st
351
  logger.info("Clip generation complete!")
352
 
353
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
354
  def main():
355
  """Main entry point."""
356
  parser = argparse.ArgumentParser(description="Visualize play detection results")
@@ -359,6 +511,8 @@ def main():
359
  parser.add_argument("--ground-truth", type=str, help="Path to ground truth JSON file")
360
  parser.add_argument("--video", type=str, help="Path to video file (for clip generation)")
361
  parser.add_argument("--generate-clips", action="store_true", help="Generate video clips for each play")
 
 
362
  parser.add_argument("--output-dir", type=str, help="Output directory for visualizations")
363
 
364
  args = parser.parse_args()
@@ -396,6 +550,7 @@ def main():
396
  create_timeline_image(results.get("plays", []), segment.get("start", 0), segment.get("end", 0), timeline_path)
397
 
398
  # Generate clips if requested
 
399
  if args.generate_clips:
400
  video_path = args.video or str(DEFAULT_VIDEO_PATH)
401
  if not Path(video_path).exists():
@@ -403,7 +558,14 @@ def main():
403
  return 1
404
 
405
  clips_dir = str(Path(output_dir) / "clips")
406
- generate_play_clips(results, video_path, clips_dir)
 
 
 
 
 
 
 
407
 
408
  return 0
409
 
 
271
  logger.info("Timeline saved to: %s", output_path)
272
 
273
 
274
def generate_play_clips_ffmpeg(results: Dict[str, Any], video_path: str, output_dir: str, padding: float = 2.0) -> Dict[str, float]:
    """
    Generate video clips for each detected play using ffmpeg (much faster than OpenCV).

    Each play is extracted to its own MP4 (re-encoded with libx264/aac for
    compatibility), then all successfully created clips are concatenated into a
    single "all_plays.mp4" highlight reel via the ffmpeg concat demuxer.

    Args:
        results: Detection results; expects a "plays" list of dicts with
            "play_number", "start_time" and "end_time" keys.
        video_path: Path to source video
        output_dir: Directory to save clips
        padding: Seconds of padding before/after play

    Returns:
        Dictionary with timing information ("clip_extraction", "concatenation")
    """
    import subprocess
    import time

    timing = {"clip_extraction": 0.0, "concatenation": 0.0}

    plays = results.get("plays", [])
    if not plays:
        logger.warning("No plays to generate clips for")
        return timing

    # Create output directory
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    logger.info("Generating %d play clips with ffmpeg...", len(plays))
    clip_paths = []

    t_start = time.perf_counter()

    for play in plays:
        play_num = play.get("play_number", 0)
        start_time = max(0, play.get("start_time", 0) - padding)
        end_time = play.get("end_time", 0) + padding
        duration = end_time - start_time

        clip_path = output_path / ("play_%02d.mp4" % play_num)

        # -ss before -i enables fast keyframe seeking; -t limits duration.
        cmd = [
            "ffmpeg",
            "-y",  # Overwrite output
            "-ss",
            str(start_time),
            "-i",
            video_path,
            "-t",
            str(duration),
            "-c:v",
            "libx264",  # Re-encode for compatibility
            "-preset",
            "fast",
            "-crf",
            "23",
            "-c:a",
            "aac",
            "-b:a",
            "128k",
            "-loglevel",
            "error",
            str(clip_path),
        ]

        try:
            subprocess.run(cmd, check=True, capture_output=True)
            # BUGFIX: record the clip only after ffmpeg succeeds, so the concat
            # list below never references a file that was not actually created
            # (previously a failed extraction would break the concat step).
            clip_paths.append(clip_path)
            logger.info(" Created: %s (%.1fs - %.1fs, duration: %.1fs)", clip_path.name, start_time, end_time, duration)
        except subprocess.CalledProcessError as e:
            logger.error(" Failed to create %s: %s", clip_path.name, e.stderr.decode() if e.stderr else str(e))

    timing["clip_extraction"] = time.perf_counter() - t_start
    logger.info("Clip extraction complete! (%.2fs)", timing["clip_extraction"])

    # Concatenate all successfully created clips into a single highlight video
    if len(clip_paths) > 1:
        t_start = time.perf_counter()
        concat_path = output_path / "all_plays.mp4"
        logger.info("Concatenating %d clips into %s...", len(clip_paths), concat_path.name)

        # The concat demuxer reads filenames from a list file; names are
        # relative to the ffmpeg working directory (we pass cwd=output_path).
        concat_list_path = output_path / "concat_list.txt"
        with open(concat_list_path, "w") as f:
            for clip_path in clip_paths:
                f.write("file '%s'\n" % clip_path.name)

        cmd = [
            "ffmpeg",
            "-y",
            "-f",
            "concat",
            "-safe",
            "0",
            "-i",
            str(concat_list_path),
            "-c",
            "copy",  # Clips share codec settings, so no re-encoding needed
            "-loglevel",
            "error",
            str(concat_path),
        ]

        try:
            subprocess.run(cmd, check=True, capture_output=True, cwd=str(output_path))
            logger.info(" Created: %s", concat_path.name)
        except subprocess.CalledProcessError as e:
            logger.error(" Failed to concatenate: %s", e.stderr.decode() if e.stderr else str(e))

        # Clean up concat list
        concat_list_path.unlink(missing_ok=True)

        timing["concatenation"] = time.perf_counter() - t_start
        logger.info("Concatenation complete! (%.2fs)", timing["concatenation"])

    return timing
393
+
394
+
395
  def generate_play_clips(results: Dict[str, Any], video_path: str, output_dir: str, padding: float = 2.0) -> None:
396
  """
397
+ Generate video clips for each detected play (legacy OpenCV version - slow).
398
 
399
  Args:
400
  results: Detection results
 
472
  logger.info("Clip generation complete!")
473
 
474
 
475
def print_timing_summary(results: Dict[str, Any], clip_timing: Optional[Dict[str, float]] = None) -> None:
    """Print timing breakdown from detection and clip generation."""
    detection_timing = results.get("timing", {})

    # Nothing recorded in either phase -> stay silent.
    if not detection_timing and not clip_timing:
        return

    logger.info("")
    logger.info("=" * 60)
    logger.info("TIMING BREAKDOWN")
    logger.info("=" * 60)

    detection_total = 0.0
    if detection_timing:
        logger.info("Detection Phase:")
        for name, seconds in detection_timing.items():
            logger.info(" %s: %.2fs", name, seconds)
            detection_total += seconds
        logger.info(" DETECTION TOTAL: %.2fs", detection_total)

    if clip_timing:
        logger.info("Clip Generation Phase:")
        clip_total = 0.0
        for name, seconds in clip_timing.items():
            logger.info(" %s: %.2fs", name, seconds)
            clip_total += seconds
        logger.info(" CLIP TOTAL: %.2fs", clip_total)

    logger.info("=" * 60)
505
+
506
  def main():
507
  """Main entry point."""
508
  parser = argparse.ArgumentParser(description="Visualize play detection results")
 
511
  parser.add_argument("--ground-truth", type=str, help="Path to ground truth JSON file")
512
  parser.add_argument("--video", type=str, help="Path to video file (for clip generation)")
513
  parser.add_argument("--generate-clips", action="store_true", help="Generate video clips for each play")
514
+ parser.add_argument("--use-opencv", action="store_true", help="Use OpenCV instead of ffmpeg for clip generation (slower)")
515
+ parser.add_argument("--padding", type=float, default=2.0, help="Seconds of padding before/after each play (default: 2.0)")
516
  parser.add_argument("--output-dir", type=str, help="Output directory for visualizations")
517
 
518
  args = parser.parse_args()
 
550
  create_timeline_image(results.get("plays", []), segment.get("start", 0), segment.get("end", 0), timeline_path)
551
 
552
  # Generate clips if requested
553
+ clip_timing = None
554
  if args.generate_clips:
555
  video_path = args.video or str(DEFAULT_VIDEO_PATH)
556
  if not Path(video_path).exists():
 
558
  return 1
559
 
560
  clips_dir = str(Path(output_dir) / "clips")
561
+
562
+ if args.use_opencv:
563
+ generate_play_clips(results, video_path, clips_dir, padding=args.padding)
564
+ else:
565
+ clip_timing = generate_play_clips_ffmpeg(results, video_path, clips_dir, padding=args.padding)
566
+
567
+ # Print timing summary
568
+ print_timing_summary(results, clip_timing)
569
 
570
  return 0
571
 
src/detectors/play_clock_reader.py CHANGED
@@ -184,8 +184,9 @@ class PlayClockReader:
184
  Preprocessing steps:
185
  1. Convert to grayscale
186
  2. Scale up for better digit recognition
187
- 3. Apply adaptive thresholding
188
- 4. Invert if needed (Tesseract prefers dark text on light background)
 
189
 
190
  Args:
191
  region: Play clock region (BGR format)
@@ -196,23 +197,31 @@ class PlayClockReader:
196
  # Convert to grayscale
197
  gray = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)
198
 
199
- # Scale up by 3x for better OCR accuracy on small digits
200
- scale_factor = 3
201
- scaled = cv2.resize(gray, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_CUBIC)
202
 
203
- # Apply Gaussian blur to reduce noise
204
- blurred = cv2.GaussianBlur(scaled, (3, 3), 0)
205
-
206
- # Apply adaptive thresholding for better handling of varying lighting
207
- binary = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
208
 
209
  # Determine if we need to invert (Tesseract prefers dark text on light background)
210
  # Check if the image is predominantly dark (likely light digits on dark background)
211
  mean_intensity = np.mean(binary)
212
  if mean_intensity < 128:
213
- # Invert so we have dark text on light background
214
  binary = cv2.bitwise_not(binary)
215
 
 
 
 
 
 
 
 
 
 
 
216
  return binary
217
 
218
  def _run_ocr(self, image: np.ndarray) -> Tuple[str, float]:
 
184
  Preprocessing steps:
185
  1. Convert to grayscale
186
  2. Scale up for better digit recognition
187
+ 3. Apply Otsu's thresholding (better for high-contrast scorebug displays)
188
+ 4. Invert to get dark text on light background (Tesseract preference)
189
+ 5. Apply morphological operations to clean up noise
190
 
191
  Args:
192
  region: Play clock region (BGR format)
 
197
  # Convert to grayscale
198
  gray = cv2.cvtColor(region, cv2.COLOR_BGR2GRAY)
199
 
200
+ # Scale up by 4x for better OCR accuracy on small digits
201
+ scale_factor = 4
202
+ scaled = cv2.resize(gray, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_LINEAR)
203
 
204
+ # Use Otsu's thresholding - works better for high-contrast scorebug displays
205
+ # The play clock has white digits on a dark background with good contrast
206
+ _, binary = cv2.threshold(scaled, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
 
 
207
 
208
  # Determine if we need to invert (Tesseract prefers dark text on light background)
209
  # Check if the image is predominantly dark (likely light digits on dark background)
210
  mean_intensity = np.mean(binary)
211
  if mean_intensity < 128:
212
+ # Image is mostly dark (light digits on dark background) - invert for Tesseract
213
  binary = cv2.bitwise_not(binary)
214
 
215
+ # Apply morphological operations to clean up noise
216
+ # Use a small kernel to remove small noise while preserving digit shapes
217
+ kernel = np.ones((2, 2), np.uint8)
218
+ binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel) # Fill small holes
219
+ binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel) # Remove small noise
220
+
221
+ # Add padding around the image - helps Tesseract with edge detection
222
+ padding = 10
223
+ binary = cv2.copyMakeBorder(binary, padding, padding, padding, padding, cv2.BORDER_CONSTANT, value=255)
224
+
225
  return binary
226
 
227
  def _run_ocr(self, image: np.ndarray) -> Tuple[str, float]:
src/detectors/play_state_machine.py CHANGED
@@ -50,6 +50,7 @@ class PlayStateMachine:
50
  Detection Strategy:
51
  - Play START: Detected when play clock resets to 40 (or potentially freezes - needs validation)
52
  - Play END: **Always use backward counting** - calculate from next observed clock value after play
 
53
 
54
  Backward Counting:
55
  When the play clock reappears showing value X (where X < 40), the play end time is:
@@ -62,6 +63,7 @@ class PlayStateMachine:
62
  clock_stable_frames: int = 3 # Frames with same clock value to consider it "stable"
63
  max_play_duration: float = 15.0 # Maximum expected play duration in seconds
64
  scorebug_lost_timeout: float = 30.0 # Seconds before resetting state when scorebug lost
 
65
 
66
  # Internal state
67
  state: PlayState = field(default=PlayState.IDLE)
@@ -77,6 +79,7 @@ class PlayStateMachine:
77
  _current_play_start_clock: Optional[int] = field(default=None)
78
  _last_scorebug_timestamp: Optional[float] = field(default=None)
79
  _direct_end_time: Optional[float] = field(default=None)
 
80
 
81
  def update(self, timestamp: float, scorebug: ScorebugDetection, clock: PlayClockReading) -> Optional[PlayEvent]:
82
  """
@@ -188,20 +191,14 @@ class PlayStateMachine:
188
  self._start_play(timestamp, "clock_reset", self._last_clock_value)
189
  return None
190
 
191
- # Check if clock is stable (same value for multiple frames)
192
  if clock_value == self._last_clock_value:
193
  self._clock_stable_count += 1
194
- # If clock has been stable for a while and value is low, might be a freeze
195
- if self._clock_stable_count >= self.clock_stable_frames and clock_value <= 5:
196
- # Calculate time the clock has been at this value
197
- time_at_value = timestamp - (self._last_clock_timestamp or timestamp)
198
- if time_at_value > 1.0: # More than 1 second at same low value
199
- logger.info("Play START detected at %.1fs (clock frozen at %d for %.1fs)", timestamp, clock_value, time_at_value)
200
- self._start_play(timestamp - time_at_value, "clock_freeze", clock_value)
201
- return None
202
  else:
203
  self._clock_stable_count = 1
204
 
 
 
205
  return None
206
 
207
  def _handle_play_in_progress(self, timestamp: float, clock_value: int) -> Optional[PlayEvent]:
@@ -213,30 +210,52 @@ class PlayStateMachine:
213
  play_duration = timestamp - self._current_play_start_time
214
  if play_duration > self.max_play_duration:
215
  logger.warning("Play duration (%.1fs) exceeded max (%.1fs), forcing end", play_duration, self.max_play_duration)
216
- # Use current timestamp as direct end, but will recalculate with backward counting
217
  self._direct_end_time = timestamp
 
218
  return self._end_play(timestamp, clock_value, "direct_detect")
219
 
220
- # If we see clock at 40, play has definitely ended
 
221
  if clock_value == 40:
222
- logger.info("Play END detected at %.1fs (clock at 40)", timestamp)
223
- self._direct_end_time = timestamp
224
- # Calculate backward: play ended at this time since clock just reset
225
- return self._end_play(timestamp, clock_value, "direct_detect")
226
 
227
- # If we see clock ticking down normally, play has ended and we can backward calculate
228
- if self._last_clock_value is not None and clock_value < self._last_clock_value:
229
- # Clock is counting down - play must have ended, calculate backwards
230
- # End time = current_time - (40 - clock_value)
231
- calculated_end_time = timestamp - (40 - clock_value)
232
- logger.info(
233
- "Play END calculated via backward counting: %.1fs (clock=%d at %.1fs)",
234
- calculated_end_time,
235
- clock_value,
236
- timestamp,
237
- )
238
- self._direct_end_time = None # No direct detection
239
- return self._end_play_with_backward_calc(timestamp, clock_value, calculated_end_time)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
 
241
  return None
242
 
@@ -276,6 +295,7 @@ class PlayStateMachine:
276
  self._current_play_start_time = timestamp
277
  self._current_play_start_method = method
278
  self._current_play_start_clock = clock_value
 
279
  self.state = PlayState.PLAY_IN_PROGRESS
280
  logger.debug("Play started: time=%.1fs, method=%s, clock=%s", timestamp, method, clock_value)
281
 
@@ -351,6 +371,7 @@ class PlayStateMachine:
351
  self._current_play_start_clock = None
352
  self._direct_end_time = None
353
  self._clock_stable_count = 0
 
354
 
355
  def _reset_state(self) -> None:
356
  """Fully reset state machine."""
 
50
  Detection Strategy:
51
  - Play START: Detected when play clock resets to 40 (or potentially freezes - needs validation)
52
  - Play END: **Always use backward counting** - calculate from next observed clock value after play
53
+ Requires K consecutive descending clock ticks to confirm (avoids false positives)
54
 
55
  Backward Counting:
56
  When the play clock reappears showing value X (where X < 40), the play end time is:
 
63
  clock_stable_frames: int = 3 # Frames with same clock value to consider it "stable"
64
  max_play_duration: float = 15.0 # Maximum expected play duration in seconds
65
  scorebug_lost_timeout: float = 30.0 # Seconds before resetting state when scorebug lost
66
+ required_countdown_ticks: int = 3 # Number of consecutive descending ticks required to confirm play end
67
 
68
  # Internal state
69
  state: PlayState = field(default=PlayState.IDLE)
 
79
  _current_play_start_clock: Optional[int] = field(default=None)
80
  _last_scorebug_timestamp: Optional[float] = field(default=None)
81
  _direct_end_time: Optional[float] = field(default=None)
82
+ _countdown_history: List[tuple] = field(default_factory=list) # List of (timestamp, clock_value) for countdown tracking
83
 
84
  def update(self, timestamp: float, scorebug: ScorebugDetection, clock: PlayClockReading) -> Optional[PlayEvent]:
85
  """
 
191
  self._start_play(timestamp, "clock_reset", self._last_clock_value)
192
  return None
193
 
194
+ # Track clock stability (for potential future use)
195
  if clock_value == self._last_clock_value:
196
  self._clock_stable_count += 1
 
 
 
 
 
 
 
 
197
  else:
198
  self._clock_stable_count = 1
199
 
200
+ # Note: "clock_freeze" detection disabled - was causing false positives
201
+ # The clock_reset detection (going to 40) is the reliable method
202
  return None
203
 
204
  def _handle_play_in_progress(self, timestamp: float, clock_value: int) -> Optional[PlayEvent]:
 
210
  play_duration = timestamp - self._current_play_start_time
211
  if play_duration > self.max_play_duration:
212
  logger.warning("Play duration (%.1fs) exceeded max (%.1fs), forcing end", play_duration, self.max_play_duration)
 
213
  self._direct_end_time = timestamp
214
+ self._countdown_history = [] # Reset countdown tracking
215
  return self._end_play(timestamp, clock_value, "direct_detect")
216
 
217
+ # If clock is still at 40, the play just started and clock hasn't begun countdown yet
218
+ # We need to wait for the clock to drop below 40 before we can detect play end
219
  if clock_value == 40:
220
+ # Clock is still at 40 after reset - waiting for countdown to begin
221
+ logger.debug("Play in progress at %.1fs, clock still at 40", timestamp)
222
+ self._countdown_history = [] # Reset countdown tracking
223
+ return None
224
 
225
+ # Track countdown history for confirming play end
226
+ # We require K consecutive descending ticks to confirm
227
+ self._countdown_history.append((timestamp, clock_value))
228
+
229
+ # Check if we have enough consecutive descending values
230
+ if len(self._countdown_history) >= self.required_countdown_ticks:
231
+ # Get last K readings
232
+ recent = self._countdown_history[-self.required_countdown_ticks :]
233
+ values = [v for _, v in recent]
234
+
235
+ # Check if values are strictly descending (or stable which means same second)
236
+ is_valid_countdown = True
237
+ for i in range(1, len(values)):
238
+ # Allow same value (within same second) or descending
239
+ if values[i] > values[i - 1]:
240
+ is_valid_countdown = False
241
+ break
242
+
243
+ if is_valid_countdown:
244
+ # Use the first reading in our confirmed sequence for backward calculation
245
+ first_timestamp, first_value = recent[0]
246
+ calculated_end_time = first_timestamp - (40 - first_value)
247
+ logger.info(
248
+ "Play END confirmed via %d-tick countdown: %.1fs (clock=%d→%d, observed %.1fs-%.1fs)",
249
+ self.required_countdown_ticks,
250
+ calculated_end_time,
251
+ values[0],
252
+ values[-1],
253
+ recent[0][0],
254
+ recent[-1][0],
255
+ )
256
+ self._direct_end_time = timestamp # When we confirmed the countdown
257
+ self._countdown_history = [] # Reset for next play
258
+ return self._end_play_with_backward_calc(timestamp, first_value, calculated_end_time)
259
 
260
  return None
261
 
 
295
  self._current_play_start_time = timestamp
296
  self._current_play_start_method = method
297
  self._current_play_start_clock = clock_value
298
+ self._countdown_history = [] # Reset countdown tracking for new play
299
  self.state = PlayState.PLAY_IN_PROGRESS
300
  logger.debug("Play started: time=%.1fs, method=%s, clock=%s", timestamp, method, clock_value)
301
 
 
371
  self._current_play_start_clock = None
372
  self._direct_end_time = None
373
  self._clock_stable_count = 0
374
+ self._countdown_history = []
375
 
376
  def _reset_state(self) -> None:
377
  """Fully reset state machine."""
src/detectors/scorebug_detector.py CHANGED
@@ -5,9 +5,11 @@ This module provides functions to detect the presence and location of the scoreb
5
  (score overlay) in video frames.
6
  """
7
 
 
8
  import cv2
9
  import numpy as np
10
  import logging
 
11
  from typing import Optional, Tuple, Dict
12
  from dataclasses import dataclass
13
 
@@ -28,28 +30,59 @@ class ScorebugDetector:
28
  """
29
  Detects the scorebug in video frames.
30
 
31
- The detector uses multiple strategies to identify the scorebug:
32
- 1. Template matching
33
- 2. Color-based detection
34
- 3. Position-based heuristics
 
35
  """
36
 
37
- def __init__(self, template_path: Optional[str] = None, expected_region: Optional[Tuple[int, int, int, int]] = None):
 
 
 
 
 
38
  """
39
  Initialize the scorebug detector.
40
 
41
  Args:
42
  template_path: Path to a template image of the scorebug (optional)
43
- expected_region: Expected region where scorebug appears (x, y, w, h) (optional)
 
44
  """
45
  self.template = None
46
  self.template_path = template_path
47
- self.expected_region = expected_region
 
48
 
49
  if template_path:
50
  self.load_template(template_path)
51
 
52
- logger.info(f"ScorebugDetector initialized (template: {template_path is not None}, region: {expected_region is not None})")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  def load_template(self, template_path: str) -> None:
55
  """
@@ -69,29 +102,75 @@ class ScorebugDetector:
69
  """
70
  Detect scorebug in a frame.
71
 
 
 
72
  Args:
73
  frame: Input frame (BGR format)
74
 
75
  Returns:
76
  ScorebugDetection object with detection results
77
  """
78
- # Only use template matching - position alone is not sufficient
79
- # The scorebug is NOT present during replays/timeouts even though
80
- # the position may have other graphics
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
- if self.template is not None:
83
- detection = self._detect_by_template(frame)
84
- if detection.detected:
85
- logger.debug(f"Scorebug detected with confidence {detection.confidence:.2f} using {detection.method}")
86
- return detection
87
 
88
- # If template matching fails, scorebug is NOT present
89
- logger.debug("No scorebug detected")
90
- return ScorebugDetection(detected=False, confidence=0.0, method="none")
91
 
92
- def _detect_by_template(self, frame: np.ndarray) -> ScorebugDetection:
 
93
  """
94
- Detect scorebug using template matching.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  Args:
97
  frame: Input frame
@@ -100,9 +179,9 @@ class ScorebugDetector:
100
  Detection result
101
  """
102
  if self.template is None:
103
- return ScorebugDetection(detected=False, confidence=0.0, method="template")
104
 
105
- # Perform template matching
106
  result = cv2.matchTemplate(frame, self.template, cv2.TM_CCOEFF_NORMED)
107
  min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
108
 
@@ -115,9 +194,64 @@ class ScorebugDetector:
115
  h, w = self.template.shape[:2]
116
  bbox = (max_loc[0], max_loc[1], w, h)
117
 
118
- return ScorebugDetection(detected=True, confidence=float(max_val), bbox=bbox, method="template")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  else:
120
- return ScorebugDetection(detected=False, confidence=float(max_val), method="template")
 
121
 
122
  def _detect_by_position(self, frame: np.ndarray) -> ScorebugDetection:
123
  """
 
5
  (score overlay) in video frames.
6
  """
7
 
8
+ import json
9
  import cv2
10
  import numpy as np
11
  import logging
12
+ from pathlib import Path
13
  from typing import Optional, Tuple, Dict
14
  from dataclasses import dataclass
15
 
 
30
  """
31
  Detects the scorebug in video frames.
32
 
33
+ The detector supports two modes:
34
+ 1. Full-frame search: Template matching across entire frame (slower, use for initial detection)
35
+ 2. Fixed-region check: Only check known location for presence (much faster)
36
+
37
+ For optimal performance, use fixed_region mode after determining scorebug location once.
38
  """
39
 
40
def __init__(
    self,
    template_path: Optional[str] = None,
    fixed_region: Optional[Tuple[int, int, int, int]] = None,
    fixed_region_config_path: Optional[str] = None,
):
    """
    Initialize the scorebug detector.

    Args:
        template_path: Path to a template image of the scorebug (optional)
        fixed_region: Fixed region where scorebug appears (x, y, w, h) - enables fast mode
        fixed_region_config_path: Path to JSON config with fixed region (alternative to fixed_region)
    """
    self.template = None
    self.template_path = template_path
    self.fixed_region = fixed_region
    self._use_fixed_region = fixed_region is not None

    if template_path:
        self.load_template(template_path)

    # An explicit fixed_region argument takes precedence over the config file.
    if fixed_region_config_path and not fixed_region:
        self._load_fixed_region_config(fixed_region_config_path)

    if self._use_fixed_region:
        mode = "fixed_region"
    else:
        mode = "full_search"
    logger.info("ScorebugDetector initialized (template: %s, mode: %s)", template_path is not None, mode)
    if self._use_fixed_region:
        logger.info(" Fixed region: %s", self.fixed_region)
70
+
71
def _load_fixed_region_config(self, config_path: str) -> None:
    """
    Load the fixed scorebug region from a JSON config file.

    Best-effort loader: a missing, unreadable, or malformed config logs a
    warning and leaves the detector in its current mode instead of raising.
    (Previously only a missing file was tolerated; malformed JSON or a config
    without the expected x/y/width/height keys crashed the constructor.)

    Args:
        config_path: Path to a JSON file with a "scorebug_region" object
            containing "x", "y", "width" and "height" keys.
    """
    path = Path(config_path)
    if not path.exists():
        logger.warning("Fixed region config not found: %s", config_path)
        return

    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)

        if "scorebug_region" in data:
            region = data["scorebug_region"]
            self.fixed_region = (region["x"], region["y"], region["width"], region["height"])
            self._use_fixed_region = True
            logger.info("Loaded fixed region from config: %s", self.fixed_region)
    except (json.JSONDecodeError, KeyError, TypeError, OSError) as e:
        # Keep the same best-effort contract as the missing-file case.
        logger.warning("Could not load fixed region config %s: %s", config_path, e)
86
 
87
  def load_template(self, template_path: str) -> None:
88
  """
 
102
  """
103
  Detect scorebug in a frame.
104
 
105
+ Uses fixed-region mode if configured (much faster), otherwise searches entire frame.
106
+
107
  Args:
108
  frame: Input frame (BGR format)
109
 
110
  Returns:
111
  ScorebugDetection object with detection results
112
  """
113
+ if self.template is None:
114
+ logger.debug("No template loaded, cannot detect scorebug")
115
+ return ScorebugDetection(detected=False, confidence=0.0, method="none")
116
+
117
+ # Use fixed-region mode if configured (much faster - only checks known location)
118
+ if self._use_fixed_region and self.fixed_region is not None:
119
+ detection = self._detect_in_fixed_region(frame)
120
+ else:
121
+ # Full-frame template matching (slower, searches entire frame)
122
+ detection = self._detect_by_template_fullsearch(frame)
123
+
124
+ if detection.detected:
125
+ logger.debug("Scorebug detected with confidence %.2f using %s", detection.confidence, detection.method)
126
+ else:
127
+ logger.debug("No scorebug detected (confidence: %.2f)", detection.confidence)
128
+
129
+ return detection
130
+
131
def _detect_in_fixed_region(self, frame: np.ndarray) -> ScorebugDetection:
    """
    Detect scorebug by checking only the fixed known location.

    This is MUCH faster than full-frame search since we only compare
    the template against a single position.

    NOTE(review): only the (x, y) of ``self.fixed_region`` is used — the
    extracted patch always has the *template's* width/height, so a stored
    region size that disagrees with the template is silently ignored.

    Args:
        frame: Input frame (BGR)

    Returns:
        Detection result (method="fixed_region"); bbox is the checked region.
    """
    # Width/height stored in fixed_region are intentionally unused (see NOTE).
    x, y, _, _ = self.fixed_region
    th, tw = self.template.shape[:2]

    # Validate that the template-sized patch fits inside the frame.
    frame_h, frame_w = frame.shape[:2]
    if x < 0 or y < 0 or x + tw > frame_w or y + th > frame_h:
        logger.warning("Fixed region out of frame bounds")
        return ScorebugDetection(detected=False, confidence=0.0, bbox=self.fixed_region, method="fixed_region")

    # Extract the region where scorebug should be
    region = frame[y : y + th, x : x + tw]

    # Normalized cross-correlation against a single position — matchTemplate
    # on an exactly-template-sized patch yields a single score.
    result = cv2.matchTemplate(region, self.template, cv2.TM_CCOEFF_NORMED)
    confidence = float(result[0, 0])

    # Threshold decides presence; both branches return the same bbox, so
    # collapse the former if/else into one parameterized return.
    threshold = 0.8
    return ScorebugDetection(
        detected=confidence >= threshold,
        confidence=confidence,
        bbox=(x, y, tw, th),
        method="fixed_region",
    )
167
+
168
+ def _detect_by_template_fullsearch(self, frame: np.ndarray) -> ScorebugDetection:
169
+ """
170
+ Detect scorebug using full-frame template matching.
171
+
172
+ This searches the entire frame for the template - slower but works
173
+ when scorebug position is unknown.
174
 
175
  Args:
176
  frame: Input frame
 
179
  Detection result
180
  """
181
  if self.template is None:
182
+ return ScorebugDetection(detected=False, confidence=0.0, method="full_search")
183
 
184
+ # Perform template matching across entire frame
185
  result = cv2.matchTemplate(frame, self.template, cv2.TM_CCOEFF_NORMED)
186
  min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
187
 
 
194
  h, w = self.template.shape[:2]
195
  bbox = (max_loc[0], max_loc[1], w, h)
196
 
197
+ return ScorebugDetection(detected=True, confidence=float(max_val), bbox=bbox, method="full_search")
198
+ else:
199
+ return ScorebugDetection(detected=False, confidence=float(max_val), method="full_search")
200
+
201
def set_fixed_region(self, region: Tuple[int, int, int, int]) -> None:
    """
    Set a fixed region for fast detection mode.

    Call this after discovering the scorebug location to switch to fast mode.

    Args:
        region: (x, y, width, height) of the scorebug location
    """
    # Switch the detector into fast single-position mode at this location.
    self.fixed_region = region
    self._use_fixed_region = True
    logger.info("Fixed region set: %s - now using fast detection mode", region)
213
+
214
def save_fixed_region_config(self, config_path: str) -> None:
    """Save the fixed region to a config file for reuse."""
    # Nothing to persist until a region has been set/discovered.
    if self.fixed_region is None:
        logger.warning("No fixed region to save")
        return

    x, y, w, h = self.fixed_region
    payload = {
        "scorebug_region": {
            "x": x,
            "y": y,
            "width": w,
            "height": h,
        }
    }

    target = Path(config_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2)

    logger.info("Saved fixed region config to: %s", config_path)
229
+
230
def discover_and_lock_region(self, frame: np.ndarray) -> bool:
    """
    Discover scorebug location using full search, then lock to fixed region mode.

    This is useful for the first frame - find the scorebug once, then use
    fast fixed-region mode for all subsequent frames.

    Args:
        frame: Frame to search

    Returns:
        True if scorebug was found and region was locked, False otherwise
    """
    # Force one full-frame search regardless of the current mode,
    # remembering the previous mode so we can restore it on failure.
    previous_mode = self._use_fixed_region
    self._use_fixed_region = False

    detection = self._detect_by_template_fullsearch(frame)

    if not (detection.detected and detection.bbox):
        # Nothing found: leave the detector exactly as it was.
        self._use_fixed_region = previous_mode
        return False

    # Found it: lock to this location (set_fixed_region enables fast mode).
    self.set_fixed_region(detection.bbox)
    return True
255
 
256
  def _detect_by_position(self, frame: np.ndarray) -> ScorebugDetection:
257
  """
src/pipeline/play_detector.py CHANGED
@@ -10,13 +10,14 @@ This module orchestrates the complete play detection pipeline:
10
 
11
  import json
12
  import logging
 
13
  from dataclasses import dataclass, field
14
  from pathlib import Path
15
  from typing import Optional, List, Dict, Any
16
 
17
  import cv2
18
 
19
- from ..detectors import ScorebugDetector, PlayClockReader, PlayStateMachine, PlayEvent
20
 
21
  logger = logging.getLogger(__name__)
22
 
@@ -30,7 +31,7 @@ class DetectionConfig:
30
  clock_region_config_path: str # Path to play clock region config
31
  start_time: float = 0.0 # Start time in seconds
32
  end_time: Optional[float] = None # End time in seconds (None = full video)
33
- frame_interval: float = 0.1 # Interval between frame samples (seconds)
34
 
35
 
36
  @dataclass
@@ -45,6 +46,7 @@ class DetectionResult:
45
  frames_with_clock: int # Frames where clock was read successfully
46
  plays: List[Dict[str, Any]] = field(default_factory=list) # Detected plays as dicts
47
  stats: Dict[str, Any] = field(default_factory=dict) # Summary statistics
 
48
 
49
 
50
  class PlayDetector:
@@ -128,16 +130,23 @@ class PlayDetector:
128
  start_time = self.config.start_time
129
  end_time = self.config.end_time if self.config.end_time else duration
130
 
131
- # Process frames
132
  stats = {"total_frames": 0, "frames_with_scorebug": 0, "frames_with_clock": 0}
 
 
 
 
133
 
134
  current_time = start_time
135
  while current_time < end_time:
136
- # Seek to current time
 
137
  frame_number = int(current_time * fps)
138
  cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
139
 
140
  ret, frame = cap.read()
 
 
141
  if not ret:
142
  logger.warning("Could not read frame at %.1fs", current_time)
143
  current_time += self.config.frame_interval
@@ -145,17 +154,44 @@ class PlayDetector:
145
 
146
  stats["total_frames"] += 1
147
 
148
- # Run detection pipeline
149
- self._process_frame(frame, current_time, stats)
 
 
 
 
 
 
 
 
 
 
150
 
151
  # Progress logging every 30 seconds
152
  if stats["total_frames"] % int(30 / self.config.frame_interval) == 0:
153
- logger.info("Progress: %.1fs / %.1fs (%.0f%%), %d plays detected", current_time, end_time, 100 * (current_time - start_time) / (end_time - start_time), len(self.state_machine.get_plays()))
 
 
 
 
 
 
154
 
155
  current_time += self.config.frame_interval
156
 
157
  cap.release()
158
 
 
 
 
 
 
 
 
 
 
 
 
159
  # Build result
160
  plays = self.state_machine.get_plays()
161
  play_stats = self.state_machine.get_stats()
@@ -169,6 +205,7 @@ class PlayDetector:
169
  frames_with_clock=stats["frames_with_clock"],
170
  plays=[self._play_to_dict(p) for p in plays],
171
  stats=play_stats,
 
172
  )
173
 
174
  logger.info("Detection complete!")
@@ -179,7 +216,7 @@ class PlayDetector:
179
 
180
  return result
181
 
182
- def _process_frame(self, frame, timestamp: float, stats: Dict[str, int]) -> None:
183
  """
184
  Process a single frame through the detection pipeline.
185
 
@@ -187,24 +224,35 @@ class PlayDetector:
187
  frame: Video frame (BGR)
188
  timestamp: Current timestamp in seconds
189
  stats: Statistics dictionary to update
 
 
190
  """
191
  # Detect scorebug
 
192
  scorebug = self.scorebug_detector.detect(frame)
 
 
193
 
194
  if scorebug.detected:
195
  stats["frames_with_scorebug"] += 1
196
 
197
- # Read play clock
 
198
  clock = self.clock_reader.read(frame, scorebug.bbox)
 
199
 
200
  if clock.detected:
201
  stats["frames_with_clock"] += 1
202
  else:
203
- # No scorebug - create empty clock reading
204
- clock = self.clock_reader.read(frame, (0, 0, 0, 0)) # Will fail gracefully
 
 
205
 
206
  # Update state machine
 
207
  self.state_machine.update(timestamp, scorebug, clock)
 
208
 
209
  def _play_to_dict(self, play: PlayEvent) -> Dict[str, Any]:
210
  """Convert PlayEvent to dictionary for JSON serialization."""
@@ -240,6 +288,7 @@ class PlayDetector:
240
  "frames_with_scorebug": result.frames_with_scorebug,
241
  "frames_with_clock": result.frames_with_clock,
242
  },
 
243
  "plays": result.plays,
244
  "stats": result.stats,
245
  }
 
10
 
11
  import json
12
  import logging
13
+ import time
14
  from dataclasses import dataclass, field
15
  from pathlib import Path
16
  from typing import Optional, List, Dict, Any
17
 
18
  import cv2
19
 
20
+ from detectors import ScorebugDetector, PlayClockReader, PlayStateMachine, PlayEvent
21
 
22
  logger = logging.getLogger(__name__)
23
 
 
31
  clock_region_config_path: str # Path to play clock region config
32
  start_time: float = 0.0 # Start time in seconds
33
  end_time: Optional[float] = None # End time in seconds (None = full video)
34
+ frame_interval: float = 0.5 # Interval between frame samples (seconds) - 2 fps is sufficient since play clock changes once/sec
35
 
36
 
37
  @dataclass
 
46
  frames_with_clock: int # Frames where clock was read successfully
47
  plays: List[Dict[str, Any]] = field(default_factory=list) # Detected plays as dicts
48
  stats: Dict[str, Any] = field(default_factory=dict) # Summary statistics
49
+ timing: Dict[str, float] = field(default_factory=dict) # Timing breakdown by section
50
 
51
 
52
  class PlayDetector:
 
130
  start_time = self.config.start_time
131
  end_time = self.config.end_time if self.config.end_time else duration
132
 
133
+ # Process frames with timing tracking
134
  stats = {"total_frames": 0, "frames_with_scorebug": 0, "frames_with_clock": 0}
135
+ timing = {"scorebug_detection": 0.0, "playclock_ocr": 0.0, "state_machine": 0.0, "video_io": 0.0}
136
+
137
+ # Flag to track if we've locked the scorebug region
138
+ scorebug_region_locked = False
139
 
140
  current_time = start_time
141
  while current_time < end_time:
142
+ # Seek to current time (video I/O)
143
+ t_io_start = time.perf_counter()
144
  frame_number = int(current_time * fps)
145
  cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
146
 
147
  ret, frame = cap.read()
148
+ timing["video_io"] += time.perf_counter() - t_io_start
149
+
150
  if not ret:
151
  logger.warning("Could not read frame at %.1fs", current_time)
152
  current_time += self.config.frame_interval
 
154
 
155
  stats["total_frames"] += 1
156
 
157
+ # On first successful scorebug detection, lock to fixed region for speed
158
+ if not scorebug_region_locked:
159
+ t_start = time.perf_counter()
160
+ if self.scorebug_detector.discover_and_lock_region(frame):
161
+ scorebug_region_locked = True
162
+ logger.info("Scorebug region locked at %s - using fast detection mode", self.scorebug_detector.fixed_region)
163
+ timing["scorebug_detection"] += time.perf_counter() - t_start
164
+ # Still process this frame normally
165
+ self._process_frame(frame, current_time, stats, timing, skip_scorebug_timing=True)
166
+ else:
167
+ # Run detection pipeline with timing
168
+ self._process_frame(frame, current_time, stats, timing)
169
 
170
  # Progress logging every 30 seconds
171
  if stats["total_frames"] % int(30 / self.config.frame_interval) == 0:
172
+ logger.info(
173
+ "Progress: %.1fs / %.1fs (%.0f%%), %d plays detected",
174
+ current_time,
175
+ end_time,
176
+ 100 * (current_time - start_time) / (end_time - start_time),
177
+ len(self.state_machine.get_plays()),
178
+ )
179
 
180
  current_time += self.config.frame_interval
181
 
182
  cap.release()
183
 
184
+ # Log timing breakdown
185
+ total_time = sum(timing.values())
186
+ logger.info("=" * 50)
187
+ logger.info("TIMING BREAKDOWN")
188
+ logger.info("=" * 50)
189
+ for section, duration in timing.items():
190
+ pct = 100 * duration / total_time if total_time > 0 else 0
191
+ logger.info(" %s: %.2fs (%.1f%%)", section, duration, pct)
192
+ logger.info(" TOTAL: %.2fs", total_time)
193
+ logger.info("=" * 50)
194
+
195
  # Build result
196
  plays = self.state_machine.get_plays()
197
  play_stats = self.state_machine.get_stats()
 
205
  frames_with_clock=stats["frames_with_clock"],
206
  plays=[self._play_to_dict(p) for p in plays],
207
  stats=play_stats,
208
+ timing=timing,
209
  )
210
 
211
  logger.info("Detection complete!")
 
216
 
217
  return result
218
 
219
+ def _process_frame(self, frame, timestamp: float, stats: Dict[str, int], timing: Dict[str, float], skip_scorebug_timing: bool = False) -> None:
220
  """
221
  Process a single frame through the detection pipeline.
222
 
 
224
  frame: Video frame (BGR)
225
  timestamp: Current timestamp in seconds
226
  stats: Statistics dictionary to update
227
+ timing: Timing dictionary to update
228
+ skip_scorebug_timing: If True, don't add to scorebug timing (already counted in region discovery)
229
  """
230
  # Detect scorebug
231
+ t_start = time.perf_counter()
232
  scorebug = self.scorebug_detector.detect(frame)
233
+ if not skip_scorebug_timing:
234
+ timing["scorebug_detection"] += time.perf_counter() - t_start
235
 
236
  if scorebug.detected:
237
  stats["frames_with_scorebug"] += 1
238
 
239
+ # Read play clock (OCR - most expensive operation)
240
+ t_start = time.perf_counter()
241
  clock = self.clock_reader.read(frame, scorebug.bbox)
242
+ timing["playclock_ocr"] += time.perf_counter() - t_start
243
 
244
  if clock.detected:
245
  stats["frames_with_clock"] += 1
246
  else:
247
+ # No scorebug - create empty clock reading (no OCR needed)
248
+ from detectors import PlayClockReading
249
+
250
+ clock = PlayClockReading(detected=False, value=None, confidence=0.0, raw_text="NO_SCOREBUG")
251
 
252
  # Update state machine
253
+ t_start = time.perf_counter()
254
  self.state_machine.update(timestamp, scorebug, clock)
255
+ timing["state_machine"] += time.perf_counter() - t_start
256
 
257
  def _play_to_dict(self, play: PlayEvent) -> Dict[str, Any]:
258
  """Convert PlayEvent to dictionary for JSON serialization."""
 
288
  "frames_with_scorebug": result.frames_with_scorebug,
289
  "frames_with_clock": result.frames_with_clock,
290
  },
291
+ "timing": result.timing,
292
  "plays": result.plays,
293
  "stats": result.stats,
294
  }