#!/usr/bin/env python3
"""
Benchmark OCR batching strategies for play clock reading.

This script tests different OCR approaches on the 10-minute test clip:
1. Single-image OCR (current approach)
2. Batch preprocessing + sequential OCR
3. Multiprocessing with worker pool

The goal is to identify if batching can reduce the ~48% of time spent on OCR.

Usage:
    python scripts/benchmark_ocr_batching.py
"""
import json
import logging
import sys
import time
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
from dataclasses import dataclass
from pathlib import Path
from typing import List, Tuple, Optional

import cv2
import numpy as np

from detection import DetectScoreBug
from readers import PlayClockReading
from setup import PlayClockRegionExtractor

# Path reference for constants (scripts/<this file> -> repo root, two levels up)
PROJECT_ROOT = Path(__file__).parent.parent.parent

# Note: _get_easyocr_reader was removed during migration to template matching
# NOTE(review): the benchmark functions below still call _get_easyocr_reader();
# confirm it is re-added or imported before running this script.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# Constants matching main.py testing mode
VIDEO_PATH = PROJECT_ROOT / "full_videos" / "OSU vs Tenn 12.21.24.mkv"
OUTPUT_DIR = PROJECT_ROOT / "output"
TESTING_START_TIME = 38 * 60 + 40  # 38:40
TESTING_END_TIME = 48 * 60 + 40  # 48:40 (10 minutes)
FRAME_INTERVAL = 0.5  # 2 fps sampling
@dataclass
class FrameData:
    """Container for frame data needed for OCR processing.

    BUG FIX: the @dataclass decorator was missing; without it the annotated
    names are only class-level annotations and FrameData(timestamp=..., ...)
    (as used by extract_frames_sequential) raises TypeError.
    """

    # Video timestamp (seconds) at which the frame was sampled.
    timestamp: float
    # Image already preprocessed for OCR by the clock reader.
    preprocessed_image: np.ndarray
    # Scorebug bounding box -- presumably (x, y, w, h) from the detector; TODO confirm.
    scorebug_bbox: Tuple[int, int, int, int]
def extract_frames_sequential(
    video_path: str,
    start_time: float,
    end_time: float,
    frame_interval: float,
    scorebug_detector: DetectScoreBug,
    clock_reader: PlayClockRegionExtractor,
) -> Tuple[List[FrameData], dict]:
    """
    Extract and preprocess frames using sequential reading (new optimized approach).

    Reads the video once from start_time up to (but not including) end_time,
    sampling one frame every frame_interval seconds, and preprocesses the
    play-clock region of each frame where the scorebug is detected.

    Args:
        video_path: Path to the source video.
        start_time: Clip start in seconds.
        end_time: Clip end in seconds (exclusive).
        frame_interval: Seconds between sampled frames.
        scorebug_detector: Detector used to locate/lock the scorebug region.
        clock_reader: Extractor that crops and preprocesses the play clock.

    Returns:
        Tuple of (list of FrameData, timing dict) where the timing dict has
        per-phase totals in seconds under "video_io", "scorebug_detection"
        and "preprocessing".

    Raises:
        RuntimeError: If the video cannot be opened.
    """
    logger.info("Extracting frames from %.1fs to %.1fs (interval=%.2fs)...", start_time, end_time, frame_interval)
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise RuntimeError(f"Could not open video: {video_path}")
    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_skip = int(frame_interval * fps)  # frames advanced per sample
    start_frame = int(start_time * fps)
    end_frame = int(end_time * fps)
    timing = {"video_io": 0.0, "scorebug_detection": 0.0, "preprocessing": 0.0}
    frames_data = []
    # Seek to start (only initial seek) -- all later advancement is sequential,
    # avoiding repeated random seeks.
    t_start = time.perf_counter()
    cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
    timing["video_io"] += time.perf_counter() - t_start
    # Lock scorebug region on first detection for speed
    scorebug_locked = False
    current_frame = start_frame
    while current_frame < end_frame:
        current_time = current_frame / fps  # timestamp (s) stored with the sample
        # Read frame
        t_start = time.perf_counter()
        ret, frame = cap.read()
        timing["video_io"] += time.perf_counter() - t_start
        if ret:
            # Detect scorebug
            t_start = time.perf_counter()
            if not scorebug_locked:
                if scorebug_detector.discover_and_lock_region(frame):
                    scorebug_locked = True
                    logger.info("Scorebug locked at frame %d", current_frame)
            scorebug = scorebug_detector.detect(frame)
            timing["scorebug_detection"] += time.perf_counter() - t_start
            if scorebug.detected:
                # Preprocess for OCR
                t_start = time.perf_counter()
                play_clock_region = clock_reader._extract_region(frame, scorebug.bbox)  # pylint: disable=protected-access
                if play_clock_region is not None:
                    preprocessed = clock_reader._preprocess_for_ocr(play_clock_region)  # pylint: disable=protected-access
                    frames_data.append(FrameData(timestamp=current_time, preprocessed_image=preprocessed, scorebug_bbox=scorebug.bbox))
                timing["preprocessing"] += time.perf_counter() - t_start
        # Skip frames sequentially: grab() advances the stream without returning
        # a decoded frame, which is cheaper than read() for discarded frames.
        t_start = time.perf_counter()
        for _ in range(frame_skip - 1):
            cap.grab()
        timing["video_io"] += time.perf_counter() - t_start
        current_frame += frame_skip
    cap.release()
    logger.info("Extracted %d preprocessed frames", len(frames_data))
    return frames_data, timing
def benchmark_single_ocr(frames_data: List[FrameData]) -> Tuple[List[PlayClockReading], float]:
    """
    Benchmark single-image OCR (current approach).

    Args:
        frames_data: Preprocessed frames; one reading is produced per frame.

    Returns:
        Tuple of (list of readings, total OCR time in seconds). Only the
        readtext() calls are timed; result parsing is excluded.
    """
    logger.info("Running single-image OCR benchmark on %d frames...", len(frames_data))
    # NOTE(review): _get_easyocr_reader is referenced here but the module note
    # says it was removed during the template-matching migration -- confirm.
    reader = _get_easyocr_reader()
    results = []
    total_time = 0.0
    for fd in frames_data:
        t_start = time.perf_counter()
        ocr_results = reader.readtext(fd.preprocessed_image, allowlist="0123456789", detail=1)
        total_time += time.perf_counter() - t_start
        # Keep the highest-confidence hit; readtext tuples are (bbox, text, conf).
        if ocr_results:
            best = max(ocr_results, key=lambda x: x[2])
            text, confidence = best[1].strip(), best[2]
        else:
            text, confidence = "", 0.0
        # Validate as a play clock value (0-40 s). Convert once instead of the
        # previous double int(text) evaluation; int("") raises ValueError, so
        # empty text falls through to value=None just as before.
        try:
            number = int(text)
            value = number if 0 <= number <= 40 else None
        except ValueError:
            value = None
        detected = value is not None
        results.append(PlayClockReading(detected=detected, value=value, confidence=confidence, raw_text=text))
    return results, total_time
def benchmark_batch_readtext(frames_data: List[FrameData], batch_size: int = 16) -> Tuple[List[PlayClockReading], float]:
    """
    Benchmark batch OCR using EasyOCR's batch capability.

    EasyOCR can process multiple images but uses readtext per image.
    This tests if there's any benefit from batching preprocessing/inference.

    Args:
        frames_data: Preprocessed frames to OCR.
        batch_size: Number of frames timed together per batch.

    Returns:
        Tuple of (list of readings, total OCR time in seconds). Each batch is
        timed as a unit; parsing happens outside the timed section.
    """
    logger.info("Running batch OCR benchmark (batch_size=%d) on %d frames...", batch_size, len(frames_data))
    reader = _get_easyocr_reader()
    results = []
    total_time = 0.0
    # Process in batches
    for batch_start in range(0, len(frames_data), batch_size):
        batch = frames_data[batch_start : batch_start + batch_size]
        # Time the entire batch
        t_start = time.perf_counter()
        batch_results = []
        for fd in batch:
            ocr_results = reader.readtext(fd.preprocessed_image, allowlist="0123456789", detail=1)
            if ocr_results:
                best = max(ocr_results, key=lambda x: x[2])
                text, confidence = best[1].strip(), best[2]
            else:
                text, confidence = "", 0.0
            batch_results.append((text, confidence))
        total_time += time.perf_counter() - t_start
        # Parse results (untimed). Convert once instead of the previous double
        # int(text) evaluation; int("") raises ValueError, matching the old
        # behavior for empty text.
        for text, confidence in batch_results:
            try:
                number = int(text)
                value = number if 0 <= number <= 40 else None
            except ValueError:
                value = None
            detected = value is not None
            results.append(PlayClockReading(detected=detected, value=value, confidence=confidence, raw_text=text))
    return results, total_time
def _ocr_worker_single(preprocessed_image: np.ndarray) -> Tuple[str, float]:
    """Worker function for thread pool OCR - processes single image.

    Returns the stripped text and confidence of the best OCR hit, or
    ("", 0.0) when nothing was recognized.
    """
    hits = _get_easyocr_reader().readtext(preprocessed_image, allowlist="0123456789", detail=1)
    if not hits:
        return "", 0.0
    # Pick the highest-confidence hit; tuples are (bbox, text, conf).
    best = max(hits, key=lambda hit: hit[2])
    return best[1].strip(), best[2]
def benchmark_threaded_ocr(frames_data: List[FrameData], num_workers: int = 4) -> Tuple[List[PlayClockReading], float]:
    """
    Benchmark threaded OCR using ThreadPoolExecutor.

    Note: Due to Python GIL, this may not provide speedup for CPU-bound tasks.
    However, EasyOCR may release the GIL during inference.

    Args:
        frames_data: Preprocessed frames to OCR.
        num_workers: Thread pool size.

    Returns:
        Tuple of (list of readings, total wall-clock OCR time in seconds).
    """
    logger.info("Running threaded OCR benchmark (workers=%d) on %d frames...", num_workers, len(frames_data))
    # Pre-warm the reader in main thread
    _get_easyocr_reader()
    results = []
    t_start = time.perf_counter()
    with ThreadPoolExecutor(max_workers=num_workers) as executor:
        # Submit all tasks
        futures = [executor.submit(_ocr_worker_single, fd.preprocessed_image) for fd in frames_data]
        # Collect results in order (futures iterated in submission order, so
        # readings stay aligned with frames_data)
        for future in futures:
            text, confidence = future.result()
            # Validate as a play clock value (0-40 s). Convert once instead of
            # the previous double int(text) evaluation; int("") raises
            # ValueError, matching the old behavior for empty text.
            try:
                number = int(text)
                value = number if 0 <= number <= 40 else None
            except ValueError:
                value = None
            detected = value is not None
            results.append(PlayClockReading(detected=detected, value=value, confidence=confidence, raw_text=text))
    total_time = time.perf_counter() - t_start
    return results, total_time
def compute_accuracy(results: List[PlayClockReading]) -> Tuple[int, int, float]:
    """Compute detection rate.

    Returns (detected count, total count, detection rate as a percentage).
    An empty input yields (0, 0, 0).
    """
    total = len(results)
    detected = sum(bool(r.detected) for r in results)
    rate = (100 * detected / total) if total else 0
    return detected, total, rate
def main():
    """Run OCR batching benchmarks.

    Loads the configs produced by 'python main.py --testing', extracts and
    preprocesses the test-clip frames once, then times each OCR strategy on
    that same frame set and logs a comparison table plus a recommendation.

    Returns:
        Process exit code: 0 on success, 1 when required config files are
        missing or no frames could be extracted.
    """
    logger.info("=" * 60)
    logger.info("OCR Batching Benchmark")
    logger.info("=" * 60)
    # Load configs from testing mode output
    config_path = OUTPUT_DIR / "testing_config.json"
    playclock_config_path = OUTPUT_DIR / "testing_playclock_config.json"
    template_path = OUTPUT_DIR / "testing_template.png"
    if not all(p.exists() for p in [config_path, playclock_config_path, template_path]):
        logger.error("Missing config files. Run 'python main.py --testing' first to generate them.")
        logger.error(" Expected: %s", config_path)
        logger.error(" Expected: %s", playclock_config_path)
        logger.error(" Expected: %s", template_path)
        return 1
    # Load configs
    with open(config_path, "r", encoding="utf-8") as f:
        config = json.load(f)
    # Initialize detectors
    scorebug_detector = DetectScoreBug(template_path=str(template_path))
    # NOTE(review): assumes the JSON schema written by main.py --testing uses
    # these pixel-coordinate keys -- confirm if that schema changes.
    fixed_region = (config["scorebug_x"], config["scorebug_y"], config["scorebug_width"], config["scorebug_height"])
    scorebug_detector.set_fixed_region(fixed_region)
    clock_reader = PlayClockRegionExtractor(region_config_path=str(playclock_config_path))
    # Extract and preprocess all frames (done once; all benchmarks share them)
    logger.info("\n--- Frame Extraction Phase ---")
    frames_data, extract_timing = extract_frames_sequential(
        str(VIDEO_PATH),
        TESTING_START_TIME,
        TESTING_END_TIME,
        FRAME_INTERVAL,
        scorebug_detector,
        clock_reader,
    )
    logger.info("Extraction timing:")
    for k, v in extract_timing.items():
        logger.info(" %s: %.2fs", k, v)
    logger.info(" Total: %.2fs", sum(extract_timing.values()))
    if not frames_data:
        logger.error("No frames extracted!")
        return 1
    # Run benchmarks
    logger.info("\n--- OCR Benchmarks ---")
    # Benchmark 1: Single-image OCR (baseline)
    results_single, time_single = benchmark_single_ocr(frames_data)
    det_single, tot_single, rate_single = compute_accuracy(results_single)
    logger.info("\n[1] Single-image OCR:")
    logger.info(" Time: %.2fs (%.1f ms/frame)", time_single, 1000 * time_single / len(frames_data))
    logger.info(" Detection: %d/%d (%.1f%%)", det_single, tot_single, rate_single)
    # Benchmark 2: Batch OCR (same reader, batched calls)
    results_batch, time_batch = benchmark_batch_readtext(frames_data, batch_size=16)
    det_batch, tot_batch, rate_batch = compute_accuracy(results_batch)
    logger.info("\n[2] Batch OCR (batch_size=16):")
    logger.info(" Time: %.2fs (%.1f ms/frame)", time_batch, 1000 * time_batch / len(frames_data))
    logger.info(" Detection: %d/%d (%.1f%%)", det_batch, tot_batch, rate_batch)
    logger.info(" Speedup vs single: %.2fx", time_single / time_batch if time_batch > 0 else 0)
    # Benchmark 3: Threaded OCR (2 workers)
    results_thread2, time_thread2 = benchmark_threaded_ocr(frames_data, num_workers=2)
    det_thread2, tot_thread2, rate_thread2 = compute_accuracy(results_thread2)
    logger.info("\n[3] Threaded OCR (2 workers):")
    logger.info(" Time: %.2fs (%.1f ms/frame)", time_thread2, 1000 * time_thread2 / len(frames_data))
    logger.info(" Detection: %d/%d (%.1f%%)", det_thread2, tot_thread2, rate_thread2)
    logger.info(" Speedup vs single: %.2fx", time_single / time_thread2 if time_thread2 > 0 else 0)
    # Benchmark 4: Threaded OCR (4 workers)
    results_thread4, time_thread4 = benchmark_threaded_ocr(frames_data, num_workers=4)
    det_thread4, tot_thread4, rate_thread4 = compute_accuracy(results_thread4)
    logger.info("\n[4] Threaded OCR (4 workers):")
    logger.info(" Time: %.2fs (%.1f ms/frame)", time_thread4, 1000 * time_thread4 / len(frames_data))
    logger.info(" Detection: %d/%d (%.1f%%)", det_thread4, tot_thread4, rate_thread4)
    logger.info(" Speedup vs single: %.2fx", time_single / time_thread4 if time_thread4 > 0 else 0)
    # Summary table of all four strategies
    logger.info("\n" + "=" * 60)
    logger.info("SUMMARY")
    logger.info("=" * 60)
    logger.info("Frames processed: %d", len(frames_data))
    logger.info("\nOCR Method | Time (s) | ms/frame | Speedup | Det Rate")
    logger.info("-" * 60)
    logger.info("Single-image | %8.2f | %8.1f | %7.2fx | %6.1f%%", time_single, 1000 * time_single / len(frames_data), 1.0, rate_single)
    logger.info(
        "Batch (size=16) | %8.2f | %8.1f | %7.2fx | %6.1f%%", time_batch, 1000 * time_batch / len(frames_data), time_single / time_batch if time_batch > 0 else 0, rate_batch
    )
    logger.info(
        "Threaded (2 wkrs) | %8.2f | %8.1f | %7.2fx | %6.1f%%",
        time_thread2,
        1000 * time_thread2 / len(frames_data),
        time_single / time_thread2 if time_thread2 > 0 else 0,
        rate_thread2,
    )
    logger.info(
        "Threaded (4 wkrs) | %8.2f | %8.1f | %7.2fx | %6.1f%%",
        time_thread4,
        1000 * time_thread4 / len(frames_data),
        time_single / time_thread4 if time_thread4 > 0 else 0,
        rate_thread4,
    )
    logger.info("=" * 60)
    # Recommendations: pick the strategy with the lowest wall-clock OCR time
    best_time = min(time_single, time_batch, time_thread2, time_thread4)
    if best_time == time_single:
        logger.info("\nRecommendation: Keep single-image OCR (no speedup from batching)")
    elif best_time == time_batch:
        logger.info("\nRecommendation: Use batch OCR with batch_size=16")
    elif best_time == time_thread2:
        logger.info("\nRecommendation: Use threaded OCR with 2 workers")
    else:
        logger.info("\nRecommendation: Use threaded OCR with 4 workers")
    return 0
| if __name__ == "__main__": | |
| sys.exit(main()) | |