#!/usr/bin/env python3
"""
Full video evaluation using template-based play clock reading.

This test:
1. Loads pre-built digit templates
2. Runs play detection on the full video using template matching
3. Compares detected plays against v3 baseline
4. Reports accuracy metrics

The goal is to verify that template-based clock reading matches or exceeds
OCR-based detection quality while being significantly faster.

Usage:
    cd /Users/andytaylor/Documents/Personal/cfb40
    source .venv/bin/activate
    python tests/test_digit_templates/test_full_video_evaluation.py
"""
import json
import logging
import sys
import time
from pathlib import Path

from pipeline.play_detector import DetectionConfig, PlayDetector
from setup import DigitTemplateLibrary
from detection import TrackTimeouts
# Module-level logging: timestamped INFO-level output for the whole run.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# Configuration
# Input video plus the artifacts produced by earlier setup/benchmark steps.
VIDEO_PATH = "full_videos/OSU vs Tenn 12.21.24.mkv"
TEMPLATE_PATH = "output/OSU_vs_Tenn_12_21_24_template.png"  # scorebug template image
PLAYCLOCK_CONFIG_PATH = "output/OSU_vs_Tenn_12_21_24_playclock_config.json"
TIMEOUT_CONFIG_PATH = "output/OSU_vs_Tenn_12_21_24_timeout_config.json"  # optional; tracker skipped if absent
DIGIT_TEMPLATE_PATH = "output/debug/digit_templates"  # directory holding pre-built digit templates
V3_BASELINE_PATH = "output/benchmarks/v3_special_plays_baseline.json"  # ground truth for comparison

# Scorebug region (from previous sessions)
# Presumably (x, y, width, height) in pixels -- TODO confirm ordering against
# ScorebugDetector.set_fixed_region.
SCOREBUG_REGION = (128, 975, 1669, 46)

# Minimum play duration filter (same as main pipeline)
MIN_PLAY_DURATION = 3.0
def load_v3_baseline():
    """Read the v3 baseline JSON file and return its list of plays.

    Returns:
        list: The "plays" entry of the baseline file, or [] when absent.
    """
    baseline_file = Path(V3_BASELINE_PATH)
    payload = json.loads(baseline_file.read_text(encoding="utf-8"))
    return payload.get("plays", [])
def find_matching_play(detected_play: dict, baseline_plays: list, tolerance: float = 5.0):
    """
    Find the closest matching play in the baseline.

    A baseline play matches when its start time is within ``tolerance``
    seconds of the detected play's start time. When several baseline plays
    fall inside the tolerance window, the one *closest* in time is returned.
    (The previous implementation returned the first within tolerance, which
    could bind a detected play to the wrong baseline when two baseline plays
    are less than ``tolerance`` apart.)

    Args:
        detected_play: Detected play dict with start_time
        baseline_plays: List of baseline plays
        tolerance: Time tolerance in seconds (inclusive)

    Returns:
        Matching baseline play or None
    """
    detected_start = detected_play.get("start_time", 0)
    best = None
    best_delta = None
    for baseline in baseline_plays:
        delta = abs(detected_start - baseline.get("start_time", 0))
        # Strict "<" keeps the earliest entry on exact ties, matching the
        # original first-hit preference.
        if delta <= tolerance and (best_delta is None or delta < best_delta):
            best, best_delta = baseline, delta
    return best
def compare_results(detected_plays: list, baseline_plays: list, tolerance: float = 5.0):
    """
    Compare detected plays against baseline.

    Each detected play is greedily matched to the closest *unclaimed*
    baseline play whose start time is within ``tolerance`` seconds.

    Fixes over the previous version:
    - ``baseline_plays.index(match)`` located the first *equal* dict, so a
      legitimate match against a duplicate baseline entry was miscounted as
      a false positive; matching is now done by index.
    - A detected play whose closest baseline was already claimed became a
      false positive even when another unclaimed baseline was within
      tolerance; unclaimed candidates are now considered directly.

    Args:
        detected_plays: List of detected play dicts with "start_time".
        baseline_plays: List of baseline play dicts with "start_time".
        tolerance: Matching window in seconds (inclusive; default 5.0
            preserves the original behavior).

    Returns dict with:
    - true_positives: Plays in both detected and baseline
    - false_positives: Plays detected but not in baseline
    - false_negatives: Plays in baseline but not detected
    - counts: {"tp", "fp", "fn"}
    - metrics: {"precision", "recall", "f1"}
    """
    matched_baseline = set()
    true_positives = []
    false_positives = []
    # Find matches for detected plays
    for detected in detected_plays:
        detected_start = detected.get("start_time", 0)
        match_idx = None
        best_delta = None
        for idx, baseline in enumerate(baseline_plays):
            if idx in matched_baseline:
                continue  # each baseline play can be claimed at most once
            delta = abs(detected_start - baseline.get("start_time", 0))
            if delta <= tolerance and (best_delta is None or delta < best_delta):
                match_idx, best_delta = idx, delta
        if match_idx is None:
            false_positives.append(detected)
        else:
            matched_baseline.add(match_idx)
            true_positives.append({"detected": detected, "baseline": baseline_plays[match_idx]})
    # Find unmatched baseline plays (false negatives)
    false_negatives = [bp for i, bp in enumerate(baseline_plays) if i not in matched_baseline]
    # Calculate metrics (guard each denominator against the empty case)
    tp = len(true_positives)
    fp = len(false_positives)
    fn = len(false_negatives)
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
    return {
        "true_positives": true_positives,
        "false_positives": false_positives,
        "false_negatives": false_negatives,
        "counts": {"tp": tp, "fp": fp, "fn": fn},
        "metrics": {"precision": precision, "recall": recall, "f1": f1},
    }
def run_full_video_evaluation():
    """Run full video evaluation with template-based clock reading.

    Pipeline: validate required input files, load the v3 baseline and the
    digit-template library, run PlayDetector over the whole video with
    template matching enabled, filter out short plays, score the result
    against the baseline, log and save a report, and return the verdict.

    Returns:
        bool: True when recall >= 0.95 and precision >= 0.90; False on any
        setup failure (missing file, template load failure) or when the
        metrics fall short.
    """
    logger.info("=" * 70)
    logger.info("FULL VIDEO EVALUATION: Template-Based Play Clock Reading")
    logger.info("=" * 70)
    # Check files exist
    # NOTE: TIMEOUT_CONFIG_PATH is intentionally not in this list -- it is
    # optional and handled separately below.
    for path, name in [
        (VIDEO_PATH, "Video"),
        (TEMPLATE_PATH, "Scorebug template"),
        (PLAYCLOCK_CONFIG_PATH, "Play clock config"),
        (DIGIT_TEMPLATE_PATH, "Digit templates"),
        (V3_BASELINE_PATH, "V3 baseline"),
    ]:
        if not Path(path).exists():
            logger.error("%s not found: %s", name, path)
            return False
    # Load v3 baseline
    logger.info("\n[Step 1] Loading v3 baseline...")
    baseline_plays = load_v3_baseline()
    logger.info("V3 baseline plays: %d", len(baseline_plays))
    # Check digit templates
    logger.info("\n[Step 2] Loading digit templates...")
    template_library = DigitTemplateLibrary()
    if not template_library.load(DIGIT_TEMPLATE_PATH):
        logger.error("Failed to load digit templates from %s", DIGIT_TEMPLATE_PATH)
        return False
    # Report how complete the digit-template set is before committing to a
    # long detection run.
    coverage = template_library.get_coverage_status()
    logger.info("Template coverage: %d/%d", coverage["total_have"], coverage["total_needed"])
    logger.info(" Ones (center): %s", coverage["ones_center_have"])
    logger.info(" Ones (right): %s", coverage["ones_right_have"])
    logger.info(" Tens (left): %s", coverage["tens_have"])
    logger.info(" Blank: %s", "YES" if coverage["has_blank"] else "NO")
    # Create detection config with template path
    logger.info("\n[Step 3] Setting up detection pipeline...")
    detection_config = DetectionConfig(
        video_path=VIDEO_PATH,
        template_path=TEMPLATE_PATH,
        clock_region_config_path=PLAYCLOCK_CONFIG_PATH,
        start_time=0.0,
        end_time=None,  # Full video
        frame_interval=0.5,  # sample every half second -- presumably seconds; confirm against DetectionConfig
        use_template_matching=True,
        digit_template_path=DIGIT_TEMPLATE_PATH,
    )
    # Initialize timeout tracker if config exists (optional component).
    timeout_tracker = None
    if Path(TIMEOUT_CONFIG_PATH).exists():
        timeout_tracker = TrackTimeouts(config_path=TIMEOUT_CONFIG_PATH)
        logger.info("Timeout tracker initialized")
    # Initialize detector
    detector = PlayDetector(detection_config, timeout_tracker=timeout_tracker)
    # Set fixed scorebug region (skips per-run scorebug search)
    detector.scorebug_detector.set_fixed_region(SCOREBUG_REGION)
    logger.info("Scorebug region set: %s", SCOREBUG_REGION)
    # Run detection
    logger.info("\n[Step 4] Running detection on full video...")
    logger.info("This may take several minutes...")
    start_time = time.time()
    result = detector.detect()
    elapsed = time.time() - start_time
    logger.info("Detection complete in %.1f seconds (%.1f minutes)", elapsed, elapsed / 60)
    # Filter short plays (same MIN_PLAY_DURATION threshold as the main
    # pipeline; falls back to end_time - start_time when "duration" is absent).
    detected_plays = []
    for play in result.plays:
        duration = play.get("duration", play.get("end_time", 0) - play.get("start_time", 0))
        if duration >= MIN_PLAY_DURATION:
            detected_plays.append(play)
    logger.info("Detected plays (after filtering): %d", len(detected_plays))
    # Compare against baseline
    logger.info("\n[Step 5] Comparing against v3 baseline...")
    comparison = compare_results(detected_plays, baseline_plays)
    # Print results
    counts = comparison["counts"]
    metrics = comparison["metrics"]
    logger.info("\n" + "=" * 70)
    logger.info("EVALUATION RESULTS")
    logger.info("=" * 70)
    logger.info("V3 Baseline plays: %d", len(baseline_plays))
    logger.info("Detected plays: %d", len(detected_plays))
    logger.info("")
    logger.info("True Positives (matched): %d", counts["tp"])
    logger.info("False Positives (extra): %d", counts["fp"])
    logger.info("False Negatives (missed): %d", counts["fn"])
    logger.info("")
    logger.info("Precision: %.1f%% (detected plays that match baseline)", metrics["precision"] * 100)
    logger.info("Recall: %.1f%% (baseline plays that were detected)", metrics["recall"] * 100)
    logger.info("F1 Score: %.1f%%", metrics["f1"] * 100)
    logger.info("")
    logger.info("Total processing time: %.1f seconds (%.1f minutes)", elapsed, elapsed / 60)
    # Show timing breakdown (per-stage seconds reported by the detector)
    if result.timing:
        logger.info("\nTiming breakdown:")
        for key, value in result.timing.items():
            logger.info(" %s: %.1fs", key, value)
    # List false negatives (missed plays) -- capped at 10 to keep logs readable
    if comparison["false_negatives"]:
        logger.info("\n--- MISSED PLAYS (False Negatives) ---")
        for i, play in enumerate(comparison["false_negatives"][:10]):
            start = play.get("start_time", 0)
            minutes = int(start // 60)
            seconds = start % 60
            logger.info(" %d. t=%d:%05.2f (%.1fs)", i + 1, minutes, seconds, start)
        if len(comparison["false_negatives"]) > 10:
            logger.info(" ... and %d more", len(comparison["false_negatives"]) - 10)
    # List false positives (extra detections) -- same 10-entry cap
    if comparison["false_positives"]:
        logger.info("\n--- EXTRA DETECTIONS (False Positives) ---")
        for i, play in enumerate(comparison["false_positives"][:10]):
            start = play.get("start_time", 0)
            minutes = int(start // 60)
            seconds = start % 60
            logger.info(" %d. t=%d:%05.2f (%.1fs)", i + 1, minutes, seconds, start)
        if len(comparison["false_positives"]) > 10:
            logger.info(" ... and %d more", len(comparison["false_positives"]) - 10)
    # Check if specifically testing for the missed play at 1:52:06 (6726s)
    # -- a known miss from a prior run; wider 10s window than the 5s matcher.
    target_time = 6726.0  # 1:52:06
    found_target = False
    for play in detected_plays:
        if abs(play.get("start_time", 0) - target_time) <= 10:
            found_target = True
            logger.info("\n*** MILESTONE: Play near 1:52:06 (6726s) WAS DETECTED! ***")
            logger.info(" Start time: %.1fs", play.get("start_time", 0))
            break
    if not found_target:
        logger.info("\n*** WARNING: Play near 1:52:06 (6726s) was NOT detected ***")
    # Save results
    output_path = Path("output/benchmarks/template_matching_evaluation.json")
    output_path.parent.mkdir(parents=True, exist_ok=True)
    results_data = {
        "video": VIDEO_PATH,
        "method": "template_matching",
        "baseline_plays": len(baseline_plays),
        "detected_plays": len(detected_plays),
        "counts": counts,
        "metrics": metrics,
        "elapsed_seconds": elapsed,
        "timing": result.timing,
        "plays": detected_plays,
        # Only start times are persisted for FN/FP to keep the report small.
        "false_negatives": [{"start_time": p.get("start_time")} for p in comparison["false_negatives"]],
        "false_positives": [{"start_time": p.get("start_time")} for p in comparison["false_positives"]],
    }
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(results_data, f, indent=2)
    logger.info("\nResults saved to: %s", output_path)
    # Pass criteria
    passed = metrics["recall"] >= 0.95 and metrics["precision"] >= 0.90
    if passed:
        logger.info("\nEVALUATION: PASSED (recall >= 95%%, precision >= 90%%)")
    else:
        logger.info("\nEVALUATION: NEEDS REVIEW (recall=%.1f%%, precision=%.1f%%)", metrics["recall"] * 100, metrics["precision"] * 100)
    return passed
| if __name__ == "__main__": | |
| success = run_full_video_evaluation() | |
| sys.exit(0 if success else 1) | |