# Source: cfb40/scripts/archive/v2/diagnose_confidence_distribution.py
# (commit f8f8a6d, "adding some v4 stuff", author: andytaylor-smg)
#!/usr/bin/env python3
"""
Analyze scorebug confidence distribution to find optimal threshold.
Focuses on:
1. Confidence during Play 9 (when scorebug is "lost")
2. Confidence during known non-scorebug periods (commercials, replays)
3. Finding the optimal threshold that separates true positives from true negatives
"""
import logging
import sys
from pathlib import Path
import cv2
import numpy as np
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
# Constants (scripts/archive/ -> project root)
PROJECT_ROOT = Path(__file__).parent.parent.parent
VIDEO_PATH = PROJECT_ROOT / "full_videos" / "OSU vs Tenn 12.21.24.mkv"
TEMPLATE_PATH = PROJECT_ROOT / "data" / "templates" / "scorebug_template_main.png"
# Known fixed region where the scorebug sits: (x, y, width, height) in pixels.
# NOTE(review): only x and y are consumed by get_confidence; the template's own
# dimensions determine the crop size.
FIXED_REGION = (96, 961, 1730, 66)
# Segment of the broadcast to analyze, in seconds from video start.
START_TIME = 2320.0
END_TIME = 2920.0
# Sampling step between analyzed frames, in seconds.
FRAME_INTERVAL = 0.5
def get_confidence(cap: cv2.VideoCapture, fps: float, timestamp: float, template: np.ndarray) -> float:
    """Return the scorebug template-match confidence at a given timestamp.

    Seeks the capture to the frame nearest *timestamp*, crops a template-sized
    region at the fixed scorebug location, and scores it with
    ``cv2.TM_CCOEFF_NORMED``.

    Args:
        cap: An opened video capture; its read position is moved by this call.
        fps: Frames per second of the video, used to map seconds -> frame index.
        timestamp: Time in seconds at which to sample.
        template: BGR template image to match (same channel layout as frames).

    Returns:
        The normalized correlation score (range [-1, 1]), or -1.0 as a
        sentinel when the frame cannot be read or the crop is clipped by the
        frame boundary.
    """
    frame_number = int(timestamp * fps)
    cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
    ret, frame = cap.read()
    if not ret:
        return -1.0
    # Only the region's origin is used; the crop is sized to the template so
    # matchTemplate yields a single 1x1 score.
    x, y, _, _ = FIXED_REGION
    th, tw = template.shape[:2]
    region = frame[y : y + th, x : x + tw]
    # Guard: if the fixed region runs off the frame edge the crop comes back
    # smaller than the template, and cv2.matchTemplate would raise instead of
    # returning a score. Treat that like an unreadable frame.
    if region.shape[0] < th or region.shape[1] < tw:
        return -1.0
    result = cv2.matchTemplate(region, template, cv2.TM_CCOEFF_NORMED)
    return float(result[0, 0])
def _report_overall(confidences):
    """Log summary statistics and a text histogram for all confidence values.

    Args:
        confidences: Non-empty list of confidence floats.
    """
    logger.info("")
    logger.info("=" * 70)
    logger.info("OVERALL CONFIDENCE DISTRIBUTION")
    logger.info("=" * 70)
    logger.info("Total frames: %d", len(confidences))
    logger.info("Min: %.3f", min(confidences))
    logger.info("Max: %.3f", max(confidences))
    logger.info("Mean: %.3f", np.mean(confidences))
    logger.info("Median: %.3f", np.median(confidences))
    logger.info("Std: %.3f", np.std(confidences))
    # Histogram buckets
    logger.info("")
    logger.info("Confidence histogram:")
    # NOTE(review): TM_CCOEFF_NORMED can go negative, so scores below 0 fall
    # outside these buckets and the percentages may sum to less than 100%.
    buckets = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    for i in range(len(buckets) - 1):
        low, high = buckets[i], buckets[i + 1]
        if i == len(buckets) - 2:
            # Close the final bucket on the right so a perfect 1.0 match is
            # counted instead of silently dropped.
            count = sum(1 for c in confidences if low <= c <= high)
        else:
            count = sum(1 for c in confidences if low <= c < high)
        pct = 100.0 * count / len(confidences)
        histogram_bar = "#" * int(pct / 2)
        logger.info("  [%.1f-%.1f): %4d (%5.1f%%) %s", low, high, count, pct, histogram_bar)


def _report_play9(all_results):
    """Log frame-by-frame confidence inside the Play 9 window (2674s-2690s).

    Args:
        all_results: List of (timestamp, confidence) tuples.

    Returns:
        The confidence values inside the window (possibly empty).
    """
    logger.info("")
    logger.info("=" * 70)
    logger.info("PLAY 9 ANALYSIS (2674s - 2690s)")
    logger.info("=" * 70)
    play9_results = [(ts, conf) for ts, conf in all_results if 2674 <= ts <= 2690]
    if not play9_results:
        # Guard: the sampling grid may miss the window entirely; min()/mean()
        # on an empty list would crash the whole report.
        logger.info("No sampled frames fall inside the Play 9 window")
        return []
    logger.info("Frame-by-frame confidence:")
    for ts, conf in play9_results:
        status = "DETECTED" if conf >= 0.6 else "MISSED"
        logger.info("  %.1fs: %.3f [%s at 0.6]", ts, conf, status)
    play9_confidences = [c for _, c in play9_results]
    logger.info("")
    logger.info("Play 9 stats:")
    logger.info("  Min: %.3f", min(play9_confidences))
    logger.info("  Max: %.3f", max(play9_confidences))
    logger.info("  Mean: %.3f", np.mean(play9_confidences))
    return play9_confidences


def _report_low_confidence(all_results):
    """Log statistics and examples for frames with confidence < 0.5.

    These are likely true negatives (commercials, replays).

    Args:
        all_results: List of (timestamp, confidence) tuples.
    """
    logger.info("")
    logger.info("=" * 70)
    logger.info("LOW CONFIDENCE FRAMES (likely true negatives)")
    logger.info("=" * 70)
    low_conf_results = [(ts, conf) for ts, conf in all_results if conf < 0.5]
    logger.info("Frames with confidence < 0.5: %d", len(low_conf_results))
    if not low_conf_results:
        return
    low_confidences = [c for _, c in low_conf_results]
    logger.info("Stats for low-confidence frames:")
    logger.info("  Min: %.3f", min(low_confidences))
    logger.info("  Max: %.3f", max(low_confidences))
    logger.info("  Mean: %.3f", np.mean(low_confidences))
    logger.info("  Median: %.3f", np.median(low_confidences))
    # Show some examples (capped at 20 to keep the log readable)
    logger.info("")
    logger.info("Sample low-confidence frames:")
    for ts, conf in low_conf_results[:20]:
        logger.info("  %.1fs: %.3f", ts, conf)
    if len(low_conf_results) > 20:
        logger.info("  ... and %d more", len(low_conf_results) - 20)


def _report_thresholds(all_confidences, play9_confidences):
    """Log gap-based threshold suggestion and detection counts per threshold.

    Args:
        all_confidences: Non-empty list of all confidence values.
        play9_confidences: Confidence values inside the Play 9 window
            (may be empty).
    """
    logger.info("")
    logger.info("=" * 70)
    logger.info("THRESHOLD ANALYSIS")
    logger.info("=" * 70)
    # Find the natural gap in the distribution: the largest jump between
    # adjacent sorted values, which should separate negatives from positives.
    sorted_conf = sorted(all_confidences)
    if len(sorted_conf) < 2:
        logger.info("Not enough samples for gap analysis")
    else:
        max_gap = 0.0
        gap_position = 1
        for i in range(1, len(sorted_conf)):
            gap = sorted_conf[i] - sorted_conf[i - 1]
            if gap > max_gap:
                max_gap = gap
                gap_position = i
        gap_low = sorted_conf[gap_position - 1]
        gap_high = sorted_conf[gap_position]
        suggested_threshold = (gap_low + gap_high) / 2
        logger.info("Largest gap in confidence distribution:")
        logger.info("  Gap: %.3f (between %.3f and %.3f)", max_gap, gap_low, gap_high)
        logger.info("  Suggested threshold: %.3f", suggested_threshold)
    # Show detection counts at various thresholds
    logger.info("")
    logger.info("Detection counts at various thresholds:")
    for threshold in [0.3, 0.4, 0.5, 0.55, 0.6, 0.65, 0.7, 0.8]:
        detected = sum(1 for c in all_confidences if c >= threshold)
        pct = 100.0 * detected / len(all_confidences)
        marker = "  <-- current" if threshold == 0.6 else ""
        logger.info("  %.2f: %4d / %4d (%.1f%%)%s", threshold, detected, len(all_confidences), pct, marker)
    # What threshold would catch Play 9?
    if play9_confidences:
        logger.info("")
        logger.info("To catch ALL of Play 9, threshold must be <= %.3f", min(play9_confidences))


def main():
    """Main entry point.

    Returns:
        Process exit code: 0 on success, 1 when the template or video cannot
        be loaded or no frames can be read.
    """
    logger.info("=" * 70)
    logger.info("CONFIDENCE DISTRIBUTION ANALYSIS")
    logger.info("=" * 70)
    # Load template and video, failing fast with a clear message instead of
    # crashing later inside matchTemplate or a division by zero.
    template = cv2.imread(str(TEMPLATE_PATH))
    if template is None:
        logger.error("Could not load template: %s", TEMPLATE_PATH)
        return 1
    cap = cv2.VideoCapture(str(VIDEO_PATH))
    if not cap.isOpened():
        logger.error("Could not open video: %s", VIDEO_PATH)
        return 1
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        # Collect all confidences
        logger.info("Collecting confidence values for full segment...")
        all_results = []
        timestamp = START_TIME
        while timestamp < END_TIME:
            conf = get_confidence(cap, fps, timestamp, template)
            # -1.0 is get_confidence's unreadable-frame sentinel; skip those
            # samples so they do not drag down min/mean/median.
            if conf != -1.0:
                all_results.append((timestamp, conf))
            timestamp += FRAME_INTERVAL
    finally:
        # Release the capture even if analysis raises.
        cap.release()
    if not all_results:
        logger.error("No frames could be read in the segment")
        return 1
    all_confidences = [c for _, c in all_results]
    _report_overall(all_confidences)
    play9_confidences = _report_play9(all_results)
    _report_low_confidence(all_results)
    _report_thresholds(all_confidences, play9_confidences)
    return 0
# Run as a script: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    sys.exit(main())