Spaces:
Sleeping
Sleeping
| """ | |
| Benchmark different frame extraction methods to assess performance impact. | |
| Compares: | |
| 1. OpenCV frame-based seeking (CAP_PROP_POS_FRAMES) - current method | |
| 2. OpenCV time-based seeking (CAP_PROP_POS_MSEC) | |
| 3. FFmpeg single-frame extraction (one call per frame) | |
| 4. FFmpeg batch extraction (one call for multiple frames) | |
| 5. FFmpeg segment extraction followed by a sequential OpenCV read | |
| 6. OpenCV sequential read with skip | |
| 7. FFmpeg raw-frame pipe to OpenCV (no temp files) | |
| Usage: | |
| python scripts/benchmark_extraction_methods.py | |
| """ | |
| import json | |
| import logging | |
| import os | |
| import subprocess | |
| import sys | |
| import tempfile | |
| import time | |
| from pathlib import Path | |
| from typing import Any, Dict, List, Optional | |
| import cv2 | |
| import numpy as np | |
# Configure logging once at import time (INFO level, timestamped lines) and
# create the module-level logger used for all benchmark output.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
def load_texas_config() -> Dict[str, Any]:
    """Read and parse the JSON config saved for the Texas video.

    The config path is relative to the current working directory.

    Returns:
        The decoded JSON document.
    """
    raw_text = Path("output/OSU_vs_Texas_01_10_25_config.json").read_text()
    return json.loads(raw_text)
| # ============================================================================= | |
| # Method 1: OpenCV Frame-Based Seeking (Current Method) | |
| # ============================================================================= | |
def benchmark_opencv_frame_seeking(video_path: str, timestamps: List[float]) -> Dict[str, Any]:
    """
    Benchmark OpenCV's CAP_PROP_POS_FRAMES seeking.

    This is the current method used in the pipeline: each timestamp is turned
    into a frame index using the FPS reported by the capture, the capture is
    positioned on that index, and one frame is decoded.

    Args:
        video_path: Path of the video handed to ``cv2.VideoCapture``.
        timestamps: Target positions in seconds; one seek + read per entry.

    Returns:
        A dict with ``method``, ``frames_requested``, ``frames_extracted``,
        ``total_time``, ``time_per_frame`` and ``fps`` (requests per second of
        wall time), or a dict with ``method`` and ``error`` when nothing could
        be measured.
    """
    method = "OpenCV Frame Seeking"
    # The averages below divide by the request count, so reject an empty
    # request up front instead of failing with ZeroDivisionError.
    if not timestamps:
        return {"method": method, "error": "No timestamps provided"}

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return {"method": method, "error": "Failed to open video"}
        fps = cap.get(cv2.CAP_PROP_FPS)
        frames_extracted = 0
        t_start = time.perf_counter()
        for ts in timestamps:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(ts * fps))
            ret, _ = cap.read()
            if ret:
                frames_extracted += 1
        t_elapsed = time.perf_counter() - t_start
    finally:
        # Release the capture even if seeking or decoding raised.
        cap.release()

    requested = len(timestamps)
    return {
        "method": method,
        "frames_requested": requested,
        "frames_extracted": frames_extracted,
        "total_time": t_elapsed,
        "time_per_frame": t_elapsed / requested,
        "fps": requested / t_elapsed if t_elapsed > 0 else 0,
    }
| # ============================================================================= | |
| # Method 2: OpenCV Time-Based Seeking | |
| # ============================================================================= | |
def benchmark_opencv_time_seeking(video_path: str, timestamps: List[float]) -> Dict[str, Any]:
    """
    Benchmark OpenCV's CAP_PROP_POS_MSEC seeking.

    Each timestamp is converted to milliseconds, the capture is positioned on
    it, and one frame is decoded.

    Args:
        video_path: Path of the video handed to ``cv2.VideoCapture``.
        timestamps: Target positions in seconds; one seek + read per entry.

    Returns:
        A dict with ``method``, ``frames_requested``, ``frames_extracted``,
        ``total_time``, ``time_per_frame`` and ``fps`` (requests per second of
        wall time), or a dict with ``method`` and ``error`` when nothing could
        be measured.
    """
    method = "OpenCV Time Seeking"
    # The averages below divide by the request count, so reject an empty
    # request up front instead of failing with ZeroDivisionError.
    if not timestamps:
        return {"method": method, "error": "No timestamps provided"}

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return {"method": method, "error": "Failed to open video"}
        frames_extracted = 0
        t_start = time.perf_counter()
        for ts in timestamps:
            cap.set(cv2.CAP_PROP_POS_MSEC, ts * 1000.0)
            ret, _ = cap.read()
            if ret:
                frames_extracted += 1
        t_elapsed = time.perf_counter() - t_start
    finally:
        # Release the capture even if seeking or decoding raised.
        cap.release()

    requested = len(timestamps)
    return {
        "method": method,
        "frames_requested": requested,
        "frames_extracted": frames_extracted,
        "total_time": t_elapsed,
        "time_per_frame": t_elapsed / requested,
        "fps": requested / t_elapsed if t_elapsed > 0 else 0,
    }
| # ============================================================================= | |
| # Method 3: FFmpeg Single Frame Extraction | |
| # ============================================================================= | |
def benchmark_ffmpeg_single_frame(video_path: str, timestamps: List[float]) -> Dict[str, Any]:
    """
    Benchmark FFmpeg extraction, one frame at a time.

    For every timestamp a separate ``ffmpeg`` process seeks to that position,
    writes a single PNG to a temporary file, and the PNG is loaded back with
    ``cv2.imread``. Temp-file creation and cleanup are part of the measured
    time.

    Args:
        video_path: Path of the video passed to ``ffmpeg -i``.
        timestamps: Target positions in seconds; one ffmpeg call per entry.

    Returns:
        A dict with ``method``, ``frames_requested``, ``frames_extracted``,
        ``total_time``, ``time_per_frame`` and ``fps`` (requests per second of
        wall time), or a dict with ``method`` and ``error`` when ``timestamps``
        is empty.

    Raises:
        subprocess.TimeoutExpired: If a single ffmpeg call exceeds 30 seconds.
    """
    method = "FFmpeg Single Frame"
    # The averages below divide by the request count, so reject an empty
    # request up front instead of failing with ZeroDivisionError.
    if not timestamps:
        return {"method": method, "error": "No timestamps provided"}

    frames_extracted = 0
    t_start = time.perf_counter()
    for ts in timestamps:
        # Reserve a unique output name; the placeholder file already exists,
        # which is why ffmpeg must be told to overwrite (-y).
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            tmp_path = tmp.name
        try:
            cmd = [
                "ffmpeg",
                "-y",  # global option, placed ahead of the inputs per ffmpeg convention
                "-ss",
                str(ts),
                "-i",
                str(video_path),
                "-frames:v",
                "1",
                "-q:v",
                "2",
                "-loglevel",
                "error",
                tmp_path,
            ]
            result = subprocess.run(cmd, capture_output=True, timeout=30)
            # A frame only counts when ffmpeg succeeded AND the image decodes.
            if result.returncode == 0 and cv2.imread(tmp_path) is not None:
                frames_extracted += 1
        finally:
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
    t_elapsed = time.perf_counter() - t_start

    requested = len(timestamps)
    return {
        "method": method,
        "frames_requested": requested,
        "frames_extracted": frames_extracted,
        "total_time": t_elapsed,
        "time_per_frame": t_elapsed / requested,
        "fps": requested / t_elapsed if t_elapsed > 0 else 0,
    }
| # ============================================================================= | |
| # Method 4: FFmpeg Batch Extraction (select filter) | |
| # ============================================================================= | |
def benchmark_ffmpeg_batch_select(video_path: str, timestamps: List[float]) -> Dict[str, Any]:
    """
    Benchmark FFmpeg batch extraction using select filter.

    Extracts all frames in a single ffmpeg call: a ``select`` expression made
    of one ``between(t, ts - tol, ts + tol)`` term per timestamp keeps only the
    frames near the requested positions, and the kept frames are written as
    numbered PNGs into a temporary directory.

    Args:
        video_path: Path of the video passed to ``ffmpeg -i``.
        timestamps: Target positions in seconds.

    Returns:
        A dict with ``method``, ``frames_requested``, ``frames_extracted``
        (number of PNGs written), ``total_time``, ``time_per_frame``, ``fps``
        and ``note``; or a dict with ``method`` and ``error`` (plus ``details``
        holding ffmpeg's stderr on a failed run).

    Raises:
        subprocess.TimeoutExpired: If the ffmpeg call exceeds 120 seconds.
    """
    method = "FFmpeg Batch Select"
    # An empty request would produce an empty select expression and a
    # division by zero in the averages below.
    if not timestamps:
        return {"method": method, "error": "No timestamps provided"}

    # Use 'between' to select frames near each timestamp
    # (within 0.02s = ~1 frame at 60fps).
    tolerance = 0.02

    with tempfile.TemporaryDirectory() as tmp_dir:
        t_start = time.perf_counter()
        select_expr = "+".join(
            f"between(t,{ts-tolerance},{ts+tolerance})" for ts in timestamps
        )
        cmd = [
            "ffmpeg",
            "-y",  # global option, placed ahead of the inputs per ffmpeg convention
            "-i",
            str(video_path),
            "-vf",
            f"select='{select_expr}',setpts=N/TB",
            "-vsync",
            "vfr",
            "-q:v",
            "2",
            "-loglevel",
            "error",
            f"{tmp_dir}/frame_%04d.png",
        ]
        result = subprocess.run(cmd, capture_output=True, timeout=120)
        t_elapsed = time.perf_counter() - t_start

        # A failed run must not be reported as a (suspiciously fast) timing.
        if result.returncode != 0:
            return {
                "method": method,
                "error": "FFmpeg batch extraction failed",
                "details": result.stderr.decode(errors="replace").strip(),
            }

        # Count extracted frames
        frames_extracted = sum(1 for _ in Path(tmp_dir).glob("frame_*.png"))

    requested = len(timestamps)
    return {
        "method": method,
        "frames_requested": requested,
        "frames_extracted": frames_extracted,
        "total_time": t_elapsed,
        "time_per_frame": t_elapsed / requested,
        "fps": requested / t_elapsed if t_elapsed > 0 else 0,
        "note": "Single ffmpeg call with select filter",
    }
| # ============================================================================= | |
| # Method 5: FFmpeg Segment + Sequential Read | |
| # ============================================================================= | |
def benchmark_ffmpeg_segment_opencv_read(video_path: str, timestamps: List[float], interval: float) -> Dict[str, Any]:
    """
    Benchmark: Extract a video segment with ffmpeg, then read sequentially with OpenCV.

    The segment spans the requested timestamps plus a one-second buffer on
    each side and is re-encoded with libx264 (ultrafast, no audio) into a
    temporary MP4. That file is then read front to back with OpenCV, decoding
    one frame per step and skipping the frames in between with ``grab()``.

    Args:
        video_path: Path of the source video passed to ``ffmpeg -i``.
        timestamps: Target positions in seconds (source-video time).
        interval: Desired spacing between sampled frames, in seconds. A decoded
            frame counts as extracted when it lies within ``interval / 2`` of
            any timestamp.

    Returns:
        A dict with ``method``, ``frames_requested``, ``frames_extracted``,
        ``total_time``, ``extraction_time`` (ffmpeg part), ``read_time``
        (OpenCV part), ``time_per_frame`` and ``fps``; or a dict with
        ``method`` and ``error``.

    Raises:
        subprocess.TimeoutExpired: If the ffmpeg call exceeds 120 seconds.
    """
    method = "FFmpeg Segment + OpenCV Read"
    if not timestamps:
        return {"method": method, "error": "No timestamps provided"}
    # With a non-positive interval the matching window (interval / 2) is empty,
    # so no frame could ever be counted.
    if interval <= 0:
        return {"method": method, "error": "interval must be positive"}

    # 1 second buffer on both sides; never seek before the start of the file.
    start_ts = max(0.0, min(timestamps) - 1.0)
    end_ts = max(timestamps) + 1.0
    segment_duration = end_ts - start_ts

    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        tmp_path = tmp.name
    try:
        t_start = time.perf_counter()
        # Extract segment with ffmpeg (accurate seeking)
        cmd = [
            "ffmpeg",
            "-y",  # global option; the placeholder temp file must be overwritten
            "-ss",
            str(start_ts),
            "-i",
            str(video_path),
            "-t",
            str(segment_duration),
            "-c:v",
            "libx264",
            "-preset",
            "ultrafast",
            "-crf",
            "18",
            "-an",  # No audio
            "-loglevel",
            "error",
            tmp_path,
        ]
        result = subprocess.run(cmd, capture_output=True, timeout=120)
        if result.returncode != 0:
            return {"method": method, "error": "FFmpeg segment extraction failed"}
        t_extract = time.perf_counter() - t_start

        # Now read sequentially from the segment
        cap = cv2.VideoCapture(tmp_path)
        try:
            if not cap.isOpened():
                return {"method": method, "error": "Failed to open extracted segment"}
            fps = cap.get(cv2.CAP_PROP_FPS)
            frame_skip = max(1, int(interval * fps))
            # Each iteration consumes exactly `frame_skip` frames, so the video
            # time it covers is frame_skip / fps. That differs from `interval`
            # whenever interval * fps is not an integer; advancing by `interval`
            # would make the bookkeeping drift away from the frames really read.
            step = frame_skip / fps if fps > 0 else interval
            half_window = interval / 2
            frames_extracted = 0

            t_read_start = time.perf_counter()
            current_time = 0.0
            while current_time < segment_duration:
                ret, _ = cap.read()
                if not ret:
                    break
                # Check if this frame is near any of our target timestamps
                actual_video_time = start_ts + current_time
                if any(abs(actual_video_time - ts) < half_window for ts in timestamps):
                    frames_extracted += 1
                # Skip frames without decoding them
                for _ in range(frame_skip - 1):
                    cap.grab()
                current_time += step
        finally:
            # Release the capture even if reading raised.
            cap.release()
        t_read = time.perf_counter() - t_read_start
        t_elapsed = time.perf_counter() - t_start
    finally:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    requested = len(timestamps)
    return {
        "method": method,
        "frames_requested": requested,
        "frames_extracted": frames_extracted,
        "total_time": t_elapsed,
        "extraction_time": t_extract,
        "read_time": t_read,
        "time_per_frame": t_elapsed / requested,
        "fps": requested / t_elapsed if t_elapsed > 0 else 0,
    }
| # ============================================================================= | |
| # Method 6: OpenCV Sequential Read with Skip (Baseline) | |
| # ============================================================================= | |
def benchmark_opencv_sequential(video_path: str, start_time: float, num_frames: int, interval: float) -> Dict[str, Any]:
    """
    Benchmark OpenCV sequential reading with frame skipping.

    This avoids per-frame seeking: the capture is positioned once at
    ``start_time`` and then, for each requested frame, one frame is decoded
    and the following ``frame_skip - 1`` frames are skipped with ``grab()``.

    Args:
        video_path: Path of the video handed to ``cv2.VideoCapture``.
        start_time: Position of the single initial seek, in seconds.
        num_frames: Number of frames to decode.
        interval: Desired spacing between decoded frames, in seconds; turned
            into a whole number of frames using the FPS reported by the capture.

    Returns:
        A dict with ``method``, ``frames_requested``, ``frames_extracted``,
        ``total_time``, ``time_per_frame``, ``fps`` and ``note``; or a dict
        with ``method`` and ``error`` when nothing could be measured.
    """
    method = "OpenCV Sequential Read"
    # The averages below divide by num_frames, so reject an empty request up
    # front instead of failing with ZeroDivisionError.
    if num_frames <= 0:
        return {"method": method, "error": "num_frames must be positive"}

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return {"method": method, "error": "Failed to open video"}
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_skip = max(1, int(interval * fps))

        t_start = time.perf_counter()
        # Seek to start position once
        cap.set(cv2.CAP_PROP_POS_MSEC, start_time * 1000.0)
        frames_extracted = 0
        for _ in range(num_frames):
            ret, _frame = cap.read()
            if not ret:
                break
            frames_extracted += 1
            # Skip frames without decoding them
            for _ in range(frame_skip - 1):
                cap.grab()
        t_elapsed = time.perf_counter() - t_start
    finally:
        # Release the capture even if reading raised.
        cap.release()

    return {
        "method": method,
        "frames_requested": num_frames,
        "frames_extracted": frames_extracted,
        "total_time": t_elapsed,
        "time_per_frame": t_elapsed / num_frames,
        "fps": num_frames / t_elapsed if t_elapsed > 0 else 0,
        "note": "Single seek + sequential read with skip",
    }
| # ============================================================================= | |
| # Method 7: FFmpeg pipe to OpenCV (no temp files) | |
| # ============================================================================= | |
def benchmark_ffmpeg_pipe(video_path: str, start_time: float, duration: float, interval: float) -> Dict[str, Any]:
    """
    Benchmark FFmpeg piping raw frames to OpenCV.

    The frame size is read from the video with OpenCV, then a single ffmpeg
    process resamples the requested range to ``1 / interval`` frames per
    second and writes raw BGR24 frames to stdout. Each frame is read from the
    pipe and wrapped in a NumPy array. No temp files are involved.

    Args:
        video_path: Path of the video.
        start_time: Start of the range, in seconds (passed to ``-ss``).
        duration: Length of the range, in seconds (passed to ``-t``).
        interval: Desired spacing between frames, in seconds.

    Returns:
        A dict with ``method``, ``frames_requested`` (``duration / interval``
        truncated), ``frames_extracted``, ``total_time``, ``time_per_frame``,
        ``fps`` and ``note``; or a dict with ``method`` and ``error`` when the
        benchmark could not be set up.
    """
    method = "FFmpeg Pipe to OpenCV"
    # The output rate is 1 / interval, so a zero interval cannot be honoured.
    if interval <= 0:
        return {"method": method, "error": "interval must be positive"}

    # Get video dimensions first
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            return {"method": method, "error": "Failed to open video"}
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        cap.release()

    frame_size = width * height * 3  # bytes per BGR24 frame
    # With a zero frame size, read(0) returns b"" whose length "matches",
    # and the read loop below would never terminate.
    if frame_size <= 0:
        return {"method": method, "error": "Could not determine video dimensions"}

    # Calculate output fps based on interval
    output_fps = 1.0 / interval

    t_start = time.perf_counter()
    cmd = [
        "ffmpeg",
        "-ss",
        str(start_time),
        "-i",
        str(video_path),
        "-t",
        str(duration),
        "-vf",
        f"fps={output_fps}",
        "-f",
        "rawvideo",
        "-pix_fmt",
        "bgr24",
        "-loglevel",
        "error",
        "-",
    ]
    frames_extracted = 0
    # stderr is discarded rather than piped: it is never read here, and an
    # undrained pipe can block ffmpeg once the OS pipe buffer fills up.
    # The context manager closes stdout and reaps the process on any exit path.
    with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL) as process:
        while True:
            raw_frame = process.stdout.read(frame_size)
            if len(raw_frame) != frame_size:
                # EOF (or a truncated final frame): ffmpeg is done.
                break
            # Build the image array as a real consumer would; the result itself is not used.
            frame = np.frombuffer(raw_frame, dtype=np.uint8).reshape((height, width, 3))
            frames_extracted += 1
        process.wait()
        t_elapsed = time.perf_counter() - t_start

    expected_frames = int(duration / interval)
    return {
        "method": method,
        "frames_requested": expected_frames,
        "frames_extracted": frames_extracted,
        "total_time": t_elapsed,
        "time_per_frame": t_elapsed / max(1, frames_extracted),
        "fps": frames_extracted / t_elapsed if t_elapsed > 0 else 0,
        "note": "FFmpeg pipes raw frames, no temp files",
    }
def _record_result(results: List[Dict[str, Any]], label: str, result: Dict[str, Any]) -> Dict[str, Any]:
    """Append *result* to *results* and log its timing, or the error that prevented a measurement."""
    # Error dicts may not carry a method name; fill it in so the summary can label the row.
    result.setdefault("method", label)
    results.append(result)
    if "error" in result:
        logger.error(" Failed: %s", result["error"])
    else:
        logger.info(" Done: %.2fs total, %.3fs/frame", result["total_time"], result["time_per_frame"])
    return result


def main():
    """Run all benchmarks and compare.

    Loads the Texas video config, runs each benchmark over the same 60-second
    range sampled every 0.2 s, logs a table sorted by time per frame with the
    speed-up relative to the current method (OpenCV frame seeking), and logs a
    recommendation. A benchmark that returns an error dict is reported as an
    error row instead of aborting the run.
    """
    config = load_texas_config()
    video_path = config["video_path"]
    logger.info("=" * 80)
    logger.info("FRAME EXTRACTION METHOD BENCHMARK")
    logger.info("=" * 80)
    logger.info("Video: %s", video_path)
    logger.info("")

    # Test parameters
    # Simulate typical pipeline: extract frames every 0.2s over a 60-second segment
    interval = 0.2  # seconds between frames
    segment_duration = 60.0  # seconds
    start_time = 5900.0  # Start in the problem area
    num_frames = int(segment_duration / interval)
    timestamps = [start_time + (i * interval) for i in range(num_frames)]
    logger.info("Test parameters:")
    logger.info(" Segment: %.1fs to %.1fs (%.1fs duration)", start_time, start_time + segment_duration, segment_duration)
    logger.info(" Interval: %.2fs", interval)
    logger.info(" Frames to extract: %d", num_frames)
    logger.info("")

    results: List[Dict[str, Any]] = []

    # Benchmark each method
    logger.info("Running benchmarks...")
    logger.info("-" * 40)

    # 1. Current method: OpenCV frame seeking
    logger.info(" Testing OpenCV Frame Seeking...")
    r1 = _record_result(results, "OpenCV Frame Seeking", benchmark_opencv_frame_seeking(video_path, timestamps))

    # 2. OpenCV time seeking
    logger.info(" Testing OpenCV Time Seeking...")
    _record_result(results, "OpenCV Time Seeking", benchmark_opencv_time_seeking(video_path, timestamps))

    # 3. FFmpeg single frame (only test subset - it's slow)
    subset_timestamps = timestamps[:20]  # Only test 20 frames
    logger.info(" Testing FFmpeg Single Frame (20 frames only)...")
    r3 = _record_result(results, "FFmpeg Single Frame", benchmark_ffmpeg_single_frame(video_path, subset_timestamps))

    # 4. OpenCV sequential read
    logger.info(" Testing OpenCV Sequential Read...")
    _record_result(results, "OpenCV Sequential Read", benchmark_opencv_sequential(video_path, start_time, num_frames, interval))

    # 5. FFmpeg pipe
    logger.info(" Testing FFmpeg Pipe to OpenCV...")
    _record_result(results, "FFmpeg Pipe to OpenCV", benchmark_ffmpeg_pipe(video_path, start_time, segment_duration, interval))

    logger.info("")
    logger.info("=" * 80)
    logger.info("RESULTS SUMMARY")
    logger.info("=" * 80)
    logger.info("")

    # Sort by time per frame; error dicts have no timing and sort last.
    results_sorted = sorted(results, key=lambda x: x.get("time_per_frame", float("inf")))

    # Baseline is the current method; None when it could not be measured.
    baseline_time = r1.get("time_per_frame")

    logger.info("%-30s %10s %10s %10s %10s", "Method", "Total(s)", "Per Frame", "FPS", "vs Current")
    logger.info("-" * 80)
    for r in results_sorted:
        if "error" in r:
            logger.info("%-30s ERROR: %s", r.get("method", "Unknown"), r["error"])
            continue
        # Identify the baseline row by identity: comparing the ratio to 1.0
        # would also relabel any other method that happens to tie.
        if r is r1:
            speedup_str = "baseline"
        elif baseline_time and r["time_per_frame"] > 0:
            speedup_str = f"{baseline_time / r['time_per_frame']:.2f}x"
        else:
            speedup_str = "n/a"
        logger.info(
            "%-30s %10.2f %10.4f %10.1f %10s",
            r["method"],
            r["total_time"],
            r["time_per_frame"],
            r["fps"],
            speedup_str,
        )

    logger.info("")
    logger.info("NOTES:")
    if "error" not in r3:
        logger.info(" - 'FFmpeg Single Frame' tested with only 20 frames (would be %.1fs for %d frames)", r3["time_per_frame"] * num_frames, num_frames)
    logger.info(" - 'FFmpeg Pipe' gives accurate timestamps AND good performance")
    logger.info(" - 'OpenCV Sequential Read' is fastest but requires contiguous segments")
    logger.info("")

    # Recommendation: fastest successfully measured method among the ffmpeg-timed ones.
    accurate_methods = ("FFmpeg Pipe to OpenCV", "FFmpeg Segment + OpenCV Read")
    fastest_accurate = next(
        (r for r in results_sorted if "error" not in r and r["method"] in accurate_methods),
        None,
    )
    if fastest_accurate:
        logger.info("RECOMMENDATION:")
        logger.info(" Use '%s' for accurate VFR handling", fastest_accurate["method"])
        # The comparison needs both a measured baseline and a non-zero per-frame time.
        if baseline_time and fastest_accurate["time_per_frame"] > 0:
            speedup = baseline_time / fastest_accurate["time_per_frame"]
            logger.info(" Performance: %.2fx %s than current method", speedup, "faster" if speedup > 1 else "slower")
# Run the benchmark suite only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()