Spaces:
Running
on
Zero
Running
on
Zero
File size: 15,322 Bytes
ebfc6b3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 |
#!/usr/bin/env python3
"""
Split video into scenes using PySceneDetect.
This script provides a command-line interface for splitting videos into scenes using various detection algorithms.
It supports multiple detection methods, preview image generation, and customizable parameters for fine-tuning
the scene detection process.
Basic usage:
# Split video using default content-based detection
scenes_split.py input.mp4 output_dir/
# Save 3 preview images per scene
scenes_split.py input.mp4 output_dir/ --save-images 3
# Process specific duration and filter short scenes
scenes_split.py input.mp4 output_dir/ --duration 60s --filter-shorter-than 2s
Advanced usage:
# Content detection with minimum scene length and frame skip
scenes_split.py input.mp4 output_dir/ --detector content --min-scene-length 30 --frame-skip 2
# Use adaptive detection with a custom threshold and window size
scenes_split.py input.mp4 output_dir/ --detector adaptive --threshold 3.0 --adaptive-window 10
"""
from enum import Enum
from pathlib import Path
from typing import Any, List, Optional, Tuple

import typer
from scenedetect import (
    AdaptiveDetector,
    ContentDetector,
    HistogramDetector,
    SceneManager,
    ThresholdDetector,
    open_video,
)
from scenedetect.frame_timecode import FrameTimecode
from scenedetect.scene_manager import SceneDetector, write_scene_list_html
from scenedetect.scene_manager import save_images as save_scene_images
from scenedetect.stats_manager import StatsManager
from scenedetect.video_splitter import split_video_ffmpeg
# Typer application object; the CLI command below is registered on it.
app = typer.Typer(
    help="Split video into scenes using PySceneDetect.",
    no_args_is_help=True,
)
class DetectorType(str, Enum):
    """Scene detection algorithms selectable from the command line.

    Members:
        CONTENT: Detects fast cuts using HSV color space.
        ADAPTIVE: Detects fast two-phase cuts.
        THRESHOLD: Detects fast cuts/slow fades in from and out to a given
            threshold level.
        HISTOGRAM: Detects based on YUV histogram differences in adjacent
            frames.
    """

    CONTENT = "content"
    ADAPTIVE = "adaptive"
    THRESHOLD = "threshold"
    HISTOGRAM = "histogram"
def create_detector(
    detector_type: DetectorType,
    threshold: Optional[float] = None,
    min_scene_len: Optional[int] = None,
    luma_only: Optional[bool] = None,
    adaptive_window: Optional[int] = None,
    fade_bias: Optional[float] = None,
) -> SceneDetector:
    """Build the scene detector selected by ``detector_type``.

    Any parameter left as ``None`` is not forwarded, so the detector's own
    built-in default applies. Parameters that do not apply to the selected
    detector are ignored.

    Args:
        detector_type: Which detection algorithm to instantiate.
        threshold: Detection threshold (meaning varies by detector).
        min_scene_len: Minimum scene length in frames.
        luma_only: If True, only use brightness (content/adaptive detectors).
        adaptive_window: Window size for the adaptive detector.
        fade_bias: Bias for fade in/out detection (-1.0 to 1.0), threshold
            detector only.

    Returns:
        Configured scene detector instance.

    Raises:
        ValueError: If ``detector_type`` is not a known detector.
    """
    # Options understood by every detector; unset values are left out.
    options = {
        name: value
        for name, value in (("threshold", threshold), ("min_scene_len", min_scene_len))
        if value is not None
    }

    if detector_type == DetectorType.CONTENT:
        if luma_only is not None:
            options["luma_only"] = luma_only
        return ContentDetector(**options)

    if detector_type == DetectorType.ADAPTIVE:
        if adaptive_window is not None:
            options["window_width"] = adaptive_window
        if luma_only is not None:
            options["luma_only"] = luma_only
        # The adaptive detector names its threshold differently.
        if "threshold" in options:
            options["adaptive_threshold"] = options.pop("threshold")
        return AdaptiveDetector(**options)

    if detector_type == DetectorType.THRESHOLD:
        if fade_bias is not None:
            options["fade_bias"] = fade_bias
        return ThresholdDetector(**options)

    if detector_type == DetectorType.HISTOGRAM:
        return HistogramDetector(**options)

    raise ValueError(f"Unknown detector type: {detector_type}")
def validate_output_dir(output_dir: str) -> Path:
    """Validate the output directory and create it if it doesn't exist.

    Args:
        output_dir: Path to the output directory.

    Returns:
        Path object of the validated (and now existing) output directory.

    Raises:
        typer.BadParameter: If the path exists but is not a directory, or if
            the directory cannot be created.
    """
    path = Path(output_dir)
    if path.exists() and not path.is_dir():
        raise typer.BadParameter(f"{output_dir} exists but is not a directory")
    try:
        # Create missing parents too; an already existing directory is fine.
        path.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        raise typer.BadParameter(f"Cannot create output directory {output_dir}: {e}") from e
    return path
def parse_timecode(video: Any, time_str: Optional[str]) -> Optional[FrameTimecode]:
    """Parse a timecode string into a FrameTimecode object.

    Supports formats:
        - Frames: '123'
        - Seconds: '123s' or '123.45s'
        - Timecode: '00:02:03' or '00:02:03.456'

    Args:
        video: Video object to get the framerate from (its ``frame_rate``
            attribute is read).
        time_str: String to parse, or None. Surrounding whitespace is ignored.

    Returns:
        FrameTimecode object, or None if ``time_str`` is None.

    Raises:
        typer.BadParameter: If the string is not in a supported format.
    """
    if time_str is None:
        return None

    # int()/float() already tolerate surrounding whitespace; strip so the
    # trailing-"s" check behaves the same way.
    text = time_str.strip()
    fps = video.frame_rate
    try:
        if text.endswith("s"):
            # Seconds format
            value = float(text[:-1])
        elif ":" in text:
            # Timecode format (parsed by FrameTimecode itself)
            value = text
        else:
            # Frame number format
            value = int(text)
        return FrameTimecode(timecode=value, fps=fps)
    except (ValueError, OverflowError) as e:
        # OverflowError covers non-finite seconds such as "infs".
        raise typer.BadParameter(
            f"Invalid timecode format: {time_str}. Use frames (123), "
            f"seconds (123s/123.45s), or timecode (HH:MM:SS[.nnn])",
        ) from e
def detect_and_split_scenes(  # noqa: PLR0913
    video_path: str,
    output_dir: Path,
    detector_type: DetectorType,
    threshold: Optional[float] = None,
    min_scene_len: Optional[int] = None,
    max_scenes: Optional[int] = None,
    filter_shorter_than: Optional[str] = None,
    skip_start: Optional[int] = None,  # noqa: ARG001
    skip_end: Optional[int] = None,  # noqa: ARG001
    save_images_per_scene: int = 0,
    stats_file: Optional[str] = None,
    luma_only: bool = False,
    adaptive_window: Optional[int] = None,
    fade_bias: Optional[float] = None,
    downscale_factor: Optional[int] = None,
    frame_skip: int = 0,
    duration: Optional[str] = None,
) -> List[Tuple[FrameTimecode, FrameTimecode]]:
    """Detect and split scenes in a video using the specified parameters.

    Args:
        video_path: Path to input video.
        output_dir: Directory to save output split scenes.
        detector_type: Type of scene detector to use.
        threshold: Detection threshold.
        min_scene_len: Minimum scene length in frames.
        max_scenes: Maximum number of scenes to keep; extra trailing scenes
            are dropped. A falsy value (None or 0) means no limit.
        filter_shorter_than: Filter out scenes shorter than this duration
            (frames/seconds/timecode).
        skip_start: Number of frames to skip at start (currently unused).
        skip_end: Number of frames to skip at end (currently unused).
        save_images_per_scene: Number of images to save per scene (0 to disable).
            When enabled, an HTML report is also written to ``output_dir``.
        stats_file: Path to save detection statistics (optional).
        luma_only: Only use brightness for content detection.
        adaptive_window: Window size for adaptive detection.
        fade_bias: Bias for fade detection (-1.0 to 1.0).
        downscale_factor: Factor to downscale frames by during detection.
        frame_skip: Number of frames to skip (i.e. process only 1 in every
            N+1 frames, where N is frame_skip), speeding up detection at the
            expense of accuracy. Must be 0 (the default) when ``stats_file``
            is given.
        duration: How much of the video to process from start position.
            Can be specified as frames (123), seconds (123s/123.45s),
            or timecode (HH:MM:SS[.nnn]).

    Returns:
        List of detected scenes as (start, end) FrameTimecode pairs, after
        filtering and the ``max_scenes`` limit have been applied.

    Raises:
        typer.BadParameter: If a timecode string is invalid, if
            ``frame_skip`` is combined with ``stats_file``, or if splitting
            the video fails.
    """
    # Statistics cannot be collected while skipping frames; reject the
    # combination up front instead of failing in the middle of detection.
    if stats_file and frame_skip:
        raise typer.BadParameter("frame_skip must be 0 when a stats file is requested")

    # Create video stream
    video = open_video(video_path, backend="opencv")

    # Parse the optional duration / minimum-length strings (None stays None)
    duration_tc = parse_timecode(video, duration)
    filter_shorter_than_tc = parse_timecode(video, filter_shorter_than)

    # Initialize scene manager with optional stats manager
    stats_manager = StatsManager() if stats_file else None
    scene_manager = SceneManager(stats_manager)

    # An explicit downscale factor replaces automatic downscaling
    if downscale_factor:
        scene_manager.auto_downscale = False
        scene_manager.downscale = downscale_factor

    # Create and add detector
    detector = create_detector(
        detector_type=detector_type,
        threshold=threshold,
        min_scene_len=min_scene_len,
        luma_only=luma_only,
        adaptive_window=adaptive_window,
        fade_bias=fade_bias,
    )
    scene_manager.add_detector(detector)

    # Detect scenes
    typer.echo("Detecting scenes...")
    scene_manager.detect_scenes(
        video=video,
        show_progress=True,
        frame_skip=frame_skip,
        duration=duration_tc,
    )
    scenes = scene_manager.get_scene_list()

    # Filter out scenes that are too short if filter_shorter_than is specified.
    # Compare against None explicitly rather than relying on object truthiness.
    if filter_shorter_than_tc is not None:
        min_frames = filter_shorter_than_tc.get_frames()
        original_count = len(scenes)
        scenes = [
            (start, end)
            for start, end in scenes
            if (end.get_frames() - start.get_frames()) >= min_frames
        ]
        if len(scenes) < original_count:
            typer.echo(
                f"Filtered out {original_count - len(scenes)} scenes shorter "
                f"than {filter_shorter_than_tc.get_seconds():.1f} seconds "
                f"({min_frames} frames)",
            )

    # Apply max scenes limit if specified
    if max_scenes and len(scenes) > max_scenes:
        typer.echo(f"Dropping last {len(scenes) - max_scenes} scenes to meet max_scenes ({max_scenes}) limit")
        scenes = scenes[:max_scenes]

    # Print scene information
    typer.echo(f"Found {len(scenes)} scenes:")
    for i, (start, end) in enumerate(scenes, 1):
        typer.echo(
            f"Scene {i}: {start.get_timecode()} to {end.get_timecode()} "
            f"({end.get_frames() - start.get_frames()} frames)",
        )

    # Save stats if requested
    if stats_manager is not None:
        typer.echo(f"Saving detection stats to {stats_file}")
        stats_manager.save_to_csv(stats_file)

    # Split video into scenes
    typer.echo("Splitting video into scenes...")
    try:
        return_code = split_video_ffmpeg(
            input_video_path=video_path,
            scene_list=scenes,
            output_dir=output_dir,
            show_progress=True,
        )
    except Exception as e:
        raise typer.BadParameter(f"Error splitting video: {e}") from e
    # A non-zero return code means ffmpeg failed; do not report success then.
    if return_code:
        raise typer.BadParameter(f"Error splitting video: ffmpeg exited with code {return_code}")
    typer.echo(f"Scenes have been saved to: {output_dir}")

    # Save preview images if requested
    if save_images_per_scene > 0:
        typer.echo(f"Saving {save_images_per_scene} preview images per scene...")
        image_filenames = save_scene_images(
            scene_list=scenes,
            video=video,
            num_images=save_images_per_scene,
            output_dir=str(output_dir),
            show_progress=True,
        )

        # Generate HTML report with scene information and previews
        html_path = output_dir / "scene_report.html"
        write_scene_list_html(
            output_html_filename=str(html_path),
            scene_list=scenes,
            image_filenames=image_filenames,
        )
        typer.echo(f"Scene report saved to: {html_path}")

    return scenes
@app.command()
def main(  # noqa: PLR0913
    video_path: Path = typer.Argument(  # noqa: B008
        ...,
        help="Path to the input video file",
        exists=True,
        dir_okay=False,
    ),
    output_dir: str = typer.Argument(
        ...,
        help="Directory where split scenes will be saved",
    ),
    detector: DetectorType = typer.Option(  # noqa: B008
        DetectorType.CONTENT,
        help="Scene detection algorithm to use",
    ),
    threshold: Optional[float] = typer.Option(
        None,
        help="Detection threshold (meaning varies by detector)",
    ),
    max_scenes: Optional[int] = typer.Option(
        None,
        help="Maximum number of scenes to produce",
    ),
    min_scene_length: Optional[int] = typer.Option(
        None,
        help="Minimum scene length during detection. Forces the detector to make scenes at least this many frames. "
        "This affects scene detection behavior but does not filter out short scenes.",
    ),
    filter_shorter_than: Optional[str] = typer.Option(
        None,
        help="Filter out scenes shorter than this duration. Can be specified as frames (123), "
        "seconds (123s/123.45s), or timecode (HH:MM:SS[.nnn]). These scenes will be detected but not saved.",
    ),
    skip_start: Optional[int] = typer.Option(
        None,
        help="Number of frames to skip at the start of the video",
    ),
    skip_end: Optional[int] = typer.Option(
        None,
        help="Number of frames to skip at the end of the video",
    ),
    # Both names are declared explicitly: giving only "-d" would remove the
    # long "--duration" form that the usage examples rely on.
    duration: Optional[str] = typer.Option(
        None,
        "--duration",
        "-d",
        help="How much of the video to process. Can be specified as frames (123), "
        "seconds (123s/123.45s), or timecode (HH:MM:SS[.nnn])",
    ),
    save_images: int = typer.Option(
        0,
        help="Number of preview images to save per scene (0 to disable)",
    ),
    stats_file: Optional[str] = typer.Option(
        None,
        help="Path to save detection statistics CSV",
    ),
    luma_only: bool = typer.Option(
        False,
        help="Only use brightness for content detection",
    ),
    adaptive_window: Optional[int] = typer.Option(
        None,
        help="Window size for adaptive detection",
    ),
    fade_bias: Optional[float] = typer.Option(
        None,
        help="Bias for fade detection (-1.0 to 1.0)",
    ),
    downscale: Optional[int] = typer.Option(
        None,
        help="Factor to downscale frames by during detection",
    ),
    frame_skip: int = typer.Option(
        0,
        help="Number of frames to skip during processing",
    ),
) -> None:
    """Split video into scenes using PySceneDetect."""
    # NOTE: the docstring above doubles as the command's help text, so the
    # implementation notes live in comments instead.

    # Skipping frames at either end is not implemented. Report it on stderr
    # and exit non-zero so callers do not mistake a no-op for success.
    if skip_start or skip_end:
        typer.echo("Skipping start and end frames is not supported yet.", err=True)
        raise typer.Exit(code=1)

    # Validate output directory
    output_path = validate_output_dir(output_dir)

    # Detect and split scenes; CLI option names are mapped onto the
    # corresponding function parameters here.
    detect_and_split_scenes(
        video_path=str(video_path),
        output_dir=output_path,
        detector_type=detector,
        threshold=threshold,
        min_scene_len=min_scene_length,
        max_scenes=max_scenes,
        filter_shorter_than=filter_shorter_than,
        skip_start=skip_start,
        skip_end=skip_end,
        duration=duration,
        save_images_per_scene=save_images,
        stats_file=stats_file,
        luma_only=luma_only,
        adaptive_window=adaptive_window,
        fade_bias=fade_bias,
        downscale_factor=downscale,
        frame_skip=frame_skip,
    )
if __name__ == "__main__":
    # Run the Typer CLI only when executed as a script, not on import.
    app()
|