|
|
|
|
|
|
|
|
""" |
|
|
Split video into scenes using PySceneDetect. |
|
|
|
|
|
This script provides a command-line interface for splitting videos into scenes using various detection algorithms. |
|
|
It supports multiple detection methods, preview image generation, and customizable parameters for fine-tuning |
|
|
the scene detection process. |
|
|
|
|
|
Basic usage: |
|
|
# Split video using default content-based detection |
|
|
scenes_split.py input.mp4 output_dir/ |
|
|
|
|
|
# Save 3 preview images per scene |
|
|
scenes_split.py input.mp4 output_dir/ --save-images 3 |
|
|
|
|
|
# Process specific duration and filter short scenes |
|
|
scenes_split.py input.mp4 output_dir/ --duration 60s --filter-shorter-than 2s |
|
|
|
|
|
Advanced usage: |
|
|
# Content detection with minimum scene length and frame skip |
|
|
scenes_split.py input.mp4 output_dir/ --detector content --min-scene-length 30 --frame-skip 2 |
|
|
|
|
|
# Use adaptive detection with a custom threshold and analysis window
|
|
scenes_split.py input.mp4 output_dir/ --detector adaptive --threshold 3.0 --adaptive-window 10 |
|
|
""" |
|
|
|
|
|
from enum import Enum
from pathlib import Path
from typing import Any, List, Optional, Tuple

import typer
from scenedetect import (
    AdaptiveDetector,
    ContentDetector,
    HistogramDetector,
    SceneManager,
    ThresholdDetector,
    open_video,
)
from scenedetect.frame_timecode import FrameTimecode
from scenedetect.scene_manager import SceneDetector, write_scene_list_html
from scenedetect.scene_manager import save_images as save_scene_images
from scenedetect.stats_manager import StatsManager
from scenedetect.video_splitter import split_video_ffmpeg
|
|
|
|
|
# CLI application object; no_args_is_help makes bare invocation print usage.
app = typer.Typer(no_args_is_help=True, help="Split video into scenes using PySceneDetect.")
|
|
|
|
|
|
|
|
class DetectorType(str, Enum):
    """Available scene detection algorithms.

    Each value maps to a PySceneDetect detector class in create_detector().
    Inherits from str so typer can parse it directly from the command line.
    """

    CONTENT = "content"  # -> ContentDetector
    ADAPTIVE = "adaptive"  # -> AdaptiveDetector
    THRESHOLD = "threshold"  # -> ThresholdDetector
    HISTOGRAM = "histogram"  # -> HistogramDetector
|
|
|
|
|
|
|
|
def create_detector(
    detector_type: DetectorType,
    threshold: Optional[float] = None,
    min_scene_len: Optional[int] = None,
    luma_only: Optional[bool] = None,
    adaptive_window: Optional[int] = None,
    fade_bias: Optional[float] = None,
) -> SceneDetector:
    """Build a configured scene detector for the requested algorithm.

    Args:
        detector_type: Which detection algorithm to instantiate.
        threshold: Detection threshold (its meaning varies by detector).
        min_scene_len: Minimum scene length in frames.
        luma_only: If True, only use brightness for content detection.
        adaptive_window: Window size for adaptive detection.
        fade_bias: Bias for fade in/out detection (-1.0 to 1.0).

    Note: Parameters left as None fall back to the detector's built-in defaults.

    Returns:
        Configured scene detector instance.

    Raises:
        ValueError: If ``detector_type`` is not a recognized algorithm.
    """
    # Forward only explicitly-provided values so each detector keeps its
    # own defaults for anything the caller did not set.
    params = {
        name: value
        for name, value in (("threshold", threshold), ("min_scene_len", min_scene_len))
        if value is not None
    }

    if detector_type == DetectorType.CONTENT:
        if luma_only is not None:
            params["luma_only"] = luma_only
        return ContentDetector(**params)

    if detector_type == DetectorType.ADAPTIVE:
        if adaptive_window is not None:
            params["window_width"] = adaptive_window
        if luma_only is not None:
            params["luma_only"] = luma_only
        # AdaptiveDetector calls its threshold argument "adaptive_threshold".
        if "threshold" in params:
            params["adaptive_threshold"] = params.pop("threshold")
        return AdaptiveDetector(**params)

    if detector_type == DetectorType.THRESHOLD:
        if fade_bias is not None:
            params["fade_bias"] = fade_bias
        return ThresholdDetector(**params)

    if detector_type == DetectorType.HISTOGRAM:
        return HistogramDetector(**params)

    raise ValueError(f"Unknown detector type: {detector_type}")
|
|
|
|
|
|
|
|
def validate_output_dir(output_dir: str) -> Path:
    """Validate the output directory path and create it if it doesn't exist.

    Args:
        output_dir: Path to the output directory

    Returns:
        Path object of the validated (and now existing) output directory

    Raises:
        typer.BadParameter: If the path exists but is not a directory.
    """
    path = Path(output_dir)

    if path.exists() and not path.is_dir():
        raise typer.BadParameter(f"{output_dir} exists but is not a directory")

    # The contract promises creation, and downstream writers (ffmpeg splitting,
    # preview images, HTML report) need the directory to exist.
    path.mkdir(parents=True, exist_ok=True)

    return path
|
|
|
|
|
|
|
|
def parse_timecode(video: Any, time_str: Optional[str]) -> Optional[FrameTimecode]:
    """Parse a timecode string into a FrameTimecode object.

    Supports formats:
        - Frames: '123'
        - Seconds: '123s' or '123.45s'
        - Timecode: '00:02:03' or '00:02:03.456'

    Args:
        video: Video object providing a ``frame_rate`` attribute
        time_str: String to parse, or None

    Returns:
        FrameTimecode object or None if input is None

    Raises:
        typer.BadParameter: If ``time_str`` matches none of the supported formats.
    """
    if time_str is None:
        return None

    try:
        if time_str.endswith("s"):
            # A trailing 's' marks a (possibly fractional) number of seconds.
            seconds = float(time_str[:-1])
            return FrameTimecode(timecode=seconds, fps=video.frame_rate)
        elif ":" in time_str:
            # HH:MM:SS[.nnn] strings are parsed by FrameTimecode itself.
            return FrameTimecode(timecode=time_str, fps=video.frame_rate)
        else:
            # Bare integers are interpreted as a frame count.
            return FrameTimecode(timecode=int(time_str), fps=video.frame_rate)
    except ValueError as e:
        raise typer.BadParameter(
            f"Invalid timecode format: {time_str}. Use frames (123), "
            f"seconds (123s/123.45s), or timecode (HH:MM:SS[.nnn])",
        ) from e
|
|
|
|
|
|
|
|
def detect_and_split_scenes(
    video_path: str,
    output_dir: Path,
    detector_type: DetectorType,
    threshold: Optional[float] = None,
    min_scene_len: Optional[int] = None,
    max_scenes: Optional[int] = None,
    filter_shorter_than: Optional[str] = None,
    skip_start: Optional[int] = None,
    skip_end: Optional[int] = None,
    save_images_per_scene: int = 0,
    stats_file: Optional[str] = None,
    luma_only: bool = False,
    adaptive_window: Optional[int] = None,
    fade_bias: Optional[float] = None,
    downscale_factor: Optional[int] = None,
    frame_skip: int = 0,
    duration: Optional[str] = None,
) -> List[Tuple[FrameTimecode, FrameTimecode]]:
    """Detect and split scenes in a video using the specified parameters.

    Args:
        video_path: Path to input video.
        output_dir: Directory to save output split scenes.
        detector_type: Type of scene detector to use.
        threshold: Detection threshold.
        min_scene_len: Minimum scene length in frames.
        max_scenes: Maximum number of scenes to detect.
        filter_shorter_than: Filter out scenes shorter than this duration (frames/seconds/timecode)
        skip_start: Number of frames to skip at start.
        skip_end: Number of frames to skip at end.
        save_images_per_scene: Number of images to save per scene (0 to disable).
        stats_file: Path to save detection statistics (optional).
        luma_only: Only use brightness for content detection.
        adaptive_window: Window size for adaptive detection.
        fade_bias: Bias for fade detection (-1.0 to 1.0).
        downscale_factor: Factor to downscale frames by during detection.
        frame_skip: Number of frames to skip (i.e. process every 1 in N+1 frames,
            where N is frame_skip, speeding up detection at the expense of
            accuracy). frame_skip must be 0 (the default) when using a
            StatsManager (i.e. when ``stats_file`` is set).
        duration: How much of the video to process from start position.
            Can be specified as frames (123), seconds (123s/123.45s),
            or timecode (HH:MM:SS[.nnn]).

    Returns:
        List of detected scenes as (start, end) FrameTimecode pairs.

    Raises:
        typer.BadParameter: If incompatible options are combined, a timecode
            string is malformed, or splitting the video fails.
    """
    # NOTE(review): skip_start/skip_end are accepted but not applied anywhere
    # in this function; main() currently rejects them before calling us.

    # StatsManager cannot be combined with frame skipping (see docstring);
    # fail fast here instead of erroring deep inside detect_scenes().
    if stats_file and frame_skip != 0:
        raise typer.BadParameter("frame_skip must be 0 when saving detection statistics (stats_file)")

    video = open_video(video_path, backend="opencv")

    # Parse user-supplied duration strings against the video's frame rate.
    duration_tc = parse_timecode(video, duration)
    filter_shorter_than_tc = parse_timecode(video, filter_shorter_than)

    # Statistics collection is optional; the SceneManager accepts None.
    stats_manager = StatsManager() if stats_file else None
    scene_manager = SceneManager(stats_manager)

    if downscale_factor:
        # An explicit factor overrides the automatic downscale heuristic.
        scene_manager.auto_downscale = False
        scene_manager.downscale = downscale_factor

    detector = create_detector(
        detector_type=detector_type,
        threshold=threshold,
        min_scene_len=min_scene_len,
        luma_only=luma_only,
        adaptive_window=adaptive_window,
        fade_bias=fade_bias,
    )
    scene_manager.add_detector(detector)

    typer.echo("Detecting scenes...")
    scene_manager.detect_scenes(
        video=video,
        show_progress=True,
        frame_skip=frame_skip,
        duration=duration_tc,
    )

    scenes = scene_manager.get_scene_list()

    # Drop scenes shorter than the requested minimum duration (post-detection
    # filter; distinct from min_scene_len, which shapes detection itself).
    if filter_shorter_than_tc:
        original_count = len(scenes)
        scenes = [
            (start, end)
            for start, end in scenes
            if (end.get_frames() - start.get_frames()) >= filter_shorter_than_tc.get_frames()
        ]
        if len(scenes) < original_count:
            typer.echo(
                f"Filtered out {original_count - len(scenes)} scenes shorter "
                f"than {filter_shorter_than_tc.get_seconds():.1f} seconds "
                f"({filter_shorter_than_tc.get_frames()} frames)",
            )

    # Enforce the scene-count cap by truncating from the end.
    if max_scenes and len(scenes) > max_scenes:
        typer.echo(f"Dropping last {len(scenes) - max_scenes} scenes to meet max_scenes ({max_scenes}) limit")
        scenes = scenes[:max_scenes]

    typer.echo(f"Found {len(scenes)} scenes:")
    for i, (start, end) in enumerate(scenes, 1):
        typer.echo(
            f"Scene {i}: {start.get_timecode()} to {end.get_timecode()} "
            f"({end.get_frames() - start.get_frames()} frames)",
        )

    if stats_file:
        typer.echo(f"Saving detection stats to {stats_file}")
        stats_manager.save_to_csv(stats_file)

    typer.echo("Splitting video into scenes...")
    try:
        split_video_ffmpeg(
            input_video_path=video_path,
            scene_list=scenes,
            output_dir=output_dir,
            show_progress=True,
        )
        typer.echo(f"Scenes have been saved to: {output_dir}")
    except Exception as e:
        raise typer.BadParameter(f"Error splitting video: {e}") from e

    if save_images_per_scene > 0:
        typer.echo(f"Saving {save_images_per_scene} preview images per scene...")
        image_filenames = save_scene_images(
            scene_list=scenes,
            video=video,
            num_images=save_images_per_scene,
            output_dir=str(output_dir),
            show_progress=True,
        )

        # The HTML report embeds the preview images, so it is only generated
        # when images were requested.
        html_path = output_dir / "scene_report.html"
        write_scene_list_html(
            output_html_filename=str(html_path),
            scene_list=scenes,
            image_filenames=image_filenames,
        )
        typer.echo(f"Scene report saved to: {html_path}")

    return scenes
|
|
|
|
|
|
|
|
@app.command()
def main(
    video_path: Path = typer.Argument(
        ...,
        help="Path to the input video file",
        exists=True,
        dir_okay=False,
    ),
    output_dir: str = typer.Argument(
        ...,
        help="Directory where split scenes will be saved",
    ),
    detector: DetectorType = typer.Option(
        DetectorType.CONTENT,
        help="Scene detection algorithm to use",
    ),
    threshold: Optional[float] = typer.Option(
        None,
        help="Detection threshold (meaning varies by detector)",
    ),
    max_scenes: Optional[int] = typer.Option(
        None,
        help="Maximum number of scenes to produce",
    ),
    min_scene_length: Optional[int] = typer.Option(
        None,
        help="Minimum scene length during detection. Forces the detector to make scenes at least this many frames. "
        "This affects scene detection behavior but does not filter out short scenes.",
    ),
    filter_shorter_than: Optional[str] = typer.Option(
        None,
        help="Filter out scenes shorter than this duration. Can be specified as frames (123), "
        "seconds (123s/123.45s), or timecode (HH:MM:SS[.nnn]). These scenes will be detected but not saved.",
    ),
    skip_start: Optional[int] = typer.Option(
        None,
        help="Number of frames to skip at the start of the video",
    ),
    skip_end: Optional[int] = typer.Option(
        None,
        help="Number of frames to skip at the end of the video",
    ),
    duration: Optional[str] = typer.Option(
        None,
        "-d",
        help="How much of the video to process. Can be specified as frames (123), "
        "seconds (123s/123.45s), or timecode (HH:MM:SS[.nnn])",
    ),
    save_images: int = typer.Option(
        0,
        help="Number of preview images to save per scene (0 to disable)",
    ),
    stats_file: Optional[str] = typer.Option(
        None,
        help="Path to save detection statistics CSV",
    ),
    luma_only: bool = typer.Option(
        False,
        help="Only use brightness for content detection",
    ),
    adaptive_window: Optional[int] = typer.Option(
        None,
        help="Window size for adaptive detection",
    ),
    fade_bias: Optional[float] = typer.Option(
        None,
        help="Bias for fade detection (-1.0 to 1.0)",
    ),
    downscale: Optional[int] = typer.Option(
        None,
        help="Factor to downscale frames by during detection",
    ),
    frame_skip: int = typer.Option(
        0,
        help="Number of frames to skip during processing",
    ),
) -> None:
    """Split video into scenes using PySceneDetect.

    CLI entry point: validates options, resolves the output directory, and
    delegates the actual detection/splitting to detect_and_split_scenes().
    """
    # The skip-start/skip-end options are declared but the pipeline does not
    # honor them yet, so refuse the run rather than silently ignore them.
    if skip_start or skip_end:
        typer.echo("Skipping start and end frames is not supported yet.")
        return

    output_path = validate_output_dir(output_dir)

    detect_and_split_scenes(
        video_path=str(video_path),
        output_dir=output_path,
        detector_type=detector,
        threshold=threshold,
        min_scene_len=min_scene_length,
        max_scenes=max_scenes,
        filter_shorter_than=filter_shorter_than,
        skip_start=skip_start,
        skip_end=skip_end,
        duration=duration,
        save_images_per_scene=save_images,
        stats_file=stats_file,
        luma_only=luma_only,
        adaptive_window=adaptive_window,
        fade_bias=fade_bias,
        downscale_factor=downscale,
        frame_skip=frame_skip,
    )
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Launch the Typer CLI only when run as a script (not on import).
    app()
|
|
|