| |
|
|
| """ |
| Split video into scenes using PySceneDetect. |
| This script provides a command-line interface for splitting videos into scenes using various detection algorithms. |
| It supports multiple detection methods, preview image generation, and customizable parameters for fine-tuning |
| the scene detection process. |
| Basic usage: |
| # Split video using default content-based detection |
| scenes_split.py input.mp4 output_dir/ |
| # Save 3 preview images per scene |
| scenes_split.py input.mp4 output_dir/ --save-images 3 |
| # Process specific duration and filter short scenes |
| scenes_split.py input.mp4 output_dir/ --duration 60s --filter-shorter-than 2s |
| Advanced usage: |
| # Content detection with minimum scene length and frame skip |
| scenes_split.py input.mp4 output_dir/ --detector content --min-scene-length 30 --frame-skip 2 |
# Use adaptive detection with custom detector parameters
| scenes_split.py input.mp4 output_dir/ --detector adaptive --threshold 3.0 --adaptive-window 10 |
| """ |
|
|
from enum import Enum
from pathlib import Path
from typing import Any, List, Optional, Tuple

import typer
from scenedetect import (
    AdaptiveDetector,
    ContentDetector,
    HistogramDetector,
    SceneManager,
    ThresholdDetector,
    open_video,
)
from scenedetect.frame_timecode import FrameTimecode
from scenedetect.scene_manager import SceneDetector, write_scene_list_html
from scenedetect.scene_manager import save_images as save_scene_images
from scenedetect.stats_manager import StatsManager
from scenedetect.video_splitter import split_video_ffmpeg
|
|
# CLI application object; no_args_is_help makes a bare invocation print usage.
app = typer.Typer(no_args_is_help=True, help="Split video into scenes using PySceneDetect.")
|
|
|
|
class DetectorType(str, Enum):
    """Available scene detection algorithms.

    Values are the accepted strings for the ``--detector`` CLI option;
    create_detector() maps each member to its PySceneDetect detector class.
    """

    CONTENT = "content"      # mapped to ContentDetector
    ADAPTIVE = "adaptive"    # mapped to AdaptiveDetector
    THRESHOLD = "threshold"  # mapped to ThresholdDetector
    HISTOGRAM = "histogram"  # mapped to HistogramDetector
|
|
|
|
def create_detector(
    detector_type: DetectorType,
    threshold: Optional[float] = None,
    min_scene_len: Optional[int] = None,
    luma_only: Optional[bool] = None,
    adaptive_window: Optional[int] = None,
    fade_bias: Optional[float] = None,
) -> SceneDetector:
    """Build a configured PySceneDetect detector for the requested algorithm.

    Args:
        detector_type: Which detection algorithm to instantiate.
        threshold: Detection threshold; its meaning depends on the detector.
        min_scene_len: Minimum scene length in frames.
        luma_only: Restrict content/adaptive detection to brightness only.
        adaptive_window: Rolling window width for adaptive detection.
        fade_bias: Bias for fade in/out detection (-1.0 to 1.0).

    Any parameter left as None is omitted, so the detector's own default
    value applies.

    Returns:
        Configured scene detector instance.

    Raises:
        ValueError: If detector_type is not a recognized DetectorType.
    """
    # Collect only explicitly-provided options; omitted keys keep defaults.
    options = {}
    if threshold is not None:
        options["threshold"] = threshold
    if min_scene_len is not None:
        options["min_scene_len"] = min_scene_len

    if detector_type == DetectorType.CONTENT:
        if luma_only is not None:
            options["luma_only"] = luma_only
        return ContentDetector(**options)

    if detector_type == DetectorType.ADAPTIVE:
        if adaptive_window is not None:
            options["window_width"] = adaptive_window
        if luma_only is not None:
            options["luma_only"] = luma_only
        # AdaptiveDetector names its threshold parameter differently.
        if "threshold" in options:
            options["adaptive_threshold"] = options.pop("threshold")
        return AdaptiveDetector(**options)

    if detector_type == DetectorType.THRESHOLD:
        if fade_bias is not None:
            options["fade_bias"] = fade_bias
        return ThresholdDetector(**options)

    if detector_type == DetectorType.HISTOGRAM:
        return HistogramDetector(**options)

    raise ValueError(f"Unknown detector type: {detector_type}")
|
|
|
|
def validate_output_dir(output_dir: str) -> Path:
    """Validate and create output directory if it doesn't exist.

    Args:
        output_dir: Path to the output directory.

    Returns:
        Path object of the validated (and now existing) output directory.

    Raises:
        typer.BadParameter: If the path exists but is not a directory.
    """
    path = Path(output_dir)

    if path.exists() and not path.is_dir():
        raise typer.BadParameter(f"{output_dir} exists but is not a directory")

    # Fix: the docstring promised creation, but the directory was never made;
    # downstream splitting would then fail on a missing output path.
    path.mkdir(parents=True, exist_ok=True)

    return path
|
|
|
|
def parse_timecode(video: Any, time_str: Optional[str]) -> Optional[FrameTimecode]:
    """Parse a timecode string into a FrameTimecode object.

    Supported formats:
        - Frames: '123'
        - Seconds: '123s' or '123.45s'
        - Timecode: '00:02:03' or '00:02:03.456'

    Args:
        video: Opened video object; only its ``frame_rate`` attribute is read.
            (Annotation fixed from the builtin ``any`` to ``typing.Any``.)
        time_str: String to parse, or None.

    Returns:
        FrameTimecode at the video's framerate, or None if time_str is None.

    Raises:
        typer.BadParameter: If time_str matches none of the supported formats.
    """
    if time_str is None:
        return None

    try:
        if time_str.endswith("s"):
            # Seconds, possibly fractional: '123s' / '123.45s'.
            seconds = float(time_str[:-1])
            return FrameTimecode(timecode=seconds, fps=video.frame_rate)
        elif ":" in time_str:
            # Wall-clock timecode: 'HH:MM:SS' / 'HH:MM:SS.nnn'.
            return FrameTimecode(timecode=time_str, fps=video.frame_rate)
        else:
            # Bare integer frame count: '123'.
            return FrameTimecode(timecode=int(time_str), fps=video.frame_rate)
    except ValueError as e:
        raise typer.BadParameter(
            f"Invalid timecode format: {time_str}. Use frames (123), "
            f"seconds (123s/123.45s), or timecode (HH:MM:SS[.nnn])",
        ) from e
|
|
|
|
def detect_and_split_scenes(
    video_path: str,
    output_dir: Path,
    detector_type: DetectorType,
    threshold: Optional[float] = None,
    min_scene_len: Optional[int] = None,
    max_scenes: Optional[int] = None,
    filter_shorter_than: Optional[str] = None,
    skip_start: Optional[int] = None,
    skip_end: Optional[int] = None,
    save_images_per_scene: int = 0,
    stats_file: Optional[str] = None,
    luma_only: bool = False,
    adaptive_window: Optional[int] = None,
    fade_bias: Optional[float] = None,
    downscale_factor: Optional[int] = None,
    frame_skip: int = 0,
    duration: Optional[str] = None,
) -> List[Tuple[FrameTimecode, FrameTimecode]]:
    """Detect and split scenes in a video using the specified parameters.

    Args:
        video_path: Path to input video.
        output_dir: Directory to save output split scenes.
        detector_type: Type of scene detector to use.
        threshold: Detection threshold.
        min_scene_len: Minimum scene length in frames.
        max_scenes: Maximum number of scenes to keep (extras are dropped).
        filter_shorter_than: Filter out scenes shorter than this duration
            (frames/seconds/timecode).
        skip_start: Number of frames to skip at start.
            NOTE(review): currently accepted but unused — the CLI rejects it.
        skip_end: Number of frames to skip at end.
            NOTE(review): currently accepted but unused — the CLI rejects it.
        save_images_per_scene: Number of images to save per scene (0 to disable).
        stats_file: Path to save detection statistics CSV (optional).
        luma_only: Only use brightness for content detection.
        adaptive_window: Window size for adaptive detection.
        fade_bias: Bias for fade detection (-1.0 to 1.0).
        downscale_factor: Factor to downscale frames by during detection.
        frame_skip: Number of frames to skip (process every 1 in N+1 frames),
            speeding up detection at the expense of accuracy. Must be 0 when
            a stats file is requested (StatsManager limitation).
        duration: How much of the video to process from the start position.
            Frames (123), seconds (123s/123.45s), or timecode (HH:MM:SS[.nnn]).

    Returns:
        List of detected scenes as (start, end) FrameTimecode pairs.

    Raises:
        typer.BadParameter: If stats collection is combined with frame
            skipping, or if splitting the video fails.
    """
    # Fix: the docstring stated frame_skip must be 0 with a StatsManager, but
    # the conflict was never checked; fail fast with a clear message instead
    # of erroring deep inside PySceneDetect.
    if stats_file and frame_skip:
        raise typer.BadParameter("frame_skip must be 0 when using a stats file")

    video = open_video(video_path, backend="opencv")

    # Resolve string durations against the video's framerate up front.
    duration_tc = parse_timecode(video, duration)
    filter_shorter_than_tc = parse_timecode(video, filter_shorter_than)

    # Stats collection is optional; SceneManager accepts None.
    stats_manager = StatsManager() if stats_file else None
    scene_manager = SceneManager(stats_manager)

    if downscale_factor:
        # An explicit downscale overrides PySceneDetect's automatic choice.
        scene_manager.auto_downscale = False
        scene_manager.downscale = downscale_factor

    detector = create_detector(
        detector_type=detector_type,
        threshold=threshold,
        min_scene_len=min_scene_len,
        luma_only=luma_only,
        adaptive_window=adaptive_window,
        fade_bias=fade_bias,
    )
    scene_manager.add_detector(detector)

    typer.echo("Detecting scenes...")
    scene_manager.detect_scenes(
        video=video,
        show_progress=True,
        frame_skip=frame_skip,
        duration=duration_tc,
    )

    scenes = scene_manager.get_scene_list()

    # Post-detection filter: drop scenes shorter than the requested minimum.
    if filter_shorter_than_tc:
        original_count = len(scenes)
        scenes = [
            (start, end)
            for start, end in scenes
            if (end.get_frames() - start.get_frames()) >= filter_shorter_than_tc.get_frames()
        ]
        if len(scenes) < original_count:
            typer.echo(
                f"Filtered out {original_count - len(scenes)} scenes shorter "
                f"than {filter_shorter_than_tc.get_seconds():.1f} seconds "
                f"({filter_shorter_than_tc.get_frames()} frames)",
            )

    # Enforce the max_scenes cap by dropping trailing scenes.
    if max_scenes and len(scenes) > max_scenes:
        typer.echo(f"Dropping last {len(scenes) - max_scenes} scenes to meet max_scenes ({max_scenes}) limit")
        scenes = scenes[:max_scenes]

    typer.echo(f"Found {len(scenes)} scenes:")
    for i, (start, end) in enumerate(scenes, 1):
        typer.echo(
            f"Scene {i}: {start.get_timecode()} to {end.get_timecode()} "
            f"({end.get_frames() - start.get_frames()} frames)",
        )

    if stats_file:
        typer.echo(f"Saving detection stats to {stats_file}")
        stats_manager.save_to_csv(stats_file)

    typer.echo("Splitting video into scenes...")
    try:
        split_video_ffmpeg(
            input_video_path=video_path,
            scene_list=scenes,
            output_dir=output_dir,
            show_progress=True,
        )
        typer.echo(f"Scenes have been saved to: {output_dir}")
    except Exception as e:
        raise typer.BadParameter(f"Error splitting video: {e}") from e

    if save_images_per_scene > 0:
        typer.echo(f"Saving {save_images_per_scene} preview images per scene...")
        image_filenames = save_scene_images(
            scene_list=scenes,
            video=video,
            num_images=save_images_per_scene,
            output_dir=str(output_dir),
            show_progress=True,
        )

        # Emit an HTML report linking the preview images for each scene.
        html_path = output_dir / "scene_report.html"
        write_scene_list_html(
            output_html_filename=str(html_path),
            scene_list=scenes,
            image_filenames=image_filenames,
        )
        typer.echo(f"Scene report saved to: {html_path}")

    return scenes
|
|
|
|
@app.command()
def main(
    video_path: Path = typer.Argument(
        ...,
        help="Path to the input video file",
        exists=True,
        dir_okay=False,
    ),
    output_dir: str = typer.Argument(
        ...,
        help="Directory where split scenes will be saved",
    ),
    detector: DetectorType = typer.Option(
        DetectorType.CONTENT,
        help="Scene detection algorithm to use",
    ),
    threshold: Optional[float] = typer.Option(
        None,
        help="Detection threshold (meaning varies by detector)",
    ),
    max_scenes: Optional[int] = typer.Option(
        None,
        help="Maximum number of scenes to produce",
    ),
    min_scene_length: Optional[int] = typer.Option(
        None,
        help="Minimum scene length during detection. Forces the detector to make scenes at least this many frames. "
        "This affects scene detection behavior but does not filter out short scenes.",
    ),
    filter_shorter_than: Optional[str] = typer.Option(
        None,
        help="Filter out scenes shorter than this duration. Can be specified as frames (123), "
        "seconds (123s/123.45s), or timecode (HH:MM:SS[.nnn]). These scenes will be detected but not saved.",
    ),
    skip_start: Optional[int] = typer.Option(
        None,
        help="Number of frames to skip at the start of the video",
    ),
    skip_end: Optional[int] = typer.Option(
        None,
        help="Number of frames to skip at the end of the video",
    ),
    duration: Optional[str] = typer.Option(
        None,
        "-d",
        help="How much of the video to process. Can be specified as frames (123), "
        "seconds (123s/123.45s), or timecode (HH:MM:SS[.nnn])",
    ),
    save_images: int = typer.Option(
        0,
        help="Number of preview images to save per scene (0 to disable)",
    ),
    stats_file: Optional[str] = typer.Option(
        None,
        help="Path to save detection statistics CSV",
    ),
    luma_only: bool = typer.Option(
        False,
        help="Only use brightness for content detection",
    ),
    adaptive_window: Optional[int] = typer.Option(
        None,
        help="Window size for adaptive detection",
    ),
    fade_bias: Optional[float] = typer.Option(
        None,
        help="Bias for fade detection (-1.0 to 1.0)",
    ),
    downscale: Optional[int] = typer.Option(
        None,
        help="Factor to downscale frames by during detection",
    ),
    frame_skip: int = typer.Option(
        0,
        help="Number of frames to skip during processing",
    ),
) -> None:
    """Split video into scenes using PySceneDetect."""
    # Guard clause: start/end trimming is exposed on the CLI but not
    # implemented yet, so bail out early rather than silently ignoring it.
    if skip_start or skip_end:
        typer.echo("Skipping start and end frames is not supported yet.")
        return

    # Check the destination before doing any heavy processing.
    destination = validate_output_dir(output_dir)

    # Delegate the actual detection and splitting work.
    detect_and_split_scenes(
        video_path=str(video_path),
        output_dir=destination,
        detector_type=detector,
        threshold=threshold,
        luma_only=luma_only,
        adaptive_window=adaptive_window,
        fade_bias=fade_bias,
        min_scene_len=min_scene_length,
        max_scenes=max_scenes,
        filter_shorter_than=filter_shorter_than,
        skip_start=skip_start,
        skip_end=skip_end,
        duration=duration,
        frame_skip=frame_skip,
        downscale_factor=downscale,
        save_images_per_scene=save_images,
        stats_file=stats_file,
    )
|
|
|
|
# Run the Typer CLI when executed directly as a script.
if __name__ == "__main__":
    app()
|
|