Spaces:

robiul487
/

NCAkit

Sleeping

NCAkit / modules /video_creator /services /libraries /ffmpeg_utils.py

feat: Major optimizations - Quiz dynamic fonts, TTS 1.2x speed, Video stream copy (10x faster), Single API call, Fact Image dynamic fonts, Text Story position fix

ee36c8e 3 months ago

raw

history blame contribute delete

6.71 kB

	import subprocess
	import logging
	from pathlib import Path

	logger = logging.getLogger(__name__)


	class FFmpegUtils:
	    """Utilities for audio and video processing with FFmpeg.

	    Every method shells out to the ``ffmpeg`` or ``ffprobe`` executable via
	    ``subprocess.run`` and captures the tool's output instead of letting it
	    reach the console.
	    """

	    @staticmethod
	    def _stderr_text(error: subprocess.CalledProcessError) -> str:
	        """Return the stderr captured from a failed command as printable text.

	        Args:
	            error: The exception raised by ``subprocess.run(..., check=True)``

	        Returns:
	            The captured stderr as ``str`` ("" when nothing was captured)
	        """
	        captured = error.stderr
	        if captured is None:
	            return ""
	        if isinstance(captured, bytes):
	            # Tool output is not guaranteed to be valid UTF-8; a strict decode
	            # would raise UnicodeDecodeError inside the error handler and hide
	            # the original failure.
	            return captured.decode("utf-8", errors="replace")
	        return str(captured)

	    @staticmethod
	    def _transcode_audio(audio_data: bytes, output_path: Path, temp_input: Path,
	                         encoder_args: list, label: str) -> None:
	        """Write audio bytes to a temp file and convert it with ffmpeg.

	        Args:
	            audio_data: Raw audio bytes handed to ffmpeg as its input file
	            output_path: Where ffmpeg writes the converted audio
	            temp_input: Scratch file for the input; always removed afterwards
	            encoder_args: ffmpeg options placed between the input and the output
	            label: Short format name used in the failure log message

	        Raises:
	            subprocess.CalledProcessError: If ffmpeg exits with a non-zero status
	        """
	        try:
	            # Writing inside the try guarantees that a partially written temp
	            # file is removed as well.
	            temp_input.write_bytes(audio_data)
	            subprocess.run(
	                [
	                    "ffmpeg",
	                    "-i", str(temp_input),
	                    *encoder_args,
	                    "-y",  # Overwrite
	                    str(output_path),
	                ],
	                check=True,
	                capture_output=True,
	            )
	        except subprocess.CalledProcessError as e:
	            logger.error(
	                "Failed to convert audio to %s (%s): %s",
	                label, output_path, FFmpegUtils._stderr_text(e),
	            )
	            raise
	        finally:
	            # Clean up temp file; it may legitimately be absent if the write
	            # itself failed before creating it.
	            try:
	                temp_input.unlink()
	            except FileNotFoundError:
	                pass

	    @staticmethod
	    def save_audio_as_wav(audio_data: bytes, output_path: Path):
	        """
	        Save audio data as WAV file (normalized for Whisper)

	        The bytes are written to a temporary ``temp_<name>`` file next to
	        ``output_path``, converted by ffmpeg, and the temporary file is removed
	        whether or not the conversion succeeds.

	        Args:
	            audio_data: Raw audio bytes (WAV format from TTS)
	            output_path: Where to save the normalized WAV

	        Raises:
	            subprocess.CalledProcessError: If ffmpeg exits with a non-zero status
	        """
	        logger.debug("Saving normalized WAV to %s", output_path)

	        temp_input = output_path.parent / f"temp_{output_path.name}"
	        # Normalize audio for Whisper (16kHz, mono, 16-bit PCM)
	        FFmpegUtils._transcode_audio(
	            audio_data,
	            output_path,
	            temp_input,
	            [
	                "-ar", "16000",        # 16kHz sample rate
	                "-ac", "1",            # Mono
	                "-sample_fmt", "s16",  # 16-bit PCM
	            ],
	            "WAV",
	        )

	        logger.debug("Saved normalized WAV: %s", output_path)

	    @staticmethod
	    def save_audio_as_mp3(audio_data: bytes, output_path: Path):
	        """
	        Convert audio data to MP3

	        The bytes are written to a temporary ``temp_<name>.wav`` file next to
	        ``output_path``, encoded by ffmpeg with libmp3lame, and the temporary
	        file is removed whether or not the conversion succeeds.

	        Args:
	            audio_data: Raw audio bytes (WAV format from TTS)
	            output_path: Where to save the MP3

	        Raises:
	            subprocess.CalledProcessError: If ffmpeg exits with a non-zero status
	        """
	        logger.debug("Converting to MP3: %s", output_path)

	        temp_input = output_path.parent / f"temp_{output_path.name}.wav"
	        FFmpegUtils._transcode_audio(
	            audio_data,
	            output_path,
	            temp_input,
	            [
	                "-codec:a", "libmp3lame",
	                "-qscale:a", "2",  # High quality
	            ],
	            "MP3",
	        )

	        logger.debug("Saved MP3: %s", output_path)

	    @staticmethod
	    def get_video_duration(file_path: Path) -> float:
	        """
	        Get duration of video file in seconds using ffprobe

	        This is a best-effort query: any failure (ffprobe missing, unreadable
	        file, unparsable output) is logged and reported as 0.0 rather than
	        raised.

	        Args:
	            file_path: Path to video file

	        Returns:
	            Duration in seconds, or 0.0 if it could not be determined
	        """
	        cmd = [
	            "ffprobe",
	            "-v", "error",
	            "-show_entries", "format=duration",
	            "-of", "default=noprint_wrappers=1:nokey=1",
	            str(file_path),
	        ]
	        try:
	            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
	            return float(result.stdout.strip())
	        except subprocess.CalledProcessError as e:
	            # Prefer ffprobe's own message; fall back to the generic exit-status
	            # text when nothing was written to stderr.
	            detail = FFmpegUtils._stderr_text(e).strip() or e
	            logger.error("Failed to get video duration for %s: %s", file_path, detail)
	            return 0.0
	        except Exception as e:
	            logger.error("Failed to get video duration for %s: %s", file_path, e)
	            return 0.0

	    @staticmethod
	    def normalize_video(input_path: Path, output_path: Path):
	        """
	        Normalize video to standard format (H.264, 30fps, AAC) to fix seeking/black screen issues.

	        Args:
	            input_path: Path to source video
	            output_path: Path to save normalized video

	        Raises:
	            subprocess.CalledProcessError: If ffmpeg exits with a non-zero status
	            Exception: Any other error from launching ffmpeg is logged and re-raised
	        """
	        logger.debug("Normalizing video: %s -> %s", input_path, output_path)

	        cmd = [
	            "ffmpeg",
	            "-i", str(input_path),
	            "-c:v", "libx264",
	            "-preset", "fast",
	            "-r", "30",
	            "-c:a", "aac",
	            "-pix_fmt", "yuv420p",
	            "-y",
	            str(output_path),
	        ]

	        try:
	            subprocess.run(cmd, check=True, capture_output=True)
	            logger.debug("Normalized video saved to %s", output_path)
	        except subprocess.CalledProcessError as e:
	            logger.error(
	                "Failed to normalize video %s: %s",
	                input_path, FFmpegUtils._stderr_text(e),
	            )
	            raise
	        except Exception as e:
	            logger.error("Error normalizing video %s: %s", input_path, e)
	            raise

	    @staticmethod
	    def cut_video(input_path: Path, output_path: Path, start_time: float, duration: float):
	        """
	        Cut a segment from a video file using FFmpeg.
	        Uses stream copy (no re-encoding) and drops the audio track.

	        NOTE(review): with stream copy the segment can presumably only begin on
	        a keyframe, so the real start may differ slightly from ``start_time`` --
	        confirm this is acceptable for callers.

	        Args:
	            input_path: Source video
	            output_path: Destination for the segment
	            start_time: Start time in seconds
	            duration: Duration of the segment in seconds

	        Raises:
	            subprocess.CalledProcessError: If ffmpeg exits with a non-zero status
	        """
	        cmd = [
	            "ffmpeg",
	            "-ss", str(start_time),  # Seek to start (placed before -i)
	            "-i", str(input_path),
	            "-t", str(duration),
	            "-c:v", "copy",  # Stream copy - no re-encode
	            "-an",  # Remove audio
	            "-y",
	            str(output_path),
	        ]

	        try:
	            subprocess.run(cmd, check=True, capture_output=True)
	        except subprocess.CalledProcessError as e:
	            logger.error(
	                "Failed to cut video %s: %s",
	                input_path, FFmpegUtils._stderr_text(e),
	            )
	            raise

	    @staticmethod
	    def image_to_video(input_path: Path, output_path: Path, duration: float):
	        """
	        Convert image to video of specific duration

	        The image is looped for ``duration`` seconds, scaled to fit inside
	        1080x1920 without changing its aspect ratio, centred with padding, and
	        encoded as H.264 at 30 fps.

	        Args:
	            input_path: Path to source image (jpg, png, etc.)
	            output_path: Path to save the output video
	            duration: Duration of the video in seconds

	        Raises:
	            subprocess.CalledProcessError: If ffmpeg exits with a non-zero status
	        """
	        cmd = [
	            "ffmpeg",
	            "-loop", "1",
	            "-i", str(input_path),
	            "-t", str(duration),
	            "-c:v", "libx264",
	            "-pix_fmt", "yuv420p",
	            "-vf", "scale=1080:1920:force_original_aspect_ratio=decrease,pad=1080:1920:(ow-iw)/2:(oh-ih)/2",
	            "-r", "30",
	            "-y",
	            str(output_path),
	        ]

	        try:
	            subprocess.run(cmd, check=True, capture_output=True)
	            logger.debug("Created video from image: %s", output_path)
	        except subprocess.CalledProcessError as e:
	            logger.error(
	                "Failed to convert image to video: %s",
	                FFmpegUtils._stderr_text(e),
	            )
	            raise