ismdrobiul489's picture
feat: Major optimizations - Quiz dynamic fonts, TTS 1.2x speed, Video stream copy (10x faster), Single API call, Fact Image dynamic fonts, Text Story position fix
ee36c8e
import subprocess
import logging
from pathlib import Path
logger = logging.getLogger(__name__)


class FFmpegUtils:
    """Utilities for audio and video processing with FFmpeg.

    Every public method shells out to the ``ffmpeg`` or ``ffprobe``
    executable via ``subprocess.run``; both must be resolvable on PATH.
    """

    @staticmethod
    def _stderr_text(error: subprocess.CalledProcessError) -> str:
        """Return the captured stderr of a failed process as printable text.

        The decode is tolerant on purpose: ffmpeg may echo bytes that are not
        valid UTF-8 (for example file names), and a strict decode inside an
        error handler would replace the real failure with a
        ``UnicodeDecodeError``.
        """
        stderr = error.stderr
        if stderr is None:
            return ""
        if isinstance(stderr, bytes):
            return stderr.decode("utf-8", errors="replace")
        return str(stderr)

    @staticmethod
    def _convert_audio_bytes(audio_data: bytes, temp_input: Path,
                             output_path: Path, codec_args: list) -> None:
        """Write ``audio_data`` to ``temp_input`` and convert it with ffmpeg.

        The temporary input file is removed afterwards whether the write or
        the conversion succeeded or not.

        Raises:
            subprocess.CalledProcessError: If ffmpeg exits with a non-zero
                status (its stderr is logged first).
            OSError: If the temp file cannot be written or ffmpeg cannot be
                started.
        """
        try:
            # The write lives inside the try so that a partial write is
            # still cleaned up by the finally clause.
            temp_input.write_bytes(audio_data)
            subprocess.run(
                ["ffmpeg", "-i", str(temp_input), *codec_args, "-y", str(output_path)],
                check=True,
                capture_output=True,
            )
        except subprocess.CalledProcessError as e:
            logger.error(
                "Failed to convert audio to %s: %s",
                output_path,
                FFmpegUtils._stderr_text(e),
            )
            raise
        finally:
            # Clean up temp file
            if temp_input.exists():
                temp_input.unlink()

    @staticmethod
    def save_audio_as_wav(audio_data: bytes, output_path: Path):
        """
        Save audio data as WAV file (normalized for Whisper)

        The bytes are written to a temporary file next to ``output_path`` and
        re-encoded to 16 kHz, mono, 16-bit PCM.

        Args:
            audio_data: Raw audio bytes (WAV format from TTS)
            output_path: Where to save the normalized WAV

        Raises:
            subprocess.CalledProcessError: If ffmpeg fails.
        """
        logger.debug(f"Saving normalized WAV to {output_path}")
        temp_input = output_path.parent / f"temp_{output_path.name}"
        FFmpegUtils._convert_audio_bytes(
            audio_data,
            temp_input,
            output_path,
            [
                "-ar", "16000",        # 16kHz sample rate
                "-ac", "1",            # Mono
                "-sample_fmt", "s16",  # 16-bit PCM
            ],
        )
        logger.debug(f"Saved normalized WAV: {output_path}")

    @staticmethod
    def save_audio_as_mp3(audio_data: bytes, output_path: Path):
        """
        Convert audio data to MP3

        The bytes are written to a temporary ``.wav`` file next to
        ``output_path`` and encoded with libmp3lame at quality level 2.

        Args:
            audio_data: Raw audio bytes (WAV format from TTS)
            output_path: Where to save the MP3

        Raises:
            subprocess.CalledProcessError: If ffmpeg fails.
        """
        logger.debug(f"Converting to MP3: {output_path}")
        temp_input = output_path.parent / f"temp_{output_path.name}.wav"
        FFmpegUtils._convert_audio_bytes(
            audio_data,
            temp_input,
            output_path,
            [
                "-codec:a", "libmp3lame",
                "-qscale:a", "2",  # High quality
            ],
        )
        logger.debug(f"Saved MP3: {output_path}")

    @staticmethod
    def get_video_duration(file_path: Path) -> float:
        """
        Get duration of video file in seconds using ffprobe

        This is best-effort: any failure to run ffprobe or to parse its
        output is logged and reported as ``0.0`` instead of raising.

        Args:
            file_path: Path to video file

        Returns:
            Duration in seconds, or 0.0 if it could not be determined
        """
        cmd = [
            "ffprobe",
            "-v", "error",
            "-show_entries", "format=duration",
            "-of", "default=noprint_wrappers=1:nokey=1",
            str(file_path),
        ]
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
            return float(result.stdout.strip())
        except (subprocess.SubprocessError, OSError, ValueError) as e:
            # SubprocessError: ffprobe exited non-zero; OSError: ffprobe could
            # not be started; ValueError: output was not a number.
            logger.error(f"Failed to get video duration for {file_path}: {e}")
            return 0.0

    @staticmethod
    def normalize_video(input_path: Path, output_path: Path):
        """
        Normalize video to standard format (H.264, 30fps, AAC) to fix seeking/black screen issues.

        Args:
            input_path: Path to source video
            output_path: Path to save normalized video

        Raises:
            subprocess.CalledProcessError: If ffmpeg fails (stderr is logged).
            Exception: Any other error is logged and re-raised unchanged.
        """
        logger.debug(f"Normalizing video: {input_path} -> {output_path}")
        cmd = [
            "ffmpeg",
            "-i", str(input_path),
            "-c:v", "libx264",
            "-preset", "fast",
            "-r", "30",
            "-c:a", "aac",
            "-pix_fmt", "yuv420p",
            "-y",
            str(output_path),
        ]
        try:
            subprocess.run(cmd, check=True, capture_output=True)
            logger.debug(f"Normalized video saved to {output_path}")
        except subprocess.CalledProcessError as e:
            logger.error(
                f"Failed to normalize video {input_path}: {FFmpegUtils._stderr_text(e)}"
            )
            raise
        except Exception as e:
            logger.error(f"Error normalizing video {input_path}: {e}")
            raise

    @staticmethod
    def cut_video(input_path: Path, output_path: Path, start_time: float, duration: float):
        """
        Cut a segment from a video file using FFmpeg.

        Uses stream copy (no re-encoding) for the video track and drops the
        audio track, since TTS audio is used separately.
        NOTE(review): with stream copy the cut presumably snaps to keyframes,
        so the segment may not start exactly at ``start_time`` - confirm this
        is acceptable for callers.

        Args:
            input_path: Source video
            output_path: Destination for the segment
            start_time: Start time in seconds
            duration: Duration of the segment in seconds

        Raises:
            subprocess.CalledProcessError: If ffmpeg fails (stderr is logged).
        """
        cmd = [
            "ffmpeg",
            "-ss", str(start_time),  # Seek to start (before -i for fast seeking)
            "-i", str(input_path),
            "-t", str(duration),
            "-c:v", "copy",          # Stream copy - no re-encode
            "-an",                   # Remove audio (TTS is used)
            "-y",
            str(output_path),
        ]
        try:
            subprocess.run(cmd, check=True, capture_output=True)
        except subprocess.CalledProcessError as e:
            logger.error(
                f"Failed to cut video {input_path}: {FFmpegUtils._stderr_text(e)}"
            )
            raise

    @staticmethod
    def image_to_video(input_path: Path, output_path: Path, duration: float):
        """
        Convert image to video of specific duration

        The image is looped for ``duration`` seconds, scaled to fit inside
        1080x1920 while keeping its aspect ratio, padded to exactly 1080x1920
        with the picture centered, and encoded as H.264 at 30 fps.

        Args:
            input_path: Path to source image (jpg, png, etc.)
            output_path: Path to save the output video
            duration: Duration of the video in seconds

        Raises:
            subprocess.CalledProcessError: If ffmpeg fails (stderr is logged).
        """
        cmd = [
            "ffmpeg",
            "-loop", "1",
            "-i", str(input_path),
            "-t", str(duration),
            "-c:v", "libx264",
            "-pix_fmt", "yuv420p",
            "-vf", "scale=1080:1920:force_original_aspect_ratio=decrease,pad=1080:1920:(ow-iw)/2:(oh-ih)/2",
            "-r", "30",
            "-y",
            str(output_path),
        ]
        try:
            subprocess.run(cmd, check=True, capture_output=True)
            logger.debug(f"Created video from image: {output_path}")
        except subprocess.CalledProcessError as e:
            logger.error(
                f"Failed to convert image to video: {FFmpegUtils._stderr_text(e)}"
            )
            raise