# Spaces: robiul487 / NCAkit (Sleeping)
# File size: 6,711 Bytes

import subprocess
import logging
from pathlib import Path

logger = logging.getLogger(__name__)


class FFmpegUtils:
    """Utilities for audio and video processing with FFmpeg.

    Every method shells out to the ``ffmpeg`` or ``ffprobe`` executable via
    ``subprocess``, so both must be discoverable on ``PATH``.
    """

    @staticmethod
    def _run_ffmpeg(cmd: list, failure_prefix: str) -> None:
        """
        Run an FFmpeg command and log its stderr if it exits non-zero

        Args:
            cmd: Full argument vector, including the executable name
            failure_prefix: Text placed before the captured stderr in the error log

        Raises:
            subprocess.CalledProcessError: If the command exits with a non-zero status
        """
        try:
            subprocess.run(
                cmd,
                check=True,
                capture_output=True,
                # ffmpeg accepts interactive commands on stdin by default;
                # detach it so the child never consumes the parent's input.
                stdin=subprocess.DEVNULL,
            )
        except subprocess.CalledProcessError as e:
            # stderr may echo file names/metadata that are not valid UTF-8.
            # A strict decode here would raise UnicodeDecodeError and hide
            # the real failure, so undecodable bytes are replaced instead.
            stderr_text = (e.stderr or b"").decode("utf-8", errors="replace")
            logger.error(f"{failure_prefix}: {stderr_text}")
            raise

    @staticmethod
    def save_audio_as_wav(audio_data: bytes, output_path: Path) -> None:
        """
        Save audio data as WAV file (normalized for Whisper)

        The bytes are written to a temporary file next to ``output_path``,
        converted to 16kHz mono 16-bit PCM, and the temporary file is removed
        afterwards whether or not the conversion succeeded.

        Args:
            audio_data: Raw audio bytes (WAV format from TTS)
            output_path: Where to save the normalized WAV

        Raises:
            subprocess.CalledProcessError: If ffmpeg exits with a non-zero status
            OSError: If the temp file cannot be written or ffmpeg cannot be started
        """
        logger.debug(f"Saving normalized WAV to {output_path}")

        temp_input = output_path.parent / f"temp_{output_path.name}"

        try:
            # Written inside the try so a failed/partial write is cleaned up too
            temp_input.write_bytes(audio_data)

            # Normalize audio for Whisper (16kHz, mono, 16-bit PCM)
            FFmpegUtils._run_ffmpeg(
                [
                    "ffmpeg",
                    "-i", str(temp_input),
                    "-ar", "16000",  # 16kHz sample rate
                    "-ac", "1",      # Mono
                    "-sample_fmt", "s16",  # 16-bit PCM
                    "-y",            # Overwrite
                    str(output_path),
                ],
                f"Failed to save normalized WAV {output_path}",
            )

            logger.debug(f"Saved normalized WAV: {output_path}")
        finally:
            # Clean up temp file
            if temp_input.exists():
                temp_input.unlink()

    @staticmethod
    def save_audio_as_mp3(audio_data: bytes, output_path: Path) -> None:
        """
        Convert audio data to MP3

        The bytes are written to a temporary ``.wav`` file next to
        ``output_path``, encoded with libmp3lame, and the temporary file is
        removed afterwards whether or not the conversion succeeded.

        Args:
            audio_data: Raw audio bytes (WAV format from TTS)
            output_path: Where to save the MP3

        Raises:
            subprocess.CalledProcessError: If ffmpeg exits with a non-zero status
            OSError: If the temp file cannot be written or ffmpeg cannot be started
        """
        logger.debug(f"Converting to MP3: {output_path}")

        temp_input = output_path.parent / f"temp_{output_path.name}.wav"

        try:
            # Written inside the try so a failed/partial write is cleaned up too
            temp_input.write_bytes(audio_data)

            # Convert to MP3
            FFmpegUtils._run_ffmpeg(
                [
                    "ffmpeg",
                    "-i", str(temp_input),
                    "-codec:a", "libmp3lame",
                    "-qscale:a", "2",  # High quality
                    "-y",              # Overwrite
                    str(output_path),
                ],
                f"Failed to convert to MP3 {output_path}",
            )

            logger.debug(f"Saved MP3: {output_path}")
        finally:
            if temp_input.exists():
                temp_input.unlink()

    @staticmethod
    def get_video_duration(file_path: Path) -> float:
        """
        Get duration of video file in seconds using ffprobe

        This is a best-effort probe: any failure (ffprobe missing, unreadable
        file, unparsable output) is logged and reported as 0.0 rather than
        raised.

        Args:
            file_path: Path to video file

        Returns:
            Duration in seconds, or 0.0 if it could not be determined
        """
        try:
            cmd = [
                "ffprobe",
                "-v", "error",
                "-show_entries", "format=duration",
                "-of", "default=noprint_wrappers=1:nokey=1",
                str(file_path)
            ]
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
            return float(result.stdout.strip())
        except Exception as e:
            # Deliberately broad: callers rely on this method never raising
            logger.error(f"Failed to get video duration for {file_path}: {e}")
            return 0.0

    @staticmethod
    def normalize_video(input_path: Path, output_path: Path) -> None:
        """
        Normalize video to standard format (H.264, 30fps, AAC) to fix seeking/black screen issues.

        Args:
            input_path: Path to source video
            output_path: Path to save normalized video

        Raises:
            subprocess.CalledProcessError: If ffmpeg exits with a non-zero status
            Exception: Any other error from launching ffmpeg is logged and re-raised
        """
        logger.debug(f"Normalizing video: {input_path} -> {output_path}")

        cmd = [
            "ffmpeg",
            "-i", str(input_path),
            "-c:v", "libx264",
            "-preset", "fast",
            "-r", "30",
            "-c:a", "aac",
            "-pix_fmt", "yuv420p",
            "-y",
            str(output_path)
        ]

        try:
            FFmpegUtils._run_ffmpeg(cmd, f"Failed to normalize video {input_path}")
            logger.debug(f"Normalized video saved to {output_path}")
        except subprocess.CalledProcessError:
            # Already logged with ffmpeg's stderr by _run_ffmpeg
            raise
        except Exception as e:
            logger.error(f"Error normalizing video {input_path}: {e}")
            raise

    @staticmethod
    def cut_video(input_path: Path, output_path: Path, start_time: float, duration: float) -> None:
        """
        Cut a segment from a video file using FFmpeg.
        Uses stream copy (no re-encoding) and drops the audio track.

        NOTE(review): with stream copy the cut can presumably only begin on a
        keyframe, so the real start may differ slightly from ``start_time`` -
        confirm if frame accuracy matters.

        Args:
            input_path: Source video
            output_path: Destination for the segment
            start_time: Start time in seconds
            duration: Duration of the segment in seconds

        Raises:
            subprocess.CalledProcessError: If ffmpeg exits with a non-zero status
        """
        cmd = [
            "ffmpeg",
            "-ss", str(start_time),   # Seek to start (before -i for fast seeking)
            "-i", str(input_path),
            "-t", str(duration),
            "-c:v", "copy",           # Stream copy - no re-encode
            "-an",                    # Remove audio (TTS is used)
            "-y",
            str(output_path)
        ]

        FFmpegUtils._run_ffmpeg(cmd, f"Failed to cut video {input_path}")

    @staticmethod
    def image_to_video(input_path: Path, output_path: Path, duration: float) -> None:
        """
        Convert image to video of specific duration

        The image is looped for ``duration`` seconds, scaled to fit inside
        1080x1920 while keeping its aspect ratio, centered with padding, and
        encoded as H.264 at 30fps.

        Args:
            input_path: Path to source image (jpg, png, etc.)
            output_path: Path to save the output video
            duration: Duration of the video in seconds

        Raises:
            subprocess.CalledProcessError: If ffmpeg exits with a non-zero status
        """
        cmd = [
            "ffmpeg",
            "-loop", "1",
            "-i", str(input_path),
            "-t", str(duration),
            "-c:v", "libx264",
            "-pix_fmt", "yuv420p",
            "-vf", "scale=1080:1920:force_original_aspect_ratio=decrease,pad=1080:1920:(ow-iw)/2:(oh-ih)/2",
            "-r", "30",
            "-y",
            str(output_path)
        ]

        FFmpegUtils._run_ffmpeg(cmd, "Failed to convert image to video")
        logger.debug(f"Created video from image: {output_path}")