Spaces:

vasugo05
/

AudioDubbAi

Sleeping

File size: 6,109 Bytes

fad5c32

"""

Audio utilities for processing and file I/O

Handles loading, saving, and processing audio files

"""

import logging
import numpy as np
import soundfile as sf
import librosa
from typing import Tuple, Optional
import tempfile
import os

# Module-level logger, named after this module so its output can be filtered per module.
logger = logging.getLogger(__name__)


class AudioProcessor:
    """Collection of static helpers for audio file I/O and waveform processing.

    The class holds no instance state; every method is a ``@staticmethod``.
    Waveforms are NumPy arrays. Loading and resampling are delegated to
    librosa, writing is delegated to soundfile.
    """

    # Lower-case file extensions (without the dot) accepted by
    # validate_audio_file().
    SUPPORTED_FORMATS = ['wav', 'mp3', 'm4a', 'flac', 'ogg']
    DEFAULT_SAMPLE_RATE = 24000  # For XTTS-v2

    @staticmethod
    def load_audio(
        file_path: str,
        sr: Optional[int] = None,
        mono: bool = True
    ) -> Tuple[np.ndarray, int]:
        """Load an audio file with librosa.

        Args:
            file_path: Path to the audio file.
            sr: Target sample rate; ``None`` keeps the file's original rate.
            mono: Down-mix to a single channel when True.

        Returns:
            Tuple of ``(audio_waveform, sample_rate)`` as returned by
            ``librosa.load``.

        Raises:
            Exception: Any error raised by ``librosa.load`` is logged and
                re-raised unchanged.
        """
        logger.info("Loading audio from: %s", file_path)

        try:
            # Load with librosa for flexibility
            audio, sample_rate = librosa.load(file_path, sr=sr, mono=mono)
        except Exception as e:
            logger.error("Error loading audio: %s", e)
            raise

        logger.info("Audio loaded. Shape: %s, SR: %s", audio.shape, sample_rate)
        return audio, sample_rate

    @staticmethod
    def save_audio(
        audio_waveform: np.ndarray,
        sample_rate: int,
        output_path: str,
        subtype: str = 'PCM_16'
    ) -> str:
        """Write a waveform to disk with ``soundfile.write``.

        The parent directory of ``output_path`` is created when it does not
        exist. The array is handed to soundfile unchanged.

        NOTE(review): soundfile expects 2-D data as (frames, channels) while
        ``librosa.load(mono=False)`` returns (channels, samples); multi-channel
        arrays coming from load_audio() may need transposing before saving --
        confirm with callers.

        Args:
            audio_waveform: Audio waveform array.
            sample_rate: Sample rate in Hz.
            output_path: Output file path.
            subtype: Audio subtype (PCM_16, PCM_24, PCM_32, FLOAT).

        Returns:
            ``output_path``, once the file has been written.

        Raises:
            Exception: Any error from directory creation or from soundfile is
                logged and re-raised unchanged.
        """
        logger.info("Saving audio to: %s", output_path)

        try:
            # A bare filename has an empty dirname, and os.makedirs('') raises
            # FileNotFoundError, so only create a directory when one is named.
            output_dir = os.path.dirname(output_path)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

            sf.write(output_path, audio_waveform, sample_rate, subtype=subtype)

            logger.info(
                "Audio saved successfully. Size: %s bytes",
                os.path.getsize(output_path),
            )
            return output_path

        except Exception as e:
            logger.error("Error saving audio: %s", e)
            raise

    @staticmethod
    def resample_audio(
        audio: np.ndarray,
        orig_sr: int,
        target_sr: int
    ) -> np.ndarray:
        """Resample a waveform to a new sample rate.

        Args:
            audio: Audio waveform.
            orig_sr: Sample rate of ``audio``.
            target_sr: Desired sample rate.

        Returns:
            The input array itself when both rates are equal, otherwise the
            result of ``librosa.resample``.
        """
        if orig_sr == target_sr:
            # Nothing to do; avoid a needless copy.
            return audio

        logger.info("Resampling from %s to %s", orig_sr, target_sr)
        return librosa.resample(audio, orig_sr=orig_sr, target_sr=target_sr)

    @staticmethod
    def concatenate_audio(*audio_arrays: np.ndarray) -> np.ndarray:
        """Join several audio arrays end to end.

        Arrays are joined along the first axis (the ``np.concatenate``
        default), which is the time axis for 1-D mono waveforms.

        NOTE(review): for 2-D arrays this stacks along axis 0; confirm that
        axis is the time axis for the layout callers use.

        Args:
            *audio_arrays: One or more audio arrays.

        Returns:
            Concatenated audio array.

        Raises:
            ValueError: If no arrays are passed.
        """
        if not audio_arrays:
            # np.concatenate would raise ValueError too, but with a message
            # that does not point at this call site.
            raise ValueError("concatenate_audio() requires at least one audio array")

        logger.info("Concatenating %s audio segments", len(audio_arrays))
        return np.concatenate(audio_arrays)

    @staticmethod
    def get_audio_duration(audio: np.ndarray, sr: int) -> float:
        """Return the duration of ``audio`` in seconds.

        Computed as the length of the first axis divided by ``sr``.

        NOTE(review): for 2-D arrays the first axis must be the time axis for
        the result to be meaningful; confirm the layout callers use.
        """
        return len(audio) / sr

    @staticmethod
    def validate_audio_file(file_path: str) -> bool:
        """Check whether a path has a supported audio file extension.

        Only the file name is inspected; the file is neither opened nor
        required to exist. The comparison is case-insensitive.

        Args:
            file_path: Path to the audio file.

        Returns:
            True if the extension is in SUPPORTED_FORMATS, False otherwise
            (including when the path has no extension at all).
        """
        # os.path.splitext only looks at the final path component, so a name
        # such as "wav" or "/tmp/flac" (no extension) is not mistaken for a
        # supported file the way splitting on '.' would.
        ext = os.path.splitext(file_path)[1].lstrip('.').lower()
        is_valid = ext in AudioProcessor.SUPPORTED_FORMATS

        if not is_valid:
            logger.warning("Unsupported format: %s", ext or "(no extension)")

        return is_valid

    @staticmethod
    def create_temp_audio_file(suffix: str = '.wav') -> str:
        """Create an empty temporary file and return its path.

        The file is created with ``delete=False`` so it outlives this call;
        the caller is responsible for removing it, e.g. with
        cleanup_temp_file().

        Args:
            suffix: File name suffix for the temporary file.

        Returns:
            Path to the temporary file.
        """
        # Close the handle right away: only the path is needed, and an open
        # handle would leak a descriptor (and keep the file locked on Windows).
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as temp_file:
            temp_path = temp_file.name

        logger.info("Created temporary file: %s", temp_path)
        return temp_path

    @staticmethod
    def cleanup_temp_file(file_path: str) -> None:
        """Delete a file on a best-effort basis.

        A missing file is silently ignored; any other failure is logged as a
        warning and never raised.

        Args:
            file_path: Path to the file to delete.
        """
        try:
            os.remove(file_path)
        except FileNotFoundError:
            # Already gone: nothing to clean up.
            return
        except (OSError, TypeError, ValueError) as e:
            logger.warning("Could not delete file %s: %s", file_path, e)
            return

        logger.info("Deleted temporary file: %s", file_path)

    @staticmethod
    def normalize_audio(audio: np.ndarray, target_db: float = -20.0) -> np.ndarray:
        """Scale a waveform so its RMS level matches ``target_db``.

        The target is interpreted relative to an amplitude of 1.0
        (``10 ** (target_db / 20)``). After scaling, samples are limited to
        the range [-1.0, 1.0].

        Args:
            audio: Audio waveform.
            target_db: Target RMS level in dB.

        Returns:
            Normalized audio. Empty or all-zero input is returned unchanged.
        """
        samples = np.asarray(audio)
        if samples.size == 0:
            # The mean of an empty array is NaN; there is nothing to scale.
            return audio

        # Square in float64 so integer PCM input cannot overflow its own dtype.
        rms = float(np.sqrt(np.mean(np.square(samples, dtype=np.float64))))

        if rms == 0.0:
            # Pure silence: any gain would divide by zero.
            return audio

        # Convert target dB to a linear amplitude.
        target_linear = 10 ** (target_db / 20.0)

        # A plain Python float keeps float32 input as float32 after scaling.
        gain = target_linear / rms
        normalized = samples * gain

        # Limit peaks pushed beyond full scale by the gain.
        normalized = np.clip(normalized, -1.0, 1.0)

        logger.info("Audio normalized to %s dB", target_db)
        return normalized