# Spaces: Kremon96 / Demusics (status: Sleeping)
# File size: 3,944 Bytes
# Revision: a4c2add

# audio_processor.py
import librosa
import numpy as np
import torch
import torchaudio
import soundfile as sf
from pathlib import Path
import warnings
from config import SAMPLE_RATE, SUPPORTED_FORMATS, MAX_FILE_SIZE_MB

# Install a blanket "ignore" filter at import time: no category, message or
# module restriction is given, so every warning raised afterwards is hidden.
# NOTE(review): this is process-wide and also silences warnings from other
# modules — confirm that a blanket ignore is really intended.
warnings.filterwarnings("ignore")

class AudioProcessor:
    """Validate, load, save and inspect audio files.

    Waveforms returned by :meth:`load_audio` are float32 ``torch`` tensors
    laid out as ``(2, samples)`` and peak-normalised. :meth:`save_audio`
    writes 16-bit PCM through ``soundfile``.
    """

    def __init__(self):
        # Default sample rate used whenever a method is not given one.
        self.sample_rate = SAMPLE_RATE

    def validate_audio_file(self, file_path):
        """Check that *file_path* exists, is a supported format and is small enough.

        Args:
            file_path: Path (``str`` or ``Path``) of the audio file to check.

        Returns:
            ``True`` when every check passes.

        Raises:
            FileNotFoundError: The path does not exist.
            ValueError: The suffix (compared case-insensitively) is not in
                ``SUPPORTED_FORMATS``, or the file is larger than
                ``MAX_FILE_SIZE_MB`` megabytes.
        """
        file_path = Path(file_path)

        if not file_path.exists():
            raise FileNotFoundError(f"Audio file not found: {file_path}")

        if file_path.suffix.lower() not in SUPPORTED_FORMATS:
            raise ValueError(f"Unsupported format: {file_path.suffix}. Supported: {SUPPORTED_FORMATS}")

        file_size_mb = file_path.stat().st_size / (1024 * 1024)
        if file_size_mb > MAX_FILE_SIZE_MB:
            raise ValueError(f"File too large: {file_size_mb:.1f}MB > {MAX_FILE_SIZE_MB}MB limit")

        return True

    def load_audio(self, file_path, target_sr=None):
        """Load an audio file as a peak-normalised stereo tensor.

        Args:
            file_path: Path of the audio file to read.
            target_sr: Sample rate to resample to. Defaults to
                ``self.sample_rate``.

        Returns:
            A ``(waveform, sample_rate)`` tuple. ``waveform`` is a float32
            tensor of shape ``(2, samples)`` scaled so its peak magnitude is
            (just under) 1. ``sample_rate`` is the rate reported by librosa,
            which equals ``target_sr`` whenever a target rate is in effect.

        Raises:
            Exception: Validation, decoding or conversion failed. The
                underlying error is attached as ``__cause__``.
        """
        if target_sr is None:
            target_sr = self.sample_rate

        try:
            self.validate_audio_file(file_path)

            # librosa handles most formats; with mono=False the result is
            # either 1-D (mono) or already laid out as (channels, samples).
            waveform, sr = librosa.load(file_path, sr=target_sr, mono=False)

            if waveform.ndim == 1:
                # Mono: duplicate the single channel to get stereo.
                waveform = np.stack([waveform, waveform])
            elif waveform.shape[0] == 1:
                # Single channel delivered as 2-D: duplicate it as well.
                waveform = np.repeat(waveform, 2, axis=0)
            elif waveform.shape[0] > 2:
                # Multi-channel: keep only the first two channels.
                waveform = waveform[:2, :]

            # No shape-based transpose here: the layout is already
            # (channels, samples), and guessing from the shape would flip
            # clips that are shorter than the channel count.
            if waveform.shape[-1] == 0:
                raise ValueError("Audio file contains no samples")

            waveform = torch.from_numpy(
                np.ascontiguousarray(waveform, dtype=np.float32)
            )
            # Peak-normalise to [-1, 1]; the epsilon keeps silence from
            # dividing by zero.
            waveform = waveform / (waveform.abs().max() + 1e-8)

            return waveform, sr

        except Exception as e:
            # Same exception type and message as before, now chained so the
            # root cause stays visible in tracebacks.
            raise Exception(f"Error loading audio {file_path}: {e}") from e

    def save_audio(self, waveform, file_path, sample_rate=None):
        """Peak-normalise *waveform* and write it as 16-bit PCM.

        Args:
            waveform: ``torch.Tensor`` or array-like audio data. A 2-D input
                whose first dimension is 2 is treated as
                ``(channels, samples)`` and transposed to the
                ``(samples, channels)`` layout that ``soundfile`` expects.
                Any other shape is written unchanged.
            file_path: Destination path. Missing parent directories are
                created.
            sample_rate: Sample rate to store. Defaults to
                ``self.sample_rate``.

        Returns:
            ``True`` on success.

        Raises:
            Exception: Conversion or writing failed. The underlying error is
                attached as ``__cause__``.
        """
        if sample_rate is None:
            sample_rate = self.sample_rate

        try:
            if isinstance(waveform, torch.Tensor):
                waveform = waveform.detach().cpu().numpy()
            else:
                # Accept lists/tuples as well as ndarrays.
                waveform = np.asarray(waveform)

            if waveform.size == 0:
                raise ValueError("Cannot save an empty waveform")

            # (channels, samples) -> (samples, channels)
            if waveform.ndim == 2 and waveform.shape[0] == 2:
                waveform = waveform.T

            # Normalise to the peak, then leave 5% headroom so the 16-bit
            # conversion cannot clip.
            waveform = waveform / (np.max(np.abs(waveform)) + 1e-8)
            waveform = np.clip(waveform * 0.95, -1, 1)

            Path(file_path).parent.mkdir(parents=True, exist_ok=True)

            sf.write(file_path, waveform, sample_rate, subtype='PCM_16')
            return True

        except Exception as e:
            raise Exception(f"Error saving audio {file_path}: {e}") from e

    def get_audio_info(self, file_path):
        """Return basic metadata for an audio file.

        Args:
            file_path: Path of the audio file to inspect.

        Returns:
            A dict with the keys ``'duration'``, ``'sample_rate'``,
            ``'channels'`` and ``'format'``, taken from ``soundfile.info``.

        Raises:
            Exception: Validation or header parsing failed. The underlying
                error is attached as ``__cause__``.
        """
        try:
            self.validate_audio_file(file_path)
            info = sf.info(file_path)
            return {
                'duration': info.duration,
                'sample_rate': info.samplerate,
                'channels': info.channels,
                'format': info.format
            }
        except Exception as e:
            raise Exception(f"Error getting audio info: {e}") from e