Spaces:
Running
Running
File size: 7,209 Bytes
"""
Audio Handler Utilities
Handles audio file upload, validation, and preprocessing.
"""
import io
import librosa
import soundfile as sf
import numpy as np
from pathlib import Path
from typing import Tuple, BinaryIO
import logging
import tempfile
import subprocess
import shutil
from app.config import settings
logger = logging.getLogger(__name__)
class AudioHandler:
    """Handler for audio file operations.

    Every method is static, so the class acts as a namespace for validating,
    converting, loading, saving and inspecting audio files.
    """

    # Extensions accepted by validate_audio_file (compared lower-cased).
    SUPPORTED_FORMATS = ['.wav', '.mp3', '.m4a', '.flac', '.ogg', '.webm']

    @staticmethod
    def validate_audio_file(file_path: str) -> bool:
        """
        Validate audio file format and existence.

        Args:
            file_path: Path to audio file

        Returns:
            True if the path is an existing regular file whose extension is
            listed in SUPPORTED_FORMATS, False otherwise (the reason is logged)
        """
        path = Path(file_path)

        # is_file() rather than exists(): a directory that happens to be
        # named like an audio file cannot be loaded and must be rejected.
        if not path.is_file():
            logger.error(f"Audio file not found: {file_path}")
            return False

        if path.suffix.lower() not in AudioHandler.SUPPORTED_FORMATS:
            logger.error(f"Unsupported audio format: {path.suffix}")
            return False

        return True

    @staticmethod
    def _remove_file(path: str) -> None:
        """Delete *path* on a best-effort basis; failures are logged, not raised."""
        try:
            Path(path).unlink()
        except FileNotFoundError:
            # Already gone - nothing to clean up.
            pass
        except OSError as exc:
            logger.warning(f"Could not remove temporary file {path}: {exc}")

    @staticmethod
    def convert_webm_to_wav(input_path: str, timeout: float = 30) -> str:
        """
        Convert WebM to WAV using FFmpeg.

        The output is mono 16-bit PCM at ``settings.audio_sample_rate`` and is
        written to a newly created temporary file, so an existing ``.wav``
        next to the input is never overwritten. The caller owns the returned
        file and is responsible for deleting it.

        Args:
            input_path: Path to WebM file
            timeout: Maximum number of seconds FFmpeg may run

        Returns:
            Path to converted WAV file

        Raises:
            RuntimeError: If FFmpeg is not installed, exits with a non-zero
                status, or exceeds the timeout
        """
        # Check if ffmpeg is available
        ffmpeg_path = shutil.which('ffmpeg')
        if not ffmpeg_path:
            raise RuntimeError("FFmpeg not found. Please install FFmpeg for WebM support.")

        sample_rate = str(settings.audio_sample_rate)

        # Reserve a unique output name. The handle is closed straight away so
        # that FFmpeg can (re)write the file on every platform.
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp:
            output_path = tmp.name

        logger.info(f"Converting WebM to WAV: {input_path} -> {output_path}")

        command = [
            ffmpeg_path, '-y', '-i', str(input_path),
            '-acodec', 'pcm_s16le',
            '-ar', sample_rate,
            '-ac', '1',  # Mono
            output_path,
        ]

        try:
            try:
                result = subprocess.run(
                    command,
                    stdin=subprocess.DEVNULL,  # keep FFmpeg away from our stdin
                    capture_output=True,
                    text=True,
                    timeout=timeout,
                )
            except subprocess.TimeoutExpired as exc:
                raise RuntimeError("FFmpeg conversion timed out") from exc

            if result.returncode != 0:
                raise RuntimeError(f"FFmpeg conversion failed: {result.stderr}")
        except Exception as e:
            logger.error(f"Conversion failed: {e}")
            # Do not leave a partial or empty output file behind.
            AudioHandler._remove_file(output_path)
            raise

        logger.info("WebM to WAV conversion successful")
        return output_path

    @staticmethod
    def load_audio(
        file_path: str = None,
        file_bytes: bytes = None,
        target_sr: int = None
    ) -> Tuple[np.ndarray, int]:
        """
        Load audio file and convert to target sample rate.

        Multi-channel audio is averaged down to a single channel, and the
        signal is resampled when its rate differs from the target.

        Args:
            file_path: Path to audio file (takes precedence over file_bytes)
            file_bytes: Audio file bytes (alternative to file_path)
            target_sr: Target sample rate (default from settings)

        Returns:
            Tuple of (audio_array, sample_rate)

        Raises:
            ValueError: If neither input is provided, or the duration is
                below 0.5s or above ``settings.max_audio_duration_seconds``
            RuntimeError: If the decoded audio is empty or WebM conversion
                fails
        """
        if target_sr is None:
            target_sr = settings.audio_sample_rate

        converted_path = None
        try:
            if file_path:
                logger.info(f"Loading audio from file: {file_path}")
                source = str(file_path)  # also accept pathlib.Path objects

                # Handle WebM format by converting to WAV first
                if source.lower().endswith('.webm'):
                    converted_path = AudioHandler.convert_webm_to_wav(source)
                    source = converted_path

                # Use soundfile for standard formats
                audio, sr = sf.read(source, dtype='float32')
            elif file_bytes:
                logger.info("Loading audio from bytes")
                audio_io = io.BytesIO(file_bytes)
                audio, sr = sf.read(audio_io, dtype='float32')
            else:
                raise ValueError("Either file_path or file_bytes must be provided")

            # Validate audio is not empty before any processing, so that an
            # empty input is reported as such rather than as a failure of
            # the mix-down or resampling step.
            if audio is None or len(audio) == 0:
                raise RuntimeError("Loaded audio is empty")

            # Convert stereo to mono if needed
            if audio.ndim > 1:
                audio = audio.mean(axis=1)

            # Resample if needed
            if sr != target_sr:
                logger.info(f"Resampling from {sr}Hz to {target_sr}Hz")
                audio = librosa.resample(audio, orig_sr=sr, target_sr=target_sr)
                sr = target_sr

            duration = len(audio) / sr
            logger.info(f"Audio loaded: {duration:.2f}s duration, {sr}Hz sample rate")

            # Validate duration
            if duration > settings.max_audio_duration_seconds:
                raise ValueError(
                    f"Audio duration ({duration:.1f}s) exceeds maximum "
                    f"({settings.max_audio_duration_seconds}s)"
                )
            if duration < 0.5:
                raise ValueError(f"Audio too short ({duration:.1f}s), minimum 0.5s required")

            return audio, sr
        except Exception as e:
            logger.error(f"Failed to load audio: {e}")
            raise
        finally:
            # Cleanup converted file
            if converted_path:
                AudioHandler._remove_file(converted_path)

    @staticmethod
    def save_audio(audio: np.ndarray, sr: int, output_path: str):
        """
        Save audio array to file.

        Errors raised while writing are logged and re-raised unchanged.

        Args:
            audio: Audio array
            sr: Sample rate
            output_path: Output file path
        """
        try:
            sf.write(output_path, audio, sr)
            logger.info(f"Audio saved to: {output_path}")
        except Exception as e:
            logger.error(f"Failed to save audio: {e}")
            raise

    @staticmethod
    def get_audio_info(file_path: str) -> dict:
        """
        Get audio file information without loading full file.

        Errors raised while reading the metadata are logged and re-raised
        unchanged.

        Args:
            file_path: Path to audio file

        Returns:
            Dictionary with the keys "duration", "sample_rate", "channels",
            "format" and "subtype"
        """
        try:
            info = sf.info(file_path)
            return {
                "duration": info.duration,
                "sample_rate": info.samplerate,
                "channels": info.channels,
                "format": info.format,
                "subtype": info.subtype
            }
        except Exception as e:
            logger.error(f"Failed to get audio info: {e}")
            raise
|