"""Audio utility functions for format conversion and processing."""

import io

import numpy as np

try:
    import soundfile as sf
except ImportError:
    # Only the encode_* functions need soundfile; keep the NumPy-only helpers
    # importable when it is not installed. The encoders raise on use instead.
    sf = None


def _require_soundfile():
    """Return the soundfile module, raising ImportError if it is unavailable."""
    if sf is None:
        raise ImportError(
            "soundfile is required to encode audio; "
            "install it with 'pip install soundfile'"
        )
    return sf


def to_mono(audio: np.ndarray) -> np.ndarray:
    """
    Convert multi-channel audio to mono by averaging channels.

    Args:
        audio: Audio array, shape (samples,) for mono or (samples, channels)
            for multi-channel audio

    Returns:
        Mono audio array, shape (samples,). Arrays that are not 2-D are
        returned unchanged (same object).
    """
    if audio.ndim != 2:
        return audio
    # Channels live on axis 1, so averaging over it leaves one value per sample.
    return np.mean(audio, axis=1)


def to_float32(audio: np.ndarray) -> np.ndarray:
    """
    Ensure audio is float32 in [-1, 1] range.

    The input is cast to float32. If its absolute peak exceeds 1.0 the whole
    signal is divided by that peak; otherwise the values are left as they are.
    Note that this means integer PCM input is scaled by its own peak, not by
    the full-scale value of its dtype.

    Args:
        audio: Audio array in any numeric format

    Returns:
        A new float32 array with values in [-1, 1]. An empty input yields an
        empty float32 array.
    """
    samples = audio.astype(np.float32)
    if samples.size == 0:
        # np.max raises ValueError on an empty array; nothing to scale anyway.
        return samples

    peak_abs = np.max(np.abs(samples))
    if peak_abs > 1.0:
        samples = samples / peak_abs
    return samples


def normalize(audio: np.ndarray, peak: float = 0.95) -> np.ndarray:
    """
    Normalize audio so peak amplitude equals given value.

    Args:
        audio: Audio array
        peak: Target peak amplitude (default 0.95 to avoid clipping)

    Returns:
        Normalized audio array. Empty or all-zero input is returned unchanged
        (same object), since there is no peak to scale against.
    """
    if audio.size == 0:
        # np.max raises ValueError on an empty array.
        return audio

    current_peak = np.max(np.abs(audio))
    if current_peak > 0:
        return audio / current_peak * peak
    return audio


def pad_or_trim(audio: np.ndarray, target_length: int) -> np.ndarray:
    """
    Pad with zeros or trim audio to target length.

    Padding and trimming are applied along the first (sample) axis, so both
    (samples,) and (samples, channels) arrays are supported.

    Args:
        audio: Audio array
        target_length: Desired length in samples (must be >= 0)

    Returns:
        Audio array with exactly target_length samples. The input object is
        returned as-is when it already has the target length; trimming
        returns a slice of the input.

    Raises:
        ValueError: If target_length is negative.
    """
    if target_length < 0:
        # A negative value would otherwise slice from the end and silently
        # return the wrong number of samples.
        raise ValueError(
            f"target_length must be non-negative, got {target_length}"
        )

    current_length = len(audio)
    if current_length == target_length:
        return audio
    if current_length > target_length:
        return audio[:target_length]

    # Match the trailing dimensions (e.g. channels) so concatenation along
    # axis 0 also works for multi-channel audio.
    missing = target_length - current_length
    padding = np.zeros((missing,) + audio.shape[1:], dtype=audio.dtype)
    return np.concatenate([audio, padding])


def encode_wav_to_bytes(audio: np.ndarray, sr: int) -> bytes:
    """
    Encode numpy array to WAV bytes.

    Args:
        audio: Audio array
        sr: Sample rate

    Returns:
        WAV file as bytes

    Raises:
        ImportError: If the soundfile package is not installed.
    """
    codec = _require_soundfile()
    with io.BytesIO() as buf:
        codec.write(buf, audio, sr, format='WAV')
        return buf.getvalue()


def encode_flac_to_bytes(audio: np.ndarray, sr: int) -> bytes:
    """
    Encode numpy array to FLAC bytes (lossless, typically smaller than WAV).

    Floating-point input is converted to 16-bit integers before encoding;
    samples outside [-1, 1] are clipped to full scale. Non-float input is
    passed to the encoder unchanged.

    Args:
        audio: Audio array
        sr: Sample rate

    Returns:
        FLAC file as bytes

    Raises:
        ImportError: If the soundfile package is not installed.
    """
    codec = _require_soundfile()

    if np.issubdtype(audio.dtype, np.floating):
        # Clip first: scaling an out-of-range sample and casting to int16
        # would wrap around and turn loud peaks into opposite-sign garbage.
        pcm = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)
    else:
        pcm = audio

    with io.BytesIO() as buf:
        codec.write(buf, pcm, sr, format='FLAC')
        return buf.getvalue()