Vanta / vanta /utils /audio.py
Komalpreet Kaur
Initial deploy: Vanta TSE backend (FastAPI + trained checkpoint)
32de4f6 unverified
"""Audio I/O and level/SNR helpers."""
from __future__ import annotations
from pathlib import Path
import numpy as np
import soundfile as sf
# Small positive constant: added to mean-square / power terms before a sqrt or
# division, and used as the "silent input" threshold in peak_normalize.
EPS = 1e-8
def load_audio(path: str | Path, sr: int) -> np.ndarray:
    """Read an audio file and return a mono float32 waveform at `sr` Hz.

    Files with more than one channel are downmixed by averaging across
    axis 1, and the waveform is resampled whenever the file's own sample
    rate differs from `sr`.
    """
    samples, native_sr = sf.read(str(path), dtype="float32", always_2d=False)

    # With always_2d=False a multi-channel file comes back 2-D; collapse it.
    is_multichannel = samples.ndim > 1
    if is_multichannel:
        samples = samples.mean(axis=1)

    if native_sr == sr:
        return samples.astype(np.float32, copy=False)

    converted = _resample(samples, native_sr, sr)
    return converted.astype(np.float32, copy=False)
def save_audio(path: str | Path, wav: np.ndarray, sr: int) -> None:
    """Write `wav` to `path` at `sr` Hz using the PCM_16 subtype.

    Missing parent directories of `path` are created first.
    """
    destination = Path(path)
    # Create the output folder (and any ancestors) before writing the file.
    destination.parent.mkdir(parents=True, exist_ok=True)
    sf.write(str(destination), wav, sr, subtype="PCM_16")
def _resample(wav: np.ndarray, src_sr: int, dst_sr: int) -> np.ndarray:
    """Resample `wav` from `src_sr` Hz to `dst_sr` Hz with soxr ("HQ" quality)."""
    # Imported inside the function, so soxr is only loaded when this is called.
    import soxr

    resampled = soxr.resample(wav, src_sr, dst_sr, quality="HQ")
    return resampled
def rms(wav: np.ndarray) -> float:
    """Return the root-mean-square level of `wav` as a Python float.

    EPS is added to the mean square before the square root is taken.
    """
    mean_square = np.mean(wav**2)
    level = np.sqrt(mean_square + EPS)
    return float(level)
def fix_length(wav: np.ndarray, n_samples: int) -> np.ndarray:
    """Zero-pad or right-trim `wav` along axis 0 so its length equals n_samples.

    Works for 1-D waveforms of shape (frames,) and for arrays with trailing
    dimensions such as (frames, channels); only the first axis is resized and
    the dtype is preserved.

    Returns a slice of the input when trimming (no copy) and a newly
    allocated array when padding.
    """
    n_frames = len(wav)
    if n_frames >= n_samples:
        return wav[:n_samples]

    # Carry over any trailing dimensions so multi-channel input pads the same
    # way it trims; for 1-D input this is just (n_samples,).
    out = np.zeros((n_samples, *wav.shape[1:]), dtype=wav.dtype)
    out[:n_frames] = wav
    return out
def random_crop(
    wav: np.ndarray, n_samples: int, rng: np.random.Generator
) -> np.ndarray:
    """Return a randomly positioned window of n_samples taken from `wav`.

    Input that is not longer than n_samples is handed to fix_length instead
    (no random draw is made in that case). Otherwise the start offset is
    drawn from `rng`, uniformly over every position where the window fits.
    """
    surplus = len(wav) - n_samples
    if surplus <= 0:
        return fix_length(wav, n_samples)

    # rng.integers excludes the upper bound, hence the +1 to allow the last
    # valid offset.
    offset = int(rng.integers(0, surplus + 1))
    stop = offset + n_samples
    return wav[offset:stop]
def scale_to_snr(
    signal: np.ndarray, noise: np.ndarray, snr_db: float
) -> np.ndarray:
    """Rescale `noise` so the signal-to-noise ratio against `signal` is `snr_db`.

    With SNR = 10 * log10(P_signal / P_noise), the required noise power is
    P_signal / 10^(SNR/10), and the amplitude gain is the square root of
    (required noise power / current noise power). EPS is added to both
    measured powers before they are used.

    Returns the scaled copy of `noise`; `signal` is only measured.
    """
    signal_power = np.mean(signal**2) + EPS
    noise_power = np.mean(noise**2) + EPS

    # Convert the dB ratio to a linear power ratio.
    linear_ratio = 10 ** (snr_db / 10)
    wanted_noise_power = signal_power / linear_ratio

    gain = np.sqrt(wanted_noise_power / noise_power)
    return noise * gain
def peak_normalize(wav: np.ndarray, peak: float = 0.95) -> np.ndarray:
    """Scale `wav` so that max|wav| == peak. Prevents clipping on save.

    Empty input, and input whose absolute peak is below EPS, is returned
    unchanged (the same array object), since there is nothing meaningful
    to scale. Otherwise a new, scaled array is returned.
    """
    # np.max raises ValueError on a zero-size array, so handle empty input
    # before reducing.
    if wav.size == 0:
        return wav

    m = np.max(np.abs(wav))
    # Avoid dividing by (almost) zero for silent input.
    if m < EPS:
        return wav
    return wav * (peak / m)