Spaces:
Running
Running
| """Audio I/O and level/SNR helpers.""" | |
| from __future__ import annotations | |
| from pathlib import Path | |
| import numpy as np | |
| import soundfile as sf | |
| EPS = 1e-8 | |
def load_audio(path: str | Path, sr: int) -> np.ndarray:
    """Read an audio file and return it as a mono float32 waveform at `sr` Hz.

    Args:
        path: Location of the audio file to read.
        sr: Desired sample rate of the returned waveform, in Hz.

    Returns:
        A float32 array. Input with more than one dimension is averaged over
        axis 1 (presumably the channel axis as returned by soundfile), and the
        result is resampled when the file's sample rate differs from `sr`.
    """
    samples, native_sr = sf.read(str(path), dtype="float32", always_2d=False)

    # Collapse multi-dimensional data to a single track by averaging axis 1.
    is_multichannel = samples.ndim > 1
    if is_multichannel:
        samples = samples.mean(axis=1)

    # Only pay for resampling when the rates actually differ.
    if native_sr == sr:
        return samples.astype(np.float32, copy=False)

    resampled = _resample(samples, native_sr, sr)
    return resampled.astype(np.float32, copy=False)
def save_audio(path: str | Path, wav: np.ndarray, sr: int) -> None:
    """Write `wav` to `path` as 16-bit PCM at `sr` Hz.

    Missing parent directories of `path` are created first.

    Args:
        path: Destination file path.
        wav: Waveform samples to write.
        sr: Sample rate to store in the file, in Hz.
    """
    target = Path(path)
    # Make sure the output directory exists before handing off to soundfile.
    target.parent.mkdir(parents=True, exist_ok=True)
    sf.write(str(target), wav, sr, subtype="PCM_16")
def _resample(wav: np.ndarray, src_sr: int, dst_sr: int) -> np.ndarray:
    """Resample `wav` from `src_sr` Hz to `dst_sr` Hz with soxr at "HQ" quality.

    Args:
        wav: Waveform to convert.
        src_sr: Sample rate of `wav`, in Hz.
        dst_sr: Sample rate of the returned waveform, in Hz.

    Returns:
        The waveform produced by `soxr.resample`.
    """
    # Imported here rather than at module level, so soxr is only needed
    # when a resample is actually requested.
    import soxr

    resampled = soxr.resample(wav, src_sr, dst_sr, quality="HQ")
    return resampled
def rms(wav: np.ndarray) -> float:
    """Return the root-mean-square level of `wav` as a Python float.

    `EPS` is added to the mean square before the square root, so an all-zero
    waveform yields sqrt(EPS) instead of exactly zero.
    """
    mean_square = np.mean(np.square(wav))
    return float(np.sqrt(mean_square + EPS))
def fix_length(wav: np.ndarray, n_samples: int) -> np.ndarray:
    """Pad with zeros or right-trim so the first axis has length n_samples.

    Works on 1-D waveforms and on arrays with trailing dimensions (for
    example frames x channels); only the first axis is changed.

    Args:
        wav: Input array; its first axis is the one being resized.
        n_samples: Required length of the first axis. Must be >= 0.

    Returns:
        `wav[:n_samples]` (a slice of the input, not a copy) when the input is
        long enough; otherwise a new array of the same dtype holding the input
        followed by zeros.

    Raises:
        ValueError: If `n_samples` is negative.
    """
    # A negative length would otherwise slice from the end and silently
    # return an array of the wrong size.
    if n_samples < 0:
        raise ValueError(f"n_samples must be non-negative, got {n_samples}")

    n_have = len(wav)
    if n_have >= n_samples:
        return wav[:n_samples]

    # Keep any trailing dimensions so multi-channel input pads correctly.
    out = np.zeros((n_samples, *wav.shape[1:]), dtype=wav.dtype)
    out[:n_have] = wav
    return out
def random_crop(
    wav: np.ndarray, n_samples: int, rng: np.random.Generator
) -> np.ndarray:
    """Return a randomly positioned window of n_samples taken from `wav`.

    Args:
        wav: Source waveform.
        n_samples: Length of the window to return.
        rng: Random generator used to draw the window's start offset.

    Returns:
        A slice of `wav` of length n_samples when `wav` is longer than that;
        otherwise the result of `fix_length(wav, n_samples)`.
    """
    total = len(wav)
    if total > n_samples:
        # integers() excludes the upper bound, so the +1 keeps the last
        # valid start offset (total - n_samples) reachable.
        offset = int(rng.integers(0, total - n_samples + 1))
        return wav[offset : offset + n_samples]
    # Not enough material to crop: fall back to trimming/padding.
    return fix_length(wav, n_samples)
def scale_to_snr(
    signal: np.ndarray, noise: np.ndarray, snr_db: float
) -> np.ndarray:
    """Return a rescaled copy of `noise` that sits `snr_db` dB below `signal`.

    With SNR = 10 * log10(P_signal / P_noise), the required gain is
    sqrt((P_signal / 10^(SNR/10)) / P_noise), where each P is the mean square
    of the corresponding array plus `EPS`.

    Args:
        signal: Reference waveform whose power defines the target level.
        noise: Waveform to be rescaled.
        snr_db: Desired signal-to-noise ratio in decibels.

    Returns:
        `noise` multiplied by the computed gain.
    """
    # EPS keeps both powers strictly positive, so the division is always defined.
    signal_power = np.mean(np.square(signal)) + EPS
    noise_power = np.mean(np.square(noise)) + EPS

    snr_linear = 10 ** (snr_db / 10)
    wanted_noise_power = signal_power / snr_linear
    gain = np.sqrt(wanted_noise_power / noise_power)
    return gain * noise
def peak_normalize(wav: np.ndarray, peak: float = 0.95) -> np.ndarray:
    """Scale so that max|wav| == peak. Intended to prevent clipping on save.

    Args:
        wav: Waveform to normalize.
        peak: Absolute peak value of the returned waveform.

    Returns:
        `wav` multiplied by `peak / max|wav|`. The input is returned unchanged
        (same object) when it is empty or when its absolute peak is below
        `EPS`, since there is nothing meaningful to scale.
    """
    # np.max raises ValueError on a zero-length array, so handle it up front.
    if wav.size == 0:
        return wav
    m = np.max(np.abs(wav))
    # Effectively silent input: scaling would only amplify numerical noise.
    if m < EPS:
        return wav
    return wav * (peak / m)