| from __future__ import annotations |
|
|
| import subprocess |
| import tempfile |
| from pathlib import Path |
|
|
| import soundfile as sf |
|
|
|
|
def _convert_to_wav(input_path: str, output_path: str, sample_rate: int) -> None:
    """Transcode ``input_path`` into a mono WAV file by invoking ``ffmpeg``.

    Args:
        input_path: Media file handed to ffmpeg through ``-i``.
        output_path: Destination file; an existing file is overwritten (``-y``).
        sample_rate: Output sampling rate, forwarded to ffmpeg's ``-ar`` option.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
            The captured diagnostics are available on the exception's
            ``stderr`` attribute.
        FileNotFoundError: No ``ffmpeg`` executable could be found.
    """
    command = [
        "ffmpeg",
        "-y",  # overwrite output_path without prompting
        "-i",
        input_path,
        "-vn",  # ignore any video stream
        "-ac",
        "1",  # downmix to a single channel
        "-ar",
        str(sample_rate),
        "-f",
        "wav",
        output_path,
    ]
    subprocess.run(
        command,
        check=True,
        # ffmpeg reads stdin for interactive commands by default; detaching it
        # keeps the child from consuming input intended for the caller.
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
        text=True,
        # ffmpeg echoes file metadata to stderr in whatever encoding the file
        # used; strict decoding would raise UnicodeDecodeError even when the
        # conversion itself succeeded.
        errors="replace",
    )
|
|
|
|
def load_audio(path: str, sample_rate: int):
    """Load the audio at ``path`` as float32 samples at ``sample_rate``.

    The input is first converted by ``_convert_to_wav`` into a WAV file inside
    a temporary directory, which is removed once the samples have been read.

    Args:
        path: Location of the source media file.
        sample_rate: Sampling rate requested from the conversion step.

    Returns:
        The data returned by ``soundfile.read`` for the converted file. If it
        has more than one dimension, it is averaged over axis 1 first.
    """
    extension = Path(path).suffix
    if not extension:
        extension = ".wav"

    with tempfile.TemporaryDirectory(prefix="moss-audio-load-") as scratch_dir:
        normalized_file = str(Path(scratch_dir).joinpath(f"normalized{extension}.wav"))
        _convert_to_wav(path, normalized_file, sample_rate)
        # The rate reported by soundfile is discarded; the conversion step was
        # already asked to produce ``sample_rate``.
        samples, _ = sf.read(normalized_file, dtype="float32", always_2d=False)

    # Objects without an ``ndim`` attribute are treated as one-dimensional.
    has_channel_axis = getattr(samples, "ndim", 1) > 1
    return samples.mean(axis=1) if has_channel_axis else samples
|
|