File size: 1,036 Bytes
15d6fac
 
57eb872
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15d6fac
 
 
57eb872
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from __future__ import annotations

import subprocess
import tempfile
from pathlib import Path

import soundfile as sf


def _convert_to_wav(input_path: str, output_path: str, sample_rate: int) -> None:
    """Transcode *input_path* to a mono WAV file at *sample_rate* using ffmpeg.

    Args:
        input_path: Path of the media file handed to ffmpeg via ``-i``.
        output_path: Destination file; overwritten if it exists (``-y``).
        sample_rate: Target sample rate passed to ffmpeg's ``-ar`` option.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
            Its captured stderr text is available on the exception's
            ``stderr`` attribute.
        FileNotFoundError: The ``ffmpeg`` executable could not be found.
    """
    command = [
        "ffmpeg",
        # Disable ffmpeg's interactive console handling so it never reads
        # from (or waits on) the caller's standard input.
        "-nostdin",
        "-y",
        "-i",
        input_path,
        "-vn",  # drop any video stream
        "-ac",
        "1",  # downmix to a single channel
        "-ar",
        str(sample_rate),
        "-f",
        "wav",
        output_path,
    ]
    subprocess.run(
        command,
        check=True,
        # Belt and braces with -nostdin: the child must not inherit the
        # parent's stdin, otherwise it can swallow input meant for the caller.
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
        text=True,
    )


def load_audio(path: str, sample_rate: int):
    """Load an audio file as float32 samples after normalising it to WAV.

    The input is first converted by ``_convert_to_wav`` into a file inside a
    temporary directory, which is then read back with ``soundfile``. The
    temporary directory is removed before the samples are returned.

    Args:
        path: Path of the audio file to load.
        sample_rate: Sample rate forwarded to ``_convert_to_wav``.

    Returns:
        The samples returned by ``soundfile.read`` with ``dtype="float32"``.
        If that result has more than one dimension it is averaged over
        axis 1 before being returned.
    """
    extension = Path(path).suffix
    if not extension:
        extension = ".wav"

    with tempfile.TemporaryDirectory(prefix="moss-audio-load-") as scratch_dir:
        # The intermediate file keeps the source extension in its name and
        # always ends in ".wav".
        converted = str(Path(scratch_dir).joinpath(f"normalized{extension}.wav"))
        _convert_to_wav(path, converted, sample_rate)
        # The sample rate reported by soundfile is intentionally discarded.
        samples, _rate = sf.read(converted, dtype="float32", always_2d=False)

    # Already one-dimensional (or has no ``ndim`` at all): return unchanged.
    if getattr(samples, "ndim", 1) <= 1:
        return samples
    # Defensive fallback: collapse extra channels by averaging across axis 1.
    return samples.mean(axis=1)