File size: 2,316 Bytes
7224c63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from __future__ import annotations

from pathlib import Path

import numpy as np
import soundfile as sf

try:
    from scipy.signal import resample_poly
except Exception:  # pragma: no cover - dependency fallback for tiny runtimes
    resample_poly = None


def load_audio(path: str) -> tuple[np.ndarray, int]:
    """Read an audio file and return its samples with the sample rate.

    The file is read with ``always_2d=True`` so the sample array is always
    two-dimensional, and the samples are converted to ``float32``.

    Args:
        path: Location of the audio file to read.

    Returns:
        A ``(samples, sample_rate)`` pair.
    """
    samples, sample_rate = sf.read(path, always_2d=True)
    samples = samples.astype(np.float32)
    return samples, int(sample_rate)


def save_audio(path: str, audio: np.ndarray, sr: int) -> str:
    """Write ``audio`` to ``path`` and return the path.

    Missing parent directories are created first. The audio is passed
    through ``ensure_2d_audio`` and then ``normalize_audio`` before it is
    handed to ``sf.write``.

    Args:
        path: Destination file path.
        audio: Samples to write.
        sr: Sample rate passed to the writer.

    Returns:
        The ``path`` argument, unchanged.
    """
    destination = Path(path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    prepared = normalize_audio(ensure_2d_audio(audio))
    sf.write(path, prepared, sr)
    return path


def normalize_audio(audio: np.ndarray, peak: float = 0.92) -> np.ndarray:
    """Limit the peak amplitude of ``audio`` to ``peak`` and clip to [-1, 1].

    Audio whose largest absolute sample reaches or exceeds ``peak`` is scaled
    down so that its largest absolute sample equals ``peak``. Quieter audio is
    never boosted and is passed through untouched.

    Args:
        audio: Samples of any shape; converted to ``float32``.
        peak: Maximum allowed absolute amplitude before clipping.

    Returns:
        A new ``float32`` array with the same shape as the input.
    """
    samples = np.asarray(audio, dtype=np.float32)
    max_amp = float(np.max(np.abs(samples))) if samples.size else 0.0
    # Only attenuate. Skipping the rescale for sub-peak audio avoids a
    # divide-then-multiply round trip that would perturb samples by
    # float32 rounding error without changing the level.
    if max_amp > 0 and max_amp >= peak:
        samples = samples * (peak / max_amp)
    # np.clip always returns a fresh array, so the caller's input is never
    # aliased even when no scaling happened.
    return np.clip(samples, -1.0, 1.0)


def ensure_2d_audio(audio: np.ndarray) -> np.ndarray:
    """Coerce ``audio`` to a two-column ``float32`` array.

    A one-dimensional input is duplicated into two identical columns. An
    input with a single column has that column duplicated. Anything else is
    truncated to its first two columns.

    Args:
        audio: Samples as a 1-D vector or an array with channels on axis 1.

    Returns:
        The samples as ``float32`` with at most two columns on axis 1.
    """
    samples = np.asarray(audio, dtype=np.float32)
    if samples.ndim == 1:
        # Mono vector: the same signal feeds both columns.
        return np.column_stack((samples, samples))
    channel_count = samples.shape[1]
    if channel_count == 1:
        # Single-column input: place a copy of the column beside itself.
        return np.concatenate((samples, samples), axis=1)
    return samples[:, :2]


def resample_if_needed(audio: np.ndarray, orig_sr: int, target_sr: int) -> np.ndarray:
    """Convert ``audio`` from ``orig_sr`` to ``target_sr``.

    The input is first passed through ``ensure_2d_audio``. If the two rates
    are equal, or the clip is empty, that array is returned as is. Otherwise
    the audio is resampled with ``resample_poly`` when it was importable, and
    with per-channel linear interpolation when it was not.

    Args:
        audio: Samples to resample.
        orig_sr: Sample rate of ``audio``.
        target_sr: Desired sample rate.

    Returns:
        The resampled samples as a ``float32`` array.
    """
    stereo = ensure_2d_audio(audio)
    # An empty clip has nothing to resample, and np.interp rejects an empty
    # sample-point array, so short-circuit together with the equal-rate case.
    if orig_sr == target_sr or len(stereo) == 0:
        return stereo
    if resample_poly is None:
        # Fallback: linear interpolation over a normalised [0, 1) time axis.
        old_x = np.linspace(0.0, 1.0, len(stereo), endpoint=False)
        new_len = max(1, int(len(stereo) * target_sr / orig_sr))
        new_x = np.linspace(0.0, 1.0, new_len, endpoint=False)
        channels = [np.interp(new_x, old_x, stereo[:, ch]) for ch in (0, 1)]
        return np.stack(channels, axis=1).astype(np.float32)
    # Reduce the rate ratio so the polyphase filter uses the smallest
    # up/down factors; cast to a plain int to keep the factors Python ints.
    divisor = int(np.gcd(orig_sr, target_sr))
    resampled = resample_poly(stereo, target_sr // divisor, orig_sr // divisor, axis=0)
    return resampled.astype(np.float32)


def add_silence(duration_seconds: float, sr: int) -> np.ndarray:
    """Return a two-column block of zeros lasting ``duration_seconds``.

    Args:
        duration_seconds: Length of the silence in seconds.
        sr: Sample rate; the row count is ``int(duration_seconds * sr)``.

    Returns:
        A ``float32`` array of zeros with shape ``(rows, 2)``.
    """
    frame_count = int(duration_seconds * sr)
    return np.zeros(shape=(frame_count, 2), dtype=np.float32)


def concatenate_audio(parts: list[np.ndarray], sr: int) -> np.ndarray:
    """Join audio segments end to end along the time axis.

    Entries that are ``None`` or have zero length are skipped; every
    remaining entry is passed through ``ensure_2d_audio`` before joining.
    When nothing usable remains, half a second of silence at ``sr`` is
    returned instead.

    Args:
        parts: Audio segments, possibly containing ``None`` or empty items.
        sr: Sample rate used only to size the silence placeholder.

    Returns:
        The concatenated audio, or the silence placeholder.
    """
    segments = []
    for part in parts:
        if part is None or not len(part):
            continue
        segments.append(ensure_2d_audio(part))
    if not segments:
        return add_silence(0.5, sr)
    return np.concatenate(segments, axis=0)