File size: 3,437 Bytes
b6f9c90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# -*- coding: utf-8 -*-
"""

音频处理模块 - 加载、保存和处理音频文件

"""
import numpy as np
import librosa
import soundfile as sf
from typing import Tuple, Optional


def load_audio(path: str, sr: int = 16000) -> np.ndarray:
    """Load an audio file as mono and bring it to the requested sample rate.

    The file is first decoded at its native sample rate and down-mixed to a
    single channel; resampling is only performed when the native rate differs
    from ``sr``.

    Args:
        path: Path of the audio file to read.
        sr: Target sample rate in Hz (default 16000).

    Returns:
        np.ndarray: Mono audio samples as float32.
    """
    # sr=None keeps the file's own rate so the resample step can be skipped
    # when it already matches the target.
    samples, native_rate = librosa.load(path, sr=None, mono=True)

    rate_matches = native_rate == sr
    if not rate_matches:
        samples = librosa.resample(samples, orig_sr=native_rate, target_sr=sr)

    return samples.astype(np.float32)


def save_audio(path: str, audio: np.ndarray, sr: int = 48000):
    """Write audio samples to a file.

    Samples are hard-limited to the range [-1.0, 1.0] before being passed to
    ``soundfile.write``.

    Args:
        path: Destination file path.
        audio: Audio samples to write.
        sr: Sample rate passed to the writer (default 48000).
    """
    # Keep every sample inside [-1, 1] before handing the data to the writer.
    limited = np.clip(audio, -1.0, 1.0)
    sf.write(path, limited, sr)


def soft_clip(
    audio: np.ndarray,
    threshold: float = 0.9,
    ceiling: float = 0.99,
) -> np.ndarray:
    """Tame peaks with a smooth tanh soft clipper.

    Samples whose magnitude is at or below ``threshold`` pass through
    untouched. Anything louder is bent by a tanh curve so that its magnitude
    approaches, but does not go beyond, ``ceiling``. The input is never
    modified; a float32 copy is returned.

    Args:
        audio: Input audio samples (converted to float32).
        threshold: Magnitude at which compression starts; must be > 0.
        ceiling: Upper limit of the soft clipper; must be > ``threshold``.

    Returns:
        np.ndarray: Processed audio as float32, same shape as the input.

    Raises:
        ValueError: If ``threshold`` is not positive, or ``ceiling`` is not
            greater than ``threshold``.
    """
    samples = np.asarray(audio, dtype=np.float32)

    if threshold <= 0:
        raise ValueError("threshold 必须大于 0")
    if ceiling <= threshold:
        raise ValueError("ceiling 必须大于 threshold")

    output = samples.copy()
    magnitude = np.abs(output)
    too_loud = magnitude > threshold

    if np.any(too_loud):
        # Width of the region between the knee and the ceiling.
        headroom = ceiling - threshold
        # Excess over the threshold, measured in units of the headroom; the
        # tiny epsilon guards the division.
        excess = (magnitude[too_loud] - threshold) / (headroom + 1e-8)
        # tanh maps [0, inf) onto [0, 1), so the result stays below ceiling.
        limited = threshold + headroom * np.tanh(excess)
        # Restore the original polarity of each compressed sample.
        output[too_loud] = np.sign(output[too_loud]) * limited
        output = output.astype(np.float32, copy=False)

    return output


def soft_clip_array(
    audio: np.ndarray,
    threshold: float = 0.9,
    ceiling: float = 0.99,
) -> np.ndarray:
    """Array-flavoured alias of ``soft_clip`` for mono or multi-channel data.

    All arguments are forwarded unchanged to ``soft_clip`` and its result is
    returned as-is.

    Args:
        audio: Input audio samples.
        threshold: Magnitude at which compression starts.
        ceiling: Upper limit of the soft clipper.

    Returns:
        np.ndarray: Whatever ``soft_clip`` returns for the same arguments.
    """
    clipped = soft_clip(audio, threshold, ceiling)
    return clipped


def get_audio_info(path: str) -> dict:
    """Read basic metadata of an audio file.

    Args:
        path: Path of the audio file to inspect.

    Returns:
        dict: Mapping with the keys ``duration``, ``sample_rate``,
        ``channels`` and ``format``, taken from the object returned by
        ``soundfile.info``.
    """
    meta = sf.info(path)
    # (output key, attribute on the soundfile info object)
    field_map = (
        ("duration", "duration"),
        ("sample_rate", "samplerate"),
        ("channels", "channels"),
        ("format", "format"),
    )
    return {key: getattr(meta, attr) for key, attr in field_map}


def normalize_audio(audio: np.ndarray, target_db: float = -20.0) -> np.ndarray:
    """Scale audio so its RMS level matches a target, then hard-limit it.

    The target RMS is ``10 ** (target_db / 20)``, i.e. ``target_db`` is
    expressed relative to an amplitude of 1.0. After scaling, samples are
    clipped to [-1.0, 1.0]. Signals whose RMS is not positive (for example
    all-zero input) are not scaled, only clipped.

    Args:
        audio: Input audio samples. Any array-like is accepted; non-float
            data (e.g. integer PCM) is promoted to float64 first so that
            squaring the samples cannot overflow the integer type.
        target_db: Target RMS level in dB (default -20.0).

    Returns:
        np.ndarray: Normalized audio, clipped to [-1.0, 1.0]. Floating-point
        inputs keep their dtype; other inputs are returned as float64.
    """
    samples = np.asarray(audio)

    # Integer samples would wrap around in `samples ** 2` (e.g. int16
    # 1000 ** 2), producing a bogus RMS; do the maths in floating point.
    if not np.issubdtype(samples.dtype, np.floating):
        samples = samples.astype(np.float64)

    # Nothing to measure: avoid the mean-of-empty warning / NaN RMS.
    if samples.size == 0:
        return np.clip(samples, -1.0, 1.0)

    rms = np.sqrt(np.mean(samples ** 2))
    if rms > 0:
        target_rms = 10 ** (target_db / 20)
        samples = samples * (target_rms / rms)

    return np.clip(samples, -1.0, 1.0)


def trim_silence(audio: np.ndarray, sr: int = 16000,
                 top_db: int = 30) -> np.ndarray:
    """Strip leading and trailing silence from an audio signal.

    Args:
        audio: Input audio samples.
        sr: Sample rate. Accepted as part of the signature but not used by
            this function's body.
        top_db: Silence threshold in dB, forwarded to
            ``librosa.effects.trim``.

    Returns:
        np.ndarray: The audio with leading and trailing silence removed.
    """
    # librosa returns (trimmed_signal, interval); only the signal is needed.
    trim_result = librosa.effects.trim(audio, top_db=top_db)
    return trim_result[0]