File size: 5,230 Bytes
c687a5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import os
import subprocess
import tempfile

import gradio as gr
import numpy as np
from pedalboard import Pedalboard, Chorus
from scipy import signal
from scipy.io import wavfile

# --- Core processing function (stereo-aware) ---
def full_audio_processing(input_audio, vibrato_intensity):
    """
    Full processing chain for mono or stereo audio:
    low-pass filter -> optional notch on a detected ~16 kHz spike ->
    vibrato (Chorus, full wet) -> pitch shift via the external `soundstretch` tool.

    Parameters
    ----------
    input_audio : tuple[int, np.ndarray] | None
        (sample_rate, samples) as delivered by Gradio's numpy Audio component.
        Samples may be int16, int32 or float; 1-D (mono) or 2-D (samples, channels).
    vibrato_intensity : float
        0 disables vibrato; otherwise mapped to Chorus depth as intensity / 10.

    Returns
    -------
    tuple[str, str | None]
        (status message, path of the processed WAV file) on success,
        (error message, None) on failure.
    """
    if input_audio is None:
        return "请先上传一个WAV文件", None

    # --- 1. Load and normalise to float, preserving channel layout ---
    fs, data = input_audio
    if data.dtype == np.int16:
        audio_float = data.astype(np.float32) / 32768.0
    elif data.dtype == np.int32:
        audio_float = data.astype(np.float32) / 2147483648.0
    else:
        # Already floating point (Gradio may hand over float32/float64 directly).
        audio_float = data

    # Detect channel count; 2-D arrays are (samples, channels).
    if audio_float.ndim > 1:
        num_channels = audio_float.shape[1]
        status_prefix = f"检测到立体声音频 ({num_channels} 声道)。"
    else:
        num_channels = 1
        status_prefix = "检测到单声道音频。"

    processed_audio = audio_float

    # --- 2. Step A: frequency shaping (low-pass + optional notch) ---
    # a. Low-pass filter; filtfilt processes every channel along axis 0.
    LOWPASS_CUTOFF_HZ = 20000.0
    nyquist = 0.5 * fs
    b, a = signal.butter(N=8, Wn=min(LOWPASS_CUTOFF_HZ, nyquist - 1) / nyquist, btype='low')
    processed_audio = signal.filtfilt(b, a, processed_audio, axis=0)

    # b. Spike detection on a temporary mono mixdown.
    SPIKE_FREQ_HZ = 16000.0
    SPIKE_THRESHOLD_DB = -25.0
    if num_channels > 1:
        mono_for_analysis = processed_audio.mean(axis=1)
    else:
        mono_for_analysis = processed_audio

    # BUGFIX: the original always built the frequency axis for 4096 bins but
    # transformed only the samples actually present, so clips shorter than
    # 4096 samples could index past the end of the FFT output. Size both the
    # frequency axis and the transform from the same window length.
    n_fft = min(4096, len(mono_for_analysis))
    magnitude_db = -np.inf
    if n_fft > 0:
        freqs = np.fft.fftfreq(n_fft, 1 / fs)
        fft_vals = np.fft.fft(mono_for_analysis[:n_fft])
        target_freq_index = np.argmin(np.abs(freqs - SPIKE_FREQ_HZ))
        magnitude = np.abs(fft_vals[target_freq_index]) / n_fft
        # BUGFIX: guard log10(0) for digital silence (keeps magnitude_db at -inf).
        if magnitude > 0:
            magnitude_db = 20 * np.log10(magnitude)

    # c. If a spike was detected, apply a narrow peaking (notch-like) biquad
    #    to the original mono/stereo signal. Coefficients follow the
    #    Audio EQ Cookbook peaking-EQ formulas.
    if magnitude_db > SPIKE_THRESHOLD_DB:
        FILTER_GAIN_DB = -3.0
        FILTER_Q = 20.0
        w0 = 2 * np.pi * SPIKE_FREQ_HZ / fs
        A = 10 ** (FILTER_GAIN_DB / 40.0)
        alpha = np.sin(w0) / (2.0 * FILTER_Q)
        b0, b1, b2 = 1 + alpha * A, -2 * np.cos(w0), 1 - alpha * A
        a0, a1, a2 = 1 + alpha / A, -2 * np.cos(w0), 1 - alpha / A
        b_peak, a_peak = np.array([b0, b1, b2]) / a0, np.array([a0, a1, a2]) / a0
        processed_audio = signal.filtfilt(b_peak, a_peak, processed_audio, axis=0)

    # --- 3. Step B: vibrato effect (Chorus with 100% wet mix) ---
    if vibrato_intensity > 0:
        vibrato_depth = vibrato_intensity / 10.0
        board = Pedalboard([Chorus(rate_hz=5.0, depth=vibrato_depth, feedback=0.0, mix=1.0)])
        # pedalboard expects (channels, samples): transpose in, transpose back.
        if num_channels > 1:
            processed_audio = board(processed_audio.T, fs).T
        else:
            processed_audio = board(processed_audio.reshape(1, -1), fs).flatten()

    # --- 4. Step C: pitch shift (SoundStretch) and save ---
    # Convert back to 16-bit integers, clipping to the valid range.
    audio_for_stretch = np.int16(np.clip(processed_audio * 32767.0, -32768, 32767))

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile_intermediate:
        wavfile.write(tmpfile_intermediate.name, fs, audio_for_stretch)
        intermediate_filepath = tmpfile_intermediate.name

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile_final:
        final_output_path = tmpfile_final.name

    PITCH_SEMITONES = 0.25
    cmd = ["soundstretch", intermediate_filepath, final_output_path, f"-pitch={PITCH_SEMITONES}"]
    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    except FileNotFoundError:
        return "错误: 'soundstretch' 命令未找到。", None
    except subprocess.CalledProcessError as e:
        return f"soundstretch 执行出错: {e.stderr}", None
    finally:
        # BUGFIX: the intermediate temp WAV was created with delete=False and
        # never removed, leaking one file per invocation (including errors).
        try:
            os.remove(intermediate_filepath)
        except OSError:
            pass

    return f"{status_prefix} 处理完成。", final_output_path

# --- Gradio UI: component layout and event wiring only; no processing here ---
with gr.Blocks() as app:
    gr.Markdown("# suno去水印")

    with gr.Column():
        # Upload + parameter controls.
        input_audio = gr.Audio(type="numpy", label="上传WAV音频")
        vibrato_slider = gr.Slider(
            minimum=0.0,
            maximum=10.0,
            value=0.0,
            step=0.1,
            label="颤音强度 (音分抖动)",
            info="为声音添加自然的音高抖动效果 (0为关闭)",
        )
        process_button = gr.Button("开始处理", variant="primary")
        # Outputs: status line plus a downloadable result file.
        status_text = gr.Textbox(label="状态", interactive=False)
        output_file = gr.File(label="下载处理后的音频")

    # Route the button click through the full processing pipeline.
    process_button.click(
        fn=full_audio_processing,
        inputs=[input_audio, vibrato_slider],
        outputs=[status_text, output_file],
    )

# --- Launch the app: listen on all interfaces, port 7866, with a public share link ---
if __name__ == "__main__":
    app.launch(server_name="0.0.0.0", server_port=7866, share=True)