import gradio as gr
import numpy as np
from scipy.io import wavfile
from scipy import signal
import tempfile
import subprocess
from pedalboard import Pedalboard, Chorus


# --- Core processing function (stereo-capable) ---
def full_audio_processing(input_audio, vibrato_intensity):
    """Run the full audio-cleanup chain on an uploaded WAV file.

    Pipeline: low-pass filter -> 16 kHz spike notch (only if detected) ->
    optional vibrato -> append trailing silence -> peak normalization ->
    write the result to a temporary WAV file.

    Args:
        input_audio: Gradio ``(sample_rate, ndarray)`` tuple, or ``None``.
            The array may be int16, int32 or float; mono (1-D) or
            multi-channel with samples along axis 0.
        vibrato_intensity: slider value in [0, 10]; 0 disables the
            vibrato stage.

    Returns:
        Tuple ``(status_message, output_filepath)``. ``output_filepath``
        is ``None`` when no input was provided.
    """
    if input_audio is None:
        return "请先上传一个WAV文件", None

    # --- 1. Load and convert the input to float32 in [-1, 1] ---
    fs, data = input_audio
    if data.dtype == np.int16:
        audio_float = data.astype(np.float32) / 32768.0
    elif data.dtype == np.int32:
        audio_float = data.astype(np.float32) / 2147483648.0
    else:
        # Assume already floating point; use as-is.
        audio_float = data

    # Detect channel layout (Gradio delivers stereo as (samples, channels)).
    if audio_float.ndim > 1:
        num_channels = audio_float.shape[1]
        status_prefix = f"检测到立体声音频 ({num_channels} 声道)。"
    else:
        num_channels = 1
        status_prefix = "检测到单声道音频。"

    processed_audio = audio_float

    # --- 2. Frequency shaping: low-pass + optional notch ---
    # a. 8th-order Butterworth low-pass; filtfilt is zero-phase and applies
    #    per channel along axis 0. Clamp the cutoff below Nyquist so low
    #    sample rates do not make Wn >= 1 (which would raise).
    LOWPASS_CUTOFF_HZ = 20000.0
    nyquist = 0.5 * fs
    b, a = signal.butter(N=8,
                         Wn=min(LOWPASS_CUTOFF_HZ, nyquist - 1) / nyquist,
                         btype='low')
    processed_audio = signal.filtfilt(b, a, processed_audio, axis=0)

    # b. Spike detection near 16 kHz on a temporary mono mixdown.
    SPIKE_FREQ_HZ = 16000.0
    SPIKE_THRESHOLD_DB = -25.0
    if num_channels > 1:
        mono_for_analysis = processed_audio.mean(axis=1)
    else:
        mono_for_analysis = processed_audio

    n_fft = 4096
    freqs = np.fft.fftfreq(n_fft, 1 / fs)
    # n=n_fft zero-pads clips shorter than 4096 samples so the bin index
    # computed against `freqs` is always valid.
    fft_vals = np.fft.fft(mono_for_analysis[:n_fft], n=n_fft)
    target_freq_index = np.argmin(np.abs(freqs - SPIKE_FREQ_HZ))
    # Epsilon avoids log10(0) for digital silence.
    magnitude_db = 20 * np.log10(np.abs(fft_vals[target_freq_index]) / n_fft + 1e-12)

    # c. If a spike is present, apply a narrow peaking biquad (RBJ audio-EQ
    #    cookbook coefficients, negative gain => notch-like cut) to the
    #    original stereo/mono signal.
    if magnitude_db > SPIKE_THRESHOLD_DB:
        FILTER_GAIN_DB = -3.0
        FILTER_Q = 20.0
        w0 = 2 * np.pi * SPIKE_FREQ_HZ / fs
        A = 10 ** (FILTER_GAIN_DB / 40.0)
        alpha = np.sin(w0) / (2.0 * FILTER_Q)
        b0, b1, b2 = 1 + alpha * A, -2 * np.cos(w0), 1 - alpha * A
        a0, a1, a2 = 1 + alpha / A, -2 * np.cos(w0), 1 - alpha / A
        b_peak, a_peak = np.array([b0, b1, b2]) / a0, np.array([a0, a1, a2]) / a0
        processed_audio = signal.filtfilt(b_peak, a_peak, processed_audio, axis=0)

    # --- 3. Vibrato via pedalboard's Chorus (mix=1.0 => pure pitch wobble) ---
    if vibrato_intensity > 0:
        vibrato_depth = vibrato_intensity / 10.0
        board = Pedalboard([Chorus(rate_hz=5.0, depth=vibrato_depth,
                                   feedback=0.0, mix=1.0)])
        # pedalboard expects (channels, samples); transpose in and out.
        if num_channels > 1:
            processed_audio = board(processed_audio.T, fs).T
        else:
            processed_audio = board(processed_audio.reshape(1, -1), fs).flatten()

    # --- 4. Append 0.5 s of trailing silence ---
    EXTEND_DURATION_SEC = 0.5
    num_extend_samples = int(EXTEND_DURATION_SEC * fs)
    if num_channels > 1:
        silence_extension = np.zeros((num_extend_samples, num_channels),
                                     dtype=processed_audio.dtype)
    else:
        silence_extension = np.zeros((num_extend_samples,),
                                     dtype=processed_audio.dtype)
    processed_audio = np.concatenate((processed_audio, silence_extension), axis=0)

    # --- 5. Peak normalization ---
    # Guard against all-zero audio to avoid a divide-by-zero producing NaNs.
    peak = np.max(np.abs(processed_audio))
    if peak > 0:
        processed_audio = processed_audio / peak

    # --- 6. Convert back to int16 and write the result ---
    audio_for_output = np.int16(np.clip(processed_audio * 32767.0, -32768, 32767))

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile_final:
        final_output_path = tmpfile_final.name

    # NOTE: the former `soundstretch` pitch-shift step is disabled. The
    # original code returned the (empty) temp file without ever writing
    # audio into it; write the processed audio directly instead. To
    # re-enable pitch shifting, run:
    #   soundstretch <in.wav> <out.wav> -pitch=0.5
    # via subprocess.run([...], check=True) between these two steps.
    wavfile.write(final_output_path, fs, audio_for_output)

    return f"{status_prefix} 处理完成。", final_output_path


# --- Gradio UI ---
with gr.Blocks() as app:
    gr.Markdown("# suno去水印")
    with gr.Column():
        input_audio = gr.Audio(type="numpy", label="上传WAV音频")
        vibrato_slider = gr.Slider(minimum=0.0, maximum=10.0, value=0.0, step=0.1,
                                   label="颤音强度 (音分抖动)",
                                   info="为声音添加自然的音高抖动效果 (0为关闭)")
        process_button = gr.Button("开始处理", variant="primary")
        status_text = gr.Textbox(label="状态", interactive=False)
        output_file = gr.File(label="下载处理后的音频")

    process_button.click(
        fn=full_audio_processing,
        inputs=[input_audio, vibrato_slider],
        outputs=[status_text, output_file],
    )

# --- Launch ---
if __name__ == "__main__":
    # app.launch(server_name="0.0.0.0", server_port=7866, share=True)
    app.launch()