# Hugging Face Space: Suno watermark-removal audio processor.
# (Page header captured at scrape time reported a Spaces build error.)
import os
import subprocess
import tempfile

import gradio as gr
import numpy as np
from pedalboard import Pedalboard, Chorus
from scipy import signal
from scipy.io import wavfile
| # --- 核心处理函数 (支持立体声) --- | |
| def full_audio_processing(input_audio, vibrato_intensity): | |
| """ | |
| 一个完整的处理流程,高质量处理立体声音频:滤波 -> 颤音 -> 变调 | |
| """ | |
| if input_audio is None: | |
| return "请先上传一个WAV文件", None | |
| # --- 1. 加载并预处理音频 --- | |
| fs, data = input_audio | |
| # 转换为浮点数,保持声道结构 | |
| if data.dtype == np.int16: audio_float = data.astype(np.float32) / 32768.0 | |
| elif data.dtype == np.int32: audio_float = data.astype(np.float32) / 2147483648.0 | |
| else: audio_float = data | |
| # 检查声道数 | |
| if audio_float.ndim > 1: | |
| num_channels = audio_float.shape[1] | |
| status_prefix = f"检测到立体声音频 ({num_channels} 声道)。" | |
| else: | |
| num_channels = 1 | |
| status_prefix = "检测到单声道音频。" | |
| processed_audio = audio_float | |
| # --- 2. 步骤 A: 频率处理 (低通 + 陷波) --- | |
| # a. 低通滤波 (scipy 会自动为每个声道应用) | |
| LOWPASS_CUTOFF_HZ = 20000.0 | |
| nyquist = 0.5 * fs | |
| b, a = signal.butter(N=8, Wn=min(LOWPASS_CUTOFF_HZ, nyquist-1) / nyquist, btype='low') | |
| processed_audio = signal.filtfilt(b, a, processed_audio, axis=0) | |
| # b. 毛刺检测 (临时混合为单声道进行分析) | |
| SPIKE_FREQ_HZ = 16000.0 | |
| SPIKE_THRESHOLD_DB = -25.0 | |
| if num_channels > 1: | |
| mono_for_analysis = processed_audio.mean(axis=1) | |
| else: | |
| mono_for_analysis = processed_audio | |
| n_fft = 4096 | |
| freqs = np.fft.fftfreq(n_fft, 1/fs) | |
| fft_vals = np.fft.fft(mono_for_analysis[:n_fft]) | |
| target_freq_index = np.argmin(np.abs(freqs - SPIKE_FREQ_HZ)) | |
| magnitude_db = 20 * np.log10(np.abs(fft_vals[target_freq_index]) / n_fft) | |
| # c. 如果检测到毛刺,将陷波滤波器应用到原始的立体声/单声道信号上 | |
| if magnitude_db > SPIKE_THRESHOLD_DB: | |
| FILTER_GAIN_DB = -3.0 | |
| FILTER_Q = 20.0 | |
| w0 = 2 * np.pi * SPIKE_FREQ_HZ / fs | |
| A = 10**(FILTER_GAIN_DB / 40.0) | |
| alpha = np.sin(w0) / (2.0 * FILTER_Q) | |
| b0, b1, b2 = 1 + alpha * A, -2 * np.cos(w0), 1 - alpha * A | |
| a0, a1, a2 = 1 + alpha / A, -2 * np.cos(w0), 1 - alpha / A | |
| b_peak, a_peak = np.array([b0, b1, b2]) / a0, np.array([a0, a1, a2]) / a0 | |
| processed_audio = signal.filtfilt(b_peak, a_peak, processed_audio, axis=0) | |
| # --- 3. 步骤 B: 添加颤音效果 (Vibrato) --- | |
| if vibrato_intensity > 0: | |
| vibrato_depth = vibrato_intensity / 10.0 | |
| board = Pedalboard([Chorus(rate_hz=5.0, depth=vibrato_depth, feedback=0.0, mix=1.0)]) | |
| # pedalboard 需要 (声道数, 样本数) 格式 | |
| # 我们需要转置数组,处理完再转置回来 | |
| if num_channels > 1: | |
| audio_transposed = processed_audio.T | |
| effected_transposed = board(audio_transposed, fs) | |
| processed_audio = effected_transposed.T | |
| else: # 单声道情况 | |
| # 保持之前的 reshape 逻辑 | |
| effected_mono = board(processed_audio.reshape(1, -1), fs) | |
| processed_audio = effected_mono.flatten() | |
| # --- 4. 步骤 C: 变调 (SoundStretch) 并保存 --- | |
| # 将处理后的浮点数音频转换回16位整数,保持立体声结构 | |
| audio_for_stretch = np.int16(np.clip(processed_audio * 32767.0, -32768, 32767)) | |
| with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile_intermediate: | |
| wavfile.write(tmpfile_intermediate.name, fs, audio_for_stretch) | |
| intermediate_filepath = tmpfile_intermediate.name | |
| with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile_final: | |
| final_output_path = tmpfile_final.name | |
| PITCH_SEMITONES = 0.25 | |
| cmd = ["soundstretch", intermediate_filepath, final_output_path, f"-pitch={PITCH_SEMITONES}"] | |
| try: | |
| subprocess.run(cmd, check=True, capture_output=True, text=True) | |
| except FileNotFoundError: | |
| return "错误: 'soundstretch' 命令未找到。", None | |
| except subprocess.CalledProcessError as e: | |
| return f"soundstretch 执行出错: {e.stderr}", None | |
| return f"{status_prefix} 处理完成。", final_output_path | |
| # --- Gradio 界面 (无需改动) --- | |
| with gr.Blocks() as app: | |
| gr.Markdown("# suno去水印") | |
| with gr.Column(): | |
| input_audio = gr.Audio(type="numpy", label="上传WAV音频") | |
| vibrato_slider = gr.Slider(minimum=0.0, maximum=10.0, value=0.0, step=0.1, label="颤音强度 (音分抖动)", info="为声音添加自然的音高抖动效果 (0为关闭)") | |
| process_button = gr.Button("开始处理", variant="primary") | |
| status_text = gr.Textbox(label="状态", interactive=False) | |
| output_file = gr.File(label="下载处理后的音频") | |
| process_button.click( | |
| fn=full_audio_processing, | |
| inputs=[input_audio, vibrato_slider], | |
| outputs=[status_text, output_file] | |
| ) | |
| # --- 启动应用 --- | |
| if __name__ == "__main__": | |
| app.launch(server_name="0.0.0.0", server_port=7866, share=True) |