import contextlib
import os
import subprocess
import tempfile

import gradio as gr
import numpy as np
from pedalboard import Pedalboard, Chorus
from scipy import signal
from scipy.io import wavfile
| |
|
| | |
def full_audio_processing(input_audio, vibrato_intensity):
    """Run the full watermark-removal chain on a WAV clip.

    Pipeline: low-pass filter -> 16 kHz spike notch (if detected) ->
    optional vibrato (pedalboard Chorus) -> slight pitch shift via the
    external ``soundstretch`` tool.

    Parameters
    ----------
    input_audio : tuple[int, numpy.ndarray] | None
        ``(sample_rate, samples)`` as delivered by ``gr.Audio(type="numpy")``.
        Samples may be int16, int32 or float, mono ``(n,)`` or stereo
        ``(n, channels)``.
    vibrato_intensity : float
        Slider value in [0, 10]; 0 disables the vibrato stage, otherwise
        mapped to Chorus depth = intensity / 10.

    Returns
    -------
    tuple[str, str | None]
        (status message, path to the processed WAV file, or ``None`` on
        error).
    """
    if input_audio is None:
        return "请先上传一个WAV文件", None

    fs, data = input_audio

    # Normalize integer PCM to float32 in [-1.0, 1.0).
    if data.dtype == np.int16:
        audio_float = data.astype(np.float32) / 32768.0
    elif data.dtype == np.int32:
        audio_float = data.astype(np.float32) / 2147483648.0
    else:
        audio_float = data

    if audio_float.ndim > 1:
        num_channels = audio_float.shape[1]
        status_prefix = f"检测到立体声音频 ({num_channels} 声道)。"
    else:
        num_channels = 1
        status_prefix = "检测到单声道音频。"

    processed_audio = audio_float

    # --- Stage 1: 8th-order Butterworth low-pass, zero-phase --------------
    # Cutoff is clamped just below Nyquist so the normalized Wn stays < 1
    # for any sample rate.
    LOWPASS_CUTOFF_HZ = 20000.0
    nyquist = 0.5 * fs
    b, a = signal.butter(N=8, Wn=min(LOWPASS_CUTOFF_HZ, nyquist - 1) / nyquist, btype='low')
    processed_audio = signal.filtfilt(b, a, processed_audio, axis=0)

    # --- Stage 2: detect and notch a suspected 16 kHz watermark spike -----
    SPIKE_FREQ_HZ = 16000.0
    SPIKE_THRESHOLD_DB = -25.0
    if num_channels > 1:
        mono_for_analysis = processed_audio.mean(axis=1)
    else:
        mono_for_analysis = processed_audio

    n_fft = 4096
    freqs = np.fft.fftfreq(n_fft, 1 / fs)
    # FIX: pass n=n_fft explicitly so the FFT output length always matches
    # `freqs` even when the clip is shorter than n_fft (it is zero-padded);
    # the original could index out of range on very short clips.
    fft_vals = np.fft.fft(mono_for_analysis[:n_fft], n=n_fft)
    target_freq_index = np.argmin(np.abs(freqs - SPIKE_FREQ_HZ))
    # Epsilon avoids a log10(0) -> -inf RuntimeWarning on silent input.
    magnitude_db = 20 * np.log10(np.abs(fft_vals[target_freq_index]) / n_fft + 1e-12)

    if magnitude_db > SPIKE_THRESHOLD_DB:
        # RBJ-cookbook peaking-EQ biquad: -3 dB cut, Q = 20, at 16 kHz.
        FILTER_GAIN_DB = -3.0
        FILTER_Q = 20.0
        w0 = 2 * np.pi * SPIKE_FREQ_HZ / fs
        A = 10 ** (FILTER_GAIN_DB / 40.0)
        alpha = np.sin(w0) / (2.0 * FILTER_Q)
        b0, b1, b2 = 1 + alpha * A, -2 * np.cos(w0), 1 - alpha * A
        a0, a1, a2 = 1 + alpha / A, -2 * np.cos(w0), 1 - alpha / A
        b_peak, a_peak = np.array([b0, b1, b2]) / a0, np.array([a0, a1, a2]) / a0
        processed_audio = signal.filtfilt(b_peak, a_peak, processed_audio, axis=0)

    # --- Stage 3: optional vibrato via pedalboard Chorus ------------------
    if vibrato_intensity > 0:
        vibrato_depth = vibrato_intensity / 10.0
        board = Pedalboard([Chorus(rate_hz=5.0, depth=vibrato_depth, feedback=0.0, mix=1.0)])
        # pedalboard expects (channels, samples); gradio delivers
        # (samples, channels), hence the transposes.
        if num_channels > 1:
            processed_audio = board(processed_audio.T, fs).T
        else:
            processed_audio = board(processed_audio.reshape(1, -1), fs).flatten()

    # --- Stage 4: pitch shift with the external soundstretch binary -------
    audio_for_stretch = np.int16(np.clip(processed_audio * 32767.0, -32768, 32767))

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile_intermediate:
        wavfile.write(tmpfile_intermediate.name, fs, audio_for_stretch)
        intermediate_filepath = tmpfile_intermediate.name

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile_final:
        final_output_path = tmpfile_final.name

    PITCH_SEMITONES = 0.25
    cmd = ["soundstretch", intermediate_filepath, final_output_path, f"-pitch={PITCH_SEMITONES}"]
    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    except FileNotFoundError:
        # FIX: remove the (empty) output temp file on failure — it leaked
        # in the original.
        with contextlib.suppress(OSError):
            os.remove(final_output_path)
        return "错误: 'soundstretch' 命令未找到。", None
    except subprocess.CalledProcessError as e:
        with contextlib.suppress(OSError):
            os.remove(final_output_path)
        return f"soundstretch 执行出错: {e.stderr}", None
    finally:
        # FIX: the intermediate file (delete=False) was never removed in
        # the original, leaking one temp WAV per invocation.
        with contextlib.suppress(OSError):
            os.remove(intermediate_filepath)

    return f"{status_prefix} 处理完成。", final_output_path
| |
|
| | |
# --- Gradio UI ---------------------------------------------------------
# Declarative layout: component creation order defines on-screen order,
# so these statements must not be reordered.
with gr.Blocks() as app:
    gr.Markdown("# suno去水印")

    with gr.Column():
        # Upload delivers (sample_rate, ndarray) because type="numpy".
        input_audio = gr.Audio(type="numpy", label="上传WAV音频")
        # Slider value feeds full_audio_processing's vibrato_intensity.
        vibrato_slider = gr.Slider(minimum=0.0, maximum=10.0, value=0.0, step=0.1, label="颤音强度 (音分抖动)", info="为声音添加自然的音高抖动效果 (0为关闭)")
        process_button = gr.Button("开始处理", variant="primary")
        status_text = gr.Textbox(label="状态", interactive=False)
        output_file = gr.File(label="下载处理后的音频")

    # Wire the button to the processing pipeline defined above; the two
    # return values map to (status message, downloadable file path).
    process_button.click(
        fn=full_audio_processing,
        inputs=[input_audio, vibrato_slider],
        outputs=[status_text, output_file]
    )


if __name__ == "__main__":
    # Bind to all interfaces on port 7866. NOTE(review): share=True
    # requests a public gradio.live tunnel — this exposes the app to the
    # internet; confirm that is intended.
    app.launch(server_name="0.0.0.0", server_port=7866, share=True)