# desuno / app.py
# (Source captured from a Hugging Face Space page; revision 35088c9 by revol.)
import os
import subprocess
import tempfile

import gradio as gr
import numpy as np
from pedalboard import Pedalboard, Chorus
from scipy import signal
from scipy.io import wavfile
# --- 核心处理函数 (支持立体声) ---
def full_audio_processing(input_audio, vibrato_intensity):
    """Run the full stereo-aware processing chain on an uploaded WAV.

    Pipeline: low-pass filter -> notch at 16 kHz (only if a spike is
    detected there) -> optional vibrato (pedalboard Chorus with mix=1.0)
    -> slight pitch shift via the external ``soundstretch`` binary.

    Parameters
    ----------
    input_audio : tuple[int, numpy.ndarray] | None
        Gradio ``type="numpy"`` audio: ``(sample_rate, samples)``.
        Samples may be int16, int32 or float; mono ``(n,)`` or
        multi-channel ``(n, channels)``.
    vibrato_intensity : float
        Slider value 0-10; 0 disables the vibrato stage.

    Returns
    -------
    tuple[str, str | None]
        ``(status_message, path_to_processed_wav)``; the path is ``None``
        on any error (missing input, missing binary, soundstretch failure).
    """
    if input_audio is None:
        return "请先上传一个WAV文件", None

    fs, data = input_audio

    # --- 1. Normalize to float32 in [-1, 1), keeping the channel layout ---
    if data.dtype == np.int16:
        audio_float = data.astype(np.float32) / 32768.0
    elif data.dtype == np.int32:
        audio_float = data.astype(np.float32) / 2147483648.0
    else:
        audio_float = data  # already float; assume it is in [-1, 1]

    if audio_float.ndim > 1:
        num_channels = audio_float.shape[1]
        status_prefix = f"检测到立体声音频 ({num_channels} 声道)。"
    else:
        num_channels = 1
        status_prefix = "检测到单声道音频。"

    processed_audio = audio_float

    # --- 2a. Low-pass filter (filtfilt runs per channel along axis 0) ---
    LOWPASS_CUTOFF_HZ = 20000.0
    nyquist = 0.5 * fs
    # Clamp the cutoff below Nyquist so butter() stays valid at low sample rates.
    b, a = signal.butter(N=8, Wn=min(LOWPASS_CUTOFF_HZ, nyquist - 1) / nyquist, btype='low')
    processed_audio = signal.filtfilt(b, a, processed_audio, axis=0)

    # --- 2b. Spike detection at 16 kHz (analyze a temporary mono mixdown) ---
    SPIKE_FREQ_HZ = 16000.0
    SPIKE_THRESHOLD_DB = -25.0
    if num_channels > 1:
        mono_for_analysis = processed_audio.mean(axis=1)
    else:
        mono_for_analysis = processed_audio
    n_fft = 4096
    freqs = np.fft.fftfreq(n_fft, 1 / fs)
    fft_vals = np.fft.fft(mono_for_analysis[:n_fft])
    target_freq_index = np.argmin(np.abs(freqs - SPIKE_FREQ_HZ))
    # Epsilon guards against log10(0) (-inf + RuntimeWarning) on silent input.
    magnitude_db = 20 * np.log10(np.abs(fft_vals[target_freq_index]) / n_fft + 1e-12)

    # --- 2c. If a spike is present, notch it out of the full-channel signal ---
    if magnitude_db > SPIKE_THRESHOLD_DB:
        FILTER_GAIN_DB = -3.0
        FILTER_Q = 20.0
        # RBJ "peaking EQ" biquad coefficients (Audio EQ Cookbook).
        w0 = 2 * np.pi * SPIKE_FREQ_HZ / fs
        A = 10 ** (FILTER_GAIN_DB / 40.0)
        alpha = np.sin(w0) / (2.0 * FILTER_Q)
        b0, b1, b2 = 1 + alpha * A, -2 * np.cos(w0), 1 - alpha * A
        a0, a1, a2 = 1 + alpha / A, -2 * np.cos(w0), 1 - alpha / A
        b_peak = np.array([b0, b1, b2]) / a0
        a_peak = np.array([a0, a1, a2]) / a0
        processed_audio = signal.filtfilt(b_peak, a_peak, processed_audio, axis=0)

    # --- 3. Optional vibrato (Chorus with feedback=0, mix=1.0 => pure vibrato) ---
    if vibrato_intensity > 0:
        vibrato_depth = vibrato_intensity / 10.0
        board = Pedalboard([Chorus(rate_hz=5.0, depth=vibrato_depth, feedback=0.0, mix=1.0)])
        # pedalboard expects (channels, samples): transpose in, transpose back out.
        if num_channels > 1:
            processed_audio = board(processed_audio.T, fs).T
        else:
            processed_audio = board(processed_audio.reshape(1, -1), fs).flatten()

    # --- 4. Pitch shift with the external soundstretch binary ---
    # Back to int16; clip first so float overshoot cannot wrap around.
    audio_for_stretch = np.int16(np.clip(processed_audio * 32767.0, -32768, 32767))
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile_intermediate:
        wavfile.write(tmpfile_intermediate.name, fs, audio_for_stretch)
        intermediate_filepath = tmpfile_intermediate.name
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmpfile_final:
        final_output_path = tmpfile_final.name

    PITCH_SEMITONES = 0.25
    cmd = ["soundstretch", intermediate_filepath, final_output_path, f"-pitch={PITCH_SEMITONES}"]
    try:
        subprocess.run(cmd, check=True, capture_output=True, text=True)
    except FileNotFoundError:
        _remove_quietly(final_output_path)  # don't leak the unused output file
        return "错误: 'soundstretch' 命令未找到。", None
    except subprocess.CalledProcessError as e:
        _remove_quietly(final_output_path)
        return f"soundstretch 执行出错: {e.stderr}", None
    finally:
        # Bug fix: the intermediate file was never deleted (leaked one temp
        # file per invocation). Remove it on every path.
        _remove_quietly(intermediate_filepath)
    return f"{status_prefix} 处理完成。", final_output_path


def _remove_quietly(path):
    """Best-effort deletion of a temp file; ignore races / missing files."""
    try:
        os.unlink(path)
    except OSError:
        pass
# --- Gradio 界面 (无需改动) ---
with gr.Blocks() as app:
gr.Markdown("# suno去水印")
with gr.Column():
input_audio = gr.Audio(type="numpy", label="上传WAV音频")
vibrato_slider = gr.Slider(minimum=0.0, maximum=10.0, value=0.0, step=0.1, label="颤音强度 (音分抖动)", info="为声音添加自然的音高抖动效果 (0为关闭)")
process_button = gr.Button("开始处理", variant="primary")
status_text = gr.Textbox(label="状态", interactive=False)
output_file = gr.File(label="下载处理后的音频")
process_button.click(
fn=full_audio_processing,
inputs=[input_audio, vibrato_slider],
outputs=[status_text, output_file]
)
# --- 启动应用 ---
if __name__ == "__main__":
app.launch(server_name="0.0.0.0", server_port=7866, share=True)