# -*- coding: utf-8 -*- """ 混音模块 - 人声与伴奏混合 """ import numpy as np import librosa import soundfile as sf from pathlib import Path from typing import Optional from lib.audio import soft_clip_array try: from lib.logger import log except ImportError: log = None try: from pedalboard import Pedalboard, Reverb, Compressor, Gain PEDALBOARD_AVAILABLE = True except ImportError: PEDALBOARD_AVAILABLE = False def _probe_sample_rate(path: str, fallback: int = 44100) -> int: """Probe sample rate from file metadata.""" try: return int(sf.info(path).samplerate) except Exception: return int(fallback) def load_audio_for_mix(path: str, target_sr: Optional[int] = None) -> tuple: """ 加载音频用于混音。 Args: path: 音频路径 target_sr: 目标采样率;为 None 时保持原始采样率 Returns: tuple: (audio_data, sample_rate) """ if log: log.detail(f"加载音频: {Path(path).name}") audio, sr = librosa.load(path, sr=target_sr, mono=False) if audio.ndim == 1: audio = np.stack([audio, audio]) if log: log.detail("单声道已扩展为双声道") if log: log.detail(f"音频形状: {audio.shape}, 采样率: {sr}Hz") return audio, sr def apply_reverb( audio: np.ndarray, sr: int, room_size: float = 0.3, wet_level: float = 0.2, ) -> np.ndarray: """对人声应用混响效果。""" if not PEDALBOARD_AVAILABLE: if log: log.warning("Pedalboard 不可用,跳过混响处理") return audio if log: log.detail(f"应用混响: room_size={room_size}, wet_level={wet_level}") if audio.ndim == 1: audio = audio.reshape(1, -1) board = Pedalboard([ Reverb(room_size=room_size, wet_level=wet_level, dry_level=1.0 - wet_level) ]) processed = board(audio, sr) if log: log.detail("混响处理完成") return processed def adjust_audio_length(audio: np.ndarray, target_length: int) -> np.ndarray: """将音频裁切/补零到目标长度。""" current_length = audio.shape[-1] if current_length == target_length: return audio if current_length > target_length: return audio[..., :target_length] pad_amount = target_length - current_length if audio.ndim == 1: return np.pad(audio, (0, pad_amount)) return np.pad(audio, ((0, 0), (0, pad_amount))) def mix_vocals_and_accompaniment( vocals_path: str, accompaniment_path: str, output_path: str, vocals_volume: float = 1.0, accompaniment_volume: float = 1.0, reverb_amount: float = 0.0, target_sr: Optional[int] = None, ) -> str: """ 混合人声和伴奏。 Args: vocals_path: 人声音频路径 accompaniment_path: 伴奏音频路径 output_path: 输出路径 vocals_volume: 人声音量 (0-2) accompaniment_volume: 伴奏音量 (0-2) reverb_amount: 人声混响量 (0-1) target_sr: 目标采样率;None 时自动采用两轨中更高采样率 Returns: str: 输出文件路径 """ if target_sr is None or target_sr <= 0: vocals_sr = _probe_sample_rate(vocals_path) accompaniment_sr = _probe_sample_rate(accompaniment_path) target_sr = max(vocals_sr, accompaniment_sr) if log: log.progress("开始混音处理...") log.audio(f"人声文件: {Path(vocals_path).name}") log.audio(f"伴奏文件: {Path(accompaniment_path).name}") log.config(f"人声音量: {vocals_volume}, 伴奏音量: {accompaniment_volume}") log.config(f"混响量: {reverb_amount}, 目标采样率: {target_sr}Hz") if log: log.detail("加载人声音频...") vocals, sr = load_audio_for_mix(vocals_path, target_sr) if log: log.detail("加载伴奏音频...") accompaniment, _ = load_audio_for_mix(accompaniment_path, target_sr) if reverb_amount > 0 and PEDALBOARD_AVAILABLE: if log: log.progress("应用人声混响...") vocals = apply_reverb(vocals, sr, room_size=0.4, wet_level=reverb_amount) elif reverb_amount > 0 and log: log.warning("Pedalboard 不可用,跳过混响") vocals = soft_clip_array(vocals * vocals_volume, threshold=0.85, ceiling=0.95) accompaniment = soft_clip_array( accompaniment * accompaniment_volume, threshold=0.85, ceiling=0.95, ) vocals_len = vocals.shape[-1] accompaniment_len = accompaniment.shape[-1] target_len = max(vocals_len, accompaniment_len) if target_len <= 0: raise ValueError("混音失败:音频长度为 0") if log: log.detail(f"人声长度: {vocals_len}, 伴奏长度: {accompaniment_len}") if vocals_len != accompaniment_len: log.detail(f"长度不一致,已补齐到最长长度: {target_len}") vocals = adjust_audio_length(vocals, target_len) accompaniment = adjust_audio_length(accompaniment, target_len) if log: log.progress("混合音轨...") mixed = vocals + accompaniment max_val = float(np.max(np.abs(mixed))) if log: log.detail(f"混合后峰值: {max_val:.4f}") mixed = soft_clip_array(mixed, threshold=0.90, ceiling=0.98) if log: final_peak = float(np.max(np.abs(mixed))) log.detail(f"软削波后峰值: {final_peak:.4f}") if mixed.ndim == 2: mixed = mixed.T output_dir = Path(output_path).parent output_dir.mkdir(parents=True, exist_ok=True) if log: log.progress(f"保存混音文件: {output_path}") sf.write(output_path, mixed, sr) output_size = Path(output_path).stat().st_size duration = target_len / sr if log: log.success("混音完成") log.audio(f"输出时长: {duration:.2f}秒") log.audio(f"输出大小: {output_size / 1024 / 1024:.2f} MB") return output_path def check_pedalboard_available() -> bool: """检查 pedalboard 是否可用。""" return PEDALBOARD_AVAILABLE