File size: 6,342 Bytes
b6f9c90 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 | # -*- coding: utf-8 -*-
"""
混音模块 - 人声与伴奏混合
"""
import numpy as np
import librosa
import soundfile as sf
from pathlib import Path
from typing import Optional
from lib.audio import soft_clip_array
try:
from lib.logger import log
except ImportError:
log = None
try:
from pedalboard import Pedalboard, Reverb, Compressor, Gain
PEDALBOARD_AVAILABLE = True
except ImportError:
PEDALBOARD_AVAILABLE = False
def _probe_sample_rate(path: str, fallback: int = 44100) -> int:
"""Probe sample rate from file metadata."""
try:
return int(sf.info(path).samplerate)
except Exception:
return int(fallback)
def load_audio_for_mix(path: str, target_sr: Optional[int] = None) -> tuple:
"""
加载音频用于混音。
Args:
path: 音频路径
target_sr: 目标采样率;为 None 时保持原始采样率
Returns:
tuple: (audio_data, sample_rate)
"""
if log:
log.detail(f"加载音频: {Path(path).name}")
audio, sr = librosa.load(path, sr=target_sr, mono=False)
if audio.ndim == 1:
audio = np.stack([audio, audio])
if log:
log.detail("单声道已扩展为双声道")
if log:
log.detail(f"音频形状: {audio.shape}, 采样率: {sr}Hz")
return audio, sr
def apply_reverb(
audio: np.ndarray,
sr: int,
room_size: float = 0.3,
wet_level: float = 0.2,
) -> np.ndarray:
"""对人声应用混响效果。"""
if not PEDALBOARD_AVAILABLE:
if log:
log.warning("Pedalboard 不可用,跳过混响处理")
return audio
if log:
log.detail(f"应用混响: room_size={room_size}, wet_level={wet_level}")
if audio.ndim == 1:
audio = audio.reshape(1, -1)
board = Pedalboard([
Reverb(room_size=room_size, wet_level=wet_level, dry_level=1.0 - wet_level)
])
processed = board(audio, sr)
if log:
log.detail("混响处理完成")
return processed
def adjust_audio_length(audio: np.ndarray, target_length: int) -> np.ndarray:
"""将音频裁切/补零到目标长度。"""
current_length = audio.shape[-1]
if current_length == target_length:
return audio
if current_length > target_length:
return audio[..., :target_length]
pad_amount = target_length - current_length
if audio.ndim == 1:
return np.pad(audio, (0, pad_amount))
return np.pad(audio, ((0, 0), (0, pad_amount)))
def mix_vocals_and_accompaniment(
vocals_path: str,
accompaniment_path: str,
output_path: str,
vocals_volume: float = 1.0,
accompaniment_volume: float = 1.0,
reverb_amount: float = 0.0,
target_sr: Optional[int] = None,
) -> str:
"""
混合人声和伴奏。
Args:
vocals_path: 人声音频路径
accompaniment_path: 伴奏音频路径
output_path: 输出路径
vocals_volume: 人声音量 (0-2)
accompaniment_volume: 伴奏音量 (0-2)
reverb_amount: 人声混响量 (0-1)
target_sr: 目标采样率;None 时自动采用两轨中更高采样率
Returns:
str: 输出文件路径
"""
if target_sr is None or target_sr <= 0:
vocals_sr = _probe_sample_rate(vocals_path)
accompaniment_sr = _probe_sample_rate(accompaniment_path)
target_sr = max(vocals_sr, accompaniment_sr)
if log:
log.progress("开始混音处理...")
log.audio(f"人声文件: {Path(vocals_path).name}")
log.audio(f"伴奏文件: {Path(accompaniment_path).name}")
log.config(f"人声音量: {vocals_volume}, 伴奏音量: {accompaniment_volume}")
log.config(f"混响量: {reverb_amount}, 目标采样率: {target_sr}Hz")
if log:
log.detail("加载人声音频...")
vocals, sr = load_audio_for_mix(vocals_path, target_sr)
if log:
log.detail("加载伴奏音频...")
accompaniment, _ = load_audio_for_mix(accompaniment_path, target_sr)
if reverb_amount > 0 and PEDALBOARD_AVAILABLE:
if log:
log.progress("应用人声混响...")
vocals = apply_reverb(vocals, sr, room_size=0.4, wet_level=reverb_amount)
elif reverb_amount > 0 and log:
log.warning("Pedalboard 不可用,跳过混响")
vocals = soft_clip_array(vocals * vocals_volume, threshold=0.85, ceiling=0.95)
accompaniment = soft_clip_array(
accompaniment * accompaniment_volume,
threshold=0.85,
ceiling=0.95,
)
vocals_len = vocals.shape[-1]
accompaniment_len = accompaniment.shape[-1]
target_len = max(vocals_len, accompaniment_len)
if target_len <= 0:
raise ValueError("混音失败:音频长度为 0")
if log:
log.detail(f"人声长度: {vocals_len}, 伴奏长度: {accompaniment_len}")
if vocals_len != accompaniment_len:
log.detail(f"长度不一致,已补齐到最长长度: {target_len}")
vocals = adjust_audio_length(vocals, target_len)
accompaniment = adjust_audio_length(accompaniment, target_len)
if log:
log.progress("混合音轨...")
mixed = vocals + accompaniment
max_val = float(np.max(np.abs(mixed)))
if log:
log.detail(f"混合后峰值: {max_val:.4f}")
mixed = soft_clip_array(mixed, threshold=0.90, ceiling=0.98)
if log:
final_peak = float(np.max(np.abs(mixed)))
log.detail(f"软削波后峰值: {final_peak:.4f}")
if mixed.ndim == 2:
mixed = mixed.T
output_dir = Path(output_path).parent
output_dir.mkdir(parents=True, exist_ok=True)
if log:
log.progress(f"保存混音文件: {output_path}")
sf.write(output_path, mixed, sr)
output_size = Path(output_path).stat().st_size
duration = target_len / sr
if log:
log.success("混音完成")
log.audio(f"输出时长: {duration:.2f}秒")
log.audio(f"输出大小: {output_size / 1024 / 1024:.2f} MB")
return output_path
def check_pedalboard_available() -> bool:
"""检查 pedalboard 是否可用。"""
return PEDALBOARD_AVAILABLE
|