| """
|
| Audio Mixer Module
|
| Mixes multiple voice tracks into a single output
|
| """
|
|
|
| import numpy as np
|
| from typing import List, Optional
|
|
|
|
|
def mix_voices(
    voice_audios: List[np.ndarray],
    method: str = "sum",
    normalize: bool = True
) -> np.ndarray:
    """
    Mix multiple voice audio tracks into a single array.

    Tracks of unequal length are zero-padded at the end to match the
    longest track before mixing.

    Args:
        voice_audios: List of 1-D audio arrays (one per voice)
        method: Mixing method:
            - "sum": element-wise sum of all tracks
            - "average": element-wise mean of all tracks
            - "weighted": inverse-energy weighting, so quieter tracks
              contribute proportionally more to the mix
        normalize: Whether to normalize the output level

    Returns:
        Mixed audio array whose length equals the longest input track.
        If voice_audios is empty, returns one second of silence at an
        assumed 44.1 kHz rate (44100 zeros).

    Raises:
        ValueError: If method is not "sum", "average", or "weighted"
            (only reached when two or more tracks are supplied).
    """
    if not voice_audios:
        # No input: return 1 s of silence at the assumed 44.1 kHz rate.
        return np.zeros(44100)

    if len(voice_audios) == 1:
        # Single track: nothing to mix; optionally just normalize it.
        audio = voice_audios[0]
        if normalize:
            audio = normalize_audio(audio)
        return audio

    # Zero-pad every track to the longest one so they can be stacked.
    max_length = max(len(audio) for audio in voice_audios)
    padded_audios = [
        np.concatenate([audio, np.zeros(max_length - len(audio))])
        if len(audio) < max_length
        else audio
        for audio in voice_audios
    ]

    if method == "sum":
        mixed = np.sum(padded_audios, axis=0)
    elif method == "average":
        mixed = np.mean(padded_audios, axis=0)
    elif method == "weighted":
        # Weight each track by the inverse of its energy (quiet tracks
        # get boosted); the epsilon guards against division by zero.
        energies = [np.sum(audio ** 2) for audio in padded_audios]
        weights = [1.0 / (e + 1e-10) for e in energies]
        total_weight = sum(weights)
        weights = [w / total_weight for w in weights]

        mixed = np.zeros(max_length)
        for audio, weight in zip(padded_audios, weights):
            mixed += audio * weight
    else:
        # Previously an unrecognized method silently fell back to "sum",
        # hiding typos like "avg". Fail loudly instead.
        raise ValueError(f"Unknown mixing method: {method!r}")

    if normalize:
        mixed = normalize_audio(mixed)

    return mixed
|
|
|
|
|
def normalize_audio(audio: np.ndarray, target_db: float = -3.0) -> np.ndarray:
    """
    Scale audio so its RMS level approaches a dB target.

    The gain is the ratio of the target RMS to the current RMS; the
    scaled result is hard-clipped to [-1.0, 1.0].

    Args:
        audio: Audio sample array
        target_db: Target level in dB (default -3.0 dB)

    Returns:
        Gain-adjusted (and clipped) copy of the audio, or the input
        array itself when it is effectively silent.
    """
    rms = float(np.sqrt(np.mean(audio ** 2)))

    # Effectively silent input: applying gain would be meaningless and
    # would risk dividing by zero, so pass it through untouched.
    if rms < 1e-10:
        return audio

    # Convert the dB target to linear amplitude; the 0.1 factor keeps
    # the result well below full scale for extra headroom.
    target_rms = 0.1 * 10 ** (target_db / 20)

    scaled = audio * (target_rms / rms)

    # Guard against any samples pushed outside the valid range.
    return np.clip(scaled, -1.0, 1.0)
|
|
|
|
|
def apply_fade(audio: np.ndarray, fade_in: float = 0.01, fade_out: float = 0.01, sample_rate: int = 44100) -> np.ndarray:
    """
    Apply linear fade-in/fade-out ramps to a copy of the audio.

    Fade lengths longer than the signal are clamped to the signal
    length (previously such fades were silently skipped entirely).

    Args:
        audio: Audio array (float dtype expected; a modified copy is
            returned, the input is left untouched)
        fade_in: Fade in duration (seconds)
        fade_out: Fade out duration (seconds)
        sample_rate: Sample rate in Hz

    Returns:
        New array with the fades applied.
    """
    audio = audio.copy()
    n = len(audio)

    # Clamp to the signal length so an over-long fade still takes
    # effect instead of being skipped (old behavior: no fade at all
    # when fade_in_samples >= len(audio)).
    fade_in_samples = min(int(fade_in * sample_rate), n)
    if fade_in_samples > 0:
        audio[:fade_in_samples] *= np.linspace(0, 1, fade_in_samples)

    fade_out_samples = min(int(fade_out * sample_rate), n)
    if fade_out_samples > 0:
        audio[-fade_out_samples:] *= np.linspace(1, 0, fade_out_samples)

    return audio
|
|
|