Spaces:

AJ50
/

voice-cloning-backend

Sleeping

App Files Files Community

voice-cloning-backend / backend /app /song_conversion /audio_mixer.py

AJ50

Add song generation backend: Demucs vocal separation + voice synthesis + audio mixing

e049981 20 days ago

raw

history blame contribute delete

5.86 kB

	"""Audio mixing and effects for song generation."""

	import numpy as np
	import soundfile as sf
	from pathlib import Path
	from typing import Tuple, Optional
	import subprocess
	import sys


	class AudioMixer:
	    """Mix a vocal track with an instrumental track and apply simple effects.

	    Every method is a ``@staticmethod``; the class only acts as a namespace.
	    Arrays are treated as time-first (axis 0 is the sample index), because
	    length matching and the reverb delay both slice along axis 0.
	    """

	    @staticmethod
	    def normalize_audio(audio: np.ndarray, target_db: float = -3.0) -> np.ndarray:
	        """
	        Peak-normalize audio to a target dB level.

	        Args:
	            audio: Audio array
	            target_db: Target peak level in dB relative to full scale (1.0)

	        Returns:
	            A new array scaled so that its absolute peak equals the target,
	            then clipped to [-1.0, 1.0]. Silent or empty input is returned
	            unscaled (only clipped).
	        """
	        audio = np.asarray(audio)

	        # Convert dB to a linear amplitude
	        target_linear = 10 ** (target_db / 20.0)

	        # np.max raises on a zero-size array, so treat empty input as silence
	        current_peak = np.max(np.abs(audio)) if audio.size else 0.0

	        if current_peak > 0:
	            # Scale so the loudest sample lands exactly on the target
	            audio = audio * (target_linear / current_peak)

	        # Only has an effect when target_db is above 0 dB
	        return np.clip(audio, -1.0, 1.0)

	    @staticmethod
	    def add_reverb(audio: np.ndarray, sr: int = 16000, room_scale: float = 0.3,
	                   delay_ms: float = 50) -> np.ndarray:
	        """
	        Add a simple reverb-like effect (the signal plus one delayed copy).

	        Args:
	            audio: Input audio
	            sr: Sample rate
	            room_scale: Gain applied to the delayed copy (0-1)
	            delay_ms: Delay in milliseconds; negative values are treated as 0

	        Returns:
	            A new array ``audio + room_scale * delayed``. When the delay is
	            at least as long as the signal, the delayed copy is all zeros
	            and the output equals the input.
	        """
	        audio = np.asarray(audio)
	        total = len(audio)

	        # A negative delay would turn into a wrap-around slice; clamp it
	        delay_samples = max(0, int((delay_ms / 1000.0) * sr))

	        delayed = np.zeros_like(audio)
	        if delay_samples < total:
	            # Use an explicit end index: audio[:-0] is empty, which made the
	            # assignment fail whenever the delay rounded down to 0 samples
	            delayed[delay_samples:] = audio[:total - delay_samples]

	        # Mix original with the delayed copy
	        return audio + room_scale * delayed

	    @staticmethod
	    def compress_audio(audio: np.ndarray, threshold: float = 0.6, ratio: float = 4.0) -> np.ndarray:
	        """
	        Apply simple per-sample dynamic range compression.

	        Args:
	            audio: Input audio (left unmodified)
	            threshold: Absolute amplitude above which samples are compressed (0-1)
	            ratio: Compression ratio; the excess over the threshold is divided by it

	        Returns:
	            A new array in which every sample whose magnitude exceeds
	            ``threshold`` is pulled towards it, keeping its sign.
	        """
	        audio = np.asarray(audio)
	        magnitude = np.abs(audio)

	        # Value each sample would take if it were compressed
	        squeezed = np.sign(audio) * (threshold + (magnitude - threshold) / ratio)

	        # Build a fresh array instead of writing into the caller's buffer
	        return np.where(magnitude > threshold, squeezed, audio)

	    @staticmethod
	    def mix_audio(vocal: np.ndarray, instrumental: np.ndarray,
	                  vocal_level: float = 0.7, instrumental_level: float = 0.3,
	                  add_reverb: bool = True, add_compression: bool = True,
	                  sr: int = 16000) -> np.ndarray:
	        """
	        Mix vocals and instrumental with optional effects on the vocals.

	        Args:
	            vocal: Vocal audio
	            instrumental: Instrumental audio
	            vocal_level: Vocal volume level (0-1)
	            instrumental_level: Instrumental volume level (0-1)
	            add_reverb: Whether to add reverb to vocals
	            add_compression: Whether to compress the vocals
	            sr: Sample rate

	        Returns:
	            Mixed audio, trimmed to the shorter track and peak-normalized
	            to -3 dB. If one track is 1-D and the other 2-D, the 1-D track
	            is applied to every channel of the 2-D one.
	        """
	        print("[AudioMixer] Normalizing tracks...")

	        # Both tracks start from the same -6 dB peak; the level arguments
	        # below decide the actual balance
	        vocal = AudioMixer.normalize_audio(vocal, -6.0)
	        instrumental = AudioMixer.normalize_audio(instrumental, -6.0)

	        print("[AudioMixer] Adding effects...")

	        # Effects are applied to the vocal track only
	        if add_reverb:
	            vocal = AudioMixer.add_reverb(vocal, sr, room_scale=0.2, delay_ms=40)

	        if add_compression:
	            vocal = AudioMixer.compress_audio(vocal, threshold=0.5, ratio=3.0)

	        print("[AudioMixer] Mixing tracks...")

	        # Ensure same length by trimming to the shorter track
	        min_len = min(len(vocal), len(instrumental))
	        vocal = vocal[:min_len]
	        instrumental = instrumental[:min_len]

	        # A 1-D track against a 2-D one would broadcast along the channel
	        # axis (or fail); add a channel axis so it lines up with time instead
	        if vocal.ndim == 1 and instrumental.ndim == 2:
	            vocal = vocal[:, np.newaxis]
	        elif instrumental.ndim == 1 and vocal.ndim == 2:
	            instrumental = instrumental[:, np.newaxis]

	        # Mix with specified levels
	        mixed = vocal_level * vocal + instrumental_level * instrumental

	        # Normalize final mix
	        mixed = AudioMixer.normalize_audio(mixed, -3.0)

	        # np.max cannot reduce an empty array; report 0 for an empty mix
	        peak = np.max(np.abs(mixed)) if mixed.size else 0.0
	        print(f"[AudioMixer] Mix complete - Peak: {peak:.4f}")

	        return mixed

	    @staticmethod
	    def save_audio(audio: np.ndarray, output_path: Path, sr: int = 16000) -> None:
	        """
	        Save audio to file, creating the parent directory if needed.

	        Args:
	            audio: Audio array
	            output_path: Output file path
	            sr: Sample rate
	        """
	        output_path = Path(output_path)
	        output_path.parent.mkdir(parents=True, exist_ok=True)

	        print(f"[AudioMixer] Saving to {output_path}")
	        sf.write(output_path, audio, sr)
	        print("[AudioMixer] Saved successfully")

	    @staticmethod
	    def mix_and_save(vocal: np.ndarray, instrumental: np.ndarray,
	                     output_path: Path, sr: int = 16000,
	                     add_effects: bool = True) -> Path:
	        """
	        Mix audio and save to file.

	        Args:
	            vocal: Vocal audio
	            instrumental: Instrumental audio
	            output_path: Output file path
	            sr: Sample rate
	            add_effects: Switches both reverb and compression on or off

	        Returns:
	            Output file path
	        """
	        # Levels are left at the mix_audio defaults
	        mixed = AudioMixer.mix_audio(
	            vocal, instrumental,
	            add_reverb=add_effects,
	            add_compression=add_effects,
	            sr=sr,
	        )

	        AudioMixer.save_audio(mixed, output_path, sr)

	        return Path(output_path)