# SolfegeScore-Singer-01 / backend/audio_mixer.py
# (uploaded by JeffreyZhou798, commit ecadc11)
"""
Audio Mixer Module
Mixes multiple voice tracks into a single output
"""
import numpy as np
from typing import List, Optional
def mix_voices(
    voice_audios: List[np.ndarray],
    method: str = "sum",
    normalize: bool = True
) -> np.ndarray:
    """
    Mix multiple voice audio tracks into a single track.

    Shorter tracks are zero-padded to the length of the longest track
    before mixing. Padding now preserves each input's dtype (previously
    the float64 default of ``np.zeros`` silently upcast float32 audio).

    Args:
        voice_audios: List of 1-D audio arrays (one per voice).
        method: Mixing method: "sum", "average", or "weighted"
            (inverse-energy weights, so quieter voices count more).
            Any unrecognized value falls back to "sum".
        normalize: Whether to normalize the mixed output.

    Returns:
        Mixed audio array. Returns 1 second of silence (44100 zeros)
        when voice_audios is empty.
    """
    if not voice_audios:
        return np.zeros(44100)  # 1 second of silence at 44.1 kHz

    # Single voice: nothing to mix, just (optionally) normalize.
    if len(voice_audios) == 1:
        audio = voice_audios[0]
        return normalize_audio(audio) if normalize else audio

    # Zero-pad every track to the longest length, keeping each dtype.
    max_length = max(len(audio) for audio in voice_audios)
    padded_audios = []
    for audio in voice_audios:
        if len(audio) < max_length:
            padding = np.zeros(max_length - len(audio), dtype=audio.dtype)
            audio = np.concatenate([audio, padding])
        padded_audios.append(audio)

    if method == "average":
        mixed = np.mean(padded_audios, axis=0)
    elif method == "weighted":
        # Weight each voice by the inverse of its energy so quieter
        # voices get a higher weight; weights are normalized to sum to 1.
        # The 1e-10 guards against division by zero for silent tracks.
        energies = [np.sum(audio ** 2) for audio in padded_audios]
        weights = [1.0 / (e + 1e-10) for e in energies]
        total_weight = sum(weights)
        weights = [w / total_weight for w in weights]
        mixed = np.zeros(max_length)
        for audio, weight in zip(padded_audios, weights):
            mixed += audio * weight
    else:
        # "sum" and any unrecognized method: plain sample-wise sum.
        mixed = np.sum(padded_audios, axis=0)

    if normalize:
        mixed = normalize_audio(mixed)
    return mixed
def normalize_audio(audio: np.ndarray, target_db: float = -3.0) -> np.ndarray:
    """
    Scale audio so its RMS level matches a target dB value.

    The dB target is converted to a linear amplitude and scaled by a
    fixed 0.1 factor before the gain is computed; the scaled signal is
    clipped to [-1.0, 1.0] to prevent overflow.

    Args:
        audio: Audio array.
        target_db: Target dB level (default -3.0 dB).

    Returns:
        Normalized audio; returned unchanged when the input is
        effectively silent (avoids division by zero).
    """
    current_rms = float(np.sqrt(np.mean(audio ** 2)))
    # Near-silent input: no meaningful gain can be computed.
    if current_rms < 1e-10:
        return audio
    # NOTE(review): the 0.1 factor puts the effective target 20 dB below
    # the nominal target_db — presumably deliberate headroom; confirm.
    desired_rms = 0.1 * 10 ** (target_db / 20)
    scaled = audio * (desired_rms / current_rms)
    return np.clip(scaled, -1.0, 1.0)
def apply_fade(audio: np.ndarray, fade_in: float = 0.01, fade_out: float = 0.01, sample_rate: int = 44100) -> np.ndarray:
    """
    Apply linear fade-in/fade-out ramps to audio.

    Fixes an off-by-one in the bounds check: a fade whose sample count
    exactly equals the audio length was silently skipped (strict ``<``);
    it is now applied over the whole signal. Fades longer than the audio
    are still skipped, as before.

    Args:
        audio: Audio array (float dtype assumed; the in-place ``*=`` with
            a float ramp would fail on integer audio — TODO confirm
            callers always pass float).
        fade_in: Fade-in duration in seconds.
        fade_out: Fade-out duration in seconds.
        sample_rate: Sample rate in Hz, used to convert seconds to samples.

    Returns:
        A copy of the audio with fades applied (input is not modified).
    """
    audio = audio.copy()

    # Linear ramp 0 -> 1 over the first fade_in seconds.
    fade_in_samples = int(fade_in * sample_rate)
    if 0 < fade_in_samples <= len(audio):
        audio[:fade_in_samples] *= np.linspace(0, 1, fade_in_samples)

    # Linear ramp 1 -> 0 over the last fade_out seconds.
    fade_out_samples = int(fade_out * sample_rate)
    if 0 < fade_out_samples <= len(audio):
        audio[-fade_out_samples:] *= np.linspace(1, 0, fade_out_samples)

    return audio