Spaces:

MusoraProductDepartment
/

jam-tracks

Sleeping

jam-tracks / backend /utils /audio_utils.py

Mina Emadi

updated the MVP-Initial upload

a0fcd39 2 months ago

2.83 kB

	"""Audio utility functions for format conversion and processing."""

	import io
	import numpy as np
	import soundfile as sf


	def to_mono(audio: np.ndarray) -> np.ndarray:
	    """
	    Collapse multi-channel audio into a single channel.

	    Args:
	        audio: Audio array, shape (samples,) for mono or
	            (samples, channels) for multi-channel audio.

	    Returns:
	        For 2-D input, the per-sample average across channels with
	        shape (samples,). Input of any other rank is returned unchanged.
	    """
	    has_channel_axis = audio.ndim == 2
	    if not has_channel_axis:
	        # Nothing to mix down; hand the caller's array straight back.
	        return audio

	    # Average along the channel axis (axis 1) to get one value per sample.
	    return audio.mean(axis=1)


	def to_float32(audio: np.ndarray) -> np.ndarray:
	    """
	    Ensure audio is float32 in [-1, 1] range.

	    The input is cast to float32 (always producing a new array). If the
	    peak absolute value of the cast signal exceeds 1.0, the whole signal
	    is divided by that peak so the loudest sample lands on +/-1.0.
	    Signals whose peak is already <= 1.0 are only cast, never rescaled.

	    NOTE: integer PCM input (e.g. int16) is therefore peak-normalized,
	    not divided by the dtype's full-scale value, so the relative level
	    between separately converted signals is not preserved.

	    Args:
	        audio: Audio array (or array-like) in any numeric format.

	    Returns:
	        Audio array as float32 normalized to [-1, 1]. Empty input yields
	        an empty float32 array.
	    """
	    audio = np.asarray(audio).astype(np.float32)

	    # np.max raises ValueError on a zero-size array, so there is nothing
	    # to measure or rescale for empty input.
	    if audio.size == 0:
	        return audio

	    # Only rescale when the signal is not already within [-1, 1].
	    max_val = np.max(np.abs(audio))
	    if max_val > 1.0:
	        audio = audio / max_val

	    return audio


	def normalize(audio: np.ndarray, peak: float = 0.95) -> np.ndarray:
	    """
	    Normalize audio so peak amplitude equals given value.

	    Args:
	        audio: Audio array (or array-like).
	        peak: Target peak amplitude (default 0.95 to avoid clipping).

	    Returns:
	        Normalized audio array. Empty or all-zero input is returned
	        unchanged, since there is no peak to scale against.
	    """
	    audio = np.asarray(audio)

	    # np.max raises ValueError on a zero-size array.
	    if audio.size == 0:
	        return audio

	    if np.issubdtype(audio.dtype, np.integer):
	        # np.abs on a signed integer array wraps for the most negative
	        # value (e.g. int16 -32768 stays -32768), which would understate
	        # the peak. Measure magnitudes in floating point instead.
	        magnitudes = np.abs(audio.astype(np.float64))
	    else:
	        magnitudes = np.abs(audio)

	    max_val = np.max(magnitudes)
	    if max_val > 0:
	        audio = audio / max_val * peak
	    return audio


	def pad_or_trim(audio: np.ndarray, target_length: int) -> np.ndarray:
	    """
	    Pad with zeros or trim audio to target length.

	    Length is measured along the first axis, so both mono (samples,)
	    and multi-channel (samples, channels) arrays are supported.

	    Args:
	        audio: Audio array.
	        target_length: Desired length in samples (must be >= 0).

	    Returns:
	        Audio array with exactly target_length samples along axis 0.
	        When the length already matches, the input array itself is
	        returned; when trimming, a slice of the input is returned.

	    Raises:
	        ValueError: If target_length is negative.
	    """
	    if target_length < 0:
	        # A negative value would otherwise be treated as a slice offset
	        # from the end and silently return the wrong number of samples.
	        raise ValueError(
	            f"target_length must be non-negative, got {target_length}"
	        )

	    current_length = len(audio)

	    if current_length == target_length:
	        return audio
	    if current_length > target_length:
	        return audio[:target_length]

	    # Pad with zeros. The padding keeps the trailing dimensions of the
	    # input so multi-channel audio can be concatenated along axis 0.
	    missing = target_length - current_length
	    padding = np.zeros((missing,) + audio.shape[1:], dtype=audio.dtype)
	    return np.concatenate([audio, padding])


	def encode_wav_to_bytes(audio: np.ndarray, sr: int) -> bytes:
	    """
	    Serialize an audio array into an in-memory WAV file.

	    Args:
	        audio: Audio array
	        sr: Sample rate

	    Returns:
	        The complete WAV file contents as bytes
	    """
	    with io.BytesIO() as wav_buffer:
	        sf.write(wav_buffer, audio, sr, format='WAV')
	        # getvalue() returns the whole buffer regardless of the current
	        # stream position, so no rewind is needed.
	        return wav_buffer.getvalue()


	def encode_flac_to_bytes(audio: np.ndarray, sr: int) -> bytes:
	    """
	    Encode numpy array to FLAC bytes.

	    float32/float64 input is converted to 16-bit integer samples before
	    encoding, so it is quantized to 16 bits; only the FLAC compression
	    step itself is lossless. Float samples outside [-1, 1] are clipped
	    to full scale rather than being allowed to wrap around in the
	    integer conversion. Arrays of any other dtype are passed to the
	    encoder unchanged.

	    Args:
	        audio: Audio array. Float data is expected in the [-1, 1] range.
	        sr: Sample rate

	    Returns:
	        FLAC file as bytes
	    """
	    if audio.dtype in (np.float32, np.float64):
	        # Clip first: casting an out-of-range float to int16 overflows,
	        # turning a slightly-too-loud peak into a full-scale glitch of
	        # the opposite sign.
	        in_range = np.clip(audio, -1.0, 1.0)
	        audio_int = (in_range * 32767).astype(np.int16)
	    else:
	        audio_int = audio

	    buf = io.BytesIO()
	    sf.write(buf, audio_int, sr, format='FLAC')
	    buf.seek(0)
	    return buf.read()