Spaces:

vasugo05
/

AudioDubbAi

Running

App Files Files Community

AudioDubbAi / src / core / audio_processor.py

vasugo05

Upload 24 files

fad5c32 verified about 1 month ago

raw

history blame contribute delete

6.11 kB

	"""
	Audio utilities for processing and file I/O
	Handles loading, saving, and processing audio files
	"""

	import logging
	import numpy as np
	import soundfile as sf
	import librosa
	from typing import Tuple, Optional
	import tempfile
	import os

	# Module-level logger named after this module (via __name__).
	logger = logging.getLogger(__name__)


	class AudioProcessor:
	    """Stateless helpers for audio file I/O and basic waveform processing.

	    Every method is a ``staticmethod``; the class acts as a namespace and
	    holds the shared constants below.
	    """

	    # Lower-case file extensions (without the dot) accepted by
	    # validate_audio_file().
	    SUPPORTED_FORMATS = ['wav', 'mp3', 'm4a', 'flac', 'ogg']
	    DEFAULT_SAMPLE_RATE = 24000  # For XTTS-v2

	    @staticmethod
	    def load_audio(
	        file_path: str,
	        sr: Optional[int] = None,
	        mono: bool = True
	    ) -> Tuple[np.ndarray, int]:
	        """
	        Load an audio file into memory with ``librosa.load``.

	        Args:
	            file_path: Path to audio file
	            sr: Target sample rate (None = original)
	            mono: Convert to mono if True

	        Returns:
	            Tuple of (audio_waveform, sample_rate)

	        Raises:
	            Exception: Whatever ``librosa.load`` raises is logged and then
	                re-raised unchanged.
	        """
	        logger.info(f"Loading audio from: {file_path}")

	        try:
	            # Load with librosa for flexibility
	            audio, sample_rate = librosa.load(file_path, sr=sr, mono=mono)
	        except Exception as e:
	            logger.error(f"Error loading audio: {str(e)}")
	            raise

	        logger.info(f"Audio loaded. Shape: {audio.shape}, SR: {sample_rate}")
	        return audio, sample_rate

	    @staticmethod
	    def save_audio(
	        audio_waveform: np.ndarray,
	        sample_rate: int,
	        output_path: str,
	        subtype: str = 'PCM_16'
	    ) -> str:
	        """
	        Write a waveform to ``output_path`` with ``soundfile.write``.

	        The parent directory of ``output_path`` is created when it does not
	        exist yet. A bare filename (no directory part) is written relative
	        to the current working directory.

	        Args:
	            audio_waveform: Audio waveform array
	            sample_rate: Sample rate
	            output_path: Output file path
	            subtype: Audio subtype (PCM_16, PCM_24, PCM_32, FLOAT)

	        Returns:
	            Path to saved file (the ``output_path`` that was passed in)

	        Raises:
	            Exception: Any error from directory creation or
	                ``soundfile.write`` is logged and re-raised unchanged.
	        """
	        # NOTE(review): soundfile documents multi-channel data as
	        # (frames, channels) while librosa loads (channels, frames) --
	        # confirm callers only pass mono audio or transpose first.
	        logger.info(f"Saving audio to: {output_path}")

	        try:
	            # os.path.dirname() returns '' for a bare filename and
	            # os.makedirs('') raises FileNotFoundError, so only create a
	            # directory when the path actually names one.
	            output_dir = os.path.dirname(output_path)
	            if output_dir:
	                os.makedirs(output_dir, exist_ok=True)

	            # Save audio
	            sf.write(output_path, audio_waveform, sample_rate, subtype=subtype)

	            logger.info(f"Audio saved successfully. Size: {os.path.getsize(output_path)} bytes")
	            return output_path

	        except Exception as e:
	            logger.error(f"Error saving audio: {str(e)}")
	            raise

	    @staticmethod
	    def resample_audio(
	        audio: np.ndarray,
	        orig_sr: int,
	        target_sr: int
	    ) -> np.ndarray:
	        """
	        Resample audio to target sample rate

	        Args:
	            audio: Audio waveform
	            orig_sr: Original sample rate
	            target_sr: Target sample rate

	        Returns:
	            Resampled audio. When both rates are equal the input array
	            itself is returned (no copy is made).
	        """
	        if orig_sr == target_sr:
	            return audio

	        logger.info(f"Resampling from {orig_sr} to {target_sr}")
	        return librosa.resample(audio, orig_sr=orig_sr, target_sr=target_sr)

	    @staticmethod
	    def concatenate_audio(*audio_arrays) -> np.ndarray:
	        """
	        Concatenate multiple audio arrays

	        The arrays are joined with ``np.concatenate`` along its default
	        axis (axis 0), in the order given.

	        Args:
	            *audio_arrays: Variable number of audio arrays

	        Returns:
	            Concatenated audio array

	        Raises:
	            ValueError: Raised by ``np.concatenate`` when no arrays are
	                passed.
	        """
	        logger.info(f"Concatenating {len(audio_arrays)} audio segments")
	        return np.concatenate(audio_arrays)

	    @staticmethod
	    def get_audio_duration(audio: np.ndarray, sr: int) -> float:
	        """
	        Get duration of audio in seconds

	        Args:
	            audio: Audio waveform
	            sr: Sample rate of ``audio``

	        Returns:
	            ``len(audio) / sr``
	        """
	        # NOTE(review): len() measures the first axis only, so this is the
	        # true duration for 1-D (mono) audio; for multi-channel arrays it
	        # is only right if samples are on axis 0 -- confirm against callers.
	        return len(audio) / sr

	    @staticmethod
	    def validate_audio_file(file_path: str) -> bool:
	        """
	        Validate if file is a supported audio format

	        Only the extension of the final path component is inspected
	        (case-insensitively); the file is never opened.

	        Args:
	            file_path: Path to audio file

	        Returns:
	            True if valid, False otherwise
	        """
	        # os.path.splitext() only looks at the last path component, so a
	        # name without any extension (e.g. "mp3") or a dot inside a parent
	        # directory name is not mistaken for a supported extension.
	        ext = os.path.splitext(file_path)[1][1:].lower()
	        is_valid = ext in AudioProcessor.SUPPORTED_FORMATS

	        if not is_valid:
	            logger.warning(f"Unsupported format: {ext}")

	        return is_valid

	    @staticmethod
	    def create_temp_audio_file(suffix: str = '.wav') -> str:
	        """
	        Create an empty temporary file and return its path

	        The file is created with ``delete=False``, so it stays on disk
	        until the caller removes it (for example with
	        ``cleanup_temp_file``).

	        Args:
	            suffix: Filename suffix for the temporary file

	        Returns:
	            Path to temporary file
	        """
	        # Close the handle right away: only the path is handed out, and an
	        # open handle would leak a descriptor (and block re-opening the
	        # file on Windows).
	        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as temp_file:
	            temp_path = temp_file.name

	        logger.info(f"Created temporary file: {temp_path}")
	        return temp_path

	    @staticmethod
	    def cleanup_temp_file(file_path: str) -> None:
	        """
	        Delete temporary file safely

	        A missing file is ignored silently; any other failure is logged as
	        a warning and never raised.

	        Args:
	            file_path: Path to file to delete
	        """
	        try:
	            # Remove directly instead of checking os.path.exists() first,
	            # so a file that disappears in between cannot cause an error.
	            os.remove(file_path)
	        except FileNotFoundError:
	            return
	        except Exception as e:
	            logger.warning(f"Could not delete file {file_path}: {str(e)}")
	        else:
	            logger.info(f"Deleted temporary file: {file_path}")

	    @staticmethod
	    def normalize_audio(audio: np.ndarray, target_db: float = -20.0) -> np.ndarray:
	        """
	        Normalize audio to a target RMS level

	        The waveform is scaled so that its RMS equals
	        ``10 ** (target_db / 20)`` and the result is then limited to the
	        range [-1.0, 1.0].

	        Args:
	            audio: Audio waveform
	            target_db: Target loudness in dB

	        Returns:
	            Normalized audio. Empty or all-zero input is returned
	            unchanged.
	        """
	        # Nothing to measure: the mean of an empty array is NaN.
	        if audio.size == 0:
	            return audio

	        # Calculate RMS in float64 so integer PCM input cannot overflow
	        # while being squared.
	        rms = np.sqrt(np.mean(np.square(audio, dtype=np.float64)))

	        if rms == 0:
	            return audio

	        # Convert target db to linear scale
	        target_linear = 10 ** (target_db / 20.0)

	        # Use a plain Python float as the gain so a float32 waveform is not
	        # promoted to float64 by the multiplication.
	        gain = float(target_linear / rms)

	        # Limit peaks to [-1.0, 1.0] after scaling
	        normalized = np.clip(audio * gain, -1.0, 1.0)

	        logger.info(f"Audio normalized to {target_db} dB")
	        return normalized