Spaces:

Hedrekao
/

audio-explorers-visualization

Sleeping

audio-explorers-visualization / pipeline_modules /audio_loader.py

hedrekao

HF deploy: clean snapshot without local artifacts

a361db3 about 2 months ago

1.86 kB

	"""Audio loading and validation."""

	import logging
	import numpy as np
	import soundfile as sf
	from typing import Tuple

	# Module-level logger, named after this module via __name__; used by the
	# functions below for progress and warning messages.
	log = logging.getLogger(__name__)


	def load_audio(wav_path: str) -> Tuple[np.ndarray, int]:
	    """
	    Read a WAV file from disk as a 2-D float32 sample matrix.

	    The file is read with ``always_2d=True`` so the result keeps a channel
	    axis, and the samples are then cast to float32. A short summary
	    (sample count, duration, channel count, sample rate) is logged at
	    INFO level.

	    Args:
	        wav_path: Path to input WAV file

	    Returns:
	        Tuple of (audio_data, sample_rate)
	        - audio_data: shape (n_samples, n_channels), dtype float32
	        - sample_rate: integer Hz
	    """
	    log.info(f"Loading audio: {wav_path}")

	    raw, rate = sf.read(wav_path, always_2d=True)
	    samples = raw.astype(np.float32)

	    # Unpack the shape once; both values are reused in the summary line.
	    n_samples, n_channels = samples.shape
	    seconds = n_samples / rate
	    summary = (
	        f" Samples: {n_samples}, Duration: {seconds:.2f}s, "
	        f"Channels: {n_channels}, Sample rate: {rate} Hz"
	    )
	    log.info(summary)

	    return samples, rate


	def validate_audio(data: np.ndarray, expected_channels: int = 4) -> bool:
	    """
	    Validate audio for pipeline requirements.

	    Checks, in order: the array is 2-D, its second axis has
	    ``expected_channels`` entries, and every sample is finite.

	    Args:
	        data: Audio array of shape (n_samples, n_channels)
	        expected_channels: Expected number of channels (default 4 for
	            hearing aid array)

	    Returns:
	        True if valid, raises ValueError otherwise

	    Raises:
	        ValueError: If ``data`` is not 2-D, has the wrong channel count,
	            or contains NaN or Inf values.
	    """
	    if data.ndim != 2:
	        raise ValueError(f"Expected 2D array, got shape {data.shape}")

	    n_channels = data.shape[1]
	    if n_channels != expected_channels:
	        message = f"Expected {expected_channels} channels, got {n_channels}."
	        # The microphone-layout hint only makes sense for the default
	        # 4-channel configuration; omit it when the caller asked for a
	        # different channel count so the message does not contradict itself.
	        if expected_channels == 4:
	            message += (
	                " Input must be 4-channel hearing aid microphone array: "
	                "[Left Front, Left Rear, Right Front, Right Rear]"
	            )
	        raise ValueError(message)

	    if data.dtype != np.float32:
	        log.warning("Converting dtype %s to float32", data.dtype)
	        # NOTE: this rebinds the local name only. The caller's array is
	        # left untouched; the float32 copy is used solely for the finite
	        # check below.
	        data = data.astype(np.float32)

	    # Check for NaN or Inf
	    if not np.all(np.isfinite(data)):
	        raise ValueError("Audio contains NaN or Inf values")

	    return True