hedrekao
HF deploy: clean snapshot without local artifacts
a361db3
"""Audio loading and validation."""
import logging
import numpy as np
import soundfile as sf
from typing import Tuple
# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)
def load_audio(wav_path: str) -> Tuple[np.ndarray, int]:
    """
    Read a WAV file into a float32 sample matrix.

    The file is read with ``always_2d=True`` and the samples are then cast
    to float32. A summary line (sample count, duration, channel count and
    sample rate) is logged at INFO level.

    Args:
        wav_path: Path to input WAV file

    Returns:
        Tuple of (audio_data, sample_rate)
        - audio_data: shape (n_samples, n_channels), dtype float32
        - sample_rate: integer Hz
    """
    log.info(f"Loading audio: {wav_path}")

    raw, sample_rate = sf.read(wav_path, always_2d=True)
    samples = raw.astype(np.float32)

    # Rows are samples, columns are channels.
    n_samples, n_channels = samples.shape
    seconds = n_samples / sample_rate
    log.info(
        f" Samples: {n_samples}, Duration: {seconds:.2f}s, "
        f"Channels: {n_channels}, Sample rate: {sample_rate} Hz"
    )

    return samples, sample_rate
def validate_audio(data: np.ndarray, expected_channels: int = 4) -> bool:
    """
    Validate audio for pipeline requirements.

    Checks, in order, that ``data`` is 2-D, that it has the expected number
    of channels, and that every sample is finite.

    The caller's array is never modified. If ``data`` is not float32, a
    warning is logged and a float32 copy is used for the finite-value check
    only; the caller must convert its own array if float32 is required.

    Args:
        data: Audio array of shape (n_samples, n_channels)
        expected_channels: Expected number of channels (default 4 for
            hearing aid array)

    Returns:
        True if valid, raises ValueError otherwise

    Raises:
        ValueError: If ``data`` is not 2-D, has the wrong number of
            channels, or contains NaN or Inf values.
    """
    if data.ndim != 2:
        raise ValueError(f"Expected 2D array, got shape {data.shape}")

    n_channels = data.shape[1]
    if n_channels != expected_channels:
        message = f"Expected {expected_channels} channels, got {n_channels}. "
        if expected_channels == 4:
            # Default layout: keep the detailed hearing-aid description.
            message += (
                "Input must be 4-channel hearing aid microphone array: "
                "[Left Front, Left Rear, Right Front, Right Rear]"
            )
        else:
            # Do not claim "4-channel" when the caller asked for another count.
            message += (
                f"Input must be a {expected_channels}-channel microphone array"
            )
        raise ValueError(message)

    if data.dtype != np.float32:
        log.warning(f"Converting dtype {data.dtype} to float32")
        # Local copy only: this rebinding does not affect the caller's array.
        # The finite check below therefore sees the values as float32.
        data = data.astype(np.float32)

    # Check for NaN or Inf
    if not np.all(np.isfinite(data)):
        raise ValueError("Audio contains NaN or Inf values")

    return True