| """Audio loading and validation.""" |
|
|
| import logging |
| import numpy as np |
| import soundfile as sf |
| from typing import Tuple |
|
|
| log = logging.getLogger(__name__) |
|
|
|
|
def load_audio(wav_path: str) -> Tuple[np.ndarray, int]:
    """
    Load audio from WAV file.

    The samples are requested from ``soundfile.read`` as float32 directly,
    so no intermediate float64 copy of the whole file is created.

    Args:
        wav_path: Path to input WAV file

    Returns:
        Tuple of (audio_data, sample_rate)
        - audio_data: shape (n_samples, n_channels), dtype float32
        - sample_rate: integer Hz

    Raises:
        Any exception raised by ``soundfile.read`` (for example when the
        file is missing or cannot be decoded) propagates unchanged.
    """
    log.info("Loading audio: %s", wav_path)

    # always_2d=True keeps mono files as (n_samples, 1) so callers can rely
    # on a 2D array; dtype="float32" avoids reading as float64 and copying.
    data, sr = sf.read(wav_path, dtype="float32", always_2d=True)

    n_samples, n_channels = data.shape
    duration = n_samples / sr
    log.info(
        " Samples: %s, Duration: %.2fs, Channels: %s, Sample rate: %s Hz",
        n_samples,
        duration,
        n_channels,
        sr,
    )

    return data, sr
|
|
|
|
def validate_audio(data: np.ndarray, expected_channels: int = 4) -> bool:
    """
    Validate audio for pipeline requirements.

    Checks, in order: the array is 2D, it has ``expected_channels`` columns,
    and every sample is finite once represented as float32.

    The input array is never modified. If its dtype is not float32, a
    float32 copy is made purely for the finiteness check (so values that
    overflow float32 are reported as non-finite); the caller's array keeps
    its original dtype.

    Args:
        data: Audio array of shape (n_samples, n_channels)
        expected_channels: Expected number of channels (default 4 for hearing aid array)

    Returns:
        True if valid, raises ValueError otherwise

    Raises:
        ValueError: If the array is not 2D, the channel count does not match
            ``expected_channels``, or the audio contains NaN or Inf values.
    """
    if data.ndim != 2:
        raise ValueError(f"Expected 2D array, got shape {data.shape}")

    n_channels = data.shape[1]
    if n_channels != expected_channels:
        message = f"Expected {expected_channels} channels, got {n_channels}."
        # The microphone-layout hint only makes sense for the default
        # 4-channel configuration; for any other expected count it would
        # contradict the first sentence of the message.
        if expected_channels == 4:
            message += (
                " Input must be 4-channel hearing aid microphone array: "
                "[Left Front, Left Rear, Right Front, Right Rear]"
            )
        raise ValueError(message)

    if data.dtype != np.float32:
        log.warning("Converting dtype %s to float32", data.dtype)
        # Local rebinding only: the converted copy is used for the check
        # below and is not returned to the caller.
        data = data.astype(np.float32)

    if not np.isfinite(data).all():
        raise ValueError("Audio contains NaN or Inf values")

    return True
|
|