"""In-memory audio helpers: header validation and PCM-to-WAV encoding."""

from __future__ import annotations

import io

import numpy as np
import soundfile as sf


def validate_wav(data: bytes) -> dict:
    """Inspect encoded audio bytes and return their basic properties.

    The bytes are wrapped in an in-memory buffer and passed to
    ``soundfile.info``. The detected container format is reported under
    the ``"format"`` key but is not checked by this function.

    Args:
        data: Raw contents of the audio file.

    Returns:
        A dict with the keys ``"sample_rate"`` (Hz), ``"channels"``,
        ``"duration"`` (seconds), ``"frames"`` and ``"format"``, copied
        from the object returned by ``soundfile.info``.

    Raises:
        ValueError: If ``soundfile.info`` fails for any reason. The
            original exception is chained as ``__cause__``.
    """
    buf = io.BytesIO(data)
    try:
        info = sf.info(buf)
    except Exception as e:
        # Any parsing failure is surfaced as a single, caller-friendly
        # ValueError; the underlying error stays attached via ``from e``.
        raise ValueError(f"Invalid WAV file: {e}") from e

    return {
        "sample_rate": info.samplerate,
        "channels": info.channels,
        "duration": info.duration,
        "frames": info.frames,
        "format": info.format,
    }


def validate_reference_audio(data: bytes) -> dict:
    """Validate reference audio for voice cloning.

    Requirements (checked in this order; the first failure is raised):
    - Mono channel
    - 8-48 kHz sample rate (inclusive)
    - 1-30 seconds duration (inclusive)

    Args:
        data: Raw contents of the audio file.

    Returns:
        The property dict produced by ``validate_wav`` when every
        requirement is met.

    Raises:
        ValueError: If the audio cannot be parsed, or if it violates one
            of the requirements above.
    """
    props = validate_wav(data)

    if props["channels"] != 1:
        raise ValueError(
            f"Reference audio must be mono (1 channel), got {props['channels']} channels"
        )

    if not (8000 <= props["sample_rate"] <= 48000):
        raise ValueError(
            f"Reference audio sample rate must be 8-48 kHz, got {props['sample_rate']} Hz"
        )

    if props["duration"] < 1.0:
        raise ValueError(
            f"Reference audio too short ({props['duration']:.1f}s), minimum 1 second"
        )

    if props["duration"] > 30.0:
        raise ValueError(
            f"Reference audio too long ({props['duration']:.1f}s), maximum 30 seconds"
        )

    return props


def pcm_to_wav_bytes(pcm_data: np.ndarray, sample_rate: int = 24000) -> bytes:
    """Encode a PCM sample array as an in-memory 16-bit PCM WAV file.

    Args:
        pcm_data: Samples to encode. Intended to be float32 PCM; the
            dtype is not checked here and is passed to ``soundfile.write``
            as-is.
        sample_rate: Sample rate in Hz written to the WAV header.

    Returns:
        The complete WAV file (header and data) as bytes.
    """
    buf = io.BytesIO()
    sf.write(buf, pcm_data, sample_rate, format="WAV", subtype="PCM_16")
    # getvalue() returns the entire buffer regardless of the current stream
    # position, so no seek(0) + read() round trip is needed.
    return buf.getvalue()