File size: 1,704 Bytes
35bb6f4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 | from __future__ import annotations
import io
import numpy as np
import soundfile as sf
def validate_wav(data: bytes) -> dict:
    """Probe in-memory audio bytes and report their basic properties.

    The bytes are wrapped in a ``BytesIO`` and handed to ``sf.info``; any
    exception raised by that call is converted to ``ValueError``.

    NOTE(review): nothing here checks that the container is actually WAV;
    the reported ``format`` is returned as-is. Confirm whether non-WAV
    input that ``sf.info`` can read should be rejected.

    Args:
        data: Raw bytes of the audio file.

    Returns:
        A dict with the keys ``sample_rate``, ``channels``, ``duration``,
        ``frames`` and ``format``, copied from the object returned by
        ``sf.info``.

    Raises:
        ValueError: If ``sf.info`` fails on the data; the original
            exception is chained as the cause.
    """
    # Built outside the try block on purpose: a failure to wrap ``data``
    # (e.g. a non-bytes argument) is not reported as an invalid file.
    stream = io.BytesIO(data)
    try:
        meta = sf.info(stream)
    except Exception as exc:
        raise ValueError(f"Invalid WAV file: {exc}") from exc

    # (output key, attribute on the info object) in the order they are returned.
    key_to_attr = (
        ("sample_rate", "samplerate"),
        ("channels", "channels"),
        ("duration", "duration"),
        ("frames", "frames"),
        ("format", "format"),
    )
    return {key: getattr(meta, attr) for key, attr in key_to_attr}
def validate_reference_audio(data: bytes) -> dict:
    """Validate reference audio for voice cloning.

    Requirements (as enforced by the checks below):
    - Mono channel
    - 8-48 kHz sample rate (inclusive)
    - 1-30 seconds duration (inclusive)

    Args:
        data: Raw bytes of the audio file.

    Returns:
        The property dict produced by ``validate_wav``, unchanged. This
        function reads its ``channels``, ``sample_rate`` and ``duration``
        entries.

    Raises:
        ValueError: If ``validate_wav`` rejects the data, or if any of the
            requirements above is not met.
    """
    props = validate_wav(data)

    channels = props["channels"]
    if channels != 1:
        raise ValueError(
            f"Reference audio must be mono (1 channel), got {channels} channels"
        )

    sample_rate = props["sample_rate"]
    if not (8000 <= sample_rate <= 48000):
        raise ValueError(
            f"Reference audio sample rate must be 8-48 kHz, got {sample_rate} Hz"
        )

    duration = props["duration"]
    if duration < 1.0:
        raise ValueError(
            f"Reference audio too short ({duration:.1f}s), minimum 1 second"
        )
    if duration > 30.0:
        raise ValueError(
            f"Reference audio too long ({duration:.1f}s), maximum 30 seconds"
        )

    return props
def pcm_to_wav_bytes(pcm_data: np.ndarray, sample_rate: int = 24000) -> bytes:
    """Encode a PCM sample array as WAV bytes held entirely in memory.

    The samples are written with ``sf.write`` into a ``BytesIO`` using the
    ``WAV`` format and the ``PCM_16`` subtype.

    Args:
        pcm_data: Sample array to encode (presumably float32, as the
            function's original description states; not checked here).
        sample_rate: Sample rate passed to ``sf.write``; defaults to 24000.

    Returns:
        The complete contents of the in-memory WAV file.
    """
    wav_stream = io.BytesIO()
    sf.write(wav_stream, pcm_data, sample_rate, format="WAV", subtype="PCM_16")
    # getvalue() returns the whole buffer regardless of the stream position.
    return wav_stream.getvalue()
|