neuapi / api /src /services /audio_utils.py
grimshaw's picture
Upload folder using huggingface_hub
35bb6f4 verified
Raw
History Blame Contribute Delete
1.7 kB
from __future__ import annotations
import io
import numpy as np
import soundfile as sf
def validate_wav(data: bytes) -> dict:
    """Inspect in-memory audio bytes and report their basic properties.

    Args:
        data: Raw contents of the audio file.

    Returns:
        A dict with the keys ``"sample_rate"``, ``"channels"``,
        ``"duration"``, ``"frames"`` and ``"format"``, each copied from the
        matching attribute of the object returned by ``sf.info``.

    Raises:
        ValueError: If ``sf.info`` raises for any reason; the original
            exception is chained as the cause.

    NOTE(review): the reported format is returned but not checked here, so
    non-WAV containers that soundfile can parse presumably pass too —
    confirm whether that is intended.
    """
    stream = io.BytesIO(data)
    try:
        meta = sf.info(stream)
    except Exception as exc:
        raise ValueError(f"Invalid WAV file: {exc}") from exc

    # (result key, attribute on the info object), in output order.
    field_map = (
        ("sample_rate", "samplerate"),
        ("channels", "channels"),
        ("duration", "duration"),
        ("frames", "frames"),
        ("format", "format"),
    )
    return {key: getattr(meta, attr) for key, attr in field_map}
def validate_reference_audio(data: bytes) -> dict:
    """Validate reference audio for voice cloning.

    Requirements (as enforced by the checks below):
    - Mono channel
    - 8-48 kHz sample rate (inclusive)
    - 1-30 seconds duration (inclusive)

    Args:
        data: Raw bytes of the reference audio file.

    Returns:
        The property dict produced by ``validate_wav`` when every check
        passes.

    Raises:
        ValueError: If ``validate_wav`` rejects the data, or if the audio
            violates any of the requirements above.
    """
    props = validate_wav(data)

    if props["channels"] != 1:
        raise ValueError(
            f"Reference audio must be mono (1 channel), got {props['channels']} channels"
        )

    if not (8000 <= props["sample_rate"] <= 48000):
        raise ValueError(
            f"Reference audio sample rate must be 8-48 kHz, got {props['sample_rate']} Hz"
        )

    if props["duration"] < 1.0:
        raise ValueError(
            f"Reference audio too short ({props['duration']:.1f}s), minimum 1 second"
        )

    if props["duration"] > 30.0:
        raise ValueError(
            f"Reference audio too long ({props['duration']:.1f}s), maximum 30 seconds"
        )

    return props
def pcm_to_wav_bytes(pcm_data: np.ndarray, sample_rate: int = 24000) -> bytes:
    """Encode a PCM sample array as an in-memory 16-bit WAV file.

    Args:
        pcm_data: Sample array handed to ``sf.write`` (float32 PCM per this
            function's stated contract).
        sample_rate: Sample rate written to the WAV header, in Hz.

    Returns:
        The complete WAV file contents as bytes.
    """
    with io.BytesIO() as sink:
        sf.write(sink, pcm_data, sample_rate, format="WAV", subtype="PCM_16")
        # getvalue() returns the whole buffer regardless of stream position.
        return sink.getvalue()