Spaces:
Sleeping
Sleeping
File size: 968 Bytes
b3f89f5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 | import io
import librosa
import numpy as np
import torch
import torchaudio
import soundfile as sf
def load_audio(audio_bytes: bytes, target_sr: int = 16000, max_duration: int = 5) -> tuple[np.ndarray, int]:
    """Decode in-memory audio, resample it, and cap its length.

    The bytes are decoded with ``librosa.load`` at ``target_sr`` with
    ``mono=True``, then cut to at most ``target_sr * max_duration`` samples.
    Clips shorter than that limit are returned at their decoded length;
    no padding is applied.

    Args:
        audio_bytes: Raw contents of an audio file.
        target_sr: Sample rate requested from ``librosa.load``.
        max_duration: Maximum clip length in seconds.

    Returns:
        A ``(audio_array, sample_rate)`` pair, where ``sample_rate`` is the
        value reported by ``librosa.load``.

    Raises:
        ValueError: If decoding or truncation fails for any reason. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        # librosa decodes from a file-like object, so wrap the raw bytes.
        audio, sr = librosa.load(io.BytesIO(audio_bytes), sr=target_sr, mono=True)

        # Keep only the leading max_duration seconds.
        max_samples = int(target_sr * max_duration)
        if len(audio) > max_samples:
            audio = audio[:max_samples]

        return audio, sr
    except Exception as e:
        # Callers see a single exception type; chaining with "from e" keeps
        # the original error available as __cause__ for debugging.
        raise ValueError(f"Failed to load audio: {e}") from e
def to_tensor(audio_array: np.ndarray) -> torch.Tensor:
    """Build a float32 torch tensor from a numpy array.

    The array's data is copied into a new tensor by ``torch.tensor`` and
    then cast to ``torch.float32``.
    """
    copied = torch.tensor(audio_array)
    as_float32 = copied.to(torch.float32)
    return as_float32
|