S-Vetrivel's picture
Refactor project structure to Unified AI Voice Detection System
b3f89f5
raw
history blame contribute delete
968 Bytes
import io
import librosa
import numpy as np
import torch
import torchaudio
import soundfile as sf
def load_audio(audio_bytes: bytes, target_sr: int = 16000, max_duration: int = 5) -> tuple[np.ndarray, int]:
    """Decode in-memory audio into a mono waveform and cap its length.

    The bytes are wrapped in a ``BytesIO`` and handed to ``librosa.load``
    with ``mono=True`` and ``sr=target_sr``. The result is then truncated to
    at most ``max_duration`` seconds. Clips shorter than that are returned
    at their decoded length; no padding is applied.

    Args:
        audio_bytes: Encoded audio file contents.
        target_sr: Sample rate requested from ``librosa.load``.
        max_duration: Maximum clip length in seconds; extra samples at the
            end are dropped.

    Returns:
        A ``(audio_array, sample_rate)`` tuple as produced by
        ``librosa.load``, with ``audio_array`` truncated.

    Raises:
        ValueError: If decoding or truncation fails for any reason. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        audio, sr = librosa.load(io.BytesIO(audio_bytes), sr=target_sr, mono=True)
        # Size the cap from the rate librosa actually reports, so the limit
        # stays correct even if it differs from the requested target_sr
        # (librosa documents sr=None as "keep the file's native rate").
        max_samples = int(sr * max_duration)
        # Slicing is a no-op when the clip is already short enough.
        audio = audio[:max_samples]
    except Exception as e:
        # Every failure surfaces as ValueError so callers have one exception
        # type to handle; chain the cause so the real error stays visible.
        raise ValueError(f"Failed to load audio: {e}") from e
    return audio, sr
def to_tensor(audio_array: np.ndarray) -> torch.Tensor:
    """Return a float32 torch tensor built from ``audio_array``.

    ``torch.tensor`` copies the input data, so the result does not share
    memory with the source array.
    """
    copied = torch.tensor(audio_array)
    return copied.to(torch.float32)