# VoiceGuard-API / verify_speechbrain.py
# S-Vetrivel's picture
# Heavy & Accurate: Integrated SpeechBrain VAD + MMS-300M pipeline
# 62f98bb
import torch
import torchaudio
import numpy as np
from speechbrain.inference.VAD import VAD
def verify_vad():
    """Smoke-test the SpeechBrain VAD model on a synthetic audio clip.

    Loads the pretrained ``speechbrain/vad-crdnn-libriparty`` model on CPU
    (model files are stored under ``tmp_vad_model``), builds a 5-second,
    16 kHz signal made of 1 s of uniform noise, 2 s of silence and 2 s of
    uniform noise, writes it to ``test_vad.wav`` inside a temporary
    directory, and prints whatever ``get_speech_segments`` returns for it.

    This is a manual verification helper: progress is reported with
    ``print``, and any exception is caught, printed together with its
    traceback, and not re-raised.

    Returns:
        None.
    """
    # Standard-library helpers are imported locally, matching the
    # function-scope import style already used in this script.
    import os
    import tempfile

    model_source = "speechbrain/vad-crdnn-libriparty"
    print(f"Loading VAD model: {model_source}...")

    try:
        # Load VAD
        vad_model = VAD.from_hparams(
            source=model_source,
            savedir="tmp_vad_model",
            run_opts={"device": "cpu"},  # Force CPU for verification
        )
        print("VAD Model loaded successfully!")

        # Dummy audio: 1 sec noise, 2 sec silence, 2 sec noise.
        sr = 16000
        audio = np.concatenate(
            [
                np.random.uniform(-0.1, 0.1, sr * 1),
                np.zeros(sr * 2),
                np.random.uniform(-0.1, 0.1, sr * 2),
            ]
        )

        # The VAD is driven through a file path here, so the signal is
        # saved to disk first. A temporary directory keeps the working
        # directory clean and avoids overwriting an existing test_vad.wav.
        import soundfile as sf

        with tempfile.TemporaryDirectory() as tmp_dir:
            wav_path = os.path.join(tmp_dir, "test_vad.wav")
            sf.write(wav_path, audio, sr)

            print("Processing test_vad.wav...")
            # NOTE(review): boundaries is expected to hold [start, end]
            # pairs per detected segment - confirm against SpeechBrain docs.
            boundaries = vad_model.get_speech_segments(wav_path)

        print(f"Speech Segments found: \n{boundaries}")

        # No automatic assertion is made on the result; inspect the printed
        # segments manually (the input is silent between 1 s and 3 s).
        print("Verification complete.")

    except Exception as e:
        # Top-level boundary of a diagnostic script: report and carry on.
        print(f"Error: {e}")
        import traceback

        traceback.print_exc()
# Script entry point: run the VAD check only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    verify_vad()