File size: 1,706 Bytes
62f98bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51

import torch
import torchaudio
import numpy as np
from speechbrain.inference.VAD import VAD

def verify_vad():
    model_source = "speechbrain/vad-crdnn-libriparty"
    print(f"Loading VAD model: {model_source}...")
    
    try:
        # Load VAD
        vad_model = VAD.from_hparams(
            source=model_source,
            savedir="tmp_vad_model",
            run_opts={"device": "cpu"} # Force CPU for verification
        )
        print("VAD Model loaded successfully!")
        
        # Create dummy audio (random noise + silence + random noise)
        sr = 16000
        duration = 5 # seconds
        t = np.linspace(0, duration, int(sr * duration))
        
        # 1 sec noise, 2 sec silence, 2 sec noise
        audio = np.random.uniform(-0.1, 0.1, int(sr * 1))
        audio = np.concatenate([audio, np.zeros(int(sr * 2))])
        audio = np.concatenate([audio, np.random.uniform(-0.1, 0.1, int(sr * 2))])
        
        # Convert to tensor path not needed if we can process tensor
        # SpeechBrain VAD usually expects a file path, but let's check input flexibility
        # For this test, save to a temp file
        import soundfile as sf
        sf.write('test_vad.wav', audio, sr)
        
        print("Processing test_vad.wav...")
        # Boundaries usually returns a tensor of [start, end]
        boundaries = vad_model.get_speech_segments("test_vad.wav")
        print(f"Speech Segments found: \n{boundaries}")
        
        # Check if it filtered the silence
        print("Verification complete.")

    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    verify_vad()