Update modules/vad/silero_vad.py
Browse files
- modules/vad/silero_vad.py +10 -3
modules/vad/silero_vad.py
CHANGED
|
@@ -106,10 +106,17 @@ class SileroVAD:
|
|
| 106 |
min_silence_samples_at_max_speech = self.sampling_rate * 98 / 1000
|
| 107 |
|
| 108 |
audio_length_samples = len(audio)
|
|
|
|
| 109 |
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
triggered = False
|
| 115 |
speeches = []
|
|
|
|
| 106 |
min_silence_samples_at_max_speech = self.sampling_rate * 98 / 1000
|
| 107 |
|
| 108 |
audio_length_samples = len(audio)
|
| 109 |
+
state, context = self.model.get_initial_states(batch_size=1)
|
| 110 |
|
| 111 |
+
speech_probs = []
|
| 112 |
+
for current_start_sample in range(0, audio_length_samples, window_size_samples):
|
| 113 |
+
progress(current_start_sample/audio_length_samples, desc="Detecting speeches only using VAD...")
|
| 114 |
+
|
| 115 |
+
chunk = audio[current_start_sample: current_start_sample + window_size_samples]
|
| 116 |
+
if len(chunk) < window_size_samples:
|
| 117 |
+
chunk = np.pad(chunk, (0, int(window_size_samples - len(chunk))))
|
| 118 |
+
speech_prob, state, context = self.model(chunk, state, context, sampling_rate)
|
| 119 |
+
speech_probs.append(speech_prob)
|
| 120 |
|
| 121 |
triggered = False
|
| 122 |
speeches = []
|