Spaces:
Runtime error
Runtime error
File size: 1,173 Bytes
import webrtcvad
from collections import deque
class VADFrameSplitter:
    """Classify PCM frames as speech/silence and buffer them per utterance.

    Every frame given to :meth:`process` is run through a WebRTC voice
    activity detector, appended to an internal buffer (speech and silence
    frames alike) and counted. :meth:`is_end_of_utterance` reports when the
    trailing run of silent frames is long enough to treat the utterance as
    finished, and :meth:`flush` hands back the buffered audio and resets the
    counters.

    NOTE(review): the original notes say frames are 20 ms long. Nothing here
    enforces a frame length; the VAD is presumably the one that rejects
    unsupported sizes -- confirm against the webrtcvad documentation.
    """

    def __init__(
        self,
        sample_rate=16000,
        mode=2,
        *,
        end_silence_frames=15,
        min_speech_frames=0,
    ):
        """Create the detector and empty per-utterance state.

        Args:
            sample_rate: Sample rate in Hz forwarded to the VAD with every
                frame.
            mode: Value passed straight to ``webrtcvad.Vad`` (its
                aggressiveness setting).
            end_silence_frames: The utterance is considered finished once
                MORE than this many consecutive non-speech frames have been
                seen. Defaults to 15, the previously hard-coded value.
            min_speech_frames: Minimum number of speech frames that must
                have been seen since the last flush before an endpoint is
                reported. The default of 0 keeps the historical behaviour,
                where a long stretch of leading silence also counts as an
                endpoint; set it to 1 or more to ignore silence-only input.
        """
        self.vad = webrtcvad.Vad(mode)
        self.sample_rate = sample_rate
        self.end_silence_frames = end_silence_frames
        self.min_speech_frames = min_speech_frames
        self.frame_buffer = deque()
        # speech_count: speech frames seen since the last flush.
        self.speech_count = 0
        # silence_count: length of the current run of non-speech frames.
        self.silence_count = 0

    def process(self, frame: bytes):
        """Classify one frame, buffer it, and update the counters.

        Args:
            frame: Raw PCM bytes for a single frame.

        Returns:
            The VAD verdict for this frame: truthy when speech was detected,
            falsy for silence.
        """
        is_speech = self.vad.is_speech(frame, self.sample_rate)
        # Silence is buffered too, so the flushed audio is contiguous.
        self.frame_buffer.append(frame)
        if is_speech:
            self.speech_count += 1
            # Any speech frame breaks the current run of silence.
            self.silence_count = 0
        else:
            self.silence_count += 1
        return is_speech

    def is_end_of_utterance(self) -> bool:
        """Return True when the trailing silence marks the utterance as done.

        The condition is a silence run strictly longer than
        ``end_silence_frames`` and at least ``min_speech_frames`` speech
        frames since the last flush.
        """
        return (
            self.silence_count > self.end_silence_frames
            and self.speech_count >= self.min_speech_frames
        )

    def flush(self) -> bytes:
        """Return all buffered audio as one bytes object and reset state.

        The frame buffer is emptied and both counters go back to zero, so
        the instance is ready for the next utterance.
        """
        audio = b"".join(self.frame_buffer)
        self.frame_buffer.clear()
        self.speech_count = 0
        self.silence_count = 0
        return audio