Spaces:
Runtime error
Runtime error
| import webrtcvad | |
| from collections import deque | |
class VADFrameSplitter:
    """Accumulate raw PCM frames and track speech/silence to find utterance ends.

    Each frame handed to :meth:`process` is classified by a ``webrtcvad.Vad``
    instance and appended to an internal buffer (speech and silence alike).
    :meth:`is_end_of_utterance` reports when the trailing run of non-speech
    frames has exceeded ``max_silence_frames``; :meth:`flush` then returns the
    buffered audio and resets the counters.

    The class is intended for 20 ms frames, but the frame duration is not
    enforced here; the underlying VAD decides whether a frame is acceptable.
    NOTE(review): per the py-webrtcvad documentation, frames must be 16-bit
    mono PCM of 10, 20 or 30 ms at 8000/16000/32000/48000 Hz -- confirm that
    callers slice their stream accordingly.
    """

    def __init__(self, sample_rate=16000, mode=2, *, max_silence_frames=15):
        """Create a splitter.

        Args:
            sample_rate: Sample rate in Hz forwarded to ``Vad.is_speech``.
            mode: Value passed to ``webrtcvad.Vad`` (its aggressiveness mode).
            max_silence_frames: Number of consecutive non-speech frames that
                is still tolerated. The utterance is considered finished once
                the silent run is strictly longer than this. Defaults to 15,
                the value that was previously hard-coded.

        Raises:
            ValueError: If ``max_silence_frames`` is negative.
        """
        if max_silence_frames < 0:
            raise ValueError("max_silence_frames must be >= 0")

        self.vad = webrtcvad.Vad(mode)
        self.sample_rate = sample_rate
        self.max_silence_frames = max_silence_frames

        # Every processed frame, in arrival order, until the next flush().
        self.frame_buffer = deque()
        # Total speech frames seen since the last flush().
        self.speech_count = 0
        # Length of the current trailing run of non-speech frames.
        self.silence_count = 0

    def process(self, frame: bytes) -> bool:
        """Classify one frame, buffer it, and update the counters.

        Args:
            frame: Raw PCM bytes for a single frame.

        Returns:
            True if the VAD classified the frame as speech, False otherwise.
        """
        is_speech = self.vad.is_speech(frame, self.sample_rate)

        # Buffer only after classification so a frame rejected by the VAD
        # (exception raised above) never ends up in the buffered audio.
        self.frame_buffer.append(frame)

        if is_speech:
            self.speech_count += 1
            # Any speech frame interrupts the current silent run.
            self.silence_count = 0
        else:
            self.silence_count += 1

        return is_speech

    def is_end_of_utterance(self) -> bool:
        """Return True once the trailing silence exceeds ``max_silence_frames``.

        Only the silent run length is examined; ``speech_count`` is not
        consulted, so this can also become True when nothing but silence has
        been processed. Callers that need "speech followed by silence" should
        additionally check ``speech_count``.
        """
        return self.silence_count > self.max_silence_frames

    def flush(self) -> bytes:
        """Return all buffered frames as one bytes object and reset state.

        The buffer is emptied and both counters are set back to zero, so the
        instance is ready for the next utterance. Returns ``b""`` when no
        frames have been buffered.
        """
        audio = b"".join(self.frame_buffer)
        self.frame_buffer.clear()
        self.speech_count = 0
        self.silence_count = 0
        return audio