daihui.zhang
committed on
Commit
·
359ffc6
1
Parent(s):
5c84c3c
update vad parameter
Browse files
- transcribe/pipelines/pipe_vad.py +1 -1
- transcribe/strategy.py +14 -4
transcribe/pipelines/pipe_vad.py
CHANGED
|
@@ -21,7 +21,7 @@ class VadPipe(BasePipe):
|
|
| 21 |
fusion_threshold=0.45, # 提高以更好地融合语音片段
|
| 22 |
min_speech_duration=0.2, # 略微降低以捕获短音节
|
| 23 |
max_speech_duration=20, # 保持不变
|
| 24 |
-
min_silence_duration=300, # 增加到300毫秒,允许说话间的自然停顿
|
| 25 |
sample_rate=cls.sample_rate # 采样率,音频信号的采样频率
|
| 26 |
)
|
| 27 |
cls.vac = FixedVADIterator(cls.model.silero_vad, sampling_rate=cls.sample_rate,)
|
|
|
|
| 21 |
fusion_threshold=0.45, # 提高以更好地融合语音片段
|
| 22 |
min_speech_duration=0.2, # 略微降低以捕获短音节
|
| 23 |
max_speech_duration=20, # 保持不变
|
| 24 |
+
# min_silence_duration=300, # 增加到300毫秒,允许说话间的自然停顿
|
| 25 |
sample_rate=cls.sample_rate # 采样率,音频信号的采样频率
|
| 26 |
)
|
| 27 |
cls.vac = FixedVADIterator(cls.model.silero_vad, sampling_rate=cls.sample_rate,)
|
transcribe/strategy.py
CHANGED
|
@@ -169,8 +169,15 @@ class TranscriptBuffer:
|
|
| 169 |
def get_seg_id(self) -> int:
|
| 170 |
return self._current_seg_id
|
| 171 |
|
| 172 |
-
|
| 173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
|
| 175 |
def update_pending_text(self, text: str) -> None:
|
| 176 |
"""更新临时缓冲字符串"""
|
|
@@ -192,10 +199,13 @@ class TranscriptBuffer:
|
|
| 192 |
|
| 193 |
count = 0
|
| 194 |
current_sentences = []
|
| 195 |
-
while len(self._sentences) and count
|
| 196 |
item = self._sentences.popleft()
|
| 197 |
current_sentences.append(item)
|
| 198 |
-
|
|
|
|
|
|
|
|
|
|
| 199 |
if current_sentences:
|
| 200 |
self._segments.append("".join(current_sentences))
|
| 201 |
logger.debug(f"=== count to paragraph ===")
|
|
|
|
| 169 |
def get_seg_id(self) -> int:
|
| 170 |
return self._current_seg_id
|
| 171 |
|
| 172 |
+
@property
|
| 173 |
+
def current_sentences_length(self) -> int:
|
| 174 |
+
count = 0
|
| 175 |
+
for item in self._sentences:
|
| 176 |
+
if self._separator:
|
| 177 |
+
count += len(item.split(self._separator))
|
| 178 |
+
else:
|
| 179 |
+
count += len(item)
|
| 180 |
+
return count
|
| 181 |
|
| 182 |
def update_pending_text(self, text: str) -> None:
|
| 183 |
"""更新临时缓冲字符串"""
|
|
|
|
| 199 |
|
| 200 |
count = 0
|
| 201 |
current_sentences = []
|
| 202 |
+
while len(self._sentences) and count < 20:
|
| 203 |
item = self._sentences.popleft()
|
| 204 |
current_sentences.append(item)
|
| 205 |
+
if self._separator:
|
| 206 |
+
count += len(item.split(self._separator))
|
| 207 |
+
else:
|
| 208 |
+
count += len(item)
|
| 209 |
if current_sentences:
|
| 210 |
self._segments.append("".join(current_sentences))
|
| 211 |
logger.debug(f"=== count to paragraph ===")
|