david
committed on
Commit
·
37262f1
1
Parent(s):
ce0e589
add custom vad silence
Browse files
transcribe/pipelines/pipe_vad.py
CHANGED
|
@@ -2,10 +2,27 @@
|
|
| 2 |
from .base import MetaItem, BasePipe
|
| 3 |
from ..helpers.vadprocessor import SileroVADProcessor, FixedVADIterator
|
| 4 |
import numpy as np
|
| 5 |
-
from silero_vad import get_speech_timestamps
|
| 6 |
import torch
|
|
|
|
| 7 |
# import noisereduce as nr
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
class VadPipe(BasePipe):
|
| 11 |
model = None
|
|
|
|
| 2 |
from .base import MetaItem, BasePipe
|
| 3 |
from ..helpers.vadprocessor import SileroVADProcessor, FixedVADIterator
|
| 4 |
import numpy as np
|
| 5 |
+
from silero_vad import get_speech_timestamps
|
| 6 |
import torch
|
| 7 |
+
from typing import List
|
| 8 |
# import noisereduce as nr
|
| 9 |
|
| 10 |
+
def collect_chunks(
    tss: List[dict],
    wav: torch.Tensor,
    sample_rate: int = 16000,
    *,
    silence_duration: float = 0.3,
    min_gap: float = 0.1,
) -> torch.Tensor:
    """Concatenate speech segments of *wav*, padding large gaps with silence.

    Each entry of *tss* selects the slice ``wav[entry['start']:entry['end']]``.
    Whenever the distance between the end of one segment and the start of
    the next exceeds ``min_gap`` seconds, a fixed block of zeros lasting
    ``silence_duration`` seconds is inserted between them.

    Args:
        tss: Segment descriptors with integer ``'start'`` and ``'end'``
            sample indices (presumably the output of
            ``silero_vad.get_speech_timestamps`` -- confirm against caller).
        wav: Audio tensor that is sliced along its first dimension.
        sample_rate: Samples per second, used to convert the two durations
            below into sample counts.
        silence_duration: Length in seconds of the inserted silence
            (default 0.3, i.e. 300 ms).
        min_gap: A gap must be strictly longer than this many seconds to
            receive silence padding (default 0.1, i.e. 100 ms).

    Returns:
        A tensor holding the selected segments (and any inserted silence)
        joined end to end. An empty slice of *wav* is returned when *tss*
        is empty.
    """
    if not tss:
        # torch.cat() raises on an empty list; return an empty tensor that
        # still carries wav's dtype and device instead.
        return wav[:0]

    silent_samples = int(silence_duration * sample_rate)
    gap_threshold = min_gap * sample_rate
    # Allocate the padding with wav's dtype/device so torch.cat neither
    # promotes the result dtype nor fails on a device mismatch.
    silence = wav.new_zeros(silent_samples)

    chunks = []
    last_index = len(tss) - 1
    for index, segment in enumerate(tss):
        # Always keep the speech segment itself.
        chunks.append(wav[segment['start']:segment['end']])

        # Between two segments, pad with silence only if they are far apart.
        if index < last_index:
            gap = tss[index + 1]['start'] - segment['end']
            if gap > gap_threshold:
                chunks.append(silence)

    return torch.cat(chunks)
|
| 26 |
|
| 27 |
class VadPipe(BasePipe):
|
| 28 |
model = None
|