david committed on
Commit
37262f1
·
1 Parent(s): ce0e589

add custom vad silence

Browse files
Files changed (1) hide show
  1. transcribe/pipelines/pipe_vad.py +18 -1
transcribe/pipelines/pipe_vad.py CHANGED
@@ -2,10 +2,27 @@
2
  from .base import MetaItem, BasePipe
3
  from ..helpers.vadprocessor import SileroVADProcessor, FixedVADIterator
4
  import numpy as np
5
- from silero_vad import get_speech_timestamps,collect_chunks
6
  import torch
 
7
  # import noisereduce as nr
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  class VadPipe(BasePipe):
11
  model = None
 
2
  from .base import MetaItem, BasePipe
3
  from ..helpers.vadprocessor import SileroVADProcessor, FixedVADIterator
4
  import numpy as np
5
+ from silero_vad import get_speech_timestamps
6
  import torch
7
+ from typing import List
8
  # import noisereduce as nr
9
 
10
def collect_chunks(
    tss: List[dict],
    wav: torch.Tensor,
    sample_rate: int = 16000,
    silence_duration: float = 0.3,
    min_gap: float = 0.1,
) -> torch.Tensor:
    """Concatenate speech segments of *wav*, padding long gaps with silence.

    Each entry of *tss* is sliced out of *wav* using its ``'start'`` and
    ``'end'`` values. Whenever two consecutive segments are separated by more
    than *min_gap* seconds, a block of zeros lasting *silence_duration*
    seconds is inserted between them so the pause is not lost entirely.

    Args:
        tss: Speech timestamps, each a dict with ``'start'`` and ``'end'``
            keys used as slice indices into *wav* (presumably sample offsets
            as returned by ``get_speech_timestamps`` -- confirm at call site).
        wav: Audio tensor to cut the segments from. Assumed to be 1-D, since
            the inserted silence is 1-D -- TODO confirm.
        sample_rate: Samples per second, used to convert the two durations
            below into sample counts.
        silence_duration: Length in seconds of the silence inserted between
            segments that are far enough apart (default 300 ms).
        min_gap: Minimum distance in seconds between two segments for silence
            to be inserted (default 100 ms).

    Returns:
        The concatenated audio. An empty slice of *wav* is returned when
        *tss* is empty.
    """
    if not tss:
        # torch.cat() raises on an empty list; no timestamps means no audio.
        return wav[:0]

    # Match dtype/device of the input so torch.cat neither promotes the
    # result to float32 nor fails on a device mismatch.
    silence = torch.zeros(
        int(silence_duration * sample_rate),
        dtype=wav.dtype,
        device=wav.device,
    )
    gap_threshold = min_gap * sample_rate

    chunks = []
    for current, following in zip(tss, tss[1:]):
        chunks.append(wav[current['start']: current['end']])
        # Only pad when the pause to the next segment exceeds the threshold.
        if following['start'] - current['end'] > gap_threshold:
            chunks.append(silence)

    # The last segment has no successor, so it is never followed by silence.
    chunks.append(wav[tss[-1]['start']: tss[-1]['end']])

    return torch.cat(chunks)
26
 
27
  class VadPipe(BasePipe):
28
  model = None