Jimmy-test
#3
by
Zhongzhimin
- opened
- README.md +4 -4
- hyperparams.yaml +1 -1
README.md
CHANGED
|
@@ -72,7 +72,7 @@ Please notice that we encourage you to read our tutorials and learn more about
|
|
| 72 |
### Perform Voice Activity Detection
|
| 73 |
|
| 74 |
```
|
| 75 |
-from speechbrain.
|
| 76 |
|
| 77 |
VAD = VAD.from_hparams(source="speechbrain/vad-crdnn-libriparty", savedir="pretrained_models/vad-crdnn-libriparty")
|
| 78 |
boundaries = VAD.get_speech_segments("speechbrain/vad-crdnn-libriparty/example_vad.wav")
|
|
@@ -93,7 +93,7 @@ To do it:
|
|
| 93 |
|
| 94 |
```
|
| 95 |
import torchaudio
|
| 96 |
-upsampled_boundaries = VAD.upsample_boundaries(boundaries, 'example_vad.wav')
|
| 97 |
torchaudio.save('vad_final.wav', upsampled_boundaries.cpu(), 16000)
|
| 98 |
```
|
| 99 |
|
|
@@ -116,11 +116,11 @@ We designed the VAD such that you can have access to all of these steps (this mi
|
|
| 116 |
|
| 117 |
|
| 118 |
```python
|
| 119 |
-from speechbrain.
|
| 120 |
VAD = VAD.from_hparams(source="speechbrain/vad-crdnn-libriparty", savedir="pretrained_models/vad-crdnn-libriparty")
|
| 121 |
|
| 122 |
# 1- Let's compute frame-level posteriors first
|
| 123 |
-audio_file = 'example_vad.wav'
|
| 124 |
prob_chunks = VAD.get_speech_prob_file(audio_file)
|
| 125 |
|
| 126 |
# 2- Let's apply a threshold on top of the posteriors
|
|
|
|
| 72 |
### Perform Voice Activity Detection
|
| 73 |
|
| 74 |
```
|
| 75 |
+from speechbrain.pretrained import VAD
|
| 76 |
|
| 77 |
VAD = VAD.from_hparams(source="speechbrain/vad-crdnn-libriparty", savedir="pretrained_models/vad-crdnn-libriparty")
|
| 78 |
boundaries = VAD.get_speech_segments("speechbrain/vad-crdnn-libriparty/example_vad.wav")
|
|
|
|
| 93 |
|
| 94 |
```
|
| 95 |
import torchaudio
|
| 96 |
+upsampled_boundaries = VAD.upsample_boundaries(boundaries, 'pretrained_model_checkpoints/example_vad.wav')
|
| 97 |
torchaudio.save('vad_final.wav', upsampled_boundaries.cpu(), 16000)
|
| 98 |
```
|
| 99 |
|
|
|
|
| 116 |
|
| 117 |
|
| 118 |
```python
|
| 119 |
+from speechbrain.pretrained import VAD
|
| 120 |
VAD = VAD.from_hparams(source="speechbrain/vad-crdnn-libriparty", savedir="pretrained_models/vad-crdnn-libriparty")
|
| 121 |
|
| 122 |
# 1- Let's compute frame-level posteriors first
|
| 123 |
+audio_file = 'pretrained_model_checkpoints/example_vad.wav'
|
| 124 |
prob_chunks = VAD.get_speech_prob_file(audio_file)
|
| 125 |
|
| 126 |
# 2- Let's apply a threshold on top of the posteriors
|
hyperparams.yaml
CHANGED
|
@@ -21,7 +21,7 @@ rnn_bidirectional: True
|
|
| 21 |
dnn_blocks: 1
|
| 22 |
dnn_neurons: 16
|
| 23 |
output_neurons: 1
|
| 24 |
-device: 'cpu' #
|
| 25 |
|
| 26 |
# Feature/Model objects
|
| 27 |
compute_features: !new:speechbrain.lobes.features.Fbank
|
|
|
|
| 21 |
dnn_blocks: 1
|
| 22 |
dnn_neurons: 16
|
| 23 |
output_neurons: 1
|
| 24 |
+device: 'cpu' # set 'cuda:0' for gpu
|
| 25 |
|
| 26 |
# Feature/Model objects
|
| 27 |
compute_features: !new:speechbrain.lobes.features.Fbank
|