Update app.py
Browse files
app.py
CHANGED
|
@@ -2,6 +2,8 @@ import gradio as gr
|
|
| 2 |
import os
|
| 3 |
import torchaudio
|
| 4 |
from uuid import uuid4
|
|
|
|
|
|
|
| 5 |
|
| 6 |
import torch
|
| 7 |
torch.set_num_threads(1)
|
|
@@ -19,15 +21,9 @@ model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
|
|
| 19 |
|
| 20 |
|
| 21 |
def get_labels(audio_fp, threshold, min_speech_duration_ms, min_silence_duration_ms, auto_merge, uppper_merge_threshold, max_segment_length):
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
speech_timestamps =
|
| 25 |
-
model,
|
| 26 |
-
sampling_rate=16000,
|
| 27 |
-
threshold=threshold,
|
| 28 |
-
min_speech_duration_ms=min_speech_duration_ms,
|
| 29 |
-
min_silence_duration_ms=min_silence_duration_ms,
|
| 30 |
-
return_seconds=True)
|
| 31 |
labels_str = []
|
| 32 |
labels = []
|
| 33 |
|
|
@@ -79,8 +75,8 @@ interface = gr.Interface(
|
|
| 79 |
gr.Number(label="min_speech_duration_ms", value=250, info="default (250)"),
|
| 80 |
gr.Number(label="min_silence_duration_ms", value=40, info="default (100)"),
|
| 81 |
gr.Checkbox(label="Auto merge", value=True),
|
| 82 |
-
gr.Textbox(label="Gap max threshold value (seconds)", value=0.
|
| 83 |
-
gr.Number(label="Approx Max Segment Length", value=
|
| 84 |
],
|
| 85 |
[
|
| 86 |
gr.File(label="VAD Labels"),
|
|
|
|
| 2 |
import os
|
| 3 |
import torchaudio
|
| 4 |
from uuid import uuid4
|
| 5 |
+
from pydub.silence import detect_nonsilent
|
| 6 |
+
from pydub import AudioSegment
|
| 7 |
|
| 8 |
import torch
|
| 9 |
torch.set_num_threads(1)
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
def get_labels(audio_fp, threshold, min_speech_duration_ms, min_silence_duration_ms, auto_merge, uppper_merge_threshold, max_segment_length):
|
| 24 |
+
audio = AudioSegment.from_file(audio_fp)
|
| 25 |
+
speech_timestamps = detect_nonsilent(audio, min_silence_len=min_silence_duration_ms, silence_thresh=-40)
|
| 26 |
+
speech_timestamps = list(filter(lambda x: x[1]-x[0] > min_speech_duration_ms, speech_timestamps))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
labels_str = []
|
| 28 |
labels = []
|
| 29 |
|
|
|
|
| 75 |
gr.Number(label="min_speech_duration_ms", value=250, info="default (250)"),
|
| 76 |
gr.Number(label="min_silence_duration_ms", value=40, info="default (100)"),
|
| 77 |
gr.Checkbox(label="Auto merge", value=True),
|
| 78 |
+
gr.Textbox(label="Gap max threshold value (seconds)", value=0.3),
|
| 79 |
+
gr.Number(label="Approx Max Segment Length", value=5)
|
| 80 |
],
|
| 81 |
[
|
| 82 |
gr.File(label="VAD Labels"),
|