Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -18,7 +18,7 @@ model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
|
|
| 18 |
collect_chunks) = utils
|
| 19 |
|
| 20 |
|
| 21 |
-
def get_labels(audio_fp, threshold, min_speech_duration_ms, min_silence_duration_ms):
|
| 22 |
wav, sr = torchaudio.load(audio_fp)
|
| 23 |
wav = torchaudio.functional.resample(wav, orig_freq=sr, new_freq=16000)
|
| 24 |
speech_timestamps = get_speech_timestamps(wav,
|
|
@@ -28,19 +28,62 @@ def get_labels(audio_fp, threshold, min_speech_duration_ms, min_silence_duration
|
|
| 28 |
min_speech_duration_ms=min_speech_duration_ms,
|
| 29 |
min_silence_duration_ms=min_silence_duration_ms,
|
| 30 |
return_seconds=True)
|
|
|
|
| 31 |
labels = []
|
|
|
|
| 32 |
for i, st in enumerate(speech_timestamps):
|
| 33 |
-
|
|
|
|
|
|
|
| 34 |
fn = str(uuid4()) + ".txt"
|
| 35 |
with open(fn, "w") as f:
|
| 36 |
-
f.write("\n".join(
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
|
| 40 |
interface = gr.Interface(
|
| 41 |
get_labels,
|
| 42 |
-
[
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
)
|
| 45 |
|
| 46 |
if __name__ == "__main__":
|
|
|
|
| 18 |
collect_chunks) = utils
|
| 19 |
|
| 20 |
|
| 21 |
+
def _merge_close_segments(labels, max_gap, max_length):
    """Merge consecutive ``(start, end, name)`` segments separated by short gaps.

    A segment is folded into the previously emitted one when the silence
    between them is at most ``max_gap`` and the resulting span (previous
    duration + gap + this duration) stays strictly below ``max_length``.

    Args:
        labels: Sequence of ``(start, end, name)`` tuples in chronological
            order, with numeric ``start``/``end``.
        max_gap: Largest gap, in the same unit as the timestamps, that may
            still be bridged.
        max_length: Exclusive upper bound on the length of a merged segment.

    Returns:
        A list of ``[start, end, name]`` lists. Merged names are joined as
        ``"<previous> |<gap>s| <next>"``. An empty input yields an empty list.
    """
    merged = []
    for start, end, name in labels:
        if merged:
            previous = merged[-1]
            # The previous merged segment always ends where the preceding raw
            # segment ended, so this is the raw gap between neighbours.
            gap = start - previous[1]
            if (
                gap <= max_gap
                and (previous[1] - previous[0]) + gap + (end - start)
                < max_length
            ):
                previous[1] = end
                previous[2] = f'{previous[2]} |{round(gap, 2)}s| {name}'
                continue
        merged.append([start, end, name])
    return merged


def get_labels(audio_fp, threshold, min_speech_duration_ms, min_silence_duration_ms, auto_merge, uppper_merge_threshold, max_seg_length):
    """Run voice-activity detection on an audio file and write label files.

    The audio is loaded, resampled to 16 kHz and passed to
    ``get_speech_timestamps``. Every detected segment is written as a
    tab-separated ``start<TAB>end<TAB>Sound N`` line to a freshly named
    ``<uuid>.txt`` file. When ``auto_merge`` is set, neighbouring segments
    are additionally merged and written to a second file.

    Args:
        audio_fp: Path of the audio file to analyse.
        threshold: Detection threshold forwarded to the VAD call.
        min_speech_duration_ms: Forwarded to the VAD call.
        min_silence_duration_ms: Forwarded to the VAD call.
        auto_merge: When falsy, only the raw label file is produced.
        uppper_merge_threshold: Largest gap (seconds) between two segments
            that may still be merged. The parameter name, including its
            spelling, is kept for compatibility with existing callers.
        max_seg_length: Exclusive upper bound (seconds) on the length of a
            merged segment.

    Returns:
        ``(raw_labels_path, merged_labels_path)``; the second item is
        ``None`` when ``auto_merge`` is falsy.
    """
    wav, sr = torchaudio.load(audio_fp)
    wav = torchaudio.functional.resample(wav, orig_freq=sr, new_freq=16000)
    # NOTE(review): the diff view this code was recovered from hides three
    # lines of this call. `model`, `sampling_rate` and `threshold` below are
    # reconstructed from the surrounding code - confirm against app.py.
    speech_timestamps = get_speech_timestamps(
        wav,
        model,
        sampling_rate=16000,
        threshold=threshold,
        min_speech_duration_ms=min_speech_duration_ms,
        min_silence_duration_ms=min_silence_duration_ms,
        return_seconds=True,
    )

    labels_str = []
    labels = []
    for i, st in enumerate(speech_timestamps):
        labels_str.append(f"{st['start']}\t{st['end']}\tSound {i+1}")
        labels.append((float(st['start']), float(st['end']), f"Sound {i+1}"))

    fn = str(uuid4()) + ".txt"
    with open(fn, "w") as f:
        f.write("\n".join(labels_str))

    if not auto_merge:
        return fn, None

    # Merge on the gap limit in seconds, not on the VAD probability
    # threshold, and bound the result by `max_seg_length` (the original
    # referenced an undefined `max_segment_length`). An empty detection
    # result now yields an empty merged file instead of an IndexError.
    new_labels = _merge_close_segments(
        labels, uppper_merge_threshold, max_seg_length
    )
    translate_labels = [f"{x[0]}\t{x[1]}\t{x[2]}" for x in new_labels]

    filename_path = f"{fn}_translate_label.txt"
    with open(filename_path, "w") as f:
        f.write("\n".join(translate_labels))

    return fn, filename_path
|
| 70 |
|
| 71 |
|
| 72 |
# Gradio UI: the input widgets are listed in the same order as the
# positional parameters of `get_labels`; the two outputs are the raw label
# file and the merged label file it returns.
interface = gr.Interface(
    fn=get_labels,
    inputs=[
        # Source audio, handed to the callback as a path on disk.
        gr.Audio(label="Audio file", type="filepath"),
        # Voice-activity detection settings.
        gr.Slider(
            minimum=0,
            maximum=1,
            step=0.01,
            value=0.5,
            label="Threshold",
            info="default (0.5)",
        ),
        gr.Number(value=250, label="min_speech_duration_ms", info="default (250)"),
        gr.Number(value=100, label="min_silence_duration_ms", info="default (100)"),
        # Segment merging settings.
        gr.Checkbox(value=True, label="Auto merge"),
        gr.Number(value=1, label="Gap max threshold value (seconds)"),
        gr.Number(value=12, label="Approx Max Segment Length"),
    ],
    outputs=[
        gr.File(label="VAD Labels"),
        gr.File(label="Merged Labels File"),
    ],
)
|
| 88 |
|
| 89 |
if __name__ == "__main__":
|