Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,8 +5,8 @@ from pydub.silence import detect_nonsilent
|
|
| 5 |
from pydub import AudioSegment
|
| 6 |
|
| 7 |
|
| 8 |
-
def get_labels(audio_fp, min_speech_duration_ms, min_silence_duration_ms, auto_merge, uppper_merge_threshold, max_segment_length):
|
| 9 |
-
audio = AudioSegment.from_file(audio_fp).set_channels(1)
|
| 10 |
speech_timestamps = detect_nonsilent(audio, min_silence_len=min_silence_duration_ms, silence_thresh=-40)
|
| 11 |
speech_timestamps = list(filter(lambda x: x[1]-x[0] > min_speech_duration_ms, speech_timestamps))
|
| 12 |
speech_timestamps = [{"start": s[0]/1000, "end": s[1]/1000} for s in speech_timestamps]
|
|
@@ -44,6 +44,23 @@ def get_labels(audio_fp, min_speech_duration_ms, min_silence_duration_ms, auto_m
|
|
| 44 |
else:
|
| 45 |
new_labels.append(list(labels[i]))
|
| 46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
translate_labels = list(map(lambda x: f"{x[0]}\t{x[1]}\t{x[2]}", new_labels))
|
| 48 |
|
| 49 |
filename_path = f"{fn}_translate_label.txt"
|
|
@@ -56,12 +73,15 @@ def get_labels(audio_fp, min_speech_duration_ms, min_silence_duration_ms, auto_m
|
|
| 56 |
interface = gr.Interface(
|
| 57 |
get_labels,
|
| 58 |
[
|
| 59 |
-
gr.
|
| 60 |
-
gr.Number(label="min_speech_duration_ms", value=
|
| 61 |
-
gr.Number(label="min_silence_duration_ms", value=40, info="default (
|
| 62 |
gr.Checkbox(label="Auto merge", value=True),
|
| 63 |
gr.Textbox(label="Gap max threshold value (seconds)", value=0.350),
|
| 64 |
-
gr.Number(label="Approx Max Segment Length", value=7)
|
|
|
|
|
|
|
|
|
|
| 65 |
],
|
| 66 |
[
|
| 67 |
gr.File(label="VAD Labels"),
|
|
|
|
| 5 |
from pydub import AudioSegment
|
| 6 |
|
| 7 |
|
| 8 |
+
def get_labels(audio_fp, min_speech_duration_ms, min_silence_duration_ms, auto_merge, uppper_merge_threshold, max_segment_length, end_extension, extend_small_segments, show_duration_label):
|
| 9 |
+
audio = AudioSegment.from_file(audio_fp.name).set_channels(1)
|
| 10 |
speech_timestamps = detect_nonsilent(audio, min_silence_len=min_silence_duration_ms, silence_thresh=-40)
|
| 11 |
speech_timestamps = list(filter(lambda x: x[1]-x[0] > min_speech_duration_ms, speech_timestamps))
|
| 12 |
speech_timestamps = [{"start": s[0]/1000, "end": s[1]/1000} for s in speech_timestamps]
|
|
|
|
| 44 |
else:
|
| 45 |
new_labels.append(list(labels[i]))
|
| 46 |
|
| 47 |
+
extended = [False] * (len(new_labels) - 1)
|
| 48 |
+
if extend_small_segments:
|
| 49 |
+
for i, nl in enumerate(new_labels[:-1]):
|
| 50 |
+
if nl[1] - nl[0] <= 1.02 and nl[0] + 1.02 < new_labels[i+1][0]:
|
| 51 |
+
nl[1] = nl[0] + 1.02
|
| 52 |
+
extended[i] = True
|
| 53 |
+
|
| 54 |
+
if end_extension:
|
| 55 |
+
for i, nl in enumerate(new_labels[:-1]):
|
| 56 |
+
if not extended[i]:
|
| 57 |
+
if nl[1] + end_extension < new_labels[i+1][0]:
|
| 58 |
+
nl[1] = nl[1] + end_extension
|
| 59 |
+
|
| 60 |
+
if show_duration_label:
|
| 61 |
+
for nl in new_labels:
|
| 62 |
+
nl[2] = round(nl[1] - nl[0], 3)
|
| 63 |
+
|
| 64 |
translate_labels = list(map(lambda x: f"{x[0]}\t{x[1]}\t{x[2]}", new_labels))
|
| 65 |
|
| 66 |
filename_path = f"{fn}_translate_label.txt"
|
|
|
|
| 73 |
interface = gr.Interface(
|
| 74 |
get_labels,
|
| 75 |
[
|
| 76 |
+
gr.File(type="filepath", label="Audio file", file_types=["audio"], file_count="single"),
|
| 77 |
+
gr.Number(label="min_speech_duration_ms", value=40, info="default (40)"),
|
| 78 |
+
gr.Number(label="min_silence_duration_ms", value=40, info="default (40)"),
|
| 79 |
gr.Checkbox(label="Auto merge", value=True),
|
| 80 |
gr.Textbox(label="Gap max threshold value (seconds)", value=0.350),
|
| 81 |
+
gr.Number(label="Approx Max Segment Length", value=7),
|
| 82 |
+
gr.Number(label="Extend end by (seconds)", value=0),
|
| 83 |
+
gr.Checkbox(label="Extend small segments (minimum 1.02 seconds)", value=False),
|
| 84 |
+
gr.Checkbox(label="Show only duration in labels", value=False)
|
| 85 |
],
|
| 86 |
[
|
| 87 |
gr.File(label="VAD Labels"),
|