presencesw committed on
Commit
e2dc557
·
1 Parent(s): 3301011

Feat: add app.py file

Browse files
Files changed (2) hide show
  1. Dockerfile +0 -2
  2. app.py +59 -0
Dockerfile CHANGED
@@ -2,8 +2,6 @@ FROM python:3.12
2
 
3
  WORKDIR /app
4
  COPY ./requirements.txt /app/requirements.txt
5
- # copy all . to /app
6
- COPY . /app/
7
 
8
  # RUN apt-get update && apt-get install -y espeak
9
  RUN apt-get update && apt-get install -y espeak-ng espeak-ng-data
 
2
 
3
  WORKDIR /app
4
  COPY ./requirements.txt /app/requirements.txt
 
 
5
 
6
  # RUN apt-get update && apt-get install -y espeak
7
  RUN apt-get update && apt-get install -y espeak-ng espeak-ng-data
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from split_audio.main import AudioSplitter
3
+ import numpy as np
4
+ import time
5
+ import os
6
+ import soundfile as sf
7
+ import librosa
8
+
# One shared splitter instance, configured for Vietnamese; reused across
# every Gradio request.
splitter = AudioSplitter(language="vi")


def split_audio(str_raw, str_trunc, audio_input):
    """Cut *audio_input* down to the truncated transcript, then drop silence.

    Parameters mirror the Gradio inputs: ``str_raw`` is the full transcript,
    ``str_trunc`` the shortened one, and ``audio_input`` a Gradio
    ``(sample_rate, samples)`` tuple.  Returns a ``(24000, samples)`` tuple
    for the Gradio Audio output.

    NOTE(review): the 24 kHz output rate is hard-coded and the incoming
    sample rate is discarded — presumably AudioSplitter works at 24 kHz;
    confirm against its implementation.
    """
    samples = audio_input[1]  # drop Gradio's sample-rate element
    trimmed = splitter.split_audio(str_raw, str_trunc, samples)
    # Keep only the voiced spans louder than -30 dB relative to peak.
    voiced_spans = librosa.effects.split(trimmed, top_db=30)
    trimmed = np.concatenate([trimmed[lo:hi] for lo, hi in voiced_spans])
    return (24000, trimmed)
# Build the Gradio UI: left column holds the raw/truncated transcripts plus
# the Run button; right column shows the source audio and the trimmed result.
with gr.Blocks() as demo:
    # with gr.Row():
    #     text_input = gr.Textbox(value="Đây là một ví dụ về tổng hợp giọng nói.")
    #     audio_output = gr.Audio()
    # with gr.Row():
    #     run_button = gr.Button(value="generate voice")
    #     rtf_log = gr.Number(label="Real Time Factor")
    with gr.Row():
        with gr.Column():
            # Inputs are non-interactive: they are populated via gr.Examples below.
            text_raw = gr.Textbox(value="", label="Text raw", interactive=False, lines=3)
            text_cut = gr.Textbox(label="temp", lines=3)
            run_button = gr.Button(value="Run")
        with gr.Column():
            audio_input = gr.Audio(label="Audio raw", interactive=False, )
            audio_output = gr.Audio(label="Temp", interactive=False)

    # run_button.click(fn=generate_voice, inputs=[text_input], outputs=[audio_output])
    # run_button.click(fn=generate_voice, inputs=[text_input], outputs=[audio_output, rtf_log, phonemizer],)
    # Wire the button to the module-level split_audio(str_raw, str_trunc, audio).
    run_button.click(fn=split_audio, inputs=[text_raw, text_cut, audio_input], outputs=[audio_output])
    # get all file wavs in "audio_example" folder

    # Gather example wavs; capped at 30 — presumably to keep page load fast.
    audio_files = [f for f in os.listdir("audio_example") if f.endswith(".wav")]
    audio_files = audio_files[:30]
    # text_files = [f.replace(".wav", ".txt") for f in audio_files]
    # audio = [sf.read(os.path.join("audio_example", f)) for f in audio_files]
    # text = [open(os.path.join("audio_example", f), "r", encoding="utf-8").read().strip() for f in text_files]

    # For each example: resample to 24 kHz, strip silence (< -30 dB), and pair
    # the waveform with its transcript from the sibling .txt file.
    # NOTE(review): assumes every .wav has a matching .txt — an orphan wav
    # would raise FileNotFoundError at import time; confirm the dataset layout.
    examples_data = []
    for wav_file in audio_files:
        # waveform, sr = sf.read(os.path.join("audio_example", wav_file))
        waveform, sr = librosa.load(os.path.join("audio_example", wav_file), sr=24000)
        intervals = librosa.effects.split(waveform, top_db=30)

        waveform = np.concatenate([waveform[start:end] for start, end in intervals])
        with open(os.path.join("audio_example", wav_file.replace(".wav", ".txt")), "r", encoding="utf-8") as f:
            text = f.read().strip()
        examples_data.append([text, (sr, waveform)])

    # Clicking an example fills the transcript and source-audio inputs.
    gr.Examples(examples=examples_data, inputs=[text_raw, audio_input])
58
+ if __name__ == "__main__":
59
+ demo.launch(server_port=7860, server_name="0.0.0.0")