File size: 2,674 Bytes
e2dc557
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import gradio as gr
from split_audio.main import AudioSplitter
import numpy as np
import time
import os
import soundfile as sf
import librosa

# Module-level splitter shared by all requests; "vi" selects Vietnamese.
# NOTE(review): assumed safe for concurrent Gradio callbacks — confirm
# AudioSplitter is stateless/thread-safe.
splitter = AudioSplitter(language="vi")

def split_audio(str_raw, str_trunc, audio_input, top_db=30):
    """Cut the raw audio down to the truncated transcript and strip silence.

    Args:
        str_raw: Full transcript of the input audio.
        str_trunc: Truncated transcript; the audio is cut to match it.
        audio_input: Gradio audio value — a ``(sample_rate, waveform)``
            tuple; only the waveform is used here.
        top_db: Threshold (dB below peak) under which audio is treated as
            silence when trimming. Defaults to 30 (previous hard-coded value).

    Returns:
        A ``(24000, waveform)`` tuple for the ``gr.Audio`` output component.
        NOTE(review): the 24 kHz rate is assumed because the example loader
        resamples with ``sr=24000`` — confirm AudioSplitter emits that rate.
    """
    # Gradio hands over (sample_rate, np.ndarray); keep only the samples.
    waveform = audio_input[1]
    y_cut = splitter.split_audio(str_raw, str_trunc, waveform)
    # Drop silent stretches so the returned clip is compact.
    intervals = librosa.effects.split(y_cut, top_db=top_db)
    # Guard: np.concatenate raises on an empty list (fully-silent clip).
    if len(intervals) > 0:
        y_cut = np.concatenate([y_cut[start:end] for start, end in intervals])
    return (24000, y_cut)

with gr.Blocks() as demo:
    # Layout: transcripts and the Run button on the left, raw/processed
    # audio players on the right.
    with gr.Row():
        with gr.Column():
            text_raw = gr.Textbox(value="", label="Text raw", interactive=False, lines=3)
            text_cut = gr.Textbox(label="temp", lines=3)
            run_button = gr.Button(value="Run")
        with gr.Column():
            audio_input = gr.Audio(label="Audio raw", interactive=False)
            audio_output = gr.Audio(label="Temp", interactive=False)

    run_button.click(fn=split_audio, inputs=[text_raw, text_cut, audio_input], outputs=[audio_output])

    # Build the example gallery from the first 30 .wav files. Sort the
    # listing: os.listdir order is OS-dependent, so without it the examples
    # shown would differ between machines/runs.
    audio_files = sorted(f for f in os.listdir("audio_example") if f.endswith(".wav"))[:30]

    examples_data = []
    for wav_file in audio_files:
        # Resample to 24 kHz to match the rate split_audio returns.
        waveform, sr = librosa.load(os.path.join("audio_example", wav_file), sr=24000)
        # Strip silence so examples mirror the app's post-processing.
        intervals = librosa.effects.split(waveform, top_db=30)
        # Guard: np.concatenate raises on an empty list (fully-silent file).
        if len(intervals) > 0:
            waveform = np.concatenate([waveform[start:end] for start, end in intervals])
        # Each .wav is expected to have a sibling .txt transcript; a missing
        # file raises here, which surfaces bad fixtures at startup.
        txt_path = os.path.join("audio_example", wav_file.replace(".wav", ".txt"))
        with open(txt_path, "r", encoding="utf-8") as f:
            text = f.read().strip()
        examples_data.append([text, (sr, waveform)])

    gr.Examples(examples=examples_data, inputs=[text_raw, audio_input])

if __name__ == "__main__":
    # Bind to 0.0.0.0 so the demo is reachable from outside the host/container.
    demo.launch(server_port=7860, server_name="0.0.0.0")