# NOTE: "Spaces: Sleeping" is Hugging Face Spaces page chrome captured when this
# file was scraped; it is not part of the application code.
"""Gradio demo for trimming an audio clip to a truncated transcript."""

import os
import time  # noqa: F401  (kept: may be used by commented-out benchmarking code)

import gradio as gr
import librosa
import numpy as np
import soundfile as sf  # noqa: F401  (kept: used by commented-out example loader)

from split_audio.main import AudioSplitter

# Single module-level splitter shared by every request; configured for
# Vietnamese ("vi") transcripts.
splitter = AudioSplitter(language="vi")
def split_audio(str_raw, str_trunc, audio_input):
    """Cut ``audio_input`` down to the part matching ``str_trunc`` and de-silence it.

    Args:
        str_raw: Full transcript of the recording.
        str_trunc: Truncated/target portion of the transcript; the splitter
            keeps the audio segment corresponding to it.
        audio_input: Gradio ``gr.Audio`` value, a ``(sample_rate, waveform)``
            tuple; only the waveform is used here.

    Returns:
        A ``(24000, waveform)`` tuple for the ``gr.Audio`` output component.
        NOTE(review): 24 kHz is hard-coded — it matches the ``sr=24000`` used
        when loading the bundled examples, but confirm all upstream audio is
        actually 24 kHz.
    """
    # Drop the sample rate from the Gradio (sr, data) tuple.
    waveform = audio_input[1]
    y_cut = splitter.split_audio(str_raw, str_trunc, waveform)
    # Strip silent gaps: keep only regions within 30 dB of the signal peak.
    intervals = librosa.effects.split(y_cut, top_db=30)
    y_cut = np.concatenate([y_cut[start:end] for start, end in intervals])
    return (24000, y_cut)
# UI layout: transcript inputs on the left, raw/processed audio on the right.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            text_raw = gr.Textbox(value="", label="Text raw", interactive=False, lines=3)
            text_cut = gr.Textbox(label="temp", lines=3)
            run_button = gr.Button(value="Run")
        with gr.Column():
            audio_input = gr.Audio(label="Audio raw", interactive=False)
            audio_output = gr.Audio(label="Temp", interactive=False)

    run_button.click(
        fn=split_audio,
        inputs=[text_raw, text_cut, audio_input],
        outputs=[audio_output],
    )

    # Build example rows from the bundled .wav/.txt pairs in "audio_example"
    # (capped at 30 to keep startup time reasonable).
    audio_files = [f for f in os.listdir("audio_example") if f.endswith(".wav")]
    audio_files = audio_files[:30]
    examples_data = []
    for wav_file in audio_files:
        # Resample to 24 kHz so examples match the rate split_audio() returns.
        waveform, sr = librosa.load(os.path.join("audio_example", wav_file), sr=24000)
        # Pre-strip silence so examples preview the same trimming as inference.
        intervals = librosa.effects.split(waveform, top_db=30)
        waveform = np.concatenate([waveform[start:end] for start, end in intervals])
        # Each .wav is paired with a same-named .txt transcript.
        txt_path = os.path.join("audio_example", wav_file.replace(".wav", ".txt"))
        with open(txt_path, "r", encoding="utf-8") as f:
            text = f.read().strip()
        examples_data.append([text, (sr, waveform)])
    gr.Examples(examples=examples_data, inputs=[text_raw, audio_input])
if __name__ == "__main__":
    # Bind to 0.0.0.0 so the app is reachable from outside the container
    # (required on Hugging Face Spaces); 7860 is the Spaces default port.
    demo.launch(server_port=7860, server_name="0.0.0.0")