| |
| |
import gradio as gr
import os
import io
import subprocess
import sys

import torch

# Run the `unidic` package's download command before `melo` is imported.
# The running interpreter (sys.executable) is used instead of whatever
# `python` happens to be first on PATH, and no shell is involved.
# check=False keeps this best-effort: a failed download does not abort startup.
# NOTE(review): presumably melo needs the downloaded dictionary - confirm.
subprocess.run([sys.executable, '-m', 'unidic', 'download'], check=False)

from melo.api import TTS

# Module-level default; this name is rebound to the "Speed" slider component
# when the UI is built further down in this file.
speed = 1.0

import tempfile
# Use the GPU when one is available; otherwise fall back to the CPU so the
# script can still start on hosts without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# One TTS model per supported language code, all placed on the same device.
models = {
    lang: TTS(language=lang, device=device)
    for lang in ('EN', 'ES', 'FR', 'ZH', 'JP', 'KR')
}

# Speaker table of the English model; its keys populate the initial
# "Speaker" dropdown choices.
speaker_ids = models['EN'].hps.data.spk2id
|
|
# Default sample sentence for each language code.
# `load_speakers` swaps the textbox content to the new language's entry when
# the textbox still holds one of these defaults.
# The non-English entries were restored from mis-decoded (UTF-8 read as a
# single-byte code page) text, which had also split two literals across lines.
default_text_dict = {
    'EN': 'The field of text-to-speech has seen rapid development recently.',
    'ES': 'El campo de la conversión de texto a voz ha experimentado un rápido desarrollo recientemente.',
    'FR': 'Le domaine de la synthèse vocale a connu un développement rapide récemment',
    'ZH': 'text-to-speech 领域近年来发展迅速',
    'JP': 'テキスト読み上げの分野は最近急速な発展を遂げています',
    'KR': '최근 텍스트 음성 변환 분야가 급속도로 발전하고 있습니다.',
}
| |
def synthesize(speaker, text, speed, language, progress=gr.Progress()):
    """Synthesize `text` into WAV data held in memory.

    Args:
        speaker: Speaker name, looked up in the chosen model's
            ``hps.data.spk2id`` table.
        text: The text to speak.
        speed: Speed value forwarded to ``tts_to_file``.
        language: Key into the module-level ``models`` dict.
        progress: Gradio progress tracker; its ``tqdm`` is handed to the
            model as ``pbar``.

    Returns:
        The bytes that ``tts_to_file`` wrote into the in-memory buffer.
    """
    buffer = io.BytesIO()
    model = models[language]
    speaker_id = model.hps.data.spk2id[speaker]
    model.tts_to_file(
        text,
        speaker_id,
        buffer,
        speed=speed,
        pbar=progress.tqdm,
        format='wav',
    )
    return buffer.getvalue()
def load_speakers(language, text):
    """Refresh the speaker dropdown and default text for a new language.

    Args:
        language: Key into ``models`` and ``default_text_dict``.
        text: Current content of the text box.

    Returns:
        A ``(speaker_update, newtext)`` pair. ``speaker_update`` is a
        ``gr.update`` that lists the selected model's speakers and selects
        the first one. ``newtext`` is the language's default sentence when
        `text` is one of the defaults, otherwise `text` unchanged.
    """
    # Only replace the text when the user has not typed something of their own.
    holds_default = text in default_text_dict.values()
    newtext = default_text_dict[language] if holds_default else text

    speaker_names = list(models[language].hps.data.spk2id.keys())
    speaker_update = gr.update(value=speaker_names[0], choices=speaker_names)
    return speaker_update, newtext
# Build the demo page: heading, input controls, synthesize button, audio
# output and a credit line.
# NOTE(review): the source had lost its indentation; the nesting below
# (input controls inside the Group, button/audio/credit directly under
# Blocks) is a reconstruction - confirm against the intended layout.
with gr.Blocks() as demo:
    # NOTE(review): the heading reads "An unofficial for" - a word such as
    # "demo" looks to be missing; left unchanged here.
    gr.Markdown(
        '# MeloTTS Demo\n\nAn unofficial for [MeloTTS](https://github.com/myshell-ai/MeloTTS).'
    )

    with gr.Group():
        speaker = gr.Dropdown(
            speaker_ids.keys(),
            interactive=True,
            value='EN-US',
            label='Speaker',
        )
        language = gr.Radio(
            ['EN', 'ES', 'FR', 'ZH', 'JP', 'KR'],
            label='Language',
            value='EN',
        )
        # Rebinds the module-level name `speed` to the slider component.
        speed = gr.Slider(
            label='Speed',
            minimum=0.1,
            maximum=10.0,
            value=1.0,
            interactive=True,
            step=0.1,
        )
        text = gr.Textbox(label="Text to speak", value=default_text_dict['EN'])
        # On a language change, refresh the speaker list and (if untouched)
        # the default sentence.
        language.input(
            load_speakers,
            inputs=[language, text],
            outputs=[speaker, text],
        )

    btn = gr.Button('Synthesize', variant='primary')
    aud = gr.Audio(interactive=False)
    btn.click(
        synthesize,
        inputs=[speaker, text, speed, language],
        outputs=[aud],
    )
    gr.Markdown('Demo by [mrfakename](https://twitter.com/realmrfakename).')
|
|
|
|
# Configure the request queue first, then start the app. These are the same
# two calls the chained form made, written as separate statements.
demo.queue(api_open=False, default_concurrency_limit=10)
demo.launch(show_api=False)
|
|
|
|