Spaces:
Runtime error
Runtime error
| import tempfile | |
| from typing import Optional | |
| from TTS.config import load_config | |
| import gradio as gr | |
| import numpy as np | |
| from TTS.utils.manage import ModelManager | |
| from TTS.utils.synthesizer import Synthesizer | |
# Module-level containers; neither is referenced by the code visible in this file.
MODELS = {}
SPEAKERS = {}

# tts() truncates any input text longer than this many characters.
MAX_TXT_LEN = 10000

manager = ModelManager()

# Leave out multi-speaker models and slow wavegrad vocoders: a model is
# dropped when its name contains any of these substrings.
filters = ["vctk", "your_tts", "ek1"]
MODEL_NAMES = [
    candidate
    for candidate in manager.list_tts_models()
    if all(marker not in candidate for marker in filters)
]

# Rotate the first three entries one step to the left, so the entry that was
# second becomes the first choice shown in the model picker.
MODEL_NAMES[:3] = [MODEL_NAMES[1], MODEL_NAMES[2], MODEL_NAMES[0]]
print(MODEL_NAMES)
def tts(text: str, model_name: str, speaker_idx: Optional[str] = None):
    """Synthesize ``text`` with the chosen model and return the path of a WAV file.

    Args:
        text: Text to speak. Anything beyond ``MAX_TXT_LEN`` characters is
            cut off before synthesis.
        model_name: Model identifier without the ``tts_models/`` prefix
            (the prefix is added here before downloading).
        speaker_idx: Optional speaker identifier forwarded unchanged to
            ``Synthesizer.tts``.

    Returns:
        The file name of a temporary ``.wav`` file holding the audio. The
        file is created with ``delete=False``, so it stays on disk after
        this function returns.

    Raises:
        ValueError: If ``text`` is empty/whitespace-only or no model was
            selected.
    """
    # Fail early with a readable message instead of a cryptic error from
    # inside the download or synthesis code.
    if not text or not text.strip():
        raise ValueError("Input text is empty.")
    if not model_name:
        raise ValueError("No TTS model selected.")

    if len(text) > MAX_TXT_LEN:
        text = text[:MAX_TXT_LEN]
        print(f"Input text was cutoff since it went over the {MAX_TXT_LEN} character limit.")
    print(text, model_name)

    # download model
    model_path, config_path, model_item = manager.download_model(f"tts_models/{model_name}")
    # .get() so model metadata without a "default_vocoder" entry is treated
    # the same as an explicit null instead of raising KeyError.
    vocoder_name: Optional[str] = model_item.get("default_vocoder")

    # download vocoder (only when the model declares one)
    vocoder_path = None
    vocoder_config_path = None
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

    # init synthesizer; arguments are passed positionally exactly as before
    # (NOTE(review): meaning of the two None slots depends on the installed
    # Synthesizer signature - confirm against the pinned TTS version).
    synthesizer = Synthesizer(
        model_path, config_path, None, None, vocoder_path, vocoder_config_path,
    )

    # synthesize
    wavs = synthesizer.tts(text, speaker_idx)

    # Write the audio to a named temp file that outlives the handle, and
    # return its path once the handle has been closed.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
    return fp.name
# Markdown/HTML text handed to gr.Interface through its `article` argument:
# project links, local-install instructions and the list of model contributors.
# NOTE(review): the "π", "β¨" and "πΈ" sequences below look like UTF-8 emoji
# that were decoded with the wrong charset - confirm against the original file.
article= """
Visit us on Coqui.ai and drop a π to π<a href="https://github.com/coqui-ai/TTS" target="_blank">CoquiTTS</a>.
<br/>
Run CoquiTTS locally for the best result. Check out our π<a href="https://tts.readthedocs.io/en/latest/inference.html">documentation</a>.
```bash
$ pip install TTS
...
$ tts --list_models
...
$ tts --text "Text for TTS" --model_name "<type>/<language>/<dataset>/<model_name>" --out_path folder/to/save/output.wav
```
<img src="https://static.scarf.sh/a.png?x-pxid=1404a024-e647-4406-bb9a-4ade0c931182" />
<br/>
π <b> Model contributors</b>
- <a href="https://github.com/nmstoker/" target="_blank">@nmstoker</a>
- <a href="https://github.com/kaiidams/" target="_blank">@kaiidams</a>
- <a href="https://github.com/WeberJulian/" target="_blank">@WeberJulian,</a>
- <a href="https://github.com/Edresson/" target="_blank">@Edresson</a>
- <a href="https://github.com/thorstenMueller/" target="_blank">@thorstenMueller</a>
- <a href="https://github.com/r-dh/" target="_blank">@r-dh</a>
- <a href="https://github.com/kirianguiller/" target="_blank">@kirianguiller</a>
- <a href="https://github.com/robinhad/" target="_blank">@robinhad</a>
- <a href="https://github.com/fkarabiber/" target="_blank">@fkarabiber</a>
- <a href="https://github.com/nicolalandro/" target="_blank">@nicolalandro</a>
- <a href="https://github.com/a-froghyar" target="_blank">@a-froghyar</a>
π Drop a β¨PRβ¨ on πΈTTS to share a new model and have it included here.
"""
# Input widgets, listed in the same order as tts()'s first two parameters.
_text_input = gr.inputs.Textbox(
    label="Input Text",
    default="This sentence has been generated by a speech synthesis system.",
)
_model_picker = gr.inputs.Radio(
    label="Pick a TTS Model - (language/dataset/model_name)",
    choices=MODEL_NAMES,
)
# A speaker Dropdown and a microphone Audio input were sketched here but are
# disabled, so tts() is always called without a speaker.
_audio_output = gr.outputs.Audio(label="Output")

# Build the demo UI around tts(): two inputs in, one audio clip out.
iface = gr.Interface(
    fn=tts,
    inputs=[_text_input, _model_picker],
    outputs=_audio_output,
    # page text
    title="πΈπ¬ CoquiTTS Demo",
    description="πΈπ¬ Coqui TTS - a deep learning toolkit for Text-to-Speech, battle-tested in research and production.",
    article=article,
    # look and behavior
    theme="grass",
    layout="vertical",
    live=False,
    allow_flagging=False,
    flagging_options=['error', 'bad-quality', 'wrong-pronounciation'],
)

# Start the web app without requesting a public share link.
iface.launch(share=False)