Spaces:
Sleeping
Sleeping
| import logging | |
| import os | |
| import uuid | |
| import time | |
| import gradio as gr | |
| import soundfile as sf | |
| from model import get_pretrained_model, language_to_models | |
| # Function to update model dropdown based on language selection | |
| #def update_model_dropdown(language): | |
| # if language in language_to_models: | |
| # choices = language_to_models[language] | |
| # return gr.Dropdown.update(choices=choices, value=choices[0]) | |
| # else: | |
| # raise ValueError(f"Unsupported language: {language}") | |
def update_model_dropdown(language: str):
    """Build the model dropdown that matches the selected language.

    Looks up ``language`` in ``language_to_models`` and returns an
    interactive ``gr.Dropdown`` listing that language's models, with the
    first model preselected.

    Raises:
        ValueError: If ``language`` is not a key of ``language_to_models``.
    """
    # Guard clause: reject unknown languages before reading the mapping.
    if language not in language_to_models:
        raise ValueError(f"Unsupported language: {language}")

    models = language_to_models[language]
    return gr.Dropdown(choices=models, value=models[0], interactive=True)
| # Function to process text to speech conversion | |
def process(language, repo_id, text, sid, speed):
    """Synthesize ``text`` to speech and return the path of the WAV file.

    Args:
        language: Selected language. Not used by the synthesis itself; it is
            accepted because the UI passes it along with the other inputs.
        repo_id: Model identifier handed to ``get_pretrained_model``.
        text: Text to convert to speech.
        sid: Speaker ID; converted with ``int()`` before use.
        speed: Speed value handed to ``get_pretrained_model``.

    Returns:
        Name of a newly written 16-bit PCM WAV file (a random UUID plus
        ``.wav``), relative to the current working directory.

    Raises:
        ValueError: If ``sid`` cannot be converted to an integer, or if the
            model produced no audio samples.
    """
    logging.info(f"Input text: {text}, SID: {sid}, Speed: {speed}")
    sid = int(sid)
    tts = get_pretrained_model(repo_id, speed)

    start = time.time()
    audio = tts.generate(text, sid=sid)
    # Stop the clock right after synthesis so only generation is timed.
    elapsed_seconds = time.time() - start

    # With no samples the duration is 0 and the RTF division below would
    # fail with an opaque ZeroDivisionError; report the real problem instead.
    if len(audio.samples) == 0:
        raise ValueError(
            "No audio was generated. Please check the input text and the "
            "selected model."
        )

    duration = len(audio.samples) / audio.sample_rate
    rtf = elapsed_seconds / duration  # real-time factor: processing / audio time

    info = f"""
    Wave duration: {duration:.3f} s<br/>
    Processing time: {elapsed_seconds:.3f} s<br/>
    RTF: {rtf:.3f}<br/>
    """
    logging.info(info)

    # A random UUID keeps concurrent requests from overwriting each other.
    filename = f"{uuid.uuid4()}.wav"
    sf.write(filename, audio.samples, samplerate=audio.sample_rate, subtype="PCM_16")
    return filename
| # Interface layout | |
# Build the UI. Components and event listeners must be created inside the
# Blocks context so that they are attached to `demo`.
with gr.Blocks() as demo:
    gr.Markdown("# Text to Voice")
    gr.Markdown("High Fidelity TTS. Visit <a href='https://ruslanmv.com/' target='_blank'>ruslanmv.com</a> for more information.")

    language_choices = list(language_to_models.keys())
    default_language = language_choices[0]
    default_models = language_to_models[default_language]

    language_radio = gr.Radio(label="Language", choices=language_choices, value=default_language)
    # Preselect the first model, exactly as update_model_dropdown does after a
    # language change, so that pressing Submit right after start-up sends a
    # real model id instead of an empty selection.
    model_dropdown = gr.Dropdown(
        label="Select a model",
        choices=default_models,
        value=default_models[0] if default_models else None,
    )
    # Refresh the model list whenever another language is picked.
    language_radio.change(update_model_dropdown, inputs=language_radio, outputs=model_dropdown)

    input_text = gr.Textbox(lines=10, label="Enter text to convert to speech")
    input_sid = gr.Textbox(label="Speaker ID", value="0", placeholder="Valid only for multi-speaker model")
    input_speed = gr.Slider(minimum=0.1, maximum=10, value=1, step=0.1, label="Speed (larger->faster; smaller->slower)")

    # Only the audio is shown; process() returns just the WAV file path.
    output_audio = gr.Audio(label="Generated audio")

    input_button = gr.Button("Submit")
    input_button.click(process, inputs=[language_radio, model_dropdown, input_text, input_sid, input_speed], outputs=[output_audio])
| # Download necessary data | |
def download_espeak_ng_data():
    """Download and unpack the espeak-ng data archive into ``/tmp``.

    Runs a short shell script through ``os.system`` that changes to ``/tmp``,
    fetches ``espeak-ng-data.tar.bz2`` with ``wget`` and extracts it with
    ``tar``. The step is best effort: a non-zero exit status is logged as a
    warning instead of being raised, so the caller continues either way.
    """
    status = os.system(
        """
        cd /tmp
        wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
        tar xf espeak-ng-data.tar.bz2
        """
    )
    if status != 0:
        # Use a named logger rather than logging.warning(): the module-level
        # helper implicitly calls basicConfig() when the root logger has no
        # handlers, which would turn a later basicConfig(level=...) call into
        # a no-op.
        logging.getLogger(__name__).warning(
            "Downloading or extracting espeak-ng data failed "
            "(os.system returned %s)",
            status,
        )
if __name__ == "__main__":
    # Script entry point: enable INFO-level logging, fetch the espeak-ng
    # data, then start the Gradio app.
    logging.basicConfig(level=logging.INFO)
    download_espeak_ng_data()
    demo.launch()