# NOTE: Captured from the Hugging Face Spaces page, whose status banner
# read "Runtime error" at the time this source was taken.
import gradio as gr
import numpy as np
import spaces
import torch

from synthesize import synthesize

# Fixed model configuration used for the import-time warm-up synthesis.
_WARMUP_KWARGS = dict(
    text="Hello",
    duration_model_config="./train_duration_dit_s.yaml",
    acoustic_model_config="./train_acoustic_dit_b.yaml",
    duration_model_checkpoint="./duration_model_0120000.pt",
    acoustic_model_checkpoint="./acoustic_model_0140000.pt",
    speaker_id=0,
    cfg_scale=4.0,
    num_sampling_steps=100,
)

# Run one synthesis at import time so the models are loaded before the
# first user request hits the Gradio interface.
audio, sample_rate = synthesize(**_WARMUP_KWARGS)
def text_to_speech(text, speaker_id, cfg_scale, num_sampling_steps):
    """Synthesize speech from ``text`` and return it for a ``gr.Audio`` output.

    Parameters
    ----------
    text : str
        Input text to synthesize.
    speaker_id : str | int
        Speaker index. The UI dropdown's choices are *strings* ("0".."99"),
        so this usually arrives as ``str`` and is coerced to ``int`` below.
    cfg_scale : float
        Classifier-free-guidance scale from the slider (0-10).
    num_sampling_steps : int
        Number of diffusion sampling steps.

    Returns
    -------
    tuple
        ``(sample_rate, audio)`` — the format ``gr.Audio`` expects.
    """
    audio, sample_rate = synthesize(
        text=text,
        duration_model_config="./train_duration_dit_s.yaml",
        acoustic_model_config="./train_acoustic_dit_b.yaml",
        duration_model_checkpoint="./duration_model_0120000.pt",
        acoustic_model_checkpoint="./acoustic_model_0140000.pt",
        # BUG FIX: the Speaker ID dropdown yields a string, but synthesize
        # expects an int (the warm-up call passes speaker_id=0). Coerce the
        # numeric inputs defensively.
        speaker_id=int(speaker_id),
        cfg_scale=float(cfg_scale),
        num_sampling_steps=int(num_sampling_steps),
    )
    return (sample_rate, audio)
# Dropdown option lists for the UI: 100 speaker labels ("0".."99") and the
# supported diffusion sampling-step counts.
speaker_ids = list(map(str, range(100)))
sampling_steps = [100, 250, 500, 1000]
# Build the Gradio UI. Inputs map one-to-one onto text_to_speech's
# parameters (text, speaker_id, cfg_scale, num_sampling_steps).
_input_widgets = [
    gr.Textbox(label="Text", value="Text to Speech with Diffusion Transformer"),
    gr.Dropdown(choices=speaker_ids, label="Speaker ID", value="0"),
    gr.Slider(minimum=0, maximum=10, value=4.0, label="CFG Scale"),
    gr.Dropdown(choices=sampling_steps, label="Sampling Steps", value=100),
]

demo = gr.Interface(
    fn=text_to_speech,
    inputs=_input_widgets,
    outputs=gr.Audio(label="Generated Speech"),
    title="Text to Speech with Diffusion Transformer",
    description="Enter text, select a speaker ID (0-99), and adjust the CFG scale to generate speech.",
    flagging_options=None,
)

demo.launch()