Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan | |
| import torch | |
| #import soundfile as sf | |
| from datasets import load_dataset | |
# Hub checkpoint id shared by the text processor and the acoustic model.
TTS_CHECKPOINT = 'microsoft/speecht5_tts'

# Loaded once at import time so every request reuses the same objects.
processor = SpeechT5Processor.from_pretrained(TTS_CHECKPOINT)
model = SpeechT5ForTextToSpeech.from_pretrained(TTS_CHECKPOINT)

# Passed to model.generate_speech(..., vocoder=vocoder) by text_to_speech.
vocoder = SpeechT5HifiGan.from_pretrained('microsoft/speecht5_hifigan')
# Row of the x-vector dataset used as the (single, fixed) speaker voice.
_SPEAKER_INDEX = 7306
# Sample rate handed to Gradio alongside the generated waveform.
_SAMPLE_RATE = 16000

# Lazily populated by _get_speaker_embeddings(); None until the first request.
_speaker_embeddings = None


def _get_speaker_embeddings():
    """Return the fixed speaker embedding tensor, loading the dataset only once.

    The embedding is identical for every request, so it is cached at module
    level instead of re-reading the dataset on each call. If two first
    requests race, both compute the same value, so the duplicate work is
    harmless.
    """
    global _speaker_embeddings
    if _speaker_embeddings is None:
        embeddings_dataset = load_dataset('Matthijs/cmu-arctic-xvectors', split='validation')
        xvector = embeddings_dataset[_SPEAKER_INDEX]['xvector']
        # unsqueeze(0) adds the leading dimension, as the original code did.
        _speaker_embeddings = torch.tensor(xvector).unsqueeze(0)
    return _speaker_embeddings


def text_to_speech(text):
    """Synthesize speech for ``text`` and yield it as a Gradio audio value.

    Args:
        text: The text to speak. ``None``, empty, or whitespace-only input
            produces no audio.

    Yields:
        ``None`` for blank input; otherwise a ``(sample_rate, waveform)``
        tuple where ``waveform`` is the NumPy array produced by the model.
    """
    # Guard clause: nothing to synthesize, so clear the audio output.
    if not text or not text.strip():
        yield None
        return

    inputs = processor(text=text, return_tensors='pt')
    speech = model.generate_speech(
        inputs["input_ids"],
        _get_speaker_embeddings(),
        vocoder=vocoder,
    )
    yield (_SAMPLE_RATE, speech.numpy())
# UI components: one text box in, one audio player out.
text_input = gr.Textbox(label='text(english)')
audio_output = gr.Audio()

# Wire text_to_speech into a single-function Gradio interface.
app = gr.Interface(
    fn=text_to_speech,
    inputs=text_input,
    outputs=audio_output,
    title='Text To Speech',
    flagging_mode='never',
    concurrency_limit=20,
)

# Start the web server.
app.launch()