Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import torch | |
| import torchaudio | |
| import tempfile | |
| import numpy as np | |
| from nemo.collections.tts.models import FastPitchModel | |
| from nemo.collections.tts.models import HifiGanModel | |
| from nemo.collections.tts.models import MixerTTSModel | |
| from transformers import pipeline | |
# spec_generator_2 = MixerTTSModel.from_pretrained("tts_en_lj_mixerttsx")
# model1 = HifiGanModel.from_pretrained(model_name="tts_en_lj_hifigan_ft_mixerttsx")
def greet(name: str) -> str:
    """Return the greeting ``"Hello <name>!!"`` for *name*."""
    return "Hello " + name + "!!"
def run(spec_generator, voc_model, pipe):
    """Build the Gradio text-to-speech demo and start serving it.

    Args:
        spec_generator: Model exposing ``parse(text)`` and
            ``generate_spectrogram(tokens=..., speaker=...)``.
        voc_model: Vocoder exposing
            ``convert_spectrogram_to_audio(spec=...)``.
        pipe: Accepted so existing callers keep working; this function
            does not use it.

    The call blocks inside ``demo.launch`` while the server is running.
    """

    def generate_tts(text: str, speaker: int = 0):
        """Synthesize *text* and return ``(sample_rate, waveform)``."""
        # NOTE(review): assumes the vocoder emits 44.1 kHz audio — confirm
        # against the checkpoint that is actually loaded.
        sr = 44100
        # Inference only: without no_grad the output tensor may require
        # grad, and ``.numpy()`` refuses to convert such tensors.
        with torch.no_grad():
            parsed = spec_generator.parse(text)
            spectrogram = spec_generator.generate_spectrogram(
                tokens=parsed,
                # The slider may hand back a float; speaker ids are integers.
                speaker=int(speaker),
            )
            audio = voc_model.convert_spectrogram_to_audio(spec=spectrogram)
        # An Audio(type="numpy") output expects a (sample_rate, ndarray)
        # tuple, not a component-update object.
        return sr, audio.squeeze(0).detach().cpu().numpy()

    demo = gr.Interface(
        fn=generate_tts,
        inputs=[
            gr.Textbox(value="This is a test.", label="Text to Synthesize"),
            gr.Slider(0, 10, step=1, label="Speaker"),
        ],
        outputs=gr.Audio(label="Output", type="numpy"),
        # String form is the documented value; the boolean is deprecated.
        allow_flagging="never",
    )
    demo.launch(server_name="0.0.0.0", server_port=7860)
if __name__ == "__main__":
    # Script entry point: load the spectrogram generator and the vocoder,
    # switch both to eval mode, then hand everything to the demo.
    spec_generator = FastPitchModel.from_pretrained("tts_en_fastpitch_multispeaker")
    spec_generator.eval()

    voc_model = HifiGanModel.from_pretrained(model_name="tts_en_hifitts_hifigan_ft_fastpitch")
    voc_model.eval()

    # NOTE(review): this pipeline is loaded and passed to run(), but the
    # run() visible in this file never reads it — confirm whether it is
    # still needed.
    pipe = pipeline("text-to-speech", model="suno/bark-small")

    run(spec_generator, voc_model, pipe)