| import gradio as gr | |
| import torch | |
| from transformers import AutoProcessor, VitsModel | |
# Hugging Face Hub identifier of the checkpoint used for synthesis.
model_id = "facebook/mms-tts-ava"

# Both objects are loaded once at import time from the same checkpoint and
# are reused by tts_fn on every call.
model = VitsModel.from_pretrained(model_id)
processor = AutoProcessor.from_pretrained(model_id)
def tts_fn(text):
    """Synthesize speech for ``text`` using the module-level processor and model.

    Args:
        text: The text to synthesize, as received from the Gradio textbox.

    Returns:
        A ``(sample_rate, waveform)`` tuple, where ``waveform`` is a NumPy
        array, or ``None`` when ``text`` is ``None``, empty, or only
        whitespace.
    """
    # Blank input has nothing to synthesize, so skip the processor/model call
    # entirely; returning None leaves the audio output empty.
    if text is None or not text.strip():
        return None

    inputs = processor(text=text, return_tensors="pt")
    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = model(**inputs)

    # squeeze() drops size-1 dimensions before converting to NumPy.
    waveform = output.waveform.squeeze().numpy()

    # Take the rate from the loaded checkpoint's config instead of hard-coding
    # it, so the returned audio stays correct if model_id is changed.
    return (model.config.sampling_rate, waveform)
# Build the single-textbox -> audio UI around tts_fn, then start the server.
# The audio component is declared with type="numpy" and receives whatever
# tts_fn returns.
demo = gr.Interface(
    fn=tts_fn,
    inputs=gr.Textbox(label="Nhập văn bản Avar"),
    outputs=gr.Audio(label="Kết quả TTS", type="numpy"),
    title="TTS Avar - facebook/mms-tts-ava",
)
demo.launch()