| | import gradio as gr |
| | import torch |
| | import numpy as np |
| | from transformers import VitsModel, AutoTokenizer |
| |
|
| | |
# Hugging Face checkpoint identifier for the TTS model served by this app.
model_id = "facebook/mms-tts-yua"

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

print(f"Cargando modelo en: {device}")

# Load the weights first, then move them to the selected device.
model = VitsModel.from_pretrained(model_id)
model = model.to(device)

# The tokenizer comes from the same checkpoint as the model.
tokenizer = AutoTokenizer.from_pretrained(model_id)
| |
|
def generate_tts(text):
    """Synthesize speech for *text* using the module-level model and tokenizer.

    Args:
        text: The text to speak. ``None``, an empty string, or a
            whitespace-only string means there is nothing to synthesize.

    Returns:
        ``None`` when there is nothing to synthesize. Otherwise a
        ``(sample_rate, waveform)`` tuple, where ``sample_rate`` is read from
        ``model.config.sampling_rate`` and ``waveform`` is the first item of
        the model's output batch, moved to the CPU and converted to a NumPy
        array.
    """
    # Check for None before calling .strip(): a missing value would otherwise
    # raise AttributeError instead of producing "no audio".
    if text is None or not text.strip():
        return None

    # Tokenize to PyTorch tensors and place them on the same device as the model.
    inputs = tokenizer(text, return_tensors="pt").to(device)

    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        output = model(**inputs)

    # Take the single item of the batch and hand it back as a CPU NumPy array.
    waveform = output.waveform[0].cpu().numpy()
    sample_rate = model.config.sampling_rate

    return (sample_rate, waveform)
| |
|
| | |
# Gradio UI: a single text box feeds generate_tts and the result is shown in
# an audio component.
demo = gr.Interface(
    title="Maya Yucateco TTS (Facebook MMS)",
    description="Servidor de síntesis de voz para el idioma Maya Yucateco.",
    fn=generate_tts,
    inputs=gr.Textbox(
        placeholder="Bix yanilech...",
        label="Escribe en Maya Yucateco",
    ),
    outputs=gr.Audio(label="Audio Generado"),
)


# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()