import gradio as gr
import torch
import torchaudio
from transformers import AutoProcessor, AutoModelForTextToWaveform
# Checkpoint identifier shared by the processor and the model loaders below.
model_id = "facebook/mms-tts-sah"

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the processor and the model from the same checkpoint, then move the
# model to the selected device so it matches the inputs sent there later.
processor = AutoProcessor.from_pretrained(model_id)
model = AutoModelForTextToWaveform.from_pretrained(model_id)
model = model.to(device)
def yakut_tts(text):
    """Synthesize speech for ``text`` with the module-level TTS model.

    Args:
        text: The text to speak, as submitted from the Gradio textbox.

    Returns:
        A ``(sampling_rate, samples)`` tuple, where ``sampling_rate`` comes
        from ``model.config`` and ``samples`` is the generated waveform as a
        NumPy array. Returns ``None`` when ``text`` is empty or contains only
        whitespace, so no inference is attempted on blank input.
    """
    # Nothing to synthesize: skip tokenization and inference instead of
    # feeding the model an empty sequence.
    if not text or not text.strip():
        return None

    inputs = processor(text=text, return_tensors="pt").to(device)
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        outputs = model(**inputs)
    # Drop size-1 dimensions and move to the CPU before converting to NumPy.
    waveform = outputs.waveform.squeeze().cpu()
    return (model.config.sampling_rate, waveform.numpy())
# Build the web UI: one text box in, one audio output, backed by yakut_tts.
text_box = gr.Textbox(label="Yakut Text", placeholder="Саха тыла")
audio_player = gr.Audio(label="Generated Audio", type="numpy")
demo = gr.Interface(
    fn=yakut_tts,
    inputs=text_box,
    outputs=audio_player,
    title="Yakut Text-to-Speech",
    description="Enter Yakut (Sakha) text and generate speech using facebook/mms-tts-sah model.",
)

# Start the Gradio app.
demo.launch()