# Ttsyakut / app.py
# Last commit by ollui: "Update app.py" (42712e0, verified)
import torch
import torchaudio
from transformers import AutoProcessor, AutoModelForTextToWaveform
import gradio as gr
# Run on the GPU when PyTorch reports one as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Checkpoint identifier handed to both from_pretrained() calls below.
model_id = "facebook/mms-tts-sah"

# Load the text processor and the text-to-waveform model once at import time
# so every request served by the app reuses the same objects.
processor = AutoProcessor.from_pretrained(model_id)
model = AutoModelForTextToWaveform.from_pretrained(model_id)
model = model.to(device)
def yakut_tts(text):
    """Synthesize speech for ``text`` with the module-level TTS model.

    Args:
        text: The text to speak, as typed into the app's textbox.

    Returns:
        A ``(sampling_rate, samples)`` tuple, where ``sampling_rate`` is
        ``model.config.sampling_rate`` and ``samples`` is the squeezed
        output waveform converted to a NumPy array. Returns ``None`` when
        ``text`` is ``None``, empty, or only whitespace.
    """
    # Nothing to synthesize: skip the tokenizer/model entirely instead of
    # feeding them an empty sequence. None is meant to leave the audio
    # output empty (see the Gradio Audio component documentation).
    if text is None or not text.strip():
        return None

    # Tokenize and move the tensors to the same device as the model.
    inputs = processor(text=text, return_tensors="pt").to(device)

    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        outputs = model(**inputs)

    # Drop size-1 dimensions and bring the samples back to the CPU so they
    # can be converted to NumPy.
    waveform = outputs.waveform.squeeze().cpu()
    return (model.config.sampling_rate, waveform.numpy())
# Build the two UI components first, then wire them to the synthesis function.
text_input = gr.Textbox(label="Yakut Text", placeholder="Саха тыла")
audio_output = gr.Audio(label="Generated Audio", type="numpy")

demo = gr.Interface(
    fn=yakut_tts,
    inputs=text_input,
    outputs=audio_output,
    title="Yakut Text-to-Speech",
    description="Enter Yakut (Sakha) text and generate speech using facebook/mms-tts-sah model.",
)

# Start the web app as soon as this module is executed.
demo.launch()