AI-API / language /moore /mos_tts.py
Anicet
update: add dioula models for STT and TTS
a6299ef
Raw
History Blame Contribute Delete
854 Bytes
import torch, base64, tempfile, os
import scipy.io.wavfile as wavfile
from transformers import VitsModel, VitsTokenizer
# Hugging Face Hub identifier of the VITS checkpoint used for synthesis.
# NOTE(review): presumably the MMS text-to-speech model for Mooré
# (ISO 639-3 code "mos") -- confirm against the model card.
MODEL_NAME = "facebook/mms-tts-mos"
# Tokenizer and model are created once, at import time, and shared by every
# call to mooreTTS() below. Importing this module therefore pays the full
# load cost (and presumably a download on first use -- confirm cache setup).
tokenizer = VitsTokenizer.from_pretrained(MODEL_NAME)
model = VitsModel.from_pretrained(MODEL_NAME)
def mooreTTS(text: str) -> str:
    """Synthesize speech for *text* and return it as a base64-encoded WAV.

    The text is tokenized with the module-level ``tokenizer`` and passed to
    the module-level ``model`` with gradient tracking disabled. The first
    waveform of the model output is serialized as a WAV file at
    ``model.config.sampling_rate`` and the resulting bytes are base64-encoded.

    Args:
        text: The text to synthesize.

    Returns:
        The full WAV file content (header included), base64-encoded and
        decoded to a ``str``.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import io

    inputs = tokenizer(text, return_tensors="pt")
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        output = model(**inputs)
    # First item of the output batch, moved to CPU as a NumPy array.
    waveform = output.waveform[0].cpu().numpy()

    # scipy's writer accepts an open file handle, so the WAV is built in
    # memory. This avoids the previous temp-file round trip, which left the
    # file behind on disk whenever the write itself raised.
    wav_buffer = io.BytesIO()
    wavfile.write(wav_buffer, rate=model.config.sampling_rate, data=waveform)
    return base64.b64encode(wav_buffer.getvalue()).decode("utf-8")