import base64
import io
import os
import tempfile

import scipy.io.wavfile as wavfile
import torch
from transformers import VitsModel, VitsTokenizer
# Checkpoint identifier handed to ``from_pretrained`` for both the model and
# its tokenizer (presumably the MMS text-to-speech checkpoint for Mooré,
# ISO 639-3 code ``mos`` -- confirm against the model card).
MODEL_NAME = "facebook/mms-tts-mos"

# Both objects are created once, at import time, and shared by every call to
# ``mooreTTS`` below.
model = VitsModel.from_pretrained(MODEL_NAME)
tokenizer = VitsTokenizer.from_pretrained(MODEL_NAME)
def mooreTTS(text: str) -> str:
    """Synthesize speech for ``text`` and return it as a base64-encoded WAV.

    The text is tokenized with the module-level ``tokenizer``, run through the
    module-level ``model``, and the resulting waveform is serialized as a WAV
    file at ``model.config.sampling_rate``.

    Args:
        text: Text to synthesize; passed to the tokenizer unchanged.

    Returns:
        The complete WAV file contents encoded as a base64 string.
    """
    inputs = tokenizer(text, return_tensors="pt")

    # Inference only, so autograd bookkeeping is disabled.
    with torch.no_grad():
        output = model(**inputs)

    # Index 0 selects the first waveform of the output batch; SciPy needs a
    # NumPy array in host memory.
    waveform = output.waveform[0].cpu().numpy()

    # Serialize into an in-memory buffer rather than a named temporary file:
    # nothing is left on disk if writing fails, and it avoids reopening a
    # still-open temporary file by name (which is not possible on Windows).
    buffer = io.BytesIO()
    wavfile.write(buffer, rate=model.config.sampling_rate, data=waveform)
    return base64.b64encode(buffer.getvalue()).decode("utf-8")