translation_game / app /speech.py
wdeback's picture
Upload folder using huggingface_hub
20f080f
import torch
from TTS.api import TTS
from loguru import logger
class Speech:
    """Thin wrapper around a Coqui TTS model that renders text to a WAV file."""

    # Model identifier handed to ``TTS(...)`` when the wrapper is created.
    MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v1"

    # Language codes accepted by ``text_to_speech``.
    VALID_LANGUAGES = ("en", "de", "es", "fr", "it", "pt", "pl")

    def __init__(self) -> None:
        """Load the TTS model onto the GPU when CUDA is available, else the CPU."""
        # To list the available TTS models: print(TTS().list_models())
        device = "cuda" if torch.cuda.is_available() else "cpu"

        # Init TTS (and download model if necessary)
        self.tts = TTS(self.MODEL_NAME).to(device)
        logger.info(f"Initialized TTS: {self.tts}")

    def text_to_speech(
        self,
        text: str,
        language: str,
        file_path: str = "output.wav",
        speaker_wav: "str | None" = None,
    ) -> str:
        """Synthesize ``text`` in ``language`` and write the audio to ``file_path``.

        Args:
            text: Text to speak. Every ``"##"`` sequence is stripped before
                synthesis.
            language: Target language code; must be one of ``VALID_LANGUAGES``.
            file_path: Destination of the generated audio. Defaults to
                ``"output.wav"``, the path this method has always written to.
            speaker_wav: Optional path to a reference recording of the voice to
                clone. It is forwarded to the model only when given, so the
                default call is identical to the previous behaviour.

        Returns:
            The path the audio was written to (``file_path``).

        Raises:
            ValueError: If ``language`` is not in ``VALID_LANGUAGES``.
        """
        logger.info(f"Text to speech: {text} to {language}")

        # Guard clause: reject unsupported languages before touching the model.
        if language not in self.VALID_LANGUAGES:
            raise ValueError(
                f"Language '{language}' is not one of the supported languages: "
                f"{list(self.VALID_LANGUAGES)}"
            )

        # remove ## from text
        text = text.replace("##", "")

        # The model is a multi-lingual voice cloning model, so the language is
        # always passed; the reference voice is passed only when supplied.
        synth_kwargs = {"text": text, "language": language, "file_path": file_path}
        if speaker_wav is not None:
            synth_kwargs["speaker_wav"] = speaker_wav

        # Text to speech to a file
        self.tts.tts_to_file(**synth_kwargs)
        logger.info("Converted text to speech")
        return file_path