Spaces:
Sleeping
Sleeping
| import torch | |
| from TTS.api import TTS | |
| from loguru import logger | |
class Speech:
    """Wrapper around a Coqui TTS model that renders text to a WAV file.

    The model is loaded once in ``__init__`` and reused for every call to
    :meth:`text_to_speech`.
    """

    # Language codes accepted by ``text_to_speech``.
    SUPPORTED_LANGUAGES = ("en", "de", "es", "fr", "it", "pt", "pl")

    def __init__(
        self,
        model_name: str = "tts_models/multilingual/multi-dataset/xtts_v1",
    ) -> None:
        """Load the TTS model onto the GPU when available, else the CPU.

        Args:
            model_name: Coqui TTS model identifier passed to ``TTS(...)``.
                Defaults to the multilingual XTTS v1 model. Per the original
                author's note, the model is downloaded if necessary.
        """
        device = "cuda" if torch.cuda.is_available() else "cpu"
        self.tts = TTS(model_name).to(device)
        logger.info(f"Initialized TTS: {self.tts}")

    def text_to_speech(
        self,
        text: str,
        language: str,
        file_path: str = "output.wav",
        speaker_wav: "str | None" = None,
    ) -> str:
        """Synthesize ``text`` in ``language`` and write it to ``file_path``.

        Args:
            text: Text to speak. Every ``"##"`` sequence is stripped before
                synthesis.
            language: Target language code; must be one of
                ``SUPPORTED_LANGUAGES``.
            file_path: Destination of the generated audio file. Defaults to
                ``"output.wav"`` (the previously hard-coded location).
            speaker_wav: Optional reference audio forwarded to
                ``tts_to_file``. NOTE(review): the original comment states
                that this voice-cloning model needs a target ``speaker_wav``;
                confirm whether the installed TTS version requires one.

        Returns:
            ``file_path``, i.e. the path the audio was written to.

        Raises:
            ValueError: If ``language`` is not a supported language code.
        """
        # Fail fast: reject bad input before logging or touching the model.
        if language not in self.SUPPORTED_LANGUAGES:
            raise ValueError(
                f"Language '{language}' is not one of the supported languages: "
                f"{list(self.SUPPORTED_LANGUAGES)}"
            )

        logger.info(f"Text to speech: {text} to {language}")

        # Strip the "##" markers from the text so they are not synthesized.
        text = text.replace("##", "")

        self.tts.tts_to_file(
            text=text,
            language=language,
            speaker_wav=speaker_wav,
            file_path=file_path,
        )
        logger.info("Converted text to speech")
        return file_path