| from TTS.tts.models import setup_model as setup_tts_model |
| from TTS.config import load_config |
| import soundfile as sf |
| import torch |
| import os |
|
|
class TTS_object(object):
    """Thin wrapper around a Coqui-TTS (XTTS-style) model for Arabic speech synthesis.

    Loads a checkpoint + config from a model directory and exposes voice-cloning
    inference driven by a short reference audio file.
    """

    def __init__(self, model_path, device):
        """Load config and checkpoint from *model_path* and move the model to *device*.

        Args:
            model_path: Directory containing ``config.json`` and the checkpoint files.
            device: Torch device string, e.g. ``"cuda"`` or ``"cpu"``.
        """
        config = load_config(os.path.join(model_path, "config.json"))
        self.config = config
        self.model = setup_tts_model(config)
        self.model.load_checkpoint(config, checkpoint_dir=model_path,
                                   eval=True, use_deepspeed=False, strict=False)
        # Bug fix: the original hardcoded .to("cuda"), silently ignoring the
        # caller-supplied device (and crashing on CUDA-less hosts).
        self.device = device
        self.model.to(device)

    def inference(self, text, reference_file_path):
        """Synthesize Arabic speech for *text*, cloning the voice from a reference clip.

        Args:
            text: Text to synthesize (language is fixed to ``"ar"``).
            reference_file_path: Path to the reference audio used for voice cloning.

        Returns:
            The raw waveform produced by the model (the ``'wav'`` entry of the
            model's inference output).
        """
        gpt_cond_latent, speaker_embedding = self.model.get_conditioning_latents(
            audio_path=[reference_file_path])
        # Very low temperature keeps decoding near-deterministic.
        return self.model.inference(
            text, "ar", gpt_cond_latent,
            speaker_embedding, temperature=0.01
        )['wav']

    def save_inference(self, text, speaker, filepath):
        """Synthesize speech for *text* with *speaker*'s voice and write it to *filepath*.

        Previously an unimplemented stub; now delegates to :meth:`inference` and
        writes the waveform with ``soundfile``.

        Args:
            text: Text to synthesize.
            speaker: Path to the reference audio file for voice cloning.
            filepath: Destination audio file path (format inferred from extension).
        """
        wav = self.inference(text, speaker)
        # NOTE(review): sample rate is read from the loaded config when present;
        # 24000 Hz is the XTTS default — confirm against the deployed model.
        audio_cfg = getattr(self.config, "audio", None)
        sample_rate = getattr(audio_cfg, "output_sample_rate", None) or 24000
        sf.write(filepath, wav, sample_rate)
|
|