Spaces:

Flux9665
/

MassivelyMultilingualTTS

Running on T4

App Files Files

Flux9665 committed on Sep 28, 2024

Commit

db5766e

verified ·

1 Parent(s): 43f2732

Update InferenceInterfaces/ControllableInterface.py

Browse files

Files changed (1) hide show

InferenceInterfaces/ControllableInterface.py +13 -5

InferenceInterfaces/ControllableInterface.py CHANGED Viewed

@@ -1,23 +1,31 @@
 import os
 import torch
 from InferenceInterfaces.ToucanTTSInterface import ToucanTTSInterface
 from Modules.ControllabilityGAN.GAN import GanWrapper
-from Utility.storage_config import MODELS_DIR
 class ControllableInterface:
-    def __init__(self, gpu_id="cpu", available_artificial_voices=1000):
         if gpu_id == "cpu":
             os.environ["CUDA_VISIBLE_DEVICES"] = ""
-        else:
             os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
             os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu_id}"
         self.device = "cuda" if gpu_id != "cpu" else "cpu"
-        self.model = ToucanTTSInterface(device=self.device, tts_model_path="Meta")
-        self.wgan = GanWrapper(os.path.join(MODELS_DIR, "Embedding", "embedding_gan.pt"), device=self.device)
         self.generated_speaker_embeds = list()
         self.available_artificial_voices = available_artificial_voices
         self.current_language = ""

 import os
 import torch
+from huggingface_hub import hf_hub_download
 from InferenceInterfaces.ToucanTTSInterface import ToucanTTSInterface
 from Modules.ControllabilityGAN.GAN import GanWrapper
 class ControllableInterface:
+    def __init__(self, gpu_id="cpu", available_artificial_voices=50, tts_model_path=None, vocoder_model_path=None, embedding_gan_path=None):
         if gpu_id == "cpu":
             os.environ["CUDA_VISIBLE_DEVICES"] = ""
+        elif gpu_id == "cuda":
+            pass
+        else:  # in this case we hopefully got a number.
             os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
             os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu_id}"
+        if tts_model_path is None:
+            tts_model_path = hf_hub_download(repo_id="Flux9665/ToucanTTS", filename="ToucanTTS.pt")
+        if vocoder_model_path is None:
+            vocoder_model_path = hf_hub_download(repo_id="Flux9665/ToucanTTS", filename="Vocoder.pt")
+        if embedding_gan_path is None:
+            embedding_gan_path = hf_hub_download(repo_id="Flux9665/ToucanTTS", filename="embedding_gan.pt")
         self.device = "cuda" if gpu_id != "cpu" else "cpu"
+        self.model = ToucanTTSInterface(device=self.device, tts_model_path=tts_model_path, vocoder_model_path=vocoder_model_path)
+        self.wgan = GanWrapper(embedding_gan_path, num_cached_voices=available_artificial_voices, device=self.device)
         self.generated_speaker_embeds = list()
         self.available_artificial_voices = available_artificial_voices
         self.current_language = ""