Spaces:
Running
on
T4
Running
on
T4
try to figure out how ZeroGPU works
Browse files
app.py
CHANGED
|
@@ -23,7 +23,7 @@ class ControllableInterface(torch.nn.Module):
|
|
| 23 |
|
| 24 |
def __init__(self, available_artificial_voices=1000):
|
| 25 |
super().__init__()
|
| 26 |
-
self.model = ToucanTTSInterface(device="cpu", tts_model_path="Meta")
|
| 27 |
self.wgan = GanWrapper(os.path.join(MODELS_DIR, "Embedding", "embedding_gan.pt"), device="cpu")
|
| 28 |
self.generated_speaker_embeds = list()
|
| 29 |
self.available_artificial_voices = available_artificial_voices
|
|
@@ -53,11 +53,8 @@ class ControllableInterface(torch.nn.Module):
|
|
| 53 |
loudness_in_db
|
| 54 |
):
|
| 55 |
if self.current_language != language:
|
| 56 |
-
self.model
|
| 57 |
self.current_language = language
|
| 58 |
-
if self.current_accent != accent:
|
| 59 |
-
self.model.set_accent_language(accent)
|
| 60 |
-
self.current_accent = accent
|
| 61 |
|
| 62 |
self.wgan.set_latent(voice_seed)
|
| 63 |
controllability_vector = torch.tensor([emb_slider_1,
|
|
@@ -71,40 +68,7 @@ class ControllableInterface(torch.nn.Module):
|
|
| 71 |
|
| 72 |
phones = self.model.text2phone.get_phone_string(prompt)
|
| 73 |
if len(phones) > 1800:
|
| 74 |
-
|
| 75 |
-
prompt = "Deine Eingabe war zu lang. Bitte versuche es entweder mit einem kürzeren Text oder teile ihn in mehrere Teile auf."
|
| 76 |
-
elif language == "ell":
|
| 77 |
-
prompt = "Η εισήγησή σας ήταν πολύ μεγάλη. Παρακαλώ δοκιμάστε είτε ένα μικρότερο κείμενο είτε χωρίστε το σε διάφορα μέρη."
|
| 78 |
-
elif language == "spa":
|
| 79 |
-
prompt = "Su entrada es demasiado larga. Por favor, intente un texto más corto o divídalo en varias partes."
|
| 80 |
-
elif language == "fin":
|
| 81 |
-
prompt = "Vastauksesi oli liian pitkä. Kokeile joko lyhyempää tekstiä tai jaa se useampaan osaan."
|
| 82 |
-
elif language == "rus":
|
| 83 |
-
prompt = "Ваш текст слишком длинный. Пожалуйста, попробуйте либо сократить текст, либо разделить его на несколько частей."
|
| 84 |
-
elif language == "hun":
|
| 85 |
-
prompt = "Túl hosszú volt a bevitele. Kérjük, próbáljon meg rövidebb szöveget írni, vagy ossza több részre."
|
| 86 |
-
elif language == "nld":
|
| 87 |
-
prompt = "Uw input was te lang. Probeer een kortere tekst of splits het in verschillende delen."
|
| 88 |
-
elif language == "fra":
|
| 89 |
-
prompt = "Votre saisie était trop longue. Veuillez essayer un texte plus court ou le diviser en plusieurs parties."
|
| 90 |
-
elif language == 'pol':
|
| 91 |
-
prompt = "Twój wpis był zbyt długi. Spróbuj skrócić tekst lub podzielić go na kilka części."
|
| 92 |
-
elif language == 'por':
|
| 93 |
-
prompt = "O seu contributo foi demasiado longo. Por favor, tente um texto mais curto ou divida-o em várias partes."
|
| 94 |
-
elif language == 'ita':
|
| 95 |
-
prompt = "Il tuo input era troppo lungo. Per favore, prova un testo più corto o dividilo in più parti."
|
| 96 |
-
elif language == 'cmn':
|
| 97 |
-
prompt = "你的输入太长了。请尝试使用较短的文本或将其拆分为多个部分。"
|
| 98 |
-
elif language == 'vie':
|
| 99 |
-
prompt = "Đầu vào của bạn quá dài. Vui lòng thử một văn bản ngắn hơn hoặc chia nó thành nhiều phần."
|
| 100 |
-
else:
|
| 101 |
-
prompt = "Your input was too long. Please try either a shorter text or split it into several parts."
|
| 102 |
-
if self.current_language != "eng":
|
| 103 |
-
self.model.set_phonemizer_language("eng")
|
| 104 |
-
self.current_language = "eng"
|
| 105 |
-
if self.current_accent != "eng":
|
| 106 |
-
self.model.set_accent_language("eng")
|
| 107 |
-
self.current_accent = "eng"
|
| 108 |
|
| 109 |
print(prompt)
|
| 110 |
wav, sr, fig = self.model(prompt,
|
|
@@ -118,7 +82,7 @@ class ControllableInterface(torch.nn.Module):
|
|
| 118 |
return sr, wav, fig
|
| 119 |
|
| 120 |
|
| 121 |
-
title = "Controllable Text-to-Speech for over 7000 Languages"
|
| 122 |
article = "Check out the IMS Toucan TTS Toolkit at https://github.com/DigitalPhonetics/IMS-Toucan"
|
| 123 |
available_artificial_voices = 1000
|
| 124 |
path_to_iso_list = "Preprocessing/multilinguality/iso_to_fullname.json"
|
|
|
|
| 23 |
|
| 24 |
def __init__(self, available_artificial_voices=1000):
|
| 25 |
super().__init__()
|
| 26 |
+
self.model = ToucanTTSInterface(device="cpu", tts_model_path="Meta", language="eng")
|
| 27 |
self.wgan = GanWrapper(os.path.join(MODELS_DIR, "Embedding", "embedding_gan.pt"), device="cpu")
|
| 28 |
self.generated_speaker_embeds = list()
|
| 29 |
self.available_artificial_voices = available_artificial_voices
|
|
|
|
| 53 |
loudness_in_db
|
| 54 |
):
|
| 55 |
if self.current_language != language:
|
| 56 |
+
self.model = ToucanTTSInterface(device="cpu", tts_model_path="Meta", language=language)
|
| 57 |
self.current_language = language
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
self.wgan.set_latent(voice_seed)
|
| 60 |
controllability_vector = torch.tensor([emb_slider_1,
|
|
|
|
| 68 |
|
| 69 |
phones = self.model.text2phone.get_phone_string(prompt)
|
| 70 |
if len(phones) > 1800:
|
| 71 |
+
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
print(prompt)
|
| 74 |
wav, sr, fig = self.model(prompt,
|
|
|
|
| 82 |
return sr, wav, fig
|
| 83 |
|
| 84 |
|
| 85 |
+
title = "🚧UNDER CONSTRUCTION🚧 Controllable Text-to-Speech for over 7000 Languages"
|
| 86 |
article = "Check out the IMS Toucan TTS Toolkit at https://github.com/DigitalPhonetics/IMS-Toucan"
|
| 87 |
available_artificial_voices = 1000
|
| 88 |
path_to_iso_list = "Preprocessing/multilinguality/iso_to_fullname.json"
|