Spaces:
Running
Running
Commit
·
310916c
1
Parent(s):
42db371
add metrics text
Browse files
app.py
CHANGED
|
@@ -5,6 +5,7 @@ import random
|
|
| 5 |
from zipfile import ZipFile
|
| 6 |
import uuid
|
| 7 |
|
|
|
|
| 8 |
import torch
|
| 9 |
import torchaudio
|
| 10 |
# By using XTTS you agree to CPML license https://coqui.ai/cpml
|
|
@@ -59,6 +60,7 @@ DEVICE_ASSERT_DETECTED=0
|
|
| 59 |
DEVICE_ASSERT_PROMPT=None
|
| 60 |
DEVICE_ASSERT_LANG=None
|
| 61 |
|
|
|
|
| 62 |
def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_cleanup, no_lang_auto_detect, agree,):
|
| 63 |
if agree == True:
|
| 64 |
supported_languages=["en","es","fr","de","it","pt","pl","tr","ru","nl","cs","ar","zh-cn"]
|
|
@@ -165,9 +167,18 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_clea
|
|
| 165 |
global DEVICE_ASSERT_LANG
|
| 166 |
#It will likely never come here as we restart space on first unrecoverable error now
|
| 167 |
print(f"Unrecoverable exception caused by language:{DEVICE_ASSERT_LANG} prompt:{DEVICE_ASSERT_PROMPT}")
|
|
|
|
|
|
|
|
|
|
| 168 |
|
|
|
|
| 169 |
gpt_cond_latent, _, speaker_embedding = model.get_conditioning_latents(audio_path=speaker_wav)
|
|
|
|
|
|
|
|
|
|
| 170 |
wav_chunks = []
|
|
|
|
|
|
|
| 171 |
|
| 172 |
chunks = model.inference_stream(
|
| 173 |
prompt,
|
|
@@ -175,14 +186,24 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_clea
|
|
| 175 |
gpt_cond_latent,
|
| 176 |
speaker_embedding,)
|
| 177 |
try:
|
|
|
|
| 178 |
for i, chunk in enumerate(chunks):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
wav_chunks.append(chunk)
|
| 180 |
print(f"Received chunk {i} of audio length {chunk.shape[-1]}")
|
|
|
|
| 181 |
out_file = f'{i}.wav'
|
| 182 |
write(out_file, 24000, chunk.detach().cpu().numpy().squeeze())
|
| 183 |
audio = AudioSegment.from_file(out_file)
|
| 184 |
audio.export(out_file, format='wav')
|
| 185 |
-
|
|
|
|
|
|
|
| 186 |
except RuntimeError as e :
|
| 187 |
if "device-side assert" in str(e):
|
| 188 |
# cannot do anything on cuda device side error, need to restart
|
|
@@ -212,6 +233,7 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_clea
|
|
| 212 |
audio="output.wav",
|
| 213 |
),
|
| 214 |
"sil.wav",
|
|
|
|
| 215 |
speaker_wav,
|
| 216 |
)
|
| 217 |
else:
|
|
@@ -220,6 +242,7 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_clea
|
|
| 220 |
None,
|
| 221 |
None,
|
| 222 |
None,
|
|
|
|
| 223 |
)
|
| 224 |
|
| 225 |
|
|
@@ -439,10 +462,11 @@ gr.Interface(
|
|
| 439 |
outputs=[
|
| 440 |
gr.Video(label="Waveform Visual"),
|
| 441 |
gr.Audio(label="Synthesised Audio", streaming=True, autoplay=True),
|
|
|
|
| 442 |
gr.Audio(label="Reference Audio Used"),
|
| 443 |
],
|
| 444 |
title=title,
|
| 445 |
description=description,
|
| 446 |
article=article,
|
| 447 |
-
|
| 448 |
-
).queue().launch(debug=True,show_api=
|
|
|
|
| 5 |
from zipfile import ZipFile
|
| 6 |
import uuid
|
| 7 |
|
| 8 |
+
import time
|
| 9 |
import torch
|
| 10 |
import torchaudio
|
| 11 |
# By using XTTS you agree to CPML license https://coqui.ai/cpml
|
|
|
|
| 60 |
DEVICE_ASSERT_PROMPT=None
|
| 61 |
DEVICE_ASSERT_LANG=None
|
| 62 |
|
| 63 |
+
|
| 64 |
def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_cleanup, no_lang_auto_detect, agree,):
|
| 65 |
if agree == True:
|
| 66 |
supported_languages=["en","es","fr","de","it","pt","pl","tr","ru","nl","cs","ar","zh-cn"]
|
|
|
|
| 167 |
global DEVICE_ASSERT_LANG
|
| 168 |
#It will likely never come here as we restart space on first unrecoverable error now
|
| 169 |
print(f"Unrecoverable exception caused by language:{DEVICE_ASSERT_LANG} prompt:{DEVICE_ASSERT_PROMPT}")
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
metrics_text= ""
|
| 173 |
|
| 174 |
+
t_latent=time.time()
|
| 175 |
gpt_cond_latent, _, speaker_embedding = model.get_conditioning_latents(audio_path=speaker_wav)
|
| 176 |
+
latent_calculation_time = time.time() - t_latent
|
| 177 |
+
metrics_text=f"Embedding calculation time: {latent_calculation_time:.2f} seconds\n"
|
| 178 |
+
|
| 179 |
wav_chunks = []
|
| 180 |
+
|
| 181 |
+
t_inference=time.time()
|
| 182 |
|
| 183 |
chunks = model.inference_stream(
|
| 184 |
prompt,
|
|
|
|
| 186 |
gpt_cond_latent,
|
| 187 |
speaker_embedding,)
|
| 188 |
try:
|
| 189 |
+
first_chunk=True
|
| 190 |
for i, chunk in enumerate(chunks):
|
| 191 |
+
if first_chunk:
|
| 192 |
+
first_chunk_time = time.time() - t_inference
|
| 193 |
+
metrics_text+=f"Streaming: First chunk actual latency: {first_chunk_time:.2f} seconds\n"
|
| 194 |
+
first_chunk=False
|
| 195 |
+
|
| 196 |
+
|
| 197 |
wav_chunks.append(chunk)
|
| 198 |
print(f"Received chunk {i} of audio length {chunk.shape[-1]}")
|
| 199 |
+
|
| 200 |
out_file = f'{i}.wav'
|
| 201 |
write(out_file, 24000, chunk.detach().cpu().numpy().squeeze())
|
| 202 |
audio = AudioSegment.from_file(out_file)
|
| 203 |
audio.export(out_file, format='wav')
|
| 204 |
+
|
| 205 |
+
yield (None, out_file, metrics_text, None)
|
| 206 |
+
|
| 207 |
except RuntimeError as e :
|
| 208 |
if "device-side assert" in str(e):
|
| 209 |
# cannot do anything on cuda device side error, need to restart
|
|
|
|
| 233 |
audio="output.wav",
|
| 234 |
),
|
| 235 |
"sil.wav",
|
| 236 |
+
metrics_text,
|
| 237 |
speaker_wav,
|
| 238 |
)
|
| 239 |
else:
|
|
|
|
| 242 |
None,
|
| 243 |
None,
|
| 244 |
None,
|
| 245 |
+
None,
|
| 246 |
)
|
| 247 |
|
| 248 |
|
|
|
|
| 462 |
outputs=[
|
| 463 |
gr.Video(label="Waveform Visual"),
|
| 464 |
gr.Audio(label="Synthesised Audio", streaming=True, autoplay=True),
|
| 465 |
+
gr.Text(label="Metrics"),
|
| 466 |
gr.Audio(label="Reference Audio Used"),
|
| 467 |
],
|
| 468 |
title=title,
|
| 469 |
description=description,
|
| 470 |
article=article,
|
| 471 |
+
examples=examples,
|
| 472 |
+
).queue().launch(debug=True,show_api=True)
|