Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
| 1 |
import sys
|
| 2 |
-
import os,stat
|
| 3 |
import subprocess
|
| 4 |
import random
|
| 5 |
from zipfile import ZipFile
|
| 6 |
import uuid
|
| 7 |
|
|
|
|
|
|
|
| 8 |
# By using XTTS you agree to CPML license https://coqui.ai/cpml
|
| 9 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
| 10 |
|
|
@@ -13,9 +15,18 @@ os.environ["COQUI_TOS_AGREED"] = "1"
|
|
| 13 |
import langid
|
| 14 |
|
| 15 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
| 16 |
from TTS.api import TTS
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
|
|
|
| 18 |
from huggingface_hub import HfApi
|
|
|
|
| 19 |
# will use the API to restart the space on an unrecoverable error
|
| 20 |
api = HfApi(token=HF_TOKEN)
|
| 21 |
repo_id = "coqui/xtts"
|
|
@@ -29,8 +40,19 @@ os.chmod('ffmpeg', st.st_mode | stat.S_IEXEC)
|
|
| 29 |
|
| 30 |
# Load TTS
|
| 31 |
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1")
|
| 32 |
-
tts.to("cuda")
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
# This is for debugging purposes only
|
| 36 |
DEVICE_ASSERT_DETECTED=0
|
|
@@ -143,14 +165,24 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_clea
|
|
| 143 |
global DEVICE_ASSERT_LANG
|
| 144 |
#It will likely never come here as we restart space on first unrecoverable error now
|
| 145 |
print(f"Unrecoverable exception caused by language:{DEVICE_ASSERT_LANG} prompt:{DEVICE_ASSERT_PROMPT}")
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
except RuntimeError as e :
|
| 155 |
if "device-side assert" in str(e):
|
| 156 |
# cannot do anything on a CUDA device-side error, need to restart
|
|
@@ -168,13 +200,6 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_clea
|
|
| 168 |
else:
|
| 169 |
print("RuntimeError: non device-side assert error:", str(e))
|
| 170 |
raise e
|
| 171 |
-
return (
|
| 172 |
-
gr.make_waveform(
|
| 173 |
-
audio="output.wav",
|
| 174 |
-
),
|
| 175 |
-
"output.wav",
|
| 176 |
-
speaker_wav,
|
| 177 |
-
)
|
| 178 |
else:
|
| 179 |
gr.Warning("Please accept the Terms & Condition!")
|
| 180 |
return (
|
|
|
|
| 1 |
import sys
|
| 2 |
+
import io, os, stat
|
| 3 |
import subprocess
|
| 4 |
import random
|
| 5 |
from zipfile import ZipFile
|
| 6 |
import uuid
|
| 7 |
|
| 8 |
+
import torch
|
| 9 |
+
import torchaudio
|
| 10 |
# By using XTTS you agree to CPML license https://coqui.ai/cpml
|
| 11 |
os.environ["COQUI_TOS_AGREED"] = "1"
|
| 12 |
|
|
|
|
| 15 |
import langid
|
| 16 |
|
| 17 |
import gradio as gr
|
| 18 |
+
from scipy.io.wavfile import write
|
| 19 |
+
from pydub import AudioSegment
|
| 20 |
+
|
| 21 |
from TTS.api import TTS
|
| 22 |
+
from TTS.tts.configs.xtts_config import XttsConfig
|
| 23 |
+
from TTS.tts.models.xtts import Xtts
|
| 24 |
+
from TTS.utils.generic_utils import get_user_data_dir
|
| 25 |
+
|
| 26 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 27 |
+
|
| 28 |
from huggingface_hub import HfApi
|
| 29 |
+
|
| 30 |
# will use the API to restart the space on an unrecoverable error
|
| 31 |
api = HfApi(token=HF_TOKEN)
|
| 32 |
repo_id = "coqui/xtts"
|
|
|
|
| 40 |
|
| 41 |
# Load TTS
|
| 42 |
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1")
|
|
|
|
| 43 |
|
| 44 |
+
model_path = os.path.join(get_user_data_dir("tts"), "tts_models--multilingual--multi-dataset--xtts_v1")
|
| 45 |
+
config = XttsConfig()
|
| 46 |
+
config.load_json(os.path.join(model_path, "config.json"))
|
| 47 |
+
model = Xtts.init_from_config(config)
|
| 48 |
+
model.load_checkpoint(
|
| 49 |
+
config,
|
| 50 |
+
checkpoint_path=os.path.join(model_path, "model.pth"),
|
| 51 |
+
vocab_path=os.path.join(model_path, "vocab.json"),
|
| 52 |
+
eval=True,
|
| 53 |
+
use_deepspeed=True
|
| 54 |
+
)
|
| 55 |
+
model.cuda()
|
| 56 |
|
| 57 |
# This is for debugging purposes only
|
| 58 |
DEVICE_ASSERT_DETECTED=0
|
|
|
|
| 165 |
global DEVICE_ASSERT_LANG
|
| 166 |
#It will likely never come here as we restart space on first unrecoverable error now
|
| 167 |
print(f"Unrecoverable exception caused by language:{DEVICE_ASSERT_LANG} prompt:{DEVICE_ASSERT_PROMPT}")
|
| 168 |
+
|
| 169 |
+
gpt_cond_latent, _, speaker_embedding = model.get_conditioning_latents(audio_path=speaker_wav)
|
| 170 |
+
wav_chunks = []
|
| 171 |
+
|
| 172 |
+
chunks = model.inference_stream(
|
| 173 |
+
prompt,
|
| 174 |
+
language,
|
| 175 |
+
gpt_cond_latent,
|
| 176 |
+
speaker_embedding,)
|
| 177 |
+
try:
|
| 178 |
+
|
| 179 |
+
for i, chunk in enumerate(chunks):
|
| 180 |
+
print(f"Received chunk {i} of audio length {chunk.shape[-1]}")
|
| 181 |
+
out_file = f'{i}.wav'
|
| 182 |
+
write(out_file, 24000, chunk.detach().cpu().numpy().squeeze())
|
| 183 |
+
audio = AudioSegment.from_file(out_file)
|
| 184 |
+
audio.export(out_file, format='wav')
|
| 185 |
+
yield (gr.make_waveform(audio=out_file),out_file, speaker_wav)
|
| 186 |
except RuntimeError as e :
|
| 187 |
if "device-side assert" in str(e):
|
| 188 |
# cannot do anything on a CUDA device-side error, need to restart
|
|
|
|
| 200 |
else:
|
| 201 |
print("RuntimeError: non device-side assert error:", str(e))
|
| 202 |
raise e
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 203 |
else:
|
| 204 |
gr.Warning("Please accept the Terms & Condition!")
|
| 205 |
return (
|