Spaces:

gorkemgoknar
/

xtts-streaming

Running

App Files Files Community

reach-vb committed on Oct 5, 2023

Commit

096c2f1

1 Parent(s): d9911b4

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -17

app.py CHANGED Viewed

@@ -1,10 +1,12 @@
 import sys
-import os,stat
 import subprocess
 import random
 from zipfile import ZipFile
 import uuid
 # By using XTTS you agree to CPML license https://coqui.ai/cpml
 os.environ["COQUI_TOS_AGREED"] = "1"
@@ -13,9 +15,18 @@ os.environ["COQUI_TOS_AGREED"] = "1"
 import langid
 import gradio as gr
 from TTS.api import TTS
 HF_TOKEN = os.environ.get("HF_TOKEN")
 from huggingface_hub import HfApi
 # will use api to restart space on an unrecoverable error
 api = HfApi(token=HF_TOKEN)
 repo_id = "coqui/xtts"
@@ -29,8 +40,19 @@ os.chmod('ffmpeg', st.st_mode | stat.S_IEXEC)
 # Load TTS
 tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1")
-tts.to("cuda")
 # This is for debugging purposes only
 DEVICE_ASSERT_DETECTED=0
@@ -143,14 +165,24 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_clea
             global DEVICE_ASSERT_LANG
             #It will likely never come here as we restart space on first unrecoverable error now
             print(f"Unrecoverable exception caused by language:{DEVICE_ASSERT_LANG} prompt:{DEVICE_ASSERT_PROMPT}")
-        try:
-            tts.tts_to_file(
-                text=prompt,
-                file_path="output.wav",
-                language=language,
-                speaker_wav=speaker_wav,
-            )
         except RuntimeError as e :
             if "device-side assert" in str(e):
                 # cannot do anything on cuda device side error, need to restart
@@ -168,13 +200,6 @@ def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, voice_clea
             else:
                 print("RuntimeError: non device-side assert error:", str(e))
                 raise e
-        return (
-            gr.make_waveform(
-                audio="output.wav",
-            ),
-            "output.wav",
-            speaker_wav,
-        )
     else:
         gr.Warning("Please accept the Terms & Condition!")
         return (

 import sys
+import io, os, stat
 import subprocess
 import random
 from zipfile import ZipFile
 import uuid
+import torch
+import torchaudio
 # By using XTTS you agree to CPML license https://coqui.ai/cpml
 os.environ["COQUI_TOS_AGREED"] = "1"
 import langid
 import gradio as gr
+from scipy.io.wavfile import write
+from pydub import AudioSegment
 from TTS.api import TTS
+from TTS.tts.configs.xtts_config import XttsConfig
+from TTS.tts.models.xtts import Xtts
+from TTS.utils.generic_utils import get_user_data_dir
 HF_TOKEN = os.environ.get("HF_TOKEN")
 from huggingface_hub import HfApi
 # will use api to restart space on an unrecoverable error
 api = HfApi(token=HF_TOKEN)
 repo_id = "coqui/xtts"
 # Load TTS
 tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1")
+model_path = os.path.join(get_user_data_dir("tts"), "tts_models--multilingual--multi-dataset--xtts_v1")
+config = XttsConfig()
+config.load_json(os.path.join(model_path, "config.json"))
+model = Xtts.init_from_config(config)
+model.load_checkpoint(
+    config,
+    checkpoint_path=os.path.join(model_path, "model.pth"),
+    vocab_path=os.path.join(model_path, "vocab.json"),
+    eval=True,
+    use_deepspeed=True
+)
+model.cuda()
 # This is for debugging purposes only
 DEVICE_ASSERT_DETECTED=0
             global DEVICE_ASSERT_LANG
             #It will likely never come here as we restart space on first unrecoverable error now
             print(f"Unrecoverable exception caused by language:{DEVICE_ASSERT_LANG} prompt:{DEVICE_ASSERT_PROMPT}")
+        gpt_cond_latent, _, speaker_embedding = model.get_conditioning_latents(audio_path=speaker_wav)
+        wav_chunks = []
+        chunks = model.inference_stream(
+            prompt,
+            language,
+            gpt_cond_latent,
+            speaker_embedding,)
+        try:
+            for i, chunk in enumerate(chunks):
+                print(f"Received chunk {i} of audio length {chunk.shape[-1]}")
+                out_file = f'{i}.wav'
+                write(out_file, 24000, chunk.detach().cpu().numpy().squeeze())
+                audio = AudioSegment.from_file(out_file)
+                audio.export(out_file, format='wav')
+                yield (gr.make_waveform(audio=out_file),out_file, speaker_wav)
         except RuntimeError as e :
             if "device-side assert" in str(e):
                 # cannot do anything on cuda device side error, need to restart
             else:
                 print("RuntimeError: non device-side assert error:", str(e))
                 raise e
     else:
         gr.Warning("Please accept the Terms & Condition!")
         return (