Spaces:

TGPro1
/

XTTS-v2

Sleeping

TGPro1 committed on Jan 10

Commit

8e1bb8b

verified ·

1 Parent(s): 45e1de1

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,41 +1,33 @@
 import os
 import gradio as gr
-import torch
-from TTS.tts.configs.xtts_config import XttsConfig
-from TTS.tts.models.xtts import Xtts
 os.environ["COQUI_TOS_AGREED"] = "1"
-device = "cuda" if torch.cuda.is_available() else "cpu"
-print("Loading XTTS from Hugging Face Hub...")
-config = XttsConfig()
-config.load_json("https://huggingface.co/coqui/XTTS-v2/raw/main/config.json")
-model = Xtts.init_from_config(config)
-model.load_checkpoint(config, checkpoint_dir="coqui/XTTS-v2", use_deepspeed=False)
-if device == "cuda":
-    model.cuda()
-def clone(text, lang, ref):
-    out = "out.wav"
-    outputs = model.synthesize(
-        text,
-        config,
-        speaker_wav=ref,
-        language=lang,
-        gpt_cond_len=3,
-        temperature=0.75
     )
-    import scipy.io.wavfile
-    scipy.io.wavfile.write(out, 24000, outputs["wav"])
-    return out
 LANGS = ["en","es","fr","de","it","pt","pl","tr","ru","nl","cs","ar","zh-cn","ja","ko","hu"]
-with gr.Blocks() as demo:
-    t=gr.Textbox(label="Text")
-    l=gr.Dropdown(label="Lang", choices=LANGS, value="fr")
-    r=gr.Audio(label="Ref", type="filepath")
-    o=gr.Audio(label="Out")
-    gr.Button("Generate").click(clone, [t,l,r], [o])
 demo.launch()

 import os
 import gradio as gr
+from TTS.api import TTS
 os.environ["COQUI_TOS_AGREED"] = "1"
+print("Loading XTTS model...")
+tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)
+def clone_voice(text, language, reference_audio):
+    output_path = "output.wav"
+    tts.tts_to_file(
+        text=text,
+        file_path=output_path,
+        speaker_wav=reference_audio,
+        language=language
     )
+    return output_path
 LANGS = ["en","es","fr","de","it","pt","pl","tr","ru","nl","cs","ar","zh-cn","ja","ko","hu"]
+demo = gr.Interface(
+    fn=clone_voice,
+    inputs=[
+        gr.Textbox(label="Text to synthesize"),
+        gr.Dropdown(choices=LANGS, value="fr", label="Language"),
+        gr.Audio(label="Reference Voice (5-30s)", type="filepath")
+    ],
+    outputs=gr.Audio(label="Generated Audio"),
+    title="XTTS v2 Voice Cloning"
+)
 demo.launch()