Spaces:

immad84
/

xtts-space

Running

App Files Files Community

immad84 committed on Aug 12, 2025

Commit

a279c11

verified ·

1 Parent(s): 7a10ff5

update app.py

Browse files

Files changed (1) hide show

app.py +28 -24

app.py CHANGED Viewed

@@ -5,7 +5,7 @@ import torch
 import gradio as gr
 from TTS.api import TTS
-# Patch torch.load for compatibility
 old_torch_load = torch.load
 def patched_torch_load(*args, **kwargs):
     kwargs["weights_only"] = False
@@ -15,13 +15,17 @@ torch.load = patched_torch_load
 # Accept Coqui TOS
 os.environ["COQUI_TOS_AGREED"] = "1"
-# Model name
 MODEL = "tts_models/multilingual/multi-dataset/xtts_v2"
 print("Loading model:", MODEL)
-# Detect if GPU available (Hugging Face ZeroGPU = CPU only)
-use_gpu = torch.cuda.is_available()
-tts = TTS(MODEL, gpu=use_gpu)
 # Supported languages
 LANGS = [
@@ -29,32 +33,31 @@ LANGS = [
     "cs", "ar", "zh-cn", "ja", "ko", "hu", "hi"
 ]
-# Default speaker if no wav provided
-DEFAULT_SPEAKER = tts.speakers[0] if tts.speakers else None
-print("Default speaker:", DEFAULT_SPEAKER)
-def generate_audio(text, language, speaker_wav):
     if not text or len(text.strip()) < 2:
         return None
     out_path = tempfile.mktemp(suffix=".wav")
-    if speaker_wav:
-        # Use reference WAV for voice cloning
-        tts.tts_to_file(
-            text=text,
-            speaker_wav=speaker_wav,
-            language=language,
-            file_path=out_path
-        )
     else:
-        # Use built-in default voice
         tts.tts_to_file(
             text=text,
-            speaker=DEFAULT_SPEAKER,
             language=language,
             file_path=out_path
         )
     return out_path
@@ -63,10 +66,11 @@ demo = gr.Interface(
     inputs=[
         gr.Textbox(lines=3, label="Text"),
         gr.Dropdown(LANGS, value="en", label="Language"),
-        gr.Audio(label="Speaker reference (optional, WAV)", type="filepath")
     ],
-    outputs=gr.Audio(type="filepath", label="Generated speech"),
-    title="XTTS-v2 (Multilingual + Voice Cloning)",
     allow_flagging="never",
 )

 import gradio as gr
 from TTS.api import TTS
+# Patch torch.load for compatibility with older Coqui checkpoints
 old_torch_load = torch.load
 def patched_torch_load(*args, **kwargs):
     kwargs["weights_only"] = False
 # Accept Coqui TOS
 os.environ["COQUI_TOS_AGREED"] = "1"
+# Ensure speakers folder exists
+SPEAKER_DIR = "speakers"
+os.makedirs(SPEAKER_DIR, exist_ok=True)
+# Get device
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# Model
 MODEL = "tts_models/multilingual/multi-dataset/xtts_v2"
 print("Loading model:", MODEL)
+tts = TTS(MODEL).to(device)
 # Supported languages
 LANGS = [
     "cs", "ar", "zh-cn", "ja", "ko", "hu", "hi"
 ]
+def generate_audio(text, language, speaker_file):
     if not text or len(text.strip()) < 2:
         return None
     out_path = tempfile.mktemp(suffix=".wav")
+    # Resolve speaker path (either from upload or from speakers folder)
+    speaker_path = None
+    if speaker_file:
+        speaker_path = speaker_file
     else:
+        # Default to first speaker file in folder if exists
+        files = [f for f in os.listdir(SPEAKER_DIR) if f.lower().endswith(".wav")]
+        if files:
+            speaker_path = os.path.join(SPEAKER_DIR, files[0])
+    if speaker_path:
         tts.tts_to_file(
             text=text,
+            speaker_wav=speaker_path,
             language=language,
             file_path=out_path
         )
+    else:
+        return None
     return out_path
     inputs=[
         gr.Textbox(lines=3, label="Text"),
         gr.Dropdown(LANGS, value="en", label="Language"),
+        gr.Audio(label="Upload speaker reference (optional)", type="filepath")
     ],
+    outputs=gr.Audio(type="filepath", label="Generated Speech"),
+    title="XTTS-v2 Voice Cloning",
+    description=f"Drop WAV files into `{SPEAKER_DIR}` folder for reusable speaker voices.",
     allow_flagging="never",
 )