Spaces:

don0726
/

xtts

Build error

App Files Community

don0726 committed on Mar 20

Commit

545eaa6

verified ·

1 Parent(s): 41ed8fc

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -62

app.py CHANGED Viewed

@@ -2,93 +2,82 @@ import gradio as gr
 import torch
 import torchaudio
 import tempfile
 import os
-from TTS.api import TTS
-# ---------------------------
-# Device setup (CPU only)
-# ---------------------------
-device = "cpu"
-print("Loading XTTS model...")
-tts = TTS(
-    model_name="tts_models/multilingual/multi-dataset/xtts_v2",
-    progress_bar=False
-).to(device)
 print("Model loaded!")
-# ---------------------------
 # Voice cloning function
-# ---------------------------
 def clone_voice(audio_file, text, lang):
     try:
         if audio_file is None:
-            return None, "❌ Please upload audio"
-        if text.strip() == "":
-            return None, "❌ Please enter text"
-        # CPU safety limit
         if len(text) > 200:
-            return None, "❌ Text too long (max 200 chars for CPU)"
         # Load audio
-        waveform, sr = torchaudio.load(audio_file)
-        # Convert to mono
-        if waveform.shape[0] > 1:
-            waveform = waveform.mean(dim=0, keepdim=True)
-        # Save temp speaker audio
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
-            speaker_path = tmp.name
-            torchaudio.save(speaker_path, waveform, sr)
-        # Output file
-        output_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
-        # Generate speech
-        tts.tts_to_file(
-            text=text,
             speaker_wav=speaker_path,
-            language=lang,
-            file_path=output_path,
-            speed=1.1   # slight speed boost
         )
-        return output_path, "✅ Success"
     except Exception as e:
-        return None, f"❌ Error: {str(e)}"
-# ---------------------------
-# Gradio UI
-# ---------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("# 🎤 XTTS Voice Cloning (CPU Space)")
-    gr.Markdown("Upload a voice sample, enter text, choose language")
-    with gr.Row():
-        audio_input = gr.Audio(type="filepath", label="🎙 Sample Voice")
-        text_input = gr.Textbox(label="📝 Text", placeholder="Enter text here...")
-    lang_input = gr.Textbox(
-        label="🌐 Language Code",
-        value="en",
-        placeholder="en, hi, fr, de..."
-    )
-    generate_btn = gr.Button("🚀 Generate")
-    output_audio = gr.Audio(label="🔊 Output")
-    status = gr.Textbox(label="Status")
-    generate_btn.click(
-        fn=clone_voice,
-        inputs=[audio_input, text_input, lang_input],
-        outputs=[output_audio, status]
-    )
-# Required for Hugging Face Spaces
 demo.launch(server_name="0.0.0.0", server_port=7860)

 import torch
 import torchaudio
 import tempfile
+from TTS.tts.models.xtts import Xtts
+from TTS.utils.audio import AudioProcessor
+from TTS.config import load_config
 import os
+# -------------------------
+# Load model manually (no heavy install)
+# -------------------------
+MODEL_NAME = "tts_models/multilingual/multi-dataset/xtts_v2"
+print("Loading model...")
+from huggingface_hub import snapshot_download
+model_path = snapshot_download(repo_id="coqui/XTTS-v2")
+config = load_config(os.path.join(model_path, "config.json"))
+model = Xtts.init_from_config(config)
+model.load_checkpoint(config, checkpoint_dir=model_path)
+model.eval()
 print("Model loaded!")
+# -------------------------
 # Voice cloning function
+# -------------------------
 def clone_voice(audio_file, text, lang):
     try:
         if audio_file is None:
+            return None, "Upload audio"
         if len(text) > 200:
+            return None, "Text too long (max 200 chars)"
         # Load audio
+        wav, sr = torchaudio.load(audio_file)
+        if wav.shape[0] > 1:
+            wav = wav.mean(dim=0, keepdim=True)
+        # Save temp speaker
+        speaker_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
+        torchaudio.save(speaker_path, wav, sr)
+        # Generate
+        outputs = model.synthesize(
+            text,
+            config,
             speaker_wav=speaker_path,
+            language=lang
         )
+        out_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
+        torchaudio.save(out_path, torch.tensor(outputs["wav"]).unsqueeze(0), 24000)
+        return out_path, "Success"
     except Exception as e:
+        return None, str(e)
+# -------------------------
+# UI
+# -------------------------
 with gr.Blocks() as demo:
+    gr.Markdown("# XTTS Voice Cloning (CPU Fixed)")
+    audio = gr.Audio(type="filepath", label="Voice Sample")
+    text = gr.Textbox(label="Text")
+    lang = gr.Textbox(value="en", label="Language")
+    btn = gr.Button("Generate")
+    out_audio = gr.Audio()
+    status = gr.Textbox()
+    btn.click(clone_voice, [audio, text, lang], [out_audio, status])
 demo.launch(server_name="0.0.0.0", server_port=7860)