Spaces:

Successmove
/

XTTS

Runtime error

App Files Files Community

Successmove committed on Sep 11

Commit

08a0d1e

verified ·

1 Parent(s): b639183

Create app.py

Browse files

Files changed (1) hide show

app.py +141 -0

app.py ADDED Viewed

	@@ -0,0 +1,141 @@

+import gradio as gr
+import torch
+import tempfile
+import os
+from TTS.api import TTS
+# Initialize the XTTS model
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using device: {device}")
+# Initialize XTTS model
+tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
+# Get list of supported languages
+supported_languages = [
+    "en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl",
+    "cs", "ar", "zh-cn", "ja", "hu", "ko"
+]
+def generate_speech(
+    text,
+    language,
+    speaker_wav=None,
+    voice_preset=None,
+    speed=1.0,
+    temperature=0.7
+):
+    """
+    Generate speech from text using XTTS model
+    """
+    # Create a temporary file for output
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
+        output_path = tmp_file.name
+    try:
+        # If speaker wav is provided, use it for voice cloning
+        if speaker_wav is not None:
+            tts.tts_to_file(
+                text=text,
+                file_path=output_path,
+                speaker_wav=speaker_wav,
+                language=language,
+                speed=speed,
+                temperature=temperature
+            )
+        else:
+            # Use default voice if no speaker wav is provided
+            tts.tts_to_file(
+                text=text,
+                file_path=output_path,
+                language=language,
+                speed=speed,
+                temperature=temperature
+            )
+        return output_path
+    except Exception as e:
+        # Clean up temporary file if error occurs
+        if os.path.exists(output_path):
+            os.unlink(output_path)
+        raise gr.Error(f"Error generating speech: {str(e)}")
+# Create Gradio interface
+with gr.Blocks(title="XTTS Text-to-Speech") as demo:
+    gr.Markdown("# XTTS Text-to-Speech Generator")
+    gr.Markdown("Generate speech from text with voice cloning capabilities using XTTS v2")
+    with gr.Row():
+        with gr.Column():
+            text_input = gr.Textbox(
+                label="Input Text",
+                placeholder="Enter text to convert to speech...",
+                lines=3
+            )
+            language_input = gr.Dropdown(
+                label="Language",
+                choices=[(lang, lang) for lang in supported_languages],
+                value="en",
+                info="Select the language for synthesis"
+            )
+            speaker_wav_input = gr.Audio(
+                label="Reference Voice (Optional)",
+                type="filepath",
+                info="Upload a 3-10 second audio sample for voice cloning"
+            )
+            with gr.Accordion("Advanced Settings", open=False):
+                speed_input = gr.Slider(
+                    label="Speed",
+                    minimum=0.5,
+                    maximum=2.0,
+                    value=1.0,
+                    step=0.1,
+                    info="Speech speed (0.5 = slow, 2.0 = fast)"
+                )
+                temperature_input = gr.Slider(
+                    label="Temperature",
+                    minimum=0.1,
+                    maximum=1.0,
+                    value=0.7,
+                    step=0.1,
+                    info="Voice variability (lower = more deterministic)"
+                )
+            generate_btn = gr.Button("Generate Speech", variant="primary")
+        with gr.Column():
+            audio_output = gr.Audio(
+                label="Generated Speech",
+                type="filepath"
+            )
+    gr.Examples(
+        examples=[
+            ["Hello, world! This is a sample text to speech generation.", "en"],
+            ["Bonjour, comment allez-vous aujourd'hui?", "fr"],
+            ["Hola, ¿cómo estás?", "es"],
+        ],
+        inputs=[text_input, language_input],
+        outputs=audio_output,
+        fn=generate_speech,
+        cache_examples=True
+    )
+    generate_btn.click(
+        fn=generate_speech,
+        inputs=[
+            text_input,
+            language_input,
+            speaker_wav_input,
+            speed_input,
+            temperature_input
+        ],
+        outputs=audio_output
+    )
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)