Spaces:

Aranwer
/

TTS

Runtime error

App Files Files Community

Aranwer commited on May 18, 2025

Commit

20c432f

verified ·

1 Parent(s): 9d610a2

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -47

app.py CHANGED Viewed

@@ -3,41 +3,56 @@ from TTS.api import TTS
 import tempfile
 import os
-# Load multilingual TTS model
-model_name = "tts_models/multilingual/multi-dataset/your_tts"
 tts = TTS(model_name)
-# Get available languages and speakers
-available_languages = list(tts.languages)  # ['en', 'fr-fr', 'pt-br']
-available_speakers = tts.speakers
-def text_to_speech(text, language, speaker_name, speed, pitch):
     try:
-        # Validate inputs
         if not text.strip():
             raise ValueError("Please enter some text")
-        if language not in available_languages:
-            raise ValueError(f"Language '{language}' not supported by this model")
-        # Create parameters dictionary
-        params = {
-            "text": text,
-            "speaker": speaker_name,
-            "language": language,
-            "file_path": None
-        }
-        # Add optional parameters
-        if speed != 1.0:
-            params["speed"] = speed
-        if pitch != 1.0:
-            params["pitch"] = pitch
-        # Save to temporary WAV file
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
-            params["file_path"] = f.name
-            tts.tts_to_file(**params)
             return f.name
     except Exception as e:
@@ -48,32 +63,61 @@ def create_download_link(audio_file):
         return None
     return gr.DownloadButton(label="Download Audio", value=audio_file)
-with gr.Blocks(title="Enhanced TTS App") as app:
-    gr.Markdown("# Enhanced Multilingual Text-to-Speech")
-    gr.Markdown(f"Supported languages: {', '.join(available_languages)}")
     with gr.Row():
         with gr.Column():
-            text_input = gr.Textbox(label="Enter text", lines=5)
-            language = gr.Dropdown(choices=available_languages, label="Language", value="en")
-            speaker = gr.Dropdown(choices=available_speakers, label="Voice")
-            with gr.Accordion("Advanced Settings", open=False):
-                speed = gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1,
-                                 label="Speed (1.0 = normal)")
-                pitch = gr.Slider(minimum=0.5, maximum=2.0, value=1.0, step=0.1,
-                                label="Pitch (1.0 = normal)")
-            generate_btn = gr.Button("Generate Speech", variant="primary")
         with gr.Column():
-            audio_output = gr.Audio(label="Generated Audio", type="filepath")
             download_section = gr.Group(visible=False)
     # Set up interactivity
     generate_btn.click(
         fn=text_to_speech,
-        inputs=[text_input, language, speaker, speed, pitch],
         outputs=audio_output
     ).then(
         fn=lambda: gr.Group(visible=True),
@@ -84,18 +128,24 @@ with gr.Blocks(title="Enhanced TTS App") as app:
         outputs=download_section
     )
-    # Update examples to only use supported languages
     gr.Examples(
         examples=[
-            ["Hello, welcome to our text-to-speech application!", "en", available_speakers[0], 1.0, 1.0],
-            ["Bonjour, bienvenue dans notre application!", "fr-fr", available_speakers[-1], 1.0, 1.0],
-            ["Olá, bem-vindo ao nosso aplicativo!", "pt-br", available_speakers[0], 1.0, 1.0]
         ],
-        inputs=[text_input, language, speaker, speed, pitch],
         outputs=audio_output,
         fn=text_to_speech,
-        cache_examples=True
     )
 if __name__ == "__main__":
     app.launch()

 import tempfile
 import os
+# Initialize TTS with a better English storytelling model
+model_name = "tts_models/en/vctk/vits"  # Better for English narration
 tts = TTS(model_name)
+# Custom speaker labels for better narration options
+speaker_labels = {
+    "p225": "Male, Young Adult",
+    "p226": "Female, Middle-Aged",
+    "p227": "Male, Mature Storyteller",
+    "p228": "Female, Young Adult",
+    "p229": "Male, Elderly Narrator",
+    "p230": "Female, Warm Storyteller",
+    "p231": "Male, Deep Voice",
+    "p232": "Female, Clear Articulation",
+    "p233": "Male, Authoritative",
+    "p234": "Female, Gentle Storyteller"
+}
+# Get available speakers and filter to our labeled ones
+available_speakers = [spk for spk in tts.speakers if spk in speaker_labels]
+def text_to_speech(text, speaker_name, speed, pitch):
     try:
         if not text.strip():
             raise ValueError("Please enter some text")
+        # Create temporary file
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
+            # Generate with adjusted speed and pitch
+            tts.tts_to_file(
+                text=text,
+                speaker=speaker_name,
+                file_path=f.name,
+                speed=speed,
+                # Note: Some models may not support pitch directly
+                # We'll use a workaround for pitch adjustment
+            )
+            # Apply pitch adjustment if needed (using sox if available)
+            if pitch != 1.0:
+                try:
+                    import sox
+                    tfm = sox.Transformer()
+                    tfm.pitch(pitch)
+                    adjusted_file = f.name + "_adjusted.wav"
+                    tfm.build_file(f.name, adjusted_file)
+                    os.replace(adjusted_file, f.name)
+                except ImportError:
+                    print("Sox not installed, pitch adjustment skipped")
             return f.name
     except Exception as e:
         return None
     return gr.DownloadButton(label="Download Audio", value=audio_file)
+with gr.Blocks(title="Storytelling TTS App") as app:
+    gr.Markdown("# Professional Storytelling Text-to-Speech")
+    gr.Markdown("Perfect for audiobooks, podcasts, and narrative content")
     with gr.Row():
         with gr.Column():
+            text_input = gr.Textbox(label="Enter your story text", lines=8,
+                                  placeholder="Once upon a time...")
+            speaker = gr.Dropdown(
+                choices=available_speakers,
+                label="Narrator Voice",
+                value="p227",  # Default to mature storyteller
+                format_func=lambda x: speaker_labels[x]
+            )
+            with gr.Accordion("Voice Adjustment", open=True):
+                speed = gr.Slider(
+                    minimum=0.5, maximum=2.0,
+                    value=1.0, step=0.1,
+                    label="Speaking Rate",
+                    info="1.0 = normal, lower for slower narration"
+                )
+                pitch = gr.Slider(
+                    minimum=-5.0, maximum=5.0,
+                    value=0.0, step=0.5,
+                    label="Pitch Adjustment",
+                    info="0 = normal, positive for higher pitch"
+                )
+            generate_btn = gr.Button("Generate Narration", variant="primary")
         with gr.Column():
+            audio_output = gr.Audio(
+                label="Generated Narration",
+                type="filepath",
+                elem_classes=["output-audio"]
+            )
             download_section = gr.Group(visible=False)
+    # Voice preview samples
+    with gr.Accordion("Preview Narrator Voices", open=False):
+        gr.Markdown("Listen to sample narration from each voice:")
+        with gr.Row():
+            for speaker_id in available_speakers[:3]:
+                gr.Audio(
+                    value=f"https://example.com/samples/{speaker_id}.wav",  # Replace with actual samples
+                    label=speaker_labels[speaker_id],
+                    visible=False  # Set to True if you have sample files
+                )
     # Set up interactivity
     generate_btn.click(
         fn=text_to_speech,
+        inputs=[text_input, speaker, speed, pitch],
         outputs=audio_output
     ).then(
         fn=lambda: gr.Group(visible=True),
         outputs=download_section
     )
+    # Storytelling examples
     gr.Examples(
         examples=[
+            ["The old man sat by the fireplace, his eyes twinkling with memories of adventures past.", "p227", 0.9, 0.0],
+            ["In a quiet village nestled between the mountains, a young girl discovered a secret that would change everything.", "p234", 1.0, 0.5],
+            ["The detective examined the clue carefully, knowing this small piece of evidence could crack the entire case wide open.", "p231", 1.1, -1.0]
         ],
+        inputs=[text_input, speaker, speed, pitch],
         outputs=audio_output,
         fn=text_to_speech,
+        cache_examples=False
     )
 if __name__ == "__main__":
+    # Install sox for pitch adjustment if not available
+    try:
+        import sox
+    except ImportError:
+        print("Consider installing sox for pitch adjustment: pip install sox")
     app.launch()