Spaces:

Aranwer
/

TTS

Runtime error

App Files Community

Aranwer committed on May 18, 2025

Commit

cc74df0

verified ·

1 Parent(s): ddab3bb

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -67

app.py CHANGED Viewed

@@ -1,13 +1,13 @@
-import gradio as gr
 from TTS.api import TTS
 import tempfile
 import os
-# Initialize TTS with a better English storytelling model
-model_name = "tts_models/en/vctk/vits"  # Better for English narration
 tts = TTS(model_name)
-# Custom speaker labels for better narration options
 speaker_labels = {
     "p225": "Male, Young Adult",
     "p226": "Female, Middle-Aged",
@@ -21,7 +21,7 @@ speaker_labels = {
     "p234": "Female, Gentle Storyteller"
 }
-# Get available speakers and filter to our labeled ones
 available_speakers = [spk for spk in tts.speakers if spk in speaker_labels]
 def text_to_speech(text, speaker_name, speed, pitch):
@@ -29,106 +29,102 @@ def text_to_speech(text, speaker_name, speed, pitch):
         if not text.strip():
             raise ValueError("Please enter some text")
-        # Create temporary file
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
-            # Generate with adjusted speed and pitch
             tts.tts_to_file(
                 text=text,
                 speaker=speaker_name,
                 file_path=f.name,
-                speed=speed,
-                # Note: Some models may not support pitch directly
-                # We'll use a workaround for pitch adjustment
             )
-            # Apply pitch adjustment if needed (using sox if available)
-            if pitch != 1.0:
-                try:
-                    import sox
-                    tfm = sox.Transformer()
-                    tfm.pitch(pitch)
-                    adjusted_file = f.name + "_adjusted.wav"
-                    tfm.build_file(f.name, adjusted_file)
-                    os.replace(adjusted_file, f.name)
-                except ImportError:
-                    print("Sox not installed, pitch adjustment skipped")
-            return f.name
     except Exception as e:
         raise gr.Error(f"Error generating speech: {str(e)}")
 def create_download_link(audio_file):
-    if audio_file is None or not os.path.exists(audio_file):
-        return None
-    return gr.DownloadButton(label="Download Audio", value=audio_file)
 with gr.Blocks(title="Storytelling TTS App") as app:
-    gr.Markdown("# Professional Storytelling Text-to-Speech")
-    gr.Markdown("Perfect for audiobooks, podcasts, and narrative content")
     with gr.Row():
         with gr.Column():
-            text_input = gr.Textbox(label="Enter your story text", lines=8,
-                                  placeholder="Once upon a time...")
             speaker = gr.Dropdown(
                 choices=available_speakers,
                 label="Narrator Voice",
-                value="p227",  # Default to mature storyteller
                 format_func=lambda x: speaker_labels[x]
             )
-            with gr.Accordion("Voice Adjustment", open=True):
                 speed = gr.Slider(
-                    minimum=0.5, maximum=2.0,
                     value=1.0, step=0.1,
                     label="Speaking Rate",
-                    info="1.0 = normal, lower for slower narration"
                 )
                 pitch = gr.Slider(
-                    minimum=-5.0, maximum=5.0,
                     value=0.0, step=0.5,
-                    label="Pitch Adjustment",
-                    info="0 = normal, positive for higher pitch"
                 )
-            generate_btn = gr.Button("Generate Narration", variant="primary")
         with gr.Column():
             audio_output = gr.Audio(
-                label="Generated Narration",
                 type="filepath",
                 elem_classes=["output-audio"]
             )
-            download_section = gr.Group(visible=False)
-    # Voice preview samples
-    with gr.Accordion("Preview Narrator Voices", open=False):
-        gr.Markdown("Listen to sample narration from each voice:")
-        with gr.Row():
-            for speaker_id in available_speakers[:3]:
-                gr.Audio(
-                    value=f"https://example.com/samples/{speaker_id}.wav",  # Replace with actual samples
-                    label=speaker_labels[speaker_id],
-                    visible=False  # Set to True if you have sample files
-                )
-    # Set up interactivity
     generate_btn.click(
         fn=text_to_speech,
         inputs=[text_input, speaker, speed, pitch],
         outputs=audio_output
-    ).then(
-        fn=lambda: gr.Group(visible=True),
-        outputs=download_section
     ).then(
         fn=create_download_link,
         inputs=audio_output,
-        outputs=download_section
     )
-    # Storytelling examples
     gr.Examples(
         examples=[
             ["The old man sat by the fireplace, his eyes twinkling with memories of adventures past.", "p227", 0.9, 0.0],
@@ -142,7 +138,6 @@ with gr.Blocks(title="Storytelling TTS App") as app:
     )
 if __name__ == "__main__":
-    # Install sox for pitch adjustment if not available
     try:
         import sox
     except ImportError:

+import gradio as gr
 from TTS.api import TTS
 import tempfile
 import os
+# Initialize TTS
+model_name = "tts_models/en/vctk/vits"
 tts = TTS(model_name)
+# Custom speaker labels
 speaker_labels = {
     "p225": "Male, Young Adult",
     "p226": "Female, Middle-Aged",
     "p234": "Female, Gentle Storyteller"
 }
+# Filter available speakers
 available_speakers = [spk for spk in tts.speakers if spk in speaker_labels]
 def text_to_speech(text, speaker_name, speed, pitch):
         if not text.strip():
             raise ValueError("Please enter some text")
+        # Generate temporary file
         with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
             tts.tts_to_file(
                 text=text,
                 speaker=speaker_name,
                 file_path=f.name,
+                speed=speed
             )
+            output_path = f.name
+        # Adjust pitch using sox if needed
+        if pitch != 0.0:
+            try:
+                import sox
+                tfm = sox.Transformer()
+                tfm.pitch(pitch)
+                adjusted_file = output_path + "_adjusted.wav"
+                tfm.build_file(output_path, adjusted_file)
+                os.replace(adjusted_file, output_path)
+            except ImportError:
+                print("Sox not installed; skipping pitch adjustment.")
+        return output_path
     except Exception as e:
         raise gr.Error(f"Error generating speech: {str(e)}")
 def create_download_link(audio_file):
+    if audio_file and os.path.exists(audio_file):
+        return gr.update(visible=True, value=audio_file)
+    return gr.update(visible=False)
 with gr.Blocks(title="Storytelling TTS App") as app:
+    gr.Markdown("# 🎙️ Professional Storytelling Text-to-Speech")
+    gr.Markdown("Convert your text into narrated audio using expressive voices. Ideal for audiobooks, storytelling, and podcast narration.")
     with gr.Row():
         with gr.Column():
+            text_input = gr.Textbox(
+                label="Enter your story text",
+                lines=8,
+                placeholder="Once upon a time..."
+            )
             speaker = gr.Dropdown(
                 choices=available_speakers,
                 label="Narrator Voice",
+                value="p227",
                 format_func=lambda x: speaker_labels[x]
             )
+            with gr.Accordion("🎛️ Voice Adjustment", open=True):
                 speed = gr.Slider(
+                    minimum=0.5, maximum=2.0,
                     value=1.0, step=0.1,
                     label="Speaking Rate",
+                    info="1.0 = normal speed"
                 )
                 pitch = gr.Slider(
+                    minimum=-5.0, maximum=5.0,
                     value=0.0, step=0.5,
+                    label="Pitch Shift (in semitones)",
+                    info="0 = normal, positive = higher pitch"
                 )
+            generate_btn = gr.Button("🎧 Generate Narration", variant="primary")
         with gr.Column():
             audio_output = gr.Audio(
+                label="Generated Narration",
                 type="filepath",
                 elem_classes=["output-audio"]
             )
+            download_button = gr.DownloadButton(
+                label="Download Audio", visible=False
+            )
+    with gr.Accordion("🎤 Preview Narrator Voices (Samples Coming Soon)", open=False):
+        gr.Markdown("Previews will be available here once sample audios are added.")
+        for speaker_id in available_speakers[:3]:
+            gr.Audio(
+                value=None,
+                label=speaker_labels[speaker_id],
+                visible=False  # Set to True and provide file path or URL to enable
+            )
     generate_btn.click(
         fn=text_to_speech,
         inputs=[text_input, speaker, speed, pitch],
         outputs=audio_output
     ).then(
         fn=create_download_link,
         inputs=audio_output,
+        outputs=download_button
     )
     gr.Examples(
         examples=[
             ["The old man sat by the fireplace, his eyes twinkling with memories of adventures past.", "p227", 0.9, 0.0],
     )
 if __name__ == "__main__":
     try:
         import sox
     except ImportError: