Spaces:

Nick021402
/

Text2podcast

Build error

App Files Files Community

Nick021402 committed on May 23, 2025

Commit

4cfc491

verified ·

1 Parent(s): d8fb30b

Create app.py

Browse files

Files changed (1) hide show

app.py +85 -0

app.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import re
+import numpy as np
+from transformers import pipeline
+import gradio as gr
# Speaker labels accepted in the input tags, each paired with the model id
# that is loaded to synthesize that speaker's lines.
# NOTE(review): the model ids are not verifiable from this file — confirm
# that each one resolves before deploying.
VOICES = dict(
    (
        ("Amy (Female)", "microsoft/vits-piper-en-us-amy"),
        ("Joe (Male)", "microsoft/vits-piper-en-us-joe"),
        ("Clara (Female)", "microsoft/vits-piper-en-us-clb"),
        ("Ryan (Male)", "microsoft/vits-piper-en-us-jvs"),
    )
)
# Matches ``[Speaker]text[/Speaker]``. The closing tag must repeat the opening
# speaker name exactly; DOTALL lets a segment's text span several lines.
_SEGMENT_PATTERN = re.compile(
    r'\[(?P<speaker>[^\]]+)\](?P<text>.*?)\[/(?P=speaker)\]',
    re.DOTALL,
)


def parse_segments(text):
    """Extract ``(speaker, text)`` pairs from speaker-tagged input.

    Args:
        text: Script containing segments written as
            ``[Speaker]spoken words[/Speaker]``.

    Returns:
        A list of ``(speaker, segment_text)`` tuples in order of appearance.
        Segment text is stripped of surrounding whitespace; the speaker name
        is returned exactly as written inside the brackets. Input that is
        empty or contains no well-formed segment yields an empty list.
    """
    if not text:
        return []
    return [
        (match.group('speaker'), match.group('text').strip())
        for match in _SEGMENT_PATTERN.finditer(text)
    ]
def generate_podcast(input_text):
    """Convert speaker-tagged text into a single multi-voice audio track.

    Args:
        input_text: Script that ``parse_segments`` splits into
            ``(speaker, text)`` pairs. Every speaker must be a key of
            ``VOICES``.

    Returns:
        A pair ``((sampling_rate, audio), status)``. On success ``audio`` is
        a 1-D array holding every segment followed by a short silence, and
        ``status`` is a success message. On any failure ``audio`` is an empty
        array (with a placeholder rate of 22050) and ``status`` explains what
        went wrong; exceptions are reported through ``status`` rather than
        raised.
    """
    empty_result = (22050, np.zeros(0))
    pause_samples = 5000  # silence appended after each segment
    try:
        segments = parse_segments(input_text)
        if not segments:
            return empty_result, "No valid speaker segments found"

        # Validate every speaker before loading any model, so a typo late in
        # the script is reported without first paying for earlier synthesis.
        for speaker, _ in segments:
            if speaker not in VOICES:
                return empty_result, f"Invalid speaker: {speaker}"

        chunks = []
        sampling_rate = None
        current_pipe = None
        current_model = None
        for speaker, text in segments:
            if not text:
                # Nothing to say in this segment; skip it rather than hand an
                # empty string to the model.
                continue
            model_name = VOICES[speaker]
            # Only one pipeline is kept alive at a time; it is swapped when
            # the speaker's model differs from the one currently loaded.
            if current_model != model_name:
                current_pipe = None  # release the old model before loading
                current_pipe = pipeline("text-to-speech", model=model_name)
                current_model = model_name

            output = current_pipe(text)

            rate = output["sampling_rate"]
            if sampling_rate is None:
                sampling_rate = rate
            elif rate != sampling_rate:
                # Concatenating clips recorded at different rates would play
                # some of them at the wrong speed.
                raise ValueError(
                    f"Sampling rate mismatch: {rate} vs {sampling_rate}"
                )

            # Flatten so clips join cleanly with the 1-D silence below.
            # NOTE(review): presumably some pipelines return audio shaped
            # (1, n) — confirm against the models actually used.
            audio = np.asarray(output["audio"]).reshape(-1)
            chunks.append(audio)
            # Match the clip's dtype so concatenation does not upcast it.
            chunks.append(np.zeros(pause_samples, dtype=audio.dtype))

        if not chunks:
            return empty_result, "No valid speaker segments found"

        final_audio = np.concatenate(chunks)
        return (sampling_rate, final_audio), "Podcast generated successfully!"
    except Exception as e:
        # UI boundary: surface any failure (model download, synthesis, ...)
        # as a status message instead of crashing the app.
        return empty_result, f"Error: {e}"
+# Create Gradio interface
def podcast_interface(text):
    """Gradio callback: run ``generate_podcast`` and shape its result for the UI.

    Returns a pair ``(audio_value, status)``. ``audio_value`` is the
    ``(sampling_rate, samples)`` tuple when samples were produced, otherwise
    an empty ``gr.update()``.
    """
    (sample_rate, waveform), status = generate_podcast(text)
    if waveform.size > 0:
        audio_value = (sample_rate, waveform)
    else:
        # No samples came back: send an empty update instead of a
        # zero-length clip.
        audio_value = gr.update()
    return audio_value, status
# Placeholder text for the input box; shows the expected tag syntax.
_PLACEHOLDER_TEXT = """Example format:
[Amy (Female)]Hello and welcome to today's episode![/Amy (Female)]
[Joe (Male)]Excited to have you here![/Joe (Male)]"""

# Sample script passed to the interface as its single example.
_EXAMPLE_SCRIPT = """[Amy (Female)]Welcome to our podcast![/Amy (Female)]
[Joe (Male)]Today we're discussing AI innovations.[/Joe (Male)]"""

# Input component: the tagged script.
_script_input = gr.Textbox(
    label="Input Text with Speaker Tags",
    lines=12,
    placeholder=_PLACEHOLDER_TEXT,
)

# Output components, in the order returned by ``podcast_interface``.
_audio_output = gr.Audio(label="Generated Podcast", type="numpy")
_status_output = gr.Textbox(label="Status", value="Ready")

demo = gr.Interface(
    fn=podcast_interface,
    inputs=_script_input,
    outputs=[_audio_output, _status_output],
    examples=[[_EXAMPLE_SCRIPT]],
    title="🎙️ Multi-Voice Podcast Generator",
    description="Generate podcasts with multiple free AI voices using Microsoft's Piper TTS models. Use [SpeakerName] tags to assign different voices to different text segments.",
    theme="soft",
    allow_flagging="never",
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()