Spaces:
Build error
Build error
| import re | |
| import numpy as np | |
| from transformers import pipeline | |
| import gradio as gr | |
# Speaker label (exactly as written inside the [Speaker] tags) -> model id
# handed to the text-to-speech pipeline.
# NOTE(review): the repo ids below are taken as given; confirm each one exists
# on the Hugging Face Hub and loads with the "text-to-speech" pipeline task.
VOICES = {
    "Amy (Female)": "microsoft/vits-piper-en-us-amy",
    "Joe (Male)": "microsoft/vits-piper-en-us-joe",
    "Clara (Female)": "microsoft/vits-piper-en-us-clb",
    "Ryan (Male)": "microsoft/vits-piper-en-us-jvs",
}
# Matches "[Speaker]text[/Speaker]"; the closing tag must repeat the opening
# speaker name. DOTALL lets a segment's text span multiple lines.
_SEGMENT_RE = re.compile(
    r"\[(?P<speaker>[^\]]+?)\](?P<text>.*?)\[/(?P=speaker)\]",
    re.DOTALL,
)


def parse_segments(text, voices=None):
    """Split tagged input into ``(speaker, text)`` segments.

    Args:
        text: Raw input using the ``[Speaker Name]text[/Speaker Name]`` format.
            ``None`` or an empty string yields no segments and no warning.
        voices: Container of accepted speaker names. Defaults to the
            module-level ``VOICES`` mapping.

    Returns:
        A ``(segments, warning)`` tuple. ``segments`` is a list of
        ``(speaker, stripped_text)`` pairs in input order, limited to known
        speakers. ``warning`` is ``None`` unless the input contains text
        outside any tag pair or a tag pair whose speaker is unknown.
    """
    if voices is None:
        voices = VOICES
    if not text:
        return [], None

    valid_segments = []
    saw_unknown_speaker = False
    untagged_parts = []
    cursor = 0
    for match in _SEGMENT_RE.finditer(text):
        # Keep whatever sits between tag pairs so stray content can be flagged.
        untagged_parts.append(text[cursor:match.start()])
        cursor = match.end()

        speaker = match.group("speaker")
        if speaker in voices:
            valid_segments.append((speaker, match.group("text").strip()))
        else:
            saw_unknown_speaker = True
    untagged_parts.append(text[cursor:])

    # Whitespace between segments (e.g. newlines) is not "untagged content".
    has_untagged_text = bool("".join(untagged_parts).strip())
    if saw_unknown_speaker or has_untagged_text:
        return valid_segments, f"Warning: Found {len(valid_segments)} valid segments, but text contains untagged content or invalid speaker names"
    return valid_segments, None
def generate_podcast(input_text):
    """Convert speaker-tagged text into one audio track with multiple voices.

    Args:
        input_text: Text in the ``[Speaker Name]text[/Speaker Name]`` format.

    Returns:
        ``((sampling_rate, samples), status)``. When nothing can be
        synthesised or any step raises, ``samples`` is an empty array, the
        rate is 22050 and ``status`` describes the problem.
    """
    fallback_rate = 22050   # rate reported alongside an empty result
    pause_samples = 5000    # silence appended after every segment

    try:
        segments, warning = parse_segments(input_text)
        # A segment with no text gives the TTS model nothing to speak.
        segments = [(speaker, text) for speaker, text in segments if text]
        if not segments:
            return (fallback_rate, np.zeros(0)), "No valid speaker segments found. Please use the format: [Speaker Name]text[/Speaker Name]"

        chunks = []
        sampling_rate = None
        mixed_rates = False
        current_pipe = None
        current_model = ""
        for speaker, text in segments:
            model_name = VOICES[speaker]
            # Reload only on a voice change; consecutive segments by the same
            # speaker reuse the already-loaded pipeline.
            if current_model != model_name:
                # Release the previous pipeline first so two models are not
                # held at the same time.
                current_pipe = None
                current_pipe = pipeline("text-to-speech", model=model_name)
                current_model = model_name

            output = current_pipe(text)
            # The pipeline may return audio with an extra leading axis
            # (presumably batch/channel, depending on model and library
            # version); flatten so it concatenates with the 1-D silence.
            # Assumes mono output.
            audio = np.asarray(output["audio"]).reshape(-1)

            if sampling_rate is None:
                sampling_rate = output["sampling_rate"]
            elif output["sampling_rate"] != sampling_rate:
                mixed_rates = True

            chunks.append(audio)
            chunks.append(np.zeros(pause_samples, dtype=audio.dtype))

        final_audio = np.concatenate(chunks)

        status = "Podcast generated successfully!"
        if warning:
            status += " " + warning
        if mixed_rates:
            # No resampling is done, so segments from a model with a different
            # rate will play back at the wrong speed.
            status += " Warning: voices use different sampling rates; some segments may play at the wrong speed."
        return (sampling_rate, final_audio), status
    except Exception as e:
        # UI boundary: report the failure in the status box instead of crashing.
        return (fallback_rate, np.zeros(0)), f"Error: {str(e)}"
| # Create Gradio interface | |
def podcast_interface(text):
    """Gradio callback: run the generator and shape its result for the UI.

    Returns a ``(audio_value, status)`` pair. ``audio_value`` is the
    ``(sampling_rate, samples)`` tuple when samples were produced; otherwise
    it is ``gr.update()`` so that no new audio value is supplied.
    """
    audio_payload, status = generate_podcast(text)
    sr, samples = audio_payload
    if samples.size > 0:
        return (sr, samples), status
    return gr.update(), status
# Hint text shown inside the empty input box.
_PLACEHOLDER_TEXT = """Example format:
[Amy (Female)]Welcome to our podcast![/Amy (Female)]
[Joe (Male)]Today we're discussing AI innovations.[/Joe (Male)]"""

# Ready-made script offered as a clickable example.
_EXAMPLE_SCRIPT = """[Amy (Female)]Welcome to our podcast![/Amy (Female)]
[Joe (Male)]Today we're discussing AI innovations.[/Joe (Male)]
[Clara (Female)]This is Clara speaking![/Clara (Female)]"""

# Input: the tagged script.
_script_input = gr.Textbox(
    label="Input Text with Speaker Tags",
    lines=12,
    placeholder=_PLACEHOLDER_TEXT,
)

# Outputs, in the order podcast_interface returns them: audio, then status.
_podcast_outputs = [
    gr.Audio(label="Generated Podcast", type="numpy"),
    gr.Textbox(label="Status", value="Ready"),
]

demo = gr.Interface(
    fn=podcast_interface,
    inputs=_script_input,
    outputs=_podcast_outputs,
    examples=[[_EXAMPLE_SCRIPT]],
    title="🎙️ Multi-Voice Podcast Generator",
    description="Generate podcasts with multiple free AI voices using Microsoft's Piper TTS models. Use [SpeakerName] tags to assign different voices to different text segments.",
    theme="soft",
    allow_flagging="never",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()