Spaces:
Sleeping
Sleeping
# app.py - Main Gradio application
import asyncio
import logging
import os
import shutil
import tempfile
from datetime import datetime
from pathlib import Path
from typing import Generator, List, Optional, Tuple

import gradio as gr

# Import our custom modules
from audio_utils import AudioProcessor
from segmenter import TextSegmenter
from tts_engine import NariDIAEngine
# Configure logging: set up the root logger at INFO level and create the
# module-level logger used by the rest of this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class PodXplainApp:
    """Orchestrate the text-to-podcast pipeline behind the Gradio UI.

    Holds one instance each of the text segmenter, the TTS engine and the
    audio processor, plus the path of the temporary working directory in
    which segment audio and the final MP3 are written.
    """

    # Upper bound on accepted script length, in characters.
    MAX_TEXT_LENGTH = 50_000
    # Heuristic behind the duration estimate: ~1000 characters per minute.
    CHARS_PER_MINUTE = 1000

    def __init__(self):
        """Instantiate the pipeline components; no working directory yet."""
        self.segmenter = TextSegmenter()
        self.tts_engine = NariDIAEngine()
        self.audio_processor = AudioProcessor()
        self.temp_dir = None  # path of the current working directory, if any

    def create_temp_directory(self) -> str:
        """Create a fresh temporary working directory and return its path.

        Any directory left over from a previous run is removed first, so the
        output of an earlier generation does not survive a new one.
        """
        self.cleanup_temp_directory()
        self.temp_dir = tempfile.mkdtemp(prefix="podxplain_")
        return self.temp_dir

    def cleanup_temp_directory(self):
        """Remove the current working directory (best effort) and forget it."""
        if self.temp_dir and os.path.exists(self.temp_dir):
            shutil.rmtree(self.temp_dir, ignore_errors=True)
        self.temp_dir = None

    def generate_podcast(
        self,
        text: str,
        speaker_detection_mode: str = "auto",
        progress=gr.Progress()
    ) -> Tuple[Optional[str], str]:
        """Convert a text script into a single podcast MP3.

        Args:
            text: Input script; rejected when empty/whitespace-only or longer
                than ``MAX_TEXT_LENGTH`` characters.
            speaker_detection_mode: Forwarded to the segmenter as ``mode``.
            progress: Gradio progress tracker; the default instance lets
                Gradio inject per-request progress reporting.

        Returns:
            ``(audio_path, status_message)``. ``audio_path`` is ``None``
            whenever generation fails; ``status_message`` then carries the
            reason. Exceptions are logged and reported through the status
            message rather than propagated.
        """
        workdir_created = False
        succeeded = False
        try:
            # Validate input before touching the filesystem, so a rejected
            # request does not wipe the previous run's output.
            if not text or not text.strip():
                return None, "β Please provide some text to convert."
            if len(text) > self.MAX_TEXT_LENGTH:
                return None, (
                    f"β Text too long ({len(text)} chars). "
                    f"Maximum is {self.MAX_TEXT_LENGTH:,} characters."
                )

            temp_dir = self.create_temp_directory()
            workdir_created = True
            progress(0, desc="π Starting podcast generation...")

            # Step 1: Segment text and assign speakers
            progress(0.1, desc="π Analyzing text and assigning speakers...")
            segments = self.segmenter.segment_and_assign_speakers(
                text, mode=speaker_detection_mode
            )
            if not segments:
                return None, "β Could not process the text. Please check the input."
            logger.info(f"Generated {len(segments)} segments")

            # Step 2: Generate audio for each segment
            progress(0.2, desc="π€ Generating audio segments...")
            audio_files = []
            for i, (speaker, segment_text) in enumerate(segments):
                # Synthesis spans the 20%-90% range of the progress bar.
                progress(
                    0.2 + (0.7 * i / len(segments)),
                    desc=f"π΅ Processing segment {i+1}/{len(segments)} (Speaker {speaker})"
                )
                audio_path = self.tts_engine.synthesize_segment(
                    segment_text,
                    speaker,
                    os.path.join(temp_dir, f"segment_{i:03d}.wav")
                )
                if audio_path:
                    audio_files.append(audio_path)
                else:
                    # A failed segment is skipped, not fatal.
                    logger.warning(f"Failed to generate audio for segment {i}")
            if not audio_files:
                return None, "β Failed to generate any audio segments."

            # Step 3: Merge audio files and convert to MP3
            progress(0.9, desc="π§ Merging segments and converting to MP3...")
            final_audio_path = self.audio_processor.merge_and_convert_to_mp3(
                audio_files,
                os.path.join(temp_dir, "podcast_output.mp3")
            )
            if not final_audio_path:
                return None, "β Failed to merge audio segments."
            progress(1.0, desc="β Podcast generated successfully!")

            # Generate summary
            total_segments = len(segments)
            speakers_used = len({speaker for speaker, _ in segments})
            # Rough estimate, already in minutes (the value is displayed with
            # a "minutes" unit, so it must not be scaled to seconds).
            duration_estimate = len(text) / self.CHARS_PER_MINUTE
            status_message = "\n".join([
                "",
                "β **Podcast Generated Successfully!**",
                "π **Statistics:**",
                f"- Total segments: {total_segments}",
                f"- Speakers used: {speakers_used}",
                f"- Estimated duration: {duration_estimate:.1f} minutes",
                f"- Character count: {len(text):,}",
                "π§ **Your podcast is ready for download!**",
                "",
            ])
            succeeded = True
            return final_audio_path, status_message
        except Exception as e:
            # Top-level UI boundary: log with traceback, report to the user.
            logger.exception(f"Error generating podcast: {str(e)}")
            return None, f"β Error: {str(e)}"
        finally:
            # On success the directory is kept so the final MP3 can be
            # served; on failure nothing references it, so remove it.
            if workdir_created and not succeeded:
                self.cleanup_temp_directory()
def create_gradio_interface():
    """Build and return the PodXplain Gradio Blocks UI.

    The layout is a header, a two-column row (script input and options on
    the left, status and audio output on the right) and a usage footer.
    Two events are wired: a live character counter on the script box and
    the generate button, which calls ``PodXplainApp.generate_podcast``.
    """
    podcast_app = PodXplainApp()

    def _render_char_count(text):
        # Server-side callback for the live counter; empty/None counts as 0.
        count = len(text) if text else 0
        return f"Characters: {count:,} / 50,000"

    # Custom CSS for the page container, header banner and footer.
    custom_css = """
    .main-container {
        max-width: 1200px;
        margin: 0 auto;
    }
    .header {
        text-align: center;
        padding: 20px;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        color: white;
        border-radius: 10px;
        margin-bottom: 20px;
    }
    .footer {
        text-align: center;
        padding: 20px;
        color: #666;
        font-size: 0.9em;
    }
    """

    with gr.Blocks(css=custom_css, title="PodXplain - Text to Podcast") as demo:
        # Header banner
        gr.HTML("""
        <div class="header">
        <h1>ποΈ PodXplain</h1>
        <p><em>From script to story β voice it like never before.</em></p>
        </div>
        """)

        with gr.Row():
            # Left column: script entry and generation options.
            with gr.Column(scale=2):
                gr.Markdown("## π Input Your Script")
                script_box = gr.Textbox(
                    label="Podcast Script",
                    placeholder="Enter your podcast script here (up to 50,000 characters).\n\nTip: Use paragraph breaks to help with speaker detection.",
                    lines=15,
                    max_lines=20,
                    show_label=True
                )
                counter_html = gr.HTML("Characters: 0 / 50,000")
                mode_selector = gr.Radio(
                    choices=["auto", "paragraph", "dialogue"],
                    value="auto",
                    label="Speaker Detection Mode",
                    info="How to detect when speakers change"
                )
                run_button = gr.Button(
                    "π€ Generate Podcast",
                    variant="primary",
                    size="lg"
                )

            # Right column: status text and the resulting audio.
            with gr.Column(scale=1):
                gr.Markdown("## π§ Your Podcast")
                status_panel = gr.Markdown("Ready to generate your podcast!")
                audio_player = gr.Audio(
                    label="Generated Podcast",
                    show_download_button=True,
                    interactive=False
                )

        # Footer with usage instructions
        gr.HTML("""
        <div class="footer">
        <h3>π How to Use PodXplain</h3>
        <ol>
        <li><strong>Write your script:</strong> Enter up to 50,000 characters of text</li>
        <li><strong>Choose speaker mode:</strong> Auto-detect, paragraph-based, or dialogue-based</li>
        <li><strong>Generate:</strong> Click the button and wait for processing</li>
        <li><strong>Listen & Download:</strong> Your MP3 podcast will be ready!</li>
        </ol>
        <p><strong>π‘ Tips:</strong> Use clear paragraph breaks for better speaker detection.
        Write naturally as if speaking to an audience.</p>
        </div>
        """)

        # Live character counter: refresh whenever the script text changes.
        script_box.change(
            fn=_render_char_count,
            inputs=[script_box],
            outputs=[counter_html]
        )

        # Main generation handler: outputs map to (audio, status) in order.
        run_button.click(
            fn=podcast_app.generate_podcast,
            inputs=[script_box, mode_selector],
            outputs=[audio_player, status_panel],
            show_progress=True
        )

    return demo
if __name__ == "__main__":
    # Script entry point: build the UI, then serve it on all network
    # interfaces at port 7860 with a public share link requested and
    # errors surfaced in the UI.
    launch_options = {
        "share": True,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    interface = create_gradio_interface()
    interface.launch(**launch_options)