hivecorp committed on
Commit
6c4df3d
·
verified ·
1 Parent(s): 3fd60fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -570
app.py CHANGED
@@ -1,604 +1,189 @@
1
  import gradio as gr
2
- from pydub import AudioSegment
3
  import edge_tts
4
- import os
5
  import asyncio
6
- import uuid
7
- import re
8
- import time
9
  import tempfile
10
- from concurrent.futures import ThreadPoolExecutor
11
- from typing import List, Tuple, Optional, Dict, Any
12
- import math
13
- from dataclasses import dataclass
14
-
15
- class TimingManager:
16
- def __init__(self):
17
- self.current_time = 0
18
- self.segment_gap = 100 # ms gap between segments
19
-
20
- def get_timing(self, duration):
21
- start_time = self.current_time
22
- end_time = start_time + duration
23
- self.current_time = end_time + self.segment_gap
24
- return start_time, end_time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
- def get_audio_length(audio_file):
27
- audio = AudioSegment.from_file(audio_file)
28
- return len(audio) / 1000
 
 
 
 
 
29
 
30
- def format_time_ms(milliseconds):
31
- seconds, ms = divmod(int(milliseconds), 1000)
32
- mins, secs = divmod(seconds, 60)
33
- hrs, mins = divmod(mins, 60)
34
- return f"{hrs:02}:{mins:02}:{secs:02},{ms:03}"
35
 
36
- @dataclass
37
- class Segment:
38
- id: int
39
- text: str
40
- start_time: int = 0
41
- end_time: int = 0
42
- duration: int = 0
43
- audio: Optional[AudioSegment] = None
44
- lines: List[str] = None # Add lines field for display purposes only
45
 
46
- class TextProcessor:
47
- def __init__(self, words_per_line: int, lines_per_segment: int):
48
- self.words_per_line = words_per_line
49
- self.lines_per_segment = lines_per_segment
50
- self.min_segment_words = 3
51
- self.max_segment_words = words_per_line * lines_per_segment * 1.5 # Allow 50% more for natural breaks
52
- self.punctuation_weights = {
53
- '.': 1.0, # Strong break
54
- '!': 1.0,
55
- '?': 1.0,
56
- ';': 0.8, # Medium-strong break
57
- ':': 0.7,
58
- ',': 0.5, # Medium break
59
- '-': 0.3, # Weak break
60
- '(': 0.2,
61
- ')': 0.2
62
- }
63
 
64
- def analyze_sentence_complexity(self, text: str) -> float:
65
- """Analyze sentence complexity to determine optimal segment length"""
66
- words = text.split()
67
- complexity = 1.0
68
-
69
- # Adjust for sentence length
70
- if len(words) > self.words_per_line * 2:
71
- complexity *= 1.2
72
-
73
- # Adjust for punctuation density
74
- punct_count = sum(text.count(p) for p in self.punctuation_weights.keys())
75
- complexity *= (1 + (punct_count / len(words)) * 0.5)
76
-
77
- return complexity
78
 
79
- def find_natural_breaks(self, text: str) -> List[Tuple[int, float]]:
80
- """Find natural break points with their weights"""
81
- breaks = []
82
- words = text.split()
83
-
84
- for i, word in enumerate(words):
85
- weight = 0
86
-
87
- # Check for punctuation
88
- for punct, punct_weight in self.punctuation_weights.items():
89
- if word.endswith(punct):
90
- weight = max(weight, punct_weight)
91
-
92
- # Check for natural phrase boundaries
93
- phrase_starters = {'however', 'therefore', 'moreover', 'furthermore', 'meanwhile', 'although', 'because'}
94
- if i < len(words) - 1 and words[i+1].lower() in phrase_starters:
95
- weight = max(weight, 0.6)
96
-
97
- # Check for conjunctions at natural points
98
- if i > self.min_segment_words:
99
- conjunctions = {'and', 'but', 'or', 'nor', 'for', 'yet', 'so'}
100
- if word.lower() in conjunctions:
101
- weight = max(weight, 0.4)
102
-
103
- if weight > 0:
104
- breaks.append((i, weight))
105
-
106
- return breaks
107
 
108
- def split_into_segments(self, text: str) -> List[Segment]:
109
- # Normalize text and add proper spacing around punctuation
110
- text = re.sub(r'\s+', ' ', text.strip())
111
- text = re.sub(r'([.!?,;:])\s*', r'\1 ', text)
112
- text = re.sub(r'\s+([.!?,;:])', r'\1', text)
113
-
114
- # First, split into major segments by strong punctuation
115
- segments = []
116
- current_segment = []
117
- current_text = ""
118
- words = text.split()
119
-
120
- i = 0
121
- while i < len(words):
122
- complexity = self.analyze_sentence_complexity(' '.join(words[i:i + self.words_per_line * 2]))
123
- breaks = self.find_natural_breaks(' '.join(words[i:i + int(self.max_segment_words * complexity)]))
124
-
125
- # Find best break point
126
- best_break = None
127
- best_weight = 0
128
-
129
- for break_idx, weight in breaks:
130
- actual_idx = i + break_idx
131
- if (actual_idx - i >= self.min_segment_words and
132
- actual_idx - i <= self.max_segment_words):
133
- if weight > best_weight:
134
- best_break = break_idx
135
- best_weight = weight
136
-
137
- if best_break is None:
138
- # If no good break found, use maximum length
139
- best_break = min(self.words_per_line * self.lines_per_segment, len(words) - i)
140
-
141
- # Create segment
142
- segment_words = words[i:i + best_break + 1]
143
- segment_text = ' '.join(segment_words)
144
-
145
- # Split segment into lines
146
- lines = self.split_into_lines(segment_text)
147
- final_segment_text = '\n'.join(lines)
148
-
149
- segments.append(Segment(
150
- id=len(segments) + 1,
151
- text=final_segment_text
152
- ))
153
-
154
- i += best_break + 1
155
-
156
- return segments
157
 
158
- def split_into_lines(self, text: str) -> List[str]:
159
- """Split segment text into natural lines"""
160
- words = text.split()
161
- lines = []
162
- current_line = []
163
- word_count = 0
164
-
165
- for word in words:
166
- current_line.append(word)
167
- word_count += 1
168
-
169
- # Check for natural line breaks
170
- is_break = (
171
- word_count >= self.words_per_line or
172
- any(word.endswith(p) for p in '.!?') or
173
- (word_count >= self.words_per_line * 0.7 and
174
- any(word.endswith(p) for p in ',;:'))
175
- )
176
-
177
- if is_break:
178
- lines.append(' '.join(current_line))
179
- current_line = []
180
- word_count = 0
181
-
182
- if current_line:
183
- lines.append(' '.join(current_line))
184
-
185
- return lines
186
 
187
- # IMPROVEMENT 1: Enhanced Error Handling
188
- class TTSError(Exception):
189
- """Custom exception for TTS processing errors"""
190
- pass
 
191
 
192
- async def process_segment_with_timing(segment: Segment, voice: str, rate: str, pitch: str) -> Segment:
193
- """Process a complete segment as a single TTS unit with improved error handling"""
194
- audio_file = os.path.join(tempfile.gettempdir(), f"temp_segment_{segment.id}_{uuid.uuid4()}.wav")
195
- try:
196
- # Process the entire segment text as one unit, replacing newlines with spaces
197
- segment_text = ' '.join(segment.text.split('\n'))
198
- tts = edge_tts.Communicate(segment_text, voice, rate=rate, pitch=pitch)
199
-
200
- try:
201
- await tts.save(audio_file)
202
- except Exception as e:
203
- raise TTSError(f"Failed to generate audio for segment {segment.id}: {str(e)}")
204
-
205
- if not os.path.exists(audio_file) or os.path.getsize(audio_file) == 0:
206
- raise TTSError(f"Generated audio file is empty or missing for segment {segment.id}")
207
-
208
- try:
209
- segment.audio = AudioSegment.from_file(audio_file)
210
- # Reduced silence to 30ms for more natural flow
211
- silence = AudioSegment.silent(duration=30)
212
- segment.audio = silence + segment.audio + silence
213
- segment.duration = len(segment.audio)
214
- except Exception as e:
215
- raise TTSError(f"Failed to process audio file for segment {segment.id}: {str(e)}")
216
-
217
- return segment
218
- except Exception as e:
219
- if not isinstance(e, TTSError):
220
- raise TTSError(f"Unexpected error processing segment {segment.id}: {str(e)}")
221
- raise
222
- finally:
223
- if os.path.exists(audio_file):
224
- try:
225
- os.remove(audio_file)
226
- except Exception:
227
- pass # Ignore deletion errors
228
-
229
- # IMPROVEMENT 2: Better File Management with cleanup
230
- class FileManager:
231
- """Manages temporary and output files with cleanup capabilities"""
232
- def __init__(self):
233
- self.temp_dir = tempfile.mkdtemp(prefix="tts_app_")
234
- self.output_files = []
235
- self.max_files_to_keep = 5 # Keep only the 5 most recent output pairs
236
-
237
- def get_temp_path(self, prefix):
238
- """Get a path for a temporary file"""
239
- return os.path.join(self.temp_dir, f"{prefix}_{uuid.uuid4()}")
240
-
241
- def create_output_paths(self):
242
- """Create paths for output files"""
243
- unique_id = str(uuid.uuid4())
244
- audio_path = os.path.join(self.temp_dir, f"final_audio_{unique_id}.mp3")
245
- srt_path = os.path.join(self.temp_dir, f"final_subtitles_{unique_id}.srt")
246
-
247
- self.output_files.append((srt_path, audio_path))
248
- self.cleanup_old_files()
249
-
250
- return srt_path, audio_path
251
-
252
- def cleanup_old_files(self):
253
- """Clean up old output files, keeping only the most recent ones"""
254
- if len(self.output_files) > self.max_files_to_keep:
255
- old_files = self.output_files[:-self.max_files_to_keep]
256
- for srt_path, audio_path in old_files:
257
- try:
258
- if os.path.exists(srt_path):
259
- os.remove(srt_path)
260
- if os.path.exists(audio_path):
261
- os.remove(audio_path)
262
- except Exception:
263
- pass # Ignore deletion errors
264
 
265
- # Update the list to only include files we're keeping
266
- self.output_files = self.output_files[-self.max_files_to_keep:]
267
-
268
- def cleanup_all(self):
269
- """Clean up all managed files"""
270
- for srt_path, audio_path in self.output_files:
271
- try:
272
- if os.path.exists(srt_path):
273
- os.remove(srt_path)
274
- if os.path.exists(audio_path):
275
- os.remove(audio_path)
276
- except Exception:
277
- pass # Ignore deletion errors
278
-
279
- try:
280
- os.rmdir(self.temp_dir)
281
- except Exception:
282
- pass # Ignore if directory isn't empty or can't be removed
283
-
284
- # Create global file manager
285
- file_manager = FileManager()
286
 
287
- # IMPROVEMENT 3: Parallel Processing for Segments
288
- async def generate_accurate_srt(
289
- text: str,
290
- voice: str,
291
- rate: str,
292
- pitch: str,
293
- words_per_line: int,
294
- lines_per_segment: int,
295
- progress_callback=None,
296
- parallel: bool = True,
297
- max_workers: int = 4
298
- ) -> Tuple[str, str]:
299
- """Generate accurate SRT with parallel processing option"""
300
- processor = TextProcessor(words_per_line, lines_per_segment)
301
- segments = processor.split_into_segments(text)
302
-
303
- total_segments = len(segments)
304
- processed_segments = []
305
-
306
- # Update progress to show segmentation is complete
307
- if progress_callback:
308
- progress_callback(0.1, "Text segmentation complete")
309
-
310
- if parallel and total_segments > 1:
311
- # Process segments in parallel
312
- processed_count = 0
313
- segment_tasks = []
314
-
315
- # Create a semaphore to limit concurrent tasks
316
- semaphore = asyncio.Semaphore(max_workers)
317
-
318
- async def process_with_semaphore(segment):
319
- async with semaphore:
320
- nonlocal processed_count
321
- try:
322
- result = await process_segment_with_timing(segment, voice, rate, pitch)
323
- processed_count += 1
324
- if progress_callback:
325
- progress = 0.1 + (0.8 * processed_count / total_segments)
326
- progress_callback(progress, f"Processed {processed_count}/{total_segments} segments")
327
- return result
328
- except Exception as e:
329
- # Handle errors in individual segments
330
- processed_count += 1
331
- if progress_callback:
332
- progress = 0.1 + (0.8 * processed_count / total_segments)
333
- progress_callback(progress, f"Error in segment {segment.id}: {str(e)}")
334
- raise
335
-
336
- # Create tasks for all segments
337
- for segment in segments:
338
- segment_tasks.append(process_with_semaphore(segment))
339
-
340
- # Run all tasks and collect results
341
- try:
342
- processed_segments = await asyncio.gather(*segment_tasks)
343
- except Exception as e:
344
- if progress_callback:
345
- progress_callback(0.9, f"Error during parallel processing: {str(e)}")
346
- raise TTSError(f"Failed during parallel processing: {str(e)}")
347
- else:
348
- # Process segments sequentially (original method)
349
- for i, segment in enumerate(segments):
350
- try:
351
- processed_segment = await process_segment_with_timing(segment, voice, rate, pitch)
352
- processed_segments.append(processed_segment)
353
-
354
- if progress_callback:
355
- progress = 0.1 + (0.8 * (i + 1) / total_segments)
356
- progress_callback(progress, f"Processed {i + 1}/{total_segments} segments")
357
- except Exception as e:
358
- if progress_callback:
359
- progress_callback(0.9, f"Error processing segment {segment.id}: {str(e)}")
360
- raise TTSError(f"Failed to process segment {segment.id}: {str(e)}")
361
-
362
- # Sort segments by ID to ensure correct order
363
- processed_segments.sort(key=lambda s: s.id)
364
-
365
- if progress_callback:
366
- progress_callback(0.9, "Finalizing audio and subtitles")
367
 
368
- # Now combine the segments in the correct order
369
- current_time = 0
370
- final_audio = AudioSegment.empty()
371
- srt_content = ""
372
 
373
- for segment in processed_segments:
374
- # Calculate precise timing
375
- segment.start_time = current_time
376
- segment.end_time = current_time + segment.duration
377
-
378
- # Add to SRT with precise timing
379
- srt_content += (
380
- f"{segment.id}\n"
381
- f"{format_time_ms(segment.start_time)} --> {format_time_ms(segment.end_time)}\n"
382
- f"{segment.text}\n\n"
383
- )
384
-
385
- # Add to final audio with precise positioning
386
- final_audio = final_audio.append(segment.audio, crossfade=0)
387
-
388
- # Update timing with precise gap
389
- current_time = segment.end_time
390
-
391
- # Export with high precision
392
- srt_path, audio_path = file_manager.create_output_paths()
393
-
394
- try:
395
- # Export with optimized quality settings and compression
396
- export_params = {
397
- 'format': 'mp3',
398
- 'bitrate': '192k', # Reduced from 320k but still high quality
399
- 'parameters': [
400
- '-ar', '44100', # Standard sample rate
401
- '-ac', '2', # Stereo
402
- '-compression_level', '0', # Best compression
403
- '-qscale:a', '2' # High quality VBR encoding
404
- ]
405
- }
406
- final_audio.export(audio_path, **export_params)
407
-
408
- with open(srt_path, "w", encoding='utf-8') as f:
409
- f.write(srt_content)
410
- except Exception as e:
411
- if progress_callback:
412
- progress_callback(1.0, f"Error exporting final files: {str(e)}")
413
- raise TTSError(f"Failed to export final files: {str(e)}")
414
-
415
- if progress_callback:
416
- progress_callback(1.0, "Complete!")
417
-
418
- return srt_path, audio_path
419
 
420
- # IMPROVEMENT 4: Progress Reporting with proper error handling for older Gradio versions
421
- async def process_text_with_progress(
422
- text,
423
- pitch,
424
- rate,
425
- voice,
426
- words_per_line,
427
- lines_per_segment,
428
- parallel_processing,
429
- progress=gr.Progress()
430
- ):
431
- # Input validation
432
- if not text or text.strip() == "":
433
- return None, None, None, True, "Please enter some text to convert to speech."
434
 
435
- # Format pitch and rate strings
436
- pitch_str = f"{pitch:+d}Hz" if pitch != 0 else "+0Hz"
437
- rate_str = f"{rate:+d}%" if rate != 0 else "+0%"
438
 
439
- try:
440
- # Start progress tracking
441
- progress(0, "Preparing text...")
442
-
443
- def update_progress(value, status):
444
- progress(value, status)
445
-
446
- srt_path, audio_path = await generate_accurate_srt(
447
- text,
448
- voice_options[voice],
449
- rate_str,
450
- pitch_str,
451
- words_per_line,
452
- lines_per_segment,
453
- progress_callback=update_progress,
454
- parallel=parallel_processing
455
- )
456
-
457
- # If successful, return results and hide error
458
- return srt_path, audio_path, audio_path, False, ""
459
- except TTSError as e:
460
- # Return specific TTS error
461
- return None, None, None, True, f"TTS Error: {str(e)}"
462
- except Exception as e:
463
- # Return any other error
464
- return None, None, None, True, f"Unexpected error: {str(e)}"
465
-
466
- # Voice options dictionary
467
- voice_options = {
468
- "Andrew Male": "en-US-AndrewNeural",
469
- "Jenny Female": "en-US-JennyNeural",
470
- "Guy Male": "en-US-GuyNeural",
471
- "Ana Female": "en-US-AnaNeural",
472
- "Aria Female": "en-US-AriaNeural",
473
- "Brian Male": "en-US-BrianNeural",
474
- "Christopher Male": "en-US-ChristopherNeural",
475
- "Eric Male": "en-US-EricNeural",
476
- "Michelle Male": "en-US-MichelleNeural",
477
- "Roger Male": "en-US-RogerNeural",
478
- "Natasha Female": "en-AU-NatashaNeural",
479
- "William Male": "en-AU-WilliamNeural",
480
- "Clara Female": "en-CA-ClaraNeural",
481
- "Liam Female ": "en-CA-LiamNeural",
482
- "Libby Female": "en-GB-LibbyNeural",
483
- "Maisie": "en-GB-MaisieNeural",
484
- "Ryan": "en-GB-RyanNeural",
485
- "Sonia": "en-GB-SoniaNeural",
486
- "Thomas": "en-GB-ThomasNeural",
487
- "Sam": "en-HK-SamNeural",
488
- "Yan": "en-HK-YanNeural",
489
- "Connor": "en-IE-ConnorNeural",
490
- "Emily": "en-IE-EmilyNeural",
491
- "Neerja": "en-IN-NeerjaNeural",
492
- "Prabhat": "en-IN-PrabhatNeural",
493
- "Asilia": "en-KE-AsiliaNeural",
494
- "Chilemba": "en-KE-ChilembaNeural",
495
- "Abeo": "en-NG-AbeoNeural",
496
- "Ezinne": "en-NG-EzinneNeural",
497
- "Mitchell": "en-NZ-MitchellNeural",
498
- "James": "en-PH-JamesNeural",
499
- "Rosa": "en-PH-RosaNeural",
500
- "Luna": "en-SG-LunaNeural",
501
- "Wayne": "en-SG-WayneNeural",
502
- "Elimu": "en-TZ-ElimuNeural",
503
- "Imani": "en-TZ-ImaniNeural",
504
- "Leah": "en-ZA-LeahNeural",
505
- "Luke": "en-ZA-LukeNeural"
506
- # Add other voices as needed
507
- }
508
-
509
- # Register cleanup on exit
510
- import atexit
511
- atexit.register(file_manager.cleanup_all)
512
-
513
- # Create Gradio interface
514
- with gr.Blocks(title="Advanced TTS with Configurable SRT Generation") as app:
515
- gr.Markdown("# Advanced TTS with Configurable SRT Generation")
516
- gr.Markdown("Generate perfectly synchronized audio and subtitles with natural speech patterns.")
517
 
518
- with gr.Row():
519
- with gr.Column(scale=3):
520
- text_input = gr.Textbox(label="Enter Text", lines=10, placeholder="Enter your text here...")
521
-
522
- with gr.Column(scale=2):
523
- voice_dropdown = gr.Dropdown(
524
- label="Select Voice",
525
- choices=list(voice_options.keys()),
526
- value="Jenny Female"
527
- )
528
- pitch_slider = gr.Slider(
529
- label="Pitch Adjustment (Hz)",
530
- minimum=-10,
531
- maximum=10,
532
- value=0,
533
- step=1
534
- )
535
- rate_slider = gr.Slider(
536
- label="Rate Adjustment (%)",
537
- minimum=-25,
538
- maximum=25,
539
- value=0,
540
- step=1
541
- )
542
 
543
- with gr.Row():
544
- with gr.Column():
545
- words_per_line = gr.Slider(
546
- label="Words per Line",
547
- minimum=3,
548
- maximum=12,
549
- value=6,
550
- step=1,
551
- info="Controls how many words appear on each line of the subtitle"
552
- )
553
- with gr.Column():
554
- lines_per_segment = gr.Slider(
555
- label="Lines per Segment",
556
- minimum=1,
557
- maximum=4,
558
- value=2,
559
- step=1,
560
- info="Controls how many lines appear in each subtitle segment"
561
- )
562
- with gr.Column():
563
- parallel_processing = gr.Checkbox(
564
- label="Enable Parallel Processing",
565
- value=True,
566
- info="Process multiple segments simultaneously for faster conversion (recommended for longer texts)"
567
- )
568
 
569
- submit_btn = gr.Button("Generate Audio & Subtitles")
 
570
 
571
- # Add error message component
572
- error_output = gr.Textbox(label="Status", visible=False)
 
 
 
573
 
574
- with gr.Row():
575
- with gr.Column():
576
- audio_output = gr.Audio(label="Preview Audio")
577
- with gr.Column():
578
- srt_file = gr.File(label="Download SRT")
579
- audio_file = gr.File(label="Download Audio")
580
 
581
- # Handle button click with manual error handling instead of .catch()
582
- submit_btn.click(
583
- fn=process_text_with_progress,
584
  inputs=[
585
- text_input,
586
- pitch_slider,
587
- rate_slider,
588
- voice_dropdown,
589
- words_per_line,
590
- lines_per_segment,
591
- parallel_processing
592
  ],
593
  outputs=[
594
- srt_file,
595
- audio_file,
596
- audio_output,
597
- error_output,
598
- error_output
599
  ],
600
- api_name="generate"
 
 
 
 
601
  )
 
602
 
 
603
  if __name__ == "__main__":
604
- app.launch()
 
 
1
  import gradio as gr
 
2
  import edge_tts
 
3
  import asyncio
 
 
 
4
  import tempfile
5
+ import os
6
+ import re
7
+ from pydub import AudioSegment # Required for audio duration, needs ffmpeg installed
8
+
9
+ # Get all available voices
10
async def get_voices():
    """Return a mapping of display label -> ShortName for every Edge TTS voice."""
    available = await edge_tts.list_voices()
    labels = {}
    for entry in available:
        # Build the human-readable label shown in the dropdown.
        label = f"{entry['ShortName']} - {entry['Locale']} ({entry['Gender']})"
        labels[label] = entry['ShortName']
    return labels
15
+
16
+ # Text-to-speech function
17
async def text_to_speech(text, voice, rate, pitch):
    """
    Synthesize *text* with Edge TTS and write the result to a temporary .mp3.

    Returns a (audio_path, original_text, warning) triple: on validation
    failure the first two are None and *warning* carries a user-facing
    message; on success *warning* is an empty string.
    """
    # Guard clauses: report problems as plain strings the UI can display.
    if not text.strip():
        return None, None, "Please enter text to convert."
    if not voice:
        return None, None, "Please select a voice."

    # Dropdown labels look like "ShortName - Locale (Gender)"; the API wants ShortName.
    short_name = voice.split(" - ")[0]

    # Edge TTS expects signed-percentage / signed-Hz strings for rate and pitch.
    communicate = edge_tts.Communicate(
        text, short_name, rate=f"{rate:+d}%", pitch=f"{pitch:+d}Hz"
    )

    # Persist the synthesized audio; delete=False so Gradio can serve the file later.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as handle:
        out_path = handle.name
        await communicate.save(out_path)

    return out_path, text, ""
45
+
46
def format_time(ms):
    """Render a millisecond offset as an SRT timestamp: HH:MM:SS,mmm."""
    # Truncate once up front; successive divmods peel off each field.
    total_ms = int(ms)
    total_seconds, millis = divmod(total_ms, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hrs, mins = divmod(total_minutes, 60)
    return f"{hrs:02}:{mins:02}:{secs:02},{millis:03}"
57
+
58
def generate_srt(text_input, audio_filepath):
    """
    Write an SRT file next to *audio_filepath* with timings estimated from text length.

    Each segment's duration is apportioned by its share of the total character
    count — no waveform analysis or word-level alignment is performed, so the
    timings are approximate. Needs ffmpeg available for pydub to read the
    audio's duration. Returns the SRT path, or None when inputs are missing,
    the audio cannot be read, or the text yields no usable segments.
    """
    if not text_input or not audio_filepath:
        return None

    # Total audio length (ms) via pydub; bail out gracefully if decoding fails.
    try:
        total_duration_ms = len(AudioSegment.from_file(audio_filepath))
    except Exception as e:
        print(f"Error getting audio duration with pydub: {e}. SRT generation requires ffmpeg.")
        return None

    # Cut the text at sentence punctuation/commas/newlines, punctuation kept
    # with its segment; drop whitespace-only pieces.
    pieces = re.findall(r'[^.!?,\n]+[.!?,\n]*', text_input)
    pieces = [p.strip() for p in pieces if p.strip()]
    if not pieces:
        return None

    char_total = sum(len(p) for p in pieces)
    if char_total == 0:  # guard the division below
        return None

    lines = []
    cursor_ms = 0
    last_index = len(pieces) - 1
    for idx, piece in enumerate(pieces):
        # Proportional share of the audio for this piece of text.
        share_ms = (len(piece) / char_total) * total_duration_ms
        begin = cursor_ms
        # Pin the final cue to the exact end of the audio.
        finish = total_duration_ms if idx == last_index else cursor_ms + share_ms

        lines.extend([
            str(idx + 1),
            f"{format_time(begin)} --> {format_time(finish)}",
            piece,
            "",  # blank line terminates each SRT block
        ])
        cursor_ms = finish

    # Same basename as the audio, .srt extension.
    srt_path = f"{os.path.splitext(audio_filepath)[0]}.srt"
    with open(srt_path, "w", encoding="utf-8") as fh:
        fh.write("\n".join(lines))

    return srt_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
+ # Gradio interface function (wraps async functions and handles SRT generation)
122
def tts_interface(text, voice, rate, pitch):
    """
    Gradio entry point: synthesize the audio, then derive an estimated SRT.

    Returns (audio_path, srt_path, warning) matching the interface outputs.
    """
    # text_to_speech is a coroutine; drive it to completion here.
    audio_path, original_text, warning = asyncio.run(
        text_to_speech(text, voice, rate, pitch)
    )

    # SRT generation only makes sense once audio exists.
    srt_path = generate_srt(original_text, audio_path) if audio_path else None

    return audio_path, srt_path, warning
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
+ # Create Gradio application
137
async def create_demo():
    """
    Asynchronously build and return the configured Gradio interface.

    Async because the available voices are fetched from the Edge TTS service
    before the dropdown can be populated.
    """
    voices = await get_voices()  # Fetch voices when the app starts

    description = """
    Convert text to speech using Microsoft Edge TTS. Adjust speech rate and pitch: 0 is default, positive values increase, negative values decrease.

    ✨ **New Feature: Generate SRT Subtitles (Estimated Timings)!** ✨

    Automatically generates an SRT (SubRip Subtitle) file from your input text.
    **Important Note on Timings:** The SRT timings are *estimated* based on the length of each text segment relative to the total audio duration. This feature *does not* perform advanced audio waveform analysis for precise pause detection or word-level synchronization. For perfectly synchronized subtitles, dedicated forced-alignment tools are typically required.

    🎥 **Exciting News: Introducing our Text-to-Video Converter!** 🎥

    Take your content creation to the next level with our cutting-edge Text-to-Video Converter!
    Transform your words into stunning, professional-quality videos in just a few clicks.

    Features:
    Convert text to engaging videos with customizable visuals
    • Choose from 40+ languages and 300+ voices
    • Perfect for creating audiobooks, storytelling, and language learning materials
    • Ideal for educators, content creators, and language enthusiasts

    Ready to revolutionize your content? [Click here to try our Text-to-Video Converter now!](https://text2video.wingetgui.com/)
    """

    demo = gr.Interface(
        fn=tts_interface,  # The function that processes inputs and returns outputs
        inputs=[
            gr.Textbox(label="Input Text", lines=5, placeholder="Enter your text here to convert to speech and generate SRT..."),
            gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value="", type="value"),
            gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1),
            gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
        ],
        outputs=[
            gr.Audio(label="Generated Audio", type="filepath"),
            gr.File(label="Generated SRT Subtitle", type="filepath", file_count="single", visible=True),  # Output for the SRT file
            gr.Markdown(label="Warning")  # Now expects a string output
        ],
        title="Edge TTS Text-to-Speech with SRT Generator",
        description=description,
        article="Experience the power of Edge TTS for text-to-speech conversion, and explore our advanced Text-to-Video Converter for even more creative possibilities!",
        analytics_enabled=False,
        # Fix: Gradio documents the string values "never"/"auto"/"manual" for
        # allow_flagging; the boolean False is not a documented value.
        allow_flagging="never"
    )
    return demo
185
 
186
+ # Run the application
187
if __name__ == "__main__":
    # Voice discovery is async, so the UI is built inside an event loop,
    # then served synchronously.
    asyncio.run(create_demo()).launch()