gds

Sleeping

App Files Community

hivecorp committed on May 24, 2025

Commit

49eff30

verified ·

1 Parent(s): 411d260

Update app.py

Browse files

Files changed (1) hide show

app.py +118 -280

app.py CHANGED Viewed

@@ -72,7 +72,8 @@ class TextProcessor:
         # Adjust for punctuation density
         punct_count = sum(text.count(p) for p in self.punctuation_weights.keys())
-        complexity *= (1 + (punct_count / len(words)) * 0.5)
         return complexity
@@ -111,38 +112,39 @@ class TextProcessor:
         text = re.sub(r'([.!?,;:])\s*', r'\1 ', text)
         text = re.sub(r'\s+([.!?,;:])', r'\1', text)
-        # First, split into major segments by strong punctuation
         segments = []
-        current_segment = []
-        current_text = ""
         words = text.split()
         i = 0
         while i < len(words):
-            complexity = self.analyze_sentence_complexity(' '.join(words[i:i + self.words_per_line * 2]))
-            breaks = self.find_natural_breaks(' '.join(words[i:i + int(self.max_segment_words * complexity)]))
-            # Find best break point
-            best_break = None
-            best_weight = 0
             for break_idx, weight in breaks:
-                actual_idx = i + break_idx
-                if (actual_idx - i >= self.min_segment_words and
-                    actual_idx - i <= self.max_segment_words):
-                    if weight > best_weight:
-                        best_break = break_idx
-                        best_weight = weight
-            if best_break is None:
-                # If no good break found, use maximum length
-                best_break = min(self.words_per_line * self.lines_per_segment, len(words) - i)
-            # Create segment
-            segment_words = words[i:i + best_break + 1]
             segment_text = ' '.join(segment_words)
-            # Split segment into lines
             lines = self.split_into_lines(segment_text)
             final_segment_text = '\n'.join(lines)
@@ -166,7 +168,6 @@ class TextProcessor:
             current_line.append(word)
             word_count += 1
-            # Check for natural line breaks
             is_break = (
                 word_count >= self.words_per_line or
                 any(word.endswith(p) for p in '.!?') or
@@ -174,7 +175,7 @@ class TextProcessor:
                  any(word.endswith(p) for p in ',;:'))
             )
-            if is_break:
                 lines.append(' '.join(current_line))
                 current_line = []
                 word_count = 0
@@ -184,16 +185,15 @@ class TextProcessor:
         return lines
-# IMPROVEMENT 1: Enhanced Error Handling
 class TTSError(Exception):
     """Custom exception for TTS processing errors"""
     pass
 async def process_segment_with_timing(segment: Segment, voice: str, rate: str, pitch: str) -> Segment:
     """Process a complete segment as a single TTS unit with improved error handling"""
-    audio_file = os.path.join(tempfile.gettempdir(), f"temp_segment_{segment.id}_{uuid.uuid4()}.wav")
     try:
-        # Process the entire segment text as one unit, replacing newlines with spaces
         segment_text = ' '.join(segment.text.split('\n'))
         tts = edge_tts.Communicate(segment_text, voice, rate=rate, pitch=pitch)
@@ -207,7 +207,6 @@ async def process_segment_with_timing(segment: Segment, voice: str, rate: str, p
         try:
             segment.audio = AudioSegment.from_file(audio_file)
-            # Reduced silence to 30ms for more natural flow
             silence = AudioSegment.silent(duration=30)
             segment.audio = silence + segment.audio + silence
             segment.duration = len(segment.audio)
@@ -224,20 +223,15 @@ async def process_segment_with_timing(segment: Segment, voice: str, rate: str, p
             try:
                 os.remove(audio_file)
             except Exception:
-                pass  # Ignore deletion errors
-# IMPROVEMENT 2: Better File Management with cleanup
 class FileManager:
     """Manages temporary and output files with cleanup capabilities"""
     def __init__(self):
         self.temp_dir = tempfile.mkdtemp(prefix="tts_app_")
         self.output_files = []
-        self.max_files_to_keep = 5  # Keep only the 5 most recent output pairs
-    def get_temp_path(self, prefix):
-        """Get a path for a temporary file"""
-        return os.path.join(self.temp_dir, f"{prefix}_{uuid.uuid4()}")
     def create_output_paths(self):
         """Create paths for output files"""
         unique_id = str(uuid.uuid4())
@@ -252,276 +246,164 @@ class FileManager:
     def cleanup_old_files(self):
         """Clean up old output files, keeping only the most recent ones"""
         if len(self.output_files) > self.max_files_to_keep:
-            old_files = self.output_files[:-self.max_files_to_keep]
-            for srt_path, audio_path in old_files:
                 try:
-                    if os.path.exists(srt_path):
-                        os.remove(srt_path)
-                    if os.path.exists(audio_path):
-                        os.remove(audio_path)
                 except Exception:
-                    pass  # Ignore deletion errors
-            # Update the list to only include files we're keeping
             self.output_files = self.output_files[-self.max_files_to_keep:]
     def cleanup_all(self):
         """Clean up all managed files"""
         for srt_path, audio_path in self.output_files:
             try:
-                if os.path.exists(srt_path):
-                    os.remove(srt_path)
-                if os.path.exists(audio_path):
-                    os.remove(audio_path)
             except Exception:
-                pass  # Ignore deletion errors
         try:
-            os.rmdir(self.temp_dir)
         except Exception:
-            pass  # Ignore if directory isn't empty or can't be removed
-# Create global file manager
 file_manager = FileManager()
-# IMPROVEMENT 3: Parallel Processing for Segments
 async def generate_accurate_srt(
-    text: str,
-    voice: str,
-    rate: str,
-    pitch: str,
-    words_per_line: int,
-    lines_per_segment: int,
-    progress_callback=None,
-    parallel: bool = True,
-    max_workers: int = 4
 ) -> Tuple[str, str]:
     """Generate accurate SRT with parallel processing option"""
     processor = TextProcessor(words_per_line, lines_per_segment)
     segments = processor.split_into_segments(text)
     total_segments = len(segments)
-    processed_segments = []
-    # Update progress to show segmentation is complete
     if progress_callback:
         progress_callback(0.1, "Text segmentation complete")
     if parallel and total_segments > 1:
-        # Process segments in parallel
-        processed_count = 0
-        segment_tasks = []
-        # Create a semaphore to limit concurrent tasks
         semaphore = asyncio.Semaphore(max_workers)
         async def process_with_semaphore(segment):
             async with semaphore:
                 nonlocal processed_count
-                try:
-                    result = await process_segment_with_timing(segment, voice, rate, pitch)
-                    processed_count += 1
-                    if progress_callback:
-                        progress = 0.1 + (0.8 * processed_count / total_segments)
-                        progress_callback(progress, f"Processed {processed_count}/{total_segments} segments")
-                    return result
-                except Exception as e:
-                    # Handle errors in individual segments
-                    processed_count += 1
-                    if progress_callback:
-                        progress = 0.1 + (0.8 * processed_count / total_segments)
-                        progress_callback(progress, f"Error in segment {segment.id}: {str(e)}")
-                    raise
-        # Create tasks for all segments
-        for segment in segments:
-            segment_tasks.append(process_with_semaphore(segment))
-        # Run all tasks and collect results
-        try:
-            processed_segments = await asyncio.gather(*segment_tasks)
-        except Exception as e:
-            if progress_callback:
-                progress_callback(0.9, f"Error during parallel processing: {str(e)}")
-            raise TTSError(f"Failed during parallel processing: {str(e)}")
     else:
-        # Process segments sequentially (original method)
         for i, segment in enumerate(segments):
-            try:
-                processed_segment = await process_segment_with_timing(segment, voice, rate, pitch)
-                processed_segments.append(processed_segment)
-                if progress_callback:
-                    progress = 0.1 + (0.8 * (i + 1) / total_segments)
-                    progress_callback(progress, f"Processed {i + 1}/{total_segments} segments")
-            except Exception as e:
-                if progress_callback:
-                    progress_callback(0.9, f"Error processing segment {segment.id}: {str(e)}")
-                raise TTSError(f"Failed to process segment {segment.id}: {str(e)}")
-    # Sort segments by ID to ensure correct order
     processed_segments.sort(key=lambda s: s.id)
     if progress_callback:
         progress_callback(0.9, "Finalizing audio and subtitles")
-    # Now combine the segments in the correct order
     current_time = 0
     final_audio = AudioSegment.empty()
     srt_content = ""
     for segment in processed_segments:
-        # Calculate precise timing
         segment.start_time = current_time
         segment.end_time = current_time + segment.duration
-        # Add to SRT with precise timing
-        srt_content += (
-            f"{segment.id}\n"
-            f"{format_time_ms(segment.start_time)} --> {format_time_ms(segment.end_time)}\n"
-            f"{segment.text}\n\n"
-        )
-        # Add to final audio with precise positioning
         final_audio = final_audio.append(segment.audio, crossfade=0)
-        # Update timing with precise gap
         current_time = segment.end_time
-    # Export with high precision
     srt_path, audio_path = file_manager.create_output_paths()
     try:
-        # Export with optimized quality settings and compression
-        export_params = {
-            'format': 'mp3',
-            'bitrate': '192k',  # Reduced from 320k but still high quality
-            'parameters': [
-                '-ar', '44100',  # Standard sample rate
-                '-ac', '2',      # Stereo
-                '-compression_level', '0',  # Best compression
-                '-qscale:a', '2'  # High quality VBR encoding
-            ]
-        }
         final_audio.export(audio_path, **export_params)
-        with open(srt_path, "w", encoding='utf-8') as f:
-            f.write(srt_content)
     except Exception as e:
-        if progress_callback:
-            progress_callback(1.0, f"Error exporting final files: {str(e)}")
         raise TTSError(f"Failed to export final files: {str(e)}")
     if progress_callback:
         progress_callback(1.0, "Complete!")
     return srt_path, audio_path
-# IMPROVEMENT 4: Progress Reporting with proper error handling for older Gradio versions
 async def process_text_with_progress(
-    text,
-    pitch,
-    rate,
-    voice,
-    words_per_line,
-    lines_per_segment,
-    parallel_processing,
     progress=gr.Progress()
 ):
-    # Input validation
     if not text or text.strip() == "":
-        return None, None, None, gr.update(value="", visible=True), gr.update(value="", visible=False), "Please enter some text to convert to speech."
-    # Format pitch and rate strings
     pitch_str = f"{pitch:+d}Hz" if pitch != 0 else "+0Hz"
     rate_str = f"{rate:+d}%" if rate != 0 else "+0%"
     try:
-        # Start progress tracking
         progress(0, "Preparing text...")
         def update_progress(value, status):
             progress(value, status)
         srt_path, audio_path = await generate_accurate_srt(
-            text,
-            voice_options[voice],
-            rate_str,
-            pitch_str,
-            words_per_line,
-            lines_per_segment,
             progress_callback=update_progress,
             parallel=parallel_processing
         )
-        # Generate Markdown links for download that open in a new tab
-        srt_download_link = f'<a href="file={srt_path}" download="subtitles.srt" target="_blank">Download SRT</a>'
-        audio_download_link = f'<a href="file={audio_path}" download="audio.mp3" target="_blank">Download Audio</a>'
-        # Return the paths for gr.Audio and Markdown for download links
-        return (
-            audio_path,
-            gr.update(value=srt_download_link, visible=True), # Use gr.Markdown for SRT download
-            gr.update(value=audio_download_link, visible=True), # Use gr.Markdown for Audio download
-            gr.update(value="", visible=False), # Hide error message
-            "" # Clear error message
-        )
     except TTSError as e:
-        # Return specific TTS error
-        return None, gr.update(value="", visible=False), gr.update(value="", visible=False), gr.update(value=f"TTS Error: {str(e)}", visible=True), f"TTS Error: {str(e)}"
     except Exception as e:
         # Return any other error
-        return None, gr.update(value="", visible=False), gr.update(value="", visible=False), gr.update(value=f"Unexpected error: {str(e)}", visible=True), f"Unexpected error: {str(e)}"
 # Voice options dictionary
 voice_options = {
-    "Andrew Male": "en-US-AndrewNeural",
-    "Jenny Female": "en-US-JennyNeural",
-    "Guy Male": "en-US-GuyNeural",
-    "Ana Female": "en-US-AnaNeural",
-    "Aria Female": "en-US-AriaNeural",
-    "Brian Male": "en-US-BrianNeural",
-    "Christopher Male": "en-US-ChristopherNeural",
-    "Eric Male": "en-US-EricNeural",
-    "Michelle Male": "en-US-MichelleNeural",
-    "Roger Male": "en-US-RogerNeural",
-    "Natasha Female": "en-AU-NatashaNeural",
-    "William Male": "en-AU-WilliamNeural",
-    "Clara Female": "en-CA-ClaraNeural",
-    "Liam Female ": "en-CA-LiamNeural",
-    "Libby Female": "en-GB-LibbyNeural",
-    "Maisie": "en-GB-MaisieNeural",
-    "Ryan": "en-GB-RyanNeural",
-    "Sonia": "en-GB-SoniaNeural",
-    "Thomas": "en-GB-ThomasNeural",
-    "Sam": "en-HK-SamNeural",
-    "Yan": "en-HK-YanNeural",
-    "Connor": "en-IE-ConnorNeural",
-    "Emily": "en-IE-EmilyNeural",
-    "Neerja": "en-IN-NeerjaNeural",
-    "Prabhat": "en-IN-PrabhatNeural",
-    "Asilia": "en-KE-AsiliaNeural",
-    "Chilemba": "en-KE-ChilembaNeural",
-    "Abeo": "en-NG-AbeoNeural",
-    "Ezinne": "en-NG-EzinneNeural",
-    "Mitchell": "en-NZ-MitchellNeural",
-    "James": "en-PH-JamesNeural",
-    "Rosa": "en-PH-RosaNeural",
-    "Luna": "en-SG-LunaNeural",
-    "Wayne": "en-SG-WayneNeural",
-    "Elimu": "en-TZ-ElimuNeural",
-    "Imani": "en-TZ-ImaniNeural",
-    "Leah": "en-ZA-LeahNeural",
-    "Luke": "en-ZA-LukeNeural"
-    # Add other voices as needed
 }
-# Register cleanup on exit
 import atexit
 atexit.register(file_manager.cleanup_all)
-# Create Gradio interface
 with gr.Blocks(title="Advanced TTS with Configurable SRT Generation") as app:
     gr.Markdown("# Advanced TTS with Configurable SRT Generation")
     gr.Markdown("Generate perfectly synchronized audio and subtitles with natural speech patterns.")
@@ -529,85 +411,41 @@ with gr.Blocks(title="Advanced TTS with Configurable SRT Generation") as app:
     with gr.Row():
         with gr.Column(scale=3):
             text_input = gr.Textbox(label="Enter Text", lines=10, placeholder="Enter your text here...")
         with gr.Column(scale=2):
-            voice_dropdown = gr.Dropdown(
-                label="Select Voice",
-                choices=list(voice_options.keys()),
-                value="Jenny Female"
-            )
-            pitch_slider = gr.Slider(
-                label="Pitch Adjustment (Hz)",
-                minimum=-10,
-                maximum=10,
-                value=0,
-                step=1
-            )
-            rate_slider = gr.Slider(
-                label="Rate Adjustment (%)",
-                minimum=-25,
-                maximum=25,
-                value=0,
-                step=1
-            )
     with gr.Row():
         with gr.Column():
-            words_per_line = gr.Slider(
-                label="Words per Line",
-                minimum=3,
-                maximum=12,
-                value=6,
-                step=1,
-                info="Controls how many words appear on each line of the subtitle"
-            )
         with gr.Column():
-            lines_per_segment = gr.Slider(
-                label="Lines per Segment",
-                minimum=1,
-                maximum=4,
-                value=2,
-                step=1,
-                info="Controls how many lines appear in each subtitle segment"
-            )
         with gr.Column():
-            parallel_processing = gr.Checkbox(
-                label="Enable Parallel Processing",
-                value=True,
-                info="Process multiple segments simultaneously for faster conversion (recommended for longer texts)"
-            )
-    submit_btn = gr.Button("Generate Audio & Subtitles")
-    # Add error message component
-    error_output = gr.Textbox(label="Status", visible=False)
     with gr.Row():
-        with gr.Column():
-            audio_output = gr.Audio(label="Preview Audio")
-        with gr.Column():
-            # Change gr.File to gr.Markdown for download links
-            srt_download_link = gr.Markdown(value="", visible=False, label="Download SRT")
-            audio_download_link = gr.Markdown(value="", visible=False, label="Download Audio")
-    # Handle button click with manual error handling instead of .catch()
     submit_btn.click(
         fn=process_text_with_progress,
         inputs=[
-            text_input,
-            pitch_slider,
-            rate_slider,
-            voice_dropdown,
-            words_per_line,
-            lines_per_segment,
-            parallel_processing
         ],
         outputs=[
-            audio_output,
-            srt_download_link, # Output to Markdown component
-            audio_download_link, # Output to Markdown component
-            error_output,
-            error_output
         ],
         api_name="generate"
     )

         # Adjust for punctuation density
         punct_count = sum(text.count(p) for p in self.punctuation_weights.keys())
+        if len(words) > 0:
+            complexity *= (1 + (punct_count / len(words)) * 0.5)
         return complexity
         text = re.sub(r'([.!?,;:])\s*', r'\1 ', text)
         text = re.sub(r'\s+([.!?,;:])', r'\1', text)
         segments = []
         words = text.split()
         i = 0
         while i < len(words):
+            # Dynamically select a chunk to analyze for breaks
+            chunk_end = i + int(self.max_segment_words)
+            chunk_text = ' '.join(words[i:chunk_end])
+            complexity = self.analyze_sentence_complexity(chunk_text)
+            breaks = self.find_natural_breaks(chunk_text)
+            best_break = -1
+            best_weight = -1
+            # Find the best break point within the ideal segment length
+            ideal_length = self.words_per_line * self.lines_per_segment
             for break_idx, weight in breaks:
+                # Prioritize breaks closer to the ideal length
+                distance_penalty = 1 - (abs(break_idx - ideal_length) / ideal_length) * 0.5
+                score = weight * distance_penalty
+                if score > best_weight:
+                    best_break = break_idx
+                    best_weight = score
+            if best_break == -1:
+                # If no break found, split at the ideal length or end of text
+                best_break = min(ideal_length, len(words) - 1 - i)
+            segment_words = words[i : i + best_break + 1]
             segment_text = ' '.join(segment_words)
             lines = self.split_into_lines(segment_text)
             final_segment_text = '\n'.join(lines)
             current_line.append(word)
             word_count += 1
             is_break = (
                 word_count >= self.words_per_line or
                 any(word.endswith(p) for p in '.!?') or
                  any(word.endswith(p) for p in ',;:'))
             )
+            if is_break and len(words) > word_count:
                 lines.append(' '.join(current_line))
                 current_line = []
                 word_count = 0
         return lines
 class TTSError(Exception):
     """Custom exception for TTS processing errors"""
     pass
 async def process_segment_with_timing(segment: Segment, voice: str, rate: str, pitch: str) -> Segment:
     """Process a complete segment as a single TTS unit with improved error handling"""
+    temp_dir = tempfile.gettempdir()
+    audio_file = os.path.join(temp_dir, f"temp_segment_{segment.id}_{uuid.uuid4()}.wav")
     try:
         segment_text = ' '.join(segment.text.split('\n'))
         tts = edge_tts.Communicate(segment_text, voice, rate=rate, pitch=pitch)
         try:
             segment.audio = AudioSegment.from_file(audio_file)
             silence = AudioSegment.silent(duration=30)
             segment.audio = silence + segment.audio + silence
             segment.duration = len(segment.audio)
             try:
                 os.remove(audio_file)
             except Exception:
+                pass
 class FileManager:
     """Manages temporary and output files with cleanup capabilities"""
     def __init__(self):
         self.temp_dir = tempfile.mkdtemp(prefix="tts_app_")
         self.output_files = []
+        self.max_files_to_keep = 5
     def create_output_paths(self):
         """Create paths for output files"""
         unique_id = str(uuid.uuid4())
     def cleanup_old_files(self):
         """Clean up old output files, keeping only the most recent ones"""
         if len(self.output_files) > self.max_files_to_keep:
+            old_files_to_remove = self.output_files[:-self.max_files_to_keep]
+            for srt_path, audio_path in old_files_to_remove:
                 try:
+                    if os.path.exists(srt_path): os.remove(srt_path)
+                    if os.path.exists(audio_path): os.remove(audio_path)
                 except Exception:
+                    pass
             self.output_files = self.output_files[-self.max_files_to_keep:]
     def cleanup_all(self):
         """Clean up all managed files"""
         for srt_path, audio_path in self.output_files:
             try:
+                if os.path.exists(srt_path): os.remove(srt_path)
+                if os.path.exists(audio_path): os.remove(audio_path)
             except Exception:
+                pass
         try:
+            if os.path.exists(self.temp_dir): os.rmdir(self.temp_dir)
         except Exception:
+            pass
 file_manager = FileManager()
 async def generate_accurate_srt(
+    text: str, voice: str, rate: str, pitch: str,
+    words_per_line: int, lines_per_segment: int,
+    progress_callback=None, parallel: bool = True, max_workers: int = 4
 ) -> Tuple[str, str]:
     """Generate accurate SRT with parallel processing option"""
     processor = TextProcessor(words_per_line, lines_per_segment)
     segments = processor.split_into_segments(text)
     total_segments = len(segments)
     if progress_callback:
         progress_callback(0.1, "Text segmentation complete")
+    processed_segments = []
     if parallel and total_segments > 1:
         semaphore = asyncio.Semaphore(max_workers)
+        processed_count = 0
         async def process_with_semaphore(segment):
             async with semaphore:
                 nonlocal processed_count
+                result = await process_segment_with_timing(segment, voice, rate, pitch)
+                processed_count += 1
+                if progress_callback:
+                    progress = 0.1 + (0.8 * processed_count / total_segments)
+                    progress_callback(progress, f"Processed {processed_count}/{total_segments} segments")
+                return result
+        tasks = [process_with_semaphore(s) for s in segments]
+        results = await asyncio.gather(*tasks, return_exceptions=True)
+        for res in results:
+            if isinstance(res, Exception):
+                raise TTSError(f"A task failed during parallel processing: {res}")
+            processed_segments.append(res)
     else:
         for i, segment in enumerate(segments):
+            processed_segment = await process_segment_with_timing(segment, voice, rate, pitch)
+            processed_segments.append(processed_segment)
+            if progress_callback:
+                progress = 0.1 + (0.8 * (i + 1) / total_segments)
+                progress_callback(progress, f"Processed {i + 1}/{total_segments} segments")
     processed_segments.sort(key=lambda s: s.id)
     if progress_callback:
         progress_callback(0.9, "Finalizing audio and subtitles")
     current_time = 0
     final_audio = AudioSegment.empty()
     srt_content = ""
     for segment in processed_segments:
         segment.start_time = current_time
         segment.end_time = current_time + segment.duration
+        srt_content += f"{segment.id}\n{format_time_ms(segment.start_time)} --> {format_time_ms(segment.end_time)}\n{segment.text}\n\n"
         final_audio = final_audio.append(segment.audio, crossfade=0)
         current_time = segment.end_time
     srt_path, audio_path = file_manager.create_output_paths()
     try:
+        export_params = {'format': 'mp3', 'bitrate': '192k', 'parameters': ['-ar', '44100', '-ac', '2', '-qscale:a', '2']}
         final_audio.export(audio_path, **export_params)
+        with open(srt_path, "w", encoding='utf-8') as f: f.write(srt_content)
     except Exception as e:
         raise TTSError(f"Failed to export final files: {str(e)}")
     if progress_callback:
         progress_callback(1.0, "Complete!")
     return srt_path, audio_path
 async def process_text_with_progress(
+    text, pitch, rate, voice, words_per_line,
+    lines_per_segment, parallel_processing,
     progress=gr.Progress()
 ):
+    """
+    Processes the text, generates audio and SRT, and returns paths and HTML links.
+    The returned links are configured to open in a new browser tab.
+    """
     if not text or text.strip() == "":
+        return None, "", True, "Please enter some text to convert to speech."
     pitch_str = f"{pitch:+d}Hz" if pitch != 0 else "+0Hz"
     rate_str = f"{rate:+d}%" if rate != 0 else "+0%"
     try:
         progress(0, "Preparing text...")
         def update_progress(value, status):
             progress(value, status)
         srt_path, audio_path = await generate_accurate_srt(
+            text, voice_options[voice], rate_str, pitch_str,
+            words_per_line, lines_per_segment,
             progress_callback=update_progress,
             parallel=parallel_processing
         )
+        # MODIFICATION: Create HTML for download links that open in a new tab
+        download_html = f"""
+        <div style="text-align: center; padding-top: 10px;">
+            <a href="/file={srt_path}" target="_blank" download="subtitles.srt" style="font-weight: 600; color: #0b5ed7; text-decoration: none; margin-right: 20px;">📥 Download SRT File</a>
+            <a href="/file={audio_path}" target="_blank" download="audio.mp3" style="font-weight: 600; color: #0b5ed7; text-decoration: none;">📥 Download Audio File</a>
+        </div>
+        """
+        # MODIFICATION: Return audio preview path, HTML links, and hide error
+        return audio_path, download_html, False, ""
     except TTSError as e:
+        # Return specific TTS error, clearing the audio preview and download links
+        return None, "", True, f"TTS Error: {str(e)}"
     except Exception as e:
         # Return any other error
+        return None, "", True, f"Unexpected error: {str(e)}"
 # Voice options dictionary
 voice_options = {
+    "Andrew Male": "en-US-AndrewNeural", "Jenny Female": "en-US-JennyNeural", "Guy Male": "en-US-GuyNeural",
+    "Ana Female": "en-US-AnaNeural", "Aria Female": "en-US-AriaNeural", "Brian Male": "en-US-BrianNeural",
+    "Christopher Male": "en-US-ChristopherNeural", "Eric Male": "en-US-EricNeural", "Michelle Male": "en-US-MichelleNeural",
+    "Roger Male": "en-US-RogerNeural", "Natasha Female": "en-AU-NatashaNeural", "William Male": "en-AU-WilliamNeural",
+    "Clara Female": "en-CA-ClaraNeural", "Liam Female ": "en-CA-LiamNeural", "Libby Female": "en-GB-LibbyNeural",
+    "Maisie": "en-GB-MaisieNeural", "Ryan": "en-GB-RyanNeural", "Sonia": "en-GB-SoniaNeural",
+    "Thomas": "en-GB-ThomasNeural", "Sam": "en-HK-SamNeural", "Yan": "en-HK-YanNeural",
+    "Connor": "en-IE-ConnorNeural", "Emily": "en-IE-EmilyNeural", "Neerja": "en-IN-NeerjaNeural",
+    "Prabhat": "en-IN-PrabhatNeural", "Asilia": "en-KE-AsiliaNeural", "Chilemba": "en-KE-ChilembaNeural",
+    "Abeo": "en-NG-AbeoNeural", "Ezinne": "en-NG-EzinneNeural", "Mitchell": "en-NZ-MitchellNeural",
+    "James": "en-PH-JamesNeural", "Rosa": "en-PH-RosaNeural", "Luna": "en-SG-LunaNeural",
+    "Wayne": "en-SG-WayneNeural", "Elimu": "en-TZ-ElimuNeural", "Imani": "en-TZ-ImaniNeural",
+    "Leah": "en-ZA-LeahNeural", "Luke": "en-ZA-LukeNeural"
 }
 import atexit
 atexit.register(file_manager.cleanup_all)
 with gr.Blocks(title="Advanced TTS with Configurable SRT Generation") as app:
     gr.Markdown("# Advanced TTS with Configurable SRT Generation")
     gr.Markdown("Generate perfectly synchronized audio and subtitles with natural speech patterns.")
     with gr.Row():
         with gr.Column(scale=3):
             text_input = gr.Textbox(label="Enter Text", lines=10, placeholder="Enter your text here...")
         with gr.Column(scale=2):
+            voice_dropdown = gr.Dropdown(label="Select Voice", choices=list(voice_options.keys()), value="Jenny Female")
+            pitch_slider = gr.Slider(label="Pitch Adjustment (Hz)", minimum=-10, maximum=10, value=0, step=1)
+            rate_slider = gr.Slider(label="Rate Adjustment (%)", minimum=-25, maximum=25, value=0, step=1)
     with gr.Row():
         with gr.Column():
+            words_per_line = gr.Slider(label="Words per Line", minimum=3, maximum=12, value=6, step=1, info="Words per subtitle line.")
         with gr.Column():
+            lines_per_segment = gr.Slider(label="Lines per Segment", minimum=1, maximum=4, value=2, step=1, info="Lines per subtitle block.")
         with gr.Column():
+            parallel_processing = gr.Checkbox(label="Enable Parallel Processing", value=True, info="Faster conversion for longer texts.")
+    submit_btn = gr.Button("Generate Audio & Subtitles", variant="primary")
+    error_output = gr.Textbox(label="Status", visible=False, interactive=False)
+    # MODIFICATION: Changed the output area
     with gr.Row():
+        with gr.Column(scale=2):
+            audio_preview = gr.Audio(label="Preview Audio")
+        with gr.Column(scale=1):
+            download_links_output = gr.HTML(label="Download Files")
+    # MODIFICATION: Updated the .click() event outputs
     submit_btn.click(
         fn=process_text_with_progress,
         inputs=[
+            text_input, pitch_slider, rate_slider, voice_dropdown,
+            words_per_line, lines_per_segment, parallel_processing
         ],
         outputs=[
+            audio_preview,           # Output for the audio player
+            download_links_output,   # Output for the HTML download links
+            error_output,            # First update to error_output (visibility)
+            error_output             # Second update to error_output (value)
         ],
         api_name="generate"
     )