gds2

Runtime error

App Files Files Community

hivecorp committed on May 24, 2025

Commit

f2939e1

verified ·

1 Parent(s): 62612dc

Update app.py

Browse files

Files changed (1) hide show

app.py +668 -89

app.py CHANGED Viewed

@@ -20,7 +20,7 @@ class TimingManager:
     def get_timing(self, duration):
         start_time = self.current_time
         end_time = start_time + duration
-        self.current_time = end_time + duration + self.segment_gap # Ensure a small gap between segments
         return start_time, end_time
 def get_audio_length(audio_file):
@@ -184,7 +184,6 @@ class TextProcessor:
         return lines
-# IMPROVEMENT 1: Enhanced Error Handling
 class TTSError(Exception):
     """Custom exception for TTS processing errors"""
     pass
@@ -226,7 +225,6 @@ async def process_segment_with_timing(segment: Segment, voice: str, rate: str, p
             except Exception:
                 pass  # Ignore deletion errors
-# IMPROVEMENT 2: Better File Management with cleanup
 class FileManager:
     """Manages temporary and output files with cleanup capabilities"""
     def __init__(self):
@@ -281,10 +279,8 @@ class FileManager:
         except Exception:
             pass  # Ignore if directory isn't empty or can't be removed
-# Create global file manager
 file_manager = FileManager()
-# IMPROVEMENT 3: Parallel Processing for Segments
 async def generate_accurate_srt(
     text: str,
     voice: str,
@@ -303,16 +299,12 @@ async def generate_accurate_srt(
     total_segments = len(segments)
     processed_segments = []
-    # Update progress to show segmentation is complete
     if progress_callback:
         progress_callback(0.1, "Text segmentation complete")
     if parallel and total_segments > 1:
-        # Process segments in parallel
         processed_count = 0
         segment_tasks = []
-        # Create a semaphore to limit concurrent tasks
         semaphore = asyncio.Semaphore(max_workers)
         async def process_with_semaphore(segment):
@@ -326,18 +318,15 @@ async def generate_accurate_srt(
                         progress_callback(progress, f"Processed {processed_count}/{total_segments} segments")
                     return result
                 except Exception as e:
-                    # Handle errors in individual segments
                     processed_count += 1
                     if progress_callback:
                         progress = 0.1 + (0.8 * processed_count / total_segments)
                         progress_callback(progress, f"Error in segment {segment.id}: {str(e)}")
                     raise
-        # Create tasks for all segments
         for segment in segments:
             segment_tasks.append(process_with_semaphore(segment))
-        # Run all tasks and collect results
         try:
             processed_segments = await asyncio.gather(*segment_tasks)
         except Exception as e:
@@ -345,7 +334,6 @@ async def generate_accurate_srt(
                 progress_callback(0.9, f"Error during parallel processing: {str(e)}")
             raise TTSError(f"Failed during parallel processing: {str(e)}")
     else:
-        # Process segments sequentially (original method)
         for i, segment in enumerate(segments):
             try:
                 processed_segment = await process_segment_with_timing(segment, voice, rate, pitch)
@@ -359,48 +347,39 @@ async def generate_accurate_srt(
                     progress_callback(0.9, f"Error processing segment {segment.id}: {str(e)}")
                 raise TTSError(f"Failed to process segment {segment.id}: {str(e)}")
-    # Sort segments by ID to ensure correct order
     processed_segments.sort(key=lambda s: s.id)
     if progress_callback:
         progress_callback(0.9, "Finalizing audio and subtitles")
-    # Now combine the segments in the correct order
     current_time = 0
     final_audio = AudioSegment.empty()
     srt_content = ""
     for segment in processed_segments:
-        # Calculate precise timing
         segment.start_time = current_time
         segment.end_time = current_time + segment.duration
-        # Add to SRT with precise timing
         srt_content += (
             f"{segment.id}\n"
             f"{format_time_ms(segment.start_time)} --> {format_time_ms(segment.end_time)}\n"
             f"{segment.text}\n\n"
         )
-        # Add to final audio with precise positioning
         final_audio = final_audio.append(segment.audio, crossfade=0)
-        # Update timing with precise gap
         current_time = segment.end_time
-    # Export with high precision
     srt_path, audio_path = file_manager.create_output_paths()
     try:
-        # Export with optimized quality settings and compression
         export_params = {
             'format': 'mp3',
-            'bitrate': '192k',  # Reduced from 320k but still high quality
             'parameters': [
-                '-ar', '44100',  # Standard sample rate
-                '-ac', '2',      # Stereo
-                '-compression_level', '0',  # Best compression
-                '-qscale:a', '2'  # High quality VBR encoding
             ]
         }
         final_audio.export(audio_path, **export_params)
@@ -417,6 +396,7 @@ async def generate_accurate_srt(
     return srt_path, audio_path
 async def process_text_with_progress(
     text,
     pitch,
@@ -427,31 +407,26 @@ async def process_text_with_progress(
     parallel_processing,
     progress=gr.Progress()
 ):
-    # Initialize outputs to their default 'hidden' state
-    # gr.Audio expects None or a path.
-    # gr.Markdown expects a string. An empty string effectively hides the content.
-    output_audio = None
-    output_srt_link = ""
-    output_audio_link = ""
-    output_error = ""
     # Input validation
     if not text or text.strip() == "":
-        output_error = "Please enter some text to convert to speech."
-        # Update visibility of error_output only when an error occurs
         return (
             output_audio,
-            output_srt_link,
-            output_audio_link,
-            gr.update(value=output_error, visible=True)
         )
-    # Format pitch and rate strings
     pitch_str = f"{pitch:+d}Hz" if pitch != 0 else "+0Hz"
     rate_str = f"{rate:+d}%" if rate != 0 else "+0%"
     try:
-        # Start progress tracking
         progress(0, "Preparing text...")
         def update_progress(value, status):
@@ -468,50 +443,106 @@ async def process_text_with_progress(
             parallel=parallel_processing
         )
-        # Generate Markdown links for download that open in a new tab
-        output_srt_link = f'<a href="file={srt_path}" download="subtitles.srt" target="_blank">Download SRT</a>'
-        output_audio_link = f'<a href="file={audio_path}" download="audio.mp3" target="_blank">Download Audio</a>'
-        output_audio = audio_path # For the gr.Audio preview component
-        output_error = "" # Clear any previous error
-        # Return updated values. Note: No explicit `visible=True` for Markdown here.
-        # The presence of content makes them appear.
         return (
-            output_audio,
-            gr.update(value=output_srt_link, visible=True), # Now explicit visible=True here
-            gr.update(value=output_audio_link, visible=True), # Now explicit visible=True here
-            gr.update(value=output_error, visible=False) # Hide error message on success
         )
     except TTSError as e:
-        output_error = f"TTS Error: {str(e)}"
     except Exception as e:
-        output_error = f"Unexpected error: {str(e)}"
-    # Unified error return block
     return (
-        None, # audio_output should be None on error
         gr.update(value="", visible=False), # Hide SRT download link
         gr.update(value="", visible=False), # Hide Audio download link
-        gr.update(value=output_error, visible=True) # Show error message
     )
-# Voice options dictionary
-voice_options = {
-    "Andrew Male": "en-US-AndrewNeural",
-    "Jenny Female": "en-US-JennyNeural",
-    "Guy Male": "en-US-GuyNeural",
-    "Ana Female": "en-US-AnaNeural",
-    "Aria Female": "en-US-AriaNeural",
-    "Brian Male": "en-US-BrianNeural",
-    "Christopher Male": "en-US-ChristopherNeural",
-    "Eric Male": "en-US-EricNeural",
-    "Michelle Male": "en-US-MichelleNeural",
-    "Roger Male": "en-US-RogerNeural",
-    "Natasha Female": "en-AU-NatashaNeural",
-    "William Male": "en-AU-WilliamNeural",
-    "Clara Female": "en-CA-ClaraNeural",
-    "Liam Female ": "en-CA-LiamNeural",
-    "Libby Female": "en-GB-LibbyNeural",
     "Maisie": "en-GB-MaisieNeural",
     "Ryan": "en-GB-RyanNeural",
     "Sonia": "en-GB-SoniaNeural",
@@ -535,15 +566,545 @@ voice_options = {
     "Imani": "en-TZ-ImaniNeural",
     "Leah": "en-ZA-LeahNeural",
     "Luke": "en-ZA-LukeNeural"
-    # Add other voices as needed
 }
-# Register cleanup on exit
-import atexit
 atexit.register(file_manager.cleanup_all)
 # Create Gradio interface
-with gr.Blocks(title="Advanced TTS with Configurable SRT Generation") as app:
     gr.Markdown("# Advanced TTS with Configurable SRT Generation")
     gr.Markdown("Generate perfectly synchronized audio and subtitles with natural speech patterns.")
@@ -552,10 +1113,17 @@ with gr.Blocks(title="Advanced TTS with Configurable SRT Generation") as app:
             text_input = gr.Textbox(label="Enter Text", lines=10, placeholder="Enter your text here...")
         with gr.Column(scale=2):
-            voice_dropdown = gr.Dropdown(
                 label="Select Voice",
-                choices=list(voice_options.keys()),
-                value="Jenny Female"
             )
             pitch_slider = gr.Slider(
                 label="Pitch Adjustment (Hz)",
@@ -597,28 +1165,39 @@ with gr.Blocks(title="Advanced TTS with Configurable SRT Generation") as app:
                 value=True,
                 info="Process multiple segments simultaneously for faster conversion (recommended for longer texts)"
             )
     submit_btn = gr.Button("Generate Audio & Subtitles")
-    # Error message component - initially hidden
     error_output = gr.Textbox(label="Status", visible=False, interactive=False)
     with gr.Row():
         with gr.Column():
             audio_output = gr.Audio(label="Preview Audio")
         with gr.Column():
-            # These are the actual components that will display the links
-            srt_download_link = gr.Markdown(value="", visible=False, label="Download SRT Link")
-            audio_download_link = gr.Markdown(value="", visible=False, label="Download Audio Link")
-    # Handle button click with manual error handling
     submit_btn.click(
         fn=process_text_with_progress,
         inputs=[
             text_input,
             pitch_slider,
             rate_slider,
-            voice_dropdown,
             words_per_line,
             lines_per_segment,
             parallel_processing

     def get_timing(self, duration):  # returns (start_time, end_time) for a segment lasting `duration`
         start_time = self.current_time  # the segment begins at the running cursor
         end_time = start_time + duration
+        self.current_time = end_time + self.segment_gap  # advance the cursor past this segment plus the configured gap
         return start_time, end_time
 def get_audio_length(audio_file):
         return lines
 class TTSError(Exception):  # raised in place of the underlying exception when segment processing fails
     """Custom exception for TTS processing errors"""
     pass
             except Exception:
                 pass  # Ignore deletion errors
 class FileManager:
     """Manages temporary and output files with cleanup capabilities"""
     def __init__(self):
         except Exception:
             pass  # Ignore if directory isn't empty or can't be removed
 file_manager = FileManager()
 async def generate_accurate_srt(
     text: str,
     voice: str,
     total_segments = len(segments)
     processed_segments = []
     if progress_callback:
         progress_callback(0.1, "Text segmentation complete")
     if parallel and total_segments > 1:
         processed_count = 0
         segment_tasks = []
         semaphore = asyncio.Semaphore(max_workers)
         async def process_with_semaphore(segment):
                         progress_callback(progress, f"Processed {processed_count}/{total_segments} segments")
                     return result
                 except Exception as e:
                     processed_count += 1
                     if progress_callback:
                         progress = 0.1 + (0.8 * processed_count / total_segments)
                         progress_callback(progress, f"Error in segment {segment.id}: {str(e)}")
                     raise
         for segment in segments:
             segment_tasks.append(process_with_semaphore(segment))
         try:
             processed_segments = await asyncio.gather(*segment_tasks)
         except Exception as e:
                 progress_callback(0.9, f"Error during parallel processing: {str(e)}")
             raise TTSError(f"Failed during parallel processing: {str(e)}")
     else:
         for i, segment in enumerate(segments):
             try:
                 processed_segment = await process_segment_with_timing(segment, voice, rate, pitch)
                     progress_callback(0.9, f"Error processing segment {segment.id}: {str(e)}")
                 raise TTSError(f"Failed to process segment {segment.id}: {str(e)}")
     processed_segments.sort(key=lambda s: s.id)
     if progress_callback:
         progress_callback(0.9, "Finalizing audio and subtitles")
     current_time = 0
     final_audio = AudioSegment.empty()
     srt_content = ""
     for segment in processed_segments:
         segment.start_time = current_time
         segment.end_time = current_time + segment.duration
         srt_content += (
             f"{segment.id}\n"
             f"{format_time_ms(segment.start_time)} --> {format_time_ms(segment.end_time)}\n"
             f"{segment.text}\n\n"
         )
         final_audio = final_audio.append(segment.audio, crossfade=0)
         current_time = segment.end_time
     srt_path, audio_path = file_manager.create_output_paths()
     try:
         export_params = {
             'format': 'mp3',
+            'bitrate': '192k',
             'parameters': [
+                '-ar', '44100',
+                '-ac', '2',
+                '-compression_level', '0',
+                '-qscale:a', '2'
             ]
         }
         final_audio.export(audio_path, **export_params)
     return srt_path, audio_path
+# Returns a 4-tuple matching the UI outputs: audio path (or None), SRT link update, audio link update, error-message update
 async def process_text_with_progress(
     text,
     pitch,
     parallel_processing,
     progress=gr.Progress()
 ):
+    # Initialize outputs to their default 'hidden' state by providing empty strings
+    # and setting visible=False via gr.update.
+    output_audio = None # gr.Audio expects None or a path
+    output_srt_link_html = gr.update(value="", visible=False) # gr.HTML expects a string
+    output_audio_link_html = gr.update(value="", visible=False) # gr.HTML expects a string
+    output_error_message = gr.update(value="", visible=False) # gr.Textbox expects a string
     # Input validation
     if not text or text.strip() == "":
         return (
             output_audio,
+            output_srt_link_html,
+            output_audio_link_html,
+            gr.update(value="Please enter some text to convert to speech.", visible=True)
         )
     pitch_str = f"{pitch:+d}Hz" if pitch != 0 else "+0Hz"
     rate_str = f"{rate:+d}%" if rate != 0 else "+0%"
     try:
         progress(0, "Preparing text...")
         def update_progress(value, status):
             parallel=parallel_processing
         )
+        # Create HTML strings for download links. Gradio serves files using "file=" prefix.
+        srt_download_html = f"""
+        <a href="file={srt_path}" download="subtitles.srt" target="_blank"
+           style="display: inline-block; padding: 10px 20px; background: linear-gradient(135deg, #4776E6, #8E54E9); color: white; text-decoration: none; border-radius: 8px; font-weight: 600; transition: all 0.3s ease;"
+           onmouseover="this.style.transform='translateY(-2px)'; this.style.boxShadow='0 5px 15px rgba(71, 118, 230, 0.3)';"
+           onmouseout="this.style.transform='translateY(0)'; this.style.boxShadow='none';">
+           Download SRT File
+        </a>
+        """
+        audio_download_html = f"""
+        <a href="file={audio_path}" download="audio.mp3" target="_blank"
+           style="display: inline-block; padding: 10px 20px; background: linear-gradient(135deg, #4776E6, #8E54E9); color: white; text-decoration: none; border-radius: 8px; font-weight: 600; transition: all 0.3s ease;"
+           onmouseover="this.style.transform='translateY(-2px)'; this.style.boxShadow='0 5px 15px rgba(71, 118, 230, 0.3)';"
+           onmouseout="this.style.transform='translateY(0)'; this.style.boxShadow='none';">
+           Download Audio File
+        </a>
+        """
         return (
+            audio_path, # Path for gr.Audio preview
+            gr.update(value=srt_download_html, visible=True), # HTML link for SRT download
+            gr.update(value=audio_download_html, visible=True), # HTML link for Audio download
+            gr.update(value="", visible=False) # Hide error message
         )
     except TTSError as e:
+        error_message = f"TTS Error: {str(e)}"
     except Exception as e:
+        error_message = f"Unexpected error: {str(e)}"
     return (
+        None, # Clear audio output on error
         gr.update(value="", visible=False), # Hide SRT download link
         gr.update(value="", visible=False), # Hide Audio download link
+        gr.update(value=error_message, visible=True) # Show error message
     )
+# NOTE(review): marked as unused by the original author; builds a styled HTML download anchor for a single file path.
+def create_download_link(audio_path):  # returns an HTML string, or None when audio_path is None
+    if audio_path is None:
+        return None
+    filename = Path(audio_path).name  # NOTE(review): inserted unescaped into HTML and JS below; confirm paths are never user-controlled
+    # Gradio handles file serving with "file=" prefix directly, no need for base_url
+    file_url = f"file={audio_path}"
+    return f"""
+    <a href="{file_url}"
+        download="{filename}"
+        target="_blank"
+        rel="noopener noreferrer"
+        style="display: inline-block; padding: 10px 20px; background: linear-gradient(135deg, #4776E6, #8E54E9); color: white; text-decoration: none; border-radius: 8px; font-weight: 600; transition: all 0.3s ease;"
+        onmouseover="this.style.transform='translateY(-2px)'; this.style.boxShadow='0 5px 15px rgba(71, 118, 230, 0.3)';"
+        onmouseout="this.style.transform='translateY(0)'; this.style.boxShadow='none';"
+        onclick="event.preventDefault(); fetch(this.href).then(resp => resp.blob()).then(blob => {{
+            const url = window.URL.createObjectURL(blob);
+            const a = document.createElement('a');
+            a.style.display = 'none';
+            a.href = url;
+            a.download = '{filename}';
+            document.body.appendChild(a);
+            a.click();
+            window.URL.revokeObjectURL(url);
+            document.body.removeChild(a);
+        }});">
+        Download Audio File
+    </a>
+    """
+def cleanup_file(filepath, delay=300):  # schedules deletion of `filepath` after `delay`, then returns None immediately
+    def delete_file():  # callback handed to the Timer below
+        try:
+            if os.path.exists(filepath):
+                os.remove(filepath)
+                print(f"Cleaned up file: {filepath}")
+        except Exception as e:  # best-effort: report the failure rather than raise from the callback
+            print(f"Error cleaning up file {filepath}: {e}")
+    Timer(delay, delete_file).start()  # NOTE(review): presumably threading.Timer (delay in seconds); import not visible here, confirm
+# --- Voice Options and Gradio Interface ---
+language_dict = {
+    "Hindi": {
+    "Madhur": "hi-IN-MadhurNeural",
+    "Swara": "hi-IN-SwaraNeural"
+  },
+  "English": {
+    "Jenny": "en-US-JennyNeural",
+    "Guy": "en-US-GuyNeural",
+    "Ana": "en-US-AnaNeural",
+    "Aria": "en-US-AriaNeural",
+    "Brian": "en-US-BrianNeural",
+    "Christopher": "en-US-ChristopherNeural",
+    "Eric": "en-US-EricNeural",
+    "Michelle": "en-US-MichelleNeural",
+    "Roger": "en-US-RogerNeural",
+    "Natasha": "en-AU-NatashaNeural",
+    "William": "en-AU-WilliamNeural",
+    "Clara": "en-CA-ClaraNeural",
+    "Liam": "en-CA-LiamNeural",
+    "Libby": "en-GB-LibbyNeural",
     "Maisie": "en-GB-MaisieNeural",
     "Ryan": "en-GB-RyanNeural",
     "Sonia": "en-GB-SoniaNeural",
     "Imani": "en-TZ-ImaniNeural",
     "Leah": "en-ZA-LeahNeural",
     "Luke": "en-ZA-LukeNeural"
+  },
+  "Spanish": {
+    "Elena": "es-AR-ElenaNeural",
+    "Tomas": "es-AR-TomasNeural",
+    "Marcelo": "es-BO-MarceloNeural",
+    "Sofia": "es-BO-SofiaNeural",
+    "Gonzalo": "es-CO-GonzaloNeural",
+    "Salome": "es-CO-SalomeNeural",
+    "Juan": "es-CR-JuanNeural",
+    "Maria": "es-CR-MariaNeural",
+    "Belkys": "es-CU-BelkysNeural",
+    "Emilio": "es-DO-EmilioNeural",
+    "Ramona": "es-DO-RamonaNeural",
+    "Andrea": "es-EC-AndreaNeural",
+    "Luis": "es-EC-LuisNeural",
+    "Alvaro": "es-ES-AlvaroNeural",
+    "Elvira": "es-ES-ElviraNeural",
+    "Teresa": "es-GQ-TeresaNeural",
+    "Andres": "es-GT-AndresNeural",
+    "Marta": "es-GT-MartaNeural",
+    "Carlos": "es-HN-CarlosNeural",
+    "Karla": "es-HN-KarlaNeural",
+    "Federico": "es-NI-FedericoNeural",
+    "Yolanda": "es-NI-YolandaNeural",
+    "Margarita": "es-PA-MargaritaNeural",
+    "Roberto": "es-PA-RobertoNeural",
+    "Alex": "es-PE-AlexNeural",
+    "Camila": "es-PE-CamilaNeural",
+    "Karina": "es-PR-KarinaNeural",
+    "Victor": "es-PR-VictorNeural",
+    "Mario": "es-PY-MarioNeural",
+    "Tania": "es-PY-TaniaNeural",
+    "Lorena": "es-SV-LorenaNeural",
+    "Rodrigo": "es-SV-RodrigoNeural",
+    "Alonso": "es-US-AlonsoNeural",
+    "Paloma": "es-US-PalomaNeural",
+    "Mateo": "es-UY-MateoNeural",
+    "Valentina": "es-UY-ValentinaNeural",
+    "Paola": "es-VE-PaolaNeural",
+    "Sebastian": "es-VE-SebastianNeural"
+  },
+  "Arabic": {
+    "Hamed": "ar-SA-HamedNeural",
+    "Zariyah": "ar-SA-ZariyahNeural",
+    "Fatima": "ar-AE-FatimaNeural",
+    "Hamdan": "ar-AE-HamdanNeural",
+    "Ali": "ar-BH-AliNeural",
+    "Laila": "ar-BH-LailaNeural",
+    "Ismael": "ar-DZ-IsmaelNeural",
+    "Salma": "ar-EG-SalmaNeural",
+    "Shakir": "ar-EG-ShakirNeural",
+    "Bassel": "ar-IQ-BasselNeural",
+    "Rana": "ar-IQ-RanaNeural",
+    "Sana": "ar-JO-SanaNeural",
+    "Taim": "ar-JO-TaimNeural",
+    "Fahed": "ar-KW-FahedNeural",
+    "Noura": "ar-KW-NouraNeural",
+    "Layla": "ar-LB-LaylaNeural",
+    "Rami": "ar-LB-RamiNeural",
+    "Iman": "ar-LY-ImanNeural",
+    "Omar": "ar-LY-OmarNeural",
+    "Jamal": "ar-MA-JamalNeural",
+    "Mouna": "ar-MA-MounaNeural",
+    "Abdullah": "ar-OM-AbdullahNeural",
+    "Aysha": "ar-OM-AyshaNeural",
+    "Amal": "ar-QA-AmalNeural",
+    "Moaz": "ar-QA-MoazNeural",
+    "Amany": "ar-SY-AmanyNeural",
+    "Laith": "ar-SY-LaithNeural",
+    "Hedi": "ar-TN-HediNeural",
+    "Reem": "ar-TN-ReemNeural",
+    "Maryam": "ar-YE-MaryamNeural",
+    "Saleh": "ar-YE-SalehNeural"
+  },
+  "Korean": {
+    "Sun-Hi": "ko-KR-SunHiNeural",
+    "InJoon": "ko-KR-InJoonNeural"
+  },
+  "Thai": {
+    "Premwadee": "th-TH-PremwadeeNeural",
+    "Niwat": "th-TH-NiwatNeural"
+  },
+  "Vietnamese": {
+    "HoaiMy": "vi-VN-HoaiMyNeural",
+    "NamMinh": "vi-VN-NamMinhNeural"
+  },
+  "Japanese": {
+    "Nanami": "ja-JP-NanamiNeural",
+    "Keita": "ja-JP-KeitaNeural"
+  },
+  "French": {
+    "Denise": "fr-FR-DeniseNeural",
+    "Eloise": "fr-FR-EloiseNeural",
+    "Henri": "fr-FR-HenriNeural",
+    "Sylvie": "fr-CA-SylvieNeural",
+    "Antoine": "fr-CA-AntoineNeural",
+    "Jean": "fr-CA-JeanNeural",
+    "Ariane": "fr-CH-ArianeNeural",
+    "Fabrice": "fr-CH-FabriceNeural",
+    "Charline": "fr-BE-CharlineNeural",
+    "Gerard": "fr-BE-GerardNeural"
+  },
+  "Portuguese": {
+    "Francisca": "pt-BR-FranciscaNeural",
+    "Antonio": "pt-BR-AntonioNeural",
+    "Duarte": "pt-PT-DuarteNeural",
+    "Raquel": "pt-PT-RaquelNeural"
+  },
+  "Indonesian": {
+    "Ardi": "id-ID-ArdiNeural",
+    "Gadis": "id-ID-GadisNeural"
+  },
+  "Hebrew": {
+    "Avri": "he-IL-AvriNeural",
+    "Hila": "he-IL-HilaNeural"
+  },
+  "Italian": {
+    "Isabella": "it-IT-IsabellaNeural",
+    "Diego": "it-IT-DiegoNeural",
+    "Elsa": "it-IT-ElsaNeural"
+  },
+  "Dutch": {
+    "Colette": "nl-NL-ColetteNeural",
+    "Fenna": "nl-NL-FennaNeural",
+    "Maarten": "nl-NL-MaartenNeural",
+    "Arnaud": "nl-BE-ArnaudNeural",
+    "Dena": "nl-BE-DenaNeural"
+  },
+  "Malay": {
+    "Osman": "ms-MY-OsmanNeural",
+    "Yasmin": "ms-MY-YasminNeural"
+  },
+  "Norwegian": {
+    "Pernille": "nb-NO-PernilleNeural",
+    "Finn": "nb-NO-FinnNeural"
+  },
+  "Swedish": {
+    "Sofie": "sv-SE-SofieNeural",
+    "Mattias": "sv-SE-MattiasNeural"
+  },
+  "Greek": {
+    "Athina": "el-GR-AthinaNeural",
+    "Nestoras": "el-GR-NestorasNeural"
+  },
+  "German": {
+    "Katja": "de-DE-KatjaNeural",
+    "Amala": "de-DE-AmalaNeural",
+    "Conrad": "de-DE-ConradNeural",
+    "Killian": "de-DE-KillianNeural",
+    "Ingrid": "de-AT-IngridNeural",
+    "Jonas": "de-AT-JonasNeural",
+    "Jan": "de-CH-JanNeural",
+    "Leni": "de-CH-LeniNeural"
+  },
+  "Afrikaans": {
+    "Adri": "af-ZA-AdriNeural",
+    "Willem": "af-ZA-WillemNeural"
+  },
+  "Amharic": {
+    "Ameha": "am-ET-AmehaNeural",
+    "Mekdes": "am-ET-MekdesNeural"
+  },
+  "Azerbaijani": {
+    "Babek": "az-AZ-BabekNeural",
+    "Banu": "az-AZ-BanuNeural"
+  },
+  "Bulgarian": {
+    "Borislav": "bg-BG-BorislavNeural",
+    "Kalina": "bg-BG-KalinaNeural"
+  },
+  "Bengali": {
+    "Nabanita": "bn-BD-NabanitaNeural",
+    "Pradeep": "bn-BD-PradeepNeural",
+    "Bashkar": "bn-IN-BashkarNeural",
+    "Tanishaa": "bn-IN-TanishaaNeural"
+  },
+  "Bosnian": {
+    "Goran": "bs-BA-GoranNeural",
+    "Vesna": "bs-BA-VesnaNeural"
+  },
+  "Catalan": {
+    "Joana": "ca-ES-JoanaNeural",
+    "Enric": "ca-ES-EnricNeural"
+  },
+  "Czech": {
+    "Antonin": "cs-CZ-AntoninNeural",
+    "Vlasta": "cs-CZ-VlastaNeural"
+  },
+  "Welsh": {
+    "Aled": "cy-GB-AledNeural",
+    "Nia": "cy-GB-NiaNeural"
+  },
+  "Danish": {
+    "Christel": "da-DK-ChristelNeural",
+    "Jeppe": "da-DK-JeppeNeural"
+  },
+  "Estonian": {
+    "Anu": "et-EE-AnuNeural",
+    "Kert": "et-EE-KertNeural"
+  },
+  "Persian": {
+    "Dilara": "fa-IR-DilaraNeural",
+    "Farid": "fa-IR-FaridNeural"
+  },
+  "Finnish": {
+    "Harri": "fi-FI-HarriNeural",
+    "Noora": "fi-FI-NooraNeural"
+  },
+  "Irish": {
+    "Colm": "ga-IE-ColmNeural",
+    "Orla": "ga-IE-OrlaNeural"
+  },
+  "Galician": {
+    "Roi": "gl-ES-RoiNeural",
+    "Sabela": "gl-ES-SabelaNeural"
+  },
+  "Gujarati": {
+    "Dhwani": "gu-IN-DhwaniNeural",
+    "Niranjan": "gu-IN-NiranjanNeural"
+  },
+  "Croatian": {
+    "Gabrijela": "hr-HR-GabrijelaNeural",
+    "Srecko": "hr-HR-SreckoNeural"
+  },
+  "Hungarian": {
+    "Noemi": "hu-HU-NoemiNeural",
+    "Tamas": "hu-HU-TamasNeural"
+  },
+  "Icelandic": {
+    "Gudrun": "is-IS-GudrunNeural",
+    "Gunnar": "is-IS-GunnarNeural"
+  },
+  "Javanese": {
+    "Dimas": "jv-ID-DimasNeural",
+    "Siti": "jv-ID-SitiNeural"
+  },
+  "Georgian": {
+    "Eka": "ka-GE-EkaNeural",
+    "Giorgi": "ka-GE-GiorgiNeural"
+  },
+  "Kazakh": {
+    "Aigul": "kk-KZ-AigulNeural",
+    "Daulet": "kk-KZ-DauletNeural"
+  },
+  "Khmer": {
+    "Piseth": "km-KH-PisethNeural",
+    "Sreymom": "km-KH-SreymomNeural"
+  },
+  "Kannada": {
+    "Gagan": "kn-IN-GaganNeural",
+    "Sapna": "kn-IN-SapnaNeural"
+  },
+  "Lao": {
+    "Chanthavong": "lo-LA-ChanthavongNeural",
+    "Keomany": "lo-LA-KeomanyNeural"
+  },
+  "Lithuanian": {
+    "Leonas": "lt-LT-LeonasNeural",
+    "Ona": "lt-LT-OnaNeural"
+  },
+  "Latvian": {
+    "Everita": "lv-LV-EveritaNeural",
+    "Nils": "lv-LV-NilsNeural"
+  },
+  "Macedonian": {
+    "Aleksandar": "mk-MK-AleksandarNeural",
+    "Marija": "mk-MK-MarijaNeural"
+  },
+  "Malayalam": {
+    "Midhun": "ml-IN-MidhunNeural",
+    "Sobhana": "ml-IN-SobhanaNeural"
+  },
+  "Mongolian": {
+    "Bataa": "mn-MN-BataaNeural",
+    "Yesui": "mn-MN-YesuiNeural"
+  },
+  "Marathi": {
+    "Aarohi": "mr-IN-AarohiNeural",
+    "Manohar": "mr-IN-ManoharNeural"
+  },
+  "Maltese": {
+    "Grace": "mt-MT-GraceNeural",
+    "Joseph": "mt-MT-JosephNeural"
+  },
+  "Burmese": {
+    "Nilar": "my-MM-NilarNeural",
+    "Thiha": "my-MM-ThihaNeural"
+  },
+  "Nepali": {
+    "Hemkala": "ne-NP-HemkalaNeural",
+    "Sagar": "ne-NP-SagarNeural"
+  },
+  "Polish": {
+    "Marek": "pl-PL-MarekNeural",
+    "Zofia": "pl-PL-ZofiaNeural"
+  },
+  "Pashto": {
+    "Gul Nawaz": "ps-AF-GulNawazNeural",
+    "Latifa": "ps-AF-LatifaNeural"
+  },
+  "Romanian": {
+    "Alina": "ro-RO-AlinaNeural",
+    "Emil": "ro-RO-EmilNeural"
+  },
+  "Russian": {
+    "Svetlana": "ru-RU-SvetlanaNeural",
+    "Dmitry": "ru-RU-DmitryNeural"
+  },
+  "Sinhala": {
+    "Sameera": "si-LK-SameeraNeural",
+    "Thilini": "si-LK-ThiliniNeural"
+  },
+  "Slovak": {
+    "Lukas": "sk-SK-LukasNeural",
+    "Viktoria": "sk-SK-ViktoriaNeural"
+  },
+  "Slovenian": {
+    "Petra": "sl-SI-PetraNeural",
+    "Rok": "sl-SI-RokNeural"
+  },
+  "Somali": {
+    "Muuse": "so-SO-MuuseNeural",
+    "Ubax": "so-SO-UbaxNeural"
+  },
+  "Albanian": {
+    "Anila": "sq-AL-AnilaNeural",
+    "Ilir": "sq-AL-IlirNeural"
+  },
+  "Serbian": {
+    "Nicholas": "sr-RS-NicholasNeural",
+    "Sophie": "sr-RS-SophieNeural"
+  },
+  "Sundanese": {
+    "Jajang": "su-ID-JajangNeural",
+    "Tuti": "su-ID-TutiNeural"
+  },
+  "Swahili": {
+    "Rafiki": "sw-KE-RafikiNeural",
+    "Zuri": "sw-KE-ZuriNeural",
+    "Daudi": "sw-TZ-DaudiNeural",
+    "Rehema": "sw-TZ-RehemaNeural"
+    },
+  "Tamil": {
+    "Pallavi": "ta-IN-PallaviNeural",
+    "Valluvar": "ta-IN-ValluvarNeural",
+    "Kumar": "ta-LK-KumarNeural",
+    "Saranya": "ta-LK-SaranyaNeural",
+    "Kani": "ta-MY-KaniNeural",
+    "Surya": "ta-MY-SuryaNeural",
+    "Anbu": "ta-SG-AnbuNeural"
+  },
+  "Telugu": {
+    "Mohan": "te-IN-MohanNeural",
+    "Shruti": "te-IN-ShrutiNeural"
+  },
+  "Turkish": {
+    "Ahmet": "tr-TR-AhmetNeural",
+    "Emel": "tr-TR-EmelNeural"
+  },
+  "Ukrainian": {
+    "Ostap": "uk-UA-OstapNeural",
+    "Polina": "uk-UA-PolinaNeural"
+  },
+  "Urdu": {
+    "Gul": "ur-IN-GulNeural",
+    "Salman": "ur-IN-SalmanNeural",
+    "Asad": "ur-PK-AsadNeural",
+    "Uzma": "ur-PK-UzmaNeural"
+  },
+  "Uzbek": {
+    "Madina": "uz-UZ-MadinaNeural",
+    "Sardor": "uz-UZ-SardorNeural"
+  },
+  "Mandarin": {
+    "Xiaoxiao": "zh-CN-XiaoxiaoNeural",
+    "Yunyang": "zh-CN-YunyangNeural",
+    "Yunxi": "zh-CN-YunxiNeural",
+    "Xiaoyi": "zh-CN-XiaoyiNeural",
+    "Yunjian": "zh-CN-YunjianNeural",
+    "Yunxia": "zh-CN-YunxiaNeural",
+    "Xiaobei": "zh-CN-liaoning-XiaobeiNeural",
+    "Xiaoni": "zh-CN-shaanxi-XiaoniNeural",
+    "HiuMaan": "zh-HK-HiuMaanNeural",
+    "HiuGaai": "zh-HK-HiuGaaiNeural",
+    "WanLung": "zh-HK-WanLungNeural",
+    "HsiaoChen": "zh-TW-HsiaoChenNeural",
+    "HsiaoYu": "zh-TW-HsiaoYuNeural",
+    "YunJhe": "zh-TW-YunJheNeural"
+  },
+  "Zulu": {
+    "Thando": "zu-ZA-ThandoNeural",
+    "Themba": "zu-ZA-ThembaNeural"
+  }
 }
+# Initial language/speaker values (where they are consumed is not visible in this view)
+default_language = "English"
+default_speaker = language_dict[default_language][list(language_dict[default_language].keys())[0]] # NOTE: this is the first English entry's voice ID (the dict value, "en-US-JennyNeural"), not its display-name key
+def get_speakers(language):  # returns a 2-tuple (gr.update, gr.Checkbox); raises KeyError if `language` is not in language_dict
+    speakers = list(language_dict[language].keys())  # speaker display names, in dict insertion order
+    # First element: gr.update carrying the speaker choices with the first one preselected; second: a Checkbox visible only for "Arabic"
+    return gr.update(choices=speakers, value=speakers[0], interactive=True), gr.Checkbox(visible=language == "Arabic", interactive=True)
 atexit.register(file_manager.cleanup_all)
 # Create Gradio interface
+with gr.Blocks(title="Advanced TTS with Configurable SRT Generation",
+    css="""
+        :root {
+            --primary-color: #4776E6;
+            --secondary-color: #8E54E9;
+            --background-light: #ffffff;
+            --card-light: #f8f9fa;
+            --text-dark: #2d3436;
+            --text-gray: #636e72;
+            --border-color: #e0e0e0;
+        }
+        @media (max-width: 768px) {
+            .container {
+                padding: 10px !important;
+            }
+            .header h1 {
+                font-size: 1.5em !important;
+            }
+        }
+        body {
+            background-color: var(--background-light);
+        }
+        .container {
+            background-color: var(--background-light);
+            max-width: 1200px;
+            margin: 0 auto;
+            padding: 20px;
+        }
+        .header {
+            text-align: center;
+            margin-bottom: 30px;
+            background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
+            padding: 25px;
+            border-radius: 15px;
+            color: white;
+            box-shadow: 0 4px 15px rgba(71, 118, 230, 0.2);
+        }
+        .input-section, .output-section {
+            background-color: var(--card-light);
+            padding: 25px;
+            border-radius: 15px;
+            margin-bottom: 20px;
+            box-shadow: 0 2px 10px rgba(0, 0, 0, 0.05);
+            border: 1px solid var(--border-color);
+            width: 100%;
+        }
+        .input-box textarea {
+            min-height: 120px !important;
+            font-size: 16px !important;
+            border: 1px solid var(--border-color) !important;
+            border-radius: 10px !important;
+            padding: 15px !important;
+            width: 100% !important;
+        }
+        .dropdown {
+            width: 100% !important;
+        }
+        select, input[type="text"] {
+            width: 100% !important;
+            padding: 12px !important;
+            border-radius: 8px !important;
+            border: 1px solid var(--border-color) !important;
+        }
+        .generate-btn {
+            background: linear-gradient(135deg, var(--primary-color), var(--secondary-color)) !important;
+            padding: 15px 30px !important;
+            border-radius: 10px !important;
+            font-weight: 600 !important;
+            letter-spacing: 0.5px !important;
+            width: 100% !important;
+            margin-top: 15px !important;
+        }
+        .generate-btn:hover {
+            transform: translateY(-2px);
+            box-shadow: 0 5px 15px rgba(71, 118, 230, 0.3) !important;
+        }
+        .download-btn {
+            margin-top: 20px;
+            text-align: center;
+        }
+        .download-btn a {
+            display: inline-flex;
+            align-items: center;
+            justify-content: center;
+            background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
+            color: white;
+            padding: 12px 25px;
+            border-radius: 10px;
+            text-decoration: none;
+            font-weight: 600;
+            letter-spacing: 0.5px;
+            transition: all 0.3s ease;
+            gap: 8px;
+            width: 100%;
+            max-width: 300px;
+        }
+        .download-btn a:before {
+            content: "⬇️";
+            font-size: 1.2em;
+        }
+        .download-btn a:hover {
+            transform: translateY(-2px);
+            box-shadow: 0 5px 15px rgba(71, 118, 230, 0.3);
+        }
+        /* Audio player styling */
+        audio {
+            width: 100% !important;
+            margin: 15px 0 !important;
+            border-radius: 10px !important;
+        }
+        /* Hide output text - this CSS is from your original file, ensure it's intentional */
+        #output-text {
+            display: none !important;
+        }
+    """
+) as app: # Changed demo to app for consistency
     gr.Markdown("# Advanced TTS with Configurable SRT Generation")
     gr.Markdown("Generate perfectly synchronized audio and subtitles with natural speech patterns.")
             text_input = gr.Textbox(label="Enter Text", lines=10, placeholder="Enter your text here...")
         with gr.Column(scale=2):
+            language = gr.Dropdown( # Changed to language for consistency
+                label="Select Language",
+                choices=list(language_dict.keys()),
+                value=default_language,
+                interactive=True
+            )
+            speaker = gr.Dropdown( # Changed to speaker for consistency
                 label="Select Voice",
+                choices=list(language_dict[default_language].keys()), # Initialize with default language's speakers
+                value=list(language_dict[default_language].keys())[0], # Default to first speaker of default language
+                interactive=True # Should be interactive if it changes based on language
             )
             pitch_slider = gr.Slider(
                 label="Pitch Adjustment (Hz)",
                 value=True,
                 info="Process multiple segments simultaneously for faster conversion (recommended for longer texts)"
             )
+            tashkeel_checkbox = gr.Checkbox( # Moved here for better layout
+                label="Tashkeel (Arabic Only)",
+                value=False,
+                visible=False, # Initially hidden
+                interactive=True
+            )
     submit_btn = gr.Button("Generate Audio & Subtitles")
     error_output = gr.Textbox(label="Status", visible=False, interactive=False)
     with gr.Row():
         with gr.Column():
             audio_output = gr.Audio(label="Preview Audio")
         with gr.Column():
+            # Use gr.HTML for download links
+            srt_download_link = gr.HTML(value="", visible=False, label="Download SRT")
+            audio_download_link = gr.HTML(value="", visible=False, label="Download Audio")
+    # Event Handlers
+    language.change(
+        fn=get_speakers,
+        inputs=[language],
+        outputs=[speaker, tashkeel_checkbox] # Ensure correct output for dropdown and checkbox
+    )
     submit_btn.click(
         fn=process_text_with_progress,
         inputs=[
             text_input,
             pitch_slider,
             rate_slider,
+            speaker, # NOTE(review): this dropdown's value is the speaker display name (a language_dict key), not the voice code - confirm the handler resolves it to the code
             words_per_line,
             lines_per_segment,
             parallel_processing