hivecorp committed on
Commit
f2939e1
·
verified ·
1 Parent(s): 62612dc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +668 -89
app.py CHANGED
@@ -20,7 +20,7 @@ class TimingManager:
20
  def get_timing(self, duration):
21
  start_time = self.current_time
22
  end_time = start_time + duration
23
- self.current_time = end_time + duration + self.segment_gap # Ensure a small gap between segments
24
  return start_time, end_time
25
 
26
  def get_audio_length(audio_file):
@@ -184,7 +184,6 @@ class TextProcessor:
184
 
185
  return lines
186
 
187
- # IMPROVEMENT 1: Enhanced Error Handling
188
  class TTSError(Exception):
189
  """Custom exception for TTS processing errors"""
190
  pass
@@ -226,7 +225,6 @@ async def process_segment_with_timing(segment: Segment, voice: str, rate: str, p
226
  except Exception:
227
  pass # Ignore deletion errors
228
 
229
- # IMPROVEMENT 2: Better File Management with cleanup
230
  class FileManager:
231
  """Manages temporary and output files with cleanup capabilities"""
232
  def __init__(self):
@@ -281,10 +279,8 @@ class FileManager:
281
  except Exception:
282
  pass # Ignore if directory isn't empty or can't be removed
283
 
284
- # Create global file manager
285
  file_manager = FileManager()
286
 
287
- # IMPROVEMENT 3: Parallel Processing for Segments
288
  async def generate_accurate_srt(
289
  text: str,
290
  voice: str,
@@ -303,16 +299,12 @@ async def generate_accurate_srt(
303
  total_segments = len(segments)
304
  processed_segments = []
305
 
306
- # Update progress to show segmentation is complete
307
  if progress_callback:
308
  progress_callback(0.1, "Text segmentation complete")
309
 
310
  if parallel and total_segments > 1:
311
- # Process segments in parallel
312
  processed_count = 0
313
  segment_tasks = []
314
-
315
- # Create a semaphore to limit concurrent tasks
316
  semaphore = asyncio.Semaphore(max_workers)
317
 
318
  async def process_with_semaphore(segment):
@@ -326,18 +318,15 @@ async def generate_accurate_srt(
326
  progress_callback(progress, f"Processed {processed_count}/{total_segments} segments")
327
  return result
328
  except Exception as e:
329
- # Handle errors in individual segments
330
  processed_count += 1
331
  if progress_callback:
332
  progress = 0.1 + (0.8 * processed_count / total_segments)
333
  progress_callback(progress, f"Error in segment {segment.id}: {str(e)}")
334
  raise
335
 
336
- # Create tasks for all segments
337
  for segment in segments:
338
  segment_tasks.append(process_with_semaphore(segment))
339
 
340
- # Run all tasks and collect results
341
  try:
342
  processed_segments = await asyncio.gather(*segment_tasks)
343
  except Exception as e:
@@ -345,7 +334,6 @@ async def generate_accurate_srt(
345
  progress_callback(0.9, f"Error during parallel processing: {str(e)}")
346
  raise TTSError(f"Failed during parallel processing: {str(e)}")
347
  else:
348
- # Process segments sequentially (original method)
349
  for i, segment in enumerate(segments):
350
  try:
351
  processed_segment = await process_segment_with_timing(segment, voice, rate, pitch)
@@ -359,48 +347,39 @@ async def generate_accurate_srt(
359
  progress_callback(0.9, f"Error processing segment {segment.id}: {str(e)}")
360
  raise TTSError(f"Failed to process segment {segment.id}: {str(e)}")
361
 
362
- # Sort segments by ID to ensure correct order
363
  processed_segments.sort(key=lambda s: s.id)
364
 
365
  if progress_callback:
366
  progress_callback(0.9, "Finalizing audio and subtitles")
367
 
368
- # Now combine the segments in the correct order
369
  current_time = 0
370
  final_audio = AudioSegment.empty()
371
  srt_content = ""
372
 
373
  for segment in processed_segments:
374
- # Calculate precise timing
375
  segment.start_time = current_time
376
  segment.end_time = current_time + segment.duration
377
 
378
- # Add to SRT with precise timing
379
  srt_content += (
380
  f"{segment.id}\n"
381
  f"{format_time_ms(segment.start_time)} --> {format_time_ms(segment.end_time)}\n"
382
  f"{segment.text}\n\n"
383
  )
384
 
385
- # Add to final audio with precise positioning
386
  final_audio = final_audio.append(segment.audio, crossfade=0)
387
-
388
- # Update timing with precise gap
389
  current_time = segment.end_time
390
 
391
- # Export with high precision
392
  srt_path, audio_path = file_manager.create_output_paths()
393
 
394
  try:
395
- # Export with optimized quality settings and compression
396
  export_params = {
397
  'format': 'mp3',
398
- 'bitrate': '192k', # Reduced from 320k but still high quality
399
  'parameters': [
400
- '-ar', '44100', # Standard sample rate
401
- '-ac', '2', # Stereo
402
- '-compression_level', '0', # Best compression
403
- '-qscale:a', '2' # High quality VBR encoding
404
  ]
405
  }
406
  final_audio.export(audio_path, **export_params)
@@ -417,6 +396,7 @@ async def generate_accurate_srt(
417
 
418
  return srt_path, audio_path
419
 
 
420
  async def process_text_with_progress(
421
  text,
422
  pitch,
@@ -427,31 +407,26 @@ async def process_text_with_progress(
427
  parallel_processing,
428
  progress=gr.Progress()
429
  ):
430
- # Initialize outputs to their default 'hidden' state
431
- # gr.Audio expects None or a path.
432
- # gr.Markdown expects a string. An empty string effectively hides the content.
433
- output_audio = None
434
- output_srt_link = ""
435
- output_audio_link = ""
436
- output_error = ""
437
 
438
  # Input validation
439
  if not text or text.strip() == "":
440
- output_error = "Please enter some text to convert to speech."
441
- # Update visibility of error_output only when an error occurs
442
  return (
443
  output_audio,
444
- output_srt_link,
445
- output_audio_link,
446
- gr.update(value=output_error, visible=True)
447
  )
448
 
449
- # Format pitch and rate strings
450
  pitch_str = f"{pitch:+d}Hz" if pitch != 0 else "+0Hz"
451
  rate_str = f"{rate:+d}%" if rate != 0 else "+0%"
452
 
453
  try:
454
- # Start progress tracking
455
  progress(0, "Preparing text...")
456
 
457
  def update_progress(value, status):
@@ -468,50 +443,106 @@ async def process_text_with_progress(
468
  parallel=parallel_processing
469
  )
470
 
471
- # Generate Markdown links for download that open in a new tab
472
- output_srt_link = f'<a href="file={srt_path}" download="subtitles.srt" target="_blank">Download SRT</a>'
473
- output_audio_link = f'<a href="file={audio_path}" download="audio.mp3" target="_blank">Download Audio</a>'
474
- output_audio = audio_path # For the gr.Audio preview component
475
- output_error = "" # Clear any previous error
476
-
477
- # Return updated values. Note: No explicit `visible=True` for Markdown here.
478
- # The presence of content makes them appear.
 
 
 
 
 
 
 
 
 
 
479
  return (
480
- output_audio,
481
- gr.update(value=output_srt_link, visible=True), # Now explicit visible=True here
482
- gr.update(value=output_audio_link, visible=True), # Now explicit visible=True here
483
- gr.update(value=output_error, visible=False) # Hide error message on success
484
  )
485
  except TTSError as e:
486
- output_error = f"TTS Error: {str(e)}"
487
  except Exception as e:
488
- output_error = f"Unexpected error: {str(e)}"
489
 
490
- # Unified error return block
491
  return (
492
- None, # audio_output should be None on error
493
  gr.update(value="", visible=False), # Hide SRT download link
494
  gr.update(value="", visible=False), # Hide Audio download link
495
- gr.update(value=output_error, visible=True) # Show error message
496
  )
497
 
498
- # Voice options dictionary
499
- voice_options = {
500
- "Andrew Male": "en-US-AndrewNeural",
501
- "Jenny Female": "en-US-JennyNeural",
502
- "Guy Male": "en-US-GuyNeural",
503
- "Ana Female": "en-US-AnaNeural",
504
- "Aria Female": "en-US-AriaNeural",
505
- "Brian Male": "en-US-BrianNeural",
506
- "Christopher Male": "en-US-ChristopherNeural",
507
- "Eric Male": "en-US-EricNeural",
508
- "Michelle Male": "en-US-MichelleNeural",
509
- "Roger Male": "en-US-RogerNeural",
510
- "Natasha Female": "en-AU-NatashaNeural",
511
- "William Male": "en-AU-WilliamNeural",
512
- "Clara Female": "en-CA-ClaraNeural",
513
- "Liam Female ": "en-CA-LiamNeural",
514
- "Libby Female": "en-GB-LibbyNeural",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
515
  "Maisie": "en-GB-MaisieNeural",
516
  "Ryan": "en-GB-RyanNeural",
517
  "Sonia": "en-GB-SoniaNeural",
@@ -535,15 +566,545 @@ voice_options = {
535
  "Imani": "en-TZ-ImaniNeural",
536
  "Leah": "en-ZA-LeahNeural",
537
  "Luke": "en-ZA-LukeNeural"
538
- # Add other voices as needed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
539
  }
540
 
541
- # Register cleanup on exit
542
- import atexit
 
 
 
 
 
 
 
543
  atexit.register(file_manager.cleanup_all)
544
 
545
  # Create Gradio interface
546
- with gr.Blocks(title="Advanced TTS with Configurable SRT Generation") as app:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
547
  gr.Markdown("# Advanced TTS with Configurable SRT Generation")
548
  gr.Markdown("Generate perfectly synchronized audio and subtitles with natural speech patterns.")
549
 
@@ -552,10 +1113,17 @@ with gr.Blocks(title="Advanced TTS with Configurable SRT Generation") as app:
552
  text_input = gr.Textbox(label="Enter Text", lines=10, placeholder="Enter your text here...")
553
 
554
  with gr.Column(scale=2):
555
- voice_dropdown = gr.Dropdown(
 
 
 
 
 
 
556
  label="Select Voice",
557
- choices=list(voice_options.keys()),
558
- value="Jenny Female"
 
559
  )
560
  pitch_slider = gr.Slider(
561
  label="Pitch Adjustment (Hz)",
@@ -597,28 +1165,39 @@ with gr.Blocks(title="Advanced TTS with Configurable SRT Generation") as app:
597
  value=True,
598
  info="Process multiple segments simultaneously for faster conversion (recommended for longer texts)"
599
  )
 
 
 
 
 
 
600
 
601
  submit_btn = gr.Button("Generate Audio & Subtitles")
602
 
603
- # Error message component - initially hidden
604
  error_output = gr.Textbox(label="Status", visible=False, interactive=False)
605
 
606
  with gr.Row():
607
  with gr.Column():
608
  audio_output = gr.Audio(label="Preview Audio")
609
  with gr.Column():
610
- # These are the actual components that will display the links
611
- srt_download_link = gr.Markdown(value="", visible=False, label="Download SRT Link")
612
- audio_download_link = gr.Markdown(value="", visible=False, label="Download Audio Link")
613
-
614
- # Handle button click with manual error handling
 
 
 
 
 
 
615
  submit_btn.click(
616
  fn=process_text_with_progress,
617
  inputs=[
618
  text_input,
619
  pitch_slider,
620
  rate_slider,
621
- voice_dropdown,
622
  words_per_line,
623
  lines_per_segment,
624
  parallel_processing
 
20
  def get_timing(self, duration):
21
  start_time = self.current_time
22
  end_time = start_time + duration
23
+ self.current_time = end_time + self.segment_gap
24
  return start_time, end_time
25
 
26
  def get_audio_length(audio_file):
 
184
 
185
  return lines
186
 
 
187
  class TTSError(Exception):
188
  """Custom exception for TTS processing errors"""
189
  pass
 
225
  except Exception:
226
  pass # Ignore deletion errors
227
 
 
228
  class FileManager:
229
  """Manages temporary and output files with cleanup capabilities"""
230
  def __init__(self):
 
279
  except Exception:
280
  pass # Ignore if directory isn't empty or can't be removed
281
 
 
282
  file_manager = FileManager()
283
 
 
284
  async def generate_accurate_srt(
285
  text: str,
286
  voice: str,
 
299
  total_segments = len(segments)
300
  processed_segments = []
301
 
 
302
  if progress_callback:
303
  progress_callback(0.1, "Text segmentation complete")
304
 
305
  if parallel and total_segments > 1:
 
306
  processed_count = 0
307
  segment_tasks = []
 
 
308
  semaphore = asyncio.Semaphore(max_workers)
309
 
310
  async def process_with_semaphore(segment):
 
318
  progress_callback(progress, f"Processed {processed_count}/{total_segments} segments")
319
  return result
320
  except Exception as e:
 
321
  processed_count += 1
322
  if progress_callback:
323
  progress = 0.1 + (0.8 * processed_count / total_segments)
324
  progress_callback(progress, f"Error in segment {segment.id}: {str(e)}")
325
  raise
326
 
 
327
  for segment in segments:
328
  segment_tasks.append(process_with_semaphore(segment))
329
 
 
330
  try:
331
  processed_segments = await asyncio.gather(*segment_tasks)
332
  except Exception as e:
 
334
  progress_callback(0.9, f"Error during parallel processing: {str(e)}")
335
  raise TTSError(f"Failed during parallel processing: {str(e)}")
336
  else:
 
337
  for i, segment in enumerate(segments):
338
  try:
339
  processed_segment = await process_segment_with_timing(segment, voice, rate, pitch)
 
347
  progress_callback(0.9, f"Error processing segment {segment.id}: {str(e)}")
348
  raise TTSError(f"Failed to process segment {segment.id}: {str(e)}")
349
 
 
350
  processed_segments.sort(key=lambda s: s.id)
351
 
352
  if progress_callback:
353
  progress_callback(0.9, "Finalizing audio and subtitles")
354
 
 
355
  current_time = 0
356
  final_audio = AudioSegment.empty()
357
  srt_content = ""
358
 
359
  for segment in processed_segments:
 
360
  segment.start_time = current_time
361
  segment.end_time = current_time + segment.duration
362
 
 
363
  srt_content += (
364
  f"{segment.id}\n"
365
  f"{format_time_ms(segment.start_time)} --> {format_time_ms(segment.end_time)}\n"
366
  f"{segment.text}\n\n"
367
  )
368
 
 
369
  final_audio = final_audio.append(segment.audio, crossfade=0)
 
 
370
  current_time = segment.end_time
371
 
 
372
  srt_path, audio_path = file_manager.create_output_paths()
373
 
374
  try:
 
375
  export_params = {
376
  'format': 'mp3',
377
+ 'bitrate': '192k',
378
  'parameters': [
379
+ '-ar', '44100',
380
+ '-ac', '2',
381
+ '-compression_level', '0',
382
+ '-qscale:a', '2'
383
  ]
384
  }
385
  final_audio.export(audio_path, **export_params)
 
396
 
397
  return srt_path, audio_path
398
 
399
+ # Returns a 4-tuple for the UI: audio path (or None), SRT link update, audio link update, error-message update.
400
  async def process_text_with_progress(
401
  text,
402
  pitch,
 
407
  parallel_processing,
408
  progress=gr.Progress()
409
  ):
410
+ # Initialize outputs to their default 'hidden' state by providing empty strings
411
+ # and setting visible=False via gr.update.
412
+ output_audio = None # gr.Audio expects None or a path
413
+ output_srt_link_html = gr.update(value="", visible=False) # gr.HTML expects a string
414
+ output_audio_link_html = gr.update(value="", visible=False) # gr.HTML expects a string
415
+ output_error_message = gr.update(value="", visible=False) # gr.Textbox expects a string
 
416
 
417
  # Input validation
418
  if not text or text.strip() == "":
 
 
419
  return (
420
  output_audio,
421
+ output_srt_link_html,
422
+ output_audio_link_html,
423
+ gr.update(value="Please enter some text to convert to speech.", visible=True)
424
  )
425
 
 
426
  pitch_str = f"{pitch:+d}Hz" if pitch != 0 else "+0Hz"
427
  rate_str = f"{rate:+d}%" if rate != 0 else "+0%"
428
 
429
  try:
 
430
  progress(0, "Preparing text...")
431
 
432
  def update_progress(value, status):
 
443
  parallel=parallel_processing
444
  )
445
 
446
+ # Create HTML strings for download links. Gradio serves files using "file=" prefix.
447
+ srt_download_html = f"""
448
+ <a href="file={srt_path}" download="subtitles.srt" target="_blank"
449
+ style="display: inline-block; padding: 10px 20px; background: linear-gradient(135deg, #4776E6, #8E54E9); color: white; text-decoration: none; border-radius: 8px; font-weight: 600; transition: all 0.3s ease;"
450
+ onmouseover="this.style.transform='translateY(-2px)'; this.style.boxShadow='0 5px 15px rgba(71, 118, 230, 0.3)';"
451
+ onmouseout="this.style.transform='translateY(0)'; this.style.boxShadow='none';">
452
+ Download SRT File
453
+ </a>
454
+ """
455
+ audio_download_html = f"""
456
+ <a href="file={audio_path}" download="audio.mp3" target="_blank"
457
+ style="display: inline-block; padding: 10px 20px; background: linear-gradient(135deg, #4776E6, #8E54E9); color: white; text-decoration: none; border-radius: 8px; font-weight: 600; transition: all 0.3s ease;"
458
+ onmouseover="this.style.transform='translateY(-2px)'; this.style.boxShadow='0 5px 15px rgba(71, 118, 230, 0.3)';"
459
+ onmouseout="this.style.transform='translateY(0)'; this.style.boxShadow='none';">
460
+ Download Audio File
461
+ </a>
462
+ """
463
+
464
  return (
465
+ audio_path, # Path for gr.Audio preview
466
+ gr.update(value=srt_download_html, visible=True), # HTML link for SRT download
467
+ gr.update(value=audio_download_html, visible=True), # HTML link for Audio download
468
+ gr.update(value="", visible=False) # Hide error message
469
  )
470
  except TTSError as e:
471
+ error_message = f"TTS Error: {str(e)}"
472
  except Exception as e:
473
+ error_message = f"Unexpected error: {str(e)}"
474
 
 
475
  return (
476
+ None, # Clear audio output on error
477
  gr.update(value="", visible=False), # Hide SRT download link
478
  gr.update(value="", visible=False), # Hide Audio download link
479
+ gr.update(value=error_message, visible=True) # Show error message
480
  )
481
 
482
+ # NOTE: create_download_link is currently unused; it is retained only as a reference implementation.
483
def create_download_link(audio_path):
    """Build an HTML download button for a generated audio file.

    Args:
        audio_path: Path of the audio file to link to, or ``None``.

    Returns:
        An HTML ``<a>`` snippet (str) whose click handler fetches the file and
        triggers a browser download, or ``None`` when ``audio_path`` is ``None``.
    """
    # Function-scope imports keep this block self-contained.
    import html
    import json

    if audio_path is None:
        return None

    filename = Path(audio_path).name
    # Gradio handles file serving with the "file=" prefix directly, no need for base_url
    file_url = f"file={audio_path}"

    # Escape every interpolated value for the context it lands in, so a quote
    # or angle bracket in the file name cannot break out of the attribute.
    href_attr = html.escape(file_url, quote=True)
    name_attr = html.escape(filename, quote=True)
    # json.dumps yields a valid JS string literal; it still sits inside the
    # onclick="..." attribute, so it must be HTML-escaped as well.
    name_js = html.escape(json.dumps(filename), quote=True)

    return f"""
    <a href="{href_attr}"
       download="{name_attr}"
       target="_blank"
       rel="noopener noreferrer"
       style="display: inline-block; padding: 10px 20px; background: linear-gradient(135deg, #4776E6, #8E54E9); color: white; text-decoration: none; border-radius: 8px; font-weight: 600; transition: all 0.3s ease;"
       onmouseover="this.style.transform='translateY(-2px)'; this.style.boxShadow='0 5px 15px rgba(71, 118, 230, 0.3)';"
       onmouseout="this.style.transform='translateY(0)'; this.style.boxShadow='none';"
       onclick="event.preventDefault(); fetch(this.href).then(resp => resp.blob()).then(blob => {{
           const url = window.URL.createObjectURL(blob);
           const a = document.createElement('a');
           a.style.display = 'none';
           a.href = url;
           a.download = {name_js};
           document.body.appendChild(a);
           a.click();
           window.URL.revokeObjectURL(url);
           document.body.removeChild(a);
       }});">
        Download Audio File
    </a>
    """
513
+
514
def cleanup_file(filepath, delay=300):
    """Schedule ``filepath`` for deletion after ``delay`` seconds.

    Deletion runs on a background ``threading.Timer``. A file that is already
    gone is silently ignored; any other failure is reported on stdout and not
    raised.

    Args:
        filepath: Path of the file to remove.
        delay: Seconds to wait before removing the file (default 300).

    Returns:
        The started ``Timer``, so callers may ``cancel()`` or ``join()`` it.
    """
    def delete_file():
        # Remove directly instead of checking existence first: the file may
        # vanish between the check and the removal.
        try:
            os.remove(filepath)
        except FileNotFoundError:
            return  # Already removed; nothing to clean up.
        except Exception as e:
            print(f"Error cleaning up file {filepath}: {e}")
        else:
            print(f"Cleaned up file: {filepath}")

    timer = Timer(delay, delete_file)
    timer.start()
    return timer
524
+
525
+ # --- Voice Options and Gradio Interface ---
526
+ language_dict = {
527
+ "Hindi": {
528
+ "Madhur": "hi-IN-MadhurNeural",
529
+ "Swara": "hi-IN-SwaraNeural"
530
+ },
531
+ "English": {
532
+ "Jenny": "en-US-JennyNeural",
533
+ "Guy": "en-US-GuyNeural",
534
+ "Ana": "en-US-AnaNeural",
535
+ "Aria": "en-US-AriaNeural",
536
+ "Brian": "en-US-BrianNeural",
537
+ "Christopher": "en-US-ChristopherNeural",
538
+ "Eric": "en-US-EricNeural",
539
+ "Michelle": "en-US-MichelleNeural",
540
+ "Roger": "en-US-RogerNeural",
541
+ "Natasha": "en-AU-NatashaNeural",
542
+ "William": "en-AU-WilliamNeural",
543
+ "Clara": "en-CA-ClaraNeural",
544
+ "Liam": "en-CA-LiamNeural",
545
+ "Libby": "en-GB-LibbyNeural",
546
  "Maisie": "en-GB-MaisieNeural",
547
  "Ryan": "en-GB-RyanNeural",
548
  "Sonia": "en-GB-SoniaNeural",
 
566
  "Imani": "en-TZ-ImaniNeural",
567
  "Leah": "en-ZA-LeahNeural",
568
  "Luke": "en-ZA-LukeNeural"
569
+ },
570
+ "Spanish": {
571
+ "Elena": "es-AR-ElenaNeural",
572
+ "Tomas": "es-AR-TomasNeural",
573
+ "Marcelo": "es-BO-MarceloNeural",
574
+ "Sofia": "es-BO-SofiaNeural",
575
+ "Gonzalo": "es-CO-GonzaloNeural",
576
+ "Salome": "es-CO-SalomeNeural",
577
+ "Juan": "es-CR-JuanNeural",
578
+ "Maria": "es-CR-MariaNeural",
579
+ "Belkys": "es-CU-BelkysNeural",
580
+ "Emilio": "es-DO-EmilioNeural",
581
+ "Ramona": "es-DO-RamonaNeural",
582
+ "Andrea": "es-EC-AndreaNeural",
583
+ "Luis": "es-EC-LuisNeural",
584
+ "Alvaro": "es-ES-AlvaroNeural",
585
+ "Elvira": "es-ES-ElviraNeural",
586
+ "Teresa": "es-GQ-TeresaNeural",
587
+ "Andres": "es-GT-AndresNeural",
588
+ "Marta": "es-GT-MartaNeural",
589
+ "Carlos": "es-HN-CarlosNeural",
590
+ "Karla": "es-HN-KarlaNeural",
591
+ "Federico": "es-NI-FedericoNeural",
592
+ "Yolanda": "es-NI-YolandaNeural",
593
+ "Margarita": "es-PA-MargaritaNeural",
594
+ "Roberto": "es-PA-RobertoNeural",
595
+ "Alex": "es-PE-AlexNeural",
596
+ "Camila": "es-PE-CamilaNeural",
597
+ "Karina": "es-PR-KarinaNeural",
598
+ "Victor": "es-PR-VictorNeural",
599
+ "Mario": "es-PY-MarioNeural",
600
+ "Tania": "es-PY-TaniaNeural",
601
+ "Lorena": "es-SV-LorenaNeural",
602
+ "Rodrigo": "es-SV-RodrigoNeural",
603
+ "Alonso": "es-US-AlonsoNeural",
604
+ "Paloma": "es-US-PalomaNeural",
605
+ "Mateo": "es-UY-MateoNeural",
606
+ "Valentina": "es-UY-ValentinaNeural",
607
+ "Paola": "es-VE-PaolaNeural",
608
+ "Sebastian": "es-VE-SebastianNeural"
609
+ },
610
+ "Arabic": {
611
+ "Hamed": "ar-SA-HamedNeural",
612
+ "Zariyah": "ar-SA-ZariyahNeural",
613
+ "Fatima": "ar-AE-FatimaNeural",
614
+ "Hamdan": "ar-AE-HamdanNeural",
615
+ "Ali": "ar-BH-AliNeural",
616
+ "Laila": "ar-BH-LailaNeural",
617
+ "Ismael": "ar-DZ-IsmaelNeural",
618
+ "Salma": "ar-EG-SalmaNeural",
619
+ "Shakir": "ar-EG-ShakirNeural",
620
+ "Bassel": "ar-IQ-BasselNeural",
621
+ "Rana": "ar-IQ-RanaNeural",
622
+ "Sana": "ar-JO-SanaNeural",
623
+ "Taim": "ar-JO-TaimNeural",
624
+ "Fahed": "ar-KW-FahedNeural",
625
+ "Noura": "ar-KW-NouraNeural",
626
+ "Layla": "ar-LB-LaylaNeural",
627
+ "Rami": "ar-LB-RamiNeural",
628
+ "Iman": "ar-LY-ImanNeural",
629
+ "Omar": "ar-LY-OmarNeural",
630
+ "Jamal": "ar-MA-JamalNeural",
631
+ "Mouna": "ar-MA-MounaNeural",
632
+ "Abdullah": "ar-OM-AbdullahNeural",
633
+ "Aysha": "ar-OM-AyshaNeural",
634
+ "Amal": "ar-QA-AmalNeural",
635
+ "Moaz": "ar-QA-MoazNeural",
636
+ "Amany": "ar-SY-AmanyNeural",
637
+ "Laith": "ar-SY-LaithNeural",
638
+ "Hedi": "ar-TN-HediNeural",
639
+ "Reem": "ar-TN-ReemNeural",
640
+ "Maryam": "ar-YE-MaryamNeural",
641
+ "Saleh": "ar-YE-SalehNeural"
642
+ },
643
+ "Korean": {
644
+ "Sun-Hi": "ko-KR-SunHiNeural",
645
+ "InJoon": "ko-KR-InJoonNeural"
646
+ },
647
+ "Thai": {
648
+ "Premwadee": "th-TH-PremwadeeNeural",
649
+ "Niwat": "th-TH-NiwatNeural"
650
+ },
651
+ "Vietnamese": {
652
+ "HoaiMy": "vi-VN-HoaiMyNeural",
653
+ "NamMinh": "vi-VN-NamMinhNeural"
654
+ },
655
+ "Japanese": {
656
+ "Nanami": "ja-JP-NanamiNeural",
657
+ "Keita": "ja-JP-KeitaNeural"
658
+ },
659
+ "French": {
660
+ "Denise": "fr-FR-DeniseNeural",
661
+ "Eloise": "fr-FR-EloiseNeural",
662
+ "Henri": "fr-FR-HenriNeural",
663
+ "Sylvie": "fr-CA-SylvieNeural",
664
+ "Antoine": "fr-CA-AntoineNeural",
665
+ "Jean": "fr-CA-JeanNeural",
666
+ "Ariane": "fr-CH-ArianeNeural",
667
+ "Fabrice": "fr-CH-FabriceNeural",
668
+ "Charline": "fr-BE-CharlineNeural",
669
+ "Gerard": "fr-BE-GerardNeural"
670
+ },
671
+ "Portuguese": {
672
+ "Francisca": "pt-BR-FranciscaNeural",
673
+ "Antonio": "pt-BR-AntonioNeural",
674
+ "Duarte": "pt-PT-DuarteNeural",
675
+ "Raquel": "pt-PT-RaquelNeural"
676
+ },
677
+ "Indonesian": {
678
+ "Ardi": "id-ID-ArdiNeural",
679
+ "Gadis": "id-ID-GadisNeural"
680
+ },
681
+ "Hebrew": {
682
+ "Avri": "he-IL-AvriNeural",
683
+ "Hila": "he-IL-HilaNeural"
684
+ },
685
+ "Italian": {
686
+ "Isabella": "it-IT-IsabellaNeural",
687
+ "Diego": "it-IT-DiegoNeural",
688
+ "Elsa": "it-IT-ElsaNeural"
689
+ },
690
+ "Dutch": {
691
+ "Colette": "nl-NL-ColetteNeural",
692
+ "Fenna": "nl-NL-FennaNeural",
693
+ "Maarten": "nl-NL-MaartenNeural",
694
+ "Arnaud": "nl-BE-ArnaudNeural",
695
+ "Dena": "nl-BE-DenaNeural"
696
+ },
697
+ "Malay": {
698
+ "Osman": "ms-MY-OsmanNeural",
699
+ "Yasmin": "ms-MY-YasminNeural"
700
+ },
701
+ "Norwegian": {
702
+ "Pernille": "nb-NO-PernilleNeural",
703
+ "Finn": "nb-NO-FinnNeural"
704
+ },
705
+ "Swedish": {
706
+ "Sofie": "sv-SE-SofieNeural",
707
+ "Mattias": "sv-SE-MattiasNeural"
708
+ },
709
+ "Greek": {
710
+ "Athina": "el-GR-AthinaNeural",
711
+ "Nestoras": "el-GR-NestorasNeural"
712
+ },
713
+ "German": {
714
+ "Katja": "de-DE-KatjaNeural",
715
+ "Amala": "de-DE-AmalaNeural",
716
+ "Conrad": "de-DE-ConradNeural",
717
+ "Killian": "de-DE-KillianNeural",
718
+ "Ingrid": "de-AT-IngridNeural",
719
+ "Jonas": "de-AT-JonasNeural",
720
+ "Jan": "de-CH-JanNeural",
721
+ "Leni": "de-CH-LeniNeural"
722
+ },
723
+ "Afrikaans": {
724
+ "Adri": "af-ZA-AdriNeural",
725
+ "Willem": "af-ZA-WillemNeural"
726
+ },
727
+ "Amharic": {
728
+ "Ameha": "am-ET-AmehaNeural",
729
+ "Mekdes": "am-ET-MekdesNeural"
730
+ },
731
+ "Azerbaijani": {
732
+ "Babek": "az-AZ-BabekNeural",
733
+ "Banu": "az-AZ-BanuNeural"
734
+ },
735
+ "Bulgarian": {
736
+ "Borislav": "bg-BG-BorislavNeural",
737
+ "Kalina": "bg-BG-KalinaNeural"
738
+ },
739
+ "Bengali": {
740
+ "Nabanita": "bn-BD-NabanitaNeural",
741
+ "Pradeep": "bn-BD-PradeepNeural",
742
+ "Bashkar": "bn-IN-BashkarNeural",
743
+ "Tanishaa": "bn-IN-TanishaaNeural"
744
+ },
745
+ "Bosnian": {
746
+ "Goran": "bs-BA-GoranNeural",
747
+ "Vesna": "bs-BA-VesnaNeural"
748
+ },
749
+ "Catalan": {
750
+ "Joana": "ca-ES-JoanaNeural",
751
+ "Enric": "ca-ES-EnricNeural"
752
+ },
753
+ "Czech": {
754
+ "Antonin": "cs-CZ-AntoninNeural",
755
+ "Vlasta": "cs-CZ-VlastaNeural"
756
+ },
757
+ "Welsh": {
758
+ "Aled": "cy-GB-AledNeural",
759
+ "Nia": "cy-GB-NiaNeural"
760
+ },
761
+ "Danish": {
762
+ "Christel": "da-DK-ChristelNeural",
763
+ "Jeppe": "da-DK-JeppeNeural"
764
+ },
765
+ "Estonian": {
766
+ "Anu": "et-EE-AnuNeural",
767
+ "Kert": "et-EE-KertNeural"
768
+ },
769
+ "Persian": {
770
+ "Dilara": "fa-IR-DilaraNeural",
771
+ "Farid": "fa-IR-FaridNeural"
772
+ },
773
+ "Finnish": {
774
+ "Harri": "fi-FI-HarriNeural",
775
+ "Noora": "fi-FI-NooraNeural"
776
+ },
777
+ "Irish": {
778
+ "Colm": "ga-IE-ColmNeural",
779
+ "Orla": "ga-IE-OrlaNeural"
780
+ },
781
+ "Galician": {
782
+ "Roi": "gl-ES-RoiNeural",
783
+ "Sabela": "gl-ES-SabelaNeural"
784
+ },
785
+ "Gujarati": {
786
+ "Dhwani": "gu-IN-DhwaniNeural",
787
+ "Niranjan": "gu-IN-NiranjanNeural"
788
+ },
789
+ "Croatian": {
790
+ "Gabrijela": "hr-HR-GabrijelaNeural",
791
+ "Srecko": "hr-HR-SreckoNeural"
792
+ },
793
+ "Hungarian": {
794
+ "Noemi": "hu-HU-NoemiNeural",
795
+ "Tamas": "hu-HU-TamasNeural"
796
+ },
797
+ "Icelandic": {
798
+ "Gudrun": "is-IS-GudrunNeural",
799
+ "Gunnar": "is-IS-GunnarNeural"
800
+ },
801
+ "Javanese": {
802
+ "Dimas": "jv-ID-DimasNeural",
803
+ "Siti": "jv-ID-SitiNeural"
804
+ },
805
+ "Georgian": {
806
+ "Eka": "ka-GE-EkaNeural",
807
+ "Giorgi": "ka-GE-GiorgiNeural"
808
+ },
809
+ "Kazakh": {
810
+ "Aigul": "kk-KZ-AigulNeural",
811
+ "Daulet": "kk-KZ-DauletNeural"
812
+ },
813
+ "Khmer": {
814
+ "Piseth": "km-KH-PisethNeural",
815
+ "Sreymom": "km-KH-SreymomNeural"
816
+ },
817
+ "Kannada": {
818
+ "Gagan": "kn-IN-GaganNeural",
819
+ "Sapna": "kn-IN-SapnaNeural"
820
+ },
821
+ "Lao": {
822
+ "Chanthavong": "lo-LA-ChanthavongNeural",
823
+ "Keomany": "lo-LA-KeomanyNeural"
824
+ },
825
+ "Lithuanian": {
826
+ "Leonas": "lt-LT-LeonasNeural",
827
+ "Ona": "lt-LT-OnaNeural"
828
+ },
829
+ "Latvian": {
830
+ "Everita": "lv-LV-EveritaNeural",
831
+ "Nils": "lv-LV-NilsNeural"
832
+ },
833
+ "Macedonian": {
834
+ "Aleksandar": "mk-MK-AleksandarNeural",
835
+ "Marija": "mk-MK-MarijaNeural"
836
+ },
837
+ "Malayalam": {
838
+ "Midhun": "ml-IN-MidhunNeural",
839
+ "Sobhana": "ml-IN-SobhanaNeural"
840
+ },
841
+ "Mongolian": {
842
+ "Bataa": "mn-MN-BataaNeural",
843
+ "Yesui": "mn-MN-YesuiNeural"
844
+ },
845
+ "Marathi": {
846
+ "Aarohi": "mr-IN-AarohiNeural",
847
+ "Manohar": "mr-IN-ManoharNeural"
848
+ },
849
+ "Maltese": {
850
+ "Grace": "mt-MT-GraceNeural",
851
+ "Joseph": "mt-MT-JosephNeural"
852
+ },
853
+ "Burmese": {
854
+ "Nilar": "my-MM-NilarNeural",
855
+ "Thiha": "my-MM-ThihaNeural"
856
+ },
857
+ "Nepali": {
858
+ "Hemkala": "ne-NP-HemkalaNeural",
859
+ "Sagar": "ne-NP-SagarNeural"
860
+ },
861
+ "Polish": {
862
+ "Marek": "pl-PL-MarekNeural",
863
+ "Zofia": "pl-PL-ZofiaNeural"
864
+ },
865
+ "Pashto": {
866
+ "Gul Nawaz": "ps-AF-GulNawazNeural",
867
+ "Latifa": "ps-AF-LatifaNeural"
868
+ },
869
+ "Romanian": {
870
+ "Alina": "ro-RO-AlinaNeural",
871
+ "Emil": "ro-RO-EmilNeural"
872
+ },
873
+ "Russian": {
874
+ "Svetlana": "ru-RU-SvetlanaNeural",
875
+ "Dmitry": "ru-RU-DmitryNeural"
876
+ },
877
+ "Sinhala": {
878
+ "Sameera": "si-LK-SameeraNeural",
879
+ "Thilini": "si-LK-ThiliniNeural"
880
+ },
881
+ "Slovak": {
882
+ "Lukas": "sk-SK-LukasNeural",
883
+ "Viktoria": "sk-SK-ViktoriaNeural"
884
+ },
885
+ "Slovenian": {
886
+ "Petra": "sl-SI-PetraNeural",
887
+ "Rok": "sl-SI-RokNeural"
888
+ },
889
+ "Somali": {
890
+ "Muuse": "so-SO-MuuseNeural",
891
+ "Ubax": "so-SO-UbaxNeural"
892
+ },
893
+ "Albanian": {
894
+ "Anila": "sq-AL-AnilaNeural",
895
+ "Ilir": "sq-AL-IlirNeural"
896
+ },
897
+ "Serbian": {
898
+ "Nicholas": "sr-RS-NicholasNeural",
899
+ "Sophie": "sr-RS-SophieNeural"
900
+ },
901
+ "Sundanese": {
902
+ "Jajang": "su-ID-JajangNeural",
903
+ "Tuti": "su-ID-TutiNeural"
904
+ },
905
+ "Swahili": {
906
+ "Rafiki": "sw-KE-RafikiNeural",
907
+ "Zuri": "sw-KE-ZuriNeural",
908
+ "Daudi": "sw-TZ-DaudiNeural",
909
+ "Rehema": "sw-TZ-RehemaNeural"
910
+ },
911
+ "Tamil": {
912
+ "Pallavi": "ta-IN-PallaviNeural",
913
+ "Valluvar": "ta-IN-ValluvarNeural",
914
+ "Kumar": "ta-LK-KumarNeural",
915
+ "Saranya": "ta-LK-SaranyaNeural",
916
+ "Kani": "ta-MY-KaniNeural",
917
+ "Surya": "ta-MY-SuryaNeural",
918
+ "Anbu": "ta-SG-AnbuNeural"
919
+ },
920
+ "Telugu": {
921
+ "Mohan": "te-IN-MohanNeural",
922
+ "Shruti": "te-IN-ShrutiNeural"
923
+ },
924
+ "Turkish": {
925
+ "Ahmet": "tr-TR-AhmetNeural",
926
+ "Emel": "tr-TR-EmelNeural"
927
+ },
928
+ "Ukrainian": {
929
+ "Ostap": "uk-UA-OstapNeural",
930
+ "Polina": "uk-UA-PolinaNeural"
931
+ },
932
+ "Urdu": {
933
+ "Gul": "ur-IN-GulNeural",
934
+ "Salman": "ur-IN-SalmanNeural",
935
+ "Asad": "ur-PK-AsadNeural",
936
+ "Uzma": "ur-PK-UzmaNeural"
937
+ },
938
+ "Uzbek": {
939
+ "Madina": "uz-UZ-MadinaNeural",
940
+ "Sardor": "uz-UZ-SardorNeural"
941
+ },
942
+ "Mandarin": {
943
+ "Xiaoxiao": "zh-CN-XiaoxiaoNeural",
944
+ "Yunyang": "zh-CN-YunyangNeural",
945
+ "Yunxi": "zh-CN-YunxiNeural",
946
+ "Xiaoyi": "zh-CN-XiaoyiNeural",
947
+ "Yunjian": "zh-CN-YunjianNeural",
948
+ "Yunxia": "zh-CN-YunxiaNeural",
949
+ "Xiaobei": "zh-CN-liaoning-XiaobeiNeural",
950
+ "Xiaoni": "zh-CN-shaanxi-XiaoniNeural",
951
+ "HiuMaan": "zh-HK-HiuMaanNeural",
952
+ "HiuGaai": "zh-HK-HiuGaaiNeural",
953
+ "WanLung": "zh-HK-WanLungNeural",
954
+ "HsiaoChen": "zh-TW-HsiaoChenNeural",
955
+ "HsiaoYu": "zh-TW-HsiaoYuNeural",
956
+ "YunJhe": "zh-TW-YunJheNeural"
957
+ },
958
+ "Zulu": {
959
+ "Thando": "zu-ZA-ThandoNeural",
960
+ "Themba": "zu-ZA-ThembaNeural"
961
+ }
962
  }
963
 
964
# Initial selections used before the user touches the language dropdown.
# NOTE: default_speaker is the voice *code* (dict value) of the first entry
# listed for the default language, not the speaker's display name.
default_language = "English"
default_speaker = list(language_dict[default_language].values())[0]
967
+
968
def get_speakers(language):
    """Build the UI updates triggered by a change of the language dropdown.

    Args:
        language: Key of ``language_dict`` chosen in the language dropdown.
            ``None`` or an unknown key (e.g. a cleared dropdown) is tolerated
            and yields an empty speaker list instead of raising.

    Returns:
        A 2-tuple of ``gr.update`` payloads, in the order expected by the
        ``language.change`` handler outputs:
        1. speaker dropdown: choices set to the speaker names of the
           language, with the first one preselected (``None`` if there
           are no speakers);
        2. tashkeel checkbox: visible only when the language is "Arabic".
    """
    # `.get(...) or {}` covers both a missing key and a None selection.
    speakers = list(language_dict.get(language) or {})
    first_speaker = speakers[0] if speakers else None

    # Use gr.update for both outputs so the handler returns one consistent
    # kind of payload instead of mixing updates with new component instances.
    speaker_update = gr.update(choices=speakers, value=first_speaker, interactive=True)
    tashkeel_update = gr.update(visible=language == "Arabic", interactive=True)
    return speaker_update, tashkeel_update
972
+
973
  # Register file_manager.cleanup_all to be called when the interpreter exits.
  atexit.register(file_manager.cleanup_all)
974
 
975
  # Create Gradio interface
976
+ with gr.Blocks(title="Advanced TTS with Configurable SRT Generation",
977
+ css="""
978
+ :root {
979
+ --primary-color: #4776E6;
980
+ --secondary-color: #8E54E9;
981
+ --background-light: #ffffff;
982
+ --card-light: #f8f9fa;
983
+ --text-dark: #2d3436;
984
+ --text-gray: #636e72;
985
+ --border-color: #e0e0e0;
986
+ }
987
+
988
+ @media (max-width: 768px) {
989
+ .container {
990
+ padding: 10px !important;
991
+ }
992
+ .header h1 {
993
+ font-size: 1.5em !important;
994
+ }
995
+ }
996
+
997
+ body {
998
+ background-color: var(--background-light);
999
+ }
1000
+
1001
+ .container {
1002
+ background-color: var(--background-light);
1003
+ max-width: 1200px;
1004
+ margin: 0 auto;
1005
+ padding: 20px;
1006
+ }
1007
+
1008
+ .header {
1009
+ text-align: center;
1010
+ margin-bottom: 30px;
1011
+ background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
1012
+ padding: 25px;
1013
+ border-radius: 15px;
1014
+ color: white;
1015
+ box-shadow: 0 4px 15px rgba(71, 118, 230, 0.2);
1016
+ }
1017
+
1018
+ .input-section, .output-section {
1019
+ background-color: var(--card-light);
1020
+ padding: 25px;
1021
+ border-radius: 15px;
1022
+ margin-bottom: 20px;
1023
+ box-shadow: 0 2px 10px rgba(0, 0, 0, 0.05);
1024
+ border: 1px solid var(--border-color);
1025
+ width: 100%;
1026
+ }
1027
+
1028
+ .input-box textarea {
1029
+ min-height: 120px !important;
1030
+ font-size: 16px !important;
1031
+ border: 1px solid var(--border-color) !important;
1032
+ border-radius: 10px !important;
1033
+ padding: 15px !important;
1034
+ width: 100% !important;
1035
+ }
1036
+
1037
+ .dropdown {
1038
+ width: 100% !important;
1039
+ }
1040
+
1041
+ select, input[type="text"] {
1042
+ width: 100% !important;
1043
+ padding: 12px !important;
1044
+ border-radius: 8px !important;
1045
+ border: 1px solid var(--border-color) !important;
1046
+ }
1047
+
1048
+ .generate-btn {
1049
+ background: linear-gradient(135deg, var(--primary-color), var(--secondary-color)) !important;
1050
+ padding: 15px 30px !important;
1051
+ border-radius: 10px !important;
1052
+ font-weight: 600 !important;
1053
+ letter-spacing: 0.5px !important;
1054
+ width: 100% !important;
1055
+ margin-top: 15px !important;
1056
+ }
1057
+
1058
+ .generate-btn:hover {
1059
+ transform: translateY(-2px);
1060
+ box-shadow: 0 5px 15px rgba(71, 118, 230, 0.3) !important;
1061
+ }
1062
+
1063
+ .download-btn {
1064
+ margin-top: 20px;
1065
+ text-align: center;
1066
+ }
1067
+
1068
+ .download-btn a {
1069
+ display: inline-flex;
1070
+ align-items: center;
1071
+ justify-content: center;
1072
+ background: linear-gradient(135deg, var(--primary-color), var(--secondary-color));
1073
+ color: white;
1074
+ padding: 12px 25px;
1075
+ border-radius: 10px;
1076
+ text-decoration: none;
1077
+ font-weight: 600;
1078
+ letter-spacing: 0.5px;
1079
+ transition: all 0.3s ease;
1080
+ gap: 8px;
1081
+ width: 100%;
1082
+ max-width: 300px;
1083
+ }
1084
+
1085
+ .download-btn a:before {
1086
+ content: "⬇️";
1087
+ font-size: 1.2em;
1088
+ }
1089
+
1090
+ .download-btn a:hover {
1091
+ transform: translateY(-2px);
1092
+ box-shadow: 0 5px 15px rgba(71, 118, 230, 0.3);
1093
+ }
1094
+
1095
+ /* Audio player styling */
1096
+ audio {
1097
+ width: 100% !important;
1098
+ margin: 15px 0 !important;
1099
+ border-radius: 10px !important;
1100
+ }
1101
+
1102
+ /* Hide output text - this CSS is from your original file, ensure it's intentional */
1103
+ #output-text {
1104
+ display: none !important;
1105
+ }
1106
+ """
1107
+ ) as app: # Changed demo to app for consistency
1108
  gr.Markdown("# Advanced TTS with Configurable SRT Generation")
1109
  gr.Markdown("Generate perfectly synchronized audio and subtitles with natural speech patterns.")
1110
 
 
1113
  text_input = gr.Textbox(label="Enter Text", lines=10, placeholder="Enter your text here...")
1114
 
1115
  with gr.Column(scale=2):
1116
+ language = gr.Dropdown( # Changed to language for consistency
1117
+ label="Select Language",
1118
+ choices=list(language_dict.keys()),
1119
+ value=default_language,
1120
+ interactive=True
1121
+ )
1122
+ speaker = gr.Dropdown( # Changed to speaker for consistency
1123
  label="Select Voice",
1124
+ choices=list(language_dict[default_language].keys()), # Initialize with default language's speakers
1125
+ value=list(language_dict[default_language].keys())[0], # Default to first speaker of default language
1126
+ interactive=True # Should be interactive if it changes based on language
1127
  )
1128
  pitch_slider = gr.Slider(
1129
  label="Pitch Adjustment (Hz)",
 
1165
  value=True,
1166
  info="Process multiple segments simultaneously for faster conversion (recommended for longer texts)"
1167
  )
1168
+ tashkeel_checkbox = gr.Checkbox( # Moved here for better layout
1169
+ label="Tashkeel (Arabic Only)",
1170
+ value=False,
1171
+ visible=False, # Initially hidden
1172
+ interactive=True
1173
+ )
1174
 
1175
  submit_btn = gr.Button("Generate Audio & Subtitles")
1176
 
 
1177
  error_output = gr.Textbox(label="Status", visible=False, interactive=False)
1178
 
1179
  with gr.Row():
1180
  with gr.Column():
1181
  audio_output = gr.Audio(label="Preview Audio")
1182
  with gr.Column():
1183
+ # Use gr.HTML for download links
1184
+ srt_download_link = gr.HTML(value="", visible=False, label="Download SRT")
1185
+ audio_download_link = gr.HTML(value="", visible=False, label="Download Audio")
1186
+
1187
+ # Event Handlers
1188
+ language.change(
1189
+ fn=get_speakers,
1190
+ inputs=[language],
1191
+ outputs=[speaker, tashkeel_checkbox] # Ensure correct output for dropdown and checkbox
1192
+ )
1193
+
1194
  submit_btn.click(
1195
  fn=process_text_with_progress,
1196
  inputs=[
1197
  text_input,
1198
  pitch_slider,
1199
  rate_slider,
1200
+ speaker, # Use 'speaker' here as it holds the actual voice code
1201
  words_per_line,
1202
  lines_per_segment,
1203
  parallel_processing