hivecorp committed on
Commit
be31ce6
·
verified ·
1 Parent(s): 4c25f4a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +331 -178
app.py CHANGED
@@ -5,14 +5,12 @@ import os
5
  import asyncio
6
  import uuid
7
  import re
 
 
8
  from concurrent.futures import ThreadPoolExecutor
9
- from typing import List, Tuple, Optional
10
  import math
11
  from dataclasses import dataclass
12
- import hashlib
13
- import json
14
- from pathlib import Path
15
- from tqdm.asyncio import tqdm
16
 
17
  class TimingManager:
18
  def __init__(self):
@@ -186,194 +184,278 @@ class TextProcessor:
186
 
187
  return lines
188
 
189
- class AudioCache:
190
- def __init__(self, cache_dir="./cache"):
191
- self.cache_dir = Path(cache_dir)
192
- self.cache_dir.mkdir(exist_ok=True)
193
-
194
- def get_cache_key(self, text: str, voice: str, rate: str, pitch: str) -> str:
195
- data = f"{text}{voice}{rate}{pitch}".encode()
196
- return hashlib.md5(data).hexdigest()
197
-
198
- def get_cached_audio(self, cache_key: str) -> Optional[AudioSegment]:
199
- cache_file = self.cache_dir / f"{cache_key}.wav"
200
- if cache_file.exists():
201
- return AudioSegment.from_file(str(cache_file))
202
- return None
203
-
204
- def cache_audio(self, cache_key: str, audio: AudioSegment):
205
- cache_file = self.cache_dir / f"{cache_key}.wav"
206
- audio.export(str(cache_file), format="wav")
207
-
208
- class SSMLBuilder:
209
- def __init__(self):
210
- self.content = []
211
-
212
- def add_text(self, text: str):
213
- self.content.append(text)
214
- return self
215
-
216
- def add_break(self, strength: str = "medium"):
217
- self.content.append(f'<break strength="{strength}"/>')
218
- return self
219
-
220
- def add_prosody(self, text: str, rate: str = "medium", pitch: str = "medium"):
221
- self.content.append(
222
- f'<prosody rate="{rate}" pitch="{pitch}">{text}</prosody>'
223
- )
224
- return self
225
-
226
- def add_sentence(self, text: str):
227
- self.content.append(f'<s>{text}</s>')
228
- return self
229
-
230
- def __str__(self):
231
- return (
232
- '<?xml version="1.0"?>'
233
- '<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis">'
234
- f'{"".join(self.content)}'
235
- '</speak>'
236
- )
237
-
238
- class SpeechEnhancer:
239
- @staticmethod
240
- def add_speech_marks(text: str) -> str:
241
- """Add SSML marks for better speech control"""
242
- ssml = SSMLBuilder()
243
-
244
- # Split text and add appropriate SSML tags
245
- sentences = text.split('. ')
246
- for i, sentence in enumerate(sentences):
247
- sentence = sentence.strip()
248
- if not sentence:
249
- continue
250
-
251
- ssml.add_sentence(sentence)
252
-
253
- # Add appropriate breaks between sentences
254
- if i < len(sentences) - 1:
255
- ssml.add_break("strong")
256
-
257
- # Add breaks at commas
258
- if ',' in sentence:
259
- parts = sentence.split(',')
260
- for part in parts[:-1]:
261
- ssml.add_break("medium")
262
-
263
- return str(ssml)
264
-
265
- @staticmethod
266
- def enhance_timing(segment: Segment) -> Segment:
267
- """Add natural pauses based on punctuation"""
268
- if segment.audio:
269
- for punct, pause_ms in {'.': 400, '!': 400, '?': 400, ',': 200, ';': 300}.items():
270
- if punct in segment.text:
271
- silence = AudioSegment.silent(duration=pause_ms)
272
- segment.audio = segment.audio.append(silence, crossfade=50)
273
- return segment
274
 
275
- async def process_segment_with_timing(segment: Segment, voice: str, rate: str, pitch: str, cache: AudioCache) -> Segment:
276
- """Process segment with enhanced speech features"""
277
- cache_key = cache.get_cache_key(segment.text, voice, rate, pitch)
278
- cached_audio = cache.get_cached_audio(cache_key)
279
-
280
- if cached_audio:
281
- segment.audio = cached_audio
282
- segment.duration = len(cached_audio)
283
- return segment
284
-
285
  try:
286
- enhanced_text = SpeechEnhancer.add_speech_marks(segment.text)
287
- tts = edge_tts.Communicate(enhanced_text, voice, rate=rate, pitch=pitch)
 
 
 
 
 
 
288
 
289
- audio_file = f"temp_segment_{segment.id}_{uuid.uuid4()}.wav"
290
- await tts.save(audio_file)
291
 
292
- segment.audio = AudioSegment.from_file(audio_file)
293
- segment = SpeechEnhancer.enhance_timing(segment)
294
- segment.duration = len(segment.audio)
 
 
 
 
 
295
 
296
- cache.cache_audio(cache_key, segment.audio)
297
  return segment
298
  except Exception as e:
299
- print(f"Error processing segment {segment.id}: {str(e)}")
 
300
  raise
301
  finally:
302
  if os.path.exists(audio_file):
303
- os.remove(audio_file)
 
 
 
304
 
305
- async def generate_accurate_srt(text: str, voice: str, rate: str, pitch: str, words_per_line: int, lines_per_segment: int, enable_ssml: bool, use_cache: bool, pause_after_period: int, pause_after_comma: int) -> Tuple[str, str]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
  processor = TextProcessor(words_per_line, lines_per_segment)
307
  segments = processor.split_into_segments(text)
308
 
309
- # Process segments sequentially for better timing control
310
  processed_segments = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
  current_time = 0
312
  final_audio = AudioSegment.empty()
313
  srt_content = ""
314
- cache = AudioCache() if use_cache else None
315
 
316
- for segment in tqdm(segments, desc="Processing segments"):
317
- # Process segment
318
- processed_segment = await process_segment_with_timing(segment, voice, rate, pitch, cache)
319
-
320
  # Calculate precise timing
321
- processed_segment.start_time = current_time
322
- processed_segment.end_time = current_time + processed_segment.duration
323
 
324
  # Add to SRT with precise timing
325
  srt_content += (
326
- f"{processed_segment.id}\n"
327
- f"{format_time_ms(processed_segment.start_time)} --> {format_time_ms(processed_segment.end_time)}\n"
328
- f"{processed_segment.text}\n\n"
329
  )
330
 
331
  # Add to final audio with precise positioning
332
- final_audio = final_audio.append(processed_segment.audio, crossfade=0)
333
 
334
  # Update timing with precise gap
335
- current_time = processed_segment.end_time
336
- processed_segments.append(processed_segment)
337
 
338
  # Export with high precision
339
- unique_id = uuid.uuid4()
340
- audio_path = f"final_audio_{unique_id}.mp3"
341
- srt_path = f"final_subtitles_{unique_id}.srt"
342
 
343
- # Export with high quality settings for precise timing
344
- final_audio.export(
345
- audio_path,
346
- format="mp3",
347
- bitrate="320k",
348
- parameters=["-ar", "48000", "-ac", "2"]
349
- )
 
 
 
 
 
 
 
 
350
 
351
- with open(srt_path, "w", encoding='utf-8') as f:
352
- f.write(srt_content)
353
 
354
  return srt_path, audio_path
355
 
356
- async def process_text(text, pitch, rate, voice, words_per_line, lines_per_segment, enable_ssml, use_cache, pause_after_period, pause_after_comma):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  # Format pitch and rate strings
358
  pitch_str = f"{pitch:+d}Hz" if pitch != 0 else "+0Hz"
359
  rate_str = f"{rate:+d}%" if rate != 0 else "+0%"
360
 
361
- srt_path, audio_path = await generate_accurate_srt(
362
- text,
363
- voice_options[voice],
364
- rate_str,
365
- pitch_str,
366
- words_per_line,
367
- lines_per_segment,
368
- enable_ssml,
369
- use_cache,
370
- pause_after_period,
371
- pause_after_comma
372
- )
373
-
374
- return srt_path, audio_path, audio_path
 
 
 
 
 
 
 
 
 
375
 
376
- # Voice options dictionary (same as before)
377
  voice_options = {
378
  "Andrew Male": "en-US-AndrewNeural",
379
  "Jenny Female": "en-US-JennyNeural",
@@ -413,32 +495,103 @@ voice_options = {
413
  "Imani": "en-TZ-ImaniNeural",
414
  "Leah": "en-ZA-LeahNeural",
415
  "Luke": "en-ZA-LukeNeural"
416
- # Add other voices here...
417
  }
418
 
 
 
 
 
419
  # Create Gradio interface
420
- app = gr.Interface(
421
- fn=process_text,
422
- inputs=[
423
- gr.Textbox(label="Enter Text", lines=10),
424
- gr.Slider(label="Pitch Adjustment (Hz)", minimum=-10, maximum=10, value=0, step=1),
425
- gr.Slider(label="Rate Adjustment (%)", minimum=-25, maximum=25, value=0, step=1),
426
- gr.Dropdown(label="Select Voice", choices=list(voice_options.keys()), value="Jenny Female"),
427
- gr.Slider(label="Words per Line", minimum=3, maximum=12, value=6, step=1),
428
- gr.Slider(label="Lines per Segment", minimum=1, maximum=4, value=2, step=1),
429
- gr.Checkbox(label="Enable SSML Enhancement", value=True),
430
- gr.Checkbox(label="Use Audio Cache", value=True),
431
- gr.Slider(label="Pause After Period (ms)", minimum=200, maximum=800, value=400, step=50),
432
- gr.Slider(label="Pause After Comma (ms)", minimum=100, maximum=400, value=200, step=50)
433
- ],
434
- outputs=[
435
- gr.File(label="Download SRT"),
436
- gr.File(label="Download Audio"),
437
- gr.Audio(label="Preview Audio"),
438
- gr.HTML(label="Processing Status")
439
- ],
440
- title="Advanced TTS with Configurable SRT Generation",
441
- description="Generate perfectly synchronized audio and subtitles with natural speech patterns."
442
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
443
 
444
- app.launch()
 
 
5
  import asyncio
6
  import uuid
7
  import re
8
+ import time
9
+ import tempfile
10
  from concurrent.futures import ThreadPoolExecutor
11
+ from typing import List, Tuple, Optional, Dict, Any
12
  import math
13
  from dataclasses import dataclass
 
 
 
 
14
 
15
  class TimingManager:
16
  def __init__(self):
 
184
 
185
  return lines
186
 
187
+ # IMPROVEMENT 1: Enhanced Error Handling
188
class TTSError(Exception):
    """Raised when speech synthesis, audio post-processing, or export fails.

    Callers convert this into a user-facing error; any other exception type is
    treated as unexpected.
    """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
async def process_segment_with_timing(segment: Segment, voice: str, rate: str, pitch: str) -> Segment:
    """Synthesize one segment as a single TTS utterance and attach its audio.

    The segment text has its newlines replaced by spaces, is sent to edge-tts,
    and the saved file is loaded back. 50 units of silence (``duration=50``)
    are added before and after the clip, and ``segment.audio`` /
    ``segment.duration`` are updated in place.

    Args:
        segment: Segment to synthesize; must expose ``id`` and ``text``.
        voice: edge-tts voice name.
        rate: Rate string passed through to edge-tts (e.g. ``"+0%"``).
        pitch: Pitch string passed through to edge-tts (e.g. ``"+0Hz"``).

    Returns:
        The same ``segment`` object with ``audio`` and ``duration`` set.

    Raises:
        TTSError: On any failure; the original exception is chained as
            ``__cause__`` so the root cause stays visible in tracebacks.
    """
    audio_file = os.path.join(tempfile.gettempdir(), f"temp_segment_{segment.id}_{uuid.uuid4()}.wav")

    try:
        # Process the entire segment text as one unit, replacing newlines with spaces
        segment_text = ' '.join(segment.text.split('\n'))
        tts = edge_tts.Communicate(segment_text, voice, rate=rate, pitch=pitch)

        try:
            await tts.save(audio_file)
        except Exception as e:
            raise TTSError(f"Failed to generate audio for segment {segment.id}: {str(e)}") from e

        if not os.path.exists(audio_file) or os.path.getsize(audio_file) == 0:
            raise TTSError(f"Generated audio file is empty or missing for segment {segment.id}")

        try:
            clip = AudioSegment.from_file(audio_file)
            # Add small silence at start and end for natural spacing
            silence = AudioSegment.silent(duration=50)
            segment.audio = silence + clip + silence
            segment.duration = len(segment.audio)
        except Exception as e:
            raise TTSError(f"Failed to process audio file for segment {segment.id}: {str(e)}") from e

        return segment
    except TTSError:
        # Already carries a specific message; pass it through untouched.
        raise
    except Exception as e:
        raise TTSError(f"Unexpected error processing segment {segment.id}: {str(e)}") from e
    finally:
        # Best-effort removal of the scratch file; a missing file or a failed
        # delete must never mask the real outcome of the synthesis.
        try:
            os.remove(audio_file)
        except OSError:
            pass
228
 
229
+ # IMPROVEMENT 2: Better File Management with cleanup
230
class FileManager:
    """Manages temporary and output files with cleanup capabilities.

    Every path handed out lives inside one private directory created with
    ``tempfile.mkdtemp``. Output files are tracked as ``(srt_path, audio_path)``
    pairs, oldest first, and only the newest ``max_files_to_keep`` pairs are
    retained on disk.
    """

    def __init__(self):
        self.temp_dir = tempfile.mkdtemp(prefix="tts_app_")
        # (srt_path, audio_path) pairs in creation order, oldest first.
        self.output_files: List[Tuple[str, str]] = []
        self.max_files_to_keep = 5  # Keep only the 5 most recent output pairs

    def get_temp_path(self, prefix):
        """Return a unique, not-yet-created path inside the managed directory."""
        return os.path.join(self.temp_dir, f"{prefix}_{uuid.uuid4()}")

    def create_output_paths(self):
        """Reserve a new ``(srt_path, audio_path)`` pair and evict old pairs.

        The files themselves are not created here; the caller writes them.
        """
        unique_id = str(uuid.uuid4())
        audio_path = os.path.join(self.temp_dir, f"final_audio_{unique_id}.mp3")
        srt_path = os.path.join(self.temp_dir, f"final_subtitles_{unique_id}.srt")

        self.output_files.append((srt_path, audio_path))
        self.cleanup_old_files()

        return srt_path, audio_path

    @staticmethod
    def _remove_quietly(path):
        """Delete ``path`` if possible; a missing or locked file is not an error."""
        try:
            os.remove(path)
        except OSError:
            pass

    def cleanup_old_files(self):
        """Delete the oldest output pairs beyond ``max_files_to_keep``."""
        # Count the surplus explicitly: slicing with ``-max_files_to_keep``
        # silently keeps everything when the limit is 0 (``[:-0]`` is empty).
        excess = len(self.output_files) - max(self.max_files_to_keep, 0)
        if excess <= 0:
            return

        stale = self.output_files[:excess]
        self.output_files = self.output_files[excess:]
        for srt_path, audio_path in stale:
            # Each file is removed independently so that a failure on the SRT
            # does not leave its audio file behind.
            self._remove_quietly(srt_path)
            self._remove_quietly(audio_path)

    def cleanup_all(self):
        """Delete every tracked output file and the managed directory itself."""
        import shutil  # local import: only needed for the final teardown

        for srt_path, audio_path in self.output_files:
            self._remove_quietly(srt_path)
            self._remove_quietly(audio_path)
        self.output_files = []

        # rmtree (rather than rmdir) also clears scratch files handed out by
        # get_temp_path, which would otherwise keep the directory alive.
        shutil.rmtree(self.temp_dir, ignore_errors=True)


# Create global file manager
file_manager = FileManager()
286
+
287
+ # IMPROVEMENT 3: Parallel Processing for Segments
288
async def generate_accurate_srt(
    text: str,
    voice: str,
    rate: str,
    pitch: str,
    words_per_line: int,
    lines_per_segment: int,
    progress_callback=None,
    parallel: bool = True,
    max_workers: int = 4
) -> Tuple[str, str]:
    """Synthesize ``text`` segment by segment and write matching audio + SRT.

    The text is split with ``TextProcessor``, every segment is synthesized via
    ``process_segment_with_timing``, and the clips are concatenated in segment
    id order. Each subtitle's start/end time is the running sum of the clip
    durations, so the SRT timings follow the audio exactly.

    Args:
        text: Full input text.
        voice: edge-tts voice name.
        rate: Rate string forwarded to the TTS call.
        pitch: Pitch string forwarded to the TTS call.
        words_per_line: Passed to ``TextProcessor``.
        lines_per_segment: Passed to ``TextProcessor``.
        progress_callback: Optional ``callable(fraction, message)``; fractions
            run from 0.1 (segmentation done) to 1.0 (finished or export error).
        parallel: Synthesize segments concurrently when there is more than one.
        max_workers: Upper bound on concurrent synthesis jobs; values below 1
            are treated as 1.

    Returns:
        ``(srt_path, audio_path)`` of the files that were written.

    Raises:
        TTSError: If any segment fails or the final export fails. The
            underlying exception is chained as ``__cause__``.
    """
    def report(fraction, message):
        if progress_callback:
            progress_callback(fraction, message)

    processor = TextProcessor(words_per_line, lines_per_segment)
    segments = processor.split_into_segments(text)
    total_segments = len(segments)

    # Update progress to show segmentation is complete
    report(0.1, "Text segmentation complete")

    if parallel and total_segments > 1:
        processed_count = 0
        # A semaphore of 0 would block forever and a negative one raises, so
        # clamp the concurrency limit to at least one job.
        semaphore = asyncio.Semaphore(max(1, max_workers))

        async def process_with_semaphore(segment):
            nonlocal processed_count
            async with semaphore:
                try:
                    result = await process_segment_with_timing(segment, voice, rate, pitch)
                except Exception as e:
                    # Count failed segments too so the progress bar keeps moving.
                    processed_count += 1
                    report(
                        0.1 + (0.8 * processed_count / total_segments),
                        f"Error in segment {segment.id}: {str(e)}",
                    )
                    raise
                processed_count += 1
                report(
                    0.1 + (0.8 * processed_count / total_segments),
                    f"Processed {processed_count}/{total_segments} segments",
                )
                return result

        tasks = [asyncio.ensure_future(process_with_semaphore(segment)) for segment in segments]
        try:
            processed_segments = list(await asyncio.gather(*tasks))
        except Exception as e:
            # gather() propagates the first failure but leaves the siblings
            # running; cancel them so no further TTS requests are made for a
            # job that has already failed.
            for task in tasks:
                task.cancel()
            report(0.9, f"Error during parallel processing: {str(e)}")
            raise TTSError(f"Failed during parallel processing: {str(e)}") from e
    else:
        # Process segments sequentially (original method)
        processed_segments = []
        for i, segment in enumerate(segments):
            try:
                processed_segment = await process_segment_with_timing(segment, voice, rate, pitch)
            except Exception as e:
                report(0.9, f"Error processing segment {segment.id}: {str(e)}")
                raise TTSError(f"Failed to process segment {segment.id}: {str(e)}") from e
            processed_segments.append(processed_segment)
            report(
                0.1 + (0.8 * (i + 1) / total_segments),
                f"Processed {i + 1}/{total_segments} segments",
            )

    # Sort segments by ID to ensure correct order
    processed_segments.sort(key=lambda s: s.id)

    report(0.9, "Finalizing audio and subtitles")

    # Now combine the segments in the correct order
    current_time = 0
    final_audio = AudioSegment.empty()
    srt_entries = []

    for segment in processed_segments:
        # Each cue starts exactly where the previous clip ended.
        segment.start_time = current_time
        segment.end_time = current_time + segment.duration

        srt_entries.append(
            f"{segment.id}\n"
            f"{format_time_ms(segment.start_time)} --> {format_time_ms(segment.end_time)}\n"
            f"{segment.text}\n\n"
        )

        # No crossfade: overlapping clips would shorten the audio and put it
        # out of step with the summed durations used for the cue times.
        final_audio = final_audio.append(segment.audio, crossfade=0)
        current_time = segment.end_time

    srt_content = "".join(srt_entries)

    srt_path, audio_path = file_manager.create_output_paths()

    try:
        # Export with high quality settings for precise timing
        final_audio.export(
            audio_path,
            format="mp3",
            bitrate="320k",
            parameters=["-ar", "48000", "-ac", "2"]
        )

        with open(srt_path, "w", encoding='utf-8') as f:
            f.write(srt_content)
    except Exception as e:
        report(1.0, f"Error exporting final files: {str(e)}")
        raise TTSError(f"Failed to export final files: {str(e)}") from e

    report(1.0, "Complete!")

    return srt_path, audio_path
414
 
415
+ # IMPROVEMENT 4: Progress Reporting
416
async def process_text_with_progress(
    text,
    pitch,
    rate,
    voice,
    words_per_line,
    lines_per_segment,
    parallel_processing,
    progress=gr.Progress()
):
    """Gradio handler: validate the inputs, run the TTS pipeline, report progress.

    Args:
        text: Text to convert to speech.
        pitch: Pitch adjustment in Hz from the slider.
        rate: Rate adjustment in percent from the slider.
        voice: Display name of the voice; must be a key of ``voice_options``.
        words_per_line: Forwarded to ``generate_accurate_srt``.
        lines_per_segment: Forwarded to ``generate_accurate_srt``.
        parallel_processing: Whether segments are synthesized concurrently.
        progress: Gradio progress tracker, called as ``progress(value, status)``.

    Returns:
        ``(srt_path, audio_path, audio_path)`` - the audio path is returned
        twice, once for the download component and once for the preview player.

    Raises:
        gr.Error: For empty text, an unknown voice, or any pipeline failure.
    """
    # Input validation
    if not text or text.strip() == "":
        raise gr.Error("Please enter some text to convert to speech.")

    if voice not in voice_options:
        # Without this check a bad voice name surfaces as a bare KeyError text.
        raise gr.Error(f"Unknown voice: {voice}")

    # Format pitch and rate strings. The ``+d`` format rejects floats, and the
    # sliders may deliver e.g. 5.0 (presumably via API clients - not confirmed
    # here), so coerce to int first. ``+d`` already renders zero as "+0".
    pitch_str = f"{int(round(pitch)):+d}Hz"
    rate_str = f"{int(round(rate)):+d}%"

    try:
        # Start progress tracking
        progress(0, "Preparing text...")

        def update_progress(value, status):
            progress(value, status)

        srt_path, audio_path = await generate_accurate_srt(
            text,
            voice_options[voice],
            rate_str,
            pitch_str,
            words_per_line,
            lines_per_segment,
            progress_callback=update_progress,
            parallel=parallel_processing
        )

        return srt_path, audio_path, audio_path
    except TTSError as e:
        raise gr.Error(f"TTS Error: {str(e)}") from e
    except Exception as e:
        raise gr.Error(f"Unexpected error: {str(e)}") from e
457
 
458
+ # Voice options dictionary
459
  voice_options = {
460
  "Andrew Male": "en-US-AndrewNeural",
461
  "Jenny Female": "en-US-JennyNeural",
 
495
  "Imani": "en-TZ-ImaniNeural",
496
  "Leah": "en-ZA-LeahNeural",
497
  "Luke": "en-ZA-LukeNeural"
498
+ # Add other voices as needed
499
  }
500
 
501
# Register cleanup on exit so the file manager's cleanup_all runs when the
# interpreter shuts down.
import atexit

atexit.register(file_manager.cleanup_all)
504
+
505
  # Create Gradio interface
506
# Gradio UI: text and voice controls on top, subtitle layout options below,
# then the generate button and the three outputs.
with gr.Blocks(title="Advanced TTS with Configurable SRT Generation") as app:
    gr.Markdown("# Advanced TTS with Configurable SRT Generation")
    gr.Markdown("Generate perfectly synchronized audio and subtitles with natural speech patterns.")

    with gr.Row():
        with gr.Column(scale=3):
            text_input = gr.Textbox(label="Enter Text", lines=10, placeholder="Enter your text here...")

        with gr.Column(scale=2):
            voice_dropdown = gr.Dropdown(
                label="Select Voice",
                choices=list(voice_options.keys()),
                value="Jenny Female"
            )
            pitch_slider = gr.Slider(
                label="Pitch Adjustment (Hz)",
                minimum=-10,
                maximum=10,
                value=0,
                step=1
            )
            rate_slider = gr.Slider(
                label="Rate Adjustment (%)",
                minimum=-25,
                maximum=25,
                value=0,
                step=1
            )

    with gr.Row():
        with gr.Column():
            words_per_line = gr.Slider(
                label="Words per Line",
                minimum=3,
                maximum=12,
                value=6,
                step=1,
                info="Controls how many words appear on each line of the subtitle"
            )
        with gr.Column():
            lines_per_segment = gr.Slider(
                label="Lines per Segment",
                minimum=1,
                maximum=4,
                value=2,
                step=1,
                info="Controls how many lines appear in each subtitle segment"
            )
        with gr.Column():
            parallel_processing = gr.Checkbox(
                label="Enable Parallel Processing",
                value=True,
                info="Process multiple segments simultaneously for faster conversion (recommended for longer texts)"
            )

    submit_btn = gr.Button("Generate Audio & Subtitles")

    with gr.Row():
        with gr.Column():
            audio_output = gr.Audio(label="Preview Audio")
        with gr.Column():
            srt_file = gr.File(label="Download SRT")
            audio_file = gr.File(label="Download Audio")

    # Handle button click. The inputs are listed in the handler's parameter
    # order (text, pitch, rate, voice, ...), not in on-screen order.
    # No separate error hook is attached: the handler raises gr.Error, which
    # Gradio shows to the user itself. NOTE(review): the earlier `.catch(...)`
    # chain was removed because event listeners appear to offer only
    # .then()/.success() - confirm against the installed Gradio version.
    submit_btn.click(
        fn=process_text_with_progress,
        inputs=[
            text_input,
            pitch_slider,
            rate_slider,
            voice_dropdown,
            words_per_line,
            lines_per_segment,
            parallel_processing
        ],
        outputs=[
            srt_file,
            audio_file,
            audio_output
        ],
        api_name="generate"
    )

if __name__ == "__main__":
    app.launch()