hivecorp committed on
Commit
bcbb7e7
·
verified ·
1 Parent(s): 5e0b3ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -124
app.py CHANGED
@@ -12,6 +12,9 @@ from typing import List, Tuple, Optional, Dict, Any
12
  import math
13
  from dataclasses import dataclass
14
 
 
 
 
15
  class TimingManager:
16
  def __init__(self):
17
  self.current_time = 0
@@ -41,178 +44,115 @@ class Segment:
41
  end_time: int = 0
42
  duration: int = 0
43
  audio: Optional[AudioSegment] = None
44
- lines: List[str] = None # Add lines field for display purposes only
45
 
46
  class TextProcessor:
47
  def __init__(self, words_per_line: int, lines_per_segment: int):
48
  self.words_per_line = words_per_line
49
  self.lines_per_segment = lines_per_segment
50
  self.min_segment_words = 3
51
- self.max_segment_words = words_per_line * lines_per_segment * 1.5 # Allow 50% more for natural breaks
52
  self.punctuation_weights = {
53
- '.': 1.0, # Strong break
54
- '!': 1.0,
55
- '?': 1.0,
56
- ';': 0.8, # Medium-strong break
57
- ':': 0.7,
58
- ',': 0.5, # Medium break
59
- '-': 0.3, # Weak break
60
- '(': 0.2,
61
- ')': 0.2
62
  }
63
 
64
  def analyze_sentence_complexity(self, text: str) -> float:
65
- """Analyze sentence complexity to determine optimal segment length"""
66
  words = text.split()
 
67
  complexity = 1.0
68
-
69
- # Adjust for sentence length
70
  if len(words) > self.words_per_line * 2:
71
  complexity *= 1.2
72
-
73
- # Adjust for punctuation density
74
  punct_count = sum(text.count(p) for p in self.punctuation_weights.keys())
75
- if len(words) > 0:
76
- complexity *= (1 + (punct_count / len(words)) * 0.5)
77
-
78
  return complexity
79
 
80
  def find_natural_breaks(self, text: str) -> List[Tuple[int, float]]:
81
- """Find natural break points with their weights"""
82
  breaks = []
83
  words = text.split()
84
-
85
  for i, word in enumerate(words):
86
  weight = 0
87
-
88
- # Check for punctuation
89
  for punct, punct_weight in self.punctuation_weights.items():
90
  if word.endswith(punct):
91
  weight = max(weight, punct_weight)
92
-
93
- # Check for natural phrase boundaries
94
  phrase_starters = {'however', 'therefore', 'moreover', 'furthermore', 'meanwhile', 'although', 'because'}
95
  if i < len(words) - 1 and words[i+1].lower() in phrase_starters:
96
  weight = max(weight, 0.6)
97
-
98
- # Check for conjunctions at natural points
99
  if i > self.min_segment_words:
100
  conjunctions = {'and', 'but', 'or', 'nor', 'for', 'yet', 'so'}
101
  if word.lower() in conjunctions:
102
  weight = max(weight, 0.4)
103
-
104
  if weight > 0:
105
  breaks.append((i, weight))
106
-
107
  return breaks
108
 
109
  def split_into_segments(self, text: str) -> List[Segment]:
110
- # Normalize text and add proper spacing around punctuation
111
  text = re.sub(r'\s+', ' ', text.strip())
112
  text = re.sub(r'([.!?,;:])\s*', r'\1 ', text)
113
  text = re.sub(r'\s+([.!?,;:])', r'\1', text)
114
-
115
  segments = []
116
  words = text.split()
117
-
118
  i = 0
119
  while i < len(words):
120
- # Dynamically select a chunk to analyze for breaks
121
  chunk_end = i + int(self.max_segment_words)
122
  chunk_text = ' '.join(words[i:chunk_end])
123
  complexity = self.analyze_sentence_complexity(chunk_text)
124
  breaks = self.find_natural_breaks(chunk_text)
125
-
126
  best_break = -1
127
  best_weight = -1
128
-
129
- # Find the best break point within the ideal segment length
130
  ideal_length = self.words_per_line * self.lines_per_segment
131
-
132
  for break_idx, weight in breaks:
133
- # Prioritize breaks closer to the ideal length
134
  distance_penalty = 1 - (abs(break_idx - ideal_length) / ideal_length) * 0.5
135
  score = weight * distance_penalty
136
-
137
  if score > best_weight:
138
  best_break = break_idx
139
  best_weight = score
140
-
141
  if best_break == -1:
142
- # If no break found, split at the ideal length or end of text
143
  best_break = min(ideal_length, len(words) - 1 - i)
144
-
145
  segment_words = words[i : i + best_break + 1]
146
  segment_text = ' '.join(segment_words)
147
-
148
  lines = self.split_into_lines(segment_text)
149
  final_segment_text = '\n'.join(lines)
150
-
151
- segments.append(Segment(
152
- id=len(segments) + 1,
153
- text=final_segment_text
154
- ))
155
-
156
  i += best_break + 1
157
-
158
  return segments
159
 
160
  def split_into_lines(self, text: str) -> List[str]:
161
- """Split segment text into natural lines"""
162
  words = text.split()
163
  lines = []
164
  current_line = []
165
  word_count = 0
166
-
167
  for word in words:
168
  current_line.append(word)
169
  word_count += 1
170
-
171
- is_break = (
172
- word_count >= self.words_per_line or
173
- any(word.endswith(p) for p in '.!?') or
174
- (word_count >= self.words_per_line * 0.7 and
175
- any(word.endswith(p) for p in ',;:'))
176
- )
177
-
178
  if is_break and len(words) > word_count:
179
  lines.append(' '.join(current_line))
180
  current_line = []
181
  word_count = 0
182
-
183
  if current_line:
184
  lines.append(' '.join(current_line))
185
-
186
  return lines
187
 
188
  class TTSError(Exception):
189
- """Custom exception for TTS processing errors"""
190
  pass
191
 
192
  async def process_segment_with_timing(segment: Segment, voice: str, rate: str, pitch: str) -> Segment:
193
- """Process a complete segment as a single TTS unit with improved error handling"""
194
  temp_dir = tempfile.gettempdir()
195
  audio_file = os.path.join(temp_dir, f"temp_segment_{segment.id}_{uuid.uuid4()}.wav")
196
  try:
197
  segment_text = ' '.join(segment.text.split('\n'))
198
  tts = edge_tts.Communicate(segment_text, voice, rate=rate, pitch=pitch)
199
-
200
- try:
201
- await tts.save(audio_file)
202
- except Exception as e:
203
- raise TTSError(f"Failed to generate audio for segment {segment.id}: {str(e)}")
204
-
205
  if not os.path.exists(audio_file) or os.path.getsize(audio_file) == 0:
206
  raise TTSError(f"Generated audio file is empty or missing for segment {segment.id}")
207
-
208
- try:
209
- segment.audio = AudioSegment.from_file(audio_file)
210
- silence = AudioSegment.silent(duration=30)
211
- segment.audio = silence + segment.audio + silence
212
- segment.duration = len(segment.audio)
213
- except Exception as e:
214
- raise TTSError(f"Failed to process audio file for segment {segment.id}: {str(e)}")
215
-
216
  return segment
217
  except Exception as e:
218
  if not isinstance(e, TTSError):
@@ -226,25 +166,20 @@ async def process_segment_with_timing(segment: Segment, voice: str, rate: str, p
226
  pass
227
 
228
  class FileManager:
229
- """Manages temporary and output files with cleanup capabilities"""
230
  def __init__(self):
231
  self.temp_dir = tempfile.mkdtemp(prefix="tts_app_")
232
  self.output_files = []
233
  self.max_files_to_keep = 5
234
 
235
  def create_output_paths(self):
236
- """Create paths for output files"""
237
  unique_id = str(uuid.uuid4())
238
  audio_path = os.path.join(self.temp_dir, f"final_audio_{unique_id}.mp3")
239
  srt_path = os.path.join(self.temp_dir, f"final_subtitles_{unique_id}.srt")
240
-
241
  self.output_files.append((srt_path, audio_path))
242
  self.cleanup_old_files()
243
-
244
  return srt_path, audio_path
245
 
246
  def cleanup_old_files(self):
247
- """Clean up old output files, keeping only the most recent ones"""
248
  if len(self.output_files) > self.max_files_to_keep:
249
  old_files_to_remove = self.output_files[:-self.max_files_to_keep]
250
  for srt_path, audio_path in old_files_to_remove:
@@ -256,7 +191,6 @@ class FileManager:
256
  self.output_files = self.output_files[-self.max_files_to_keep:]
257
 
258
  def cleanup_all(self):
259
- """Clean up all managed files"""
260
  for srt_path, audio_path in self.output_files:
261
  try:
262
  if os.path.exists(srt_path): os.remove(srt_path)
@@ -275,19 +209,15 @@ async def generate_accurate_srt(
275
  words_per_line: int, lines_per_segment: int,
276
  progress_callback=None, parallel: bool = True, max_workers: int = 4
277
  ) -> Tuple[str, str]:
278
- """Generate accurate SRT with parallel processing option"""
279
  processor = TextProcessor(words_per_line, lines_per_segment)
280
  segments = processor.split_into_segments(text)
281
  total_segments = len(segments)
282
-
283
  if progress_callback:
284
  progress_callback(0.1, "Text segmentation complete")
285
-
286
  processed_segments = []
287
  if parallel and total_segments > 1:
288
  semaphore = asyncio.Semaphore(max_workers)
289
  processed_count = 0
290
-
291
  async def process_with_semaphore(segment):
292
  async with semaphore:
293
  nonlocal processed_count
@@ -297,10 +227,8 @@ async def generate_accurate_srt(
297
  progress = 0.1 + (0.8 * processed_count / total_segments)
298
  progress_callback(progress, f"Processed {processed_count}/{total_segments} segments")
299
  return result
300
-
301
  tasks = [process_with_semaphore(s) for s in segments]
302
  results = await asyncio.gather(*tasks, return_exceptions=True)
303
-
304
  for res in results:
305
  if isinstance(res, Exception):
306
  raise TTSError(f"A task failed during parallel processing: {res}")
@@ -312,11 +240,9 @@ async def generate_accurate_srt(
312
  if progress_callback:
313
  progress = 0.1 + (0.8 * (i + 1) / total_segments)
314
  progress_callback(progress, f"Processed {i + 1}/{total_segments} segments")
315
-
316
  processed_segments.sort(key=lambda s: s.id)
317
  if progress_callback:
318
  progress_callback(0.9, "Finalizing audio and subtitles")
319
-
320
  current_time = 0
321
  final_audio = AudioSegment.empty()
322
  srt_content = ""
@@ -326,33 +252,75 @@ async def generate_accurate_srt(
326
  srt_content += f"{segment.id}\n{format_time_ms(segment.start_time)} --> {format_time_ms(segment.end_time)}\n{segment.text}\n\n"
327
  final_audio = final_audio.append(segment.audio, crossfade=0)
328
  current_time = segment.end_time
329
-
330
  srt_path, audio_path = file_manager.create_output_paths()
331
- try:
332
- export_params = {'format': 'mp3', 'bitrate': '192k', 'parameters': ['-ar', '44100', '-ac', '2', '-qscale:a', '2']}
333
- final_audio.export(audio_path, **export_params)
334
- with open(srt_path, "w", encoding='utf-8') as f: f.write(srt_content)
335
- except Exception as e:
336
- raise TTSError(f"Failed to export final files: {str(e)}")
337
-
338
  if progress_callback:
339
  progress_callback(1.0, "Complete!")
340
  return srt_path, audio_path
341
 
342
- # MODIFICATION: This function now returns a gr.update() object for the error textbox
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
343
  async def process_text_with_progress(
344
  text, pitch, rate, voice, words_per_line,
345
  lines_per_segment, parallel_processing,
346
  progress=gr.Progress()
347
  ):
348
  """
349
- Processes text and returns audio, HTML links, and a gr.update object for status.
 
350
  """
 
351
  if not text or text.strip() == "":
352
- return None, "", gr.update(visible=True, value="Please enter some text to convert to speech.")
353
 
354
- pitch_str = f"{pitch:+d}Hz" if pitch != 0 else "+0Hz"
355
- rate_str = f"{rate:+d}%" if rate != 0 else "+0%"
356
 
357
  try:
358
  progress(0, "Preparing text...")
@@ -367,21 +335,19 @@ async def process_text_with_progress(
367
  parallel=parallel_processing
368
  )
369
 
370
- download_html = f"""
371
- <div style="text-align: center; padding-top: 10px;">
372
- <a href="/file={srt_path}" target="_blank" download="subtitles.srt" style="font-weight: 600; color: #0b5ed7; text-decoration: none; margin-right: 20px;">📥 Download SRT File</a>
373
- <a href="/file={audio_path}" target="_blank" download="audio.mp3" style="font-weight: 600; color: #0b5ed7; text-decoration: none;">📥 Download Audio File</a>
374
- </div>
375
- """
376
 
377
- # Return audio path, HTML links, and a gr.update object to hide the status
378
- return audio_path, download_html, gr.update(visible=False, value="")
379
- except TTSError as e:
380
- return None, "", gr.update(visible=True, value=f"TTS Error: {str(e)}")
381
  except Exception as e:
382
- return None, "", gr.update(visible=True, value=f"Unexpected error: {str(e)}")
 
 
 
 
383
 
384
- # Voice options dictionary
385
  voice_options = {
386
  "Andrew Male": "en-US-AndrewNeural", "Jenny Female": "en-US-JennyNeural", "Guy Male": "en-US-GuyNeural",
387
  "Ana Female": "en-US-AnaNeural", "Aria Female": "en-US-AriaNeural", "Brian Male": "en-US-BrianNeural",
@@ -422,15 +388,18 @@ with gr.Blocks(title="Advanced TTS with Configurable SRT Generation") as app:
422
  parallel_processing = gr.Checkbox(label="Enable Parallel Processing", value=True, info="Faster conversion for longer texts.")
423
 
424
  submit_btn = gr.Button("Generate Audio & Subtitles", variant="primary")
425
- error_output = gr.Textbox(label="Status", visible=False, interactive=False)
426
 
 
 
427
  with gr.Row():
428
  with gr.Column(scale=2):
 
429
  audio_preview = gr.Audio(label="Preview Audio")
430
  with gr.Column(scale=1):
431
- download_links_output = gr.HTML(label="Download Files")
432
-
433
- # MODIFICATION: The outputs list is now simplified
 
434
  submit_btn.click(
435
  fn=process_text_with_progress,
436
  inputs=[
@@ -439,11 +408,11 @@ with gr.Blocks(title="Advanced TTS with Configurable SRT Generation") as app:
439
  ],
440
  outputs=[
441
  audio_preview,
442
- download_links_output,
443
- error_output,
444
  ],
445
  api_name="generate"
446
  )
 
447
 
448
  if __name__ == "__main__":
449
  app.launch()
 
12
  import math
13
  from dataclasses import dataclass
14
 
15
+ # No changes to these classes and helper functions
16
+ # (TimingManager, Segment, TextProcessor, TTSError, etc.)
17
+ # ...
18
  class TimingManager:
19
  def __init__(self):
20
  self.current_time = 0
 
44
  end_time: int = 0
45
  duration: int = 0
46
  audio: Optional[AudioSegment] = None
47
+ lines: List[str] = None
48
 
49
  class TextProcessor:
50
  def __init__(self, words_per_line: int, lines_per_segment: int):
51
  self.words_per_line = words_per_line
52
  self.lines_per_segment = lines_per_segment
53
  self.min_segment_words = 3
54
+ self.max_segment_words = words_per_line * lines_per_segment * 1.5
55
  self.punctuation_weights = {
56
+ '.': 1.0, '!': 1.0, '?': 1.0, ';': 0.8, ':': 0.7,
57
+ ',': 0.5, '-': 0.3, '(': 0.2, ')': 0.2
 
 
 
 
 
 
 
58
  }
59
 
60
  def analyze_sentence_complexity(self, text: str) -> float:
 
61
  words = text.split()
62
+ if not words: return 1.0
63
  complexity = 1.0
 
 
64
  if len(words) > self.words_per_line * 2:
65
  complexity *= 1.2
 
 
66
  punct_count = sum(text.count(p) for p in self.punctuation_weights.keys())
67
+ complexity *= (1 + (punct_count / len(words)) * 0.5)
 
 
68
  return complexity
69
 
70
  def find_natural_breaks(self, text: str) -> List[Tuple[int, float]]:
 
71
  breaks = []
72
  words = text.split()
 
73
  for i, word in enumerate(words):
74
  weight = 0
 
 
75
  for punct, punct_weight in self.punctuation_weights.items():
76
  if word.endswith(punct):
77
  weight = max(weight, punct_weight)
 
 
78
  phrase_starters = {'however', 'therefore', 'moreover', 'furthermore', 'meanwhile', 'although', 'because'}
79
  if i < len(words) - 1 and words[i+1].lower() in phrase_starters:
80
  weight = max(weight, 0.6)
 
 
81
  if i > self.min_segment_words:
82
  conjunctions = {'and', 'but', 'or', 'nor', 'for', 'yet', 'so'}
83
  if word.lower() in conjunctions:
84
  weight = max(weight, 0.4)
 
85
  if weight > 0:
86
  breaks.append((i, weight))
 
87
  return breaks
88
 
89
  def split_into_segments(self, text: str) -> List[Segment]:
 
90
  text = re.sub(r'\s+', ' ', text.strip())
91
  text = re.sub(r'([.!?,;:])\s*', r'\1 ', text)
92
  text = re.sub(r'\s+([.!?,;:])', r'\1', text)
 
93
  segments = []
94
  words = text.split()
 
95
  i = 0
96
  while i < len(words):
 
97
  chunk_end = i + int(self.max_segment_words)
98
  chunk_text = ' '.join(words[i:chunk_end])
99
  complexity = self.analyze_sentence_complexity(chunk_text)
100
  breaks = self.find_natural_breaks(chunk_text)
 
101
  best_break = -1
102
  best_weight = -1
 
 
103
  ideal_length = self.words_per_line * self.lines_per_segment
 
104
  for break_idx, weight in breaks:
 
105
  distance_penalty = 1 - (abs(break_idx - ideal_length) / ideal_length) * 0.5
106
  score = weight * distance_penalty
 
107
  if score > best_weight:
108
  best_break = break_idx
109
  best_weight = score
 
110
  if best_break == -1:
 
111
  best_break = min(ideal_length, len(words) - 1 - i)
 
112
  segment_words = words[i : i + best_break + 1]
113
  segment_text = ' '.join(segment_words)
 
114
  lines = self.split_into_lines(segment_text)
115
  final_segment_text = '\n'.join(lines)
116
+ segments.append(Segment(id=len(segments) + 1, text=final_segment_text))
 
 
 
 
 
117
  i += best_break + 1
 
118
  return segments
119
 
120
  def split_into_lines(self, text: str) -> List[str]:
 
121
  words = text.split()
122
  lines = []
123
  current_line = []
124
  word_count = 0
 
125
  for word in words:
126
  current_line.append(word)
127
  word_count += 1
128
+ is_break = (word_count >= self.words_per_line or
129
+ any(word.endswith(p) for p in '.!?') or
130
+ (word_count >= self.words_per_line * 0.7 and
131
+ any(word.endswith(p) for p in ',;:')))
 
 
 
 
132
  if is_break and len(words) > word_count:
133
  lines.append(' '.join(current_line))
134
  current_line = []
135
  word_count = 0
 
136
  if current_line:
137
  lines.append(' '.join(current_line))
 
138
  return lines
139
 
140
class TTSError(Exception):
    """Raised when a text-to-speech segment cannot be generated or processed."""
142
 
143
  async def process_segment_with_timing(segment: Segment, voice: str, rate: str, pitch: str) -> Segment:
 
144
  temp_dir = tempfile.gettempdir()
145
  audio_file = os.path.join(temp_dir, f"temp_segment_{segment.id}_{uuid.uuid4()}.wav")
146
  try:
147
  segment_text = ' '.join(segment.text.split('\n'))
148
  tts = edge_tts.Communicate(segment_text, voice, rate=rate, pitch=pitch)
149
+ await tts.save(audio_file)
 
 
 
 
 
150
  if not os.path.exists(audio_file) or os.path.getsize(audio_file) == 0:
151
  raise TTSError(f"Generated audio file is empty or missing for segment {segment.id}")
152
+ segment.audio = AudioSegment.from_file(audio_file)
153
+ silence = AudioSegment.silent(duration=30)
154
+ segment.audio = silence + segment.audio + silence
155
+ segment.duration = len(segment.audio)
 
 
 
 
 
156
  return segment
157
  except Exception as e:
158
  if not isinstance(e, TTSError):
 
166
  pass
167
 
168
  class FileManager:
 
169
  def __init__(self):
170
  self.temp_dir = tempfile.mkdtemp(prefix="tts_app_")
171
  self.output_files = []
172
  self.max_files_to_keep = 5
173
 
174
  def create_output_paths(self):
 
175
  unique_id = str(uuid.uuid4())
176
  audio_path = os.path.join(self.temp_dir, f"final_audio_{unique_id}.mp3")
177
  srt_path = os.path.join(self.temp_dir, f"final_subtitles_{unique_id}.srt")
 
178
  self.output_files.append((srt_path, audio_path))
179
  self.cleanup_old_files()
 
180
  return srt_path, audio_path
181
 
182
  def cleanup_old_files(self):
 
183
  if len(self.output_files) > self.max_files_to_keep:
184
  old_files_to_remove = self.output_files[:-self.max_files_to_keep]
185
  for srt_path, audio_path in old_files_to_remove:
 
191
  self.output_files = self.output_files[-self.max_files_to_keep:]
192
 
193
  def cleanup_all(self):
 
194
  for srt_path, audio_path in self.output_files:
195
  try:
196
  if os.path.exists(srt_path): os.remove(srt_path)
 
209
  words_per_line: int, lines_per_segment: int,
210
  progress_callback=None, parallel: bool = True, max_workers: int = 4
211
  ) -> Tuple[str, str]:
 
212
  processor = TextProcessor(words_per_line, lines_per_segment)
213
  segments = processor.split_into_segments(text)
214
  total_segments = len(segments)
 
215
  if progress_callback:
216
  progress_callback(0.1, "Text segmentation complete")
 
217
  processed_segments = []
218
  if parallel and total_segments > 1:
219
  semaphore = asyncio.Semaphore(max_workers)
220
  processed_count = 0
 
221
  async def process_with_semaphore(segment):
222
  async with semaphore:
223
  nonlocal processed_count
 
227
  progress = 0.1 + (0.8 * processed_count / total_segments)
228
  progress_callback(progress, f"Processed {processed_count}/{total_segments} segments")
229
  return result
 
230
  tasks = [process_with_semaphore(s) for s in segments]
231
  results = await asyncio.gather(*tasks, return_exceptions=True)
 
232
  for res in results:
233
  if isinstance(res, Exception):
234
  raise TTSError(f"A task failed during parallel processing: {res}")
 
240
  if progress_callback:
241
  progress = 0.1 + (0.8 * (i + 1) / total_segments)
242
  progress_callback(progress, f"Processed {i + 1}/{total_segments} segments")
 
243
  processed_segments.sort(key=lambda s: s.id)
244
  if progress_callback:
245
  progress_callback(0.9, "Finalizing audio and subtitles")
 
246
  current_time = 0
247
  final_audio = AudioSegment.empty()
248
  srt_content = ""
 
252
  srt_content += f"{segment.id}\n{format_time_ms(segment.start_time)} --> {format_time_ms(segment.end_time)}\n{segment.text}\n\n"
253
  final_audio = final_audio.append(segment.audio, crossfade=0)
254
  current_time = segment.end_time
 
255
  srt_path, audio_path = file_manager.create_output_paths()
256
+ export_params = {'format': 'mp3', 'bitrate': '192k', 'parameters': ['-ar', '44100', '-ac', '2', '-qscale:a', '2']}
257
+ final_audio.export(audio_path, **export_params)
258
+ with open(srt_path, "w", encoding='utf-8') as f: f.write(srt_content)
 
 
 
 
259
  if progress_callback:
260
  progress_callback(1.0, "Complete!")
261
  return srt_path, audio_path
262
 
263
+ ### MODIFICATION START ###
264
+ # This new function creates the HTML for the download buttons using the JavaScript strategy.
265
def create_download_links_html(srt_path: str, audio_path: str) -> str:
    """Build the HTML snippet holding the SRT and audio download buttons.

    Each button is an anchor whose ``onclick`` handler fetches the file and
    triggers a blob download, so the browser does not navigate away from the
    page. File paths are percent-encoded for the ``href`` and file names are
    HTML-escaped for the ``download`` attribute, so paths containing spaces,
    ``#``, ``?`` or quotes still produce valid markup and working links.

    Args:
        srt_path: Filesystem path of the generated subtitle file.
        audio_path: Filesystem path of the generated audio file.

    Returns:
        The HTML markup, or an empty string when either path is missing.
    """
    # Function-scope imports keep this block self-contained.
    from html import escape
    from urllib.parse import quote

    if not srt_path or not audio_path:
        return ""

    # Path separators and the drive colon stay literal; everything else that
    # is not URL-safe is percent-encoded.
    url_safe_chars = "/:\\"

    def file_url(path: str) -> str:
        # Use the /file= relative path which Gradio provides for serving files.
        return escape("/file=" + quote(path, safe=url_safe_chars), quote=True)

    srt_filename = escape(os.path.basename(srt_path), quote=True)
    audio_filename = escape(os.path.basename(audio_path), quote=True)
    srt_url = file_url(srt_path)
    audio_url = file_url(audio_path)

    # Inline handler: fetch the file, wrap it in an object URL and click a
    # temporary hidden anchor. It uses only single quotes, so it can sit
    # inside a double-quoted HTML attribute.
    js_download_logic = """
        event.preventDefault();
        fetch(this.href).then(resp => resp.blob()).then(blob => {
            const url = window.URL.createObjectURL(blob);
            const a = document.createElement('a');
            a.style.display = 'none';
            a.href = url;
            a.download = this.getAttribute('download');
            document.body.appendChild(a);
            a.click();
            window.URL.revokeObjectURL(url);
            document.body.removeChild(a);
        });
    """

    # Combine both links into a single HTML string.
    return f"""
    <div style="text-align: center; padding: 10px 0;">
        <a href="{srt_url}" download="{srt_filename}" onclick="{js_download_logic}"
           style="display: inline-block; padding: 8px 15px; background-color: #0b5ed7; color: white; text-decoration: none; border-radius: 5px; font-weight: 600; margin-right: 15px; cursor: pointer;">
            📥 Download SRT
        </a>
        <a href="{audio_url}" download="{audio_filename}" onclick="{js_download_logic}"
           style="display: inline-block; padding: 8px 15px; background-color: #0b5ed7; color: white; text-decoration: none; border-radius: 5px; font-weight: 600; cursor: pointer;">
            📥 Download Audio
        </a>
    </div>
    """
307
+
308
+ # This main processing function is now simplified.
309
  async def process_text_with_progress(
310
  text, pitch, rate, voice, words_per_line,
311
  lines_per_segment, parallel_processing,
312
  progress=gr.Progress()
313
  ):
314
  """
315
+ Processes text, returns an audio path for the preview and an HTML string
316
+ that contains either the download links or an error message.
317
  """
318
+ # On validation failure, return None for the audio preview and an error HTML.
319
  if not text or text.strip() == "":
320
+ return None, "<p style='color:red; text-align:center;'>Please enter some text to convert.</p>"
321
 
322
+ pitch_str = f"{pitch:+d}Hz"
323
+ rate_str = f"{rate:+d}%"
324
 
325
  try:
326
  progress(0, "Preparing text...")
 
335
  parallel=parallel_processing
336
  )
337
 
338
+ # Get the JS-powered download links HTML.
339
+ download_html = create_download_links_html(srt_path, audio_path)
 
 
 
 
340
 
341
+ # Return the audio path for the player and the HTML for the download/status area.
342
+ return audio_path, download_html
343
+
 
344
  except Exception as e:
345
+ # On processing error, return None for audio and an error HTML.
346
+ error_message = f"An error occurred: {str(e)}"
347
+ return None, f"<p style='color:red; text-align:center;'>{error_message}</p>"
348
+
349
+ ### MODIFICATION END ###
350
 
 
351
  voice_options = {
352
  "Andrew Male": "en-US-AndrewNeural", "Jenny Female": "en-US-JennyNeural", "Guy Male": "en-US-GuyNeural",
353
  "Ana Female": "en-US-AnaNeural", "Aria Female": "en-US-AriaNeural", "Brian Male": "en-US-BrianNeural",
 
388
  parallel_processing = gr.Checkbox(label="Enable Parallel Processing", value=True, info="Faster conversion for longer texts.")
389
 
390
  submit_btn = gr.Button("Generate Audio & Subtitles", variant="primary")
 
391
 
392
+ ### MODIFICATION START ###
393
+ # The output area is simplified.
394
  with gr.Row():
395
  with gr.Column(scale=2):
396
+ # This component is for the audio player preview.
397
  audio_preview = gr.Audio(label="Preview Audio")
398
  with gr.Column(scale=1):
399
+ # This single HTML component will hold EITHER the download links OR an error message.
400
+ status_and_download_output = gr.HTML(label="Status & Downloads")
401
+
402
+ # The .click() event is now simpler and more robust.
403
  submit_btn.click(
404
  fn=process_text_with_progress,
405
  inputs=[
 
408
  ],
409
  outputs=[
410
  audio_preview,
411
+ status_and_download_output
 
412
  ],
413
  api_name="generate"
414
  )
415
+ ### MODIFICATION END ###
416
 
417
  if __name__ == "__main__":
418
  app.launch()