sreepathi-ravikumar committed on
Commit
b6cac61
·
verified ·
1 Parent(s): 68b396b

Update video2.py

Browse files
Files changed (1) hide show
  1. video2.py +172 -110
video2.py CHANGED
@@ -42,165 +42,218 @@ warnings.filterwarnings('ignore')
42
  nest_asyncio.apply()
43
 
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  VOICE_EN = "en-IN-NeerjaNeural"
46
 
 
 
 
 
 
 
 
 
 
 
def clean_text_for_tts(text):
    """Return *text* reduced to plain words suitable for a TTS engine.

    The input is converted with ``str()``, HTML entities are unescaped, and
    URLs, markup tags, brackets, a fixed set of special characters and the
    SSML-related words ``voice``/``speak``/``prosody``/``ssml``/``xmlns``
    are removed. The result is NFKD-normalized with whitespace collapsed.

    Args:
        text: Any object; falsy values yield an empty string.

    Returns:
        The cleaned string (possibly empty).
    """
    if not text:
        return ""
    text = html.unescape(str(text).strip())

    # Literal two-character escapes (backslash + n/t/r) must be turned into
    # spaces BEFORE the special-character pass below: that pass deletes the
    # backslash, which used to leave a stray "n"/"t"/"r" glued to the words.
    text = re.sub(r'\\[ntr]', ' ', text)

    # Remove URLs
    text = re.sub(r'https?://[^\s<>"\']+', '', text)
    text = re.sub(r'www\.[^\s<>"\']+', '', text)

    # Remove XML/HTML/SSML tags, then any leftover angle or other brackets
    text = re.sub(r'<[^>]*>', '', text)
    text = re.sub(r'[<>]', '', text)
    text = re.sub(r'[\{\}\[\]]', '', text)

    # Remove problematic special characters
    text = re.sub(r'[#@$%^&*_+=|\\`~]', '', text)

    # Remove unwanted SSML keywords (whole words only, any letter case)
    for keyword in ['voice', 'speak', 'prosody', 'ssml', 'xmlns']:
        text = re.sub(f'\\b{keyword}\\b', '', text, flags=re.IGNORECASE)

    # Unicode normalization and spacing
    text = unicodedata.normalize('NFKD', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
71
 
async def generate_safe_audio(text, voice):
    """Synthesize *text* with edge-tts into a temporary MP3 file.

    Args:
        text: Raw text; it is passed through ``clean_text_for_tts`` first.
        voice: Voice identifier handed to ``edge_tts.Communicate``.

    Returns:
        Path of the generated ``.mp3`` file, or ``None`` when the cleaned
        text is empty or synthesis fails. The caller owns the returned file
        and is responsible for deleting it.
    """
    cleaned_text = clean_text_for_tts(text)
    if not cleaned_text:
        return None

    # Only the unique name is needed; edge-tts writes the file itself.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
        fname = temp_file.name

    try:
        comm = edge_tts.Communicate(cleaned_text, voice=voice)
        await comm.save(fname)
        return fname
    except Exception as e:
        print(f"Error generating audio: {e}")
        # The placeholder file was created with delete=False, so it must be
        # removed here or every failed chunk leaves a file behind.
        try:
            os.unlink(fname)
        except OSError:
            pass  # best effort: already gone or not removable
        return None
 
 
 
 
 
87
 
 
def smart_text_chunking(text, max_chars=80):
    """Break cleaned text into pieces of at most ``max_chars`` characters.

    The text is cleaned with ``clean_text_for_tts`` and split after sentence
    punctuation (``.``, ``!``, ``?``). A sentence that is too long is split
    after ``,``, ``;`` or ``:``, and a clause that is still too long is
    packed word by word. A single word longer than ``max_chars`` is kept
    whole as its own piece.

    Args:
        text: Raw input text.
        max_chars: Target upper bound on the length of each piece.

    Returns:
        List of non-empty pieces in reading order; ``[]`` when nothing is
        left after cleaning.
    """
    cleaned = clean_text_for_tts(text)
    if not cleaned:
        return []

    pieces = []
    for raw_sentence in re.split(r'(?<=[.!?])\s+', cleaned):
        sentence = raw_sentence.strip()
        if not sentence:
            continue
        if len(sentence) <= max_chars:
            pieces.append(sentence)
            continue

        # Sentence too long: fall back to clause boundaries.
        for raw_clause in re.split(r'(?<=[,;:])\s+', sentence):
            clause = raw_clause.strip()
            if not clause:
                continue
            if len(clause) <= max_chars:
                pieces.append(clause)
                continue

            # Clause still too long: greedily pack whole words.
            buffer = ""
            for word in clause.split():
                candidate = f"{buffer} {word}" if buffer else word
                if len(candidate) <= max_chars:
                    buffer = candidate
                    continue
                if buffer:
                    pieces.append(buffer.strip())
                buffer = word
            if buffer:
                pieces.append(buffer.strip())

    return [piece for piece in pieces if piece.strip()]
 
 
 
 
121
 
async def bilingual_tts_fixed(text, output_file="audio0.mp3", VOICE_TA=None):
    """Synthesize *text* chunk by chunk and merge the result into one MP3.

    The text is split with ``smart_text_chunking`` and every chunk is
    synthesized concurrently through ``generate_safe_audio``. The resulting
    clips are normalized, joined with 200 ms pauses, compressed, normalized
    again and exported to ``output_file``.

    Args:
        text: Raw text to speak.
        output_file: Destination path of the merged MP3.
        VOICE_TA: Voice identifier to use. When it contains ``"ta-IN"``,
            chunks containing Tamil-block characters use it and all other
            chunks use ``VOICE_EN``. When it is ``None``, ``VOICE_EN`` is
            used for everything.

    Returns:
        ``output_file`` on success, otherwise ``None``.
    """
    print("Starting fixed bilingual TTS processing...")
    # Declared before the try so the finally clause can always clean up.
    processed_audio_files = []
    try:
        chunks = smart_text_chunking(text)
        if not chunks:
            print("Error: No valid text chunks after cleaning")
            return None
        print(f"Processing {len(chunks)} text chunks...")

        is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA
        tasks = []
        for i, chunk in enumerate(chunks):
            is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
            if is_bilingual_tamil:
                voice = VOICE_TA if is_tamil else VOICE_EN
            else:
                # Previously this passed VOICE_TA through even when it was
                # None, handing edge-tts voice=None.
                voice = VOICE_TA or VOICE_EN
            lang_label = "Tamil" if is_tamil else "English"
            print(f"Chunk {i+1}/{len(chunks)} ({lang_label}): {chunk[:40]}...")
            tasks.append(generate_safe_audio(chunk, voice))

        audio_files = await asyncio.gather(*tasks, return_exceptions=True)
        # gather keeps task order, so the filtered list stays in reading order.
        processed_audio_files = [f for f in audio_files if isinstance(f, str)]
        for error in audio_files:
            if isinstance(error, Exception):
                print(f"Warning: Audio generation error: {error}")

        if not processed_audio_files:
            print("Error: No audio was successfully generated")
            return None

        merged_audio = None
        pause = AudioSegment.silent(duration=200)
        for audio_file in processed_audio_files:
            try:
                segment = AudioSegment.from_file(audio_file)
                segment = normalize(segment)
                # Only strip silence if segment is reasonably long
                if len(segment) > 200:
                    try:
                        segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
                    except Exception as e:
                        print(f" (Info) Skipped strip_silence: {e}")
                if merged_audio is None:
                    merged_audio = segment
                else:
                    merged_audio += pause + segment
            except Exception as audio_error:
                print(f"Warning: Error processing audio: {audio_error}")
                continue

        if merged_audio is None:
            print("Error: No audio segments were successfully processed")
            return None

        # Improved quality: apply overall compression and normalization
        merged_audio = merged_audio.compress_dynamic_range(
            threshold=-20.0, ratio=4.0, attack=5.0, release=50.0
        )
        merged_audio = normalize(merged_audio)

        merged_audio.export(output_file, format="mp3", bitrate="192k")
        print(f"✅ Audio successfully generated: {output_file}")
        return output_file

    except Exception as main_error:
        print(f"Main error in bilingual TTS: {main_error}")
        return None
    finally:
        # Remove per-chunk temp files on every exit path, not only on
        # success; early returns and failures used to leave them behind.
        for temp_file in processed_audio_files:
            try:
                os.unlink(temp_file)
            except OSError:
                pass  # best effort: already gone or not removable
198
 
# USAGE EXAMPLE
async def run_fixed_tts(text_input, output_file, lang):
    """Run ``bilingual_tts_fixed`` and hand its result back to the caller.

    Args:
        text_input: Text to synthesize.
        output_file: Destination path of the MP3.
        lang: Forwarded as the third positional argument (``VOICE_TA``) of
            ``bilingual_tts_fixed``; ``generate_tts`` passes a voice
            identifier here.

    Returns:
        Whatever ``bilingual_tts_fixed`` returns: the output path on
        success, ``None`` on failure. The result used to be discarded, so
        ``output = await run_fixed_tts(...)`` in ``generate_tts`` was
        always ``None``.
    """
    return await bilingual_tts_fixed(text_input, output_file, lang)
202
-
203
- async def generate_tts(id, lines, lang):
204
  voice = {
205
  "English": "en-US-JennyNeural",
206
  "Tamil": "ta-IN-PallaviNeural",
@@ -235,25 +288,33 @@ async def generate_tts(id, lines, lang):
235
  "Czech": "cs-CZ-VlastaNeural",
236
  "Hungarian": "hu-HU-NoemiNeural"
237
  }
 
238
  audio_name = f"audio{id}.mp3"
239
- audio_path = os.path.join(AUDIO_DIR, audio_name) # Assuming AUDIO_DIR is defined elsewhere
 
240
  if "&&&" in lang:
241
  listf = lang.split("&&&")
242
  text = listf[0].strip()
243
  lang_name = listf[1].strip()
244
- voice_to_use = voice[lang_name]
245
  else:
246
- text = lines[id] # Assuming lines is a dict or list indexed by id
247
- voice_to_use = voice[lang]
248
- output = await run_fixed_tts(text, audio_path, voice_to_use)
249
- if os.path.exists(audio_path):
 
 
 
250
  audio = MP3(audio_path)
251
  duration = audio.info.length
252
  return duration, audio_path
 
253
  return None, None
254
 
def audio_func(id, lines, lang):
    """Blocking entry point: run ``generate_tts`` to completion and return its result."""
    pending = generate_tts(id, lines, lang)
    return asyncio.run(pending)
 
 
257
  #-----------------------------
258
  #---------------------------------
259
  import os
@@ -288,4 +349,5 @@ def video_func(id, lines, lang):
288
  return final_video_path
289
  else:
290
  print("Video generation failed.")
291
- return None
 
 
42
  nest_asyncio.apply()
43
 
44
 
45
+ import re
46
+ import html
47
+ import unicodedata
48
+ import tempfile
49
+ import os
50
+ import asyncio
51
+ from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
52
+ from functools import lru_cache
53
+ import edge_tts
54
+ from pydub import AudioSegment
55
+ from pydub.effects import normalize
56
+ from mutagen.mp3 import MP3
57
+
58
  VOICE_EN = "en-IN-NeerjaNeural"
59
 
# Pre-compiled regex patterns for speed (compiled once, reused many times)
URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
SPECIAL_CHAR_PATTERN = re.compile(r'[#@$%^&*_+=|\\`~]')
WHITESPACE_PATTERN = re.compile(r'\s+')
SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
# Literal two-character escapes: a backslash followed by n, t or r.
ESCAPE_SEQUENCE_PATTERN = re.compile(r'\\[ntr]')
# SSML-related words, matched as whole words in any letter case.
SSML_KEYWORD_PATTERN = re.compile(
    r'\b(?:voice|speak|prosody|ssml|xmlns)\b', re.IGNORECASE
)


@lru_cache(maxsize=1024)  # Cache cleaned text to avoid re-processing
def _clean_text_cached(text):
    """Clean an already-stringified, non-empty *text*; results are memoized."""
    text = html.unescape(text.strip())

    # Must run before SPECIAL_CHAR_PATTERN: that pattern deletes the
    # backslash, after which a literal "\n" could no longer be recognized
    # and would leave a stray "n" glued to the neighbouring words.
    text = ESCAPE_SEQUENCE_PATTERN.sub(' ', text)

    text = URL_PATTERN.sub('', text)
    text = TAG_PATTERN.sub('', text)
    text = BRACKET_PATTERN.sub('', text)
    text = SPECIAL_CHAR_PATTERN.sub('', text)

    # Whole-word, case-insensitive removal. A plain substring replace would
    # turn "speaker" into "er" and "invoice" into "in", and would miss
    # mixed-case spellings such as "Voice".
    text = SSML_KEYWORD_PATTERN.sub('', text)

    text = unicodedata.normalize('NFKD', text)
    text = WHITESPACE_PATTERN.sub(' ', text)
    return text.strip()


def clean_text_for_tts(text):
    """Return *text* reduced to plain words suitable for a TTS engine.

    The input is converted with ``str()``, HTML entities are unescaped, and
    literal ``\\n``/``\\t``/``\\r`` sequences become spaces. URLs, markup
    tags, brackets, a fixed set of special characters and the SSML-related
    words ``voice``/``speak``/``prosody``/``ssml``/``xmlns`` are removed.
    The result is NFKD-normalized with whitespace collapsed.

    Args:
        text: Any object; falsy values yield an empty string. Conversion to
            ``str`` happens before the cache lookup, so unhashable inputs
            (for example a list) are accepted.

    Returns:
        The cleaned string (possibly empty).
    """
    if not text:
        return ""
    return _clean_text_cached(str(text))
91
 
async def generate_safe_audio(text, voice, semaphore):
    """Synthesize one piece of text to a temporary MP3 while holding *semaphore*.

    Args:
        text: Raw text; cleaned with ``clean_text_for_tts`` before use.
        voice: Voice identifier passed to ``edge_tts.Communicate``.
        semaphore: Async context manager that bounds how many of these
            coroutines run their body at the same time.

    Returns:
        Path of the generated ``.mp3`` file, or ``None`` when the cleaned
        text is empty or synthesis raised (the temp file is removed then).
    """
    async with semaphore:  # Limit concurrent TTS requests
        speakable = clean_text_for_tts(text)
        if not speakable:
            return None

        # Reserve a unique file name; edge-tts writes the content itself.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as handle:
            mp3_path = handle.name

        try:
            await edge_tts.Communicate(speakable, voice=voice).save(mp3_path)
        except Exception as e:
            print(f"Error generating audio: {e}")
            if os.path.exists(mp3_path):
                os.unlink(mp3_path)
            return None
        return mp3_path
112
 
@lru_cache(maxsize=256)
def smart_text_chunking(text, max_chars=80):
    """Break cleaned text into pieces of at most ``max_chars`` characters.

    The text is cleaned with ``clean_text_for_tts`` and split on
    ``SENTENCE_PATTERN``. A sentence that is too long is split on
    ``SUB_PATTERN``, and a clause that is still too long is packed word by
    word. A single word longer than ``max_chars`` is kept whole as its own
    piece. Results are memoized, which is why a tuple is returned.

    Args:
        text: Raw input text (must be hashable because of the cache).
        max_chars: Target upper bound on the length of each piece.

    Returns:
        Tuple of non-empty pieces in reading order; ``()`` when nothing is
        left after cleaning.
    """
    cleaned = clean_text_for_tts(text)
    if not cleaned:
        return ()

    pieces = []
    for raw_sentence in SENTENCE_PATTERN.split(cleaned):
        sentence = raw_sentence.strip()
        if not sentence:
            continue
        if len(sentence) <= max_chars:
            pieces.append(sentence)
            continue

        # Sentence too long: fall back to clause boundaries.
        for raw_clause in SUB_PATTERN.split(sentence):
            clause = raw_clause.strip()
            if not clause:
                continue
            if len(clause) <= max_chars:
                pieces.append(clause)
                continue

            # Clause still too long: greedily pack whole words.
            buffer = ""
            for word in clause.split():
                candidate = word if not buffer else f"{buffer} {word}"
                if len(candidate) <= max_chars:
                    buffer = candidate
                    continue
                if buffer:
                    pieces.append(buffer.strip())
                buffer = word
            if buffer:
                pieces.append(buffer.strip())

    return tuple(piece for piece in pieces if piece.strip())
154
 
def process_audio_segment_fast(audio_file):
    """Load one clip, normalize it, trim silence, and delete the source file.

    Intended to be run from a worker thread (see the ``ThreadPoolExecutor``
    use in ``bilingual_tts_optimized``).

    Args:
        audio_file: Path of the temporary audio file to load. The file is
            removed before returning, whether or not loading succeeded.

    Returns:
        The processed ``AudioSegment``, or ``None`` if it could not be
        loaded or normalized.
    """
    try:
        segment = AudioSegment.from_file(audio_file)
        segment = normalize(segment)

        # Only strip silence for longer segments (pydub lengths are in ms).
        if len(segment) > 200:
            try:
                segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
            except Exception as e:
                # Trimming is optional polish: keep the untrimmed clip.
                # `except Exception` (not a bare except) lets
                # KeyboardInterrupt/SystemExit propagate.
                print(f" (Info) Skipped strip_silence: {e}")

        return segment
    except Exception as e:
        print(f"Warning: Error processing audio segment: {e}")
        return None
    finally:
        # Cleanup temp file immediately
        try:
            os.unlink(audio_file)
        except OSError:
            pass  # best effort: already gone or not removable
179
+
async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None, max_concurrent=10):
    """Synthesize *text* chunk by chunk and merge the result into one MP3.

    The text is split with ``smart_text_chunking``. Chunks are synthesized
    concurrently through ``generate_safe_audio`` (bounded by a semaphore)
    and post-processed in a thread pool with ``process_audio_segment_fast``.
    The clips are then joined with 200 ms pauses, compressed, normalized and
    exported to ``output_file``.

    Args:
        text: Raw text to speak.
        output_file: Destination path of the merged MP3.
        VOICE_TA: Voice identifier to use. When it contains ``"ta-IN"``,
            chunks containing Tamil-block characters (U+0B80-U+0BFF) use it
            and all other chunks use ``VOICE_EN``. Any other non-empty value
            is used for every chunk. ``None`` falls back to ``VOICE_EN``.
        max_concurrent: Upper bound on simultaneous TTS requests; values
            below 1 are treated as 1.

    Returns:
        ``output_file`` on success, otherwise ``None``.

    Note:
        The pydub processing and the export run synchronously inside this
        coroutine, so the event loop is blocked while they execute.
    """
    print("Starting optimized bilingual TTS processing...")

    try:
        chunks = smart_text_chunking(text)
        if not chunks:
            print("Error: No valid text chunks after cleaning")
            return None

        # Semaphore(0) would make every task wait forever; clamp to >= 1.
        max_concurrent = max(1, int(max_concurrent))
        print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")

        is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA

        # Semaphore to limit concurrent TTS requests (prevents rate limiting)
        semaphore = asyncio.Semaphore(max_concurrent)

        tasks = []
        for chunk in chunks:
            if is_bilingual_tamil:
                # Pick the voice per chunk. The previous one-line expression
                # evaluated to VOICE_TA in both branches, so English chunks
                # were read with the Tamil voice.
                is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
                voice = VOICE_TA if is_tamil else VOICE_EN
            else:
                voice = VOICE_TA or VOICE_EN
            tasks.append(generate_safe_audio(chunk, voice, semaphore))

        # Generate all audio files concurrently; gather keeps task order.
        audio_files = await asyncio.gather(*tasks, return_exceptions=True)

        # Report failures instead of dropping them silently.
        for result in audio_files:
            if isinstance(result, BaseException):
                print(f"Warning: Audio generation error: {result}")

        # Keep only successful results (non-empty file paths), in order.
        processed_audio_files = [f for f in audio_files if isinstance(f, str) and f]

        if not processed_audio_files:
            print("Error: No audio was successfully generated")
            return None

        print(f"Successfully generated {len(processed_audio_files)} audio segments")

        # executor.map yields results in input order, so reading order holds.
        with ThreadPoolExecutor(max_workers=min(len(processed_audio_files), 8)) as executor:
            audio_segments = list(executor.map(process_audio_segment_fast, processed_audio_files))

        # Filter out clips that failed to load
        audio_segments = [seg for seg in audio_segments if seg is not None]

        if not audio_segments:
            print("Error: No audio segments were successfully processed")
            return None

        print("Merging audio segments...")
        merged_audio = audio_segments[0]
        pause = AudioSegment.silent(duration=200)

        for segment in audio_segments[1:]:
            merged_audio += pause + segment

        # Apply final processing (compression and normalization)
        print("Applying final audio processing...")
        merged_audio = merged_audio.compress_dynamic_range(
            threshold=-20.0,
            ratio=4.0,
            attack=5.0,
            release=50.0
        )
        merged_audio = normalize(merged_audio)

        merged_audio.export(output_file, format="mp3", bitrate="192k")
        print(f"✅ Audio successfully generated: {output_file}")

        return output_file

    except Exception as main_error:
        print(f"Main error in bilingual TTS: {main_error}")
        return None
254
 
255
+ async def generate_tts_optimized(id, lines, lang):
256
+ """Optimized TTS generation function."""
 
 
 
257
  voice = {
258
  "English": "en-US-JennyNeural",
259
  "Tamil": "ta-IN-PallaviNeural",
 
288
  "Czech": "cs-CZ-VlastaNeural",
289
  "Hungarian": "hu-HU-NoemiNeural"
290
  }
291
+
292
  audio_name = f"audio{id}.mp3"
293
+ audio_path = os.path.join(AUDIO_DIR, audio_name)
294
+
295
  if "&&&" in lang:
296
  listf = lang.split("&&&")
297
  text = listf[0].strip()
298
  lang_name = listf[1].strip()
299
+ voice_to_use = voice.get(lang_name, VOICE_EN)
300
  else:
301
+ text = lines[id]
302
+ voice_to_use = voice.get(lang, VOICE_EN)
303
+
304
+ # Increase max_concurrent for more speed (adjust based on your system)
305
+ output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=15)
306
+
307
+ if output and os.path.exists(audio_path):
308
  audio = MP3(audio_path)
309
  duration = audio.info.length
310
  return duration, audio_path
311
+
312
  return None, None
313
 
def audio_func(id, lines, lang):
    """Blocking entry point: run ``generate_tts_optimized`` to completion and return its result."""
    pending = generate_tts_optimized(id, lines, lang)
    return asyncio.run(pending)
317
+
318
  #-----------------------------
319
  #---------------------------------
320
  import os
 
349
  return final_video_path
350
  else:
351
  print("Video generation failed.")
352
+ return None
353
+