sreepathi-ravikumar committed on
Commit
13cdf1c
·
verified ·
1 Parent(s): 7df00a9

Update video2.py

Browse files
Files changed (1) hide show
  1. video2.py +118 -101
video2.py CHANGED
@@ -41,23 +41,25 @@ for path in [BASE_DIR, AUDIO_DIR, CLIPS_DIR]:
41
  warnings.filterwarnings('ignore')
42
  nest_asyncio.apply()
43
 
44
-
45
  import re
46
  import html
47
  import unicodedata
48
  import tempfile
49
  import os
50
  import asyncio
51
- from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
52
  from functools import lru_cache
53
- import edge_tts
54
  from pydub import AudioSegment
55
  from pydub.effects import normalize
56
  from mutagen.mp3 import MP3
57
 
58
- VOICE_EN = "en-IN-NeerjaNeural"
 
 
 
59
 
60
- # Pre-compiled regex patterns for speed (compiled once, reused many times)
61
  URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
62
  TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
63
  BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
@@ -66,11 +68,11 @@ WHITESPACE_PATTERN = re.compile(r'\s+')
66
  SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
67
  SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
68
 
69
- @lru_cache(maxsize=1024) # Cache cleaned text to avoid re-processing
70
  def clean_text_for_tts(text):
71
  """Cleans text before TTS with optimized regex and caching."""
72
- if not text:
73
- return ""
74
  text = str(text).strip()
75
  text = html.unescape(text)
76
 
@@ -87,36 +89,42 @@ def clean_text_for_tts(text):
87
 
88
  text = unicodedata.normalize('NFKD', text)
89
  text = WHITESPACE_PATTERN.sub(' ', text)
90
- return text.strip()
 
 
 
91
 
92
- async def generate_safe_audio(text, voice, semaphore):
93
- """Generate clean audio with rate limiting."""
94
- async with semaphore: # Limit concurrent TTS requests
95
- cleaned_text = clean_text_for_tts(text)
96
- if not cleaned_text:
97
- return None
98
-
99
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
100
- fname = temp_file.name
101
- temp_file.close()
102
-
103
- try:
104
- comm = edge_tts.Communicate(cleaned_text, voice=voice)
105
- await comm.save(fname)
106
  return fname
107
- except Exception as e:
108
- print(f"Error generating audio: {e}")
109
- if os.path.exists(fname):
110
- os.unlink(fname)
111
  return None
 
 
 
 
 
112
 
113
  @lru_cache(maxsize=256)
114
  def smart_text_chunking(text, max_chars=80):
115
  """Cached text chunking for speed."""
116
  text = clean_text_for_tts(text)
117
  if not text:
118
- return tuple() # Return tuple for hashability (required by lru_cache)
119
-
120
  sentences = SENTENCE_PATTERN.split(text)
121
  chunks = []
122
 
@@ -150,7 +158,7 @@ def smart_text_chunking(text, max_chars=80):
150
  if current_chunk:
151
  chunks.append(current_chunk.strip())
152
 
153
- return tuple(chunk for chunk in chunks if chunk.strip())
154
 
155
  def process_audio_segment_fast(audio_file):
156
  """Fast audio processing in separate thread."""
@@ -165,9 +173,10 @@ def process_audio_segment_fast(audio_file):
165
  except:
166
  pass # Skip if fails
167
 
 
168
  return segment
169
  except Exception as e:
170
- print(f"Warning: Error processing audio segment: {e}")
171
  return None
172
  finally:
173
  # Cleanup temp file immediately
@@ -177,9 +186,9 @@ def process_audio_segment_fast(audio_file):
177
  except:
178
  pass
179
 
180
- async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None, max_concurrent=10):
181
- """Ultra-optimized bilingual TTS with parallel processing."""
182
- print("Starting optimized bilingual TTS processing...")
183
 
184
  try:
185
  chunks = smart_text_chunking(text)
@@ -189,33 +198,32 @@ async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None,
189
 
190
  print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")
191
 
192
- is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA
193
-
194
- # Semaphore to limit concurrent TTS requests (prevents rate limiting)
195
- semaphore = asyncio.Semaphore(max_concurrent)
196
-
197
- # Prepare all tasks
198
- tasks = []
199
- for i, chunk in enumerate(chunks):
200
- is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
201
- voice = VOICE_TA if (is_bilingual_tamil and is_tamil) else (VOICE_TA or VOICE_EN)
202
- tasks.append(generate_safe_audio(chunk, voice, semaphore))
203
 
204
- # Generate all audio files concurrently
205
- audio_files = await asyncio.gather(*tasks, return_exceptions=True)
 
 
 
 
 
 
 
 
 
 
 
 
206
 
207
- # Filter successful files
208
- processed_audio_files = [f for f in audio_files if isinstance(f, str) and f]
209
-
210
- if not processed_audio_files:
211
  print("Error: No audio was successfully generated")
212
  return None
213
 
214
- print(f"Successfully generated {len(processed_audio_files)} audio segments")
215
 
216
- # Process audio segments in parallel using ThreadPoolExecutor
217
- with ThreadPoolExecutor(max_workers=min(len(processed_audio_files), 8)) as executor:
218
- audio_segments = list(executor.map(process_audio_segment_fast, processed_audio_files))
219
 
220
  # Filter out None segments
221
  audio_segments = [seg for seg in audio_segments if seg is not None]
@@ -232,7 +240,7 @@ async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None,
232
  for segment in audio_segments[1:]:
233
  merged_audio += pause + segment
234
 
235
- # Apply final processing (compression and normalization)
236
  print("Applying final audio processing...")
237
  merged_audio = merged_audio.compress_dynamic_range(
238
  threshold=-20.0,
@@ -242,79 +250,88 @@ async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None,
242
  )
243
  merged_audio = normalize(merged_audio)
244
 
245
- # Export with high quality
 
246
  merged_audio.export(output_file, format="mp3", bitrate="192k")
247
- print(f"✅ Audio successfully generated: {output_file}")
248
-
249
- return output_file
 
 
 
250
 
251
  except Exception as main_error:
252
  print(f"Main error in bilingual TTS: {main_error}")
253
  return None
254
 
255
  async def generate_tts_optimized(id, lines, lang):
256
- """Optimized TTS generation function."""
257
- voice = {
258
- "English": "en-US-JennyNeural",
259
- "Tamil": "ta-IN-PallaviNeural",
260
- "Hindi": "hi-IN-SwaraNeural",
261
- "Malayalam": "ml-IN-SobhanaNeural",
262
- "Kannada": "kn-IN-SapnaNeural",
263
- "Telugu": "te-IN-ShrutiNeural",
264
- "Bengali": "bn-IN-TanishaaNeural",
265
- "Marathi": "mr-IN-AarohiNeural",
266
- "Gujarati": "gu-IN-DhwaniNeural",
267
- "Punjabi": "pa-IN-VaaniNeural",
268
- "Urdu": "ur-IN-GulNeural",
269
- "French": "fr-FR-DeniseNeural",
270
- "German": "de-DE-KatjaNeural",
271
- "Spanish": "es-ES-ElviraNeural",
272
- "Italian": "it-IT-IsabellaNeural",
273
- "Russian": "ru-RU-SvetlanaNeural",
274
- "Japanese": "ja-JP-NanamiNeural",
275
- "Korean": "ko-KR-SunHiNeural",
276
- "Chinese": "zh-CN-XiaoxiaoNeural",
277
- "Arabic": "ar-SA-ZariyahNeural",
278
- "Portuguese": "pt-BR-FranciscaNeural",
279
- "Dutch": "nl-NL-FennaNeural",
280
- "Greek": "el-GR-AthinaNeural",
281
- "Hebrew": "he-IL-HilaNeural",
282
- "Turkish": "tr-TR-EmelNeural",
283
- "Polish": "pl-PL-AgnieszkaNeural",
284
- "Thai": "th-TH-AcharaNeural",
285
- "Vietnamese": "vi-VN-HoaiMyNeural",
286
- "Swedish": "sv-SE-SofieNeural",
287
- "Finnish": "fi-FI-NooraNeural",
288
- "Czech": "cs-CZ-VlastaNeural",
289
- "Hungarian": "hu-HU-NoemiNeural"
 
290
  }
291
 
292
  audio_name = f"audio{id}.mp3"
293
  audio_path = os.path.join(AUDIO_DIR, audio_name)
294
 
 
295
  if "&&&" in lang:
296
  listf = lang.split("&&&")
297
  text = listf[0].strip()
298
  lang_name = listf[1].strip()
299
- voice_to_use = voice.get(lang_name, VOICE_EN)
300
  else:
301
  text = lines[id]
302
- voice_to_use = voice.get(lang, VOICE_EN)
303
 
304
- # Increase max_concurrent for more speed (adjust based on your system)
305
- output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=15)
 
306
 
307
  if output and os.path.exists(audio_path):
308
  audio = MP3(audio_path)
309
  duration = audio.info.length
 
310
  return duration, audio_path
311
 
 
312
  return None, None
313
 
314
  def audio_func(id, lines, lang):
315
- """Synchronous wrapper for audio generation."""
316
- return asyncio.run(generate_tts_optimized(id, lines, lang))
317
-
318
  #-----------------------------
319
  #---------------------------------
320
  def video_func(id, lines, lang):
 
41
  warnings.filterwarnings('ignore')
42
  nest_asyncio.apply()
43
 
 
44
  import re
45
  import html
46
  import unicodedata
47
  import tempfile
48
  import os
49
  import asyncio
50
+ from concurrent.futures import ThreadPoolExecutor
51
  from functools import lru_cache
52
+ from gtts import gTTS # ADD: Import gTTS for replacement
53
  from pydub import AudioSegment
54
  from pydub.effects import normalize
55
  from mutagen.mp3 import MP3
56
 
57
+ # Global constants (unchanged)
58
+ AUDIO_DIR = os.path.join("/app/data", "sound") # Ensure this matches your BASE_DIR
59
+ os.makedirs(AUDIO_DIR, exist_ok=True)
60
+ VOICE_EN = "en" # CHANGE: For gTTS, use lang codes instead of full voice names
61
 
62
+ # Pre-compiled regex patterns (unchanged)
63
  URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
64
  TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
65
  BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
 
68
  SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
69
  SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
70
 
71
+ @lru_cache(maxsize=1024)
72
  def clean_text_for_tts(text):
73
  """Cleans text before TTS with optimized regex and caching."""
74
+ if not text or text.isspace():
75
+ return "Default text for empty input" # Fallback for empty input
76
  text = str(text).strip()
77
  text = html.unescape(text)
78
 
 
89
 
90
  text = unicodedata.normalize('NFKD', text)
91
  text = WHITESPACE_PATTERN.sub(' ', text)
92
+ text = text.strip()
93
+ if not text:
94
+ return "Default text for empty input" # Ensure non-empty output
95
+ return text
96
 
97
def generate_safe_audio(text, lang):
    """Synchronously synthesize *text* to a temporary MP3 file via gTTS.

    Parameters:
        text: raw text to speak; cleaned through clean_text_for_tts first.
        lang: gTTS language code (e.g. 'en', 'ta').

    Returns:
        Path to the generated temp MP3 on success, or None when synthesis
        fails or produces an empty file. The caller owns the returned file
        and is responsible for deleting it.
    """
    cleaned_text = clean_text_for_tts(text)
    print(f"Generating audio for text: {cleaned_text[:50]}... with lang: {lang}")  # Debug log

    # Reserve a temp path up front; gTTS writes to the (closed) file name.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as handle:
        fname = handle.name

    try:
        # slow=False keeps natural speech speed.
        gTTS(text=cleaned_text, lang=lang, slow=False).save(fname)
        if os.path.exists(fname) and os.path.getsize(fname) > 0:
            print(f"Audio generated: {fname}")  # Debug log
            return fname
        # Synthesis "succeeded" but wrote nothing usable — discard the file.
        print(f"Audio file {fname} is empty or missing")  # Debug log
        os.unlink(fname)
        return None
    except Exception as e:
        # Best-effort cleanup, then signal failure to the caller.
        print(f"Error generating audio for '{cleaned_text[:20]}...': {e}")  # Debug log
        if os.path.exists(fname):
            os.unlink(fname)
        return None
121
 
122
  @lru_cache(maxsize=256)
123
  def smart_text_chunking(text, max_chars=80):
124
  """Cached text chunking for speed."""
125
  text = clean_text_for_tts(text)
126
  if not text:
127
+ return ("Default text for chunking",) # Non-empty fallback
 
128
  sentences = SENTENCE_PATTERN.split(text)
129
  chunks = []
130
 
 
158
  if current_chunk:
159
  chunks.append(current_chunk.strip())
160
 
161
+ return tuple(chunks) or ("Default text for chunking",) # Non-empty fallback
162
 
163
  def process_audio_segment_fast(audio_file):
164
  """Fast audio processing in separate thread."""
 
173
  except:
174
  pass # Skip if fails
175
 
176
+ print(f"Processed audio segment: {audio_file}") # Debug log
177
  return segment
178
  except Exception as e:
179
+ print(f"Warning: Error processing audio segment {audio_file}: {e}")
180
  return None
181
  finally:
182
  # Cleanup temp file immediately
 
186
  except:
187
  pass
188
 
189
+ def bilingual_tts_optimized(text, output_file="audio0.mp3", LANG_TA=None, max_concurrent=5):
190
+ """Ultra-optimized bilingual TTS with gTTS and parallel processing via threads."""
191
+ print(f"Starting gTTS bilingual TTS for output: {output_file}") # Debug log
192
 
193
  try:
194
  chunks = smart_text_chunking(text)
 
198
 
199
  print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")
200
 
201
+ is_bilingual_tamil = LANG_TA is not None and LANG_TA == 'ta'
 
 
 
 
 
 
 
 
 
 
202
 
203
+ # Prepare all audio files using ThreadPoolExecutor (since gTTS is sync)
204
+ audio_files = []
205
+ with ThreadPoolExecutor(max_workers=max_concurrent) as executor:
206
+ futures = []
207
+ for chunk in chunks:
208
+ is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
209
+ lang = LANG_TA if (is_bilingual_tamil and is_tamil) else (LANG_TA or VOICE_EN)
210
+ futures.append(executor.submit(generate_safe_audio, chunk, lang))
211
+
212
+ # Collect results
213
+ for future in futures:
214
+ result = future.result()
215
+ if result:
216
+ audio_files.append(result)
217
 
218
+ if not audio_files:
 
 
 
219
  print("Error: No audio was successfully generated")
220
  return None
221
 
222
+ print(f"Successfully generated {len(audio_files)} audio segments")
223
 
224
+ # Process audio segments in parallel using another ThreadPoolExecutor
225
+ with ThreadPoolExecutor(max_workers=min(len(audio_files), 4)) as executor:
226
+ audio_segments = list(executor.map(process_audio_segment_fast, audio_files))
227
 
228
  # Filter out None segments
229
  audio_segments = [seg for seg in audio_segments if seg is not None]
 
240
  for segment in audio_segments[1:]:
241
  merged_audio += pause + segment
242
 
243
+ # Apply final processing (compression and normalization) for quality
244
  print("Applying final audio processing...")
245
  merged_audio = merged_audio.compress_dynamic_range(
246
  threshold=-20.0,
 
250
  )
251
  merged_audio = normalize(merged_audio)
252
 
253
+ # Export with high quality (192k bitrate for better quality matching edge_tts)
254
+ os.makedirs(os.path.dirname(output_file), exist_ok=True)
255
  merged_audio.export(output_file, format="mp3", bitrate="192k")
256
+ if os.path.exists(output_file) and os.path.getsize(output_file) > 0:
257
+ print(f"✅ Audio successfully generated: {output_file}")
258
+ return output_file
259
+ else:
260
+ print(f"Error: Audio file {output_file} is empty or not created")
261
+ return None
262
 
263
  except Exception as main_error:
264
  print(f"Main error in bilingual TTS: {main_error}")
265
  return None
266
 
267
def resolve_tts_lang(lang_name, default="en"):
    """Map a human-readable language name (e.g. "Tamil") to a gTTS code.

    Returns *default* when the name is not recognized.
    """
    lang_map = {
        "English": "en", "Tamil": "ta", "Hindi": "hi", "Malayalam": "ml",
        "Kannada": "kn", "Telugu": "te", "Bengali": "bn", "Marathi": "mr",
        "Gujarati": "gu", "Punjabi": "pa", "Urdu": "ur", "French": "fr",
        "German": "de", "Spanish": "es", "Italian": "it", "Russian": "ru",
        "Japanese": "ja", "Korean": "ko", "Chinese": "zh", "Arabic": "ar",
        "Portuguese": "pt", "Dutch": "nl", "Greek": "el", "Hebrew": "he",
        "Turkish": "tr", "Polish": "pl", "Thai": "th", "Vietnamese": "vi",
        "Swedish": "sv", "Finnish": "fi", "Czech": "cs", "Hungarian": "hu",
    }
    return lang_map.get(lang_name, default)


async def generate_tts_optimized(id, lines, lang):
    """Generate TTS audio for one line and return (duration_seconds, path).

    NOTE: this is still declared ``async`` so existing callers that await it
    keep working, even though the gTTS pipeline underneath is synchronous.
    Drive it with ``asyncio.run`` (or ``await``) — never call it bare.

    Parameters:
        id: index of the line to narrate; also names the output file.
        lines: sequence of text lines, indexed by ``id``.
        lang: either a language name (e.g. "Tamil"), or the combined form
              "text&&&LanguageName" which carries the text to speak inline.

    Returns:
        (duration, audio_path) on success, (None, None) on failure.
    """
    audio_name = f"audio{id}.mp3"
    audio_path = os.path.join(AUDIO_DIR, audio_name)

    print(f"Generating audio for id {id}, lang: {lang}")  # Debug log
    if "&&&" in lang:
        # Combined "text&&&LanguageName" form: text travels inside `lang`.
        parts = lang.split("&&&")
        text = parts[0].strip()
        lang_to_use = resolve_tts_lang(parts[1].strip(), VOICE_EN)
    else:
        text = lines[id]
        lang_to_use = resolve_tts_lang(lang, VOICE_EN)

    print(f"Text for TTS: {text[:50]}...")  # Debug log
    # bilingual_tts_optimized is synchronous in the gTTS implementation.
    output = bilingual_tts_optimized(text, audio_path, lang_to_use, max_concurrent=5)

    if output and os.path.exists(audio_path):
        audio = MP3(audio_path)
        duration = audio.info.length
        print(f"Audio duration: {duration}s, path: {audio_path}")  # Debug log
        return duration, audio_path

    print(f"Audio generation failed for id {id}")  # Debug log
    return None, None
330
 
331
def audio_func(id, lines, lang):
    """Synchronous wrapper for audio generation.

    ``generate_tts_optimized`` is declared ``async``, so calling it bare
    would hand callers an un-awaited coroutine object instead of the
    (duration, audio_path) tuple they expect — it must be driven through
    an event loop. ``nest_asyncio.apply()`` at module top makes
    ``asyncio.run`` safe even when an event loop is already running.
    """
    return asyncio.run(generate_tts_optimized(id, lines, lang))
335
  #-----------------------------
336
  #---------------------------------
337
  def video_func(id, lines, lang):