sreepathi-ravikumar committed on
Commit
cb6e92f
·
verified ·
1 Parent(s): 2e35fcf

Update video2.py

Browse files
Files changed (1) hide show
  1. video2.py +228 -351
video2.py CHANGED
@@ -40,402 +40,279 @@ for path in [BASE_DIR, AUDIO_DIR, CLIPS_DIR]:
40
  warnings.filterwarnings('ignore')
41
  nest_asyncio.apply()
42
 
43
- import re
44
  import html
 
45
  import tempfile
46
  import os
47
  import asyncio
48
- import random
49
  from functools import lru_cache
50
  import edge_tts
51
  from pydub import AudioSegment
52
- from pydub.effects import normalize, compress_dynamic_range, low_pass_filter, high_pass_filter
53
- from pydub.scipy_effects import eq
54
  from mutagen.mp3 import MP3
55
- import numpy as np
56
-
57
- # --- Configuration ---
58
- AUDIO_DIR = "output_audio"
59
- os.makedirs(AUDIO_DIR, exist_ok=True)
60
-
61
- # Voice Mapping
62
- VOICE_MAPPING = {
63
- "English": "en-IN-NeerjaNeural",
64
- "Tamil": "ta-IN-PallaviNeural",
65
- "Hindi": "hi-IN-SwaraNeural",
66
- }
67
 
68
- # Indic script detection
69
- INDIC_SCRIPT_PATTERN = re.compile(r'[ऀ-ൿ]+')
70
 
71
- # === ELEVENLABS-STYLE SETTINGS ===
72
- CROSSFADE_LANG_SWITCH = 80 # Longer crossfade for language switches
73
- CROSSFADE_SAME_LANG = 25 # Short crossfade for same language
74
- BREATH_PAUSE_MS = 120 # Natural breath at sentence end
75
- MICRO_PAUSE_MS = 40 # Tiny pause at commas
76
- TARGET_DBFS = -16.0 # Podcast-quality loudness
77
- COMPRESSION_RATIO = 1.8 # Gentle compression (not squashed)
 
78
 
79
- @lru_cache(maxsize=1024)
80
- def clean_text(text):
81
- if not text: return ""
82
- text = html.unescape(str(text))
83
- text = re.sub(r'https?://S+', '', text)
84
- text = re.sub(r'[*#<>[]{}]', '', text)
85
- text = re.sub(r's+', ' ', text).strip()
86
- return text
87
-
88
- def detect_language_group(word):
89
- """Detect if word is Indic or English."""
90
- if INDIC_SCRIPT_PATTERN.search(word):
91
- return 'indic'
92
- return 'english'
93
-
94
- def analyze_punctuation(text):
95
- """
96
- Determines pause type based on ending punctuation.
97
- Returns: ('breath', 'micro', 'none')
98
- """
99
- text = text.rstrip()
100
- if text.endswith(('.', '!', '?', '।')):
101
- return 'breath' # Full stop = breath pause
102
- elif text.endswith((',', ';', ':')):
103
- return 'micro' # Comma = tiny pause
104
- return 'none'
105
-
106
- def split_with_context(text):
107
- """
108
- Splits text by language while preserving punctuation context.
109
- Returns: [(text, lang_type, pause_type), ...]
110
- """
111
- text = clean_text(text)
112
- words = text.split(' ')
113
 
114
- segments = []
115
- current_chunk = []
116
- current_type = None
117
 
118
- for word in words:
119
- clean_word = word.strip(".,!?;:।")
 
 
 
 
120
 
121
- if not clean_word:
122
- if current_chunk:
123
- current_chunk.append(word)
124
- continue
 
 
 
 
 
 
 
 
 
125
 
126
- word_type = detect_language_group(clean_word)
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
- if current_type is None:
129
- current_type = word_type
130
- current_chunk.append(word)
131
- elif word_type == current_type:
132
- current_chunk.append(word)
133
  else:
134
- # Save chunk with pause info
135
- chunk_text = " ".join(current_chunk)
136
- pause_type = analyze_punctuation(chunk_text)
137
- segments.append((chunk_text, current_type, pause_type))
138
-
139
- current_chunk = [word]
140
- current_type = word_type
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
- # Final chunk
143
- if current_chunk:
144
- chunk_text = " ".join(current_chunk)
145
- pause_type = analyze_punctuation(chunk_text)
146
- segments.append((chunk_text, current_type, pause_type))
147
-
148
- return segments
149
 
150
- async def generate_segment_audio(text, voice, rate_limit_sem, lang_type):
151
- """Generate audio with optimized speech rate."""
152
- if not text.strip():
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  return None
154
-
155
- async with rate_limit_sem:
156
  try:
157
- # Add jitter to prevent rate limiting
158
- await asyncio.sleep(random.uniform(0.05, 0.15))
159
-
160
- fd, path = tempfile.mkstemp(suffix=".mp3")
161
- os.close(fd)
162
-
163
- # 🔥 SPEED OPTIMIZATION: Match syllable density
164
- # Tamil has more syllables per word, so English needs to speed up
165
- if lang_type == 'english':
166
- rate = "+12%" # Faster to match Tamil flow
167
- else:
168
- rate = "+3%" # Slightly faster for tighter delivery
169
-
170
- # Pitch variation for naturalness
171
- pitch = "+0Hz"
172
-
173
- comm = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
174
- await comm.save(path)
175
- return path
176
- except Exception as e:
177
- print(f"Error generating segment '{text[:30]}...': {e}")
178
- return None
179
 
180
- def apply_pro_audio_processing(audio_segment):
181
- """
182
- 🎚️ PROFESSIONAL AUDIO MASTERING
183
- - EQ for clarity
184
- - De-essing
185
- - Gentle compression
186
- - Warmth enhancement
187
- """
188
  try:
189
- # 1. High-pass filter: Remove rumble below 80Hz
190
- audio_segment = high_pass_filter(audio_segment, 80)
 
 
191
 
192
- # 2. Presence boost: 2-4kHz for voice clarity (like ElevenLabs)
193
- audio_segment = eq(audio_segment, focus_freq=3000, bandwidth=1000, gain_dB=2.5)
194
 
195
- # 3. De-essing: Reduce harsh 's' sounds (6-8kHz)
196
- audio_segment = eq(audio_segment, focus_freq=7000, bandwidth=2000, gain_dB=-3)
197
 
198
- # 4. Warmth: Gentle low-mid boost (200-400Hz)
199
- audio_segment = eq(audio_segment, focus_freq=300, bandwidth=200, gain_dB=1.5)
200
 
201
- # 5. Low-pass filter: Remove digital harshness above 12kHz
202
- audio_segment = low_pass_filter(audio_segment, 12000)
 
 
 
 
203
 
204
- return audio_segment
205
- except:
206
- # Fallback if scipy not available
207
- return audio_segment
208
-
209
- def create_natural_breath(duration_ms=120):
210
- """
211
- Creates a subtle breath sound (silence with very quiet noise).
212
- This mimics human breathing between sentences.
213
- """
214
- # Pure silence for now (can add pink noise for realism)
215
- return AudioSegment.silent(duration=duration_ms)
216
-
217
- def intelligent_crossfade(audio1, audio2, lang1, lang2, pause_type):
218
- """
219
- 🧠 SMART CROSSFADE LOGIC
220
- - Language switch: Long crossfade (80ms) for smooth tonal blend
221
- - Same language: Short crossfade (25ms) for tight flow
222
- - Punctuation: Insert breath pause before crossfade
223
- """
224
-
225
- # If previous segment ended with punctuation, add breath
226
- if pause_type == 'breath':
227
- breath = create_natural_breath(BREATH_PAUSE_MS)
228
- audio1 = audio1 + breath
229
- crossfade_duration = 15 # Short crossfade after breath
230
- elif pause_type == 'micro':
231
- breath = create_natural_breath(MICRO_PAUSE_MS)
232
- audio1 = audio1 + breath
233
- crossfade_duration = 10
234
- else:
235
- # No punctuation - determine crossfade by language switch
236
- if lang1 != lang2:
237
- crossfade_duration = CROSSFADE_LANG_SWITCH # Long for tonal blend
238
- else:
239
- crossfade_duration = CROSSFADE_SAME_LANG # Short for flow
240
-
241
- try:
242
- return audio1.append(audio2, crossfade=crossfade_duration)
243
- except:
244
- # If segment too short, direct append
245
- return audio1 + audio2
246
-
247
- def trim_silence_smart(audio_segment, silence_thresh=-48):
248
- """
249
- Trims Edge TTS's excessive pauses while preserving micro-breaths.
250
- Keeps 15ms at edges for natural attack/release.
251
- """
252
- try:
253
- non_silent = audio_segment.detect_nonsilent(
254
- min_silence_len=40,
255
- silence_thresh=silence_thresh
256
- )
257
 
258
- if not non_silent:
259
- return audio_segment
 
260
 
261
- start = max(0, non_silent[0][0] - 15) # Keep 15ms breath
262
- end = min(len(audio_segment), non_silent[-1][1] + 15)
263
 
264
- return audio_segment[start:end]
265
- except:
266
- return audio_segment
267
-
268
- def apply_micro_dynamics(audio_segment):
269
- """
270
- Apply 3ms fade-in/out to prevent digital clicks.
271
- This is crucial for clean crossfades.
272
- """
273
- return audio_segment.fade_in(3).fade_out(3)
274
-
275
- def match_loudness(audio_segment, target_dbfs=TARGET_DBFS):
276
- """
277
- RMS-based loudness matching (like ElevenLabs).
278
- Better than peak normalization.
279
- """
280
- change_in_dbfs = target_dbfs - audio_segment.dBFS
281
- return audio_segment.apply_gain(change_in_dbfs)
282
-
283
- async def process_segment(file_path, lang_type):
284
- """Process each segment with pro audio treatment."""
285
- if not file_path or not os.path.exists(file_path):
286
- return None
287
-
288
- try:
289
- audio = AudioSegment.from_mp3(file_path)
290
 
291
- # 1. Trim excessive silence
292
- audio = trim_silence_smart(audio, silence_thresh=-50)
293
 
294
- # 2. Match loudness (before processing)
295
- audio = match_loudness(audio, TARGET_DBFS)
 
296
 
297
- # 3. Professional EQ and mastering
298
- audio = apply_pro_audio_processing(audio)
 
 
299
 
300
- # 4. Add micro-fades to prevent clicks
301
- audio = apply_micro_dynamics(audio)
302
 
303
- return audio
304
- except Exception as e:
305
- print(f"Error processing segment: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
  return None
307
- finally:
308
- try:
309
- os.remove(file_path)
310
- except:
311
- pass
312
 
313
- async def elevenlabs_quality_tts(full_text, output_file, native_lang_code):
314
- """
315
- 🎙️ ELEVENLABS-QUALITY TTS ENGINE
316
- Natural flow, professional mastering, intelligent crossfading.
317
- """
318
- print("🎬 Starting ElevenLabs-Quality TTS...")
319
-
320
- # 1. Split text with context
321
- segments_data = split_with_context(full_text)
322
-
323
- print(f"📊 Detected {len(segments_data)} segments:")
324
- for i, (text, lang_type, pause_type) in enumerate(segments_data):
325
- pause_icon = "🫁" if pause_type == 'breath' else "," if pause_type == 'micro' else "→"
326
- print(f" {i+1}. [{lang_type.upper()}] {pause_icon} : {text[:50]}...")
327
-
328
- # 2. Voice assignment
329
- native_voice = VOICE_MAPPING.get(native_lang_code, VOICE_MAPPING["Tamil"])
330
- english_voice = VOICE_MAPPING["English"]
331
-
332
- # 3. Generate audio segments
333
- print("🎤 Generating audio...")
334
- semaphore = asyncio.Semaphore(5)
335
-
336
- tasks = []
337
- for text_chunk, lang_type, pause_type in segments_data:
338
- voice = native_voice if lang_type == 'indic' else english_voice
339
- tasks.append(generate_segment_audio(text_chunk, voice, semaphore, lang_type))
 
 
 
 
 
 
 
 
 
340
 
341
- raw_files = await asyncio.gather(*tasks)
 
342
 
343
- # 4. Process segments in parallel
344
- print("🎚️ Applying professional audio processing...")
345
- process_tasks = []
346
- for i, file_path in enumerate(raw_files):
347
- lang_type = segments_data[i][1]
348
- process_tasks.append(process_segment(file_path, lang_type))
349
-
350
- processed_segments = await asyncio.gather(*process_tasks)
351
-
352
- # Filter valid segments
353
- valid_data = []
354
- for i, seg in enumerate(processed_segments):
355
- if seg is not None:
356
- valid_data.append({
357
- 'audio': seg,
358
- 'lang': segments_data[i][1],
359
- 'pause': segments_data[i][2]
360
- })
361
-
362
- if not valid_data:
363
- print("❌ No audio generated.")
364
- return None
365
-
366
- # 5. Intelligent stitching
367
- print("🧵 Stitching with intelligent crossfades...")
368
- final_audio = valid_data[0]['audio']
369
-
370
- for i in range(1, len(valid_data)):
371
- current_seg = valid_data[i]['audio']
372
- prev_lang = valid_data[i-1]['lang']
373
- prev_pause = valid_data[i-1]['pause']
374
- current_lang = valid_data[i]['lang']
375
-
376
- final_audio = intelligent_crossfade(
377
- final_audio,
378
- current_seg,
379
- prev_lang,
380
- current_lang,
381
- prev_pause
382
- )
383
-
384
- # 6. Final mastering pass
385
- print("🎛️ Final mastering...")
386
-
387
- # Gentle broadcast-quality compression
388
- final_audio = compress_dynamic_range(
389
- final_audio,
390
- threshold=-20.0, # Gentle threshold
391
- ratio=COMPRESSION_RATIO, # Light compression (1.8:1)
392
- attack=2.0, # Fast attack for clarity
393
- release=30.0 # Quick release for naturalness
394
- )
395
-
396
- # Final loudness normalization
397
- final_audio = normalize(final_audio)
398
-
399
- # 7. Export with high quality
400
- print("💾 Exporting...")
401
- final_audio.export(
402
- output_file,
403
- format="mp3",
404
- bitrate="256k", # High quality
405
- parameters=["-q:a", "0"] # Best VBR quality
406
- )
407
-
408
- print(f"✅ ElevenLabs-quality audio saved: {output_file}")
409
- return output_file
410
-
411
- # --- Wrapper ---
412
- async def generate_tts(id, lines, lang_input):
413
- if "&&&" in lang_input:
414
- parts = lang_input.split("&&&")
415
- text = parts[0].strip()
416
- lang_name = parts[1].strip()
417
  else:
418
  text = lines[id]
419
- lang_name = lang_input.strip()
420
-
421
- output_path = os.path.join(AUDIO_DIR, f"audio_{id}.mp3")
422
- result = await elevenlabs_quality_tts(text, output_path, lang_name)
423
 
424
- if result:
425
- audio_info = MP3(result)
426
- return audio_info.info.length, result
427
- else:
428
- return 0, None
429
-
430
-
431
-
 
432
 
433
  def audio_func(id, lines, lang):
434
- loop = asyncio.new_event_loop()
435
- asyncio.set_event_loop(loop)
436
- length, path=loop.run_until_complete(generate_tts(id, lines, lang))
437
- loop.close()
438
- return length, path
439
 
440
 
441
 
 
40
  warnings.filterwarnings('ignore')
41
  nest_asyncio.apply()
42
 
43
+ import re
44
  import html
45
+ import unicodedata
46
  import tempfile
47
  import os
48
  import asyncio
49
+ from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
50
  from functools import lru_cache
51
  import edge_tts
52
  from pydub import AudioSegment
53
+ from pydub.effects import normalize
 
54
  from mutagen.mp3 import MP3
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
VOICE_EN = "en-IN-NeerjaNeural"

# Pre-compiled regex patterns (compiled once at import, reused on every call).
URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
SPECIAL_CHAR_PATTERN = re.compile(r'[#@$%^&*_+=|\\`~]')
WHITESPACE_PATTERN = re.compile(r'\s+')
SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
# Escaped whitespace written out literally (backslash followed by n, t or r).
ESCAPED_WHITESPACE_PATTERN = re.compile(r'\\[ntr]')
# Markup-only tokens left behind by broken SSML. Matched as whole words so
# ordinary vocabulary ("invoice", "speaker") is never cut apart.
MARKUP_TOKEN_PATTERN = re.compile(r'\b(?:prosody|ssml|xmlns)\b', re.IGNORECASE)


@lru_cache(maxsize=1024)  # Cache cleaned text to avoid re-processing
def clean_text_for_tts(text):
    """Return *text* stripped of markup and symbols so it can be spoken.

    Steps, in order: HTML entities are unescaped; URLs, tags, brackets,
    literal escaped-whitespace sequences, symbol characters and leftover
    SSML tokens are removed; the result is Unicode-normalized and runs of
    whitespace are collapsed to single spaces.

    Args:
        text: Raw input. Falsy values yield ``""``; other values are passed
            through ``str()``. Must be hashable because of ``lru_cache``.

    Returns:
        The cleaned string, possibly empty.
    """
    if not text:
        return ""
    text = html.unescape(str(text).strip())

    text = URL_PATTERN.sub('', text)
    text = TAG_PATTERN.sub('', text)
    text = BRACKET_PATTERN.sub('', text)
    # Must run BEFORE SPECIAL_CHAR_PATTERN: that pattern deletes the
    # backslash, after which a literal "\n" would survive as a stray "n".
    text = ESCAPED_WHITESPACE_PATTERN.sub(' ', text)
    text = SPECIAL_CHAR_PATTERN.sub('', text)
    # Complete tags are already gone (TAG_PATTERN); only tokens that never
    # occur in normal narration are removed here. "voice" and "speak" are
    # everyday words and are intentionally kept.
    text = MARKUP_TOKEN_PATTERN.sub('', text)

    # NFKC folds compatibility characters like NFKD does, but recomposes
    # afterwards so accented and Indic text reaches the TTS engine in its
    # standard composed form.
    text = unicodedata.normalize('NFKC', text)
    return WHITESPACE_PATTERN.sub(' ', text).strip()
89
 
90
async def generate_safe_audio(text, voice, semaphore):
    """Synthesize *text* with *voice* into a temporary MP3 file.

    The whole operation runs while holding *semaphore*, which bounds how
    many TTS requests are in flight at once.

    Returns:
        Path of the generated ``.mp3`` file, or ``None`` when the cleaned
        text is empty or synthesis fails (the temp file is removed then).
    """
    async with semaphore:
        speakable = clean_text_for_tts(text)
        if not speakable:
            return None

        # Reserve a unique path; the handle is closed on leaving the
        # ``with`` block, and delete=False keeps the file on disk.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as handle:
            mp3_path = handle.name

        try:
            await edge_tts.Communicate(speakable, voice=voice).save(mp3_path)
        except Exception as e:
            print(f"Error generating audio: {e}")
            # Do not leave a partial or empty file behind.
            if os.path.exists(mp3_path):
                os.unlink(mp3_path)
            return None
        return mp3_path
110
 
111
@lru_cache(maxsize=256)
def smart_text_chunking(text, max_chars=80):
    """Split *text* into TTS-sized chunks, preferring natural boundaries.

    The text is cleaned first, then split after sentence punctuation.
    A sentence longer than *max_chars* is split after ``,``/``;``/``:``,
    and a clause that is still too long is packed greedily word by word.
    A single word longer than *max_chars* is emitted as its own chunk.

    Returns:
        A tuple of non-empty chunk strings (empty tuple if nothing is left
        after cleaning).
    """
    cleaned = clean_text_for_tts(text)
    if not cleaned:
        return ()

    def pack_words(fragment):
        # Greedy fill: extend the current line while it still fits.
        line = ""
        for token in fragment.split():
            candidate = token if not line else f"{line} {token}"
            if len(candidate) <= max_chars:
                line = candidate
                continue
            if line:
                yield line.strip()
            line = token
        if line:
            yield line.strip()

    def pieces():
        for sentence in map(str.strip, SENTENCE_PATTERN.split(cleaned)):
            if not sentence:
                continue
            if len(sentence) <= max_chars:
                yield sentence
                continue
            # Sentence too long: fall back to clause boundaries.
            for clause in map(str.strip, SUB_PATTERN.split(sentence)):
                if not clause:
                    continue
                if len(clause) <= max_chars:
                    yield clause
                else:
                    yield from pack_words(clause)

    return tuple(piece for piece in pieces() if piece.strip())
 
 
 
 
 
 
152
 
153
def process_audio_segment_fast(audio_file):
    """Load, normalize and silence-strip one generated audio file.

    The source file is always deleted afterwards, whether or not
    processing succeeded.

    Args:
        audio_file: Path to the temporary audio file to load.

    Returns:
        The processed ``AudioSegment``, or ``None`` if the file could not
        be loaded or normalized.
    """
    try:
        segment = normalize(AudioSegment.from_file(audio_file))

        # Only strip silence for longer segments
        if len(segment) > 200:
            try:
                # padding must be given explicitly: pydub rejects a padding
                # larger than silence_len, and its default padding (100 ms)
                # exceeds the 50 ms used here, so the call used to fail on
                # every invocation.
                stripped = segment.strip_silence(
                    silence_len=50, silence_thresh=-40, padding=50
                )
            except Exception as strip_error:
                # Best effort: keep the unstripped audio.
                print(f"Warning: silence stripping skipped: {strip_error}")
            else:
                # An all-silent clip strips down to nothing; keep the
                # original rather than returning an empty segment.
                if len(stripped) > 0:
                    segment = stripped

        return segment
    except Exception as e:
        print(f"Warning: Error processing audio segment: {e}")
        return None
    finally:
        # Cleanup temp file immediately; a missing file is not an error.
        try:
            os.unlink(audio_file)
        except OSError:
            pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
 
178
async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None, max_concurrent=10):
    """Convert *text* to one MP3 file, switching voice per chunk when bilingual.

    The text is chunked, each chunk is synthesized concurrently, the clips
    are post-processed in a thread pool, joined with 200 ms pauses,
    compressed, normalized and exported.

    Args:
        text: Text to speak.
        output_file: Destination path of the merged MP3.
        VOICE_TA: Voice for the native language. When it is a ``ta-IN``
            voice, chunks without Tamil-script characters are spoken with
            ``VOICE_EN`` instead. ``None`` means ``VOICE_EN`` throughout.
        max_concurrent: Upper bound on simultaneous TTS requests.

    Returns:
        *output_file* on success, ``None`` on any failure.
    """
    print("Starting optimized bilingual TTS processing...")

    try:
        chunks = smart_text_chunking(text)
        if not chunks:
            print("Error: No valid text chunks after cleaning")
            return None

        print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")

        is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA

        # Semaphore to limit concurrent TTS requests (prevents rate limiting).
        # A limit below 1 would block forever or raise, so clamp it.
        semaphore = asyncio.Semaphore(max(1, max_concurrent))

        tasks = []
        for chunk in chunks:
            if is_bilingual_tamil:
                # Tamil Unicode block is U+0B80..U+0BFF. Chunks without any
                # Tamil character go to the English voice; previously both
                # branches selected VOICE_TA, so this check had no effect.
                has_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
                voice = VOICE_TA if has_tamil else VOICE_EN
            else:
                voice = VOICE_TA or VOICE_EN
            tasks.append(generate_safe_audio(chunk, voice, semaphore))

        # Generate all audio files concurrently; gather keeps chunk order.
        audio_files = await asyncio.gather(*tasks, return_exceptions=True)

        # Keep only successful results (paths); failures are None/exceptions.
        processed_audio_files = [f for f in audio_files if isinstance(f, str) and f]

        if not processed_audio_files:
            print("Error: No audio was successfully generated")
            return None

        failed_count = len(audio_files) - len(processed_audio_files)
        if failed_count:
            # Surface partial failures instead of silently dropping speech.
            print(f"Warning: {failed_count} of {len(audio_files)} chunks produced no audio")

        print(f"Successfully generated {len(processed_audio_files)} audio segments")

        # Process audio segments in parallel using ThreadPoolExecutor
        with ThreadPoolExecutor(max_workers=min(len(processed_audio_files), 8)) as executor:
            audio_segments = list(executor.map(process_audio_segment_fast, processed_audio_files))

        # Filter out None segments
        audio_segments = [seg for seg in audio_segments if seg is not None]

        if not audio_segments:
            print("Error: No audio segments were successfully processed")
            return None

        # Merge audio segments with a short pause between chunks
        print("Merging audio segments...")
        merged_audio = audio_segments[0]
        pause = AudioSegment.silent(duration=200)

        for segment in audio_segments[1:]:
            merged_audio += pause + segment

        # Apply final processing (compression and normalization)
        print("Applying final audio processing...")
        merged_audio = merged_audio.compress_dynamic_range(
            threshold=-20.0,
            ratio=4.0,
            attack=5.0,
            release=50.0
        )
        merged_audio = normalize(merged_audio)

        merged_audio.export(output_file, format="mp3", bitrate="192k")
        print(f"✅ Audio successfully generated: {output_file}")

        return output_file

    except Exception as main_error:
        # Top-level boundary: report and signal failure to the caller.
        print(f"Main error in bilingual TTS: {main_error}")
        return None
 
 
 
 
 
252
 
253
# Language display name -> edge-tts voice. Built once at import time
# instead of on every call.
_VOICE_BY_LANGUAGE = {
    "English": "en-US-JennyNeural",
    "Tamil": "ta-IN-PallaviNeural",
    "Hindi": "hi-IN-SwaraNeural",
    "Malayalam": "ml-IN-SobhanaNeural",
    "Kannada": "kn-IN-SapnaNeural",
    "Telugu": "te-IN-ShrutiNeural",
    "Bengali": "bn-IN-TanishaaNeural",
    "Marathi": "mr-IN-AarohiNeural",
    "Gujarati": "gu-IN-DhwaniNeural",
    "Punjabi": "pa-IN-VaaniNeural",
    "Urdu": "ur-IN-GulNeural",
    "French": "fr-FR-DeniseNeural",
    "German": "de-DE-KatjaNeural",
    "Spanish": "es-ES-ElviraNeural",
    "Italian": "it-IT-IsabellaNeural",
    "Russian": "ru-RU-SvetlanaNeural",
    "Japanese": "ja-JP-NanamiNeural",
    "Korean": "ko-KR-SunHiNeural",
    "Chinese": "zh-CN-XiaoxiaoNeural",
    "Arabic": "ar-SA-ZariyahNeural",
    "Portuguese": "pt-BR-FranciscaNeural",
    "Dutch": "nl-NL-FennaNeural",
    "Greek": "el-GR-AthinaNeural",
    "Hebrew": "he-IL-HilaNeural",
    "Turkish": "tr-TR-EmelNeural",
    "Polish": "pl-PL-AgnieszkaNeural",
    "Thai": "th-TH-AcharaNeural",
    "Vietnamese": "vi-VN-HoaiMyNeural",
    "Swedish": "sv-SE-SofieNeural",
    "Finnish": "fi-FI-NooraNeural",
    "Czech": "cs-CZ-VlastaNeural",
    "Hungarian": "hu-HU-NoemiNeural",
}


async def generate_tts_optimized(id, lines, lang):
    """Generate ``audio{id}.mp3`` in ``AUDIO_DIR`` and report its duration.

    Args:
        id: Index into *lines*; also used in the output file name.
        lines: Sequence of texts; ``lines[id]`` is spoken unless *lang*
            carries its own text.
        lang: Either a language name (e.g. ``"Tamil"``) or the combined
            form ``"<text>&&&<language>"``, in which case the embedded
            text is spoken instead of ``lines[id]``. Unknown languages
            fall back to ``VOICE_EN``.

    Returns:
        ``(duration_seconds, audio_path)`` on success, ``(None, None)``
        when no audio was produced.
    """
    audio_path = os.path.join(AUDIO_DIR, f"audio{id}.mp3")

    if "&&&" in lang:
        pieces = lang.split("&&&")
        text = pieces[0].strip()
        lang_name = pieces[1].strip()
    else:
        text = lines[id]
        # Strip here as well so "Tamil " resolves the same way in both forms.
        lang_name = lang.strip()
    voice_to_use = _VOICE_BY_LANGUAGE.get(lang_name, VOICE_EN)

    # Increase max_concurrent for more speed (adjust based on your system)
    output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=15)

    if output and os.path.exists(audio_path):
        return MP3(audio_path).info.length, audio_path

    return None, None
311
 
312
def audio_func(id, lines, lang):
    """Blocking entry point: run the async TTS pipeline to completion.

    Returns exactly what ``generate_tts_optimized`` returns for the same
    arguments.
    """
    pipeline = generate_tts_optimized(id, lines, lang)
    return asyncio.run(pipeline)
315
+
 
 
316
 
317
 
318