sreepathi-ravikumar committed on
Commit
cecdb1a
·
verified ·
1 Parent(s): 505aba1

Update video2.py

Browse files
Files changed (1) hide show
  1. video2.py +115 -196
video2.py CHANGED
@@ -46,18 +46,17 @@ import html
46
  import unicodedata
47
  import tempfile
48
  import os
 
 
49
  from functools import lru_cache
50
- from gtts import gTTS
51
  from pydub import AudioSegment
52
  from pydub.effects import normalize
53
  from mutagen.mp3 import MP3
54
- from concurrent.futures import ThreadPoolExecutor
55
 
56
- # Default voice/language settings
57
- DEFAULT_LANG = "en"
58
 
59
-
60
- # Pre-compiled regex patterns for speed
61
  URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
62
  TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
63
  BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
@@ -66,55 +65,7 @@ WHITESPACE_PATTERN = re.compile(r'\s+')
66
  SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
67
  SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
68
 
69
# gTTS language mappings (ISO 639-1 codes)
# Maps the human-readable language names received from callers to the
# codes accepted by gTTS. NOTE: "Chinese" uses the regioned "zh-CN"
# (Mandarin, simplified) rather than a bare two-letter code.
LANGUAGE_MAP = {
    "English": "en",
    "Tamil": "ta",
    "Hindi": "hi",
    "Malayalam": "ml",
    "Kannada": "kn",
    "Telugu": "te",
    "Bengali": "bn",
    "Marathi": "mr",
    "Gujarati": "gu",
    "Punjabi": "pa",
    "Urdu": "ur",
    "French": "fr",
    "German": "de",
    "Spanish": "es",
    "Italian": "it",
    "Russian": "ru",
    "Japanese": "ja",
    "Korean": "ko",
    "Chinese": "zh-CN",
    "Arabic": "ar",
    "Portuguese": "pt",
    "Dutch": "nl",
    "Greek": "el",
    "Hebrew": "he",
    "Turkish": "tr",
    "Polish": "pl",
    "Thai": "th",
    "Vietnamese": "vi",
    "Swedish": "sv",
    "Finnish": "fi",
    "Czech": "cs",
    "Hungarian": "hu"
}
104
-
105
# Unicode ranges for language detection
# Each entry maps an ISO 639-1 code to the inclusive (first, last) code
# points of that script's Unicode block. NOTE(review): Devanagari
# (\u0900-\u097F) is shared by Hindi and Marathi but is labelled 'hi'
# here, so Marathi text is detected as Hindi — confirm that is intended.
LANGUAGE_UNICODE_RANGES = {
    'ta': ('\u0B80', '\u0BFF'),  # Tamil
    'hi': ('\u0900', '\u097F'),  # Hindi/Devanagari
    'te': ('\u0C00', '\u0C7F'),  # Telugu
    'kn': ('\u0C80', '\u0CFF'),  # Kannada
    'ml': ('\u0D00', '\u0D7F'),  # Malayalam
    'bn': ('\u0980', '\u09FF'),  # Bengali
    'gu': ('\u0A80', '\u0AFF'),  # Gujarati
    'pa': ('\u0A00', '\u0A7F'),  # Punjabi
}
116
-
117
- @lru_cache(maxsize=1024)
118
  def clean_text_for_tts(text):
119
  """Cleans text before TTS with optimized regex and caching."""
120
  if not text:
@@ -122,14 +73,14 @@ def clean_text_for_tts(text):
122
  text = str(text).strip()
123
  text = html.unescape(text)
124
 
125
- # Use pre-compiled patterns
126
  text = URL_PATTERN.sub('', text)
127
  text = TAG_PATTERN.sub('', text)
128
  text = BRACKET_PATTERN.sub('', text)
129
  text = SPECIAL_CHAR_PATTERN.sub('', text)
130
  text = text.replace('\\n', ' ').replace('\\t', ' ').replace('\\r', ' ')
131
 
132
- # Remove TTS-specific keywords
133
  for keyword in ['voice', 'speak', 'prosody', 'ssml', 'xmlns']:
134
  text = text.replace(keyword, '').replace(keyword.upper(), '')
135
 
@@ -137,19 +88,33 @@ def clean_text_for_tts(text):
137
  text = WHITESPACE_PATTERN.sub(' ', text)
138
  return text.strip()
139
 
140
def detect_language(text):
    """Return the ISO 639-1 code of the first script found in *text*.

    Scans the configured Unicode script ranges; any single character
    inside a range counts as a match. Falls back to 'en' when no
    non-Latin script is present.
    """
    detected = next(
        (
            code
            for code, (low, high) in LANGUAGE_UNICODE_RANGES.items()
            if any(low <= ch <= high for ch in text)
        ),
        None,
    )
    return detected if detected is not None else 'en'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
 
147
  @lru_cache(maxsize=256)
148
- def smart_text_chunking(text, max_chars=100):
149
- """Cached text chunking optimized for gTTS."""
150
  text = clean_text_for_tts(text)
151
  if not text:
152
- return tuple()
153
 
154
  sentences = SENTENCE_PATTERN.split(text)
155
  chunks = []
@@ -186,95 +151,60 @@ def smart_text_chunking(text, max_chars=100):
186
 
187
  return tuple(chunk for chunk in chunks if chunk.strip())
188
 
189
def generate_audio_chunk(args):
    """Generate audio for a single chunk using gTTS.

    Args:
        args: Tuple of (chunk_text, lang_code, chunk_idx).

    Returns:
        Path to a temporary MP3 file, or None when the chunk is empty
        or synthesis fails.
    """
    chunk, lang_code, chunk_idx = args
    # FIX: `fname` was referenced in the except-branch before assignment;
    # if NamedTemporaryFile raised, the handler itself crashed with a
    # NameError that masked the real error.
    fname = None

    try:
        cleaned_text = clean_text_for_tts(chunk)
        if not cleaned_text:
            return None

        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
        fname = temp_file.name
        temp_file.close()

        # Generate TTS with gTTS
        tts = gTTS(text=cleaned_text, lang=lang_code, slow=False)
        tts.save(fname)

        print(f"Generated chunk {chunk_idx + 1}: {len(cleaned_text)} chars")
        return fname

    except Exception as e:
        print(f"Error generating audio chunk {chunk_idx}: {e}")
        # Clean up the temp file only when it was actually created.
        if fname and os.path.exists(fname):
            os.unlink(fname)
        return None
214
-
215
def process_audio_segment(audio_file):
    """Process one audio segment: normalize, strip silence, delete source.

    Args:
        audio_file: Path to a temporary audio file; it is always deleted
            in the `finally` block, whether processing succeeds or not.

    Returns:
        A normalized pydub AudioSegment, or None on failure.
    """
    try:
        segment = AudioSegment.from_file(audio_file)
        segment = normalize(segment)

        # Strip silence only from longer clips; very short clips could be
        # swallowed entirely by the stripper.
        if len(segment) > 200:
            try:
                segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
            except Exception:  # FIX: bare `except:` also caught KeyboardInterrupt
                pass  # best-effort: keep the un-stripped segment

        return segment
    except Exception as e:
        print(f"Warning: Error processing audio segment: {e}")
        return None
    finally:
        # Delete the temp file immediately to bound disk usage.
        try:
            if os.path.exists(audio_file):
                os.unlink(audio_file)
        except OSError:  # FIX: narrow the bare except to filesystem errors
            pass
238
 
239
- def bilingual_tts_gtts(text, output_file="audio0.mp3", target_lang=None, max_workers=8):
240
- """
241
- Generate bilingual TTS audio using gTTS with parallel processing.
242
-
243
- Args:
244
- text: Input text (can contain multiple languages)
245
- output_file: Output MP3 file path
246
- target_lang: Primary language code (auto-detected if None)
247
- max_workers: Number of parallel workers
248
-
249
- Returns:
250
- Path to generated audio file or None on error
251
- """
252
- print("Starting gTTS bilingual audio generation...")
253
 
254
  try:
255
- # Chunk the text
256
- chunks = smart_text_chunking(text, max_chars=100)
257
  if not chunks:
258
  print("Error: No valid text chunks after cleaning")
259
  return None
260
 
261
- print(f"Processing {len(chunks)} text chunks...")
 
 
262
 
263
- # Detect languages for each chunk
264
- chunk_args = []
265
- for idx, chunk in enumerate(chunks):
266
- # Detect language for this chunk
267
- detected_lang = detect_language(chunk)
268
- # Use target language if specified, otherwise use detected
269
- lang_code = target_lang if target_lang else detected_lang
270
- chunk_args.append((chunk, lang_code, idx))
271
 
272
- # Generate audio chunks in parallel
273
- with ThreadPoolExecutor(max_workers=max_workers) as executor:
274
- audio_files = list(executor.map(generate_audio_chunk, chunk_args))
 
 
 
 
 
 
275
 
276
  # Filter successful files
277
- processed_audio_files = [f for f in audio_files if f and os.path.exists(f)]
278
 
279
  if not processed_audio_files:
280
  print("Error: No audio was successfully generated")
@@ -282,9 +212,9 @@ def bilingual_tts_gtts(text, output_file="audio0.mp3", target_lang=None, max_wor
282
 
283
  print(f"Successfully generated {len(processed_audio_files)} audio segments")
284
 
285
- # Process audio segments in parallel
286
  with ThreadPoolExecutor(max_workers=min(len(processed_audio_files), 8)) as executor:
287
- audio_segments = list(executor.map(process_audio_segment, processed_audio_files))
288
 
289
  # Filter out None segments
290
  audio_segments = [seg for seg in audio_segments if seg is not None]
@@ -293,107 +223,96 @@ def bilingual_tts_gtts(text, output_file="audio0.mp3", target_lang=None, max_wor
293
  print("Error: No audio segments were successfully processed")
294
  return None
295
 
296
- # Merge audio segments
297
  print("Merging audio segments...")
298
  merged_audio = audio_segments[0]
299
- pause = AudioSegment.silent(duration=300) # 300ms pause between segments
300
 
301
  for segment in audio_segments[1:]:
302
  merged_audio += pause + segment
303
 
304
- # Apply final processing for high quality
305
  print("Applying final audio processing...")
306
-
307
- # Normalize audio
308
- merged_audio = normalize(merged_audio)
309
-
310
- # Apply dynamic range compression for better clarity
311
  merged_audio = merged_audio.compress_dynamic_range(
312
- threshold=-20.0,
313
- ratio=3.0,
314
- attack=5.0,
315
  release=50.0
316
  )
317
-
318
- # Final normalization
319
  merged_audio = normalize(merged_audio)
320
 
321
- # Export with high quality settings
322
- merged_audio.export(
323
- output_file,
324
- format="mp3",
325
- bitrate="192k",
326
- parameters=["-q:a", "0"] # Highest quality
327
- )
328
-
329
  print(f"✅ Audio successfully generated: {output_file}")
 
330
  return output_file
331
 
332
  except Exception as main_error:
333
  print(f"Main error in bilingual TTS: {main_error}")
334
- import traceback
335
- traceback.print_exc()
336
  return None
337
 
338
def generate_tts_gtts(id, lines, lang):
    """Generate one TTS audio file with gTTS and report its duration.

    Args:
        id: Audio ID/index (names the output file and indexes `lines`).
        lines: List of text lines, or a single string.
        lang: Language specification; may bundle text as
            "text&&&LanguageName".

    Returns:
        Tuple of (duration_seconds, audio_path), or (None, None) on error.
    """
    # Ensure audio directory exists
    os.makedirs(AUDIO_DIR, exist_ok=True)

    audio_path = os.path.join(AUDIO_DIR, f"audio{id}.mp3")

    # "text&&&LanguageName" bundles the text with its language name;
    # otherwise `lang` is just a language name and the text comes
    # from `lines`.
    if "&&&" in lang:
        pieces = lang.split("&&&")
        text, lang_name = pieces[0].strip(), pieces[1].strip()
        lang_code = LANGUAGE_MAP.get(lang_name, DEFAULT_LANG)
    else:
        if isinstance(lines, list) and id < len(lines):
            text = lines[id]
        else:
            text = lines
        lang_code = LANGUAGE_MAP.get(lang, DEFAULT_LANG)

    print(f"\nGenerating audio {id} in language: {lang_code}")
    print(f"Text preview: {text[:100]}...")

    # Synthesize and merge via the parallel gTTS pipeline.
    output = bilingual_tts_gtts(text, audio_path, lang_code, max_workers=8)

    if not (output and os.path.exists(audio_path)):
        return None, None

    try:
        duration = MP3(audio_path).info.length
        print(f"Generated audio duration: {duration:.2f} seconds")
        return duration, audio_path
    except Exception as e:
        print(f"Error reading audio file: {e}")
        return None, None
383
 
384
def audio_func(id, lines, lang):
    """Public entry point: generate audio via the gTTS pipeline.

    Args:
        id: Audio ID/index.
        lines: Text content (string or list of lines).
        lang: Language specification (name, or "text&&&LanguageName").

    Returns:
        Tuple of (duration_seconds, audio_path); (None, None) on failure.
    """
    result = generate_tts_gtts(id, lines, lang)
    return result
397
 
398
 
399
  #-----------------------------
 
46
  import unicodedata
47
  import tempfile
48
  import os
49
+ import asyncio
50
+ from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
51
  from functools import lru_cache
52
+ import edge_tts
53
  from pydub import AudioSegment
54
  from pydub.effects import normalize
55
  from mutagen.mp3 import MP3
 
56
 
57
+ VOICE_EN = "en-IN-NeerjaNeural"
 
58
 
59
+ # Pre-compiled regex patterns for speed (compiled once, reused many times)
 
60
  URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
61
  TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
62
  BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
 
65
  SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
66
  SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
67
 
68
@lru_cache(maxsize=1024)  # cache cleaned text to avoid re-processing repeats
def clean_text_for_tts(text):
    """Clean text before TTS using pre-compiled regexes (results cached).

    Strips URLs, markup, brackets, special characters, literal escape
    sequences, and TTS/SSML keywords, then collapses whitespace.

    Args:
        text: Raw input text (any type; coerced with str()).

    Returns:
        The cleaned string ("" for empty/falsy input).
    """
    if not text:
        return ""
    text = str(text).strip()
    text = html.unescape(text)

    # Pre-compiled module-level patterns (compiled once, reused)
    text = URL_PATTERN.sub('', text)
    text = TAG_PATTERN.sub('', text)
    text = BRACKET_PATTERN.sub('', text)
    text = SPECIAL_CHAR_PATTERN.sub('', text)
    # Literal backslash escapes that survive upstream serialization
    text = text.replace('\\n', ' ').replace('\\t', ' ').replace('\\r', ' ')

    # FIX: the old substring `str.replace` loop mangled ordinary words
    # containing a keyword (e.g. "invoice" -> "in"); match whole words only,
    # in the same two casings the original removed.
    text = re.sub(
        r'\b(?:voice|speak|prosody|ssml|xmlns|VOICE|SPEAK|PROSODY|SSML|XMLNS)\b',
        '',
        text,
    )

    text = WHITESPACE_PATTERN.sub(' ', text)
    return text.strip()
90
 
91
async def generate_safe_audio(text, voice, semaphore):
    """Synthesize one chunk of text into a temp MP3 via edge-tts.

    Args:
        text: Raw chunk text; cleaned before synthesis.
        voice: edge-tts voice short name.
        semaphore: asyncio.Semaphore bounding concurrent TTS requests.

    Returns:
        Path to the generated temp MP3, or None on empty text or failure.
    """
    async with semaphore:  # throttle concurrent requests to the TTS service
        speakable = clean_text_for_tts(text)
        if not speakable:
            return None

        handle = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
        out_path = handle.name
        handle.close()

        try:
            await edge_tts.Communicate(speakable, voice=voice).save(out_path)
            return out_path
        except Exception as e:
            print(f"Error generating audio: {e}")
            if os.path.exists(out_path):
                os.unlink(out_path)
            return None
111
 
112
  @lru_cache(maxsize=256)
113
+ def smart_text_chunking(text, max_chars=80):
114
+ """Cached text chunking for speed."""
115
  text = clean_text_for_tts(text)
116
  if not text:
117
+ return tuple() # Return tuple for hashability (required by lru_cache)
118
 
119
  sentences = SENTENCE_PATTERN.split(text)
120
  chunks = []
 
151
 
152
  return tuple(chunk for chunk in chunks if chunk.strip())
153
 
154
def process_audio_segment_fast(audio_file):
    """Normalize one temp audio file and delete it afterwards.

    Args:
        audio_file: Path to a temporary audio file; it is always removed
            in the `finally` block regardless of success.

    Returns:
        A normalized pydub AudioSegment, or None on failure.
    """
    try:
        segment = AudioSegment.from_file(audio_file)
        segment = normalize(segment)

        # Only strip silence for longer segments; short clips could be
        # swallowed entirely by the stripper.
        if len(segment) > 200:
            try:
                segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
            except Exception:  # FIX: bare `except:` also caught KeyboardInterrupt
                pass  # best-effort: keep the un-stripped segment

        return segment
    except Exception as e:
        print(f"Warning: Error processing audio segment: {e}")
        return None
    finally:
        # Cleanup temp file immediately to bound disk usage
        try:
            if os.path.exists(audio_file):
                os.unlink(audio_file)
        except OSError:  # FIX: narrow the bare except to filesystem errors
            pass
178
 
179
async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None, max_concurrent=10):
    """Generate (optionally bilingual) TTS audio with parallel edge-tts calls.

    Args:
        text: Input text; may mix English with Tamil script.
        output_file: Output MP3 path.
        VOICE_TA: Voice short name to use; when it is a "ta-IN" voice,
            Tamil-script chunks use it and other chunks fall back to the
            default English voice.
        max_concurrent: Cap on simultaneous TTS requests (rate limiting).

    Returns:
        `output_file` on success, None on failure.
    """
    print("Starting optimized bilingual TTS processing...")

    try:
        chunks = smart_text_chunking(text)
        if not chunks:
            print("Error: No valid text chunks after cleaning")
            return None

        print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")

        is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA

        # Semaphore to limit concurrent TTS requests (prevents rate limiting)
        semaphore = asyncio.Semaphore(max_concurrent)

        # One synthesis task per chunk; a chunk gets the Tamil voice only
        # when a Tamil voice was supplied AND the chunk contains Tamil script.
        tasks = []
        for chunk in chunks:
            has_tamil = any('\u0B80' <= ch <= '\u0BFF' for ch in chunk)
            voice = VOICE_TA if (is_bilingual_tamil and has_tamil) else (VOICE_TA or VOICE_EN)
            tasks.append(generate_safe_audio(chunk, voice, semaphore))

        # return_exceptions=True so one failed chunk doesn't cancel the rest
        audio_files = await asyncio.gather(*tasks, return_exceptions=True)

        # Keep only successfully written file paths (drop None and exceptions)
        processed_audio_files = [f for f in audio_files if isinstance(f, str) and f]

        if not processed_audio_files:
            print("Error: No audio was successfully generated")
            # FIX: guard return restored — without it the code fell through
            # and reported "Successfully generated 0 audio segments".
            return None

        print(f"Successfully generated {len(processed_audio_files)} audio segments")

        # Decode/normalize segments in parallel threads (disk + CPU bound)
        with ThreadPoolExecutor(max_workers=min(len(processed_audio_files), 8)) as executor:
            audio_segments = list(executor.map(process_audio_segment_fast, processed_audio_files))

        # Filter out None segments
        audio_segments = [seg for seg in audio_segments if seg is not None]

        if not audio_segments:
            print("Error: No audio segments were successfully processed")
            return None

        # Merge with a short pause between segments
        print("Merging audio segments...")
        merged_audio = audio_segments[0]
        pause = AudioSegment.silent(duration=200)

        for segment in audio_segments[1:]:
            merged_audio += pause + segment

        # Final compression + normalization for consistent loudness
        print("Applying final audio processing...")
        merged_audio = merged_audio.compress_dynamic_range(
            threshold=-20.0,
            ratio=4.0,
            attack=5.0,
            release=50.0
        )
        merged_audio = normalize(merged_audio)

        # Export with high quality
        merged_audio.export(output_file, format="mp3", bitrate="192k")

        print(f"✅ Audio successfully generated: {output_file}")

        return output_file

    except Exception as main_error:
        print(f"Main error in bilingual TTS: {main_error}")
        # FIX: restore the stack trace the gTTS predecessor printed; a bare
        # message makes async failures here very hard to debug.
        import traceback
        traceback.print_exc()
        return None
253
 
254
async def generate_tts_optimized(id, lines, lang):
    """Resolve voice and text for one audio item, then synthesize it.

    Args:
        id: Audio ID/index (names the output file and indexes `lines`).
        lines: Text content (list of lines, or a single string).
        lang: Language name, or "text&&&LanguageName" bundling both.

    Returns:
        Tuple of (duration_seconds, audio_path), or (None, None) on error.
    """
    # Language name -> edge-tts neural voice short name
    voice = {
        "English": "en-US-JennyNeural",
        "Tamil": "ta-IN-PallaviNeural",
        "Hindi": "hi-IN-SwaraNeural",
        "Malayalam": "ml-IN-SobhanaNeural",
        "Kannada": "kn-IN-SapnaNeural",
        "Telugu": "te-IN-ShrutiNeural",
        "Bengali": "bn-IN-TanishaaNeural",
        "Marathi": "mr-IN-AarohiNeural",
        "Gujarati": "gu-IN-DhwaniNeural",
        "Punjabi": "pa-IN-VaaniNeural",
        "Urdu": "ur-IN-GulNeural",
        "French": "fr-FR-DeniseNeural",
        "German": "de-DE-KatjaNeural",
        "Spanish": "es-ES-ElviraNeural",
        "Italian": "it-IT-IsabellaNeural",
        "Russian": "ru-RU-SvetlanaNeural",
        "Japanese": "ja-JP-NanamiNeural",
        "Korean": "ko-KR-SunHiNeural",
        "Chinese": "zh-CN-XiaoxiaoNeural",
        "Arabic": "ar-SA-ZariyahNeural",
        "Portuguese": "pt-BR-FranciscaNeural",
        "Dutch": "nl-NL-FennaNeural",
        "Greek": "el-GR-AthinaNeural",
        "Hebrew": "he-IL-HilaNeural",
        "Turkish": "tr-TR-EmelNeural",
        "Polish": "pl-PL-AgnieszkaNeural",
        "Thai": "th-TH-AcharaNeural",
        "Vietnamese": "vi-VN-HoaiMyNeural",
        "Swedish": "sv-SE-SofieNeural",
        "Finnish": "fi-FI-NooraNeural",
        "Czech": "cs-CZ-VlastaNeural",
        "Hungarian": "hu-HU-NoemiNeural"
    }

    # FIX: the gTTS predecessor ensured the output directory existed; the
    # rewrite dropped it, so export fails on a fresh checkout.
    os.makedirs(AUDIO_DIR, exist_ok=True)

    audio_name = f"audio{id}.mp3"
    audio_path = os.path.join(AUDIO_DIR, audio_name)

    # "text&&&LanguageName" bundles the text with its language name;
    # otherwise `lang` is just the language name and text comes from `lines`.
    if "&&&" in lang:
        listf = lang.split("&&&")
        text = listf[0].strip()
        lang_name = listf[1].strip()
        voice_to_use = voice.get(lang_name, VOICE_EN)
    else:
        # FIX: restore the guard the previous implementation had — a plain
        # string `lines` (or out-of-range id) raised TypeError/IndexError.
        text = lines[id] if isinstance(lines, list) and id < len(lines) else lines
        voice_to_use = voice.get(lang, VOICE_EN)

    # Increase max_concurrent for more speed (adjust based on your system)
    output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=15)

    if output and os.path.exists(audio_path):
        # FIX: reading the MP3 header can fail on a truncated/corrupt file;
        # keep that failure inside the pipeline like the predecessor did.
        try:
            audio = MP3(audio_path)
            duration = audio.info.length
            return duration, audio_path
        except Exception as e:
            print(f"Error reading audio file: {e}")
            return None, None

    return None, None
312
 
313
def audio_func(id, lines, lang):
    """Synchronous wrapper for audio generation.

    Bridges the async edge-tts pipeline into synchronous callers.
    NOTE(review): asyncio.run() raises RuntimeError when invoked from a
    thread that already runs an event loop — confirm all callers are sync.

    Returns:
        Whatever generate_tts_optimized returns: (duration, audio_path)
        on success, (None, None) on failure.
    """
    return asyncio.run(generate_tts_optimized(id, lines, lang))
 
 
 
 
 
 
 
 
 
 
316
 
317
 
318
  #-----------------------------