backendprocesssuper

Sleeping

App Files Files Community

sreepathi-ravikumar committed on Oct 26, 2025

Commit

13cdf1c

verified ·

1 Parent(s): 7df00a9

Update video2.py

Browse files

Files changed (1) hide show

video2.py +118 -101

video2.py CHANGED Viewed

@@ -41,23 +41,25 @@ for path in [BASE_DIR, AUDIO_DIR, CLIPS_DIR]:
 warnings.filterwarnings('ignore')
 nest_asyncio.apply()
 import re
 import html
 import unicodedata
 import tempfile
 import os
 import asyncio
-from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
 from functools import lru_cache
-import edge_tts
 from pydub import AudioSegment
 from pydub.effects import normalize
 from mutagen.mp3 import MP3
-VOICE_EN = "en-IN-NeerjaNeural"
-# Pre-compiled regex patterns for speed (compiled once, reused many times)
 URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
 TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
 BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
@@ -66,11 +68,11 @@ WHITESPACE_PATTERN = re.compile(r'\s+')
 SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
 SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
-@lru_cache(maxsize=1024)  # Cache cleaned text to avoid re-processing
 def clean_text_for_tts(text):
     """Cleans text before TTS with optimized regex and caching."""
-    if not text:
-        return ""
     text = str(text).strip()
     text = html.unescape(text)
@@ -87,36 +89,42 @@ def clean_text_for_tts(text):
     text = unicodedata.normalize('NFKD', text)
     text = WHITESPACE_PATTERN.sub(' ', text)
-    return text.strip()
-async def generate_safe_audio(text, voice, semaphore):
-    """Generate clean audio with rate limiting."""
-    async with semaphore:  # Limit concurrent TTS requests
-        cleaned_text = clean_text_for_tts(text)
-        if not cleaned_text:
-            return None
-        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
-        fname = temp_file.name
-        temp_file.close()
-        try:
-            comm = edge_tts.Communicate(cleaned_text, voice=voice)
-            await comm.save(fname)
             return fname
-        except Exception as e:
-            print(f"Error generating audio: {e}")
-            if os.path.exists(fname):
-                os.unlink(fname)
             return None
 @lru_cache(maxsize=256)
 def smart_text_chunking(text, max_chars=80):
     """Cached text chunking for speed."""
     text = clean_text_for_tts(text)
     if not text:
-        return tuple()  # Return tuple for hashability (required by lru_cache)
     sentences = SENTENCE_PATTERN.split(text)
     chunks = []
@@ -150,7 +158,7 @@ def smart_text_chunking(text, max_chars=80):
                     if current_chunk:
                         chunks.append(current_chunk.strip())
-    return tuple(chunk for chunk in chunks if chunk.strip())
 def process_audio_segment_fast(audio_file):
     """Fast audio processing in separate thread."""
@@ -165,9 +173,10 @@ def process_audio_segment_fast(audio_file):
             except:
                 pass  # Skip if fails
         return segment
     except Exception as e:
-        print(f"Warning: Error processing audio segment: {e}")
         return None
     finally:
         # Cleanup temp file immediately
@@ -177,9 +186,9 @@ def process_audio_segment_fast(audio_file):
         except:
             pass
-async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None, max_concurrent=10):
-    """Ultra-optimized bilingual TTS with parallel processing."""
-    print("Starting optimized bilingual TTS processing...")
     try:
         chunks = smart_text_chunking(text)
@@ -189,33 +198,32 @@ async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None,
         print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")
-        is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA
-        # Semaphore to limit concurrent TTS requests (prevents rate limiting)
-        semaphore = asyncio.Semaphore(max_concurrent)
-        # Prepare all tasks
-        tasks = []
-        for i, chunk in enumerate(chunks):
-            is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
-            voice = VOICE_TA if (is_bilingual_tamil and is_tamil) else (VOICE_TA or VOICE_EN)
-            tasks.append(generate_safe_audio(chunk, voice, semaphore))
-        # Generate all audio files concurrently
-        audio_files = await asyncio.gather(*tasks, return_exceptions=True)
-        # Filter successful files
-        processed_audio_files = [f for f in audio_files if isinstance(f, str) and f]
-        if not processed_audio_files:
             print("Error: No audio was successfully generated")
             return None
-        print(f"Successfully generated {len(processed_audio_files)} audio segments")
-        # Process audio segments in parallel using ThreadPoolExecutor
-        with ThreadPoolExecutor(max_workers=min(len(processed_audio_files), 8)) as executor:
-            audio_segments = list(executor.map(process_audio_segment_fast, processed_audio_files))
         # Filter out None segments
         audio_segments = [seg for seg in audio_segments if seg is not None]
@@ -232,7 +240,7 @@ async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None,
         for segment in audio_segments[1:]:
             merged_audio += pause + segment
-        # Apply final processing (compression and normalization)
         print("Applying final audio processing...")
         merged_audio = merged_audio.compress_dynamic_range(
             threshold=-20.0,
@@ -242,79 +250,88 @@ async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None,
         )
         merged_audio = normalize(merged_audio)
-        # Export with high quality
         merged_audio.export(output_file, format="mp3", bitrate="192k")
-        print(f"✅ Audio successfully generated: {output_file}")
-        return output_file
     except Exception as main_error:
         print(f"Main error in bilingual TTS: {main_error}")
         return None
 async def generate_tts_optimized(id, lines, lang):
-    """Optimized TTS generation function."""
-    voice = {
-        "English": "en-US-JennyNeural",
-        "Tamil": "ta-IN-PallaviNeural",
-        "Hindi": "hi-IN-SwaraNeural",
-        "Malayalam": "ml-IN-SobhanaNeural",
-        "Kannada": "kn-IN-SapnaNeural",
-        "Telugu": "te-IN-ShrutiNeural",
-        "Bengali": "bn-IN-TanishaaNeural",
-        "Marathi": "mr-IN-AarohiNeural",
-        "Gujarati": "gu-IN-DhwaniNeural",
-        "Punjabi": "pa-IN-VaaniNeural",
-        "Urdu": "ur-IN-GulNeural",
-        "French": "fr-FR-DeniseNeural",
-        "German": "de-DE-KatjaNeural",
-        "Spanish": "es-ES-ElviraNeural",
-        "Italian": "it-IT-IsabellaNeural",
-        "Russian": "ru-RU-SvetlanaNeural",
-        "Japanese": "ja-JP-NanamiNeural",
-        "Korean": "ko-KR-SunHiNeural",
-        "Chinese": "zh-CN-XiaoxiaoNeural",
-        "Arabic": "ar-SA-ZariyahNeural",
-        "Portuguese": "pt-BR-FranciscaNeural",
-        "Dutch": "nl-NL-FennaNeural",
-        "Greek": "el-GR-AthinaNeural",
-        "Hebrew": "he-IL-HilaNeural",
-        "Turkish": "tr-TR-EmelNeural",
-        "Polish": "pl-PL-AgnieszkaNeural",
-        "Thai": "th-TH-AcharaNeural",
-        "Vietnamese": "vi-VN-HoaiMyNeural",
-        "Swedish": "sv-SE-SofieNeural",
-        "Finnish": "fi-FI-NooraNeural",
-        "Czech": "cs-CZ-VlastaNeural",
-        "Hungarian": "hu-HU-NoemiNeural"
     }
     audio_name = f"audio{id}.mp3"
     audio_path = os.path.join(AUDIO_DIR, audio_name)
     if "&&&" in lang:
         listf = lang.split("&&&")
         text = listf[0].strip()
         lang_name = listf[1].strip()
-        voice_to_use = voice.get(lang_name, VOICE_EN)
     else:
         text = lines[id]
-        voice_to_use = voice.get(lang, VOICE_EN)
-    # Increase max_concurrent for more speed (adjust based on your system)
-    output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=15)
     if output and os.path.exists(audio_path):
         audio = MP3(audio_path)
         duration = audio.info.length
         return duration, audio_path
     return None, None
 def audio_func(id, lines, lang):
-    """Synchronous wrapper for audio generation."""
-    return asyncio.run(generate_tts_optimized(id, lines, lang))
 #-----------------------------
 #---------------------------------
 def video_func(id, lines, lang):

 warnings.filterwarnings('ignore')
 nest_asyncio.apply()
 import re
 import html
 import unicodedata
 import tempfile
 import os
 import asyncio
+from concurrent.futures import ThreadPoolExecutor
 from functools import lru_cache
+from gtts import gTTS  # ADD: Import gTTS for replacement
 from pydub import AudioSegment
 from pydub.effects import normalize
 from mutagen.mp3 import MP3
+# Global constants (unchanged)
+AUDIO_DIR = os.path.join("/app/data", "sound")  # Ensure this matches your BASE_DIR
+os.makedirs(AUDIO_DIR, exist_ok=True)
+VOICE_EN = "en"  # CHANGE: For gTTS, use lang codes instead of full voice names
+# Pre-compiled regex patterns (unchanged)
 URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
 TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
 BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
 SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
 SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
+@lru_cache(maxsize=1024)
 def clean_text_for_tts(text):
     """Cleans text before TTS with optimized regex and caching."""
+    if not text or text.isspace():
+        return "Default text for empty input"  # Fallback for empty input
     text = str(text).strip()
     text = html.unescape(text)
     text = unicodedata.normalize('NFKD', text)
     text = WHITESPACE_PATTERN.sub(' ', text)
+    text = text.strip()
+    if not text:
+        return "Default text for empty input"  # Ensure non-empty output
+    return text
def generate_safe_audio(text, lang):
    """Synthesize ``text`` into a temporary MP3 file with gTTS (synchronous).

    Args:
        text: Raw text to speak; it is passed through ``clean_text_for_tts``
            before synthesis.
        lang: gTTS language code (e.g. ``'en'`` for English, ``'ta'`` for
            Tamil).

    Returns:
        The path of the generated MP3 file, or ``None`` when synthesis
        raised or produced an empty/missing file. On success the file is
        left on disk (it is not deleted here).
    """
    cleaned_text = clean_text_for_tts(text)
    print(f"Generating audio for text: {cleaned_text[:50]}... with lang: {lang}")  # Debug log

    # Reserve a unique path. delete=False keeps the file after the handle is
    # closed so gTTS can write to it by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
        fname = temp_file.name

    try:
        # slow=False requests normal speaking speed.
        gTTS(text=cleaned_text, lang=lang, slow=False).save(fname)
        if os.path.exists(fname) and os.path.getsize(fname) > 0:
            print(f"Audio generated: {fname}")  # Debug log
            return fname
        print(f"Audio file {fname} is empty or missing")  # Debug log
    except Exception as e:
        # Best-effort boundary: any synthesis failure is logged and reported
        # to the caller as None.
        print(f"Error generating audio for '{cleaned_text[:20]}...': {e}")  # Debug log

    # Failure path: remove the placeholder file. It may already be gone, and
    # that must not surface as a second, misleading "generation" error.
    try:
        os.unlink(fname)
    except OSError:
        pass
    return None
 @lru_cache(maxsize=256)
 def smart_text_chunking(text, max_chars=80):
     """Cached text chunking for speed."""
     text = clean_text_for_tts(text)
     if not text:
+        return ("Default text for chunking",)  # Non-empty fallback
     sentences = SENTENCE_PATTERN.split(text)
     chunks = []
                     if current_chunk:
                         chunks.append(current_chunk.strip())
+    return tuple(chunks) or ("Default text for chunking",)  # Non-empty fallback
 def process_audio_segment_fast(audio_file):
     """Fast audio processing in separate thread."""
             except:
                 pass  # Skip if fails
+        print(f"Processed audio segment: {audio_file}")  # Debug log
         return segment
     except Exception as e:
+        print(f"Warning: Error processing audio segment {audio_file}: {e}")
         return None
     finally:
         # Cleanup temp file immediately
         except:
             pass
+def bilingual_tts_optimized(text, output_file="audio0.mp3", LANG_TA=None, max_concurrent=5):
+    """Ultra-optimized bilingual TTS with gTTS and parallel processing via threads."""
+    print(f"Starting gTTS bilingual TTS for output: {output_file}")  # Debug log
     try:
         chunks = smart_text_chunking(text)
         print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")
+        is_bilingual_tamil = LANG_TA is not None and LANG_TA == 'ta'
+        # Prepare all audio files using ThreadPoolExecutor (since gTTS is sync)
+        audio_files = []
+        with ThreadPoolExecutor(max_workers=max_concurrent) as executor:
+            futures = []
+            for chunk in chunks:
+                is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
+                lang = LANG_TA if (is_bilingual_tamil and is_tamil) else (LANG_TA or VOICE_EN)
+                futures.append(executor.submit(generate_safe_audio, chunk, lang))
+            # Collect results
+            for future in futures:
+                result = future.result()
+                if result:
+                    audio_files.append(result)
+        if not audio_files:
             print("Error: No audio was successfully generated")
             return None
+        print(f"Successfully generated {len(audio_files)} audio segments")
+        # Process audio segments in parallel using another ThreadPoolExecutor
+        with ThreadPoolExecutor(max_workers=min(len(audio_files), 4)) as executor:
+            audio_segments = list(executor.map(process_audio_segment_fast, audio_files))
         # Filter out None segments
         audio_segments = [seg for seg in audio_segments if seg is not None]
         for segment in audio_segments[1:]:
             merged_audio += pause + segment
+        # Apply final processing (compression and normalization) for quality
         print("Applying final audio processing...")
         merged_audio = merged_audio.compress_dynamic_range(
             threshold=-20.0,
         )
         merged_audio = normalize(merged_audio)
+        # Export with high quality (192k bitrate for better quality matching edge_tts)
+        os.makedirs(os.path.dirname(output_file), exist_ok=True)
         merged_audio.export(output_file, format="mp3", bitrate="192k")
+        if os.path.exists(output_file) and os.path.getsize(output_file) > 0:
+            print(f"✅ Audio successfully generated: {output_file}")
+            return output_file
+        else:
+            print(f"Error: Audio file {output_file} is empty or not created")
+            return None
     except Exception as main_error:
         print(f"Main error in bilingual TTS: {main_error}")
         return None
 async def generate_tts_optimized(id, lines, lang):
     """Generate the narration MP3 for one item and report its duration.

     This is still a coroutine function, so callers must await it or drive
     it with ``asyncio.run``. The gTTS pipeline it calls
     (``bilingual_tts_optimized``) is a plain synchronous function, so the
     coroutine blocks until the audio file has been written.

     Args:
         id: Index into ``lines``; also used to name the output file
             ``audio{id}.mp3`` inside ``AUDIO_DIR``.
         lines: Sequence of texts; ``lines[id]`` is spoken unless ``lang``
             carries inline text.
         lang: Either a language name (a key of ``lang_map`` below) or a
             string of the form ``"<text>&&&<language name>"``, in which
             case the embedded text is spoken instead of ``lines[id]``.
             Unknown language names fall back to ``VOICE_EN``.

     Returns:
         ``(duration_seconds, audio_path)`` on success, ``(None, None)`` when
         no audio file was produced.
     """
     # Language name -> gTTS language code (gTTS takes language codes rather
     # than named neural voices).
     lang_map = {
         "English": "en",
         "Tamil": "ta",
         "Hindi": "hi",
         "Malayalam": "ml",
         "Kannada": "kn",
         "Telugu": "te",
         "Bengali": "bn",
         "Marathi": "mr",
         "Gujarati": "gu",
         "Punjabi": "pa",
         "Urdu": "ur",
         "French": "fr",
         "German": "de",
         "Spanish": "es",
         "Italian": "it",
         "Russian": "ru",
         "Japanese": "ja",
         "Korean": "ko",
         "Chinese": "zh",
         "Arabic": "ar",
         "Portuguese": "pt",
         "Dutch": "nl",
         "Greek": "el",
         "Hebrew": "he",
         "Turkish": "tr",
         "Polish": "pl",
         "Thai": "th",
         "Vietnamese": "vi",
         "Swedish": "sv",
         "Finnish": "fi",
         "Czech": "cs",
         "Hungarian": "hu",
     }

     audio_path = os.path.join(AUDIO_DIR, f"audio{id}.mp3")
     print(f"Generating audio for id {id}, lang: {lang}")  # Debug log

     if "&&&" in lang:
         # Inline form: the text to speak and the language name travel
         # together in ``lang``.
         text, lang_name = (part.strip() for part in lang.split("&&&")[:2])
     else:
         text = lines[id]
         lang_name = lang
     lang_to_use = lang_map.get(lang_name, VOICE_EN)

     # str() keeps this debug line from raising when the text is not a
     # string (e.g. None), before the TTS pipeline gets a chance to handle it.
     print(f"Text for TTS: {str(text)[:50]}...")  # Debug log

     # Synchronous call: bilingual_tts_optimized is not a coroutine.
     output = bilingual_tts_optimized(text, audio_path, lang_to_use, max_concurrent=5)

     if output and os.path.exists(audio_path):
         duration = MP3(audio_path).info.length
         print(f"Audio duration: {duration}s, path: {audio_path}")  # Debug log
         return duration, audio_path

     print(f"Audio generation failed for id {id}")  # Debug log
     return None, None
 def audio_func(id, lines, lang):
     """Synchronous wrapper for audio generation.

     ``generate_tts_optimized`` is declared ``async def``, so calling it only
     creates a coroutine object. It has to be run to completion here,
     otherwise callers receive the coroutine instead of the result tuple.

     Args:
         id: Index of the item to synthesize (forwarded unchanged).
         lines: Sequence of texts (forwarded unchanged).
         lang: Language name or ``"<text>&&&<language name>"`` string
             (forwarded unchanged).

     Returns:
         Whatever ``generate_tts_optimized`` returns: ``(duration, audio_path)``
         on success or ``(None, None)`` on failure.
     """
     # NOTE(review): this module calls nest_asyncio.apply() at import time,
     # which presumably lets asyncio.run() work even when an event loop is
     # already running - confirm for the deployment environment.
     return asyncio.run(generate_tts_optimized(id, lines, lang))
 #-----------------------------
 #---------------------------------
 def video_func(id, lines, lang):