backendprocesssuper

Sleeping

App Files Files Community

sreepathi-ravikumar committed on Oct 12, 2025

Commit

b6cac61

verified ·

1 Parent(s): 68b396b

Update video2.py

Browse files

Files changed (1) hide show

video2.py +172 -110

video2.py CHANGED Viewed

@@ -42,165 +42,218 @@ warnings.filterwarnings('ignore')
 nest_asyncio.apply()
 VOICE_EN = "en-IN-NeerjaNeural"
 def clean_text_for_tts(text):
-    """Cleans text before TTS so only the spoken words are read."""
     if not text:
         return ""
     text = str(text).strip()
     text = html.unescape(text)
-    # Remove URLs
-    text = re.sub(r'https?://[^\s<>"\']+', '', text)
-    text = re.sub(r'www\.[^\s<>"\']+', '', text)
-    # Remove XML/HTML/SSML tags
-    text = re.sub(r'<[^>]*>', '', text)
-    text = re.sub(r'[<>]', '', text)
-    text = re.sub(r'[\{\}\[\]]', '', text)
-    # Remove problematic special characters
-    text = re.sub(r'[#@$%^&*_+=|\\`~]', '', text)
-    # Replace escape sequences
     text = text.replace('\\n', ' ').replace('\\t', ' ').replace('\\r', ' ')
-    # Remove unwanted SSML keywords
     for keyword in ['voice', 'speak', 'prosody', 'ssml', 'xmlns']:
-        text = re.sub(f'\\b{keyword}\\b', '', text, flags=re.IGNORECASE)
-    # Unicode normalization and spacing
     text = unicodedata.normalize('NFKD', text)
-    text = re.sub(r'\s+', ' ', text)
     return text.strip()
-async def generate_safe_audio(text, voice):
-    """Generate clean, plain text audio using edge-tts."""
-    cleaned_text = clean_text_for_tts(text)
-    if not cleaned_text:
-        return None
-    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
-    fname = temp_file.name
-    temp_file.close()
-    try:
-        comm = edge_tts.Communicate(cleaned_text, voice=voice)
-        await comm.save(fname)
-        return fname
-    except Exception as e:
-        print(f"Error generating audio: {e}")
-        return None
 def smart_text_chunking(text, max_chars=80):
-    """Split text into sensible, natural-length chunks for TTS."""
     text = clean_text_for_tts(text)
     if not text:
-        return []
-    sentences = re.split(r'(?<=[.!?])\s+', text)
     chunks = []
     for sentence in sentences:
         sentence = sentence.strip()
         if not sentence:
             continue
         if len(sentence) <= max_chars:
             chunks.append(sentence)
         else:
-            sub_parts = re.split(r'(?<=[,;:])\s+', sentence)
             for part in sub_parts:
                 part = part.strip()
-                if part:
-                    if len(part) <= max_chars:
-                        chunks.append(part)
-                    else:
-                        words = part.split()
-                        current_chunk = ""
-                        for word in words:
-                            if len(current_chunk + " " + word) <= max_chars:
-                                current_chunk += " " + word if current_chunk else word
-                            else:
-                                if current_chunk:
-                                    chunks.append(current_chunk.strip())
-                                current_chunk = word
-                        if current_chunk:
-                            chunks.append(current_chunk.strip())
-    return [chunk for chunk in chunks if chunk.strip()]
-async def bilingual_tts_fixed(text, output_file="audio0.mp3", VOICE_TA=None):
-    """Main fixed function for bilingual TTS output with concurrent audio generation for speed."""
-    print("Starting fixed bilingual TTS processing...")
     try:
         chunks = smart_text_chunking(text)
         if not chunks:
             print("Error: No valid text chunks after cleaning")
             return None
-        print(f"Processing {len(chunks)} text chunks...")
         is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA
         tasks = []
         for i, chunk in enumerate(chunks):
             is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
-            if is_bilingual_tamil:
-                voice = VOICE_TA if is_tamil else VOICE_EN
-            else:
-                voice = VOICE_TA
-            lang_label = "Tamil" if is_tamil else "English"
-            print(f"Chunk {i+1}/{len(chunks)} ({lang_label}): {chunk[:40]}...")
-            tasks.append(generate_safe_audio(chunk, voice))
         audio_files = await asyncio.gather(*tasks, return_exceptions=True)
-        processed_audio_files = [f for f in audio_files if isinstance(f, str)]  # Filter successful files
-        errors = [e for e in audio_files if isinstance(e, Exception)]
-        if errors:
-            for e in errors:
-                print(f"Warning: Audio generation error: {e}")
         if not processed_audio_files:
             print("Error: No audio was successfully generated")
             return None
-        merged_audio = None
-        for audio_file in processed_audio_files:
-            try:
-                segment = AudioSegment.from_file(audio_file)
-                segment = normalize(segment)
-                # Only strip silence if segment is reasonably long
-                if len(segment) > 200:
-                    try:
-                        segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
-                    except Exception as e:
-                        print(f" (Info) Skipped strip_silence: {e}")
-                if merged_audio is None:
-                    merged_audio = segment
-                else:
-                    pause = AudioSegment.silent(duration=200)
-                    merged_audio += pause + segment
-            except Exception as audio_error:
-                print(f"Warning: Error processing audio: {audio_error}")
-                continue
-        if merged_audio is None:
             print("Error: No audio segments were successfully processed")
             return None
-        # Improved quality: Apply overall compression and normalization
-        merged_audio = merged_audio.compress_dynamic_range(threshold=-20.0, ratio=4.0, attack=5.0, release=50.0)
         merged_audio = normalize(merged_audio)
-        merged_audio.export(output_file, format="mp3", bitrate="192k")  # Increased bitrate for better quality
         print(f"✅ Audio successfully generated: {output_file}")
-        # Cleanup temp files
-        for temp_file in processed_audio_files:
-            try:
-                if os.path.exists(temp_file):
-                    os.unlink(temp_file)
-            except:
-                pass
         return output_file
     except Exception as main_error:
         print(f"Main error in bilingual TTS: {main_error}")
         return None
-# USAGE EXAMPLE
-async def run_fixed_tts(text_input, output_file, lang):
-    await bilingual_tts_fixed(text_input, output_file, lang)
-async def generate_tts(id, lines, lang):
     voice = {
         "English": "en-US-JennyNeural",
         "Tamil": "ta-IN-PallaviNeural",
@@ -235,25 +288,33 @@ async def generate_tts(id, lines, lang):
         "Czech": "cs-CZ-VlastaNeural",
         "Hungarian": "hu-HU-NoemiNeural"
     }
     audio_name = f"audio{id}.mp3"
-    audio_path = os.path.join(AUDIO_DIR, audio_name)  # Assuming AUDIO_DIR is defined elsewhere
     if "&&&" in lang:
         listf = lang.split("&&&")
         text = listf[0].strip()
         lang_name = listf[1].strip()
-        voice_to_use = voice[lang_name]
     else:
-        text = lines[id]  # Assuming lines is a dict or list indexed by id
-        voice_to_use = voice[lang]
-    output = await run_fixed_tts(text, audio_path, voice_to_use)
-    if os.path.exists(audio_path):
         audio = MP3(audio_path)
         duration = audio.info.length
         return duration, audio_path
     return None, None
 def audio_func(id, lines, lang):
-    return asyncio.run(generate_tts(id, lines, lang))
 #-----------------------------
 #---------------------------------
 import os
@@ -288,4 +349,5 @@ def video_func(id, lines, lang):
         return final_video_path
     else:
         print("Video generation failed.")
-        return None

 nest_asyncio.apply()
+import re
+import html
+import unicodedata
+import tempfile
+import os
+import asyncio
+from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
+from functools import lru_cache
+import edge_tts
+from pydub import AudioSegment
+from pydub.effects import normalize
+from mutagen.mp3 import MP3
 VOICE_EN = "en-IN-NeerjaNeural"
# Pre-compiled regex patterns (compiled once at import time, reused on every call).
URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
SPECIAL_CHAR_PATTERN = re.compile(r'[#@$%^&*_+=|\\`~]')
WHITESPACE_PATTERN = re.compile(r'\s+')
SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
# SSML markup words that are stripped from the spoken text. Whole-word and
# case-insensitive, so "speaker" or "invoice" are left intact while "Voice"
# and "SSML" are still removed.
SSML_KEYWORD_PATTERN = re.compile(
    r'\b(?:voice|speak|prosody|ssml|xmlns)\b', re.IGNORECASE
)


@lru_cache(maxsize=1024)
def _clean_text_cached(text):
    """Return the cleaned form of *text* (a ``str``); results are memoised."""
    text = text.strip()
    text = html.unescape(text)
    # Literal backslash escapes (the two characters "\" + "n") become spaces.
    # This must run before SPECIAL_CHAR_PATTERN, which strips the backslash
    # and would otherwise leave a stray "n"/"t"/"r" glued to the next word.
    text = text.replace('\\n', ' ').replace('\\t', ' ').replace('\\r', ' ')
    text = URL_PATTERN.sub('', text)
    text = TAG_PATTERN.sub('', text)
    text = BRACKET_PATTERN.sub('', text)
    text = SPECIAL_CHAR_PATTERN.sub('', text)
    text = SSML_KEYWORD_PATTERN.sub('', text)
    text = unicodedata.normalize('NFKD', text)
    text = WHITESPACE_PATTERN.sub(' ', text)
    return text.strip()


def clean_text_for_tts(text):
    """Clean *text* so that only the words meant to be spoken remain.

    Removes URLs, XML/HTML/SSML tags, brackets, a fixed set of special
    characters and stand-alone SSML keywords, converts literal ``\\n`` /
    ``\\t`` / ``\\r`` sequences to spaces, applies NFKD normalisation and
    collapses runs of whitespace.

    Args:
        text: Any value; it is converted with ``str()`` before cleaning, so
            unhashable inputs are accepted as well.

    Returns:
        The cleaned string, or ``""`` when *text* is falsy.
    """
    if not text:
        return ""
    # Convert before hitting the cache so the cache key is always a str.
    return _clean_text_cached(str(text))


# Keep the lru_cache introspection helpers reachable from the public name.
clean_text_for_tts.cache_info = _clean_text_cached.cache_info
clean_text_for_tts.cache_clear = _clean_text_cached.cache_clear
async def _synthesize_to_temp_file(cleaned_text, voice):
    """Write speech for *cleaned_text* to a new temp ``.mp3``; return its path or None."""
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
        fname = temp_file.name
    succeeded = False
    try:
        comm = edge_tts.Communicate(cleaned_text, voice=voice)
        await comm.save(fname)
        succeeded = True
        return fname
    except Exception as e:
        print(f"Error generating audio: {e}")
        return None
    finally:
        # Runs on failure *and* on task cancellation, so an unfinished temp
        # file is never left behind.
        if not succeeded:
            try:
                os.unlink(fname)
            except OSError:
                pass  # already gone or not removable; nothing more to do


async def generate_safe_audio(text, voice, semaphore=None):
    """Generate speech audio for *text* with edge-tts.

    Args:
        text: Raw text; it is cleaned with ``clean_text_for_tts`` first.
        voice: Voice name passed to ``edge_tts.Communicate``.
        semaphore: Optional ``asyncio.Semaphore`` used to cap the number of
            concurrent TTS requests. When omitted the request runs
            unthrottled, which keeps the two-argument call form working.

    Returns:
        Path of the generated temporary ``.mp3`` file, or ``None`` when the
        cleaned text is empty or synthesis fails.
    """
    cleaned_text = clean_text_for_tts(text)
    if not cleaned_text:
        return None
    if semaphore is None:
        return await _synthesize_to_temp_file(cleaned_text, voice)
    async with semaphore:  # limit concurrent TTS requests
        return await _synthesize_to_temp_file(cleaned_text, voice)
@lru_cache(maxsize=256)
def smart_text_chunking(text, max_chars=80):
    """Split *text* into TTS-sized chunks; results are memoised.

    The text is cleaned, split into sentences, and any sentence longer than
    *max_chars* is split again at ``,``/``;``/``:`` and finally packed word
    by word. A tuple is returned (empty when nothing is left after cleaning).
    """
    cleaned = clean_text_for_tts(text)
    if not cleaned:
        return ()

    def pack_words(fragment):
        # Greedily join words while the joined length stays within max_chars.
        line = ""
        for token in fragment.split():
            candidate = token if not line else f"{line} {token}"
            if len(candidate) <= max_chars:
                line = candidate
                continue
            if line:
                yield line.strip()
            line = token
        if line:
            yield line.strip()

    def pieces_of(sentence):
        # A short sentence is a single chunk; a long one is split further.
        if len(sentence) <= max_chars:
            yield sentence
            return
        for fragment in map(str.strip, SUB_PATTERN.split(sentence)):
            if not fragment:
                continue
            if len(fragment) <= max_chars:
                yield fragment
            else:
                yield from pack_words(fragment)

    collected = []
    for sentence in map(str.strip, SENTENCE_PATTERN.split(cleaned)):
        if sentence:
            collected.extend(pieces_of(sentence))
    return tuple(filter(str.strip, collected))
def process_audio_segment_fast(audio_file):
    """Load, normalise and (if possible) silence-strip one audio file.

    The input file is deleted afterwards whether or not processing succeeded.

    Args:
        audio_file: Path of the temporary audio file to load.

    Returns:
        The processed ``AudioSegment``, or ``None`` when the file could not
        be loaded or normalised.
    """
    try:
        segment = AudioSegment.from_file(audio_file)
        segment = normalize(segment)
        # Only strip silence for longer segments
        if len(segment) > 200:
            try:
                segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
            except Exception as e:
                # Best effort: keep the unstripped segment, but say why.
                # NOTE(review): pydub's strip_silence appears to reject a
                # default padding larger than silence_len, so this call may
                # always land here - confirm against the installed pydub.
                print(f" (Info) Skipped strip_silence: {e}")
        return segment
    except Exception as e:
        print(f"Warning: Error processing audio segment: {e}")
        return None
    finally:
        # Cleanup temp file immediately
        try:
            os.unlink(audio_file)
        except OSError:
            pass  # file already removed or not removable
def _pick_voice_for_chunk(chunk, voice_ta):
    """Return the voice for one chunk: Tamil voice for Tamil text, else English."""
    if not voice_ta:
        return VOICE_EN
    if "ta-IN" not in voice_ta:
        # Not a Tamil voice: use the requested voice for every chunk.
        return voice_ta
    has_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
    # Bilingual mode: chunks without Tamil script go to the English voice.
    return voice_ta if has_tamil else VOICE_EN


async def bilingual_tts_optimized(text, output_file="audio0.mp3", VOICE_TA=None, max_concurrent=10):
    """Synthesise *text* to one MP3, switching voice per chunk when bilingual.

    The text is chunked, each chunk is sent to edge-tts concurrently, the
    resulting segments are post-processed in a thread pool, joined with
    200 ms pauses, compressed, normalised and exported.

    Args:
        text: Text to speak.
        output_file: Destination path of the merged MP3.
        VOICE_TA: Requested voice. When it is a ``ta-IN`` voice, chunks that
            contain no Tamil script are spoken with ``VOICE_EN`` instead.
            ``None`` falls back to ``VOICE_EN`` for everything.
        max_concurrent: Upper bound on simultaneous TTS requests; values
            below 1 are treated as 1.

    Returns:
        *output_file* on success, ``None`` on any failure.
    """
    print("Starting optimized bilingual TTS processing...")
    try:
        chunks = smart_text_chunking(text)
        if not chunks:
            print("Error: No valid text chunks after cleaning")
            return None
        # A semaphore of 0 would block forever and a negative one raises.
        max_concurrent = max(1, max_concurrent)
        print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")
        # Semaphore to limit concurrent TTS requests (prevents rate limiting)
        semaphore = asyncio.Semaphore(max_concurrent)
        tasks = [
            generate_safe_audio(chunk, _pick_voice_for_chunk(chunk, VOICE_TA), semaphore)
            for chunk in chunks
        ]
        # Generate all audio files concurrently
        audio_files = await asyncio.gather(*tasks, return_exceptions=True)
        for result in audio_files:
            if isinstance(result, Exception):
                print(f"Warning: Audio generation error: {result}")
        # Keep only successful results (non-empty file paths)
        processed_audio_files = [f for f in audio_files if isinstance(f, str) and f]
        if not processed_audio_files:
            print("Error: No audio was successfully generated")
            return None
        print(f"Successfully generated {len(processed_audio_files)} audio segments")
        if len(processed_audio_files) < len(chunks):
            missing = len(chunks) - len(processed_audio_files)
            print(f"Warning: {missing} of {len(chunks)} chunks produced no audio")
        # Process audio segments in parallel; executor.map preserves order.
        with ThreadPoolExecutor(max_workers=min(len(processed_audio_files), 8)) as executor:
            audio_segments = list(executor.map(process_audio_segment_fast, processed_audio_files))
        audio_segments = [seg for seg in audio_segments if seg is not None]
        if not audio_segments:
            print("Error: No audio segments were successfully processed")
            return None
        # Merge audio segments with a short pause between them
        print("Merging audio segments...")
        merged_audio = audio_segments[0]
        pause = AudioSegment.silent(duration=200)
        for segment in audio_segments[1:]:
            merged_audio += pause + segment
        # Apply final processing (compression and normalization)
        print("Applying final audio processing...")
        merged_audio = merged_audio.compress_dynamic_range(
            threshold=-20.0,
            ratio=4.0,
            attack=5.0,
            release=50.0
        )
        merged_audio = normalize(merged_audio)
        merged_audio.export(output_file, format="mp3", bitrate="192k")
        print(f"✅ Audio successfully generated: {output_file}")
        return output_file
    except Exception as main_error:
        print(f"Main error in bilingual TTS: {main_error}")
        return None
+async def generate_tts_optimized(id, lines, lang):
+    """Optimized TTS generation function."""
     voice = {
         "English": "en-US-JennyNeural",
         "Tamil": "ta-IN-PallaviNeural",
         "Czech": "cs-CZ-VlastaNeural",
         "Hungarian": "hu-HU-NoemiNeural"
     }
     audio_name = f"audio{id}.mp3"
+    audio_path = os.path.join(AUDIO_DIR, audio_name)
     if "&&&" in lang:
         listf = lang.split("&&&")
         text = listf[0].strip()
         lang_name = listf[1].strip()
+        voice_to_use = voice.get(lang_name, VOICE_EN)
     else:
+        text = lines[id]
+        voice_to_use = voice.get(lang, VOICE_EN)
+    # Increase max_concurrent for more speed (adjust based on your system)
+    output = await bilingual_tts_optimized(text, audio_path, voice_to_use, max_concurrent=15)
+    if output and os.path.exists(audio_path):
         audio = MP3(audio_path)
         duration = audio.info.length
         return duration, audio_path
     return None, None
 def audio_func(id, lines, lang):
+    """Synchronous wrapper for audio generation."""
+    return asyncio.run(generate_tts_optimized(id, lines, lang))
 #-----------------------------
 #---------------------------------
 import os
         return final_video_path
     else:
         print("Video generation failed.")
+        return None