backendprocesssuper

Sleeping

App Files Files Community

sreepathi-ravikumar committed on Oct 26, 2025

Commit

916cab7

verified ·

1 Parent(s): 13cdf1c

Update video2.py

Browse files

Files changed (1) hide show

video2.py +226 -146

video2.py CHANGED Viewed

@@ -46,20 +46,18 @@ import html
 import unicodedata
 import tempfile
 import os
-import asyncio
-from concurrent.futures import ThreadPoolExecutor
 from functools import lru_cache
-from gtts import gTTS  # ADD: Import gTTS for replacement
 from pydub import AudioSegment
 from pydub.effects import normalize
 from mutagen.mp3 import MP3
-# Global constants (unchanged)
-AUDIO_DIR = os.path.join("/app/data", "sound")  # Ensure this matches your BASE_DIR
-os.makedirs(AUDIO_DIR, exist_ok=True)
-VOICE_EN = "en"  # CHANGE: For gTTS, use lang codes instead of full voice names
-# Pre-compiled regex patterns (unchanged)
 URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
 TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
 BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
@@ -68,63 +66,91 @@ WHITESPACE_PATTERN = re.compile(r'\s+')
 SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
 SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
 @lru_cache(maxsize=1024)
 def clean_text_for_tts(text):
     """Cleans text before TTS with optimized regex and caching."""
-    if not text or text.isspace():
-        return "Default text for empty input"  # Fallback for empty input
     text = str(text).strip()
     text = html.unescape(text)
-    # Use pre-compiled patterns (much faster)
     text = URL_PATTERN.sub('', text)
     text = TAG_PATTERN.sub('', text)
     text = BRACKET_PATTERN.sub('', text)
     text = SPECIAL_CHAR_PATTERN.sub('', text)
     text = text.replace('\\n', ' ').replace('\\t', ' ').replace('\\r', ' ')
-    # Batch remove keywords (faster than multiple re.sub calls)
     for keyword in ['voice', 'speak', 'prosody', 'ssml', 'xmlns']:
         text = text.replace(keyword, '').replace(keyword.upper(), '')
     text = unicodedata.normalize('NFKD', text)
     text = WHITESPACE_PATTERN.sub(' ', text)
-    text = text.strip()
-    if not text:
-        return "Default text for empty input"  # Ensure non-empty output
-    return text
-def generate_safe_audio(text, lang):  # CHANGE: Remove async/semaphore; gTTS is sync
-    """Generate clean audio with gTTS (synchronous)."""
-    cleaned_text = clean_text_for_tts(text)
-    print(f"Generating audio for text: {cleaned_text[:50]}... with lang: {lang}")  # Debug log
-    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
-    fname = temp_file.name
-    temp_file.close()
-    try:
-        # Use gTTS with specified lang (e.g., 'en' for English, 'ta' for Tamil)
-        tts = gTTS(text=cleaned_text, lang=lang, slow=False)  # slow=False for natural speed
-        tts.save(fname)
-        if os.path.exists(fname) and os.path.getsize(fname) > 0:
-            print(f"Audio generated: {fname}")  # Debug log
-            return fname
-        else:
-            print(f"Audio file {fname} is empty or missing")  # Debug log
-            os.unlink(fname)
-            return None
-    except Exception as e:
-        print(f"Error generating audio for '{cleaned_text[:20]}...': {e}")  # Debug log
-        if os.path.exists(fname):
-            os.unlink(fname)
-        return None
 @lru_cache(maxsize=256)
-def smart_text_chunking(text, max_chars=80):
-    """Cached text chunking for speed."""
     text = clean_text_for_tts(text)
     if not text:
-        return ("Default text for chunking",)  # Non-empty fallback
     sentences = SENTENCE_PATTERN.split(text)
     chunks = []
@@ -158,72 +184,107 @@ def smart_text_chunking(text, max_chars=80):
                     if current_chunk:
                         chunks.append(current_chunk.strip())
-    return tuple(chunks) or ("Default text for chunking",)  # Non-empty fallback
-def process_audio_segment_fast(audio_file):
-    """Fast audio processing in separate thread."""
     try:
         segment = AudioSegment.from_file(audio_file)
         segment = normalize(segment)
-        # Only strip silence for longer segments
         if len(segment) > 200:
             try:
                 segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
             except:
-                pass  # Skip if fails
-        print(f"Processed audio segment: {audio_file}")  # Debug log
         return segment
     except Exception as e:
-        print(f"Warning: Error processing audio segment {audio_file}: {e}")
         return None
     finally:
-        # Cleanup temp file immediately
         try:
             if os.path.exists(audio_file):
                 os.unlink(audio_file)
         except:
             pass
-def bilingual_tts_optimized(text, output_file="audio0.mp3", LANG_TA=None, max_concurrent=5):
-    """Ultra-optimized bilingual TTS with gTTS and parallel processing via threads."""
-    print(f"Starting gTTS bilingual TTS for output: {output_file}")  # Debug log
     try:
-        chunks = smart_text_chunking(text)
         if not chunks:
             print("Error: No valid text chunks after cleaning")
             return None
-        print(f"Processing {len(chunks)} text chunks with max {max_concurrent} concurrent requests...")
-        is_bilingual_tamil = LANG_TA is not None and LANG_TA == 'ta'
-        # Prepare all audio files using ThreadPoolExecutor (since gTTS is sync)
-        audio_files = []
-        with ThreadPoolExecutor(max_workers=max_concurrent) as executor:
-            futures = []
-            for chunk in chunks:
-                is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
-                lang = LANG_TA if (is_bilingual_tamil and is_tamil) else (LANG_TA or VOICE_EN)
-                futures.append(executor.submit(generate_safe_audio, chunk, lang))
-            # Collect results
-            for future in futures:
-                result = future.result()
-                if result:
-                    audio_files.append(result)
-        if not audio_files:
             print("Error: No audio was successfully generated")
             return None
-        print(f"Successfully generated {len(audio_files)} audio segments")
-        # Process audio segments in parallel using another ThreadPoolExecutor
-        with ThreadPoolExecutor(max_workers=min(len(audio_files), 4)) as executor:
-            audio_segments = list(executor.map(process_audio_segment_fast, audio_files))
         # Filter out None segments
         audio_segments = [seg for seg in audio_segments if seg is not None]
@@ -232,106 +293,125 @@ def bilingual_tts_optimized(text, output_file="audio0.mp3", LANG_TA=None, max_co
             print("Error: No audio segments were successfully processed")
             return None
-        # Merge audio segments (fast concatenation)
         print("Merging audio segments...")
         merged_audio = audio_segments[0]
-        pause = AudioSegment.silent(duration=200)
         for segment in audio_segments[1:]:
             merged_audio += pause + segment
-        # Apply final processing (compression and normalization) for quality
         print("Applying final audio processing...")
         merged_audio = merged_audio.compress_dynamic_range(
-            threshold=-20.0,
-            ratio=4.0,
-            attack=5.0,
             release=50.0
         )
         merged_audio = normalize(merged_audio)
-        # Export with high quality (192k bitrate for better quality matching edge_tts)
-        os.makedirs(os.path.dirname(output_file), exist_ok=True)
-        merged_audio.export(output_file, format="mp3", bitrate="192k")
-        if os.path.exists(output_file) and os.path.getsize(output_file) > 0:
-            print(f"✅ Audio successfully generated: {output_file}")
-            return output_file
-        else:
-            print(f"Error: Audio file {output_file} is empty or not created")
-            return None
     except Exception as main_error:
         print(f"Main error in bilingual TTS: {main_error}")
         return None
-async def generate_tts_optimized(id, lines, lang):
-    """Optimized TTS generation function (now sync-wrapped for async compatibility)."""
-    # CHANGE: Map to gTTS lang codes (no neural voices; use standard lang)
-    lang_map = {
-        "English": "en",
-        "Tamil": "ta",
-        "Hindi": "hi",
-        "Malayalam": "ml",
-        "Kannada": "kn",
-        "Telugu": "te",
-        "Bengali": "bn",
-        "Marathi": "mr",
-        "Gujarati": "gu",
-        "Punjabi": "pa",
-        "Urdu": "ur",
-        "French": "fr",
-        "German": "de",
-        "Spanish": "es",
-        "Italian": "it",
-        "Russian": "ru",
-        "Japanese": "ja",
-        "Korean": "ko",
-        "Chinese": "zh",
-        "Arabic": "ar",
-        "Portuguese": "pt",
-        "Dutch": "nl",
-        "Greek": "el",
-        "Hebrew": "he",
-        "Turkish": "tr",
-        "Polish": "pl",
-        "Thai": "th",
-        "Vietnamese": "vi",
-        "Swedish": "sv",
-        "Finnish": "fi",
-        "Czech": "cs",
-        "Hungarian": "hu"
-    }
     audio_name = f"audio{id}.mp3"
     audio_path = os.path.join(AUDIO_DIR, audio_name)
-    print(f"Generating audio for id {id}, lang: {lang}")  # Debug log
     if "&&&" in lang:
-        listf = lang.split("&&&")
-        text = listf[0].strip()
-        lang_name = listf[1].strip()
-        lang_to_use = lang_map.get(lang_name, VOICE_EN)
     else:
-        text = lines[id]
-        lang_to_use = lang_map.get(lang, VOICE_EN)
-    print(f"Text for TTS: {text[:50]}...")  # Debug log
-    # CHANGE: Call sync bilingual_tts_optimized (no async needed for gTTS)
-    output = bilingual_tts_optimized(text, audio_path, lang_to_use, max_concurrent=5)
     if output and os.path.exists(audio_path):
-        audio = MP3(audio_path)
-        duration = audio.info.length
-        print(f"Audio duration: {duration}s, path: {audio_path}")  # Debug log
-        return duration, audio_path
-    print(f"Audio generation failed for id {id}")  # Debug log
     return None, None
 def audio_func(id, lines, lang):
-    """Synchronous wrapper for audio generation (unchanged, but now calls sync TTS)."""
-    # CHANGE: No asyncio.run needed since generate_tts_optimized is now sync
-    return generate_tts_optimized(id, lines, lang)
 #-----------------------------
 #---------------------------------
 def video_func(id, lines, lang):

 import unicodedata
 import tempfile
 import os
 from functools import lru_cache
+from gtts import gTTS
 from pydub import AudioSegment
 from pydub.effects import normalize
 from mutagen.mp3 import MP3
+from concurrent.futures import ThreadPoolExecutor
+# Default voice/language settings
+DEFAULT_LANG = "en"
+# Pre-compiled regex patterns for speed
 URL_PATTERN = re.compile(r'https?://[^\s<>"\']+|www\.[^\s<>"\']+')
 TAG_PATTERN = re.compile(r'<[^>]*>|[<>]')
 BRACKET_PATTERN = re.compile(r'[\{\}\[\]]')
 SENTENCE_PATTERN = re.compile(r'(?<=[.!?])\s+')
 SUB_PATTERN = re.compile(r'(?<=[,;:])\s+')
# Display name -> gTTS language code.
# gTTS validates the `lang` argument against its own table. Most entries are
# ISO 639-1 codes, but a few are Google-specific (e.g. "zh-CN" for Mandarin
# and the legacy "iw" for Hebrew).
# NOTE(review): confirm the codes against `gtts.lang.tts_langs()` for the
# gTTS version installed in this deployment.
LANGUAGE_MAP = {
    "English": "en",
    "Tamil": "ta",
    "Hindi": "hi",
    "Malayalam": "ml",
    "Kannada": "kn",
    "Telugu": "te",
    "Bengali": "bn",
    "Marathi": "mr",
    "Gujarati": "gu",
    "Punjabi": "pa",
    "Urdu": "ur",
    "French": "fr",
    "German": "de",
    "Spanish": "es",
    "Italian": "it",
    "Russian": "ru",
    "Japanese": "ja",
    "Korean": "ko",
    "Chinese": "zh-CN",
    "Arabic": "ar",
    "Portuguese": "pt",
    "Dutch": "nl",
    "Greek": "el",
    "Hebrew": "iw",  # gTTS lists Hebrew as "iw"; "he" is not in its table
    "Turkish": "tr",
    "Polish": "pl",
    "Thai": "th",
    "Vietnamese": "vi",
    "Swedish": "sv",
    "Finnish": "fi",
    "Czech": "cs",
    "Hungarian": "hu",
}
+# Unicode ranges for language detection
+LANGUAGE_UNICODE_RANGES = {
+    'ta': ('\u0B80', '\u0BFF'),  # Tamil
+    'hi': ('\u0900', '\u097F'),  # Hindi/Devanagari
+    'te': ('\u0C00', '\u0C7F'),  # Telugu
+    'kn': ('\u0C80', '\u0CFF'),  # Kannada
+    'ml': ('\u0D00', '\u0D7F'),  # Malayalam
+    'bn': ('\u0980', '\u09FF'),  # Bengali
+    'gu': ('\u0A80', '\u0AFF'),  # Gujarati
+    'pa': ('\u0A00', '\u0A7F'),  # Punjabi
+}
 @lru_cache(maxsize=1024)
 def clean_text_for_tts(text):
     """Cleans text before TTS with optimized regex and caching."""
+    if not text:
+        return ""
     text = str(text).strip()
     text = html.unescape(text)
+    # Use pre-compiled patterns
     text = URL_PATTERN.sub('', text)
     text = TAG_PATTERN.sub('', text)
     text = BRACKET_PATTERN.sub('', text)
     text = SPECIAL_CHAR_PATTERN.sub('', text)
     text = text.replace('\\n', ' ').replace('\\t', ' ').replace('\\r', ' ')
+    # Remove TTS-specific keywords
     for keyword in ['voice', 'speak', 'prosody', 'ssml', 'xmlns']:
         text = text.replace(keyword, '').replace(keyword.upper(), '')
     text = unicodedata.normalize('NFKD', text)
     text = WHITESPACE_PATTERN.sub(' ', text)
+    return text.strip()
def detect_language(text):
    """Guess a language code from the Unicode ranges present in `text`.

    The entries of LANGUAGE_UNICODE_RANGES are tried in dictionary order and
    the first one whose range contains at least one character of `text` wins.
    When nothing matches, 'en' is returned.
    """
    candidates = (
        code
        for code, (low, high) in LANGUAGE_UNICODE_RANGES.items()
        if any(low <= ch <= high for ch in text)
    )
    return next(candidates, 'en')  # Default to English
 @lru_cache(maxsize=256)
+def smart_text_chunking(text, max_chars=100):
+    """Cached text chunking optimized for gTTS."""
     text = clean_text_for_tts(text)
     if not text:
+        return tuple()
     sentences = SENTENCE_PATTERN.split(text)
     chunks = []
                     if current_chunk:
                         chunks.append(current_chunk.strip())
+    return tuple(chunk for chunk in chunks if chunk.strip())
def generate_audio_chunk(args):
    """Synthesize one text chunk to a temporary MP3 file using gTTS.

    Args:
        args: Tuple ``(chunk, lang_code, chunk_idx)``: the text to speak, the
            gTTS language code, and the zero-based chunk index used in log
            messages. A single tuple is taken so the function can be used
            directly with ``executor.map``.

    Returns:
        Path of the generated MP3 file, or None when the cleaned text is
        empty, gTTS fails, or the resulting file is empty. The caller owns
        the returned file and is responsible for deleting it.
    """
    chunk, lang_code, chunk_idx = args
    # Stays None until the temp file exists, so the error handler below never
    # touches an unbound name when the failure happens before that point.
    fname = None
    try:
        cleaned_text = clean_text_for_tts(chunk)
        if not cleaned_text:
            return None

        # delete=False: only the path is needed; gTTS reopens it by name.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
            fname = temp_file.name

        # Generate TTS with gTTS
        tts = gTTS(text=cleaned_text, lang=lang_code, slow=False)
        tts.save(fname)

        # An empty file is a failed synthesis; do not hand it downstream.
        if os.path.getsize(fname) == 0:
            print(f"Error generating audio chunk {chunk_idx}: empty audio file")
            os.unlink(fname)
            return None

        print(f"Generated chunk {chunk_idx + 1}: {len(cleaned_text)} chars")
        return fname
    except Exception as e:
        print(f"Error generating audio chunk {chunk_idx}: {e}")
        if fname is not None and os.path.exists(fname):
            try:
                os.unlink(fname)
            except OSError:
                # Best-effort cleanup; the chunk is reported as failed anyway.
                pass
        return None
def process_audio_segment(audio_file):
    """Load one generated audio file, normalize it and try to trim silence.

    The input file is always deleted afterwards, whether or not processing
    succeeded.

    Args:
        audio_file: Path of the temporary audio file to load.

    Returns:
        The processed pydub ``AudioSegment``, or None if the file could not
        be loaded or normalized.
    """
    try:
        segment = normalize(AudioSegment.from_file(audio_file))
        # Strip silence for better quality (only on segments longer than 200 ms)
        if len(segment) > 200:
            try:
                segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
            except Exception as strip_error:
                # Best-effort: keep the untrimmed segment, but say so instead
                # of hiding the failure.
                # NOTE(review): pydub's strip_silence appears to reject a
                # padding larger than silence_len, and its default padding is
                # 100 ms, so this call may fail every time. Confirm against
                # the installed pydub before relying on trimming.
                print(f"Warning: Silence stripping skipped: {strip_error}")
        return segment
    except Exception as e:
        print(f"Warning: Error processing audio segment: {e}")
        return None
    finally:
        # Remove the temp file; never let cleanup mask the real outcome, and
        # never swallow KeyboardInterrupt/SystemExit as a bare except would.
        try:
            if os.path.exists(audio_file):
                os.unlink(audio_file)
        except (OSError, TypeError, ValueError):
            pass
+def bilingual_tts_gtts(text, output_file="audio0.mp3", target_lang=None, max_workers=8):
+    """
+    Generate bilingual TTS audio using gTTS with parallel processing.
+    Args:
+        text: Input text (can contain multiple languages)
+        output_file: Output MP3 file path
+        target_lang: Primary language code (auto-detected if None)
+        max_workers: Number of parallel workers
+    Returns:
+        Path to generated audio file or None on error
+    """
+    print("Starting gTTS bilingual audio generation...")
     try:
+        # Chunk the text
+        chunks = smart_text_chunking(text, max_chars=100)
         if not chunks:
             print("Error: No valid text chunks after cleaning")
             return None
+        print(f"Processing {len(chunks)} text chunks...")
+        # Detect languages for each chunk
+        chunk_args = []
+        for idx, chunk in enumerate(chunks):
+            # Detect language for this chunk
+            detected_lang = detect_language(chunk)
+            # Use target language if specified, otherwise use detected
+            lang_code = target_lang if target_lang else detected_lang
+            chunk_args.append((chunk, lang_code, idx))
+        # Generate audio chunks in parallel
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            audio_files = list(executor.map(generate_audio_chunk, chunk_args))
+        # Filter successful files
+        processed_audio_files = [f for f in audio_files if f and os.path.exists(f)]
+        if not processed_audio_files:
             print("Error: No audio was successfully generated")
             return None
+        print(f"Successfully generated {len(processed_audio_files)} audio segments")
+        # Process audio segments in parallel
+        with ThreadPoolExecutor(max_workers=min(len(processed_audio_files), 8)) as executor:
+            audio_segments = list(executor.map(process_audio_segment, processed_audio_files))
         # Filter out None segments
         audio_segments = [seg for seg in audio_segments if seg is not None]
             print("Error: No audio segments were successfully processed")
             return None
+        # Merge audio segments
         print("Merging audio segments...")
         merged_audio = audio_segments[0]
+        pause = AudioSegment.silent(duration=300)  # 300ms pause between segments
         for segment in audio_segments[1:]:
             merged_audio += pause + segment
+        # Apply final processing for high quality
         print("Applying final audio processing...")
+        # Normalize audio
+        merged_audio = normalize(merged_audio)
+        # Apply dynamic range compression for better clarity
         merged_audio = merged_audio.compress_dynamic_range(
+            threshold=-20.0,
+            ratio=3.0,
+            attack=5.0,
             release=50.0
         )
+        # Final normalization
         merged_audio = normalize(merged_audio)
+        # Export with high quality settings
+        merged_audio.export(
+            output_file,
+            format="mp3",
+            bitrate="192k",
+            parameters=["-q:a", "0"]  # Highest quality
+        )
+        print(f"✅ Audio successfully generated: {output_file}")
+        return output_file
     except Exception as main_error:
         print(f"Main error in bilingual TTS: {main_error}")
+        import traceback
+        traceback.print_exc()
         return None
def generate_tts_gtts(id, lines, lang):
    """Generate the TTS audio file for one entry and report its duration.

    Args:
        id: Audio ID/index. Used to pick the text from `lines` and to name
            the output file ``audio{id}.mp3`` inside AUDIO_DIR.
        lines: Either a sequence of text lines (indexed by `id`) or a single
            string that is spoken as-is.
        lang: Language name looked up in LANGUAGE_MAP, or a combined
            ``"<text>&&&<language name>"`` string, in which case the text is
            taken from `lang` and `lines` is ignored.

    Returns:
        Tuple of (duration, audio_path) on success, or (None, None) when no
        usable text is available, synthesis fails, or the generated file
        cannot be read.
    """
    # Resolve the text and language name first, so bad input is rejected
    # before any directory is created or TTS request is made.
    if isinstance(lang, str) and "&&&" in lang:
        parts = lang.split("&&&")
        text = parts[0].strip()
        lang_name = parts[1].strip()
    else:
        lang_name = lang
        if isinstance(lines, (list, tuple)):
            # An out-of-range id means there is no text, not "speak the
            # whole list".
            in_range = -len(lines) <= id < len(lines)
            text = lines[id] if in_range else None
        else:
            text = lines

    if not isinstance(text, str) or not text.strip():
        print(f"Error: No text available for audio {id}")
        return None, None

    lang_code = LANGUAGE_MAP.get(lang_name, DEFAULT_LANG)

    # Ensure audio directory exists.
    # NOTE(review): AUDIO_DIR is not defined in the visible part of this
    # change; it is assumed to be a module-level constant defined elsewhere
    # in the file. Confirm.
    os.makedirs(AUDIO_DIR, exist_ok=True)
    audio_path = os.path.join(AUDIO_DIR, f"audio{id}.mp3")

    print(f"\nGenerating audio {id} in language: {lang_code}")
    print(f"Text preview: {text[:100]}...")

    # Generate audio
    output = bilingual_tts_gtts(text, audio_path, lang_code, max_workers=8)
    if not (output and os.path.exists(audio_path)):
        return None, None

    try:
        duration = MP3(audio_path).info.length
    except Exception as e:
        print(f"Error reading audio file: {e}")
        return None, None

    print(f"Generated audio duration: {duration:.2f} seconds")
    return duration, audio_path
 def audio_func(id, lines, lang):
+    """
+    Main function to generate audio using gTTS.
+    Args:
+        id: Audio ID/index
+        lines: Text content (string or list)
+        lang: Language specification
+    Returns:
+        Tuple of (duration, audio_path)
+    """
+    return generate_tts_gtts(id, lines, lang)
# Example usage
if __name__ == "__main__":
    # Each case is (id, lines, lang). The id selects the entry when `lines`
    # is a list and also names the output file, so a list must be long enough
    # to be indexed by it; a plain string is spoken as-is whatever the id.
    bilingual_text = "Hello, welcome to our service. வணக்கம், எங்கள் சேவைக்கு வரவேற்கிறோம். &&&Tamil"
    examples = [
        # Example 1: Simple English text (list indexed by id 0)
        (0, ["Hello, this is a test of the Google Text-to-Speech system."], "English"),
        # Example 2: Bilingual text with custom "<text>&&&<language>" format
        (1, bilingual_text, bilingual_text),
        # Example 3: Tamil text, passed as a string because id 2 would be out
        # of range for a one-element list.
        (2, "வணக்கம், இது தமிழில் ஒரு சோதனை செய்தி.", "Tamil"),
    ]
    for example_id, example_lines, example_lang in examples:
        duration, path = audio_func(example_id, example_lines, example_lang)
        print(f"Generated: {path} ({duration}s)")
 #-----------------------------
 #---------------------------------
 def video_func(id, lines, lang):