backendprocesssuper

Sleeping

App Files Files Community

sreepathi-ravikumar committed on Nov 23, 2025

Commit

df79249

verified ·

1 Parent(s): 9467330

Update video2.py

Browse files

Files changed (1) hide show

video2.py +64 -100

video2.py CHANGED Viewed

@@ -55,209 +55,181 @@ from pydub.effects import normalize, compress_dynamic_range
 from mutagen.mp3 import MP3
 # --- Configuration ---
-AUDIO_DIR = "output_audio"  # Directory to save files
 os.makedirs(AUDIO_DIR, exist_ok=True)
-# Default Voices
 VOICE_MAPPING = {
-    "English": "en-IN-NeerjaNeural", # Indian English for better blending with Indian languages
     "Tamil": "ta-IN-PallaviNeural",
     "Hindi": "hi-IN-SwaraNeural",
-    "Malayalam": "ml-IN-SobhanaNeural",
-    "Kannada": "kn-IN-SapnaNeural",
-    "Telugu": "te-IN-ShrutiNeural",
-    "Bengali": "bn-IN-TanishaaNeural",
-    "Marathi": "mr-IN-AarohiNeural",
-    # Add others as needed
 }
-# --- Regex Patterns ---
-# Detects Tamil, Devanagari (Hindi), etc. based on Unicode ranges
-# Tamil: \u0B80-\u0BFF, Devanagari: \u0900-\u097F, Malayalam: \u0D00-\u0D7F
 INDIC_SCRIPT_PATTERN = re.compile(r'[\u0900-\u0D7F]+')
-SENTENCE_ENDINGS = re.compile(r'[.!?।]\s+')
 @lru_cache(maxsize=1024)
 def clean_text(text):
-    """Basic cleanup to remove artifacts but keep punctuation for pauses."""
     if not text: return ""
     text = html.unescape(str(text))
-    text = re.sub(r'https?://\S+', '', text) # Remove URLs
-    text = re.sub(r'[\*\#\<\>\[\]\{\}]', '', text) # Remove markdown/brackets
     text = re.sub(r'\s+', ' ', text).strip()
     return text
-def detect_language_group(text_segment):
     """
-    Determines if a segment is primarily English or an Indian Language.
-    Returns: 'indic' or 'english'
     """
-    # If the segment contains Indian script characters, treat as Indic
-    if INDIC_SCRIPT_PATTERN.search(text_segment):
         return 'indic'
     return 'english'
 def split_by_language_and_sentence(text):
     """
-    Intelligent splitter that groups words by language to ensure
-    the correct voice is used for English words inside Tamil sentences.
     """
     text = clean_text(text)
     words = text.split(' ')
     segments = []
     current_chunk = []
-    current_type = None # 'english' or 'indic'
     for word in words:
-        # Check if word ends with sentence punctuation
-        has_punctuation = any(char in ".!?," for char in word)
-        clean_word = word.strip(".,!?")
-        # Determine type of this specific word
-        word_type = detect_language_group(clean_word)
-        # Initialize first chunk
         if current_type is None:
             current_type = word_type
             current_chunk.append(word)
-        # If type matches, keep adding to chunk
         elif word_type == current_type:
             current_chunk.append(word)
-        # If type changes (Language switch), save chunk and start new one
         else:
             segments.append((" ".join(current_chunk), current_type))
             current_chunk = [word]
             current_type = word_type
-        # If this word had punctuation, it implies a natural pause,
-        # so we might want to force a segment break to allow breathing room,
-        # but for smoothness, we keep it in the stream unless logic dictates otherwise.
-    # Append the final chunk
     if current_chunk:
         segments.append((" ".join(current_chunk), current_type))
     return segments
 async def generate_segment_audio(text, voice, rate_limit_sem):
-    """Generates audio for a single segment."""
     if not text.strip():
         return None
     async with rate_limit_sem:
         try:
-            # Create a unique temp file
             fd, path = tempfile.mkstemp(suffix=".mp3")
             os.close(fd)
-            # Rate adjustment: Make English slightly faster to match Indian speech rates usually
-            rate = "+0%"
             comm = edge_tts.Communicate(text, voice, rate=rate)
             await comm.save(path)
             return path
         except Exception as e:
-            print(f"Error generating segment '{text[:20]}...': {e}")
             return None
 def process_audio_segment(file_path):
-    """
-    Reads MP3, removes static silence, and normalizes volume.
-    Run in ThreadPool to avoid blocking event loop.
-    """
     if not file_path or not os.path.exists(file_path):
         return None
     try:
         audio = AudioSegment.from_mp3(file_path)
-        # 1. Gentle Silence Trimming (Don't cut off word endings)
-        # We only trim if silence is longer than 300ms at ends
-        def trim_silence(sound, silence_threshold=-40.0, chunk_size=10):
-            trim_ms = 0
-            while sound[trim_ms:trim_ms+chunk_size].dBFS < silence_threshold and trim_ms < len(sound):
-                trim_ms += chunk_size
-            return sound[trim_ms:]
-        audio = trim_silence(audio) # Trim start
-        audio = trim_silence(audio.reverse()).reverse() # Trim end
-        # 2. Add a tiny bit of padding (50ms) to prevent abrupt cuts
         silence_pad = AudioSegment.silent(duration=50)
         audio = silence_pad + audio + silence_pad
         return audio
     except Exception as e:
-        print(f"Error processing audio file {file_path}: {e}")
         return None
     finally:
-        # Cleanup temp file
         try:
             os.remove(file_path)
         except:
             pass
 async def bilingual_tts_optimized(full_text, output_file, native_lang_code):
-    """
-    Main Orchestrator.
-    """
-    print(f"Processing: {full_text[:50]}...")
-    # 1. Split text into Language chunks (English vs Native)
-    # The native_lang_code should be something like "Tamil", "Hindi" keys in VOICE_MAPPING
     segments_data = split_by_language_and_sentence(full_text)
-    # 2. Define voices
     native_voice = VOICE_MAPPING.get(native_lang_code, VOICE_MAPPING["English"])
     english_voice = VOICE_MAPPING["English"]
     tasks = []
-    # Limit concurrent connections to Edge TTS to avoid 429 Too Many Requests
-    semaphore = asyncio.Semaphore(8)
-    # 3. Queue up generation tasks
     for text_chunk, type_group in segments_data:
         voice = native_voice if type_group == 'indic' else english_voice
         tasks.append(generate_segment_audio(text_chunk, voice, semaphore))
-    # 4. Generate Raw Audio Files (Async)
     raw_files = await asyncio.gather(*tasks)
-    # 5. Process Audio (Normalization & Stitching)
-    # Using ThreadPool for CPU intensive pydub operations
     final_audio = AudioSegment.empty()
     with ThreadPoolExecutor(max_workers=4) as executor:
         processed_segments = list(executor.map(process_audio_segment, raw_files))
-    # 6. Stitch with Crossfade for smoothness
-    # We ignore None types
     valid_segments = [seg for seg in processed_segments if seg is not None]
     if not valid_segments:
         return None
-    # Logic: If the segments are short, crossfade. If it looks like a sentence end, add pause.
     for i, seg in enumerate(valid_segments):
         if i == 0:
             final_audio += seg
         else:
-            # Crossfade logic: overlap the previous segment end with next segment start
-            # by 30ms to create a smooth flow instead of a hard cut.
-            try:
-                final_audio = final_audio.append(seg, crossfade=30)
-            except:
-                # Fallback if segment is too short to crossfade
-                final_audio += seg
-    # 7. Final Mastering
-    # Normalize to standard -3dB
-    final_audio = normalize(final_audio, headroom=3.0)
-    # Optional: Dynamic Range Compression to make voice sound "richer" and consistent
     final_audio = compress_dynamic_range(
         final_audio,
         threshold=-15.0,
@@ -265,22 +237,15 @@ async def bilingual_tts_optimized(full_text, output_file, native_lang_code):
         attack=5.0,
         release=50.0
     )
-    # 8. Export
     final_audio.export(output_file, format="mp3", bitrate="192k")
-    print(f"Saved: {output_file}")
     return output_file
-# --- Wrapper for usage ---
 async def generate_tts(id, lines, lang_input):
-    """
-    Called by external script.
-    lang_input format examples: "Tamil", "Text &&& Tamil"
-    """
-    # Parse input
     if "&&&" in lang_input:
         parts = lang_input.split("&&&")
         text = parts[0].strip()
@@ -290,8 +255,6 @@ async def generate_tts(id, lines, lang_input):
         lang_name = lang_input.strip()
     output_path = os.path.join(AUDIO_DIR, f"audio_{id}.mp3")
-    # Run the generator
     result = await bilingual_tts_optimized(text, output_path, lang_name)
     if result:
@@ -300,6 +263,7 @@ async def generate_tts(id, lines, lang_input):
     else:
         return 0, None
 def audio_func(id, lines, lang):
     """Synchronous wrapper for audio generation."""
     return asyncio.run(generate_tts(id, lines, lang))

 from mutagen.mp3 import MP3
 # --- Configuration ---
+AUDIO_DIR = "output_audio"
 os.makedirs(AUDIO_DIR, exist_ok=True)
+# Voice Mapping
+# using 'NeerjaNeural' for English as it blends better with Indian contexts
 VOICE_MAPPING = {
+    "English": "en-IN-NeerjaNeural",
     "Tamil": "ta-IN-PallaviNeural",
     "Hindi": "hi-IN-SwaraNeural",
 }
+# Regex to find Indian Language characters (Tamil, Hindi, Malayalam, etc.)
+# Tamil Unicode range is inside this block (\u0B80-\u0BFF)
 INDIC_SCRIPT_PATTERN = re.compile(r'[\u0900-\u0D7F]+')
 @lru_cache(maxsize=1024)
 def clean_text(text):
     if not text: return ""
     text = html.unescape(str(text))
+    # Remove URLs and Markdown, but keep basic punctuation
+    text = re.sub(r'https?://\S+', '', text)
+    text = re.sub(r'[\*\#\<\>\[\]\{\}]', '', text)
     text = re.sub(r'\s+', ' ', text).strip()
     return text
+def detect_language_group(word):
     """
+    Returns 'indic' if the word has Tamil/Hindi chars.
+    Returns 'english' otherwise (for words like 'Voltage', '1.5V', 'circuit').
     """
+    if INDIC_SCRIPT_PATTERN.search(word):
         return 'indic'
     return 'english'
 def split_by_language_and_sentence(text):
     """
+    Splits text into chunks of English vs Native language.
+    Example: "Voltage னு" -> [("Voltage", "english"), ("னு", "indic")]
     """
     text = clean_text(text)
     words = text.split(' ')
     segments = []
     current_chunk = []
+    current_type = None
     for word in words:
+        # Clean punctuation for detection (e.g. "force," -> "force")
+        # But keep the original word for the audio generation
+        clean_word_for_check = word.strip(".,!?")
+        if not clean_word_for_check:
+            # If word was just "...", keep it with previous chunk
+            if current_chunk:
+                current_chunk.append(word)
+            continue
+        word_type = detect_language_group(clean_word_for_check)
+        # Start first chunk
         if current_type is None:
             current_type = word_type
             current_chunk.append(word)
+        # If type matches current chunk, add to it
         elif word_type == current_type:
             current_chunk.append(word)
+        # Type switched (e.g., from English 'Voltage' to Tamil 'னு')
         else:
             segments.append((" ".join(current_chunk), current_type))
             current_chunk = [word]
             current_type = word_type
+    # Add valid final chunk
     if current_chunk:
         segments.append((" ".join(current_chunk), current_type))
     return segments
 async def generate_segment_audio(text, voice, rate_limit_sem):
+    """Generates audio for a specific text segment using EdgeTTS."""
     if not text.strip():
         return None
     async with rate_limit_sem:
         try:
             fd, path = tempfile.mkstemp(suffix=".mp3")
             os.close(fd)
+            # Speech-rate offset passed to EdgeTTS; "+0%" leaves the voice at its default speed
+            rate = "+0%"
             comm = edge_tts.Communicate(text, voice, rate=rate)
             await comm.save(path)
             return path
         except Exception as e:
+            print(f"Error generating segment '{text}': {e}")
             return None
 def process_audio_segment(file_path):
+    """Process individual segment: normalize and add micro-padding."""
     if not file_path or not os.path.exists(file_path):
         return None
     try:
         audio = AudioSegment.from_mp3(file_path)
+        # Normalize volume
+        audio = normalize(audio)
+        # Add tiny silence (50ms) to start/end to prevent 'clipped' words
+        # This makes the transition between "Voltage" and "nu" sound natural
         silence_pad = AudioSegment.silent(duration=50)
         audio = silence_pad + audio + silence_pad
         return audio
     except Exception as e:
+        print(f"Error processing segment: {e}")
         return None
     finally:
         try:
             os.remove(file_path)
         except:
             pass
 async def bilingual_tts_optimized(full_text, output_file, native_lang_code):
+    print("\n--- Starting Processing ---")
+    # 1. Split Text
     segments_data = split_by_language_and_sentence(full_text)
+    # DEBUG: Print the split logic so user can see it
+    print(f"Detected {len(segments_data)} segments:")
+    for i, (text, lang_type) in enumerate(segments_data):
+        print(f"  {i+1}. [{lang_type.upper()}] : {text}")
+    # 2. Assign Voices
     native_voice = VOICE_MAPPING.get(native_lang_code, VOICE_MAPPING["English"])
     english_voice = VOICE_MAPPING["English"]
     tasks = []
+    semaphore = asyncio.Semaphore(5) # Prevent overloading API
+    # 3. Create Tasks
     for text_chunk, type_group in segments_data:
         voice = native_voice if type_group == 'indic' else english_voice
         tasks.append(generate_segment_audio(text_chunk, voice, semaphore))
+    # 4. Run Generation
+    print("\nGenerating Audio Segments...")
     raw_files = await asyncio.gather(*tasks)
+    # 5. Process Audio (Stitching)
+    print("Stitching and Mastering...")
     final_audio = AudioSegment.empty()
     with ThreadPoolExecutor(max_workers=4) as executor:
         processed_segments = list(executor.map(process_audio_segment, raw_files))
     valid_segments = [seg for seg in processed_segments if seg is not None]
     if not valid_segments:
+        print("Error: No audio generated.")
         return None
+    # Crossfade Stitching
     for i, seg in enumerate(valid_segments):
         if i == 0:
             final_audio += seg
         else:
+            # 30ms crossfade blends the English word ending into the Tamil start
+            final_audio = final_audio.append(seg, crossfade=30)
+    # 6. Final Mastering
+    # Compress dynamic range to make it sound punchy like a podcast
     final_audio = compress_dynamic_range(
         final_audio,
         threshold=-15.0,
         attack=5.0,
         release=50.0
     )
+    final_audio = normalize(final_audio)
     final_audio.export(output_file, format="mp3", bitrate="192k")
+    print(f"✅ Success! Audio saved to: {output_file}")
     return output_file
+# --- Wrapper for your usage ---
 async def generate_tts(id, lines, lang_input):
     if "&&&" in lang_input:
         parts = lang_input.split("&&&")
         text = parts[0].strip()
         lang_name = lang_input.strip()
     output_path = os.path.join(AUDIO_DIR, f"audio_{id}.mp3")
     result = await bilingual_tts_optimized(text, output_path, lang_name)
     if result:
     else:
         return 0, None
 def audio_func(id, lines, lang):
     """Synchronous wrapper for audio generation."""
     return asyncio.run(generate_tts(id, lines, lang))