backendprocesssuper

Sleeping

App Files Files Community

sreepathi-ravikumar committed on Oct 1, 2025

Commit

a0f5f50

verified ·

1 Parent(s): db8baa3

Update video2.py

Browse files

Files changed (1) hide show

video2.py +59 -31

video2.py CHANGED Viewed

@@ -40,7 +40,10 @@ for path in [BASE_DIR, AUDIO_DIR, CLIPS_DIR]:
     Path(path).mkdir(parents=True, exist_ok=True)
 warnings.filterwarnings('ignore')
 nest_asyncio.apply()
 VOICE_EN = "en-IN-NeerjaNeural"
 def clean_text_for_tts(text):
     """Cleans text before TTS so only the spoken words are read."""
     if not text:
@@ -65,6 +68,7 @@ def clean_text_for_tts(text):
     text = unicodedata.normalize('NFKD', text)
     text = re.sub(r'\s+', ' ', text)
     return text.strip()
 async def generate_safe_audio(text, voice):
     """Generate clean, plain text audio using edge-tts."""
     cleaned_text = clean_text_for_tts(text)
@@ -80,6 +84,7 @@ async def generate_safe_audio(text, voice):
     except Exception as e:
         print(f"Error generating audio: {e}")
         return None
 def smart_text_chunking(text, max_chars=80):
     """Split text into sensible, natural-length chunks for TTS."""
     text = clean_text_for_tts(text)
@@ -113,8 +118,9 @@ def smart_text_chunking(text, max_chars=80):
                         if current_chunk:
                             chunks.append(current_chunk.strip())
     return [chunk for chunk in chunks if chunk.strip()]
 async def bilingual_tts_fixed(text, output_file="audio0.mp3", VOICE_TA=None):
-    """Main fixed function for bilingual TTS output."""
     print("Starting fixed bilingual TTS processing...")
     try:
         chunks = smart_text_chunking(text)
@@ -122,9 +128,9 @@ async def bilingual_tts_fixed(text, output_file="audio0.mp3", VOICE_TA=None):
             print("Error: No valid text chunks after cleaning")
             return None
         print(f"Processing {len(chunks)} text chunks...")
-        audio_files = []
-        merged_audio = None
         is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA
         for i, chunk in enumerate(chunks):
             is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
             if is_bilingual_tamil:
@@ -133,45 +139,67 @@ async def bilingual_tts_fixed(text, output_file="audio0.mp3", VOICE_TA=None):
                 voice = VOICE_TA
             lang_label = "Tamil" if is_tamil else "English"
             print(f"Chunk {i+1}/{len(chunks)} ({lang_label}): {chunk[:40]}...")
-            audio_file = await generate_safe_audio(chunk, voice)
-            if audio_file:
-                audio_files.append(audio_file)
-                try:
-                    segment = AudioSegment.from_file(audio_file)
-                    segment = normalize(segment)
-                    # Only strip silence if segment is reasonably long
-                    if len(segment) > 200:
-                        try:
-                            segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
-                        except Exception as e:
-                            print(f" (Info) Skipped strip_silence: {e}")
-                    if merged_audio is None:
-                        merged_audio = segment
-                    else:
-                        pause = AudioSegment.silent(duration=200)
-                        merged_audio += pause + segment
-                except Exception as audio_error:
-                    print(f"Warning: Error processing audio for chunk {i+1}: {audio_error}")
-                    continue
-        if merged_audio is None:
             print("Error: No audio was successfully generated")
             return None
-        merged_audio.export(output_file, format="mp3", bitrate="128k")
         print(f"✅ Audio successfully generated: {output_file}")
-        for temp_file in audio_files:
             try:
                 if os.path.exists(temp_file):
                     os.unlink(temp_file)
             except:
                 pass
         return output_file
     except Exception as main_error:
         print(f"Main error in bilingual TTS: {main_error}")
         return None
 # USAGE EXAMPLE
 async def run_fixed_tts(text_input, output_file, lang):
     await bilingual_tts_fixed(text_input, output_file, lang)
 async def generate_tts(id, lines, lang):
     voice = {
         "English": "en-US-JennyNeural",
@@ -208,22 +236,22 @@ async def generate_tts(id, lines, lang):
         "Hungarian": "hu-HU-NoemiNeural"
     }
     audio_name = f"audio{id}.mp3"
-    audio_path = os.path.join(AUDIO_DIR, audio_name)
     if "&&&" in lang:
         listf = lang.split("&&&")
         text = listf[0].strip()
         lang_name = listf[1].strip()
         voice_to_use = voice[lang_name]
     else:
-        text = lines[id]
         voice_to_use = voice[lang]
-    loop = asyncio.get_event_loop()
-    output = loop.run_until_complete(run_fixed_tts(text, audio_path, voice_to_use))
     if os.path.exists(audio_path):
         audio = MP3(audio_path)
         duration = audio.info.length
         return duration, audio_path
     return None, None
 def audio_func(id, lines, lang):
     return asyncio.run(generate_tts(id, lines, lang))
 #-----------------------------

     Path(path).mkdir(parents=True, exist_ok=True)
 warnings.filterwarnings('ignore')
 nest_asyncio.apply()
 VOICE_EN = "en-IN-NeerjaNeural"
 def clean_text_for_tts(text):
     """Cleans text before TTS so only the spoken words are read."""
     if not text:
     text = unicodedata.normalize('NFKD', text)
     text = re.sub(r'\s+', ' ', text)
     return text.strip()
 async def generate_safe_audio(text, voice):
     """Generate clean, plain text audio using edge-tts."""
     cleaned_text = clean_text_for_tts(text)
     except Exception as e:
         print(f"Error generating audio: {e}")
         return None
 def smart_text_chunking(text, max_chars=80):
     """Split text into sensible, natural-length chunks for TTS."""
     text = clean_text_for_tts(text)
                         if current_chunk:
                             chunks.append(current_chunk.strip())
     return [chunk for chunk in chunks if chunk.strip()]
 async def bilingual_tts_fixed(text, output_file="audio0.mp3", VOICE_TA=None):
+    """Main fixed function for bilingual TTS output with concurrent audio generation for speed."""
     print("Starting fixed bilingual TTS processing...")
     try:
         chunks = smart_text_chunking(text)
             print("Error: No valid text chunks after cleaning")
             return None
         print(f"Processing {len(chunks)} text chunks...")
         is_bilingual_tamil = VOICE_TA is not None and "ta-IN" in VOICE_TA
+        tasks = []
         for i, chunk in enumerate(chunks):
             is_tamil = any('\u0B80' <= char <= '\u0BFF' for char in chunk)
             if is_bilingual_tamil:
                 voice = VOICE_TA
             lang_label = "Tamil" if is_tamil else "English"
             print(f"Chunk {i+1}/{len(chunks)} ({lang_label}): {chunk[:40]}...")
+            tasks.append(generate_safe_audio(chunk, voice))
+        audio_files = await asyncio.gather(*tasks, return_exceptions=True)
+        processed_audio_files = [f for f in audio_files if isinstance(f, str)]  # Filter successful files
+        errors = [e for e in audio_files if isinstance(e, Exception)]
+        if errors:
+            for e in errors:
+                print(f"Warning: Audio generation error: {e}")
+        if not processed_audio_files:
             print("Error: No audio was successfully generated")
             return None
+        merged_audio = None
+        for audio_file in processed_audio_files:
+            try:
+                segment = AudioSegment.from_file(audio_file)
+                segment = normalize(segment)
+                # Only strip silence if segment is reasonably long
+                if len(segment) > 200:
+                    try:
+                        segment = segment.strip_silence(silence_len=50, silence_thresh=-40)
+                    except Exception as e:
+                        print(f" (Info) Skipped strip_silence: {e}")
+                if merged_audio is None:
+                    merged_audio = segment
+                else:
+                    pause = AudioSegment.silent(duration=200)
+                    merged_audio += pause + segment
+            except Exception as audio_error:
+                print(f"Warning: Error processing audio: {audio_error}")
+                continue
+        if merged_audio is None:
+            print("Error: No audio segments were successfully processed")
+            return None
+        # Improved quality: Apply overall compression and normalization
+        merged_audio = merged_audio.compress_dynamic_range(threshold=-20.0, ratio=4.0, attack=5.0, release=50.0)
+        merged_audio = normalize(merged_audio)
+        merged_audio.export(output_file, format="mp3", bitrate="192k")  # Increased bitrate for better quality
         print(f"✅ Audio successfully generated: {output_file}")
+        # Cleanup temp files
+        for temp_file in processed_audio_files:
             try:
                 if os.path.exists(temp_file):
                     os.unlink(temp_file)
             except:
                 pass
         return output_file
     except Exception as main_error:
         print(f"Main error in bilingual TTS: {main_error}")
         return None
 # USAGE EXAMPLE
 async def run_fixed_tts(text_input, output_file, lang):
     await bilingual_tts_fixed(text_input, output_file, lang)
 async def generate_tts(id, lines, lang):
     voice = {
         "English": "en-US-JennyNeural",
         "Hungarian": "hu-HU-NoemiNeural"
     }
     audio_name = f"audio{id}.mp3"
+    audio_path = os.path.join(AUDIO_DIR, audio_name)  # Assuming AUDIO_DIR is defined elsewhere
     if "&&&" in lang:
         listf = lang.split("&&&")
         text = listf[0].strip()
         lang_name = listf[1].strip()
         voice_to_use = voice[lang_name]
     else:
+        text = lines[id]  # Assuming lines is a dict or list indexed by id
         voice_to_use = voice[lang]
+    output = await run_fixed_tts(text, audio_path, voice_to_use)
     if os.path.exists(audio_path):
         audio = MP3(audio_path)
         duration = audio.info.length
         return duration, audio_path
     return None, None
 def audio_func(id, lines, lang):
     return asyncio.run(generate_tts(id, lines, lang))
 #-----------------------------