backendprocesssuper

Sleeping

App Files Files Community

sreepathi-ravikumar committed on Nov 23, 2025

Commit

faaec01

verified ·

1 Parent(s): 73b2a26

Update video2.py

Browse files

Files changed (1) hide show

video2.py +29 -30

video2.py CHANGED Viewed

@@ -58,7 +58,7 @@ from mutagen.mp3 import MP3
 AUDIO_DIR = "output_audio"
 os.makedirs(AUDIO_DIR, exist_ok=True)
-# Max concurrent requests (Safe zone for Edge TTS)
 MAX_CONCURRENT_REQUESTS = 3
 MAX_RETRIES = 5
 BASE_DELAY = 2.0
@@ -76,7 +76,6 @@ def clean_text(text):
     if not text: return ""
     text = html.unescape(str(text))
     text = re.sub(r'https?://\S+', '', text)
-    # Remove special chars but KEEP punctuation
     text = re.sub(r'[\*\#\<\>\[\]\{\}]', '', text)
     text = re.sub(r'\s+', ' ', text).strip()
     return text
@@ -88,14 +87,16 @@ def detect_language(word):
 def calculate_pause(text_chunk):
     """
-    INCREASED GAP DURATIONS as requested.
     """
     t = text_chunk.strip()
-    if t.endswith('.'): return 650   # Long pause for full stop
-    elif t.endswith('?'): return 700 # Question pause
-    elif t.endswith('!'): return 600
-    elif t.endswith(',') or t.endswith(';'): return 250 # Clear breath
-    return 0 # Default gap logic handles the rest
 def analyze_and_segment(text):
     text = clean_text(text)
@@ -151,14 +152,14 @@ async def generate_chunk_with_retry(segment_data, semaphore):
     voice = VOICES["Tamil"] if lang_type == 'indic' else VOICES["English"]
-    # Slight speed adjustment remains for naturalness
-    rate = "-10%" if lang_type == 'english' else "+0%"
     pitch = "+0Hz"
     for attempt in range(MAX_RETRIES):
         async with semaphore:
             try:
-                await asyncio.sleep(random.uniform(0.1, 0.5)) # Jitter
                 fd, path = tempfile.mkstemp(suffix=f"_{idx}.mp3")
                 os.close(fd)
@@ -175,7 +176,6 @@ async def generate_chunk_with_retry(segment_data, semaphore):
             except Exception as e:
                 delay = BASE_DELAY * (2 ** attempt) + random.uniform(0, 1)
-                print(f"⚠️ Retry Chunk {idx} in {delay:.1f}s... ({e})")
                 try: os.remove(path)
                 except: pass
                 if attempt == MAX_RETRIES - 1: return None
@@ -187,14 +187,9 @@ def process_and_stitch(results):
     final_audio = AudioSegment.empty()
-    # Default gap between switched words (e.g. Voltage [GAP] nu)
-    # 100ms is noticeable but not awkward.
-    DEFAULT_SWITCH_GAP = 120
     for i, item in enumerate(results):
         try:
             path = item['path']
             segment_audio = AudioSegment.from_mp3(path)
             try: os.remove(path)
             except: pass
@@ -206,17 +201,20 @@ def process_and_stitch(results):
             else:
                 prev_item = results[i-1]
-                # LOGIC CHANGE: Always add silence. No crossfades.
                 if prev_item['pause'] > 0:
-                    # Punctuation Gap (Big)
-                    gap_duration = prev_item['pause']
                 else:
-                    # Language Switch Gap (Small but clear)
-                    gap_duration = DEFAULT_SWITCH_GAP
-                silence = AudioSegment.silent(duration=gap_duration)
-                final_audio += silence + segment_audio
         except Exception as e:
             print(f"Error stitching segment {i}: {e}")
@@ -236,17 +234,17 @@ async def natural_tts_engine(full_text, output_file, native_lang_code):
     raw_results = await asyncio.gather(*tasks)
-    print("Stitching with gaps...")
     final_audio = process_and_stitch(raw_results)
     if not final_audio: return None
     print("Mastering...")
-    # Compression ensures the gaps are quiet and words are punchy
     final_audio = compress_dynamic_range(
         final_audio,
-        threshold=-18.0,
-        ratio=2.0,
         attack=5.0,
         release=50.0
     )
@@ -273,6 +271,7 @@ async def generate_tts(id, lines, lang_input):
     return 0, None
 def audio_func(id, lines, lang):
     loop = asyncio.new_event_loop()
     asyncio.set_event_loop(loop)

 AUDIO_DIR = "output_audio"
 os.makedirs(AUDIO_DIR, exist_ok=True)
+# Rate Limit Protection
 MAX_CONCURRENT_REQUESTS = 3
 MAX_RETRIES = 5
 BASE_DELAY = 2.0
     if not text: return ""
     text = html.unescape(str(text))
     text = re.sub(r'https?://\S+', '', text)
     text = re.sub(r'[\*\#\<\>\[\]\{\}]', '', text)
     text = re.sub(r'\s+', ' ', text).strip()
     return text
 def calculate_pause(text_chunk):
     """
+    NATURAL PAUSE DURATIONS (Reduced for speed/flow)
     """
     t = text_chunk.strip()
+    # Quick breath for comma (was 250, now 100)
+    if t.endswith(',') or t.endswith(';'): return 100
+    # Standard sentence end (was 650, now 350)
+    elif t.endswith('.'): return 350
+    elif t.endswith('?'): return 400
+    elif t.endswith('!'): return 350
+    return 0
 def analyze_and_segment(text):
     text = clean_text(text)
     voice = VOICES["Tamil"] if lang_type == 'indic' else VOICES["English"]
+    # Keeping English slightly slower helps it blend with Tamil speed
+    rate = "-5%" if lang_type == 'english' else "+0%"
     pitch = "+0Hz"
     for attempt in range(MAX_RETRIES):
         async with semaphore:
             try:
+                await asyncio.sleep(random.uniform(0.1, 0.4)) # Jitter
                 fd, path = tempfile.mkstemp(suffix=f"_{idx}.mp3")
                 os.close(fd)
             except Exception as e:
                 delay = BASE_DELAY * (2 ** attempt) + random.uniform(0, 1)
                 try: os.remove(path)
                 except: pass
                 if attempt == MAX_RETRIES - 1: return None
     final_audio = AudioSegment.empty()
     for i, item in enumerate(results):
         try:
             path = item['path']
             segment_audio = AudioSegment.from_mp3(path)
             try: os.remove(path)
             except: pass
             else:
                 prev_item = results[i-1]
+                # --- FLOW LOGIC ---
                 if prev_item['pause'] > 0:
+                    # If there was a comma/period, use the calculated pause
+                    silence = AudioSegment.silent(duration=prev_item['pause'])
+                    final_audio += silence + segment_audio
                 else:
+                    # If switching languages (Voltage -> nu), use CROSSFADE
+                    # Crossfade blends the end of English with start of Tamil.
+                    # 20ms is fast enough to sound connected but not overlapped.
+                    try:
+                        final_audio = final_audio.append(segment_audio, crossfade=20)
+                    except:
+                        # Fallback for tiny segments
+                        final_audio += segment_audio
         except Exception as e:
             print(f"Error stitching segment {i}: {e}")
     raw_results = await asyncio.gather(*tasks)
+    print("Stitching for Natural Flow...")
     final_audio = process_and_stitch(raw_results)
     if not final_audio: return None
     print("Mastering...")
+    # Compression ratio 2.5 makes speech punchy and clear
     final_audio = compress_dynamic_range(
         final_audio,
+        threshold=-15.0,
+        ratio=2.5,
         attack=5.0,
         release=50.0
     )
     return 0, None
 def audio_func(id, lines, lang):
     loop = asyncio.new_event_loop()
     asyncio.set_event_loop(loop)