hivecorp committed on
Commit
c1db51a
·
verified ·
1 Parent(s): 135d192

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -34
app.py CHANGED
@@ -30,7 +30,7 @@ async def text_to_speech(text, voice, rate, pitch):
30
  await communicate.save(tmp_path)
31
  return tmp_path, text, None
32
 
33
- # Split text into manageable segments
34
  def split_text_by_punctuation(text):
35
  raw_segments = re.split(r'(?<=[.?!])\s+|\n+', text.strip())
36
  segments = []
@@ -43,7 +43,7 @@ def split_text_by_punctuation(text):
43
  segments.append(" ".join(words))
44
  return segments
45
 
46
- # Generate subtitle based on audio activity and text
47
  def generate_srt(audio_path, input_text):
48
  y, sr = librosa.load(audio_path)
49
  intervals = librosa.effects.split(y, top_db=25)
@@ -51,40 +51,43 @@ def generate_srt(audio_path, input_text):
51
  total_audio_duration = librosa.get_duration(y=y, sr=sr)
52
 
53
  num_segments = len(segments)
54
- subs = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
- if len(intervals) < num_segments:
57
- avg_duration = total_audio_duration / num_segments
58
- start_time = 0.0
59
- for i, seg in enumerate(segments):
60
- end_time = start_time + avg_duration
61
- subs.append(srt.Subtitle(
62
- index=i + 1,
63
- start=datetime.timedelta(seconds=start_time),
64
- end=datetime.timedelta(seconds=end_time),
65
- content=seg
66
- ))
67
- start_time = end_time
68
- else:
69
- for i, (start_sample, end_sample) in enumerate(intervals[:num_segments]):
70
- start_sec = start_sample / sr
71
- end_sec = end_sample / sr
72
- subs.append(srt.Subtitle(
73
- index=i + 1,
74
- start=datetime.timedelta(seconds=start_sec),
75
- end=datetime.timedelta(seconds=end_sec),
76
- content=segments[i]
77
- ))
78
 
79
  return srt.compose(subs)
80
 
81
# Persist composed SRT text so Gradio can offer it as a downloadable file.
def save_srt_file(srt_text):
    """Write *srt_text* to a temporary .srt file and return its path.

    The file is created with delete=False, so it survives after close;
    the caller (or the OS temp cleaner) is responsible for removal.
    """
    tmp = tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", suffix=".srt", delete=False
    )
    try:
        tmp.write(srt_text)
    finally:
        tmp.close()
    return tmp.name
86
 
87
- # Interface logic
88
  def tts_interface(text, voice, rate, pitch):
89
  audio, input_text, warning = asyncio.run(text_to_speech(text, voice, rate, pitch))
90
  if not audio:
@@ -93,15 +96,16 @@ def tts_interface(text, voice, rate, pitch):
93
  srt_file = save_srt_file(srt_data)
94
  return audio, srt_file, warning
95
 
96
- # Gradio app setup
97
  async def create_demo():
98
  voices = await get_voices()
99
-
100
  description = """
101
- 🎙️ Convert text to natural speech using Microsoft Edge TTS with subtitle generation (.srt).
102
- Subtitles are automatically synced based on punctuation and audio waveform.
 
103
  """
104
-
105
  demo = gr.Interface(
106
  fn=tts_interface,
107
  inputs=[
@@ -115,13 +119,13 @@ async def create_demo():
115
  gr.File(label="Download Subtitle (.srt)"),
116
  gr.Markdown(label="Warning", visible=False)
117
  ],
118
- title="Edge TTS with Subtitles",
119
  description=description,
120
  allow_flagging=False
121
  )
122
  return demo
123
 
124
# Script entry point: build the async-constructed Gradio demo, then serve it.
if __name__ == "__main__":
    app = asyncio.run(create_demo())
    app.launch()
 
30
  await communicate.save(tmp_path)
31
  return tmp_path, text, None
32
 
33
+ # Split text into subtitle segments
34
  def split_text_by_punctuation(text):
35
  raw_segments = re.split(r'(?<=[.?!])\s+|\n+', text.strip())
36
  segments = []
 
43
  segments.append(" ".join(words))
44
  return segments
45
 
46
+ # Generate accurate subtitle timings using waveform intervals
47
  def generate_srt(audio_path, input_text):
48
  y, sr = librosa.load(audio_path)
49
  intervals = librosa.effects.split(y, top_db=25)
 
51
  total_audio_duration = librosa.get_duration(y=y, sr=sr)
52
 
53
  num_segments = len(segments)
54
+ num_intervals = len(intervals)
55
+
56
+ # If fewer intervals than segments, create synthetic intervals
57
+ if num_intervals < num_segments:
58
+ step = int(len(y) / num_segments)
59
+ intervals = [(i * step, min((i + 1) * step, len(y))) for i in range(num_segments)]
60
+ elif num_intervals > num_segments:
61
+ merged_intervals = []
62
+ i = 0
63
+ segs_per_interval = num_intervals / num_segments
64
+ while i < num_intervals:
65
+ start = intervals[int(i)][0]
66
+ end = intervals[min(int(i + segs_per_interval - 1), num_intervals - 1)][1]
67
+ merged_intervals.append((start, end))
68
+ i += segs_per_interval
69
+ intervals = merged_intervals
70
 
71
+ subs = []
72
+ for idx, (seg_text, (start_sample, end_sample)) in enumerate(zip(segments, intervals)):
73
+ start_sec = start_sample / sr
74
+ end_sec = end_sample / sr
75
+ subs.append(srt.Subtitle(
76
+ index=idx + 1,
77
+ start=datetime.timedelta(seconds=start_sec),
78
+ end=datetime.timedelta(seconds=end_sec),
79
+ content=seg_text
80
+ ))
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  return srt.compose(subs)
83
 
84
# Write subtitles to disk so the UI can expose a download link.
def save_srt_file(srt_text):
    """Save *srt_text* into a fresh temporary ``.srt`` file.

    Returns the file path. The file is intentionally not auto-deleted:
    Gradio needs it to remain on disk to serve the download.
    """
    fd, path = tempfile.mkstemp(suffix=".srt")
    with open(fd, "w", encoding="utf-8") as out:
        out.write(srt_text)
    return path
89
 
90
+ # Main interface logic
91
  def tts_interface(text, voice, rate, pitch):
92
  audio, input_text, warning = asyncio.run(text_to_speech(text, voice, rate, pitch))
93
  if not audio:
 
96
  srt_file = save_srt_file(srt_data)
97
  return audio, srt_file, warning
98
 
99
+ # Create Gradio interface
100
  async def create_demo():
101
  voices = await get_voices()
102
+
103
  description = """
104
+ 🎙️ Convert text to realistic voice with Microsoft Edge TTS.
105
+ Auto-generate synced subtitles (.srt) from punctuation and audio waveform.
106
+ 💡 Use for YouTube, narration, and voiceover projects.
107
  """
108
+
109
  demo = gr.Interface(
110
  fn=tts_interface,
111
  inputs=[
 
119
  gr.File(label="Download Subtitle (.srt)"),
120
  gr.Markdown(label="Warning", visible=False)
121
  ],
122
+ title="Edge TTS with Auto Subtitles",
123
  description=description,
124
  allow_flagging=False
125
  )
126
  return demo
127
 
128
# Launch the app when executed as a script (not on import).
def _main():
    # create_demo is a coroutine, so drive it to completion first.
    demo = asyncio.run(create_demo())
    demo.launch()


if __name__ == "__main__":
    _main()