Mix-Tts

Sleeping

App Files Community

hivecorp committed on Jun 27, 2025

Commit

daa4d26

verified ·

1 Parent(s): 2bc7131

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -58

app.py CHANGED Viewed

@@ -3,6 +3,11 @@ import edge_tts
 import asyncio
 import tempfile
 import os
 # Get all available voices
 async def get_voices():
@@ -12,9 +17,9 @@ async def get_voices():
 # Text-to-speech function
 async def text_to_speech(text, voice, rate, pitch):
     if not text.strip():
-        return None, gr.Warning("Please enter text to convert.")
     if not voice:
-        return None, gr.Warning("Please select a voice.")
     voice_short_name = voice.split(" - ")[0]
     rate_str = f"{rate:+d}%"
@@ -25,76 +30,78 @@ async def text_to_speech(text, voice, rate, pitch):
         await communicate.save(tmp_path)
     return tmp_path, text, None
-# Gradio interface function
-def tts_interface(text, voice, rate, pitch):
-    audio, input_text, warning = asyncio.run(text_to_speech(text, voice, rate, pitch))
-    if not audio:
-        return None, None, warning
-    srt_data = generate_srt(audio, input_text)
-    srt_file = save_srt_file(srt_data)
-    return audio, srt_file, warning
-import librosa
-import numpy as np
-import srt
-import datetime
-# Function to generate SRT from audio and input text
 def generate_srt(audio_path, input_text):
     y, sr = librosa.load(audio_path)
-    total_duration = librosa.get_duration(y=y, sr=sr)
-    words = input_text.strip().split()
-    num_words = len(words)
-    if num_words == 0:
-        return ""
-    avg_word_duration = total_duration / num_words
     subs = []
-    start_time = 0.0
-    for i, word in enumerate(words):
-        end_time = start_time + avg_word_duration
-        subs.append(
-            srt.Subtitle(index=i+1,
-                         start=datetime.timedelta(seconds=start_time),
-                         end=datetime.timedelta(seconds=end_time),
-                         content=word)
-        )
-        start_time = end_time
     return srt.compose(subs)
-# Save SRT to file
 def save_srt_file(srt_text):
     with tempfile.NamedTemporaryFile(delete=False, suffix=".srt", mode='w', encoding='utf-8') as f:
         f.write(srt_text)
         return f.name
-# Create Gradio application
-import gradio as gr
 async def create_demo():
     voices = await get_voices()
     description = """
-    Convert text to speech using Microsoft Edge TTS. Adjust speech rate and pitch: 0 is default, positive values increase, negative values decrease.
-    🎥 **Exciting News: Introducing our Text-to-Video Converter!** 🎥
-    Take your content creation to the next level with our cutting-edge Text-to-Video Converter!
-    Transform your words into stunning, professional-quality videos in just a few clicks.
-    ✨ Features:
-    • Convert text to engaging videos with customizable visuals
-    • Choose from 40+ languages and 300+ voices
-    • Perfect for creating audiobooks, storytelling, and language learning materials
-    • Ideal for educators, content creators, and language enthusiasts
-    Ready to revolutionize your content? [Click here to try our Text-to-Video Converter now!](https://text2video.wingetgui.com/)
     """
     demo = gr.Interface(
         fn=tts_interface,
         inputs=[
@@ -108,15 +115,13 @@ async def create_demo():
             gr.File(label="Download Subtitle (.srt)"),
             gr.Markdown(label="Warning", visible=False)
         ],
-        title="Edge TTS Text-to-Speech",
         description=description,
-        article="Experience the power of Edge TTS for text-to-speech conversion, and explore our advanced Text-to-Video Converter for even more creative possibilities!",
-        analytics_enabled=False,
         allow_flagging=False
     )
     return demo
-# Run the application
 if __name__ == "__main__":
     demo = asyncio.run(create_demo())
-    demo.launch()

 import asyncio
 import tempfile
 import os
+import librosa
+import numpy as np
+import srt
+import datetime
+import re
 # Get all available voices
 async def get_voices():
 # Text-to-speech function
 async def text_to_speech(text, voice, rate, pitch):
     if not text.strip():
+        return None, text, gr.Warning("Please enter text to convert.")
     if not voice:
+        return None, text, gr.Warning("Please select a voice.")
     voice_short_name = voice.split(" - ")[0]
     rate_str = f"{rate:+d}%"
         await communicate.save(tmp_path)
     return tmp_path, text, None
+# Split text into manageable segments
+def split_text_by_punctuation(text):
+    raw_segments = re.split(r'(?<=[.?!])\s+|\n+', text.strip())
+    segments = []
+    for segment in raw_segments:
+        words = segment.strip().split()
+        while len(words) > 8:
+            segments.append(" ".join(words[:8]))
+            words = words[8:]
+        if words:
+            segments.append(" ".join(words))
+    return segments
+# Generate subtitle based on audio activity and text
 def generate_srt(audio_path, input_text):
     y, sr = librosa.load(audio_path)
+    intervals = librosa.effects.split(y, top_db=25)
+    segments = split_text_by_punctuation(input_text)
+    total_audio_duration = librosa.get_duration(y=y, sr=sr)
+    num_segments = len(segments)
     subs = []
+    if len(intervals) < num_segments:
+        avg_duration = total_audio_duration / num_segments
+        start_time = 0.0
+        for i, seg in enumerate(segments):
+            end_time = start_time + avg_duration
+            subs.append(srt.Subtitle(
+                index=i + 1,
+                start=datetime.timedelta(seconds=start_time),
+                end=datetime.timedelta(seconds=end_time),
+                content=seg
+            ))
+            start_time = end_time
+    else:
+        for i, (start_sample, end_sample) in enumerate(intervals[:num_segments]):
+            start_sec = start_sample / sr
+            end_sec = end_sample / sr
+            subs.append(srt.Subtitle(
+                index=i + 1,
+                start=datetime.timedelta(seconds=start_sec),
+                end=datetime.timedelta(seconds=end_sec),
+                content=segments[i]
+            ))
     return srt.compose(subs)
+# Save SRT to temp file
 def save_srt_file(srt_text):
     with tempfile.NamedTemporaryFile(delete=False, suffix=".srt", mode='w', encoding='utf-8') as f:
         f.write(srt_text)
         return f.name
+# Interface logic
+def tts_interface(text, voice, rate, pitch):
+    audio, input_text, warning = asyncio.run(text_to_speech(text, voice, rate, pitch))
+    if not audio:
+        return None, None, warning
+    srt_data = generate_srt(audio, input_text)
+    srt_file = save_srt_file(srt_data)
+    return audio, srt_file, warning
+# Gradio app setup
 async def create_demo():
     voices = await get_voices()
     description = """
+    🎙️ Convert text to natural speech using Microsoft Edge TTS with subtitle generation (.srt).
+    Subtitles are automatically synced based on punctuation and audio waveform.
     """
     demo = gr.Interface(
         fn=tts_interface,
         inputs=[
             gr.File(label="Download Subtitle (.srt)"),
             gr.Markdown(label="Warning", visible=False)
         ],
+        title="Edge TTS with Subtitles",
         description=description,
         allow_flagging=False
     )
     return demo
+# Run app
 if __name__ == "__main__":
     demo = asyncio.run(create_demo())
+    demo.launch()