Mix-Tts

Sleeping

App Files Community

hivecorp committed on Jun 27, 2025

Commit

397032e

verified ·

1 Parent(s): 71d30cf

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -3

app.py CHANGED Viewed

@@ -23,12 +23,54 @@ async def text_to_speech(text, voice, rate, pitch):
     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
         tmp_path = tmp_file.name
         await communicate.save(tmp_path)
-    return tmp_path, None
 # Gradio interface function
 def tts_interface(text, voice, rate, pitch):
-    audio, warning = asyncio.run(text_to_speech(text, voice, rate, pitch))
-    return audio, warning
 # Create Gradio application
 import gradio as gr
@@ -63,6 +105,7 @@ async def create_demo():
         ],
         outputs=[
             gr.Audio(label="Generated Audio", type="filepath"),
             gr.Markdown(label="Warning", visible=False)
         ],
         title="Edge TTS Text-to-Speech",

     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
         tmp_path = tmp_file.name
         await communicate.save(tmp_path)
+    return tmp_path, text, None
 # Gradio interface function
 def tts_interface(text, voice, rate, pitch):
+    audio, input_text, warning = asyncio.run(text_to_speech(text, voice, rate, pitch))
+    if not audio:
+        return None, None, warning
+    srt_data = generate_srt(audio, input_text)
+    srt_file = save_srt_file(srt_data)
+    return audio, srt_file, warning
+import librosa
+import numpy as np
+import srt
+import datetime
+# Function to generate SRT from audio and input text
+def generate_srt(audio_path, input_text):
+    y, sr = librosa.load(audio_path)
+    total_duration = librosa.get_duration(y=y, sr=sr)
+    words = input_text.strip().split()
+    num_words = len(words)
+    if num_words == 0:
+        return ""
+    avg_word_duration = total_duration / num_words
+    subs = []
+    start_time = 0.0
+    for i, word in enumerate(words):
+        end_time = start_time + avg_word_duration
+        subs.append(
+            srt.Subtitle(index=i+1,
+                         start=datetime.timedelta(seconds=start_time),
+                         end=datetime.timedelta(seconds=end_time),
+                         content=word)
+        )
+        start_time = end_time
+    return srt.compose(subs)
+# Save SRT to file
+def save_srt_file(srt_text):
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".srt", mode='w', encoding='utf-8') as f:
+        f.write(srt_text)
+        return f.name
 # Create Gradio application
 import gradio as gr
         ],
         outputs=[
             gr.Audio(label="Generated Audio", type="filepath"),
+            gr.File(label="Download Subtitle (.srt)"),
             gr.Markdown(label="Warning", visible=False)
         ],
         title="Edge TTS Text-to-Speech",