hivecorp committed on
Commit
397032e
·
verified ·
1 Parent(s): 71d30cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -3
app.py CHANGED
@@ -23,12 +23,54 @@ async def text_to_speech(text, voice, rate, pitch):
23
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
24
  tmp_path = tmp_file.name
25
  await communicate.save(tmp_path)
26
- return tmp_path, None
27
 
28
  # Gradio interface function
29
  def tts_interface(text, voice, rate, pitch):
30
- audio, warning = asyncio.run(text_to_speech(text, voice, rate, pitch))
31
- return audio, warning
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  # Create Gradio application
34
  import gradio as gr
@@ -63,6 +105,7 @@ async def create_demo():
63
  ],
64
  outputs=[
65
  gr.Audio(label="Generated Audio", type="filepath"),
 
66
  gr.Markdown(label="Warning", visible=False)
67
  ],
68
  title="Edge TTS Text-to-Speech",
 
23
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
24
  tmp_path = tmp_file.name
25
  await communicate.save(tmp_path)
26
+ return tmp_path, text, None
27
 
28
  # Gradio interface function
29
  def tts_interface(text, voice, rate, pitch):
30
+ audio, input_text, warning = asyncio.run(text_to_speech(text, voice, rate, pitch))
31
+ if not audio:
32
+ return None, None, warning
33
+ srt_data = generate_srt(audio, input_text)
34
+ srt_file = save_srt_file(srt_data)
35
+ return audio, srt_file, warning
36
+
37
+
38
+ import librosa
39
+ import numpy as np
40
+ import srt
41
+ import datetime
42
+
43
+ # Function to generate SRT from audio and input text
44
def generate_srt(audio_path, input_text):
    """Build SRT subtitle text that spreads the words of *input_text* evenly over the audio.

    The clip duration is divided equally between the whitespace-separated
    words and every word becomes one cue, each starting where the previous
    one ended. The timings are a uniform estimate derived from the total
    duration only; they are not aligned with the actual speech.

    Args:
        audio_path: Path of the audio file whose duration is measured.
        input_text: The text that was spoken. ``None`` is treated as empty.

    Returns:
        The composed SRT document as a string, or ``""`` when the text
        contains no words (the audio file is not read in that case).
    """
    # Look for words first: decoding the audio is the expensive step and is
    # pointless when there is nothing to subtitle.
    words = (input_text or "").split()
    if not words:
        return ""

    y, sr = librosa.load(audio_path)
    total_duration = librosa.get_duration(y=y, sr=sr)
    avg_word_duration = total_duration / len(words)

    subs = []
    start_time = 0.0
    for index, word in enumerate(words, start=1):
        end_time = start_time + avg_word_duration
        subs.append(
            srt.Subtitle(
                index=index,
                start=datetime.timedelta(seconds=start_time),
                end=datetime.timedelta(seconds=end_time),
                content=word,
            )
        )
        # Cues are contiguous: the next word starts where this one ended.
        start_time = end_time

    return srt.compose(subs)
68
+
69
+ # Save SRT to file
70
def save_srt_file(srt_text):
    """Write *srt_text* to a new UTF-8 temporary ``.srt`` file and return its path.

    The file is created with ``delete=False``, so it stays on disk after it
    is closed; nothing in this function removes it.
    """
    handle = tempfile.NamedTemporaryFile(
        mode='w', suffix=".srt", delete=False, encoding='utf-8'
    )
    with handle as srt_file:
        srt_file.write(srt_text)
    return srt_file.name
74
 
75
  # Create Gradio application
76
  import gradio as gr
 
105
  ],
106
  outputs=[
107
  gr.Audio(label="Generated Audio", type="filepath"),
108
+ gr.File(label="Download Subtitle (.srt)"),
109
  gr.Markdown(label="Warning", visible=False)
110
  ],
111
  title="Edge TTS Text-to-Speech",