hivecorp committed on
Commit
daa4d26
·
verified ·
1 Parent(s): 2bc7131

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -58
app.py CHANGED
@@ -3,6 +3,11 @@ import edge_tts
3
  import asyncio
4
  import tempfile
5
  import os
 
 
 
 
 
6
 
7
  # Get all available voices
8
  async def get_voices():
@@ -12,9 +17,9 @@ async def get_voices():
12
  # Text-to-speech function
13
  async def text_to_speech(text, voice, rate, pitch):
14
  if not text.strip():
15
- return None, gr.Warning("Please enter text to convert.")
16
  if not voice:
17
- return None, gr.Warning("Please select a voice.")
18
 
19
  voice_short_name = voice.split(" - ")[0]
20
  rate_str = f"{rate:+d}%"
@@ -25,76 +30,78 @@ async def text_to_speech(text, voice, rate, pitch):
25
  await communicate.save(tmp_path)
26
  return tmp_path, text, None
27
 
28
- # Gradio interface function
29
- def tts_interface(text, voice, rate, pitch):
30
- audio, input_text, warning = asyncio.run(text_to_speech(text, voice, rate, pitch))
31
- if not audio:
32
- return None, None, warning
33
- srt_data = generate_srt(audio, input_text)
34
- srt_file = save_srt_file(srt_data)
35
- return audio, srt_file, warning
36
-
37
-
38
- import librosa
39
- import numpy as np
40
- import srt
41
- import datetime
42
 
43
- # Function to generate SRT from audio and input text
44
  def generate_srt(audio_path, input_text):
45
  y, sr = librosa.load(audio_path)
46
- total_duration = librosa.get_duration(y=y, sr=sr)
47
- words = input_text.strip().split()
48
- num_words = len(words)
49
 
50
- if num_words == 0:
51
- return ""
52
-
53
- avg_word_duration = total_duration / num_words
54
  subs = []
55
- start_time = 0.0
56
 
57
- for i, word in enumerate(words):
58
- end_time = start_time + avg_word_duration
59
- subs.append(
60
- srt.Subtitle(index=i+1,
61
- start=datetime.timedelta(seconds=start_time),
62
- end=datetime.timedelta(seconds=end_time),
63
- content=word)
64
- )
65
- start_time = end_time
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  return srt.compose(subs)
68
 
69
- # Save SRT to file
70
  def save_srt_file(srt_text):
71
  with tempfile.NamedTemporaryFile(delete=False, suffix=".srt", mode='w', encoding='utf-8') as f:
72
  f.write(srt_text)
73
  return f.name
74
 
75
- # Create Gradio application
76
- import gradio as gr
 
 
 
 
 
 
77
 
 
78
  async def create_demo():
79
  voices = await get_voices()
80
-
81
  description = """
82
- Convert text to speech using Microsoft Edge TTS. Adjust speech rate and pitch: 0 is default, positive values increase, negative values decrease.
83
-
84
- 🎥 **Exciting News: Introducing our Text-to-Video Converter!** 🎥
85
-
86
- Take your content creation to the next level with our cutting-edge Text-to-Video Converter!
87
- Transform your words into stunning, professional-quality videos in just a few clicks.
88
-
89
- ✨ Features:
90
- • Convert text to engaging videos with customizable visuals
91
- • Choose from 40+ languages and 300+ voices
92
- • Perfect for creating audiobooks, storytelling, and language learning materials
93
- • Ideal for educators, content creators, and language enthusiasts
94
-
95
- Ready to revolutionize your content? [Click here to try our Text-to-Video Converter now!](https://text2video.wingetgui.com/)
96
  """
97
-
98
  demo = gr.Interface(
99
  fn=tts_interface,
100
  inputs=[
@@ -108,15 +115,13 @@ async def create_demo():
108
  gr.File(label="Download Subtitle (.srt)"),
109
  gr.Markdown(label="Warning", visible=False)
110
  ],
111
- title="Edge TTS Text-to-Speech",
112
  description=description,
113
- article="Experience the power of Edge TTS for text-to-speech conversion, and explore our advanced Text-to-Video Converter for even more creative possibilities!",
114
- analytics_enabled=False,
115
  allow_flagging=False
116
  )
117
  return demo
118
 
119
- # Run the application
120
  if __name__ == "__main__":
121
  demo = asyncio.run(create_demo())
122
- demo.launch()
 
3
  import asyncio
4
  import tempfile
5
  import os
6
+ import librosa
7
+ import numpy as np
8
+ import srt
9
+ import datetime
10
+ import re
11
 
12
  # Get all available voices
13
  async def get_voices():
 
17
  # Text-to-speech function
18
  async def text_to_speech(text, voice, rate, pitch):
19
  if not text.strip():
20
+ return None, text, gr.Warning("Please enter text to convert.")
21
  if not voice:
22
+ return None, text, gr.Warning("Please select a voice.")
23
 
24
  voice_short_name = voice.split(" - ")[0]
25
  rate_str = f"{rate:+d}%"
 
30
  await communicate.save(tmp_path)
31
  return tmp_path, text, None
32
 
33
# Split text into subtitle-sized segments
def split_text_by_punctuation(text, max_words=8):
    """Break ``text`` into short caption segments.

    The text is first cut after sentence-ending punctuation (``.``, ``?``,
    ``!``) that is followed by whitespace, and at line breaks. Any resulting
    piece longer than ``max_words`` words is then chopped into consecutive
    chunks of at most ``max_words`` words each.

    Args:
        text: The input text to segment.
        max_words: Maximum number of words allowed in one segment.
            Defaults to 8; must be at least 1.

    Returns:
        A list of non-empty segment strings. Words inside a segment are
        joined by single spaces. Whitespace-only input yields ``[]``.

    Raises:
        ValueError: If ``max_words`` is less than 1.
    """
    if max_words < 1:
        raise ValueError("max_words must be at least 1")

    segments = []
    for piece in re.split(r'(?<=[.?!])\s+|\n+', text.strip()):
        words = piece.split()
        # Stride slicing yields nothing for an empty piece, so blank
        # fragments produced by the split are dropped automatically.
        segments.extend(
            " ".join(words[i:i + max_words])
            for i in range(0, len(words), max_words)
        )
    return segments
 
 
45
 
46
# Generate subtitles from detected speech activity and the input text
def generate_srt(audio_path, input_text):
    """Build SRT subtitle text for an audio file and the text it speaks.

    The text is cut into caption segments with ``split_text_by_punctuation``.
    Timing comes from ``librosa.effects.split`` (``top_db=25``), whose
    ``(start_sample, end_sample)`` pairs are converted to seconds:

    * If fewer intervals than segments are detected, the total audio
      duration is divided evenly between the segments.
    * Otherwise the intervals are partitioned, in order, into one contiguous
      group per segment; each caption runs from the start of its group's
      first interval to the end of its last. When the counts are equal this
      is exactly one interval per segment. Grouping (rather than truncating
      to the first N intervals) keeps captions on screen until the last
      detected speech ends.

    Args:
        audio_path: Path to the audio file to analyse.
        input_text: The text that was synthesized into the audio.

    Returns:
        The composed SRT document as a string (empty when the text yields
        no segments).
    """
    segments = split_text_by_punctuation(input_text)
    if not segments:
        # Nothing to caption; skip decoding the audio entirely.
        return srt.compose([])

    y, sr = librosa.load(audio_path)
    intervals = librosa.effects.split(y, top_db=25)
    num_segments = len(segments)

    if len(intervals) < num_segments:
        # Not enough detected pauses to align on: fall back to equal slices.
        # Multiplying by the index avoids accumulating float error.
        avg_duration = librosa.get_duration(y=y, sr=sr) / num_segments
        spans = [
            (i * avg_duration, (i + 1) * avg_duration)
            for i in range(num_segments)
        ]
    else:
        # len(intervals) >= num_segments, so every group is non-empty.
        groups = np.array_split(intervals, num_segments)
        spans = [(group[0][0] / sr, group[-1][1] / sr) for group in groups]

    subs = [
        srt.Subtitle(
            index=i + 1,
            start=datetime.timedelta(seconds=float(start_sec)),
            end=datetime.timedelta(seconds=float(end_sec)),
            content=seg,
        )
        for i, (seg, (start_sec, end_sec)) in enumerate(zip(segments, spans))
    ]
    return srt.compose(subs)
80
 
81
# Persist SRT text to a temporary .srt file
def save_srt_file(srt_text):
    """Write ``srt_text`` to a new UTF-8 temporary ``.srt`` file.

    The file is created with ``delete=False``, so it stays on disk after
    being closed and can be handed to the caller by path.

    Args:
        srt_text: The subtitle document to store.

    Returns:
        The path of the temporary file that was written.
    """
    handle = tempfile.NamedTemporaryFile(
        mode='w',
        suffix=".srt",
        encoding='utf-8',
        delete=False,
    )
    try:
        handle.write(srt_text)
    finally:
        handle.close()
    return handle.name
86
 
87
# Synchronous Gradio callback
def tts_interface(text, voice, rate, pitch):
    """Synthesize speech and produce a matching subtitle file.

    Runs the async ``text_to_speech`` coroutine to completion, then, if it
    produced audio, generates SRT text for it and saves that to a file.

    Args:
        text: Text to convert.
        voice: Selected voice entry.
        rate: Speech-rate adjustment passed through to ``text_to_speech``.
        pitch: Pitch adjustment passed through to ``text_to_speech``.

    Returns:
        A tuple ``(audio, srt_file, warning)``. ``audio`` and ``srt_file``
        are both ``None`` when no audio was produced.
    """
    result = asyncio.run(text_to_speech(text, voice, rate, pitch))
    audio_path, spoken_text, warning = result

    if audio_path:
        subtitle_path = save_srt_file(generate_srt(audio_path, spoken_text))
        return audio_path, subtitle_path, warning

    # No audio was produced; pass the warning through on its own.
    return None, None, warning
95
 
96
+ # Gradio app setup
97
  async def create_demo():
98
  voices = await get_voices()
99
+
100
  description = """
101
+ 🎙️ Convert text to natural speech using Microsoft Edge TTS with subtitle generation (.srt).
102
+ Subtitles are automatically synced based on punctuation and audio waveform.
 
 
 
 
 
 
 
 
 
 
 
 
103
  """
104
+
105
  demo = gr.Interface(
106
  fn=tts_interface,
107
  inputs=[
 
115
  gr.File(label="Download Subtitle (.srt)"),
116
  gr.Markdown(label="Warning", visible=False)
117
  ],
118
+ title="Edge TTS with Subtitles",
119
  description=description,
 
 
120
  allow_flagging=False
121
  )
122
  return demo
123
 
124
# Script entry point
if __name__ == "__main__":
    # create_demo() is a coroutine, so the interface is built inside an
    # event loop first; the server is launched once that has finished.
    demo = asyncio.run(create_demo())
    demo.launch()