hivecorp committed on
Commit
c1db51a
·
verified ·
1 Parent(s): 135d192

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -34
app.py CHANGED
@@ -30,7 +30,7 @@ async def text_to_speech(text, voice, rate, pitch):
30
  await communicate.save(tmp_path)
31
  return tmp_path, text, None
32
 
33
- # Split text into manageable segments
34
  def split_text_by_punctuation(text):
35
  raw_segments = re.split(r'(?<=[.?!])\s+|\n+', text.strip())
36
  segments = []
@@ -43,7 +43,7 @@ def split_text_by_punctuation(text):
43
  segments.append(" ".join(words))
44
  return segments
45
 
46
- # Generate subtitle based on audio activity and text
47
  def generate_srt(audio_path, input_text):
48
  y, sr = librosa.load(audio_path)
49
  intervals = librosa.effects.split(y, top_db=25)
@@ -51,40 +51,43 @@ def generate_srt(audio_path, input_text):
51
  total_audio_duration = librosa.get_duration(y=y, sr=sr)
52
 
53
  num_segments = len(segments)
54
- subs = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
- if len(intervals) < num_segments:
57
- avg_duration = total_audio_duration / num_segments
58
- start_time = 0.0
59
- for i, seg in enumerate(segments):
60
- end_time = start_time + avg_duration
61
- subs.append(srt.Subtitle(
62
- index=i + 1,
63
- start=datetime.timedelta(seconds=start_time),
64
- end=datetime.timedelta(seconds=end_time),
65
- content=seg
66
- ))
67
- start_time = end_time
68
- else:
69
- for i, (start_sample, end_sample) in enumerate(intervals[:num_segments]):
70
- start_sec = start_sample / sr
71
- end_sec = end_sample / sr
72
- subs.append(srt.Subtitle(
73
- index=i + 1,
74
- start=datetime.timedelta(seconds=start_sec),
75
- end=datetime.timedelta(seconds=end_sec),
76
- content=segments[i]
77
- ))
78
 
79
  return srt.compose(subs)
80
 
81
# Persist composed SRT text so Gradio can offer it as a downloadable file.
def save_srt_file(srt_text):
    """Write *srt_text* to a temporary .srt file and return its path.

    The file is created with delete=False, so it survives after close;
    the caller (or the OS temp cleaner) is responsible for removal.
    """
    tmp = tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", suffix=".srt", delete=False
    )
    try:
        tmp.write(srt_text)
    finally:
        tmp.close()
    return tmp.name
86
 
87
- # Interface logic
88
  def tts_interface(text, voice, rate, pitch):
89
  audio, input_text, warning = asyncio.run(text_to_speech(text, voice, rate, pitch))
90
  if not audio:
@@ -93,15 +96,16 @@ def tts_interface(text, voice, rate, pitch):
93
  srt_file = save_srt_file(srt_data)
94
  return audio, srt_file, warning
95
 
96
- # Gradio app setup
97
  async def create_demo():
98
  voices = await get_voices()
99
-
100
  description = """
101
- 🎙️ Convert text to natural speech using Microsoft Edge TTS with subtitle generation (.srt).
102
- Subtitles are automatically synced based on punctuation and audio waveform.
 
103
  """
104
-
105
  demo = gr.Interface(
106
  fn=tts_interface,
107
  inputs=[
@@ -115,13 +119,13 @@ async def create_demo():
115
  gr.File(label="Download Subtitle (.srt)"),
116
  gr.Markdown(label="Warning", visible=False)
117
  ],
118
- title="Edge TTS with Subtitles",
119
  description=description,
120
  allow_flagging=False
121
  )
122
  return demo
123
 
124
# Script entry point: build the async-constructed Gradio demo, then serve it.
if __name__ == "__main__":
    app = asyncio.run(create_demo())
    app.launch()
 
30
  await communicate.save(tmp_path)
31
  return tmp_path, text, None
32
 
33
+ # Split text into subtitle segments
34
  def split_text_by_punctuation(text):
35
  raw_segments = re.split(r'(?<=[.?!])\s+|\n+', text.strip())
36
  segments = []
 
43
  segments.append(" ".join(words))
44
  return segments
45
 
46
+ # Generate accurate subtitle timings using waveform intervals
47
  def generate_srt(audio_path, input_text):
48
  y, sr = librosa.load(audio_path)
49
  intervals = librosa.effects.split(y, top_db=25)
 
51
  total_audio_duration = librosa.get_duration(y=y, sr=sr)
52
 
53
  num_segments = len(segments)
54
+ num_intervals = len(intervals)
55
+
56
+ # If fewer intervals than segments, create synthetic intervals
57
+ if num_intervals < num_segments:
58
+ step = int(len(y) / num_segments)
59
+ intervals = [(i * step, min((i + 1) * step, len(y))) for i in range(num_segments)]
60
+ elif num_intervals > num_segments:
61
+ merged_intervals = []
62
+ i = 0
63
+ segs_per_interval = num_intervals / num_segments
64
+ while i < num_intervals:
65
+ start = intervals[int(i)][0]
66
+ end = intervals[min(int(i + segs_per_interval - 1), num_intervals - 1)][1]
67
+ merged_intervals.append((start, end))
68
+ i += segs_per_interval
69
+ intervals = merged_intervals
70
 
71
+ subs = []
72
+ for idx, (seg_text, (start_sample, end_sample)) in enumerate(zip(segments, intervals)):
73
+ start_sec = start_sample / sr
74
+ end_sec = end_sample / sr
75
+ subs.append(srt.Subtitle(
76
+ index=idx + 1,
77
+ start=datetime.timedelta(seconds=start_sec),
78
+ end=datetime.timedelta(seconds=end_sec),
79
+ content=seg_text
80
+ ))
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  return srt.compose(subs)
83
 
84
# Write subtitles to disk so the UI can expose a download link.
def save_srt_file(srt_text):
    """Save *srt_text* into a fresh temporary ``.srt`` file.

    Returns the file path. The file is intentionally not auto-deleted:
    Gradio needs it to remain on disk to serve the download.
    """
    fd, path = tempfile.mkstemp(suffix=".srt")
    with open(fd, "w", encoding="utf-8") as out:
        out.write(srt_text)
    return path
89
 
90
+ # Main interface logic
91
  def tts_interface(text, voice, rate, pitch):
92
  audio, input_text, warning = asyncio.run(text_to_speech(text, voice, rate, pitch))
93
  if not audio:
 
96
  srt_file = save_srt_file(srt_data)
97
  return audio, srt_file, warning
98
 
99
+ # Create Gradio interface
100
  async def create_demo():
101
  voices = await get_voices()
102
+
103
  description = """
104
+ 🎙️ Convert text to realistic voice with Microsoft Edge TTS.
105
+ Auto-generate synced subtitles (.srt) from punctuation and audio waveform.
106
+ 💡 Use for YouTube, narration, and voiceover projects.
107
  """
108
+
109
  demo = gr.Interface(
110
  fn=tts_interface,
111
  inputs=[
 
119
  gr.File(label="Download Subtitle (.srt)"),
120
  gr.Markdown(label="Warning", visible=False)
121
  ],
122
+ title="Edge TTS with Auto Subtitles",
123
  description=description,
124
  allow_flagging=False
125
  )
126
  return demo
127
 
128
# Launch the app when executed as a script (not on import).
def _main():
    # create_demo is a coroutine, so drive it to completion first.
    demo = asyncio.run(create_demo())
    demo.launch()


if __name__ == "__main__":
    _main()