hivecorp committed on
Commit
7c14ea7
·
verified ·
1 Parent(s): 7d70e82

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -54
app.py CHANGED
@@ -2,23 +2,17 @@ import gradio as gr
2
  import edge_tts
3
  import asyncio
4
  import tempfile
5
- import os
6
  import nltk
 
7
  import srt
8
  from pydub import AudioSegment, silence
9
  import datetime
10
  import nest_asyncio
11
- import threading
12
 
13
- # Enable nested event loop (required for Spaces)
14
  nest_asyncio.apply()
15
 
16
- # 📦 Safe punkt download
17
- nltk_data_path = os.path.join(os.path.expanduser("~"), "nltk_data")
18
- nltk.download("punkt", download_dir=nltk_data_path)
19
- nltk.data.path.append(nltk_data_path)
20
 
21
- # 🗣️ Text-to-Speech and SRT generator
22
  async def text_to_speech(text, voice, rate, pitch):
23
  if not text.strip():
24
  return None, None, "Please enter some text."
@@ -34,11 +28,10 @@ async def text_to_speech(text, voice, rate, pitch):
34
  tmp_path = tmp_file.name
35
  await communicate.save(tmp_path)
36
 
37
- # SRT generation
38
  srt_path = generate_srt(tmp_path, text)
39
  return tmp_path, srt_path, ""
40
 
41
- # 🧠 Generate subtitles
42
  def generate_srt(audio_path, text):
43
  audio = AudioSegment.from_file(audio_path)
44
  silences = silence.detect_silence(audio, min_silence_len=400, silence_thresh=audio.dBFS - 16)
@@ -68,48 +61,33 @@ def generate_srt(audio_path, text):
68
  srt_file.write(srt_data)
69
  return srt_file.name
70
 
71
- # Interface wrapper
72
  async def tts_interface(text, voice, rate, pitch):
73
- audio_path, srt_path, message = await text_to_speech(text, voice, rate, pitch)
74
- return audio_path, srt_path, message
75
-
76
- # UI setup
77
- async def create_demo():
78
- voices = await edge_tts.list_voices()
79
- voice_dict = {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}
80
-
81
- with gr.Blocks() as demo:
82
- gr.Markdown("# 🎙️ Text-to-Speech + Subtitle Generator")
83
-
84
- with gr.Row():
85
- with gr.Column():
86
- text_input = gr.Textbox(label="Input Text", lines=5)
87
- voice_dropdown = gr.Dropdown(choices=[""] + list(voice_dict.keys()), label="Select Voice")
88
- rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate (%)")
89
- pitch_slider = gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch (Hz)")
90
- generate_btn = gr.Button("🎧 Generate Audio + SRT")
91
-
92
- with gr.Column():
93
- audio_output = gr.Audio(label="Generated Audio", type="filepath")
94
- srt_output = gr.File(label="Download Subtitle (.srt)")
95
- message_output = gr.Textbox(label="Status", interactive=False)
96
-
97
- generate_btn.click(
98
- fn=tts_interface,
99
- inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
100
- outputs=[audio_output, srt_output, message_output]
101
- )
102
-
103
- return demo
104
-
105
- # Global demo instance (for Hugging Face Spaces compatibility)
106
- demo = None
107
-
108
- def launch_app():
109
- global demo
110
- demo = asyncio.run(create_demo())
111
- demo.queue()
112
- demo.launch()
113
-
114
- # Start the app in a thread (non-blocking, Space can detect `demo`)
115
- threading.Thread(target=launch_app).start()
 
2
  import edge_tts
3
  import asyncio
4
  import tempfile
 
5
  import nltk
6
+ import os
7
  import srt
8
  from pydub import AudioSegment, silence
9
  import datetime
10
  import nest_asyncio
 
11
 
12
+ nltk.download("punkt")
13
  nest_asyncio.apply()
14
 
 
 
 
 
15
 
 
16
  async def text_to_speech(text, voice, rate, pitch):
17
  if not text.strip():
18
  return None, None, "Please enter some text."
 
28
  tmp_path = tmp_file.name
29
  await communicate.save(tmp_path)
30
 
 
31
  srt_path = generate_srt(tmp_path, text)
32
  return tmp_path, srt_path, ""
33
 
34
+
35
  def generate_srt(audio_path, text):
36
  audio = AudioSegment.from_file(audio_path)
37
  silences = silence.detect_silence(audio, min_silence_len=400, silence_thresh=audio.dBFS - 16)
 
61
  srt_file.write(srt_data)
62
  return srt_file.name
63
 
64
+
async def tts_interface(text, voice, rate, pitch):
    """Async adapter between the Gradio click callback and text_to_speech.

    Forwards all four UI inputs unchanged and returns the
    (audio_path, srt_path, message) triple from text_to_speech as-is.
    """
    result = await text_to_speech(text, voice, rate, pitch)
    return result
# Build the UI at import time so Hugging Face Spaces can discover `demo`.
# NOTE(review): edge_tts.list_voices() performs a network request at import
# time — assumed acceptable for a Spaces app; confirm for offline use.
voices = asyncio.run(edge_tts.list_voices())

# Human-readable label -> the ShortName edge-tts actually needs.
voice_dict = {
    f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName']
    for v in voices
}

with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ Text-to-Speech + Subtitle Generator")

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(label="Input Text", lines=5)
            # NOTE(review): the dropdown value handed to the callback is the
            # display label, not the ShortName — assumes text_to_speech
            # resolves it via voice_dict; confirm in its (unshown) body.
            voice_dropdown = gr.Dropdown(
                choices=[""] + list(voice_dict.keys()),
                label="Select Voice",
            )
            rate_slider = gr.Slider(
                minimum=-50, maximum=50, value=0, label="Speech Rate (%)"
            )
            pitch_slider = gr.Slider(
                minimum=-20, maximum=20, value=0, label="Pitch (Hz)"
            )
            generate_btn = gr.Button("🎧 Generate Audio + SRT")

        with gr.Column():
            audio_output = gr.Audio(label="Generated Audio", type="filepath")
            srt_output = gr.File(label="Download Subtitle (.srt)")
            message_output = gr.Textbox(label="Status", interactive=False)

    # Gradio natively supports coroutine callbacks, so pass the async
    # handler directly. The previous lambda-wrapped asyncio.run() call
    # raises RuntimeError whenever the worker thread already has a running
    # event loop — the exact situation nest_asyncio was papering over.
    generate_btn.click(
        fn=tts_interface,
        inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
        outputs=[audio_output, srt_output, message_output],
    )