hivecorp committed on
Commit
ec75c79
·
verified ·
1 Parent(s): 1689d75

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -101
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import gradio as gr
2
- import requests
3
- import random
4
- import urllib.parse
5
  import tempfile
6
  import os
7
  import nltk
@@ -9,125 +8,98 @@ import srt
9
  from pydub import AudioSegment, silence
10
  import datetime
11
 
12
- nltk.download("punkt")
13
 
14
- NSFW_URL_TEMPLATE = os.getenv("NSFW_API_URL_TEMPLATE")
15
- TTS_URL_TEMPLATE = os.getenv("TTS_API_URL_TEMPLATE")
 
 
 
 
16
 
17
- if not NSFW_URL_TEMPLATE:
18
- raise ValueError("Missing Secret: NSFW_API_URL_TEMPLATE is not set.")
19
- if not TTS_URL_TEMPLATE:
20
- raise ValueError("Missing Secret: TTS_API_URL_TEMPLATE is not set.")
21
 
22
- VOICES = ["alloy", "echo", "fable", "onyx", "nova", "shimmer", "coral", "verse", "ballad", "ash", "sage", "amuch", "dan"]
 
 
23
 
24
- def generate_srt(audio_path, script_text):
 
 
 
 
 
 
25
  audio = AudioSegment.from_file(audio_path)
26
- silences = silence.detect_silence(audio, min_silence_len=300, silence_thresh=audio.dBFS - 16)
27
- silences = [(start / 1000.0, end / 1000.0) for start, end in silences]
28
- sentences = nltk.tokenize.sent_tokenize(script_text)
29
 
30
  subtitles = []
31
  last_time = 0.0
32
  for i, sentence in enumerate(sentences):
33
- if i < len(silences):
34
  start = last_time
35
- end = silences[i][0]
36
- last_time = silences[i][1]
37
  else:
38
  start = last_time
39
- end = start + 2.5 # default
40
- subtitle = srt.Subtitle(
41
  index=i + 1,
42
  start=datetime.timedelta(seconds=start),
43
  end=datetime.timedelta(seconds=end),
44
  content=sentence
45
- )
46
- subtitles.append(subtitle)
47
 
48
  srt_data = srt.compose(subtitles)
49
  with tempfile.NamedTemporaryFile(delete=False, suffix=".srt", mode='w') as srt_file:
50
  srt_file.write(srt_data)
51
  return srt_file.name
52
 
53
- def check_nsfw(prompt: str) -> bool:
54
- encoded_prompt = urllib.parse.quote(prompt)
55
- url = NSFW_URL_TEMPLATE.format(prompt=encoded_prompt)
56
- try:
57
- response = requests.get(url, timeout=20)
58
- result = response.text.strip().upper()
59
- return result == "YES"
60
- except:
61
- raise gr.Error("Failed to check prompt safety.")
62
-
63
- def generate_audio(prompt: str, voice: str, emotion: str, seed: int) -> bytes:
64
- encoded_prompt = urllib.parse.quote(prompt)
65
- encoded_emotion = urllib.parse.quote(emotion)
66
- url = TTS_URL_TEMPLATE.format(prompt=encoded_prompt, emotion=encoded_emotion, voice=voice, seed=seed)
67
- response = requests.get(url, timeout=60)
68
- if 'audio' not in response.headers.get('content-type', ''):
69
- raise gr.Error("Invalid audio response.")
70
- return response.content
71
-
72
- def text_to_speech_app(prompt, voice, emotion, use_random_seed, specific_seed, subtitle_script):
73
- if not prompt or not voice:
74
- raise gr.Error("Prompt and Voice are required.")
75
- seed = random.randint(0, 2**32 - 1) if use_random_seed else int(specific_seed)
76
- is_nsfw = False # You can enable this by: is_nsfw = check_nsfw(prompt)
77
-
78
- if is_nsfw:
79
- return None, None, "Prompt is flagged NSFW"
80
-
81
- try:
82
- audio_bytes = generate_audio(prompt, voice, emotion, seed)
83
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
84
- temp_audio.write(audio_bytes)
85
- audio_path = temp_audio.name
86
-
87
- srt_path = None
88
- if subtitle_script.strip():
89
- srt_path = generate_srt(audio_path, subtitle_script)
90
-
91
- return audio_path, srt_path, f"Audio and SRT generated with seed {seed}"
92
- except Exception as e:
93
- return None, None, f"Error: {str(e)}"
94
-
95
- def toggle_seed_input(use_random_seed):
96
- return gr.update(visible=not use_random_seed, value=12345)
97
-
98
- with gr.Blocks() as app:
99
- gr.Markdown("## 🎙️ Advanced OpenAI TTS + Subtitle Generator")
100
-
101
- with gr.Row():
102
- with gr.Column(scale=2):
103
- prompt_input = gr.Textbox(label="Prompt", placeholder="Enter your text...")
104
- emotion_input = gr.Textbox(label="Emotion Style", placeholder="happy, sad, excited, calm...")
105
- voice_dropdown = gr.Dropdown(label="Voice", choices=VOICES, value="alloy")
106
- subtitle_script = gr.Textbox(label="Subtitle Script", lines=6, placeholder="Paste script here for SRT generation")
107
- with gr.Column(scale=1):
108
- random_seed_checkbox = gr.Checkbox(label="Use Random Seed", value=True)
109
- seed_input = gr.Number(label="Specific Seed", value=12345, visible=False, precision=0)
110
-
111
- submit_button = gr.Button("🎧 Generate Audio + Subtitles", variant="primary")
112
-
113
- with gr.Row():
114
- audio_output = gr.Audio(label="Generated Audio", type="filepath")
115
- srt_output = gr.File(label="Download SRT File")
116
- status_output = gr.Textbox(label="Status")
117
-
118
- random_seed_checkbox.change(
119
- fn=toggle_seed_input,
120
- inputs=[random_seed_checkbox],
121
- outputs=[seed_input]
122
- )
123
-
124
- submit_button.click(
125
- fn=text_to_speech_app,
126
- inputs=[prompt_input, voice_dropdown, emotion_input, random_seed_checkbox, seed_input, subtitle_script],
127
- outputs=[audio_output, srt_output, status_output],
128
- concurrency_limit=30
129
- )
130
 
131
  if __name__ == "__main__":
132
- if NSFW_URL_TEMPLATE and TTS_URL_TEMPLATE:
133
- app.launch()
 
1
  import gradio as gr
2
+ import edge_tts
3
+ import asyncio
 
4
  import tempfile
5
  import os
6
  import nltk
 
8
  from pydub import AudioSegment, silence
9
  import datetime
10
 
11
+ nltk.download('punkt')
12
 
13
+ # 🔊 Generate TTS audio
14
+ async def text_to_speech(text, voice, rate, pitch):
15
+ if not text.strip():
16
+ return None, None, "Please enter text to convert."
17
+ if not voice:
18
+ return None, None, "Please select a voice."
19
 
20
+ voice_short_name = voice.split(" - ")[0]
21
+ rate_str = f"{rate:+d}%"
22
+ pitch_str = f"{pitch:+d}Hz"
23
+ communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
24
 
25
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
26
+ tmp_path = tmp_file.name
27
+ await communicate.save(tmp_path)
28
 
29
+ # Generate SRT
30
+ srt_path = generate_srt(tmp_path, text)
31
+
32
+ return tmp_path, srt_path, None
33
+
34
+ # 🧠 Generate SRT from audio + text
35
+ def generate_srt(audio_path, text):
36
  audio = AudioSegment.from_file(audio_path)
37
+ silence_ranges = silence.detect_silence(audio, min_silence_len=400, silence_thresh=audio.dBFS - 16)
38
+ silence_ranges = [(start / 1000.0, end / 1000.0) for start, end in silence_ranges]
39
+ sentences = nltk.tokenize.sent_tokenize(text)
40
 
41
  subtitles = []
42
  last_time = 0.0
43
  for i, sentence in enumerate(sentences):
44
+ if i < len(silence_ranges):
45
  start = last_time
46
+ end = silence_ranges[i][0]
47
+ last_time = silence_ranges[i][1]
48
  else:
49
  start = last_time
50
+ end = start + 2.5 # fallback timing
51
+ subtitles.append(srt.Subtitle(
52
  index=i + 1,
53
  start=datetime.timedelta(seconds=start),
54
  end=datetime.timedelta(seconds=end),
55
  content=sentence
56
+ ))
 
57
 
58
  srt_data = srt.compose(subtitles)
59
  with tempfile.NamedTemporaryFile(delete=False, suffix=".srt", mode='w') as srt_file:
60
  srt_file.write(srt_data)
61
  return srt_file.name
62
 
63
+ # 🎛️ Interface wrapper
64
+ async def tts_interface(text, voice, rate, pitch):
65
+ audio, srt_file, warning = await text_to_speech(text, voice, rate, pitch)
66
+ if warning:
67
+ return None, None, gr.Warning(warning)
68
+ return audio, srt_file, None
69
+
70
+ # 📋 Setup Gradio UI
71
+ async def create_demo():
72
+ voices = await edge_tts.list_voices()
73
+ voice_dict = {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}
74
+
75
+ with gr.Blocks() as demo:
76
+ gr.Markdown("# 🎙️ Edge TTS + Subtitle Generator (.srt)")
77
+
78
+ with gr.Row():
79
+ with gr.Column():
80
+ text_input = gr.Textbox(label="Input Text", lines=5, placeholder="Enter your script here...")
81
+ voice_dropdown = gr.Dropdown(choices=[""] + list(voice_dict.keys()), label="Select Voice", value="")
82
+ rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate (%)")
83
+ pitch_slider = gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch (Hz)")
84
+ generate_btn = gr.Button("🎧 Generate Audio + SRT")
85
+
86
+ with gr.Column():
87
+ audio_output = gr.Audio(label="Generated Audio", type="filepath")
88
+ srt_output = gr.File(label="Download .srt Subtitle")
89
+ warning_output = gr.Markdown(visible=False)
90
+
91
+ generate_btn.click(
92
+ fn=tts_interface,
93
+ inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
94
+ outputs=[audio_output, srt_output, warning_output]
95
+ )
96
+
97
+ return demo
98
+
99
+ async def main():
100
+ demo = await create_demo()
101
+ demo.queue(concurrency_count=10)
102
+ demo.launch(show_api=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
  if __name__ == "__main__":
105
+ asyncio.run(main())