aitekphsoftware committed on
Commit
e7a4109
·
verified ·
1 Parent(s): 55447c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +328 -90
app.py CHANGED
@@ -4,26 +4,45 @@ import asyncio
4
  import tempfile
5
  import os
6
 
 
 
 
7
  async def get_voices():
8
  voices = await edge_tts.list_voices()
9
- return {f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName'] for v in voices}
 
 
 
 
 
 
 
10
 
11
  async def text_to_speech(text, voice, rate, pitch):
12
  if not text.strip():
13
- return None, "Please enter text to convert."
 
14
  if not voice:
15
  return None, "Please select a voice."
16
-
17
- voice_short_name = voice.split(" - ")[0]
 
 
18
  rate_str = f"{rate:+d}%"
19
  pitch_str = f"{pitch:+d}Hz"
20
- communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)
21
-
22
- # Save directly to mp3 file (Edge TTS actually outputs mp3 format)
 
 
 
 
 
 
23
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
24
  tmp_path = tmp_file.name
25
  await communicate.save(tmp_path)
26
-
27
  return tmp_path, None
28
 
29
  async def tts_interface(text, voice, rate, pitch):
@@ -32,95 +51,314 @@ async def tts_interface(text, voice, rate, pitch):
32
  return audio, gr.Warning(warning)
33
  return audio, None
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  async def create_demo():
36
  voices = await get_voices()
37
-
38
- with gr.Blocks(analytics_enabled=False) as demo:
39
- gr.Markdown("# 🎙️ Edge TTS Text-to-Speech")
40
-
41
- with gr.Row():
42
- with gr.Column(scale=1):
43
- gr.Markdown("## Text-to-Speech with Microsoft Edge TTS")
44
- gr.Markdown("""
45
- Convert text to speech using Microsoft Edge TTS.
46
- Adjust speech rate and pitch: 0 is default, positive values increase, negative values decrease.
47
- """)
48
-
49
- gr.HTML("""
50
- <div style="margin: 20px 0; padding: 15px; border: 1px solid #4CAF50; border-radius: 10px; background-color: #f1f8e9;">
51
- <p style="margin-top: 0;"><b>Looking for the new version with more features?</b></p>
52
- <p>The new version includes:</p>
53
- <ul>
54
- <li><b>SRT Subtitle Support</b>: Upload SRT files or input SRT format text</li>
55
- <li><b>File Upload</b>: Easily upload TXT or SRT files</li>
56
- <li><b>Smart Format Detection</b>: Detects plain text or SRT format</li>
57
- <li><b>MP3 Output</b>: Generate high-quality MP3 audio</li>
58
- </ul>
59
- <div style="text-align: center; margin-top: 15px;">
60
- <a href="https://text-to-speech.wingetgui.com/" target="_blank"
61
- style="display: inline-block;
62
- background: linear-gradient(45deg, #4CAF50, #8BC34A);
63
- color: white;
64
- padding: 12px 30px;
65
- text-decoration: none;
66
- border-radius: 30px;
67
- font-weight: bold;
68
- font-size: 16px;
69
- box-shadow: 0 4px 10px rgba(76, 175, 80, 0.3);
70
- transition: all 0.3s ease;">Try New Version ➔</a>
71
  </div>
72
  </div>
73
- """)
74
-
75
- with gr.Column(scale=1):
76
- gr.HTML("""
77
- <div style="height: 100%; background-color: #f0f8ff; padding: 15px; border-radius: 10px;">
78
- <h2 style="color: #1e90ff; margin-top: 0;">Turn Your Text Into Professional Videos!</h2>
79
- <ul style="list-style-type: none; padding-left: 0;">
80
- <li>✅ <b>40+ languages and 300+ voices supported</b></li>
81
- <li>✅ <b>Custom backgrounds, music, and visual effects</b></li>
82
- <li>✅ <b>Create engaging video content from simple text</b></li>
83
- <li>✅ <b>Perfect for educators, content creators, and marketers</b></li>
84
- </ul>
85
- <div style="text-align: center; margin-top: 20px;">
86
- <span style="font-size: 96px;">🎬</span>
87
- <div style="margin-top: 15px;">
88
- <a href="https://text2video.wingetgui.com/" target="_blank"
89
- style="display: inline-block;
90
- background: linear-gradient(45deg, #2196F3, #21CBF3);
91
- color: white;
92
- padding: 12px 30px;
93
- text-decoration: none;
94
- border-radius: 30px;
95
- font-weight: bold;
96
- font-size: 16px;
97
- box-shadow: 0 4px 10px rgba(33, 150, 243, 0.3);
98
- transition: all 0.3s ease;">Try Text-to-Video ➔</a>
99
- </div>
100
- </div>
101
  </div>
102
- """)
103
-
 
 
104
  with gr.Row():
105
- with gr.Column():
106
- text_input = gr.Textbox(label="Input Text", lines=5)
107
- voice_dropdown = gr.Dropdown(choices=[""] + list(voices.keys()), label="Select Voice", value="")
108
- rate_slider = gr.Slider(minimum=-50, maximum=50, value=0, label="Speech Rate Adjustment (%)", step=1)
109
- pitch_slider = gr.Slider(minimum=-20, maximum=20, value=0, label="Pitch Adjustment (Hz)", step=1)
110
-
111
- generate_btn = gr.Button("Generate Speech", variant="primary")
112
-
113
- audio_output = gr.Audio(label="Generated Audio", type="filepath")
114
- warning_md = gr.Markdown(label="Warning", visible=False)
115
-
116
- generate_btn.click(
117
- fn=tts_interface,
118
- inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
119
- outputs=[audio_output, warning_md]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  )
121
-
122
- gr.Markdown("Experience the power of Edge TTS for text-to-speech conversion, and explore our advanced Text-to-Video Converter for even more creative possibilities!")
123
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  return demo
125
 
126
  async def main():
 
4
  import tempfile
5
  import os
6
 
7
+ # -----------------------------
8
+ # Core TTS helpers
9
+ # -----------------------------
10
  async def get_voices():
11
  voices = await edge_tts.list_voices()
12
+ # Keep label style similar to ElevenLabs voice list (clean, informative)
13
+ voice_labels = [
14
+ f"{v['ShortName']} - {v['Locale']} ({v['Gender']})"
15
+ for v in voices
16
+ ]
17
+ # Sort alphabetically for nicer UI
18
+ voice_labels.sort()
19
+ return voice_labels
20
 
21
  async def text_to_speech(text, voice, rate, pitch):
22
  if not text.strip():
23
+ return None, "Please enter some text to synthesize."
24
+
25
  if not voice:
26
  return None, "Please select a voice."
27
+
28
+ # Voice label is like: "en-US-AriaNeural - en-US (Female)"
29
+ voice_short_name = voice.split(" - ")[0].strip()
30
+
31
  rate_str = f"{rate:+d}%"
32
  pitch_str = f"{pitch:+d}Hz"
33
+
34
+ communicate = edge_tts.Communicate(
35
+ text=text,
36
+ voice=voice_short_name,
37
+ rate=rate_str,
38
+ pitch=pitch_str,
39
+ )
40
+
41
+ # Save to temporary MP3 file
42
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
43
  tmp_path = tmp_file.name
44
  await communicate.save(tmp_path)
45
+
46
  return tmp_path, None
47
 
48
  async def tts_interface(text, voice, rate, pitch):
 
51
  return audio, gr.Warning(warning)
52
  return audio, None
53
 
54
+ # -----------------------------
55
+ # Eburon Speech – ElevenLabs-like UI
56
+ # -----------------------------
57
# Custom stylesheet handed to gr.Blocks(css=...) for the Eburon Speech UI.
# The generate-button rules list both "#eburon-generate-btn" and
# "#eburon-generate-btn button": Gradio appears to put elem_id on the <button>
# element itself, in which case the descendant-only selector never matches.
# NOTE(review): confirm against the Gradio version actually deployed.
# NOTE(review): ".svelte-1g805jl" looks like a build-generated class name and
# presumably changes between Gradio releases - verify it still matches.
EBURON_CSS = """
body {
    background: radial-gradient(circle at top left, #0f172a 0, #020617 40%, #020617 100%);
    color: #e5e7eb;
}

/* Global font & smoothing */
* {
    font-family: system-ui, -apple-system, BlinkMacSystemFont, "SF Pro Text", "Segoe UI", sans-serif;
    -webkit-font-smoothing: antialiased;
}

/* Header */
#eburon-header {
    display: flex;
    align-items: center;
    justify-content: space-between;
    padding: 18px 20px;
    margin-bottom: 8px;
    border-radius: 18px;
    background: radial-gradient(circle at top left, #1e293b 0, #020617 55%);
    border: 1px solid rgba(148, 163, 184, 0.28);
    box-shadow: 0 18px 45px rgba(15, 23, 42, 0.85);
}

#eburon-logo-badge {
    display: inline-flex;
    align-items: center;
    gap: 10px;
}

#eburon-logo-circle {
    width: 32px;
    height: 32px;
    border-radius: 999px;
    background: conic-gradient(from 180deg, #38bdf8, #6366f1, #22c55e, #38bdf8);
    display: flex;
    align-items: center;
    justify-content: center;
    box-shadow: 0 0 25px rgba(56, 189, 248, 0.5);
    color: #020617;
    font-weight: 800;
    font-size: 18px;
}

#eburon-brand-title {
    display: flex;
    flex-direction: column;
}

#eburon-brand-title span:nth-child(1) {
    font-size: 19px;
    font-weight: 700;
    letter-spacing: 0.06em;
    text-transform: uppercase;
    color: #e5e7eb;
}

#eburon-brand-title span:nth-child(2) {
    font-size: 12px;
    color: #9ca3af;
}

/* Header right badge */
#eburon-header-right {
    display: inline-flex;
    align-items: center;
    gap: 8px;
    font-size: 11px;
    color: #9ca3af;
    padding: 6px 12px;
    border-radius: 999px;
    border: 1px solid rgba(148, 163, 184, 0.4);
    background: radial-gradient(circle at top, rgba(55, 65, 81, 0.9), rgba(15, 23, 42, 0.9));
}

/* Cards */
.eburon-card {
    border-radius: 18px;
    background: radial-gradient(circle at top left, #0b1120, #020617);
    border: 1px solid rgba(30, 64, 175, 0.6);
    box-shadow: 0 18px 45px rgba(15, 23, 42, 0.85);
    padding: 16px 18px;
}

/* Script header */
#eburon-script-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    margin-bottom: 6px;
}

#eburon-script-title {
    font-size: 14px;
    font-weight: 600;
    color: #e5e7eb;
}

#eburon-script-subtitle {
    font-size: 11px;
    color: #9ca3af;
}

/* Voice header */
#eburon-voice-header {
    display: flex;
    justify-content: space-between;
    align-items: center;
    margin-bottom: 6px;
}

#eburon-voice-title {
    font-size: 14px;
    font-weight: 600;
    color: #e5e7eb;
}

#eburon-voice-subtitle {
    font-size: 11px;
    color: #9ca3af;
}

/* Generate row */
#eburon-generate-row {
    margin-top: 12px;
}

/* Generate button */
#eburon-generate-btn,
#eburon-generate-btn button {
    width: 100%;
    border-radius: 999px;
    font-weight: 600;
    letter-spacing: 0.02em;
    padding: 10px 16px;
    background: linear-gradient(135deg, #38bdf8, #6366f1);
    box-shadow: 0 12px 30px rgba(79, 70, 229, 0.6);
    border: none;
}

#eburon-generate-btn:hover,
#eburon-generate-btn button:hover {
    transform: translateY(-1px);
    box-shadow: 0 18px 40px rgba(79, 70, 229, 0.95);
}

/* Audio player card */
#eburon-audio-card {
    border-radius: 18px;
    background: radial-gradient(circle at top right, #0f172a, #020617);
    border: 1px solid rgba(30, 64, 175, 0.6);
    box-shadow: 0 18px 45px rgba(15, 23, 42, 0.85);
    padding: 14px 16px;
}

/* Smaller labels */
label span, .gr-textbox label, .gr-slider label, .gr-dropdown label {
    font-size: 12px !important;
    color: #9ca3af !important;
}

/* Textbox styling */
textarea {
    background-color: #020617 !important;
    border-radius: 14px !important;
    border: 1px solid rgba(55, 65, 81, 0.9) !important;
    color: #e5e7eb !important;
}

/* Dropdown & sliders */
select, input[type="range"] {
    background-color: #020617 !important;
    border-radius: 999px !important;
    border: 1px solid rgba(55, 65, 81, 0.9) !important;
}

/* Warning styling (Gradio Alert) */
.svelte-1g805jl {
    border-radius: 999px !important;
}
"""
237
+
238
  async def create_demo():
239
  voices = await get_voices()
240
+
241
+ with gr.Blocks(
242
+ analytics_enabled=False,
243
+ css=EBURON_CSS,
244
+ title="Eburon Speech Studio"
245
+ ) as demo:
246
+ # Header
247
+ gr.HTML(
248
+ """
249
+ <div id="eburon-header">
250
+ <div id="eburon-logo-badge">
251
+ <div id="eburon-logo-circle">E</div>
252
+ <div id="eburon-brand-title">
253
+ <span>EBURON SPEECH</span>
254
+ <span>AI voice studio powered by Edge TTS</span>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  </div>
256
  </div>
257
+ <div id="eburon-header-right">
258
+ <span>Realtime TTS</span>
259
+ <span>•</span>
260
+ <span>Studio Grade Voices</span>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
  </div>
262
+ </div>
263
+ """
264
+ )
265
+
266
  with gr.Row():
267
+ # LEFT: Script panel
268
+ with gr.Column(scale=2, min_width=450):
269
+ gr.HTML(
270
+ """
271
+ <div id="eburon-script-header">
272
+ <div>
273
+ <div id="eburon-script-title">Script</div>
274
+ <div id="eburon-script-subtitle">
275
+ Paste or type your text. Long-form friendly.
276
+ </div>
277
+ </div>
278
+ <div style="font-size: 11px; color: #6b7280;">
279
+ ⏱️ Approx. 5k characters per generation
280
+ </div>
281
+ </div>
282
+ """
283
+ )
284
+
285
+ with gr.Group(elem_classes="eburon-card"):
286
+ text_input = gr.Textbox(
287
+ label="",
288
+ placeholder="Write your narration, dialogue, or public talk script here...",
289
+ lines=10
290
+ )
291
+
292
+ # RIGHT: Voice & settings panel
293
+ with gr.Column(scale=1, min_width=320):
294
+ gr.HTML(
295
+ """
296
+ <div id="eburon-voice-header">
297
+ <div>
298
+ <div id="eburon-voice-title">Voice & Settings</div>
299
+ <div id="eburon-voice-subtitle">
300
+ Choose a voice and fine-tune its delivery.
301
+ </div>
302
+ </div>
303
+ <div style="font-size: 11px; color: #6b7280;">
304
+ 🎧 Best experienced with headphones
305
+ </div>
306
+ </div>
307
+ """
308
  )
309
+
310
+ with gr.Group(elem_classes="eburon-card"):
311
+ voice_dropdown = gr.Dropdown(
312
+ choices=[""] + voices,
313
+ label="Voice",
314
+ value="",
315
+ info="Pick a voice from the Edge TTS catalog."
316
+ )
317
+
318
+ rate_slider = gr.Slider(
319
+ minimum=-50,
320
+ maximum=50,
321
+ value=0,
322
+ label="Speed",
323
+ step=1,
324
+ info="Negative is slower, positive is faster."
325
+ )
326
+
327
+ pitch_slider = gr.Slider(
328
+ minimum=-20,
329
+ maximum=20,
330
+ value=0,
331
+ label="Pitch",
332
+ step=1,
333
+ info="Negative is deeper, positive is brighter."
334
+ )
335
+
336
+ # Bottom row: Generate + audio preview
337
+ with gr.Row(elem_id="eburon-generate-row"):
338
+ with gr.Column(scale=1, min_width=260):
339
+ generate_btn = gr.Button(
340
+ "Generate speech",
341
+ variant="primary",
342
+ elem_id="eburon-generate-btn"
343
+ )
344
+ warning_md = gr.Markdown(visible=False)
345
+
346
+ with gr.Column(scale=2, min_width=420):
347
+ with gr.Group(elem_id="eburon-audio-card"):
348
+ gr.Markdown(
349
+ "##### Playback\nListen to your generated voice clip below."
350
+ )
351
+ audio_output = gr.Audio(
352
+ label="",
353
+ type="filepath",
354
+ )
355
+
356
+ generate_btn.click(
357
+ fn=tts_interface,
358
+ inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
359
+ outputs=[audio_output, warning_md]
360
+ )
361
+
362
  return demo
363
 
364
  async def main():