DevNumb committed on
Commit
036af98
·
verified ·
1 Parent(s): 5c51f1f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -530
app.py CHANGED
@@ -1,568 +1,99 @@
1
- import asyncio
2
- import platform
3
- import sys
4
-
5
- # Fix for asyncio warnings on Hugging Face Spaces
6
- if sys.platform.startswith("linux") or sys.platform.startswith("darwin"):
7
- try:
8
- # Clean up any existing loops
9
- try:
10
- loop = asyncio.get_event_loop()
11
- if loop.is_running():
12
- loop.close()
13
- except:
14
- pass
15
-
16
- # Set default policy
17
- asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
18
- # Create a new event loop
19
- asyncio.set_event_loop(asyncio.new_event_loop())
20
- except Exception:
21
- pass # Ignore any errors if this fails
22
-
23
- # Now import other modules
24
  import gradio as gr
25
- import numpy as np
26
  import tempfile
27
- import time
28
- import scipy.io.wavfile
29
  import warnings
30
  warnings.filterwarnings("ignore")
31
 
32
- # Clean white theme CSS with black text
33
  css = """
34
  <style>
35
- /* WHITE BACKGROUND THEME */
36
- body, .gradio-container {
37
  background: white !important;
38
- color: #333333 !important;
39
- font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
40
- margin: 0;
41
  padding: 20px;
42
- min-height: 100vh;
43
  }
44
 
45
- /* Header */
46
- .header {
47
- text-align: center;
48
- padding: 2.5rem;
49
- background: linear-gradient(135deg, #4F46E5 0%, #7C3AED 100%);
50
- border-radius: 16px;
51
- margin-bottom: 2rem;
52
- color: white;
53
- box-shadow: 0 4px 20px rgba(79, 70, 229, 0.2);
54
- }
55
-
56
- .header h1 {
57
- font-size: 2.5em;
58
- margin: 0 0 0.5rem 0;
59
- font-weight: 700;
60
- letter-spacing: -0.5px;
61
- }
62
-
63
- .header p {
64
- font-size: 1.1em;
65
- opacity: 0.95;
66
- margin: 0;
67
- }
68
-
69
- /* Cards */
70
- .card {
71
- background: white;
72
- border: 1px solid #E5E7EB;
73
- border-radius: 16px;
74
- padding: 1.5rem;
75
- margin-bottom: 1.5rem;
76
- box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05);
77
- }
78
-
79
- /* Text Input - BLACK TEXT ON WHITE */
80
  textarea {
81
  background: white !important;
82
- border: 2px solid #D1D5DB !important;
83
- border-radius: 12px !important;
84
- color: #000000 !important; /* Pure black text */
85
- padding: 1rem !important;
86
  font-size: 16px !important;
87
- font-family: 'SF Mono', Monaco, 'Courier New', monospace !important;
88
  width: 100% !important;
89
- min-height: 120px !important;
90
- line-height: 1.5 !important;
91
  }
92
 
93
- textarea:focus {
94
- border-color: #4F46E5 !important;
95
- box-shadow: 0 0 0 3px rgba(79, 70, 229, 0.1) !important;
96
- outline: none !important;
97
- color: #000000 !important;
98
- }
99
-
100
- textarea::placeholder {
101
- color: #666666 !important;
102
- opacity: 0.8 !important;
103
- }
104
-
105
- /* Buttons */
106
- .btn-primary {
107
- background: linear-gradient(135deg, #4F46E5 0%, #7C3AED 100%) !important;
108
- border: none !important;
109
  color: white !important;
110
- padding: 0.75rem 1.5rem !important;
111
- border-radius: 10px !important;
112
- font-weight: 600 !important;
113
- font-size: 1rem !important;
114
- cursor: pointer !important;
115
- margin: 0.5rem !important;
116
- }
117
-
118
- .btn-primary:hover {
119
- transform: translateY(-2px) !important;
120
- box-shadow: 0 4px 12px rgba(79, 70, 229, 0.3) !important;
121
- }
122
-
123
- .btn-secondary {
124
- background: white !important;
125
- border: 2px solid #D1D5DB !important;
126
- color: #374151 !important;
127
- padding: 0.75rem 1.5rem !important;
128
- border-radius: 10px !important;
129
- font-weight: 500 !important;
130
- cursor: pointer !important;
131
- margin: 0.5rem !important;
132
- }
133
-
134
- .btn-secondary:hover {
135
- border-color: #4F46E5 !important;
136
- color: #4F46E5 !important;
137
- background: #F5F3FF !important;
138
- }
139
-
140
- /* Slider */
141
- input[type="range"] {
142
- width: 100% !important;
143
- height: 6px !important;
144
- background: #E5E7EB !important;
145
- border-radius: 10px !important;
146
- outline: none !important;
147
- margin: 1rem 0 !important;
148
- }
149
-
150
- input[type="range"]::-webkit-slider-thumb {
151
- width: 20px !important;
152
- height: 20px !important;
153
- background: #4F46E5 !important;
154
- border: 3px solid white !important;
155
- border-radius: 50% !important;
156
- box-shadow: 0 2px 4px rgba(0, 0, 0, 0.2) !important;
157
- cursor: pointer !important;
158
- }
159
-
160
- /* Audio Player */
161
- .audio-player {
162
- background: #F9FAFB !important;
163
- border-radius: 12px !important;
164
- padding: 1.5rem !important;
165
- margin-top: 1rem !important;
166
- border: 1px solid #E5E7EB !important;
167
- }
168
-
169
- .audio-player audio {
170
- width: 100% !important;
171
- border-radius: 8px !important;
172
- }
173
-
174
- /* Stats */
175
- .stats-grid {
176
- display: grid;
177
- grid-template-columns: repeat(2, 1fr);
178
- gap: 1rem;
179
- margin-top: 1rem;
180
- }
181
-
182
- .stat-box {
183
- background: white;
184
- border: 1px solid #E5E7EB;
185
- border-radius: 12px;
186
- padding: 1rem;
187
- text-align: center;
188
- }
189
-
190
- .stat-value {
191
- font-size: 1.8em;
192
- font-weight: 700;
193
- color: #4F46E5 !important;
194
- margin-bottom: 0.25rem;
195
- }
196
-
197
- .stat-label {
198
- color: #6B7280;
199
- font-size: 0.8em;
200
- text-transform: uppercase;
201
- letter-spacing: 0.5px;
202
- font-weight: 600;
203
- }
204
-
205
- /* Status messages */
206
- .status-success {
207
- background: #DCFCE7 !important;
208
- border: 1px solid #86EFAC !important;
209
- border-left: 4px solid #10B981 !important;
210
- color: #065F46 !important;
211
- padding: 1rem !important;
212
- border-radius: 8px !important;
213
- margin: 1rem 0 !important;
214
- }
215
-
216
- .status-error {
217
- background: #FEE2E2 !important;
218
- border: 1px solid #FCA5A5 !important;
219
- border-left: 4px solid #EF4444 !important;
220
- color: #991B1B !important;
221
- padding: 1rem !important;
222
- border-radius: 8px !important;
223
- margin: 1rem 0 !important;
224
- }
225
-
226
- /* Footer */
227
- .footer {
228
- text-align: center;
229
- padding: 2rem;
230
- margin-top: 2rem;
231
- color: #6B7280;
232
- border-top: 1px solid #E5E7EB;
233
- }
234
-
235
- /* Grid layout */
236
- .container {
237
- max-width: 1000px;
238
- margin: 0 auto;
239
- }
240
-
241
- .row {
242
- display: flex;
243
- gap: 1.5rem;
244
- margin-bottom: 1.5rem;
245
- }
246
-
247
- .col {
248
- flex: 1;
249
- }
250
-
251
- .col-2 {
252
- flex: 2;
253
- }
254
-
255
- /* Markdown content - BLACK TEXT */
256
- .markdown {
257
- color: #374151 !important;
258
- line-height: 1.6 !important;
259
- }
260
-
261
- .markdown h1, .markdown h2, .markdown h3 {
262
- color: #111827 !important;
263
- font-weight: 600 !important;
264
- }
265
-
266
- .markdown p {
267
- color: #4B5563 !important;
268
  }
269
  </style>
270
  """
271
 
272
- # Initialize stats
273
- class Stats:
274
- def __init__(self):
275
- self.generations = 0
276
- self.characters = 0
277
- self.start_time = time.time()
278
-
279
- def add(self, text):
280
- self.generations += 1
281
- self.characters += len(text)
282
-
283
- def get_stats(self):
284
- uptime = time.time() - self.start_time
285
- hours = int(uptime // 3600)
286
- minutes = int((uptime % 3600) // 60)
287
-
288
- return {
289
- 'generations': self.generations,
290
- 'characters': self.characters,
291
- 'avg_length': self.characters // max(self.generations, 1),
292
- 'uptime': f"{hours}h {minutes}m"
293
- }
294
-
295
- stats = Stats()
296
-
297
- def create_speech_audio(text, speed=1.0):
298
- """Create speech audio from text"""
299
- if not text or not text.strip():
300
  return None
301
 
302
- # Create audio based on text
303
- duration = min(len(text) * 0.05, 5) # Up to 5 seconds
304
- sampling_rate = 24000
305
-
306
- # Generate time array
307
- t = np.linspace(0, duration, int(sampling_rate * duration))
308
-
309
- # Create base tone
310
- base_freq = 220
311
- audio = np.zeros_like(t)
312
-
313
- # Add harmonics based on text
314
- for i, char in enumerate(text[:20]):
315
- freq = base_freq + (ord(char) % 300)
316
- amplitude = 0.5 / (i + 1)
317
- audio += amplitude * np.sin(2 * np.pi * freq * t * (i + 1) / 10)
318
-
319
- # Apply envelope
320
- envelope = np.exp(-2 * t) * (1 - np.exp(-10 * t))
321
- audio *= envelope
322
-
323
- # Normalize
324
- max_val = np.max(np.abs(audio))
325
- if max_val > 0:
326
- audio = audio / max_val * 0.8
327
-
328
- # Adjust speed
329
- if speed != 1.0:
330
- from scipy import signal
331
- new_length = int(len(audio) / speed)
332
- audio = signal.resample(audio, new_length)
333
-
334
- # Save to file
335
- with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
336
- scipy.io.wavfile.write(f.name, sampling_rate, audio.astype(np.float32))
337
- return f.name
338
-
339
- def generate_tts(text, speed=1.0, emotion="neutral"):
340
- """Generate TTS with status message"""
341
- if not text or not text.strip():
342
- return None, "⚠️ Please enter some text first", get_stats_html()
343
-
344
- # Update stats
345
- stats.add(text)
346
-
347
  try:
348
- audio_file = create_speech_audio(text, speed)
 
 
349
 
350
- if audio_file:
351
- duration = len(text) * 0.05 / speed
352
- duration = min(duration, 5)
353
-
354
- message = f"""
355
- <div class="status-success">
356
- <div style="font-weight: 600; margin-bottom: 0.5rem;">✅ Speech Generated Successfully!</div>
357
- <div style="color: #065F46;">
358
- <strong>{len(text)} characters</strong> •
359
- <strong>{duration:.1f}s duration</strong> •
360
- Speed: <strong>{speed}x</strong>
361
- </div>
362
- </div>
363
- """
364
- else:
365
- message = """
366
- <div class="status-error">
367
- <div style="font-weight: 600;">❌ Failed to generate audio</div>
368
- <div>Please try again with different text.</div>
369
- </div>
370
- """
371
 
372
- return audio_file, message, get_stats_html()
 
 
 
 
373
 
374
  except Exception as e:
375
- error_msg = f"""
376
- <div class="status-error">
377
- <div style="font-weight: 600;">❌ Error occurred</div>
378
- <div>{str(e)[:100]}</div>
379
- </div>
380
- """
381
- return None, error_msg, get_stats_html()
382
-
383
- def get_stats_html():
384
- """Generate HTML for statistics"""
385
- data = stats.get_stats()
386
-
387
- return f"""
388
- <div class="stats-grid">
389
- <div class="stat-box">
390
- <div class="stat-value">{data['generations']}</div>
391
- <div class="stat-label">Generations</div>
392
- </div>
393
- <div class="stat-box">
394
- <div class="stat-value">{data['characters']}</div>
395
- <div class="stat-label">Characters</div>
396
- </div>
397
- <div class="stat-box">
398
- <div class="stat-value">{data['avg_length']}</div>
399
- <div class="stat-label">Avg Length</div>
400
- </div>
401
- <div class="stat-box">
402
- <div class="stat-value">{data['uptime']}</div>
403
- <div class="stat-label">Uptime</div>
404
- </div>
405
- </div>
406
- """
407
 
408
- # Create the interface
409
- with gr.Blocks() as demo:
410
- # Add CSS
411
- gr.HTML(css)
412
 
413
- # Main container
414
- with gr.Column(elem_id="container"):
415
- # Header
416
- gr.HTML("""
417
- <div class="header">
418
- <h1>🎵 VibeVoice TTS</h1>
419
- <p>Text-to-Speech with Clean White Interface</p>
420
- </div>
421
- """)
422
-
423
- # Main content row
424
- with gr.Row(elem_classes="row"):
425
- # Left column - Input
426
- with gr.Column(scale=2, elem_classes="col-2"):
427
- with gr.Column(elem_classes="card"):
428
- gr.Markdown("### 📝 Input Text")
429
-
430
- text_input = gr.Textbox(
431
- label="",
432
- placeholder="Type or paste your text here... (Black text on white background)",
433
- lines=5
434
- )
435
-
436
- gr.Markdown("### ⚙️ Settings")
437
-
438
- with gr.Row():
439
- emotion = gr.Dropdown(
440
- label="Voice Style",
441
- choices=["Neutral", "Happy", "Calm"],
442
- value="Neutral"
443
- )
444
-
445
- speed = gr.Slider(
446
- minimum=0.5,
447
- maximum=2.0,
448
- value=1.0,
449
- step=0.1,
450
- label="Speaking Speed"
451
- )
452
-
453
- with gr.Row():
454
- generate_btn = gr.Button(
455
- "✨ Generate Speech",
456
- variant="primary",
457
- elem_classes="btn-primary"
458
- )
459
- clear_btn = gr.Button(
460
- "Clear",
461
- variant="secondary",
462
- elem_classes="btn-secondary"
463
- )
464
-
465
- # Right column - Output
466
- with gr.Column(scale=1, elem_classes="col"):
467
- with gr.Column(elem_classes="card"):
468
- gr.Markdown("### 🎧 Audio Output")
469
-
470
- with gr.Column(elem_classes="audio-player"):
471
- audio_output = gr.Audio(
472
- type="filepath",
473
- label=""
474
- )
475
-
476
- status_display = gr.HTML(
477
- """<div style="text-align: center; color: #6B7280; padding: 1rem;">
478
- Ready. Enter text and click Generate.
479
- </div>"""
480
- )
481
-
482
- # Statistics
483
- with gr.Column(elem_classes="card"):
484
- gr.Markdown("### 📊 Statistics")
485
- stats_display = gr.HTML(get_stats_html())
486
-
487
- # Examples
488
- with gr.Column(elem_classes="card"):
489
- gr.Markdown("### 💡 Examples")
490
-
491
- gr.Examples(
492
- examples=[
493
- ["Hello! Welcome to the text-to-speech system."],
494
- ["The quick brown fox jumps over the lazy dog."],
495
- ["This is a test of the audio generation."],
496
- ["The weather is beautiful today."]
497
- ],
498
- inputs=text_input,
499
- label="Click to try:"
500
- )
501
-
502
- # About section
503
- with gr.Column(elem_classes="card"):
504
- gr.Markdown("### ℹ️ About")
505
- gr.Markdown("""
506
- **VibeVoice TTS** converts text to audio.
507
-
508
- **Features:**
509
- - 🎵 Audio generation
510
- - ⚡ Fast processing
511
- - 🎭 Voice styles
512
- - ⚙️ Speed control
513
-
514
- **Note:** Text input shows **black text on white background**.
515
- """)
516
 
517
- # Event handlers
518
- def process_text(text, speed_val, emotion_val):
519
- """Process text to generate speech"""
520
- if not text or not text.strip():
521
- return None, """
522
- <div class="status-error">
523
- <div style="font-weight: 600;">⚠️ Please enter text</div>
524
- </div>
525
- """, get_stats_html()
526
-
527
- return generate_tts(text, speed_val, emotion_val)
528
 
529
- def clear_all():
530
- """Clear all inputs"""
531
- return "", None, """
532
- <div style="text-align: center; color: #6B7280; padding: 1rem;">
533
- Cleared. Ready for new text.
534
- </div>
535
- """, get_stats_html()
536
 
537
- # Connect buttons
538
- generate_btn.click(
539
- fn=process_text,
540
- inputs=[text_input, speed, emotion],
541
- outputs=[audio_output, status_display, stats_display]
 
 
542
  )
543
 
544
- clear_btn.click(
545
- fn=clear_all,
546
- inputs=[],
547
- outputs=[text_input, audio_output, status_display, stats_display]
548
- )
 
 
 
 
 
 
549
 
550
- # Launch the app - SIMPLIFIED for Gradio 3.x
551
  if __name__ == "__main__":
552
- # Clean up any existing event loops before starting
553
- try:
554
- import asyncio
555
- loop = asyncio.get_event_loop()
556
- if loop.is_running():
557
- loop.close()
558
- except:
559
- pass
560
-
561
- # Launch with minimal parameters
562
- demo.launch(
563
- server_name="0.0.0.0",
564
- server_port=7860,
565
- show_error=True,
566
- quiet=True,
567
- debug=False
568
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
 
2
  import tempfile
3
+ import os
 
4
  import warnings
5
  warnings.filterwarnings("ignore")
6
 
7
# CSS for white background with black text.
# NOTE: this must be raw CSS. It is handed to gr.Blocks(css=...), which injects
# the string into its own <style> element; wrapping the rules in <style> tags
# here would turn the tag text into part of the first selector and silently
# invalidate the `body` rule.
css = """
body {
    background: white !important;
    padding: 20px;
}

textarea {
    background: white !important;
    color: black !important;
    border: 2px solid #4CAF50 !important;
    border-radius: 10px !important;
    padding: 15px !important;
    font-size: 16px !important;
    width: 100% !important;
}

button {
    background: #4CAF50 !important;
    color: white !important;
    border: none !important;
    padding: 10px 20px !important;
    border-radius: 5px !important;
}
"""
34
 
35
def text_to_speech_actual(text):
    """Synthesize ``text`` to an MP3 file with gTTS and return its path.

    Args:
        text: The text to speak. ``None``, empty, and whitespace-only input
            are rejected without touching the TTS engine.

    Returns:
        The path of a temporary ``.mp3`` file containing the speech, or
        ``None`` when there is nothing to speak or synthesis fails (the
        error is printed to stdout).
    """
    if not text or not text.strip():
        return None

    temp_file = None
    try:
        # Imported lazily so a missing package surfaces as a reported TTS
        # error instead of breaking module import. Only gTTS is required:
        # playback is done by the browser, so no local audio library is
        # imported here.
        from gtts import gTTS

        # delete=False: the file has to outlive this handle so the caller
        # can serve it after we return.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
            temp_file = f.name

        # Generate speech
        tts = gTTS(text=text, lang='en', slow=False)
        tts.save(temp_file)

        return temp_file

    except Exception as e:
        print(f"TTS Error: {e}")
        # Do not leave an empty/partial temp file behind on failure.
        if temp_file is not None:
            try:
                os.remove(temp_file)
            except OSError:
                pass
        return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
# Simple interface: one text box, generate/clear buttons, an audio player and
# a status line. Event handlers must be registered inside the Blocks context.
with gr.Blocks(css=css) as demo:
    gr.Markdown("# 🎵 Actual Text-to-Speech")
    gr.Markdown("This uses real TTS to convert text to speech")

    text_input = gr.Textbox(
        label="Enter Text",
        placeholder="Type your text here...",
        lines=4,
    )

    with gr.Row():
        generate_btn = gr.Button("Generate Speech")
        clear_btn = gr.Button("Clear")

    # type="filepath": the handler returns a path to the generated file.
    audio_output = gr.Audio(type="filepath", label="Speech Output")
    status = gr.Markdown("Ready...")

    gr.Examples(
        examples=[
            ["Hello! This is actual text-to-speech conversion."],
            ["Welcome to the speech synthesis system."],
            ["The quick brown fox jumps over the lazy dog."],
        ],
        inputs=text_input,
    )

    def process(text):
        """Return ``(audio_path, status_message)`` for the submitted text."""
        # Empty input is a user mistake, not a synthesis failure: say so
        # instead of reporting a generic error.
        if not text or not text.strip():
            return None, "⚠️ Please enter some text first"

        audio = text_to_speech_actual(text)
        if audio:
            return audio, "✅ Speech generated successfully!"
        return None, "❌ Failed to generate speech"

    def clear():
        """Reset the text box, the audio player and the status line."""
        return "", None, "Cleared"

    generate_btn.click(process, text_input, [audio_output, status])
    clear_btn.click(clear, [], [text_input, audio_output, status])


if __name__ == "__main__":
    demo.launch()