File size: 17,462 Bytes
5d0a7e7
 
ceefeb6
7a3ed03
242da97
 
 
 
5d0a7e7
 
242da97
 
 
 
 
 
799eb1a
24b186c
 
 
242da97
 
 
5d0a7e7
 
242da97
5d0a7e7
 
 
242da97
5d0a7e7
 
242da97
 
5d0a7e7
 
 
242da97
5d0a7e7
242da97
973f53e
 
a2bc31b
 
973f53e
a2bc31b
5d0a7e7
242da97
 
 
 
5d0a7e7
 
 
 
 
 
 
 
 
ad19b2a
5d0a7e7
ad19b2a
5d0a7e7
 
520603b
5d0a7e7
 
 
 
ad19b2a
5d0a7e7
 
 
ad19b2a
5d0a7e7
 
ad19b2a
 
 
5d0a7e7
 
559f05c
ad19b2a
 
520603b
5d0a7e7
 
 
ad19b2a
 
5d0a7e7
 
 
559f05c
 
5d0a7e7
 
559f05c
520603b
f570a03
 
 
 
 
559f05c
f570a03
559f05c
 
 
520603b
f570a03
 
559f05c
5d0a7e7
ad19b2a
 
 
 
 
 
56515c2
ad19b2a
 
5d0a7e7
ad19b2a
 
 
 
5d0a7e7
ad19b2a
 
 
 
 
 
 
 
 
 
 
5d0a7e7
 
ad19b2a
520603b
 
 
5d0a7e7
 
ad19b2a
 
 
 
 
 
 
520603b
5d0a7e7
520603b
5d0a7e7
 
ad19b2a
 
 
 
 
520603b
ad19b2a
5d0a7e7
 
ad19b2a
520603b
ad19b2a
 
 
 
 
 
 
 
 
 
 
 
 
 
5d0a7e7
 
ad19b2a
5d0a7e7
ad19b2a
 
 
520603b
ad19b2a
 
 
5d0a7e7
 
ad19b2a
5d0a7e7
 
 
ad19b2a
5d0a7e7
7a3ed03
ad19b2a
5d0a7e7
ad19b2a
5d0a7e7
ad19b2a
 
 
24b186c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ad19b2a
 
520603b
 
5d0a7e7
ad19b2a
 
 
5d0a7e7
 
405e984
a2bc31b
520603b
5d0a7e7
 
 
520603b
 
9ac4f2a
520603b
 
 
9ac4f2a
 
 
5d0a7e7
 
 
 
 
 
83e26c3
5d0a7e7
520603b
 
5d0a7e7
7a3ed03
5d0a7e7
 
 
 
 
 
7a3ed03
5d0a7e7
 
 
 
 
 
7a3ed03
5d0a7e7
 
 
 
 
7a3ed03
5d0a7e7
 
242da97
5d0a7e7
 
 
 
 
ad19b2a
5d0a7e7
520603b
5d0a7e7
 
 
 
 
 
 
 
023599d
5d0a7e7
799eb1a
a2bc31b
520603b
405e984
 
a2bc31b
799eb1a
 
a2bc31b
520603b
 
 
a478940
 
 
 
5d0a7e7
 
 
 
 
 
 
 
 
 
 
 
 
7a3ed03
5d0a7e7
 
 
242da97
a478940
5d0a7e7
7a3ed03
799eb1a
a2bc31b
799eb1a
a2bc31b
799eb1a
7a3ed03
5d0a7e7
ad19b2a
5d0a7e7
 
520603b
5d0a7e7
520603b
5d0a7e7
 
 
 
 
 
 
 
 
3cda682
559f05c
 
3cda682
5d0a7e7
 
520603b
5d0a7e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242da97
3cda682
a2bc31b
7a3ed03
 
24b186c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5d0a7e7
 
520603b
 
24b186c
5d0a7e7
 
 
 
 
520603b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
import os
import base64
import tempfile
import gradio as gr
import base64
import os
import tempfile
import gradio as gr
from pathlib import Path

from core import (
    get_voice_choices,
    transcribe_audio as core_transcribe,
    synthesize_speech as core_synthesize,
    clone_voice as core_clone
)

# ─── Constants ────────────────────────────────────────────────────────────────
# Remote MP3 that the "Sample" tab links to and embeds in its HTML <audio> player.
SAMPLE_AUDIO_URL = "https://eburon.ai/sample/sample1.mp3"

# ─── Gradio App Wrappers ──────────────────────────────────────────────────────
def transcribe_handler(audio_path, language):
    """Gradio callback for the Speech-to-Text tab.

    Args:
        audio_path: Path of the recorded/uploaded clip, or a falsy value
            when the user has not supplied any audio.
        language: Language selection, forwarded unchanged to the core
            transcriber.

    Returns:
        Whatever the core transcriber returns on success, a warning string
        when no audio was supplied, or an error string when the core call
        raised.
    """
    if audio_path:
        try:
            transcript = core_transcribe(audio_path, language)
        except Exception as exc:
            # UI boundary: surface the failure in the output box instead of raising.
            return f"❌ Error: {exc!s}"
        return transcript
    return "⚠️ Please record or upload an audio file first."

def synthesize_handler(text, voice_id_input, ref_audio_path, audio_format):
    """Gradio callback for the Text-to-Speech tab.

    Args:
        text: Text to speak. Empty or whitespace-only input is rejected
            before the core synthesizer is called.
        voice_id_input: Voice ID chosen or typed in the dropdown. Blank or
            missing values are passed to the core layer as ``None``.
        ref_audio_path: Optional reference-audio path, forwarded unchanged.
        audio_format: Output format name (e.g. ``"mp3"``), forwarded
            unchanged and upper-cased for the status message.

    Returns:
        A ``(output_path, status_message)`` tuple. ``output_path`` is
        ``None`` whenever nothing was generated (blank text or an error).
    """
    try:
        # Mirror the missing-input guard used by the transcription handler.
        if not text or not text.strip():
            return None, "⚠️ Please enter some text to speak first."
        voice_id = (voice_id_input or "").strip() or None
        output_path, num_bytes = core_synthesize(text, voice_id, ref_audio_path, audio_format)
        return output_path, f"✅ Generated {num_bytes:,} bytes of {audio_format.upper()} audio."
    except Exception as e:
        # UI boundary: report the failure in the status panel instead of raising.
        return None, f"❌ Error: {str(e)}"

def clone_handler(audio_path, url_input, voice_name, gender, languages_str):
    """Gradio callback for the Voice Cloning tab.

    Args:
        audio_path: Path of the uploaded/recorded voice sample, if any.
        url_input: Media URL to use as the sample source, if any.
        voice_name: Name to give the new voice.
        gender: Gender selection from the dropdown.
        languages_str: Comma-separated language codes as typed by the user.

    All arguments are forwarded unchanged to the core cloning function.

    Returns:
        A ``(status_markdown, dropdown_update)`` tuple. On success the
        update replaces the voice dropdown's choices with the standard
        voices plus the new clone and selects the clone. On failure the
        update is a no-op and the status text describes the error.
    """
    import re  # local import: only needed to classify error messages

    try:
        voice = core_clone(audio_path, url_input, voice_name, gender, languages_str)
        # Build new choices specifically for this user session: Official Voices + Their new clone
        new_session_choices = get_voice_choices() + [(f"{voice.name} (Custom Session Clone)", voice.id)]
        return (
            f"✅ Voice created!\n\n**Voice ID:** `{voice.id}`\n**Name:** {voice.name}\n**Languages:** {', '.join(voice.languages)}\n\nThis voice has been automatically selected in the Text-to-Speech tab!",
            gr.update(choices=new_session_choices, value=voice.id)
        )
    except Exception as e:
        err_msg = str(e)
        lowered = err_msg.lower()
        # Match "bot" only as a whole word; a plain substring test would also
        # fire on unrelated messages containing e.g. "both" or "bottom".
        looks_like_youtube_block = (
            "Sign in to confirm" in err_msg
            or "youtube" in lowered
            or re.search(r"\bbot\b", lowered) is not None
        )
        if looks_like_youtube_block:
            return "❌ YouTube blocked the proxy crawler. Please use a TikTok/Twitter link, OR paste a direct .MP3 URL, OR upload the file manually.", gr.update()
        return f"❌ Error: {err_msg}", gr.update()


# ─── UI ───────────────────────────────────────────────────────────────────────
# Choices offered by the transcription language dropdown.
LANGUAGES = [
    "Auto-detect",
    "en",
    "fr",
    "es",
    "de",
    "it",
    "pt",
    "zh",
    "ja",
    "ko",
    "ar",
    "ru",
    "hi",
    "nl",
]

# Custom stylesheet handed to gr.Blocks(css=...). It loads the "Outfit" web
# font, applies a dark gradient background, styles the header, tabs, buttons,
# status panels and the sample-audio card, and hides Gradio's footer.
css = """
@import url('https://fonts.googleapis.com/css2?family=Outfit:wght@300;400;500;600;700;800&display=swap');

* { font-family: 'Outfit', sans-serif; }

body, .gradio-container {
    background: radial-gradient(circle at 10% 20%, #120d22 0%, #05030a 100%) !important;
    min-height: 100vh;
}

.gradio-container {
    max-width: 1050px !important;
    margin: 0 auto !important;
}

/* App Header */
.app-header {
    text-align: center;
    padding: 3.5rem 1rem 1.5rem;
    position: relative;
    z-index: 10;
}
.app-header h1 {
    font-size: 3.2rem;
    font-weight: 800;
    letter-spacing: -1.5px;
    background: linear-gradient(135deg, #8b5cf6 0%, #06b6d4 50%, #f59e0b 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    margin-bottom: 0.5rem;
    animation: glow-pulse 3s infinite alternate;
}
.app-header p {
    color: #94a3b8;
    font-size: 1.25rem;
    font-weight: 500;
    margin-top: 0;
}
.highlight-badge {
    background: linear-gradient(135deg, #06b6d4, #8b5cf6) !important;
    -webkit-background-clip: border-box !important;
    background-clip: border-box !important;
    -webkit-text-fill-color: white !important;
    color: white !important;
    padding: 4px 10px;
    border-radius: 8px;
    font-size: 0.9rem;
    font-weight: 800;
    vertical-align: top;
    margin-left: 10px;
    box-shadow: 0 0 15px rgba(139, 92, 246, 0.45);
    display: inline-block;
    letter-spacing: 0.5px;
}

/* Glass panel wrapper */
div.tabs-container, .panel-box {
    background: rgba(255, 255, 255, 0.02) !important;
    border: 1px solid rgba(255, 255, 255, 0.05) !important;
    border-radius: 20px !important;
    box-shadow: 0 10px 40px 0 rgba(0, 0, 0, 0.4) !important;
    overflow: visible !important;
}

/* Tabs */
.tab-nav {
    border-bottom: 1px solid rgba(255,255,255,0.05) !important;
    padding: 10px 10px 0 10px !important;
}
.tab-nav button {
    background: transparent !important;
    border: none !important;
    border-bottom: 3px solid transparent !important;
    color: #64748b !important;
    border-radius: 0 !important;
    margin: 0 !important;
    padding: 1rem 2rem !important;
    font-weight: 600 !important;
    font-size: 1.05rem !important;
    transition: all 0.3s ease !important;
    box-shadow: none !important;
}
.tab-nav button.selected, .tab-nav button:hover {
    color: #f8fafc !important;
    border-bottom: 3px solid #06b6d4 !important;
    box-shadow: 0 20px 20px -20px rgba(6,182,212,0.30) !important;
    background: linear-gradient(0deg, rgba(6,182,212,0.10) 0%, transparent 100%) !important;
}

/* Override Gradio layout borders */
div.form {
    border: none !important;
    box-shadow: none !important;
    background: transparent !important;
}

/* Primary Buttons */
button.primary {
    background: linear-gradient(135deg, #8b5cf6 0%, #06b6d4 100%) !important;
    border: none !important;
    color: white !important;
    border-radius: 14px !important;
    font-weight: 700 !important;
    font-size: 1.15rem !important;
    padding: 0.9rem !important;
    letter-spacing: 0.5px !important;
    box-shadow: 0 4px 15px rgba(6,182,212,0.25) !important;
    transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
}
button.primary:hover {
    transform: translateY(-3px) !important;
    box-shadow: 0 8px 25px rgba(6,182,212,0.40) !important;
}

/* Secondary Button */
button.secondary {
    background: rgba(255,255,255,0.05) !important;
    border: 1px solid rgba(255,255,255,0.1) !important;
    border-radius: 14px !important;
    color: #e2e8f0 !important;
    transition: all 0.2s ease !important;
    font-weight: 600 !important;
}
button.secondary:hover {
    background: rgba(255,255,255,0.15) !important;
    border-color: rgba(255,255,255,0.3) !important;
}

/* Status text box */
.status-text {
    background: rgba(0,0,0,0.4);
    padding: 1.5rem;
    border-radius: 16px;
    border-left: 5px solid #06b6d4;
    color: #e2e8f0;
    font-size: 1rem;
    line-height: 1.6;
}

/* Highlight labels */
label span {
    color: #cbd5e1 !important;
    font-weight: 500 !important;
    letter-spacing: 0.2px !important;
}

/* Clean audio components */
.audio-component {
    border-radius: 16px !important;
    overflow: hidden !important;
    border: 1px solid rgba(255,255,255,0.05) !important;
}

/* Sample tab */
.sample-card {
    background: rgba(255,255,255,0.03);
    border: 1px solid rgba(255,255,255,0.08);
    border-radius: 18px;
    padding: 1.5rem;
    box-shadow: 0 10px 30px rgba(0,0,0,0.25);
}
.sample-card h3 {
    margin-top: 0;
    color: #f8fafc;
    font-size: 1.2rem;
    font-weight: 700;
}
.sample-card p {
    color: #94a3b8;
    margin-bottom: 1rem;
}
.sample-audio-wrap {
    background: rgba(0,0,0,0.35);
    border: 1px solid rgba(255,255,255,0.06);
    border-radius: 16px;
    padding: 1rem;
}
.sample-audio-wrap audio {
    width: 100%;
    outline: none;
    border-radius: 12px;
}

/* Global Animations */
@keyframes glow-pulse {
    0% { filter: drop-shadow(0 0 15px rgba(139, 92, 246, 0.25)); }
    100% { filter: drop-shadow(0 0 30px rgba(6, 182, 212, 0.45)); }
}

/* Footer Hide */
footer { display: none !important; }
"""

# Voice choices fetched once when the module is imported; they seed the
# Text-to-Speech voice dropdown.
# NOTE(review): presumably a list of (label, voice_id) pairs, since the
# dropdown default reads INITIAL_VOICES[0][1] — confirm against core.get_voice_choices.
INITIAL_VOICES = get_voice_choices()

# Top-level UI definition: a header, four tabs (Speech-to-Text, Text-to-Speech,
# Voice Cloning, Sample) and a footer, all styled by the custom `css` string.
with gr.Blocks(title="Eburon Voice Studio", css=css) as demo:

    gr.HTML("""
    <div class="app-header">
        <h1>🎙️ Eburon Voice Studio <span class="highlight-badge">VOICE LAB</span></h1>
        <p>Powered by Eburon Audio · Speech-to-Text, Text-to-Speech, and Instant Voice Cloning</p>
        <div style="margin-top: 15px;">
            <a href="https://eburon.ai" target="_blank" style="text-decoration: none;">
                <span style="background: linear-gradient(135deg, #8b5cf6, #06b6d4); color: white; padding: 6px 14px; border-radius: 20px; font-weight: bold; font-size: 0.9rem; box-shadow: 0 4px 15px rgba(6, 182, 212, 0.35); display: inline-block; cursor: pointer; transition: transform 0.2s;">
                    ✨ Visit Eburon
                </span>
            </a>
        </div>
    </div>
    """)

    with gr.Tabs():

        # ── TAB 1: Speech to Text ──────────────────────────────────────────
        with gr.TabItem("🎤 Speech → Text", elem_classes=["tabs-container"]):
            gr.Markdown("""
            **Upload or record audio** and Eburon Audio will transcribe it with high accuracy.
            Supports multiple languages, handles noisy inputs, and can detect the language automatically.
            """)
            with gr.Row():
                with gr.Column(scale=1):
                    stt_audio = gr.Audio(
                        label="Audio Input",
                        sources=["microphone", "upload"],
                        type="filepath",
                        elem_classes=["audio-component"],
                    )
                    stt_language = gr.Dropdown(
                        choices=LANGUAGES,
                        value="Auto-detect",
                        label="Language",
                    )
                    stt_btn = gr.Button("✨ Transcribe", variant="primary")

                with gr.Column(scale=1):
                    stt_output = gr.Textbox(
                        label="Transcription",
                        lines=12,
                        placeholder="Your transcribed text will appear here...",
                    )

            # Transcription result (or warning/error text) lands in the textbox.
            stt_btn.click(
                fn=transcribe_handler,
                inputs=[stt_audio, stt_language],
                outputs=stt_output,
            )

        # ── TAB 2: Text to Speech ──────────────────────────────────────────
        with gr.TabItem("🔊 Text → Speech", elem_classes=["tabs-container"]):
            gr.Markdown("""
            **Type text** and Eburon Audio converts it into natural speech.
            Optionally paste a **Voice ID** from the Voice Cloning tab to use your own cloned voice.
            """)
            with gr.Row():
                with gr.Column(scale=1):
                    tts_text = gr.Textbox(
                        label="Text to speak",
                        lines=8,
                        placeholder="Enter text here (max ~300 words for best results). Avoid markdown or special characters.",
                        value="And that is what makes this moment so important.\n\nBecause we are no longer in the phase where AI is only a spectacle.\nWe are entering the phase where it must become dependable.\nIntegrated.\nEfficient.\nAnd truly beneficial.\n\nThe projects that matter now will be the ones that combine vision with grounded execution.\nThe ones that understand cost as well as capability.\nLatency as well as intelligence.\nHuman need as well as model performance.",
                    )
                    with gr.Row():
                        # allow_custom_value lets users type/paste a Voice ID
                        # that is not among the listed choices.
                        tts_voice_id = gr.Dropdown(
                            label="Select a Voice or Your Session Clones",
                            choices=INITIAL_VOICES,
                            value=INITIAL_VOICES[0][1] if INITIAL_VOICES else None,
                            allow_custom_value=True,
                            scale=3,
                        )
                        voices_btn = gr.Button("🔄 Refresh List", size="sm", scale=1)

                    # Hidden placeholder; no event in this block writes to it.
                    voices_list_out = gr.Markdown(visible=False)

                    tts_ref_audio = gr.Audio(
                        label="OR: Reference Audio (Set voice tone instantly)",
                        sources=["upload", "microphone"],
                        type="filepath",
                    )
                    tts_format = gr.Dropdown(
                        choices=["mp3", "wav", "flac", "opus"],
                        value="mp3",
                        label="Audio Format",
                    )
                    tts_btn = gr.Button("🎵 Generate Speech", variant="primary")

                with gr.Column(scale=1):
                    tts_audio_out = gr.Audio(
                        label="Generated Audio",
                        type="filepath",
                        elem_classes=["audio-component"],
                    )
                    tts_status = gr.Markdown(elem_classes=["status-text"])

            tts_btn.click(
                fn=synthesize_handler,
                inputs=[tts_text, tts_voice_id, tts_ref_audio, tts_format],
                outputs=[tts_audio_out, tts_status],
            )
            # Refresh replaces the dropdown choices with a fresh
            # get_voice_choices() result; the current selection is not touched.
            voices_btn.click(
                fn=lambda: gr.update(choices=get_voice_choices()),
                inputs=[],
                outputs=tts_voice_id,
            )

        # ── TAB 3: Voice Cloning ───────────────────────────────────────────
        with gr.TabItem("🧬 Voice Cloning", elem_classes=["tabs-container"]):
            gr.Markdown("""
            **Clone any voice** by uploading a short audio sample (10–60 seconds recommended).
            The system will save it as a reusable voice. Copy the Voice ID and paste it in the Text-to-Speech tab.

            > ⚠️ Only clone voices with explicit consent.
            """)
            with gr.Row():
                with gr.Column(scale=1):
                    clone_audio = gr.Audio(
                        label="Voice Sample (upload or record)",
                        sources=["microphone", "upload"],
                        type="filepath",
                        elem_classes=["audio-component"],
                    )
                    clone_url = gr.Textbox(
                        label="OR: Media URL (TikTok, Twitter, or direct .MP3/.WAV link)",
                        placeholder="https://...link_to_audio_or_video...",
                    )
                    clone_name = gr.Textbox(
                        label="Voice Name",
                        placeholder="e.g. eburon-assistant-voice",
                    )
                    clone_gender = gr.Dropdown(
                        choices=["Female", "Male"],
                        value="Female",
                        label="Gender",
                    )
                    clone_langs = gr.Textbox(
                        label="Languages (comma-separated)",
                        value="en",
                        placeholder="en, fr, es",
                    )
                    clone_btn = gr.Button("🧬 Clone Voice", variant="primary")

                with gr.Column(scale=1):
                    clone_result = gr.Markdown(
                        value="Your new Voice ID will appear here after cloning.",
                        elem_classes=["status-text"],
                    )

            # The second output targets the Text-to-Speech voice dropdown so a
            # successful clone is added to its choices and selected there.
            clone_btn.click(
                fn=clone_handler,
                inputs=[clone_audio, clone_url, clone_name, clone_gender, clone_langs],
                outputs=[clone_result, tts_voice_id],
            )

        # ── TAB 4: Sample ──────────────────────────────────────────────────
        with gr.TabItem("🎧 Sample", elem_classes=["tabs-container"]):
            gr.Markdown("""
            **Preview the sample audio** below.
            This is rendered as a native playable audio sample without affecting the existing app flow.
            """)
            gr.HTML(f"""
            <div class="sample-card">
                <h3>Playable Audio Sample</h3>
                <p>Loaded from: <a href="{SAMPLE_AUDIO_URL}" target="_blank" style="color:#06b6d4;">{SAMPLE_AUDIO_URL}</a></p>
                <div class="sample-audio-wrap">
                    <audio controls preload="metadata">
                        <source src="{SAMPLE_AUDIO_URL}" type="audio/mpeg">
                        Your browser does not support the audio element.
                    </audio>
                </div>
            </div>
            """)

    gr.HTML("""
    <div style="text-align:center; padding: 1.5rem; color: #475569; font-size: 0.85rem;">
        Built for <a href="https://eburon.ai" target="_blank" style="color:#06b6d4;">Eburon</a>
        · Powered by your existing audio backend
        · <a href="https://echo.eburon.ai" target="_blank" style="color:#8b5cf6;">Echo Space</a>
    </div>
    """)


if __name__ == "__main__":
    # Script entry point: serve the app on all network interfaces at port 7860,
    # with Gradio's server-side rendering mode turned off.
    demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)