Spaces:
Sleeping
Sleeping
fix: populate voices via lazy load, add direct mp3 audio curl bypass for YT blocks, add clone badge
Browse files
app.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
import os
|
| 2 |
import base64
|
| 3 |
-
import tempfile
|
| 4 |
import gradio as gr
|
| 5 |
from pathlib import Path
|
| 6 |
import base64
|
| 7 |
import os
|
|
|
|
| 8 |
from mistralai.client import Mistral
|
| 9 |
|
| 10 |
def list_user_voices():
|
|
@@ -118,34 +118,49 @@ def clone_voice(audio_path, url_input, voice_name, gender, languages_str):
|
|
| 118 |
final_audio_path = audio_path
|
| 119 |
|
| 120 |
try:
|
| 121 |
-
# If URL is provided,
|
| 122 |
if url_input.strip():
|
| 123 |
-
|
| 124 |
base_out = tempfile.mktemp()
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
client = get_client()
|
| 151 |
sample_b64 = base64.b64encode(Path(final_audio_path).read_bytes()).decode()
|
|
@@ -201,7 +216,7 @@ body, .gradio-container {
|
|
| 201 |
z-index: 10;
|
| 202 |
}
|
| 203 |
.app-header h1 {
|
| 204 |
-
font-size: 3.
|
| 205 |
font-weight: 800;
|
| 206 |
letter-spacing: -1.5px;
|
| 207 |
background: linear-gradient(135deg, #c084fc 0%, #ec4899 50%, #facc15 100%);
|
|
@@ -213,10 +228,21 @@ body, .gradio-container {
|
|
| 213 |
}
|
| 214 |
.app-header p {
|
| 215 |
color: #94a3b8;
|
| 216 |
-
font-size: 1.
|
| 217 |
-
font-weight:
|
| 218 |
margin-top: 0;
|
| 219 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
|
| 221 |
/* Glass panel wrapper */
|
| 222 |
div.tabs-container, .panel-box {
|
|
@@ -344,15 +370,19 @@ label span {
|
|
| 344 |
footer { display: none !important; }
|
| 345 |
"""
|
| 346 |
|
| 347 |
-
#
|
| 348 |
-
|
|
|
|
|
|
|
|
|
|
| 349 |
|
| 350 |
with gr.Blocks(title="Voxtral Studio — Mistral AI Audio") as demo:
|
| 351 |
|
| 352 |
gr.HTML("""
|
| 353 |
<div class="app-header">
|
| 354 |
-
<h1>🎙️ Voxtral Studio</h1>
|
| 355 |
-
<p>Powered by Mistral AI ·
|
|
|
|
| 356 |
</div>
|
| 357 |
""")
|
| 358 |
|
|
@@ -408,9 +438,8 @@ with gr.Blocks(title="Voxtral Studio — Mistral AI Audio") as demo:
|
|
| 408 |
)
|
| 409 |
with gr.Row():
|
| 410 |
tts_voice_id = gr.Dropdown(
|
| 411 |
-
label="Select a Mistral Voice",
|
| 412 |
-
choices=
|
| 413 |
-
value=INITIAL_VOICES[0][1] if INITIAL_VOICES else None,
|
| 414 |
allow_custom_value=True,
|
| 415 |
scale=3,
|
| 416 |
)
|
|
@@ -466,8 +495,8 @@ with gr.Blocks(title="Voxtral Studio — Mistral AI Audio") as demo:
|
|
| 466 |
elem_classes=["audio-component"],
|
| 467 |
)
|
| 468 |
clone_url = gr.Textbox(
|
| 469 |
-
label="OR: Media URL (
|
| 470 |
-
placeholder="https://
|
| 471 |
)
|
| 472 |
clone_name = gr.Textbox(
|
| 473 |
label="Voice Name",
|
|
@@ -505,5 +534,8 @@ with gr.Blocks(title="Voxtral Studio — Mistral AI Audio") as demo:
|
|
| 505 |
""")
|
| 506 |
|
| 507 |
|
|
|
|
|
|
|
|
|
|
| 508 |
if __name__ == "__main__":
|
| 509 |
-
demo.launch(server_name="0.0.0.0", server_port=7860,
|
|
|
|
| 1 |
import os
|
| 2 |
import base64
|
|
|
|
| 3 |
import gradio as gr
|
| 4 |
from pathlib import Path
|
| 5 |
import base64
|
| 6 |
import os
|
| 7 |
+
import requests
|
| 8 |
from mistralai.client import Mistral
|
| 9 |
|
| 10 |
def list_user_voices():
|
|
|
|
| 118 |
final_audio_path = audio_path
|
| 119 |
|
| 120 |
try:
|
| 121 |
+
# If URL is provided, handle direct links or yt-dlp
|
| 122 |
if url_input.strip():
|
| 123 |
+
url = url_input.strip()
|
| 124 |
base_out = tempfile.mktemp()
|
| 125 |
+
|
| 126 |
+
# If it's a direct audio file link, bypass yt-dlp and download it directly
|
| 127 |
+
if url.lower().endswith(('.mp3', '.wav', '.flac', '.ogg', '.m4a')):
|
| 128 |
+
try:
|
| 129 |
+
ext = url.split('.')[-1]
|
| 130 |
+
final_audio_path = f"{base_out}.{ext}"
|
| 131 |
+
with requests.get(url, stream=True, timeout=15) as r:
|
| 132 |
+
r.raise_for_status()
|
| 133 |
+
with open(final_audio_path, 'wb') as f:
|
| 134 |
+
for chunk in r.iter_content(chunk_size=8192):
|
| 135 |
+
f.write(chunk)
|
| 136 |
+
except Exception as e:
|
| 137 |
+
return f"❌ Error downloading direct audio link: {str(e)}", gr.update()
|
| 138 |
+
# Otherwise use yt-dlp for TikTok, Twitter, YouTube (if not blocked), etc.
|
| 139 |
+
else:
|
| 140 |
+
import yt_dlp
|
| 141 |
+
ydl_opts = {
|
| 142 |
+
'format': 'bestaudio/best',
|
| 143 |
+
'outtmpl': base_out + '.%(ext)s',
|
| 144 |
+
'quiet': True,
|
| 145 |
+
'postprocessors': [{
|
| 146 |
+
'key': 'FFmpegExtractAudio',
|
| 147 |
+
'preferredcodec': 'mp3',
|
| 148 |
+
'preferredquality': '128',
|
| 149 |
+
}],
|
| 150 |
+
'postprocessor_args': [
|
| 151 |
+
'-t', '60' # Limit to first 60 seconds
|
| 152 |
+
],
|
| 153 |
+
}
|
| 154 |
+
try:
|
| 155 |
+
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 156 |
+
info = ydl.extract_info(url, download=True)
|
| 157 |
+
final_audio_path = base_out + '.mp3'
|
| 158 |
+
except Exception as e:
|
| 159 |
+
err_msg = str(e)
|
| 160 |
+
if "Sign in to confirm" in err_msg or "bot" in err_msg.lower() or "youtube" in err_msg.lower():
|
| 161 |
+
raise gr.Error("YouTube blocked the Hugging Face Server. Please use a TikTok/Twitter link, OR paste a direct .MP3 URL, OR upload the file manually.")
|
| 162 |
+
else:
|
| 163 |
+
raise gr.Error(f"Video download failed: {err_msg}")
|
| 164 |
|
| 165 |
client = get_client()
|
| 166 |
sample_b64 = base64.b64encode(Path(final_audio_path).read_bytes()).decode()
|
|
|
|
| 216 |
z-index: 10;
|
| 217 |
}
|
| 218 |
.app-header h1 {
|
| 219 |
+
font-size: 3.2rem;
|
| 220 |
font-weight: 800;
|
| 221 |
letter-spacing: -1.5px;
|
| 222 |
background: linear-gradient(135deg, #c084fc 0%, #ec4899 50%, #facc15 100%);
|
|
|
|
| 228 |
}
|
| 229 |
.app-header p {
|
| 230 |
color: #94a3b8;
|
| 231 |
+
font-size: 1.25rem;
|
| 232 |
+
font-weight: 500;
|
| 233 |
margin-top: 0;
|
| 234 |
}
|
| 235 |
+
.highlight-badge {
|
| 236 |
+
background: linear-gradient(135deg, #f59e0b, #ef4444);
|
| 237 |
+
color: white;
|
| 238 |
+
padding: 2px 8px;
|
| 239 |
+
border-radius: 8px;
|
| 240 |
+
font-size: 0.8rem;
|
| 241 |
+
font-weight: 800;
|
| 242 |
+
vertical-align: top;
|
| 243 |
+
margin-left: 10px;
|
| 244 |
+
box-shadow: 0 0 10px rgba(239, 68, 68, 0.6);
|
| 245 |
+
}
|
| 246 |
|
| 247 |
/* Glass panel wrapper */
|
| 248 |
div.tabs-container, .panel-box {
|
|
|
|
| 370 |
footer { display: none !important; }
|
| 371 |
"""
|
| 372 |
|
| 373 |
+
# Helper to initialize voices on ui load
|
| 374 |
+
def init_voices_ui():
    """Build the dropdown update that fills the voice picker on UI load.

    Fetches the current options from ``get_voice_choices()`` and preselects
    the first one. When no options come back, the selection is cleared
    (value ``None``).

    Returns:
        The ``gr.update(...)`` payload carrying the new ``choices`` and
        ``value`` for the target dropdown.
    """
    voice_options = get_voice_choices()
    if voice_options:
        # Element 1 of the first entry becomes the selected value
        # (presumably entries are (label, value) pairs — confirm against
        # get_voice_choices).
        preselected = voice_options[0][1]
    else:
        preselected = None
    return gr.update(choices=voice_options, value=preselected)
|
| 378 |
|
| 379 |
with gr.Blocks(title="Voxtral Studio — Mistral AI Audio") as demo:
|
| 380 |
|
| 381 |
gr.HTML("""
|
| 382 |
<div class="app-header">
|
| 383 |
+
<h1>🎙️ Voxtral Studio <span class="highlight-badge">VOICE CLONING</span></h1>
|
| 384 |
+
<p>Powered by Mistral AI · STT & Elite Text-to-Speech + Instant Zero-Shot Cloning</p>
|
| 385 |
+
|
| 386 |
</div>
|
| 387 |
""")
|
| 388 |
|
|
|
|
| 438 |
)
|
| 439 |
with gr.Row():
|
| 440 |
tts_voice_id = gr.Dropdown(
|
| 441 |
+
label="Select a Mistral Voice or Your Clones",
|
| 442 |
+
choices=[], # Populated on load
|
|
|
|
| 443 |
allow_custom_value=True,
|
| 444 |
scale=3,
|
| 445 |
)
|
|
|
|
| 495 |
elem_classes=["audio-component"],
|
| 496 |
)
|
| 497 |
clone_url = gr.Textbox(
|
| 498 |
+
label="OR: Media URL (TikTok, Twitter, or direct .MP3/.WAV link)",
|
| 499 |
+
placeholder="https://...link_to_audio_or_video...",
|
| 500 |
)
|
| 501 |
clone_name = gr.Textbox(
|
| 502 |
label="Voice Name",
|
|
|
|
| 534 |
""")
|
| 535 |
|
| 536 |
|
| 537 |
+
# Populate choices dynamically when the page loads for each user!
|
| 538 |
+
demo.load(fn=init_voices_ui, outputs=tts_voice_id)
|
| 539 |
+
|
| 540 |
if __name__ == "__main__":
    # Script entry point: start the Gradio server only when this file is
    # executed directly, not when it is imported.
    launch_options = {
        "server_name": "0.0.0.0",  # listen on all network interfaces
        "server_port": 7860,
        "ssr_mode": False,
    }
    demo.launch(**launch_options)
|