mehdilaalali committed on
Commit
559f05c
·
verified ·
1 Parent(s): 66e6d77

fix: populate voices via lazy load, add direct mp3 audio curl bypass for YT blocks, add clone badge

Browse files
Files changed (1) hide show
  1. app.py +72 -40
app.py CHANGED
@@ -1,10 +1,10 @@
1
  import os
2
  import base64
3
- import tempfile
4
  import gradio as gr
5
  from pathlib import Path
6
  import base64
7
  import os
 
8
  from mistralai.client import Mistral
9
 
10
  def list_user_voices():
@@ -118,34 +118,49 @@ def clone_voice(audio_path, url_input, voice_name, gender, languages_str):
118
  final_audio_path = audio_path
119
 
120
  try:
121
- # If URL is provided, download it with yt-dlp
122
  if url_input.strip():
123
- import yt_dlp
124
  base_out = tempfile.mktemp()
125
- ydl_opts = {
126
- 'format': 'bestaudio/best',
127
- 'outtmpl': base_out + '.%(ext)s',
128
- 'quiet': True,
129
- 'postprocessors': [{
130
- 'key': 'FFmpegExtractAudio',
131
- 'preferredcodec': 'mp3',
132
- 'preferredquality': '128',
133
- }],
134
- 'postprocessor_args': [
135
- '-t', '60' # Limit to first 60 seconds to avoid exceeding API limits
136
- ],
137
- }
138
- try:
139
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
140
- info = ydl.extract_info(url_input.strip(), download=True)
141
- # after postprocessing, file has .mp3 extension
142
- final_audio_path = base_out + '.mp3'
143
- except Exception as e:
144
- err_msg = str(e)
145
- if "Sign in to confirm" in err_msg or "bot" in err_msg.lower() or "youtube" in err_msg.lower():
146
- raise gr.Error("YouTube blocked the Hugging Face datacenter IP. Please try a TikTok/Twitter link instead, or download the MP3 manually and upload it above.")
147
- else:
148
- raise gr.Error(f"Video download failed: {err_msg}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
  client = get_client()
151
  sample_b64 = base64.b64encode(Path(final_audio_path).read_bytes()).decode()
@@ -201,7 +216,7 @@ body, .gradio-container {
201
  z-index: 10;
202
  }
203
  .app-header h1 {
204
- font-size: 3.8rem;
205
  font-weight: 800;
206
  letter-spacing: -1.5px;
207
  background: linear-gradient(135deg, #c084fc 0%, #ec4899 50%, #facc15 100%);
@@ -213,10 +228,21 @@ body, .gradio-container {
213
  }
214
  .app-header p {
215
  color: #94a3b8;
216
- font-size: 1.15rem;
217
- font-weight: 400;
218
  margin-top: 0;
219
  }
 
 
 
 
 
 
 
 
 
 
 
220
 
221
  /* Glass panel wrapper */
222
  div.tabs-container, .panel-box {
@@ -344,15 +370,19 @@ label span {
344
  footer { display: none !important; }
345
  """
346
 
347
- # Initialize voices at startup
348
- INITIAL_VOICES = get_voice_choices()
 
 
 
349
 
350
  with gr.Blocks(title="Voxtral Studio — Mistral AI Audio") as demo:
351
 
352
  gr.HTML("""
353
  <div class="app-header">
354
- <h1>🎙️ Voxtral Studio</h1>
355
- <p>Powered by Mistral AI · Speech-to-Text &amp; Text-to-Speech with Voice Cloning</p>
 
356
  </div>
357
  """)
358
 
@@ -408,9 +438,8 @@ with gr.Blocks(title="Voxtral Studio — Mistral AI Audio") as demo:
408
  )
409
  with gr.Row():
410
  tts_voice_id = gr.Dropdown(
411
- label="Select a Mistral Voice",
412
- choices=INITIAL_VOICES,
413
- value=INITIAL_VOICES[0][1] if INITIAL_VOICES else None,
414
  allow_custom_value=True,
415
  scale=3,
416
  )
@@ -466,8 +495,8 @@ with gr.Blocks(title="Voxtral Studio — Mistral AI Audio") as demo:
466
  elem_classes=["audio-component"],
467
  )
468
  clone_url = gr.Textbox(
469
- label="OR: Media URL (YouTube, TikTok, MP3, etc.)",
470
- placeholder="https://www.youtube.com/watch?v=...",
471
  )
472
  clone_name = gr.Textbox(
473
  label="Voice Name",
@@ -505,5 +534,8 @@ with gr.Blocks(title="Voxtral Studio — Mistral AI Audio") as demo:
505
  """)
506
 
507
 
 
 
 
508
  if __name__ == "__main__":
509
- demo.launch(server_name="0.0.0.0", server_port=7860, css=css, ssr_mode=False)
 
1
  import os
2
  import base64
 
3
  import gradio as gr
4
  from pathlib import Path
5
  import base64
6
  import os
7
+ import requests
8
  from mistralai.client import Mistral
9
 
10
  def list_user_voices():
 
118
  final_audio_path = audio_path
119
 
120
  try:
121
+ # If URL is provided, handle direct links or yt-dlp
122
  if url_input.strip():
123
+ url = url_input.strip()
124
  base_out = tempfile.mktemp()
125
+
126
+ # If it's a direct audio file link, bypass yt-dlp and download it directly
127
+ if url.lower().endswith(('.mp3', '.wav', '.flac', '.ogg', '.m4a')):
128
+ try:
129
+ ext = url.split('.')[-1]
130
+ final_audio_path = f"{base_out}.{ext}"
131
+ with requests.get(url, stream=True, timeout=15) as r:
132
+ r.raise_for_status()
133
+ with open(final_audio_path, 'wb') as f:
134
+ for chunk in r.iter_content(chunk_size=8192):
135
+ f.write(chunk)
136
+ except Exception as e:
137
+ return f"❌ Error downloading direct audio link: {str(e)}", gr.update()
138
+ # Otherwise use yt-dlp for TikTok, Twitter, YouTube (if not blocked), etc.
139
+ else:
140
+ import yt_dlp
141
+ ydl_opts = {
142
+ 'format': 'bestaudio/best',
143
+ 'outtmpl': base_out + '.%(ext)s',
144
+ 'quiet': True,
145
+ 'postprocessors': [{
146
+ 'key': 'FFmpegExtractAudio',
147
+ 'preferredcodec': 'mp3',
148
+ 'preferredquality': '128',
149
+ }],
150
+ 'postprocessor_args': [
151
+ '-t', '60' # Limit to first 60 seconds
152
+ ],
153
+ }
154
+ try:
155
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
156
+ info = ydl.extract_info(url, download=True)
157
+ final_audio_path = base_out + '.mp3'
158
+ except Exception as e:
159
+ err_msg = str(e)
160
+ if "Sign in to confirm" in err_msg or "bot" in err_msg.lower() or "youtube" in err_msg.lower():
161
+ raise gr.Error("YouTube blocked the Hugging Face Server. Please use a TikTok/Twitter link, OR paste a direct .MP3 URL, OR upload the file manually.")
162
+ else:
163
+ raise gr.Error(f"Video download failed: {err_msg}")
164
 
165
  client = get_client()
166
  sample_b64 = base64.b64encode(Path(final_audio_path).read_bytes()).decode()
 
216
  z-index: 10;
217
  }
218
  .app-header h1 {
219
+ font-size: 3.2rem;
220
  font-weight: 800;
221
  letter-spacing: -1.5px;
222
  background: linear-gradient(135deg, #c084fc 0%, #ec4899 50%, #facc15 100%);
 
228
  }
229
  .app-header p {
230
  color: #94a3b8;
231
+ font-size: 1.25rem;
232
+ font-weight: 500;
233
  margin-top: 0;
234
  }
235
+ .highlight-badge {
236
+ background: linear-gradient(135deg, #f59e0b, #ef4444);
237
+ color: white;
238
+ padding: 2px 8px;
239
+ border-radius: 8px;
240
+ font-size: 0.8rem;
241
+ font-weight: 800;
242
+ vertical-align: top;
243
+ margin-left: 10px;
244
+ box-shadow: 0 0 10px rgba(239, 68, 68, 0.6);
245
+ }
246
 
247
  /* Glass panel wrapper */
248
  div.tabs-container, .panel-box {
 
370
  footer { display: none !important; }
371
  """
372
 
373
+ # Helper for UI load: fetches get_voice_choices() and returns a gr.update that sets the choices, defaulting the value to the first choice's second element (choices[0][1]) or None when the list is empty
374
+ def init_voices_ui():
375
+ choices = get_voice_choices()
376
+ default_val = choices[0][1] if choices else None
377
+ return gr.update(choices=choices, value=default_val)
378
 
379
  with gr.Blocks(title="Voxtral Studio — Mistral AI Audio") as demo:
380
 
381
  gr.HTML("""
382
  <div class="app-header">
383
+ <h1>🎙️ Voxtral Studio <span class="highlight-badge">VOICE CLONING</span></h1>
384
+ <p>Powered by Mistral AI · STT & Elite Text-to-Speech + Instant Zero-Shot Cloning</p>
385
+
386
  </div>
387
  """)
388
 
 
438
  )
439
  with gr.Row():
440
  tts_voice_id = gr.Dropdown(
441
+ label="Select a Mistral Voice or Your Clones",
442
+ choices=[], # Populated on load
 
443
  allow_custom_value=True,
444
  scale=3,
445
  )
 
495
  elem_classes=["audio-component"],
496
  )
497
  clone_url = gr.Textbox(
498
+ label="OR: Media URL (TikTok, Twitter, or direct .MP3/.WAV link)",
499
+ placeholder="https://...link_to_audio_or_video...",
500
  )
501
  clone_name = gr.Textbox(
502
  label="Voice Name",
 
534
  """)
535
 
536
 
537
+ # Populate choices dynamically when the page loads for each user!
538
+ demo.load(fn=init_voices_ui, outputs=tts_voice_id)
539
+
540
  if __name__ == "__main__":
541
+ demo.launch(server_name="0.0.0.0", server_port=7860, ssr_mode=False)