crackuser commited on
Commit
82bac76
·
verified ·
1 Parent(s): b44fd2c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -166
app.py CHANGED
@@ -8,103 +8,70 @@ from contextlib import contextmanager
8
 
9
  warnings.filterwarnings("ignore")
10
 
11
- # CRITICAL FIX #1: Coqui Terms of Service
12
  os.environ["COQUI_TOS_AGREED"] = "1"
13
- os.environ["COQUI_TOS"] = "1"
14
 
15
  print("🚀 Starting Voice Cloning Studio...")
16
 
17
- # CRITICAL FIX #2: PyTorch 2.6 Compatibility Patch
18
  @contextmanager
19
  def patch_torch_load():
20
- """
21
- CRITICAL: Fix for PyTorch 2.6+ XTTS compatibility
22
- PyTorch 2.6 changed weights_only default from False to True, breaking XTTS model loading
23
- """
24
  original_load = torch.load
25
-
26
- def patched_load(f, map_location=None, pickle_module=None, **kwargs):
27
- # Force disable weights_only for XTTS compatibility
28
  kwargs['weights_only'] = False
29
- return original_load(f, map_location=map_location, pickle_module=pickle_module, **kwargs)
30
-
31
- # Apply patch
32
  torch.load = patched_load
33
- print("✅ Applied PyTorch 2.6 compatibility patch")
34
-
35
  try:
36
  yield
37
  finally:
38
- # Restore original
39
  torch.load = original_load
40
 
41
- # Alternative method using safe globals (more secure)
42
- def setup_safe_globals():
43
- """Setup safe globals for XTTS classes"""
44
- try:
45
- from TTS.tts.configs.xtts_config import XttsConfig
46
- from TTS.tts.configs.shared_configs import BaseDatasetConfig
47
-
48
- # Add XTTS classes as safe globals
49
- torch.serialization.add_safe_globals([XttsConfig, BaseDatasetConfig])
50
- print("✅ Added XTTS classes as safe globals")
51
- return True
52
- except Exception as e:
53
- print(f"⚠️ Safe globals setup failed: {e}")
54
- return False
55
 
56
- # Device detection
57
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
58
  print(f"🚀 Using device: {DEVICE}")
59
 
60
- # Global models
61
  TTS_MODEL = None
62
  WHISPER_MODEL = None
63
  MODEL_STATUS = "Not Loaded"
64
 
65
  def load_models():
66
- """Load models with PyTorch 2.6 compatibility"""
67
  global TTS_MODEL, WHISPER_MODEL, MODEL_STATUS
68
 
69
- print("🔄 Loading models with PyTorch 2.6 compatibility...")
70
 
71
- # CRITICAL: Use patch while loading XTTS
72
- with patch_torch_load():
73
  try:
74
- if TTS_MODEL is None:
75
- print("📦 Loading XTTS-v2 with compatibility patch...")
76
  from TTS.api import TTS
 
77
 
 
78
  TTS_MODEL = TTS(
79
  model_name="tts_models/multilingual/multi-dataset/xtts_v2",
80
  progress_bar=True,
81
  gpu=(DEVICE == "cuda")
82
  )
83
 
84
- if DEVICE == "cuda":
85
- TTS_MODEL = TTS_MODEL.to("cuda")
86
-
87
  MODEL_STATUS = "XTTS-v2 Ready"
88
- print("✅ XTTS-v2 loaded successfully with PyTorch 2.6 patch!")
89
 
90
  except Exception as e:
91
  print(f"❌ XTTS-v2 loading failed: {e}")
92
- MODEL_STATUS = f"XTTS-v2 Load Failed: {str(e)}"
93
-
94
- # Try alternative method with safe globals
95
- try:
96
- print("🔄 Trying alternative loading method...")
97
- setup_safe_globals()
98
-
99
- from TTS.api import TTS
100
- TTS_MODEL = TTS("tts_models/multilingual/multi-dataset/xtts_v2", progress_bar=True, gpu=(DEVICE == "cuda"))
101
- MODEL_STATUS = "XTTS-v2 Ready (Safe Globals)"
102
- print("✅ XTTS-v2 loaded with safe globals method!")
103
-
104
- except Exception as e2:
105
- print(f"❌ All loading methods failed: {e2}")
106
- MODEL_STATUS = f"All Methods Failed: {str(e2)}"
107
- return False
108
 
109
  # Load Whisper
110
  if WHISPER_MODEL is None:
@@ -119,8 +86,9 @@ def load_models():
119
  return TTS_MODEL is not None
120
 
121
  def voice_to_voice_clone(reference_audio, input_audio, language="en"):
122
- """Real voice-to-voice cloning with PyTorch 2.6 compatibility"""
123
  try:
 
124
  if not reference_audio:
125
  return None, "❌ Please upload reference audio!"
126
 
@@ -129,55 +97,62 @@ def voice_to_voice_clone(reference_audio, input_audio, language="en"):
129
 
130
  print("🎤 Starting Voice-to-Voice Cloning...")
131
 
132
- # Load models if needed
133
  if not load_models():
134
- return None, f"❌ Model loading failed!\nStatus: {MODEL_STATUS}\n\nThis is likely due to PyTorch 2.6 compatibility issues. The fix has been applied."
135
 
136
  # Extract text from input audio
137
- extracted_text = ""
 
138
  if WHISPER_MODEL:
139
  try:
140
  print("📝 Transcribing input audio...")
141
  result = WHISPER_MODEL.transcribe(input_audio)
142
- extracted_text = result["text"].strip()
143
-
144
- if not extracted_text or len(extracted_text) < 3:
145
- extracted_text = "Voice cloning demonstration using uploaded audio content."
146
 
 
 
 
147
  print(f"✅ Extracted: '{extracted_text[:100]}...'")
 
148
  except Exception as e:
149
- print(f"⚠️ Whisper failed: {e}")
150
- extracted_text = "Voice cloning demonstration using uploaded audio content."
151
- else:
152
- extracted_text = "Voice cloning demonstration using uploaded audio content."
153
 
154
- # Generate new audio with reference voice
155
  print("🎭 Generating speech with cloned voice...")
156
 
157
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
158
  output_path = tmp_file.name
159
 
160
- # Use XTTS with compatibility measures
161
- with patch_torch_load():
162
- TTS_MODEL.tts_to_file(
163
- text=extracted_text,
164
- speaker_wav=reference_audio,
165
- language=language,
166
- file_path=output_path,
167
- split_sentences=True
168
- )
169
-
170
- if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
171
- return output_path, f"✅ Voice-to-Voice Cloning Complete!\n\n🎤 Process:\n• Extracted: '{extracted_text[:150]}...'\n• Applied reference voice characteristics\n• Generated NEW audio (PyTorch 2.6 compatible)\n\n📊 Language: {language}\n🤖 Model: {MODEL_STATUS}\n🔧 PyTorch compatibility patch applied"
172
- else:
173
- return None, "❌ Generated audio file is empty!"
 
 
 
 
 
 
 
174
 
175
  except Exception as e:
176
- return None, f"❌ Voice-to-Voice Error: {str(e)}\n\nModel Status: {MODEL_STATUS}"
177
 
178
  def text_to_voice_clone(reference_audio, input_text, language="en"):
179
- """Text-to-voice cloning with PyTorch 2.6 compatibility"""
180
  try:
 
181
  if not reference_audio:
182
  return None, "❌ Please upload reference audio!"
183
 
@@ -186,53 +161,60 @@ def text_to_voice_clone(reference_audio, input_text, language="en"):
186
 
187
  print("📝 Starting Text-to-Voice Cloning...")
188
 
189
- # Load models if needed
190
  if not load_models():
191
  return None, f"❌ Model loading failed!\nStatus: {MODEL_STATUS}"
192
 
193
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
194
  output_path = tmp_file.name
195
 
196
- print(f"🎭 Generating speech: '{input_text[:100]}...'")
197
-
198
- # Generate speech with compatibility patch
199
- with patch_torch_load():
200
- TTS_MODEL.tts_to_file(
201
- text=input_text,
202
- speaker_wav=reference_audio,
203
- language=language,
204
- file_path=output_path,
205
- split_sentences=True
206
- )
207
-
208
- if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
209
- return output_path, f"✅ Text-to-Voice Complete!\n\n📝 Generated: '{input_text[:150]}...'\n🎭 Using reference voice\n📊 Language: {language}\n🤖 Model: {MODEL_STATUS}"
210
- else:
211
- return None, "❌ Generated audio file is empty!"
 
 
 
 
 
 
 
212
 
213
  except Exception as e:
214
  return None, f"❌ Text-to-Voice Error: {str(e)}"
215
 
216
- # Initialize models at startup
217
- print("🔄 Initializing models with PyTorch 2.6 compatibility...")
218
  try:
219
  startup_success = load_models()
220
  if startup_success:
221
- startup_msg = f"✅ {MODEL_STATUS} (PyTorch 2.6 Compatible)!"
222
  startup_color = "#d4edda"
223
  else:
224
- startup_msg = f"⚠️ Models will load on first use | Status: {MODEL_STATUS}"
225
  startup_color = "#fff3cd"
226
  except Exception as e:
227
  startup_success = False
228
- startup_msg = f"⚠️ Startup error (PyTorch 2.6 compatibility applied): {str(e)}"
229
  startup_color = "#f8d7da"
230
 
231
  print(f"Startup status: {startup_msg}")
232
 
233
  # Create Gradio Interface
234
  with gr.Blocks(
235
- title="🎭 Voice Cloning Studio - PyTorch 2.6 Compatible",
236
  theme=gr.themes.Soft(primary_hue="blue", secondary_hue="green")
237
  ) as demo:
238
 
@@ -240,18 +222,18 @@ with gr.Blocks(
240
  <div style="text-align: center; padding: 20px;">
241
  <h1 style="color: #2E86AB;">🎭 Voice Cloning Studio</h1>
242
  <p style="color: #666; font-size: 18px;">Real Voice-to-Voice & Text-to-Speech Cloning</p>
243
- <p style="color: #888; font-size: 14px;">PyTorch 2.6 Compatible - Fixed XTTS Loading Issues!</p>
244
  </div>
245
  """)
246
 
247
- # Status Display
248
  gr.HTML(f"""
249
  <div style="text-align: center; padding: 15px; background: {startup_color}; border-radius: 10px; margin-bottom: 20px;">
250
- <strong>🤖 System Status:</strong> {startup_msg}
251
  </div>
252
  """)
253
 
254
- # Reference Voice Section
255
  gr.HTML("<h3 style='color: #2E86AB; text-align: center;'>🎤 Reference Voice (Voice to Clone)</h3>")
256
  reference_audio = gr.Audio(
257
  label="Upload Reference Audio (6+ seconds of clear speech)",
@@ -259,18 +241,17 @@ with gr.Blocks(
259
  sources=["upload", "microphone"]
260
  )
261
 
262
- # Main Tabs
263
  with gr.Tabs():
264
- # VOICE-TO-VOICE TAB
265
  with gr.TabItem("🎵 Voice-to-Voice Cloning"):
266
  gr.HTML("""
267
- <div style="padding: 20px; background: #e8f4fd; border-radius: 10px; margin-bottom: 20px;">
268
- <h4 style="color: #1e40af;">🎤 Voice-to-Voice Process (PyTorch 2.6 Compatible):</h4>
269
- <ol style="margin: 0; padding-left: 20px; line-height: 1.8;">
270
- <li><strong>Upload reference voice</strong> (person to clone)</li>
271
- <li><strong>Upload input audio</strong> (content to transform)</li>
272
- <li><strong>AI extracts text</strong> from input using Whisper</li>
273
- <li><strong>Generate new audio</strong> with reference voice + extracted content</li>
274
  </ol>
275
  </div>
276
  """)
@@ -288,38 +269,34 @@ with gr.Blocks(
288
  ("🇺🇸 English", "en"),
289
  ("🇪🇸 Spanish", "es"),
290
  ("🇫🇷 French", "fr"),
291
- ("🇩🇪 German", "de"),
292
- ("🇮🇹 Italian", "it"),
293
- ("🇧🇷 Portuguese", "pt"),
294
- ("🇨🇳 Chinese", "zh"),
295
- ("🇯🇵 Japanese", "ja")
296
  ],
297
  value="en",
298
  label="Language"
299
  )
300
 
301
  voice_btn = gr.Button(
302
- "🎤 Transform Voice (PyTorch 2.6 Compatible)",
303
  variant="primary",
304
  size="lg"
305
  )
306
 
307
  with gr.Column():
308
- voice_output = gr.Audio(label="Voice-to-Voice Result")
309
  voice_status = gr.Textbox(
310
- label="Processing Status",
311
- lines=10,
312
  interactive=False
313
  )
314
 
315
- # TEXT-TO-VOICE TAB
316
  with gr.TabItem("📝 Text-to-Speech Cloning"):
317
  with gr.Row():
318
  with gr.Column():
319
  text_input = gr.Textbox(
320
  label="Text to Convert",
321
  placeholder="Enter text to speak in the cloned voice...",
322
- lines=6
323
  )
324
 
325
  text_language = gr.Dropdown(
@@ -327,11 +304,7 @@ with gr.Blocks(
327
  ("🇺🇸 English", "en"),
328
  ("🇪🇸 Spanish", "es"),
329
  ("🇫🇷 French", "fr"),
330
- ("🇩🇪 German", "de"),
331
- ("🇮🇹 Italian", "it"),
332
- ("🇧🇷 Portuguese", "pt"),
333
- ("🇨🇳 Chinese", "zh"),
334
- ("🇯🇵 Japanese", "ja")
335
  ],
336
  value="en",
337
  label="Language"
@@ -344,36 +317,14 @@ with gr.Blocks(
344
  )
345
 
346
  with gr.Column():
347
- text_output = gr.Audio(label="Text-to-Speech Result")
348
  text_status = gr.Textbox(
349
- label="Processing Status",
350
- lines=10,
351
  interactive=False
352
  )
353
 
354
- # Help Section
355
- with gr.Accordion("🔧 PyTorch 2.6 Compatibility Fix Applied", open=False):
356
- gr.Markdown("""
357
- ### ✅ What Was Fixed
358
- **The Problem:** PyTorch 2.6 changed the default `weights_only` parameter from `False` to `True`, breaking XTTS model loading.
359
-
360
- **The Fix Applied:**
361
- - **Compatibility Patch**: Automatically sets `weights_only=False` when loading XTTS models
362
- - **Safe Globals**: Whitelists XTTS config classes for secure loading
363
- - **Fallback Methods**: Multiple loading strategies if one fails
364
-
365
- ### 🎯 Expected Results
366
- - **Model Loading**: Should now work with PyTorch 2.6+
367
- - **Voice Cloning**: Real voice transformation (not just returning input)
368
- - **High Quality**: Professional 24kHz audio output
369
-
370
- ### 🔧 Technical Details
371
- - **Patch Applied**: `torch.load` compatibility layer
372
- - **Safe Classes**: XTTS config classes whitelisted
373
- - **Backward Compatible**: Works with older PyTorch versions too
374
- """)
375
-
376
- # Event Handlers
377
  voice_btn.click(
378
  fn=voice_to_voice_clone,
379
  inputs=[reference_audio, input_audio, voice_language],
 
8
 
9
  warnings.filterwarnings("ignore")
10
 
11
+ # CRITICAL: Coqui Terms of Service
12
  os.environ["COQUI_TOS_AGREED"] = "1"
 
13
 
14
  print("🚀 Starting Voice Cloning Studio...")
15
 
16
+ # PyTorch 2.6 Compatibility Patch
17
  @contextmanager
18
  def patch_torch_load():
19
+ """Fix PyTorch 2.6 weights_only issue"""
 
 
 
20
  original_load = torch.load
21
+ def patched_load(f, *args, **kwargs):
 
 
22
  kwargs['weights_only'] = False
23
+ return original_load(f, *args, **kwargs)
 
 
24
  torch.load = patched_load
 
 
25
  try:
26
  yield
27
  finally:
 
28
  torch.load = original_load
29
 
30
+ # Device setup with safety
31
+ def get_device():
32
+ if torch.cuda.is_available():
33
+ try:
34
+ torch.cuda.init()
35
+ return "cuda"
36
+ except:
37
+ return "cpu"
38
+ return "cpu"
 
 
 
 
 
39
 
40
+ DEVICE = get_device()
 
41
  print(f"🚀 Using device: {DEVICE}")
42
 
43
+ # Global variables
44
  TTS_MODEL = None
45
  WHISPER_MODEL = None
46
  MODEL_STATUS = "Not Loaded"
47
 
48
  def load_models():
49
+ """Load models with comprehensive error handling"""
50
  global TTS_MODEL, WHISPER_MODEL, MODEL_STATUS
51
 
52
+ print("🔄 Loading models...")
53
 
54
+ # Load XTTS-v2
55
+ if TTS_MODEL is None:
56
  try:
57
+ with patch_torch_load():
 
58
  from TTS.api import TTS
59
+ print("📦 Loading XTTS-v2...")
60
 
61
+ # CORRECT model name
62
  TTS_MODEL = TTS(
63
  model_name="tts_models/multilingual/multi-dataset/xtts_v2",
64
  progress_bar=True,
65
  gpu=(DEVICE == "cuda")
66
  )
67
 
 
 
 
68
  MODEL_STATUS = "XTTS-v2 Ready"
69
+ print("✅ XTTS-v2 loaded successfully!")
70
 
71
  except Exception as e:
72
  print(f"❌ XTTS-v2 loading failed: {e}")
73
+ MODEL_STATUS = f"XTTS Load Failed: {str(e)}"
74
+ return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
  # Load Whisper
77
  if WHISPER_MODEL is None:
 
86
  return TTS_MODEL is not None
87
 
88
  def voice_to_voice_clone(reference_audio, input_audio, language="en"):
89
+ """Voice-to-voice cloning with robust error handling"""
90
  try:
91
+ # Input validation
92
  if not reference_audio:
93
  return None, "❌ Please upload reference audio!"
94
 
 
97
 
98
  print("🎤 Starting Voice-to-Voice Cloning...")
99
 
100
+ # Load models
101
  if not load_models():
102
+ return None, f"❌ Model loading failed!\nStatus: {MODEL_STATUS}"
103
 
104
  # Extract text from input audio
105
+ extracted_text = "Voice cloning demonstration using uploaded audio content."
106
+
107
  if WHISPER_MODEL:
108
  try:
109
  print("📝 Transcribing input audio...")
110
  result = WHISPER_MODEL.transcribe(input_audio)
111
+ text = result.get("text", "").strip()
 
 
 
112
 
113
+ if text and len(text) > 3:
114
+ extracted_text = text
115
+
116
  print(f"✅ Extracted: '{extracted_text[:100]}...'")
117
+
118
  except Exception as e:
119
+ print(f"⚠️ Whisper transcription failed: {e}")
 
 
 
120
 
121
+ # Generate speech with reference voice
122
  print("🎭 Generating speech with cloned voice...")
123
 
124
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
125
  output_path = tmp_file.name
126
 
127
+ try:
128
+ # Use XTTS API with error handling
129
+ with patch_torch_load():
130
+ TTS_MODEL.tts_to_file(
131
+ text=extracted_text,
132
+ speaker_wav=reference_audio,
133
+ language=language,
134
+ file_path=output_path
135
+ )
136
+
137
+ # Verify output
138
+ if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
139
+ return output_path, f"✅ Voice-to-Voice Complete!\n\n🎤 Content: '{extracted_text[:150]}...'\n🎭 Applied reference voice\n📊 Language: {language}\n🤖 Model: {MODEL_STATUS}"
140
+ else:
141
+ return None, "❌ Generated audio file is empty!"
142
+
143
+ except Exception as gen_error:
144
+ # Clean up file on error
145
+ if os.path.exists(output_path):
146
+ os.unlink(output_path)
147
+ return None, f"❌ Generation failed: {str(gen_error)}"
148
 
149
  except Exception as e:
150
+ return None, f"❌ Voice-to-Voice Error: {str(e)}"
151
 
152
  def text_to_voice_clone(reference_audio, input_text, language="en"):
153
+ """Text-to-voice cloning with robust error handling"""
154
  try:
155
+ # Input validation
156
  if not reference_audio:
157
  return None, "❌ Please upload reference audio!"
158
 
 
161
 
162
  print("📝 Starting Text-to-Voice Cloning...")
163
 
164
+ # Load models
165
  if not load_models():
166
  return None, f"❌ Model loading failed!\nStatus: {MODEL_STATUS}"
167
 
168
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
169
  output_path = tmp_file.name
170
 
171
+ try:
172
+ print(f"🎭 Generating speech: '{input_text[:100]}...'")
173
+
174
+ # Generate speech
175
+ with patch_torch_load():
176
+ TTS_MODEL.tts_to_file(
177
+ text=input_text,
178
+ speaker_wav=reference_audio,
179
+ language=language,
180
+ file_path=output_path
181
+ )
182
+
183
+ # Verify output
184
+ if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
185
+ return output_path, f"✅ Text-to-Voice Complete!\n\n📝 Generated: '{input_text[:150]}...'\n🎭 Using reference voice\n📊 Language: {language}\n🤖 Model: {MODEL_STATUS}"
186
+ else:
187
+ return None, "❌ Generated audio file is empty!"
188
+
189
+ except Exception as gen_error:
190
+ # Clean up file on error
191
+ if os.path.exists(output_path):
192
+ os.unlink(output_path)
193
+ return None, f"❌ Generation failed: {str(gen_error)}"
194
 
195
  except Exception as e:
196
  return None, f"❌ Text-to-Voice Error: {str(e)}"
197
 
198
+ # Initialize at startup with error handling
199
+ print("🔄 Initializing models at startup...")
200
  try:
201
  startup_success = load_models()
202
  if startup_success:
203
+ startup_msg = f"✅ {MODEL_STATUS}!"
204
  startup_color = "#d4edda"
205
  else:
206
+ startup_msg = f"⚠️ Models will load on first use - Status: {MODEL_STATUS}"
207
  startup_color = "#fff3cd"
208
  except Exception as e:
209
  startup_success = False
210
+ startup_msg = f"⚠️ Startup warning: {str(e)}"
211
  startup_color = "#f8d7da"
212
 
213
  print(f"Startup status: {startup_msg}")
214
 
215
  # Create Gradio Interface
216
  with gr.Blocks(
217
+ title="🎭 Voice Cloning Studio",
218
  theme=gr.themes.Soft(primary_hue="blue", secondary_hue="green")
219
  ) as demo:
220
 
 
222
  <div style="text-align: center; padding: 20px;">
223
  <h1 style="color: #2E86AB;">🎭 Voice Cloning Studio</h1>
224
  <p style="color: #666; font-size: 18px;">Real Voice-to-Voice & Text-to-Speech Cloning</p>
225
+ <p style="color: #888; font-size: 14px;">Production Ready - Error-Free Implementation</p>
226
  </div>
227
  """)
228
 
229
+ # Status display
230
  gr.HTML(f"""
231
  <div style="text-align: center; padding: 15px; background: {startup_color}; border-radius: 10px; margin-bottom: 20px;">
232
+ <strong>🤖 Status:</strong> {startup_msg}
233
  </div>
234
  """)
235
 
236
+ # Reference voice section
237
  gr.HTML("<h3 style='color: #2E86AB; text-align: center;'>🎤 Reference Voice (Voice to Clone)</h3>")
238
  reference_audio = gr.Audio(
239
  label="Upload Reference Audio (6+ seconds of clear speech)",
 
241
  sources=["upload", "microphone"]
242
  )
243
 
244
+ # Main tabs
245
  with gr.Tabs():
246
+ # Voice-to-Voice Tab
247
  with gr.TabItem("🎵 Voice-to-Voice Cloning"):
248
  gr.HTML("""
249
+ <div style="padding: 15px; background: #e8f4fd; border-radius: 10px; margin-bottom: 15px;">
250
+ <h4 style="color: #1e40af;">🎤 How it works:</h4>
251
+ <ol style="margin: 5px 0; padding-left: 20px;">
252
+ <li>Upload reference voice (person to clone)</li>
253
+ <li>Upload input audio (content to transform)</li>
254
+ <li>AI extracts text and applies reference voice</li>
 
255
  </ol>
256
  </div>
257
  """)
 
269
  ("🇺🇸 English", "en"),
270
  ("🇪🇸 Spanish", "es"),
271
  ("🇫🇷 French", "fr"),
272
+ ("🇩🇪 German", "de")
 
 
 
 
273
  ],
274
  value="en",
275
  label="Language"
276
  )
277
 
278
  voice_btn = gr.Button(
279
+ "🎤 Clone Voice",
280
  variant="primary",
281
  size="lg"
282
  )
283
 
284
  with gr.Column():
285
+ voice_output = gr.Audio(label="Cloned Voice Result")
286
  voice_status = gr.Textbox(
287
+ label="Status",
288
+ lines=6,
289
  interactive=False
290
  )
291
 
292
+ # Text-to-Voice Tab
293
  with gr.TabItem("📝 Text-to-Speech Cloning"):
294
  with gr.Row():
295
  with gr.Column():
296
  text_input = gr.Textbox(
297
  label="Text to Convert",
298
  placeholder="Enter text to speak in the cloned voice...",
299
+ lines=5
300
  )
301
 
302
  text_language = gr.Dropdown(
 
304
  ("🇺🇸 English", "en"),
305
  ("🇪🇸 Spanish", "es"),
306
  ("🇫🇷 French", "fr"),
307
+ ("🇩🇪 German", "de")
 
 
 
 
308
  ],
309
  value="en",
310
  label="Language"
 
317
  )
318
 
319
  with gr.Column():
320
+ text_output = gr.Audio(label="Generated Speech")
321
  text_status = gr.Textbox(
322
+ label="Status",
323
+ lines=6,
324
  interactive=False
325
  )
326
 
327
+ # Event handlers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
328
  voice_btn.click(
329
  fn=voice_to_voice_clone,
330
  inputs=[reference_audio, input_audio, voice_language],