yukee1992 committed on
Commit
e65fbd1
·
verified ·
1 Parent(s): 0cb7ac0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -106
app.py CHANGED
@@ -89,7 +89,7 @@ class TTSRequest(BaseModel):
89
  project_id: str
90
  voice_name: Optional[str] = "default"
91
  language: Optional[str] = "en"
92
- model_type: Optional[str] = "xtts-v2" # New: allow model selection
93
 
94
  class BatchTTSRequest(BaseModel):
95
  texts: List[str]
@@ -106,7 +106,7 @@ class VoiceCloneRequest(BaseModel):
106
 
107
  class VoiceStyleRequest(BaseModel):
108
  voice_name: str
109
- style: str # e.g., "happy", "sad", "excited", "calm"
110
  intensity: Optional[float] = 1.0
111
 
112
  # Enhanced helper functions
@@ -115,7 +115,7 @@ def clean_text(text):
115
  import re
116
 
117
  if not text or not isinstance(text, str):
118
- return "Hello" # Default fallback text
119
 
120
  # Remove any problematic characters but keep basic punctuation and multilingual characters
121
  text = re.sub(r'[^\w\s\.\,\!\?\-\'\"\:\;\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]', '', text)
@@ -287,7 +287,7 @@ def save_wav(audio, file_path, sample_rate=22050):
287
  return False
288
 
289
  def load_tts_model(model_type="xtts-v2"):
290
- """FIXED: Enhanced model loading with multiple model support"""
291
  global tts, model_loaded, current_model, voice_cloning_supported, model_loading, model_load_attempts, active_model_config
292
 
293
  if model_loading:
@@ -315,60 +315,10 @@ def load_tts_model(model_type="xtts-v2"):
315
  model_config = AVAILABLE_MODELS[model_type]
316
  print(f"πŸš€ Loading {model_config['name']}...")
317
 
318
- # Load the selected model
319
  tts = TTS(model_config["model_name"]).to(DEVICE)
320
 
321
- # Test the model - FIXED: Better testing approach for XTTS-v2
322
- test_path = "/tmp/test_output.wav"
323
-
324
- if model_config["voice_cloning"]:
325
- # FIXED: For XTTS-v2, use a simpler test without speaker_wav first
326
- print("πŸ”Š Testing XTTS-v2 model...")
327
- try:
328
- # First try without speaker_wav
329
- tts.tts_to_file(
330
- text="Test",
331
- file_path=test_path,
332
- language="en"
333
- # Don't pass speaker_wav for initial test
334
- )
335
- except Exception as e:
336
- print(f"⚠️ Initial test failed: {e}")
337
- print("πŸ”„ Trying alternative test method...")
338
- # If that fails, try generating audio directly
339
- try:
340
- audio = tts.tts(text="Test", language="en")
341
- if audio is not None:
342
- import soundfile as sf
343
- sf.write(test_path, audio, 22050)
344
- else:
345
- # If we can't test properly, still mark as loaded but warn
346
- print("⚠️ Could not complete full test, but model loaded")
347
- model_loaded = True
348
- current_model = model_config["model_name"]
349
- voice_cloning_supported = model_config["voice_cloning"]
350
- active_model_config = model_config
351
- print(f"βœ… {model_config['name']} loaded (limited test)")
352
- return True
353
- except Exception as alt_error:
354
- print(f"❌ Alternative test failed: {alt_error}")
355
- raise alt_error
356
- else:
357
- # For non-voice-cloning models
358
- tts.tts_to_file(text="This is a test of the voice system.", file_path=test_path)
359
-
360
- # Check if test file was created
361
- if os.path.exists(test_path):
362
- file_size = os.path.getsize(test_path)
363
- print(f"βœ… Test file created: {test_path} ({file_size} bytes)")
364
- try:
365
- os.remove(test_path)
366
- except:
367
- pass
368
- print(f"βœ… {model_config['name']} model tested and working!")
369
- else:
370
- print("⚠️ Test file not created, but continuing with model load...")
371
-
372
  model_loaded = True
373
  current_model = model_config["model_name"]
374
  voice_cloning_supported = model_config["voice_cloning"]
@@ -378,13 +328,33 @@ def load_tts_model(model_type="xtts-v2"):
378
  print(f" Voice cloning: {'βœ… Supported' if voice_cloning_supported else '❌ Not supported'}")
379
  print(f" Languages: {', '.join(model_config['languages'])}")
380
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
381
  return True
382
 
383
  except Exception as e:
384
- print(f"❌ {model_config['name']} model failed: {e}")
385
  # Fallback to Tacotron2 if XTTS fails
386
  if model_type == "xtts-v2":
387
  print("πŸ”„ Falling back to Tacotron2...")
 
388
  return load_tts_model("tacotron2-ddc")
389
  return False
390
 
@@ -406,10 +376,10 @@ def validate_language(language: str, model_type: str) -> bool:
406
  # Enhanced API endpoints
407
  @app.post("/api/tts")
408
  async def generate_tts(request: TTSRequest):
409
- """FIXED: Enhanced TTS generation with better XTTS-v2 handling"""
410
  try:
411
- # Lazy load model on first request or if model changed
412
- if not model_loaded or active_model_config is None or request.model_type not in list(AVAILABLE_MODELS.keys())[0]:
413
  if not load_tts_model(request.model_type):
414
  return {
415
  "status": "error",
@@ -465,10 +435,14 @@ async def generate_tts(request: TTSRequest):
465
  print(f"πŸ“ Original text: '{request.text}'")
466
  print(f"πŸ“ Cleaned text: '{cleaned_text}'")
467
 
468
- # Generate TTS based on model capabilities - FIXED: Better XTTS handling
 
 
 
 
469
  try:
470
- if supports_voice_cloning():
471
- # XTTS model with voice cloning support
472
  if speaker_wav:
473
  # Custom voice with speaker file
474
  tts.tts_to_file(
@@ -478,51 +452,61 @@ async def generate_tts(request: TTSRequest):
478
  file_path=output_path
479
  )
480
  else:
481
- # Built-in voice (no speaker_wav needed)
482
  tts.tts_to_file(
483
  text=cleaned_text,
484
  language=request.language,
485
  file_path=output_path
486
  )
487
  else:
488
- # Models without voice cloning
489
  tts.tts_to_file(
490
  text=cleaned_text,
491
  file_path=output_path
492
  )
493
- except Exception as tts_error:
494
- print(f"❌ TTS generation failed: {tts_error}")
495
- # Try alternative approach
 
 
 
 
 
496
  try:
497
- print("πŸ”„ Trying alternative TTS generation method...")
498
- if supports_voice_cloning():
499
- if speaker_wav:
500
- audio = tts.tts(
501
- text=cleaned_text,
502
- speaker_wav=speaker_wav,
503
- language=request.language
504
- )
505
- else:
506
- audio = tts.tts(
507
- text=cleaned_text,
508
- language=request.language
509
- )
510
  else:
511
- audio = tts.tts(text=cleaned_text)
 
 
 
 
 
 
 
 
512
 
513
- # Save manually
514
- if not save_wav(audio, output_path):
515
- raise Exception("Failed to save audio file")
516
-
517
- except Exception as alt_error:
518
- print(f"❌ Alternative method also failed: {alt_error}")
519
- # Last resort: try very simple generation
520
  try:
521
- print("πŸ”„ Trying simple generation as last resort...")
522
- tts.tts_to_file(text="Hello world", file_path=output_path)
523
- print("βœ… Simple generation worked, but original text failed")
524
- except:
525
- raise alt_error
 
 
 
 
 
 
 
 
 
526
 
527
  # Verify the file was created
528
  if not os.path.exists(output_path):
@@ -568,14 +552,13 @@ async def generate_tts(request: TTSRequest):
568
 
569
  except Exception as e:
570
  print(f"❌ TTS generation error: {str(e)}")
571
- error_detail = {
572
- "error": str(e),
 
573
  "model": current_model,
574
  "model_type": request.model_type if 'request' in locals() else "unknown",
575
- "voice_cloning_supported": supports_voice_cloning(),
576
- "device": DEVICE
577
  }
578
- raise HTTPException(status_code=500, detail=error_detail)
579
 
580
  async def list_voices_internal():
581
  """Internal function to list available voices"""
@@ -628,13 +611,12 @@ async def get_builtin_voices():
628
  "voice_cloning_supported": voice_cloning_supported
629
  }
630
 
631
- # Keep your existing endpoints but enhance them with model selection
632
  @app.post("/api/batch-tts")
633
  async def batch_generate_tts(request: BatchTTSRequest):
634
  """Enhanced batch TTS with model selection"""
635
  try:
636
  # Lazy load model
637
- if not model_loaded or active_model_config is None or request.model_type not in list(AVAILABLE_MODELS.keys())[0]:
638
  if not load_tts_model(request.model_type):
639
  raise HTTPException(status_code=500, detail=f"TTS model '{request.model_type}' failed to load")
640
 
@@ -682,7 +664,6 @@ async def batch_generate_tts(request: BatchTTSRequest):
682
  print(f"❌ Batch TTS generation error: {str(e)}")
683
  raise HTTPException(status_code=500, detail=f"Batch TTS generation failed: {str(e)}")
684
 
685
- # Enhanced voice cloning endpoint
686
  @app.post("/api/clone-voice")
687
  async def api_clone_voice(
688
  project_id: str = Form(...),
@@ -705,7 +686,7 @@ async def api_clone_voice(
705
  if not load_tts_model("xtts-v2"):
706
  raise HTTPException(status_code=500, detail="XTTS-v2 model failed to load. Voice cloning requires XTTS-v2.")
707
 
708
- # Rest of your voice cloning implementation...
709
  temp_files = []
710
  for i, file in enumerate(files):
711
  if not file.filename.lower().endswith(('.wav', '.mp3', '.ogg', '.flac')):
@@ -740,7 +721,6 @@ async def api_clone_voice(
740
  print(f"❌ Voice cloning error: {str(e)}")
741
  raise HTTPException(status_code=500, detail=f"Voice cloning failed: {str(e)}")
742
 
743
- # Enhanced voices list endpoint
744
  @app.get("/api/voices")
745
  async def list_voices():
746
  """List available voices with enhanced information"""
@@ -807,12 +787,11 @@ async def list_voices():
807
  print(f"❌ List voices error: {str(e)}")
808
  raise HTTPException(status_code=500, detail=f"Failed to list voices: {str(e)}")
809
 
810
- # Keep your existing health check, reload-model, and root endpoints
811
  @app.get("/api/health")
812
  async def health_check():
813
  """Enhanced health check with model information"""
814
  return {
815
- "status": "healthy",
816
  "tts_loaded": model_loaded,
817
  "model": current_model,
818
  "model_config": active_model_config,
 
89
  project_id: str
90
  voice_name: Optional[str] = "default"
91
  language: Optional[str] = "en"
92
+ model_type: Optional[str] = "xtts-v2"
93
 
94
  class BatchTTSRequest(BaseModel):
95
  texts: List[str]
 
106
 
107
  class VoiceStyleRequest(BaseModel):
108
  voice_name: str
109
+ style: str
110
  intensity: Optional[float] = 1.0
111
 
112
  # Enhanced helper functions
 
115
  import re
116
 
117
  if not text or not isinstance(text, str):
118
+ return "Hello"
119
 
120
  # Remove any problematic characters but keep basic punctuation and multilingual characters
121
  text = re.sub(r'[^\w\s\.\,\!\?\-\'\"\:\;\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]', '', text)
 
287
  return False
288
 
289
  def load_tts_model(model_type="xtts-v2"):
290
+ """ROBUST MODEL LOADING: Simplified approach that just loads without complex testing"""
291
  global tts, model_loaded, current_model, voice_cloning_supported, model_loading, model_load_attempts, active_model_config
292
 
293
  if model_loading:
 
315
  model_config = AVAILABLE_MODELS[model_type]
316
  print(f"πŸš€ Loading {model_config['name']}...")
317
 
318
+ # SIMPLE APPROACH: Just load the model without complex testing
319
  tts = TTS(model_config["model_name"]).to(DEVICE)
320
 
321
+ # Mark as loaded immediately without testing
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322
  model_loaded = True
323
  current_model = model_config["model_name"]
324
  voice_cloning_supported = model_config["voice_cloning"]
 
328
  print(f" Voice cloning: {'βœ… Supported' if voice_cloning_supported else '❌ Not supported'}")
329
  print(f" Languages: {', '.join(model_config['languages'])}")
330
 
331
+ # Try a simple test but don't fail if it doesn't work
332
+ try:
333
+ test_path = "/tmp/test_output.wav"
334
+ if model_config["voice_cloning"]:
335
+ # For XTTS-v2, try without speaker_wav first
336
+ tts.tts_to_file(
337
+ text="Test",
338
+ file_path=test_path,
339
+ language="en"
340
+ )
341
+ else:
342
+ tts.tts_to_file(text="Test", file_path=test_path)
343
+
344
+ if os.path.exists(test_path):
345
+ os.remove(test_path)
346
+ print("βœ… Model test completed successfully!")
347
+ except Exception as test_error:
348
+ print(f"⚠️ Model test failed but model is loaded: {test_error}")
349
+
350
  return True
351
 
352
  except Exception as e:
353
+ print(f"❌ {model_config['name']} model failed to load: {e}")
354
  # Fallback to Tacotron2 if XTTS fails
355
  if model_type == "xtts-v2":
356
  print("πŸ”„ Falling back to Tacotron2...")
357
+ model_loading = False # Reset loading state
358
  return load_tts_model("tacotron2-ddc")
359
  return False
360
 
 
376
  # Enhanced API endpoints
377
  @app.post("/api/tts")
378
  async def generate_tts(request: TTSRequest):
379
+ """ROBUST TTS generation with multiple fallback approaches"""
380
  try:
381
+ # Lazy load model on first request
382
+ if not model_loaded:
383
  if not load_tts_model(request.model_type):
384
  return {
385
  "status": "error",
 
435
  print(f"πŸ“ Original text: '{request.text}'")
436
  print(f"πŸ“ Cleaned text: '{cleaned_text}'")
437
 
438
+ # Generate TTS with multiple fallback approaches
439
+ generation_success = False
440
+ last_error = None
441
+
442
+ # Approach 1: Standard generation
443
  try:
444
+ print("πŸ”„ Attempt 1: Standard generation...")
445
+ if supports_voice_cloning() and request.voice_name != "default":
446
  if speaker_wav:
447
  # Custom voice with speaker file
448
  tts.tts_to_file(
 
452
  file_path=output_path
453
  )
454
  else:
455
+ # Built-in XTTS voice
456
  tts.tts_to_file(
457
  text=cleaned_text,
458
  language=request.language,
459
  file_path=output_path
460
  )
461
  else:
462
+ # Default voice or non-voice-cloning models
463
  tts.tts_to_file(
464
  text=cleaned_text,
465
  file_path=output_path
466
  )
467
+ generation_success = True
468
+ print("βœ… Standard generation successful!")
469
+
470
+ except Exception as e1:
471
+ last_error = e1
472
+ print(f"❌ Standard generation failed: {e1}")
473
+
474
+ # Approach 2: Try without language parameter
475
  try:
476
+ print("πŸ”„ Attempt 2: Without language parameter...")
477
+ if supports_voice_cloning() and speaker_wav:
478
+ tts.tts_to_file(
479
+ text=cleaned_text,
480
+ speaker_wav=speaker_wav,
481
+ file_path=output_path
482
+ )
 
 
 
 
 
 
483
  else:
484
+ tts.tts_to_file(
485
+ text=cleaned_text,
486
+ file_path=output_path
487
+ )
488
+ generation_success = True
489
+ print("βœ… Generation without language successful!")
490
+ except Exception as e2:
491
+ last_error = e2
492
+ print(f"❌ Generation without language failed: {e2}")
493
 
494
+ # Approach 3: Try with very simple text
 
 
 
 
 
 
495
  try:
496
+ print("πŸ”„ Attempt 3: With simple text...")
497
+ simple_text = "Hello world" if len(cleaned_text) > 50 else cleaned_text
498
+ tts.tts_to_file(
499
+ text=simple_text,
500
+ file_path=output_path
501
+ )
502
+ generation_success = True
503
+ print("βœ… Simple text generation successful!")
504
+ except Exception as e3:
505
+ last_error = e3
506
+ print(f"❌ Simple text generation failed: {e3}")
507
+
508
+ if not generation_success:
509
+ raise Exception(f"All generation attempts failed. Last error: {last_error}")
510
 
511
  # Verify the file was created
512
  if not os.path.exists(output_path):
 
552
 
553
  except Exception as e:
554
  print(f"❌ TTS generation error: {str(e)}")
555
+ return {
556
+ "status": "error",
557
+ "message": f"TTS generation failed: {str(e)}",
558
  "model": current_model,
559
  "model_type": request.model_type if 'request' in locals() else "unknown",
560
+ "voice_cloning_supported": supports_voice_cloning()
 
561
  }
 
562
 
563
  async def list_voices_internal():
564
  """Internal function to list available voices"""
 
611
  "voice_cloning_supported": voice_cloning_supported
612
  }
613
 
 
614
  @app.post("/api/batch-tts")
615
  async def batch_generate_tts(request: BatchTTSRequest):
616
  """Enhanced batch TTS with model selection"""
617
  try:
618
  # Lazy load model
619
+ if not model_loaded:
620
  if not load_tts_model(request.model_type):
621
  raise HTTPException(status_code=500, detail=f"TTS model '{request.model_type}' failed to load")
622
 
 
664
  print(f"❌ Batch TTS generation error: {str(e)}")
665
  raise HTTPException(status_code=500, detail=f"Batch TTS generation failed: {str(e)}")
666
 
 
667
  @app.post("/api/clone-voice")
668
  async def api_clone_voice(
669
  project_id: str = Form(...),
 
686
  if not load_tts_model("xtts-v2"):
687
  raise HTTPException(status_code=500, detail="XTTS-v2 model failed to load. Voice cloning requires XTTS-v2.")
688
 
689
+ # Save uploaded files temporarily
690
  temp_files = []
691
  for i, file in enumerate(files):
692
  if not file.filename.lower().endswith(('.wav', '.mp3', '.ogg', '.flac')):
 
721
  print(f"❌ Voice cloning error: {str(e)}")
722
  raise HTTPException(status_code=500, detail=f"Voice cloning failed: {str(e)}")
723
 
 
724
  @app.get("/api/voices")
725
  async def list_voices():
726
  """List available voices with enhanced information"""
 
787
  print(f"❌ List voices error: {str(e)}")
788
  raise HTTPException(status_code=500, detail=f"Failed to list voices: {str(e)}")
789
 
 
790
  @app.get("/api/health")
791
  async def health_check():
792
  """Enhanced health check with model information"""
793
  return {
794
+ "status": "healthy" if model_loaded else "loading",
795
  "tts_loaded": model_loaded,
796
  "model": current_model,
797
  "model_config": active_model_config,