yukee1992 committed on
Commit
e264e7d
·
verified ·
1 Parent(s): f07a760

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -193
app.py CHANGED
@@ -36,32 +36,23 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
36
 
37
  print(f"✅ Using device: {DEVICE}")
38
 
39
- # ENHANCED: Multi-language model support with speaker configuration
40
  AVAILABLE_MODELS = {
41
- "tacotron2-ddc": {
42
- "name": "Tacotron2-DDC",
43
- "model_name": "tts_models/en/ljspeech/tacotron2-DDC",
44
- "description": "High-quality English TTS (Excellent natural voice)",
45
- "languages": ["en"],
46
- "voice_cloning": False,
47
- "size_mb": 150,
48
- "quality": "excellent",
49
- "multi_speaker": False # ADDED: Speaker configuration
50
- },
51
- "your_tts": {
52
- "name": "YourTTS-Multi",
53
- "model_name": "tts_models/multilingual/multi-dataset/your_tts",
54
- "description": "Multilingual TTS supporting English, Chinese, and more",
55
- "languages": ["en", "zh", "fr", "de", "it", "pt", "es"],
56
  "voice_cloning": True,
57
- "size_mb": 200,
58
- "quality": "very_good",
59
- "multi_speaker": True, # ADDED: Speaker configuration
60
- "default_speaker": "female_01" # ADDED: Default speaker for YourTTS
 
61
  }
62
  }
63
 
64
- # ENHANCED: Expanded voice styles for both English and Chinese with speaker support
65
  VOICE_STYLES = {
66
  # English Voice Styles
67
  "default": {
@@ -69,40 +60,24 @@ VOICE_STYLES = {
69
  "description": "Clear and natural English voice",
70
  "gender": "neutral",
71
  "language": "en",
72
- "recommended_model": "tacotron2-ddc",
73
- "speaker": None # ADDED: Tacotron2 doesn't need speaker
74
  },
75
  "clear": {
76
  "name": "Clear English Voice",
77
  "description": "Very clear and articulate English voice",
78
  "gender": "neutral",
79
  "language": "en",
80
- "recommended_model": "tacotron2-ddc",
81
- "speaker": None # ADDED
82
  },
83
  "professional": {
84
  "name": "Professional English Voice",
85
  "description": "Professional and authoritative English voice",
86
  "gender": "neutral",
87
  "language": "en",
88
- "recommended_model": "tacotron2-ddc",
89
- "speaker": None # ADDED
90
- },
91
- "warm": {
92
- "name": "Warm English Voice",
93
- "description": "Friendly and warm English voice",
94
- "gender": "female",
95
- "language": "en",
96
- "recommended_model": "tacotron2-ddc",
97
- "speaker": None # ADDED
98
- },
99
- "authoritative": {
100
- "name": "Authoritative English Voice",
101
- "description": "Confident and authoritative English voice",
102
- "gender": "male",
103
- "language": "en",
104
- "recommended_model": "tacotron2-ddc",
105
- "speaker": None # ADDED
106
  },
107
 
108
  # Chinese Voice Styles
@@ -111,48 +86,24 @@ VOICE_STYLES = {
111
  "description": "清晰自然的中文语音",
112
  "gender": "neutral",
113
  "language": "zh",
114
- "recommended_model": "your_tts",
115
- "speaker": "female_01" # ADDED: Speaker for YourTTS
116
  },
117
  "chinese_clear": {
118
  "name": "清晰中文语音",
119
  "description": "非常清晰和标准的中文语音",
120
  "gender": "neutral",
121
  "language": "zh",
122
- "recommended_model": "your_tts",
123
- "speaker": "female_02" # ADDED
124
  },
125
  "chinese_professional": {
126
  "name": "专业中文语音",
127
  "description": "专业和正式的中文语音",
128
  "gender": "neutral",
129
  "language": "zh",
130
- "recommended_model": "your_tts",
131
- "speaker": "male_01" # ADDED
132
- },
133
- "chinese_warm": {
134
- "name": "温暖中文语音",
135
- "description": "友好温暖的中文语音",
136
- "gender": "female",
137
- "language": "zh",
138
- "recommended_model": "your_tts",
139
- "speaker": "female_03" # ADDED
140
- },
141
- "chinese_authoritative": {
142
- "name": "权威中文语音",
143
- "description": "自信权威的中文语音",
144
- "gender": "male",
145
- "language": "zh",
146
- "recommended_model": "your_tts",
147
- "speaker": "male_02" # ADDED
148
- },
149
- "chinese_storytelling": {
150
- "name": "讲故事中文语音",
151
- "description": "适合讲故事和叙述的中文语音",
152
- "gender": "neutral",
153
- "language": "zh",
154
- "recommended_model": "your_tts",
155
- "speaker": "female_04" # ADDED
156
  }
157
  }
158
 
@@ -177,7 +128,7 @@ class BatchTTSRequest(BaseModel):
177
  speed: Optional[float] = 1.0
178
  language: Optional[str] = "auto"
179
 
180
- # ENHANCED: Language detection function
181
  def detect_language(text: str) -> str:
182
  """Detect if text is Chinese or English"""
183
  import re
@@ -197,17 +148,12 @@ def detect_language(text: str) -> str:
197
  else:
198
  return "en"
199
 
200
- # ENHANCED: Get appropriate model based on voice style and language
201
- def get_model_for_voice_style(voice_style: str, language: str = "auto") -> str:
202
- """Determine which model to use based on voice style and language"""
203
  if voice_style in VOICE_STYLES:
204
- return VOICE_STYLES[voice_style].get("recommended_model", "tacotron2-ddc")
205
-
206
- # Fallback logic based on language
207
- if language == "zh" or (language == "auto" and detect_language("test") == "zh"):
208
- return "your_tts"
209
- else:
210
- return "tacotron2-ddc"
211
 
212
  # Storage management functions
213
  def cleanup_old_files():
@@ -254,7 +200,7 @@ def check_storage_usage():
254
  print(f"⚠️ Storage check error: {e}")
255
  return True
256
 
257
- # ENHANCED: Improved text cleaning with language support
258
  def clean_text(text, language="auto"):
259
  """Clean text for TTS generation with language support"""
260
  import re
@@ -319,8 +265,8 @@ def upload_to_oci(file_path: str, filename: str, project_id: str, file_type="voi
319
  except Exception as e:
320
  return None, f"Upload error: {str(e)}"
321
 
322
- # FIXED: Improved model loading with speaker support and better error handling
323
- def load_tts_model(model_type="tacotron2-ddc"):
324
  """Load TTS model with storage optimization"""
325
  global tts, model_loaded, current_model, model_loading
326
 
@@ -356,7 +302,7 @@ def load_tts_model(model_type="tacotron2-ddc"):
356
  model_config = AVAILABLE_MODELS[model_type]
357
  print(f"🚀 Loading {model_config['name']}...")
358
  print(f" Languages: {', '.join(model_config['languages'])}")
359
- print(f" Multi-speaker: {model_config.get('multi_speaker', False)}") # ADDED: Log speaker info
360
 
361
  # Clear current model from memory first if exists
362
  if tts is not None:
@@ -370,24 +316,21 @@ def load_tts_model(model_type="tacotron2-ddc"):
370
  # Load the selected model
371
  tts = TTS(model_config["model_name"]).to(DEVICE)
372
 
373
- # Test the model with appropriate text
374
  test_path = "/tmp/test_output.wav"
375
- if "zh" in model_config["languages"]:
376
- test_text = "你好" # Chinese test
377
- else:
378
- test_text = "Hello" # English test
379
 
380
- # FIXED: For multi-speaker models, provide speaker parameter during test
381
- if model_config.get('multi_speaker', False):
382
- test_speaker = model_config.get('default_speaker', 'female_01')
383
- print(f" Testing with speaker: {test_speaker}") # ADDED: Speaker info
384
- tts.tts_to_file(
385
- text=test_text,
386
- file_path=test_path,
387
- speaker=test_speaker # ADDED: Speaker parameter
388
- )
389
- else:
390
- tts.tts_to_file(text=test_text, file_path=test_path)
391
 
392
  if os.path.exists(test_path):
393
  os.remove(test_path)
@@ -405,10 +348,6 @@ def load_tts_model(model_type="tacotron2-ddc"):
405
 
406
  except Exception as e:
407
  print(f"❌ Model failed to load: {e}")
408
- # Fallback to English model if multilingual fails
409
- if model_type == "your_tts":
410
- print("🔄 Falling back to English model...")
411
- return load_tts_model("tacotron2-ddc")
412
  return False
413
 
414
  finally:
@@ -420,28 +359,24 @@ def load_tts_model(model_type="tacotron2-ddc"):
420
  finally:
421
  model_loading = False
422
 
423
- # FIXED: Improved model switching logic with better detection
424
- def ensure_correct_model(voice_style: str, text: str, language: str = "auto"):
425
- """Ensure the correct model is loaded for the requested voice style and language"""
426
  global tts, model_loaded, current_model
427
 
428
  # Determine target model
429
- target_model = get_model_for_voice_style(voice_style, language)
430
 
431
- print(f"🔍 Model selection: voice_style={voice_style}, language={language}, target_model={target_model}")
432
 
433
  # If no model loaded or wrong model loaded, load the correct one
434
  if not model_loaded or current_model != target_model:
435
- print(f"🔄 Switching to model: {target_model} for voice style: {voice_style}, language: {language}")
436
- success = load_tts_model(target_model)
437
- if not success and target_model == "your_tts":
438
- print("⚠️ Multilingual model failed, falling back to English model")
439
- return load_tts_model("tacotron2-ddc")
440
- return success
441
 
442
  return True
443
 
444
- # FIXED: Enhanced TTS generation with proper language AND speaker handling
445
  @app.post("/api/tts")
446
  async def generate_tts(request: TTSRequest):
447
  """Generate TTS with multi-language support"""
@@ -449,18 +384,18 @@ async def generate_tts(request: TTSRequest):
449
  # Clean up before processing
450
  cleanup_old_files()
451
 
452
- # ENHANCED: Auto-detect language if not specified
453
  if request.language == "auto":
454
  detected_language = detect_language(request.text)
455
  print(f"🌐 Auto-detected language: {detected_language}")
456
  else:
457
  detected_language = request.language
458
 
459
- # ENHANCED: Ensure correct model is loaded
460
- if not ensure_correct_model(request.voice_style, request.text, detected_language):
461
  return {
462
  "status": "error",
463
- "message": f"Failed to load appropriate TTS model for {detected_language}",
464
  "requires_tos_acceptance": True,
465
  "tos_url": "https://coqui.ai/cpml.txt"
466
  }
@@ -479,80 +414,31 @@ async def generate_tts(request: TTSRequest):
479
  # Ensure output directory exists
480
  os.makedirs(os.path.dirname(output_path), exist_ok=True)
481
 
482
- # ENHANCED: Clean the text with language support
483
  cleaned_text = clean_text(request.text, detected_language)
484
  print(f"📝 Text: '{cleaned_text}'")
485
 
486
  # Get speaker configuration for the voice style
487
  voice_config = VOICE_STYLES.get(request.voice_style, {})
488
- speaker = voice_config.get('speaker')
489
- print(f"🎤 Speaker: {speaker}") # ADDED: Speaker info
490
 
491
  # Generate TTS
492
  try:
493
- # FIXED: Proper language AND speaker handling for multilingual model
494
- if current_model == "your_tts":
495
- if detected_language == "zh":
496
- print(f"🎯 Using YourTTS for Chinese text with speaker: {speaker}")
497
- tts.tts_to_file(
498
- text=cleaned_text,
499
- file_path=output_path,
500
- language="zh-cn", # Use zh-cn for Chinese
501
- speaker=speaker # ADDED: Speaker parameter
502
- )
503
- else:
504
- print(f"🎯 Using YourTTS for English text with speaker: {speaker}")
505
- tts.tts_to_file(
506
- text=cleaned_text,
507
- file_path=output_path,
508
- language="en",
509
- speaker=speaker # ADDED: Speaker parameter
510
- )
511
- else:
512
- # Tacotron2-DDC for English only
513
- if detected_language == "zh":
514
- # If Chinese text but English model, try to switch to multilingual
515
- print("🔄 Chinese text detected with English model, attempting to switch to multilingual...")
516
- if load_tts_model("your_tts"):
517
- # Get speaker for the voice style in multilingual model
518
- multilingual_speaker = VOICE_STYLES.get(request.voice_style, {}).get('speaker', 'female_01')
519
- print(f"🎯 Using YourTTS for Chinese text with speaker: {multilingual_speaker}")
520
- # Retry with multilingual model
521
- tts.tts_to_file(
522
- text=cleaned_text,
523
- file_path=output_path,
524
- language="zh-cn",
525
- speaker=multilingual_speaker # ADDED: Speaker parameter
526
- )
527
- else:
528
- raise Exception("Chinese text cannot be processed. Multilingual model failed to load.")
529
- else:
530
- print("🎯 Using Tacotron2-DDC for English text")
531
- # Tacotron2-DDC doesn't need speaker parameter
532
- tts.tts_to_file(
533
- text=cleaned_text,
534
- file_path=output_path
535
- )
536
  except Exception as tts_error:
537
  print(f"❌ TTS generation failed: {tts_error}")
538
-
539
- # FIXED: If it's a speaker error, try with default speaker
540
- if "speaker" in str(tts_error).lower() and current_model == "your_tts":
541
- print("🔄 Speaker error detected, trying with default speaker...")
542
- try:
543
- default_speaker = AVAILABLE_MODELS["your_tts"].get("default_speaker", "female_01")
544
- tts.tts_to_file(
545
- text=cleaned_text,
546
- file_path=output_path,
547
- language="zh-cn" if detected_language == "zh" else "en",
548
- speaker=default_speaker # Use default speaker
549
- )
550
- print("✅ Success with default speaker!")
551
- except Exception as retry_error:
552
- print(f"❌ Retry with default speaker also failed: {retry_error}")
553
- raise tts_error
554
- else:
555
- raise tts_error
556
 
557
  # Verify the file was created
558
  if not os.path.exists(output_path):
@@ -601,7 +487,7 @@ async def generate_tts(request: TTSRequest):
601
  "message": f"TTS generation failed: {str(e)}"
602
  }
603
 
604
- # FIXED: Enhanced batch processing with better logging and error handling
605
  @app.post("/api/batch-tts")
606
  async def batch_generate_tts(request: BatchTTSRequest):
607
  """Batch TTS with multi-language support"""
@@ -616,7 +502,7 @@ async def batch_generate_tts(request: BatchTTSRequest):
616
  results = []
617
  for i, text in enumerate(request.texts):
618
  try:
619
- # ENHANCED: Auto-detect language for each text
620
  if request.language == "auto":
621
  text_language = detect_language(text)
622
  else:
@@ -677,7 +563,7 @@ async def batch_generate_tts(request: BatchTTSRequest):
677
  @app.get("/api/voice-styles")
678
  async def get_voice_styles():
679
  """Get available voice styles"""
680
- # ENHANCED: Group voice styles by language
681
  english_styles = {k: v for k, v in VOICE_STYLES.items() if v.get("language") == "en"}
682
  chinese_styles = {k: v for k, v in VOICE_STYLES.items() if v.get("language") == "zh"}
683
 
@@ -690,7 +576,7 @@ async def get_voice_styles():
690
  "supported_languages": ["en", "zh", "auto"]
691
  }
692
 
693
- # ENHANCED: New endpoint to detect language
694
  @app.post("/api/detect-language")
695
  async def detect_text_language(text: str = Form(...)):
696
  """Detect the language of input text"""
@@ -752,6 +638,6 @@ if __name__ == "__main__":
752
  print("🚀 Starting Multi-Language TTS API...")
753
  print("💾 Storage management enabled")
754
  print("🌐 Supporting English and Chinese")
755
- print("🔊 Using Tacotron2-DDC (English) and YourTTS (Multilingual)")
756
  check_storage_usage()
757
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
36
 
37
  print(f"✅ Using device: {DEVICE}")
38
 
39
+ # SIMPLIFIED: Use only one reliable model that supports both languages
40
  AVAILABLE_MODELS = {
41
+ "xtts": {
42
+ "name": "XTTS-Multilingual",
43
+ "model_name": "tts_models/multilingual/multi-dataset/xtts_v2",
44
+ "description": "High-quality multilingual TTS supporting English and Chinese",
45
+ "languages": ["en", "zh", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl", "cs", "ar", "hu"],
 
 
 
 
 
 
 
 
 
 
46
  "voice_cloning": True,
47
+ "size_mb": 180,
48
+ "quality": "excellent",
49
+ "multi_speaker": True,
50
+ "default_speaker": "Claribel Dervla",
51
+ "default_language": "en"
52
  }
53
  }
54
 
55
+ # SIMPLIFIED: Voice styles for XTTS model
56
  VOICE_STYLES = {
57
  # English Voice Styles
58
  "default": {
 
60
  "description": "Clear and natural English voice",
61
  "gender": "neutral",
62
  "language": "en",
63
+ "recommended_model": "xtts",
64
+ "speaker": "Claribel Dervla"
65
  },
66
  "clear": {
67
  "name": "Clear English Voice",
68
  "description": "Very clear and articulate English voice",
69
  "gender": "neutral",
70
  "language": "en",
71
+ "recommended_model": "xtts",
72
+ "speaker": "Daisy Studious"
73
  },
74
  "professional": {
75
  "name": "Professional English Voice",
76
  "description": "Professional and authoritative English voice",
77
  "gender": "neutral",
78
  "language": "en",
79
+ "recommended_model": "xtts",
80
+ "speaker": "Gracie Wise"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  },
82
 
83
  # Chinese Voice Styles
 
86
  "description": "清晰自然的中文语音",
87
  "gender": "neutral",
88
  "language": "zh",
89
+ "recommended_model": "xtts",
90
+ "speaker": "Claribel Dervla"
91
  },
92
  "chinese_clear": {
93
  "name": "清晰中文语音",
94
  "description": "非常清晰和标准的中文语音",
95
  "gender": "neutral",
96
  "language": "zh",
97
+ "recommended_model": "xtts",
98
+ "speaker": "Daisy Studious"
99
  },
100
  "chinese_professional": {
101
  "name": "专业中文语音",
102
  "description": "专业和正式的中文语音",
103
  "gender": "neutral",
104
  "language": "zh",
105
+ "recommended_model": "xtts",
106
+ "speaker": "Gracie Wise"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  }
108
  }
109
 
 
128
  speed: Optional[float] = 1.0
129
  language: Optional[str] = "auto"
130
 
131
+ # Language detection function
132
  def detect_language(text: str) -> str:
133
  """Detect if text is Chinese or English"""
134
  import re
 
148
  else:
149
  return "en"
150
 
151
+ # Get appropriate model based on voice style
152
+ def get_model_for_voice_style(voice_style: str) -> str:
153
+ """Determine which model to use based on voice style"""
154
  if voice_style in VOICE_STYLES:
155
+ return VOICE_STYLES[voice_style].get("recommended_model", "xtts")
156
+ return "xtts"
 
 
 
 
 
157
 
158
  # Storage management functions
159
  def cleanup_old_files():
 
200
  print(f"⚠️ Storage check error: {e}")
201
  return True
202
 
203
+ # Text cleaning with language support
204
  def clean_text(text, language="auto"):
205
  """Clean text for TTS generation with language support"""
206
  import re
 
265
  except Exception as e:
266
  return None, f"Upload error: {str(e)}"
267
 
268
+ # SIMPLIFIED: Model loading with XTTS
269
+ def load_tts_model(model_type="xtts"):
270
  """Load TTS model with storage optimization"""
271
  global tts, model_loaded, current_model, model_loading
272
 
 
302
  model_config = AVAILABLE_MODELS[model_type]
303
  print(f"🚀 Loading {model_config['name']}...")
304
  print(f" Languages: {', '.join(model_config['languages'])}")
305
+ print(f" Multi-speaker: {model_config.get('multi_speaker', False)}")
306
 
307
  # Clear current model from memory first if exists
308
  if tts is not None:
 
316
  # Load the selected model
317
  tts = TTS(model_config["model_name"]).to(DEVICE)
318
 
319
+ # Test the model with BOTH language and speaker parameters
320
  test_path = "/tmp/test_output.wav"
321
+ test_speaker = model_config.get('default_speaker', 'Claribel Dervla')
322
+ test_language = model_config.get('default_language', 'en')
323
+ test_text = "Hello" if test_language == "en" else "你好"
 
324
 
325
+ print(f" Testing with speaker: {test_speaker}, language: {test_language}")
326
+
327
+ # XTTS requires BOTH language AND speaker parameters
328
+ tts.tts_to_file(
329
+ text=test_text,
330
+ file_path=test_path,
331
+ speaker=test_speaker,
332
+ language=test_language
333
+ )
 
 
334
 
335
  if os.path.exists(test_path):
336
  os.remove(test_path)
 
348
 
349
  except Exception as e:
350
  print(f"❌ Model failed to load: {e}")
 
 
 
 
351
  return False
352
 
353
  finally:
 
359
  finally:
360
  model_loading = False
361
 
362
+ # Ensure correct model is loaded
363
+ def ensure_correct_model(voice_style: str):
364
+ """Ensure the correct model is loaded for the requested voice style"""
365
  global tts, model_loaded, current_model
366
 
367
  # Determine target model
368
+ target_model = get_model_for_voice_style(voice_style)
369
 
370
+ print(f"🔍 Model selection: voice_style={voice_style}, target_model={target_model}")
371
 
372
  # If no model loaded or wrong model loaded, load the correct one
373
  if not model_loaded or current_model != target_model:
374
+ print(f"🔄 Switching to model: {target_model} for voice style: {voice_style}")
375
+ return load_tts_model(target_model)
 
 
 
 
376
 
377
  return True
378
 
379
+ # SIMPLIFIED: TTS generation with XTTS
380
  @app.post("/api/tts")
381
  async def generate_tts(request: TTSRequest):
382
  """Generate TTS with multi-language support"""
 
384
  # Clean up before processing
385
  cleanup_old_files()
386
 
387
+ # Auto-detect language if not specified
388
  if request.language == "auto":
389
  detected_language = detect_language(request.text)
390
  print(f"🌐 Auto-detected language: {detected_language}")
391
  else:
392
  detected_language = request.language
393
 
394
+ # Ensure correct model is loaded
395
+ if not ensure_correct_model(request.voice_style):
396
  return {
397
  "status": "error",
398
+ "message": "Failed to load TTS model",
399
  "requires_tos_acceptance": True,
400
  "tos_url": "https://coqui.ai/cpml.txt"
401
  }
 
414
  # Ensure output directory exists
415
  os.makedirs(os.path.dirname(output_path), exist_ok=True)
416
 
417
+ # Clean the text with language support
418
  cleaned_text = clean_text(request.text, detected_language)
419
  print(f"📝 Text: '{cleaned_text}'")
420
 
421
  # Get speaker configuration for the voice style
422
  voice_config = VOICE_STYLES.get(request.voice_style, {})
423
+ speaker = voice_config.get('speaker', 'Claribel Dervla')
424
+ print(f"🎤 Speaker: {speaker}")
425
 
426
  # Generate TTS
427
  try:
428
+ # XTTS requires BOTH language AND speaker parameters
429
+ tts_language = "zh-cn" if detected_language == "zh" else "en"
430
+ print(f"🎯 Using XTTS with language: {tts_language}, speaker: {speaker}")
431
+
432
+ tts.tts_to_file(
433
+ text=cleaned_text,
434
+ file_path=output_path,
435
+ language=tts_language,
436
+ speaker=speaker
437
+ )
438
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
439
  except Exception as tts_error:
440
  print(f"❌ TTS generation failed: {tts_error}")
441
+ raise tts_error
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
442
 
443
  # Verify the file was created
444
  if not os.path.exists(output_path):
 
487
  "message": f"TTS generation failed: {str(e)}"
488
  }
489
 
490
+ # Batch TTS processing
491
  @app.post("/api/batch-tts")
492
  async def batch_generate_tts(request: BatchTTSRequest):
493
  """Batch TTS with multi-language support"""
 
502
  results = []
503
  for i, text in enumerate(request.texts):
504
  try:
505
+ # Auto-detect language for each text
506
  if request.language == "auto":
507
  text_language = detect_language(text)
508
  else:
 
563
  @app.get("/api/voice-styles")
564
  async def get_voice_styles():
565
  """Get available voice styles"""
566
+ # Group voice styles by language
567
  english_styles = {k: v for k, v in VOICE_STYLES.items() if v.get("language") == "en"}
568
  chinese_styles = {k: v for k, v in VOICE_STYLES.items() if v.get("language") == "zh"}
569
 
 
576
  "supported_languages": ["en", "zh", "auto"]
577
  }
578
 
579
+ # Language detection endpoint
580
  @app.post("/api/detect-language")
581
  async def detect_text_language(text: str = Form(...)):
582
  """Detect the language of input text"""
 
638
  print("🚀 Starting Multi-Language TTS API...")
639
  print("💾 Storage management enabled")
640
  print("🌐 Supporting English and Chinese")
641
+ print("🔊 Using XTTS (Multilingual)")
642
  check_storage_usage()
643
  uvicorn.run(app, host="0.0.0.0", port=7860)