Spaces:

yukee1992
/

Tts-api

Sleeping

App Files Community

yukee1992 committed on Oct 11, 2025

Commit

1214c7f

verified ·

1 Parent(s): 4624555

Update app.py

Browse files

Files changed (1) hide show

app.py +91 -17

app.py CHANGED Viewed

@@ -305,6 +305,7 @@ def upload_to_oci(file_path: str, filename: str, project_id: str, file_type="voi
     except Exception as e:
         return None, f"Upload error: {str(e)}"
 def load_tts_model(model_type="tacotron2-ddc"):
     """Load TTS model with storage optimization"""
     global tts, model_loaded, current_model, model_loading
@@ -317,6 +318,11 @@ def load_tts_model(model_type="tacotron2-ddc"):
         print(f"❌ Model type '{model_type}' not found.")
         return False
     model_loading = True
     try:
@@ -337,12 +343,25 @@ def load_tts_model(model_type="tacotron2-ddc"):
             print(f"🚀 Loading {model_config['name']}...")
             print(f"   Languages: {', '.join(model_config['languages'])}")
             # Load the selected model
             tts = TTS(model_config["model_name"]).to(DEVICE)
             # Test the model with appropriate text
             test_path = "/tmp/test_output.wav"
-            test_text = "Hello" if "en" in model_config["languages"] else "你好"
             tts.tts_to_file(text=test_text, file_path=test_path)
             if os.path.exists(test_path):
@@ -361,6 +380,10 @@ def load_tts_model(model_type="tacotron2-ddc"):
         except Exception as e:
             print(f"❌ Model failed to load: {e}")
             return False
         finally:
@@ -372,7 +395,7 @@ def load_tts_model(model_type="tacotron2-ddc"):
     finally:
         model_loading = False
-# ENHANCED: Model switching logic
 def ensure_correct_model(voice_style: str, text: str, language: str = "auto"):
     """Ensure the correct model is loaded for the requested voice style and language"""
     global tts, model_loaded, current_model
@@ -380,14 +403,20 @@ def ensure_correct_model(voice_style: str, text: str, language: str = "auto"):
     # Determine target model
     target_model = get_model_for_voice_style(voice_style, language)
     # If no model loaded or wrong model loaded, load the correct one
     if not model_loaded or current_model != target_model:
-        print(f"🔄 Switching to model: {target_model} for voice style: {voice_style}")
-        return load_tts_model(target_model)
     return True
-# API endpoints
 @app.post("/api/tts")
 async def generate_tts(request: TTSRequest):
     """Generate TTS with multi-language support"""
@@ -415,6 +444,7 @@ async def generate_tts(request: TTSRequest):
         print(f"   Voice Style: {request.voice_style}")
         print(f"   Language: {detected_language}")
         print(f"   Text length: {len(request.text)} characters")
         # Generate unique filename
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
@@ -430,18 +460,42 @@ async def generate_tts(request: TTSRequest):
         # Generate TTS
         try:
-            # ENHANCED: For multilingual model, specify language
-            if current_model == "your_tts" and detected_language in ["en", "zh"]:
-                tts.tts_to_file(
-                    text=cleaned_text,
-                    file_path=output_path,
-                    language=detected_language
-                )
             else:
-                tts.tts_to_file(
-                    text=cleaned_text,
-                    file_path=output_path
-                )
         except Exception as tts_error:
             print(f"❌ TTS generation failed: {tts_error}")
             raise tts_error
@@ -493,6 +547,7 @@ async def generate_tts(request: TTSRequest):
             "message": f"TTS generation failed: {str(e)}"
         }
 @app.post("/api/batch-tts")
 async def batch_generate_tts(request: BatchTTSRequest):
     """Batch TTS with multi-language support"""
@@ -500,6 +555,9 @@ async def batch_generate_tts(request: BatchTTSRequest):
         cleanup_old_files()
         print(f"📥 Batch TTS request for {len(request.texts)} texts")
         results = []
         for i, text in enumerate(request.texts):
@@ -510,6 +568,8 @@ async def batch_generate_tts(request: BatchTTSRequest):
                 else:
                     text_language = request.language
                 single_request = TTSRequest(
                     text=text,
                     project_id=request.project_id,
@@ -521,23 +581,37 @@ async def batch_generate_tts(request: BatchTTSRequest):
                 result = await generate_tts(single_request)
                 results.append({
                     "text_index": i,
                     "status": result.get("status", "error"),
                     "message": result.get("message", ""),
                     "filename": result.get("filename", ""),
                     "oci_path": result.get("oci_path", ""),
-                    "language": result.get("language", "unknown")  # ENHANCED: Include language
                 })
             except Exception as e:
                 results.append({
                     "text_index": i,
                     "status": "error",
                     "message": f"Failed to generate TTS: {str(e)}"
                 })
         return {
             "status": "completed",
             "project_id": request.project_id,
             "results": results,
             "model_used": current_model
         }

     except Exception as e:
         return None, f"Upload error: {str(e)}"
+# FIXED: Improved model loading with better error handling and memory management
 def load_tts_model(model_type="tacotron2-ddc"):
     """Load TTS model with storage optimization"""
     global tts, model_loaded, current_model, model_loading
         print(f"❌ Model type '{model_type}' not found.")
         return False
+    # If we're already using the correct model, no need to reload
+    if model_loaded and current_model == model_type:
+        print(f"✅ Model {model_type} is already loaded")
+        return True
     model_loading = True
     try:
             print(f"🚀 Loading {model_config['name']}...")
             print(f"   Languages: {', '.join(model_config['languages'])}")
+            # Clear current model from memory first if exists
+            if tts is not None:
+                print("🧹 Clearing previous model from memory...")
+                del tts
+                import gc
+                gc.collect()
+                if torch.cuda.is_available():
+                    torch.cuda.empty_cache()
             # Load the selected model
             tts = TTS(model_config["model_name"]).to(DEVICE)
             # Test the model with appropriate text
             test_path = "/tmp/test_output.wav"
+            if "zh" in model_config["languages"]:
+                test_text = "你好"  # Chinese test
+            else:
+                test_text = "Hello"  # English test
             tts.tts_to_file(text=test_text, file_path=test_path)
             if os.path.exists(test_path):
         except Exception as e:
             print(f"❌ Model failed to load: {e}")
+            # Fallback to English model if multilingual fails
+            if model_type == "your_tts":
+                print("🔄 Falling back to English model...")
+                return load_tts_model("tacotron2-ddc")
             return False
         finally:
     finally:
         model_loading = False
+# FIXED: Improved model switching logic with better detection
 def ensure_correct_model(voice_style: str, text: str, language: str = "auto"):
     """Ensure the correct model is loaded for the requested voice style and language"""
     global tts, model_loaded, current_model
     # Determine target model
     target_model = get_model_for_voice_style(voice_style, language)
+    print(f"🔍 Model selection: voice_style={voice_style}, language={language}, target_model={target_model}")
     # If no model loaded or wrong model loaded, load the correct one
     if not model_loaded or current_model != target_model:
+        print(f"🔄 Switching to model: {target_model} for voice style: {voice_style}, language: {language}")
+        success = load_tts_model(target_model)
+        if not success and target_model == "your_tts":
+            print("⚠️ Multilingual model failed, falling back to English model")
+            return load_tts_model("tacotron2-ddc")
+        return success
     return True
+# FIXED: Enhanced TTS generation with proper language handling
 @app.post("/api/tts")
 async def generate_tts(request: TTSRequest):
     """Generate TTS with multi-language support"""
         print(f"   Voice Style: {request.voice_style}")
         print(f"   Language: {detected_language}")
         print(f"   Text length: {len(request.text)} characters")
+        print(f"   Current Model: {current_model}")
         # Generate unique filename
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         # Generate TTS
         try:
+            # FIXED: Proper language handling for multilingual model
+            if current_model == "your_tts":
+                if detected_language == "zh":
+                    print("🎯 Using YourTTS for Chinese text with zh-cn language code")
+                    tts.tts_to_file(
+                        text=cleaned_text,
+                        file_path=output_path,
+                        language="zh-cn"  # Use zh-cn for Chinese
+                    )
+                else:
+                    print("🎯 Using YourTTS for English text")
+                    tts.tts_to_file(
+                        text=cleaned_text,
+                        file_path=output_path,
+                        language="en"
+                    )
             else:
+                # Tacotron2-DDC for English only
+                if detected_language == "zh":
+                    # If Chinese text but English model, try to switch to multilingual
+                    print("🔄 Chinese text detected with English model, attempting to switch to multilingual...")
+                    if load_tts_model("your_tts"):
+                        # Retry with multilingual model
+                        tts.tts_to_file(
+                            text=cleaned_text,
+                            file_path=output_path,
+                            language="zh-cn"
+                        )
+                    else:
+                        raise Exception("Chinese text cannot be processed. Multilingual model failed to load.")
+                else:
+                    print("🎯 Using Tacotron2-DDC for English text")
+                    tts.tts_to_file(
+                        text=cleaned_text,
+                        file_path=output_path
+                    )
         except Exception as tts_error:
             print(f"❌ TTS generation failed: {tts_error}")
             raise tts_error
             "message": f"TTS generation failed: {str(e)}"
         }
+# FIXED: Enhanced batch processing with better logging and error handling
 @app.post("/api/batch-tts")
 async def batch_generate_tts(request: BatchTTSRequest):
     """Batch TTS with multi-language support"""
         cleanup_old_files()
         print(f"📥 Batch TTS request for {len(request.texts)} texts")
+        print(f"   Project: {request.project_id}")
+        print(f"   Voice Style: {request.voice_style}")
+        print(f"   Language: {request.language}")
         results = []
         for i, text in enumerate(request.texts):
                 else:
                     text_language = request.language
+                print(f"   Processing text {i+1}/{len(request.texts)}: {text_language} - {text[:50]}...")
                 single_request = TTSRequest(
                     text=text,
                     project_id=request.project_id,
                 result = await generate_tts(single_request)
                 results.append({
                     "text_index": i,
+                    "text_preview": text[:30] + "..." if len(text) > 30 else text,
                     "status": result.get("status", "error"),
                     "message": result.get("message", ""),
                     "filename": result.get("filename", ""),
                     "oci_path": result.get("oci_path", ""),
+                    "language": result.get("language", "unknown")
                 })
             except Exception as e:
+                print(f"❌ Failed to process text {i}: {str(e)}")
                 results.append({
                     "text_index": i,
+                    "text_preview": text[:30] + "..." if len(text) > 30 else text,
                     "status": "error",
                     "message": f"Failed to generate TTS: {str(e)}"
                 })
+        # Summary
+        success_count = sum(1 for r in results if r.get("status") == "success")
+        error_count = sum(1 for r in results if r.get("status") == "error")
+        print(f"📊 Batch completed: {success_count} successful, {error_count} failed")
         return {
             "status": "completed",
             "project_id": request.project_id,
+            "summary": {
+                "total": len(results),
+                "successful": success_count,
+                "failed": error_count
+            },
             "results": results,
             "model_used": current_model
         }