Spaces:

yukee1992
/

Tts-api

Sleeping

App Files Files Community

yukee1992 committed on Sep 15, 2025

Commit

54639e2

verified ·

1 Parent(s): 5133677

Update app.py

Browse files

Files changed (1) hide show

app.py +111 -88

app.py CHANGED Viewed

@@ -32,73 +32,21 @@ app.add_middleware(
 # Configuration
 OCI_UPLOAD_API_URL = os.getenv("OCI_UPLOAD_API_URL", "http://localhost:7860")
-DEFAULT_MODEL = "tts_models/multilingual/multi-dataset/xtts_v2"
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"✅ Using device: {DEVICE}")
-# Initialize TTS model
 tts = None
 model_loaded = False
 current_model = ""
 voice_cloning_supported = False
-try:
-    # Set environment variable to automatically accept terms
-    os.environ["COQUI_TOS_AGREED"] = "1"
-    print("🔍 Starting TTS model loading process...")
-    # Import TTS
-    from TTS.api import TTS
-    # Automatically respond to the TOS prompt
-    import sys
-    from io import StringIO
-    # Capture the input prompt and automatically respond 'y'
-    old_stdin = sys.stdin
-    sys.stdin = StringIO('y\n')
-    try:
-        print("🚀 Loading XTTS model...")
-        # Clear any potentially corrupted model files
-        model_path = os.path.expanduser("~/.local/share/tts/tts_models--multilingual--multi-dataset--xtts_v2")
-        if os.path.exists(model_path):
-            print(f"🔄 Clearing potentially corrupted model cache: {model_path}")
-            import shutil
-            shutil.rmtree(model_path, ignore_errors=True)
-        # Try to load XTTS model with explicit download
-        tts = TTS(DEFAULT_MODEL).to(DEVICE)
-        model_loaded = True
-        current_model = DEFAULT_MODEL
-        voice_cloning_supported = True
-        print("✅ XTTS model loaded successfully with voice cloning support")
-    except Exception as e:
-        print(f"❌ XTTS model failed: {e}")
-        # Try fallback model
-        try:
-            print("🔄 Trying fallback model...")
-            tts = TTS("tts_models/en/ljspeech/tacotron2-DDC").to(DEVICE)
-            model_loaded = True
-            current_model = "tts_models/en/ljspeech/tacotron2-DDC"
-            voice_cloning_supported = False
-            print("✅ Fallback TTS model loaded successfully (English only, no voice cloning)")
-        except Exception as fallback_error:
-            print(f"❌ Fallback model also failed: {fallback_error}")
-            tts = None
-    finally:
-        # Restore stdin
-        sys.stdin = old_stdin
-except Exception as e:
-    print(f"❌ Failed to initialize TTS: {e}")
-    tts = None
 # Pydantic models
 class TTSRequest(BaseModel):
@@ -125,14 +73,13 @@ def upload_to_oci(file_path: str, filename: str, project_id: str, file_type="voi
         if not OCI_UPLOAD_API_URL:
             return None, "OCI upload API URL not configured"
-        # Use voiceover subfolder
         url = f"{OCI_UPLOAD_API_URL}/api/upload"
         with open(file_path, "rb") as f:
             files = {"file": (filename, f, "audio/wav")}
             data = {
                 "project_id": project_id,
-                "subfolder": "voiceover"  # This creates project_id/voiceover/ structure
             }
             response = requests.post(url, files=files, data=data, timeout=30)
@@ -158,7 +105,7 @@ def upload_to_oci_with_retry(file_path: str, filename: str, project_id: str, fil
             if error:
                 if attempt < max_retries - 1:
-                    wait_time = 2 ** attempt  # Exponential backoff
                     print(f"⏳ Upload failed, retrying in {wait_time}s: {error}")
                     time.sleep(wait_time)
                     continue
@@ -196,18 +143,15 @@ def clone_voice(voice_name: str, audio_files: List[str], description: str = ""):
     try:
         print(f"🎙️ Cloning voice: {voice_name}")
-        # Create voice directory
         voice_dir = f"/tmp/voices/{voice_name}"
         os.makedirs(voice_dir, exist_ok=True)
-        # Copy audio files to voice directory
         for i, audio_file in enumerate(audio_files):
             dest_path = f"{voice_dir}/sample_{i+1}.wav"
             shutil.copy2(audio_file, dest_path)
             print(f"   Copied sample {i+1} to: {dest_path}")
         print(f"✅ Voice cloning setup completed for {voice_name}")
         return True, f"Voice {voice_name} is ready for use"
     except Exception as e:
@@ -217,18 +161,78 @@ def supports_voice_cloning():
     """Check if the current model supports voice cloning"""
     return "xtts" in current_model.lower()
-# API endpoints
 @app.post("/api/tts")
 async def generate_tts(request: TTSRequest):
-    """Generate TTS for a single text"""
     try:
-        if tts is None:
-            return {
-                "status": "error",
-                "message": "TTS model not available. Please check the logs for details.",
-                "requires_tos_acceptance": True,
-                "tos_url": "https://coqui.ai/cpml.txt"
-            }
         print(f"📥 TTS request for project: {request.project_id}")
         print(f"   Text length: {len(request.text)} characters")
@@ -260,7 +264,6 @@ async def generate_tts(request: TTSRequest):
         # Generate TTS based on model capabilities
         if supports_voice_cloning():
-            # XTTS model with voice cloning support
             tts.tts_to_file(
                 text=request.text,
                 speaker_wav=speaker_wav,
@@ -268,7 +271,6 @@ async def generate_tts(request: TTSRequest):
                 file_path=output_path
             )
         else:
-            # Fallback model (Tacotron2)
             tts.tts_to_file(
                 text=request.text,
                 file_path=output_path
@@ -283,7 +285,6 @@ async def generate_tts(request: TTSRequest):
         if error:
             print(f"❌ OCI upload failed: {error}")
-            # Still return the local file path if upload fails
             return {
                 "status": "partial_success",
                 "message": f"TTS generated but upload failed: {error}",
@@ -316,8 +317,10 @@ async def generate_tts(request: TTSRequest):
 async def batch_generate_tts(request: BatchTTSRequest):
     """Generate TTS for multiple texts with sequential naming"""
     try:
-        if tts is None:
-            raise HTTPException(status_code=500, detail="TTS model not loaded")
         print(f"📥 Batch TTS request for project: {request.project_id}")
         print(f"   Number of texts: {len(request.texts)}")
@@ -349,7 +352,6 @@ async def batch_generate_tts(request: BatchTTSRequest):
             # Generate TTS based on model capabilities
             if supports_voice_cloning():
-                # XTTS model with voice cloning support
                 tts.tts_to_file(
                     text=text,
                     speaker_wav=speaker_wav,
@@ -357,7 +359,6 @@ async def batch_generate_tts(request: BatchTTSRequest):
                     file_path=output_path
                 )
             else:
-                # Fallback model (Tacotron2)
                 tts.tts_to_file(
                     text=text,
                     file_path=output_path
@@ -540,13 +541,35 @@ async def health_check():
     """Health check endpoint"""
     return {
         "status": "healthy",
-        "tts_loaded": tts is not None,
         "model": current_model,
         "voice_cloning_supported": voice_cloning_supported,
         "device": DEVICE,
         "timestamp": datetime.now().isoformat()
     }
 @app.get("/")
 async def root():
     """Root endpoint with API information"""
@@ -558,10 +581,11 @@ async def root():
             "POST /api/upload-voice": "Upload a voice sample for cloning",
             "POST /api/clone-voice": "Clone a voice from multiple samples",
             "GET /api/voices": "List available voices",
-            "GET /api/health": "Health check"
         },
-        "model_loaded": tts is not None,
-        "model_name": current_model if tts else "None",
         "voice_cloning_supported": supports_voice_cloning()
     }
@@ -569,7 +593,6 @@ if __name__ == "__main__":
     import uvicorn
     print("🚀 Starting TTS API with Coqui TTS and Voice Cloning...")
     print("📊 API endpoints available at: http://localhost:7860/")
-    print("📚 Documentation available at: http://localhost:7860/docs")
-    print(f"🔊 Model: {current_model}")
-    print(f"🎙️ Voice cloning: {'Supported' if voice_cloning_supported else 'Not supported'}")
     uvicorn.run(app, host="0.0.0.0", port=7860)

 # Configuration
 OCI_UPLOAD_API_URL = os.getenv("OCI_UPLOAD_API_URL", "http://localhost:7860")
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"✅ Using device: {DEVICE}")
+# Model configuration
+MODEL_REPO_ID = "coqui/XTTS-v2"
+MODEL_CACHE_DIR = "/tmp/tts_models"
+# Global state
 tts = None
 model_loaded = False
 current_model = ""
 voice_cloning_supported = False
+model_loading = False
+model_load_attempts = 0
 # Pydantic models
 class TTSRequest(BaseModel):
         if not OCI_UPLOAD_API_URL:
             return None, "OCI upload API URL not configured"
         url = f"{OCI_UPLOAD_API_URL}/api/upload"
         with open(file_path, "rb") as f:
             files = {"file": (filename, f, "audio/wav")}
             data = {
                 "project_id": project_id,
+                "subfolder": "voiceover"
             }
             response = requests.post(url, files=files, data=data, timeout=30)
             if error:
                 if attempt < max_retries - 1:
+                    wait_time = 2 ** attempt
                     print(f"⏳ Upload failed, retrying in {wait_time}s: {error}")
                     time.sleep(wait_time)
                     continue
     try:
         print(f"🎙️ Cloning voice: {voice_name}")
         voice_dir = f"/tmp/voices/{voice_name}"
         os.makedirs(voice_dir, exist_ok=True)
         for i, audio_file in enumerate(audio_files):
             dest_path = f"{voice_dir}/sample_{i+1}.wav"
             shutil.copy2(audio_file, dest_path)
             print(f"   Copied sample {i+1} to: {dest_path}")
         print(f"✅ Voice cloning setup completed for {voice_name}")
         return True, f"Voice {voice_name} is ready for use"
     except Exception as e:
     """Check if the current model supports voice cloning"""
     return "xtts" in current_model.lower()
+def load_tts_model():
+    """Load a Coqui TTS model into the module-level ``tts`` global.
+
+    Tries the XTTS v2 model first; if that raises, falls back to the
+    Tacotron2-DDC model (no voice cloning). On success the globals
+    ``tts``, ``model_loaded``, ``current_model`` and
+    ``voice_cloning_supported`` are updated. Every call that actually
+    starts a load increments ``model_load_attempts``.
+
+    Returns:
+        True if either model was loaded. False if a load is already in
+        progress, if both models failed, or if TTS could not be imported.
+    """
+    global tts, model_loaded, current_model, voice_cloning_supported, model_loading, model_load_attempts
+    # Guard flag: refuse to start a second load while one is running
+    if model_loading:
+        print("⏳ Model is already being loaded...")
+        return False
+    model_loading = True
+    model_load_attempts += 1
+    try:
+        # Accept the Coqui model terms non-interactively via the environment
+        # so the load does not depend on answering a prompt on stdin.
+        os.environ["COQUI_TOS_AGREED"] = "1"
+        from TTS.api import TTS
+        # Fallback TOS handling: feed a 'y' answer through stdin in case the
+        # installed TTS version still prompts (NOTE(review): confirm whether
+        # this is still needed once the env var is set).
+        import sys
+        from io import StringIO
+        old_stdin = sys.stdin
+        sys.stdin = StringIO('y\n')
+        try:
+            print("🚀 Loading XTTS model...")
+            # Try to load XTTS model
+            tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(DEVICE)
+            model_loaded = True
+            # Must contain "xtts" so supports_voice_cloning() reports True
+            current_model = "xtts_v2"
+            voice_cloning_supported = True
+            print("✅ XTTS model loaded successfully")
+            return True
+        except Exception as e:
+            print(f"❌ XTTS model loading failed: {e}")
+            # Try fallback model
+            try:
+                print("🔄 Trying fallback model...")
+                tts = TTS("tts_models/en/ljspeech/tacotron2-DDC").to(DEVICE)
+                model_loaded = True
+                current_model = "tacotron2-DDC"
+                voice_cloning_supported = False
+                print("✅ Fallback model loaded successfully")
+                return True
+            except Exception as fallback_error:
+                print(f"❌ Fallback model also failed: {fallback_error}")
+                return False
+        finally:
+            # Always restore the real stdin, whichever branch returned
+            sys.stdin = old_stdin
+    except Exception as e:
+        print(f"❌ Failed to initialize TTS: {e}")
+        return False
+    finally:
+        # Clear the in-progress flag on every exit path so later calls can retry
+        model_loading = False
+# API endpoints with lazy loading
 @app.post("/api/tts")
 async def generate_tts(request: TTSRequest):
+    """Generate TTS for a single text with lazy model loading"""
     try:
+        # Lazy load model on first request
+        if not model_loaded:
+            if not load_tts_model():
+                return {
+                    "status": "error",
+                    "message": "TTS model failed to load. Please check the logs.",
+                    "requires_tos_acceptance": True,
+                    "tos_url": "https://coqui.ai/cpml.txt"
+                }
         print(f"📥 TTS request for project: {request.project_id}")
         print(f"   Text length: {len(request.text)} characters")
         # Generate TTS based on model capabilities
         if supports_voice_cloning():
             tts.tts_to_file(
                 text=request.text,
                 speaker_wav=speaker_wav,
                 file_path=output_path
             )
         else:
             tts.tts_to_file(
                 text=request.text,
                 file_path=output_path
         if error:
             print(f"❌ OCI upload failed: {error}")
             return {
                 "status": "partial_success",
                 "message": f"TTS generated but upload failed: {error}",
 async def batch_generate_tts(request: BatchTTSRequest):
     """Generate TTS for multiple texts with sequential naming"""
     try:
+        # Lazy load model on first request
+        if not model_loaded:
+            if not load_tts_model():
+                raise HTTPException(status_code=500, detail="TTS model failed to load")
         print(f"📥 Batch TTS request for project: {request.project_id}")
         print(f"   Number of texts: {len(request.texts)}")
             # Generate TTS based on model capabilities
             if supports_voice_cloning():
                 tts.tts_to_file(
                     text=text,
                     speaker_wav=speaker_wav,
                     file_path=output_path
                 )
             else:
                 tts.tts_to_file(
                     text=text,
                     file_path=output_path
     """Health check endpoint"""
     return {
         "status": "healthy",
+        "tts_loaded": model_loaded,
         "model": current_model,
         "voice_cloning_supported": voice_cloning_supported,
         "device": DEVICE,
+        "load_attempts": model_load_attempts,
         "timestamp": datetime.now().isoformat()
     }
+@app.post("/api/reload-model")
+async def reload_model():
+    """Drop the current TTS model state and load the model again.
+
+    Returns a dict with "status" ("success" or "error"), a human-readable
+    "message", and the resulting "model_loaded" / "model" globals.
+    """
+    global tts, model_loaded, current_model, voice_cloning_supported
+    # Reset all model state up front so the loader starts from a clean slate
+    tts, model_loaded = None, False
+    current_model, voice_cloning_supported = "", False
+    # Run the loader and translate its boolean result into the response text
+    if load_tts_model():
+        outcome, detail = "success", "Model reloaded successfully"
+    else:
+        outcome, detail = "error", "Failed to reload model"
+    return {
+        "status": outcome,
+        "message": detail,
+        "model_loaded": model_loaded,
+        "model": current_model
+    }
 @app.get("/")
 async def root():
     """Root endpoint with API information"""
             "POST /api/upload-voice": "Upload a voice sample for cloning",
             "POST /api/clone-voice": "Clone a voice from multiple samples",
             "GET /api/voices": "List available voices",
+            "GET /api/health": "Health check",
+            "POST /api/reload-model": "Reload TTS model"
         },
+        "model_loaded": model_loaded,
+        "model_name": current_model if model_loaded else "None",
         "voice_cloning_supported": supports_voice_cloning()
     }
     import uvicorn
     print("🚀 Starting TTS API with Coqui TTS and Voice Cloning...")
     print("📊 API endpoints available at: http://localhost:7860/")
+    print("💡 Model will be loaded on first request to save memory")
+    print("🔄 Use /api/reload-model to force reload if needed")
     uvicorn.run(app, host="0.0.0.0", port=7860)