Spaces:

yukee1992
/

Tts-api

Sleeping

App Files Community

yukee1992 committed on Sep 14, 2025

Commit

2eaadb8

verified ·

1 Parent(s): 697cc6f

Update app.py

Browse files

Files changed (1) hide show

app.py +102 -44

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import os
 import tempfile
 import uuid
 import time
 from datetime import datetime
 from typing import List, Optional
 from pathlib import Path
@@ -36,9 +37,10 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"✅ Using device: {DEVICE}")
-# Initialize TTS model with automatic TOS acceptance
 tts = None
 model_loaded = False
 try:
     # Set environment variable to automatically accept terms
@@ -56,18 +58,37 @@ try:
     sys.stdin = StringIO('y\n')
     try:
-        print("🚀 Loading TTS model...")
         tts = TTS(DEFAULT_MODEL).to(DEVICE)
         model_loaded = True
-        print("✅ TTS model loaded successfully")
     except Exception as e:
-        print(f"❌ Primary model failed: {e}")
         # Try fallback model
         try:
             print("🔄 Trying fallback model...")
             tts = TTS("tts_models/en/ljspeech/tacotron2-DDC").to(DEVICE)
             model_loaded = True
-            print("✅ Fallback TTS model loaded successfully")
         except Exception as fallback_error:
             print(f"❌ Fallback model also failed: {fallback_error}")
             tts = None
@@ -182,11 +203,9 @@ def clone_voice(voice_name: str, audio_files: List[str], description: str = ""):
         # Copy audio files to voice directory
         for i, audio_file in enumerate(audio_files):
             dest_path = f"{voice_dir}/sample_{i+1}.wav"
-            # For now, just create a placeholder since we can't copy files in this context
-            # In a real implementation, you'd copy the files here
-            print(f"   Would copy sample {i+1} to: {dest_path}")
-        # For XTTS model, we can use the samples directly
         print(f"✅ Voice cloning setup completed for {voice_name}")
         return True, f"Voice {voice_name} is ready for use"
@@ -194,6 +213,10 @@ def clone_voice(voice_name: str, audio_files: List[str], description: str = ""):
     except Exception as e:
         return False, f"Voice cloning failed: {str(e)}"
 # API endpoints
 @app.post("/api/tts")
 async def generate_tts(request: TTSRequest):
@@ -212,6 +235,14 @@ async def generate_tts(request: TTSRequest):
         print(f"   Voice: {request.voice_name}")
         print(f"   Language: {request.language}")
         # Generate unique filename with sequential naming
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         filename = f"voiceover_{timestamp}.wav"
@@ -227,13 +258,21 @@ async def generate_tts(request: TTSRequest):
                     "message": f"Voice '{request.voice_name}' not found"
                 }
-        # Generate TTS
-        tts.tts_to_file(
-            text=request.text,
-            speaker_wav=speaker_wav,
-            language=request.language,
-            file_path=output_path
-        )
         print(f"✅ TTS generated: {output_path}")
@@ -264,7 +303,9 @@ async def generate_tts(request: TTSRequest):
             "status": "success",
             "message": "TTS generated and uploaded successfully",
             "filename": filename,
-            "oci_path": upload_result.get("path", f"{request.project_id}/voiceover/{filename}")
         }
     except Exception as e:
@@ -283,6 +324,13 @@ async def batch_generate_tts(request: BatchTTSRequest):
         print(f"   Voice: {request.voice_name}")
         print(f"   Language: {request.language}")
         # Get voice path if custom voice is requested
         speaker_wav = None
         if request.voice_name != "default":
@@ -299,13 +347,21 @@ async def batch_generate_tts(request: BatchTTSRequest):
             filename = f"voiceover_{i+1:02d}.wav"
             output_path = f"/tmp/output/{filename}"
-            # Generate TTS
-            tts.tts_to_file(
-                text=text,
-                speaker_wav=speaker_wav,
-                language=request.language,
-                file_path=output_path
-            )
             # Upload to OCI
             upload_result, error = upload_to_oci_with_retry(
@@ -340,7 +396,9 @@ async def batch_generate_tts(request: BatchTTSRequest):
         return {
             "status": "completed",
             "project_id": request.project_id,
-            "results": results
         }
     except Exception as e:
@@ -357,6 +415,13 @@ async def upload_voice_sample(
     try:
         print(f"📥 Voice upload request: {voice_name} for project {project_id}")
         # Validate file type
         if not file.filename.lower().endswith(('.wav', '.mp3', '.ogg', '.flac')):
             raise HTTPException(status_code=400, detail="Only audio files are allowed")
@@ -391,6 +456,13 @@ async def api_clone_voice(
     try:
         print(f"📥 Voice cloning request: {voice_name} for project {project_id}")
         # Save uploaded files temporarily
         temp_files = []
         for i, file in enumerate(files):
@@ -455,7 +527,8 @@ async def list_voices():
         return {
             "status": "success",
-            "voices": voices
         }
     except Exception as e:
@@ -468,6 +541,8 @@ async def health_check():
     return {
         "status": "healthy",
         "tts_loaded": tts is not None,
         "device": DEVICE,
         "timestamp": datetime.now().isoformat()
     }
@@ -477,21 +552,4 @@ async def root():
     """Root endpoint with API information"""
     return {
         "message": "TTS API with Coqui TTS and Voice Cloning",
-        "endpoints": {
-            "POST /api/tts": "Generate TTS for a single text",
-            "POST /api/batch-tts": "Generate TTS for multiple texts",
-            "POST /api/upload-voice": "Upload a voice sample for cloning",
-            "POST /api/clone-voice": "Clone a voice from multiple samples",
-            "GET /api/voices": "List available voices",
-            "GET /api/health": "Health check"
-        },
-        "model_loaded": tts is not None,
-        "model_name": DEFAULT_MODEL if tts else "None"
-    }
-if __name__ == "__main__":
-    import uvicorn
-    print("🚀 Starting TTS API with Coqui TTS and Voice Cloning...")
-    print("📊 API endpoints available at: http://localhost:7860/")
-    print("📚 Documentation available at: http://localhost:7860/docs")
-    uvicorn.run(app, host="0.0.0.0", port=7860)

 import tempfile
 import uuid
 import time
+import shutil
 from datetime import datetime
 from typing import List, Optional
 from pathlib import Path
 print(f"✅ Using device: {DEVICE}")
+# Initialize TTS model with automatic TOS acceptance and safe globals
 tts = None
 model_loaded = False
+current_model = ""
 try:
     # Set environment variable to automatically accept terms
     sys.stdin = StringIO('y\n')
     try:
+        print("🚀 Loading XTTS model with safe globals...")
+        # Add safe globals for PyTorch 2.6 compatibility
+        try:
+            import torch.serialization
+            # Import the required classes for safe globals
+            from TTS.tts.configs.xtts_config import XttsConfig
+            from TTS.tts.models.xtts import Xtts
+            from TTS.utils.manage import ModelManager
+            # Add the required classes to safe globals
+            torch.serialization.add_safe_globals([XttsConfig, Xtts, ModelManager])
+            print("✅ Added safe globals for XTTS model")
+        except Exception as safe_globals_error:
+            print(f"⚠️ Could not add safe globals: {safe_globals_error}")
+        # Load the XTTS model
         tts = TTS(DEFAULT_MODEL).to(DEVICE)
         model_loaded = True
+        current_model = DEFAULT_MODEL
+        print("✅ XTTS model loaded successfully with voice cloning support")
     except Exception as e:
+        print(f"❌ XTTS model failed: {e}")
         # Try fallback model
         try:
             print("🔄 Trying fallback model...")
             tts = TTS("tts_models/en/ljspeech/tacotron2-DDC").to(DEVICE)
             model_loaded = True
+            current_model = "tts_models/en/ljspeech/tacotron2-DDC"
+            print("✅ Fallback TTS model loaded successfully (English only, no voice cloning)")
         except Exception as fallback_error:
             print(f"❌ Fallback model also failed: {fallback_error}")
             tts = None
         # Copy audio files to voice directory
         for i, audio_file in enumerate(audio_files):
             dest_path = f"{voice_dir}/sample_{i+1}.wav"
+            shutil.copy2(audio_file, dest_path)
+            print(f"   Copied sample {i+1} to: {dest_path}")
         print(f"✅ Voice cloning setup completed for {voice_name}")
         return True, f"Voice {voice_name} is ready for use"
     except Exception as e:
         return False, f"Voice cloning failed: {str(e)}"
+def supports_voice_cloning():
+    """Check if the current model supports voice cloning"""
+    return "xtts" in current_model.lower()
 # API endpoints
 @app.post("/api/tts")
 async def generate_tts(request: TTSRequest):
         print(f"   Voice: {request.voice_name}")
         print(f"   Language: {request.language}")
+        # Check if voice cloning is requested but not supported
+        if request.voice_name != "default" and not supports_voice_cloning():
+            return {
+                "status": "error",
+                "message": "Voice cloning is not supported with the current model. Only the default voice is available.",
+                "model": current_model
+            }
         # Generate unique filename with sequential naming
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         filename = f"voiceover_{timestamp}.wav"
                     "message": f"Voice '{request.voice_name}' not found"
                 }
+        # Generate TTS based on model capabilities
+        if supports_voice_cloning():
+            # XTTS model with voice cloning support
+            tts.tts_to_file(
+                text=request.text,
+                speaker_wav=speaker_wav,
+                language=request.language,
+                file_path=output_path
+            )
+        else:
+            # Fallback model (Tacotron2)
+            tts.tts_to_file(
+                text=request.text,
+                file_path=output_path
+            )
         print(f"✅ TTS generated: {output_path}")
             "status": "success",
             "message": "TTS generated and uploaded successfully",
             "filename": filename,
+            "oci_path": upload_result.get("path", f"{request.project_id}/voiceover/{filename}"),
+            "model_used": current_model,
+            "voice_cloning": supports_voice_cloning() and request.voice_name != "default"
         }
     except Exception as e:
         print(f"   Voice: {request.voice_name}")
         print(f"   Language: {request.language}")
+        # Check if voice cloning is requested but not supported
+        if request.voice_name != "default" and not supports_voice_cloning():
+            raise HTTPException(
+                status_code=400,
+                detail="Voice cloning is not supported with the current model. Only the default voice is available."
+            )
         # Get voice path if custom voice is requested
         speaker_wav = None
         if request.voice_name != "default":
             filename = f"voiceover_{i+1:02d}.wav"
             output_path = f"/tmp/output/{filename}"
+            # Generate TTS based on model capabilities
+            if supports_voice_cloning():
+                # XTTS model with voice cloning support
+                tts.tts_to_file(
+                    text=text,
+                    speaker_wav=speaker_wav,
+                    language=request.language,
+                    file_path=output_path
+                )
+            else:
+                # Fallback model (Tacotron2)
+                tts.tts_to_file(
+                    text=text,
+                    file_path=output_path
+                )
             # Upload to OCI
             upload_result, error = upload_to_oci_with_retry(
         return {
             "status": "completed",
             "project_id": request.project_id,
+            "results": results,
+            "model_used": current_model,
+            "voice_cloning": supports_voice_cloning() and request.voice_name != "default"
         }
     except Exception as e:
     try:
         print(f"📥 Voice upload request: {voice_name} for project {project_id}")
+        # Check if voice cloning is supported
+        if not supports_voice_cloning():
+            raise HTTPException(
+                status_code=400,
+                detail="Voice cloning is not supported with the current model. Please use the XTTS model for voice cloning."
+            )
         # Validate file type
         if not file.filename.lower().endswith(('.wav', '.mp3', '.ogg', '.flac')):
             raise HTTPException(status_code=400, detail="Only audio files are allowed")
     try:
         print(f"📥 Voice cloning request: {voice_name} for project {project_id}")
+        # Check if voice cloning is supported
+        if not supports_voice_cloning():
+            raise HTTPException(
+                status_code=400,
+                detail="Voice cloning is not supported with the current model. Please use the XTTS model for voice cloning."
+            )
         # Save uploaded files temporarily
         temp_files = []
         for i, file in enumerate(files):
         return {
             "status": "success",
+            "voices": voices,
+            "voice_cloning_supported": supports_voice_cloning()
         }
     except Exception as e:
     return {
         "status": "healthy",
         "tts_loaded": tts is not None,
+        "model": current_model,
+        "voice_cloning_supported": supports_voice_cloning(),
         "device": DEVICE,
         "timestamp": datetime.now().isoformat()
     }
     """Root endpoint with API information"""
     return {
         "message": "TTS API with Coqui TTS and Voice Cloning",
+        "en