Spaces:

yukee1992
/

Tts-api

Sleeping

App Files Files Community

yukee1992 committed on Oct 4, 2025

Commit

0425fe6

verified ·

1 Parent(s): 7c82066

Update app.py

Browse files

Files changed (1) hide show

app.py +574 -211

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import uuid
 import time
 import shutil
 from datetime import datetime
-from typing import List, Optional
 from pathlib import Path
 import requests
@@ -12,35 +12,14 @@ from fastapi import FastAPI, HTTPException, Form, UploadFile, File
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 import torch
-# Global state
-tts = None
-model_loaded = False
-current_model = ""
-model_loading = False
-current_voice_style = "default_female"
-voice_cloning_supported = False
-app_startup_time = datetime.now()
 # Configure environment
 os.makedirs("/tmp/voices", exist_ok=True)
 os.makedirs("/tmp/output", exist_ok=True)
-# Configuration - Force CPU for Hugging Face Spaces compatibility
-DEVICE = "cpu"
-OCI_UPLOAD_API_URL = os.getenv("OCI_UPLOAD_API_URL", "").strip()
-if OCI_UPLOAD_API_URL:
-    OCI_UPLOAD_API_URL = OCI_UPLOAD_API_URL.rstrip('/')
-print(f"🔧 Using device: {DEVICE} (forced CPU for Hugging Face Spaces compatibility)")
 # Initialize FastAPI app
-app = FastAPI(
-    title="TTS API",
-    description="API for text-to-speech with Coqui TTS",
-    docs_url="/",
-    redoc_url=None
-)
 # Add CORS middleware
 app.add_middleware(
@@ -51,21 +30,58 @@ app.add_middleware(
     allow_headers=["*"],
 )
-print("=" * 50)
-print("🚀 TTS API Starting Up...")
-print(f"✅ Device: {DEVICE}")
-print(f"🔧 OCI Upload: {OCI_UPLOAD_API_URL or 'Local only'}")
-print("📝 Models will load on first request (lazy loading)")
-print("⏰ Startup time:", app_startup_time.isoformat())
-print("=" * 50)
-# Add startup event
-@app.on_event("startup")
-async def startup_event():
-    """Run on application startup"""
-    print("✅ TTS API Startup Completed Successfully!")
-    print("🌐 Server is running on http://0.0.0.0:8000")
-    print("📚 API Documentation available at: http://0.0.0.0:8000/docs")
 # Pydantic models
 class TTSRequest(BaseModel):
@@ -73,51 +89,66 @@ class TTSRequest(BaseModel):
     project_id: str
     voice_name: Optional[str] = "default"
     language: Optional[str] = "en"
-    voice_style: Optional[str] = "default_female"
 class VoiceCloneRequest(BaseModel):
     project_id: str
     voice_name: str
     description: Optional[str] = ""
-class ChangeVoiceRequest(BaseModel):
-    voice_style: str
-# Helper functions
 def clean_text(text):
-    """Clean text for TTS generation"""
     import re
     if not text or not isinstance(text, str):
-        return "Hello"
-    text = text.encode('ascii', 'ignore').decode('ascii')
-    text = re.sub(r'[^\w\s\.\,\!\?\-\'\"\:]', '', text)
     text = re.sub(r'\s+', ' ', text)
     if len(text) > 10 and not re.search(r'[\.\!\?]$', text):
         text = text + '.'
     text = text.strip()
     if not text:
         text = "Hello world"
     return text
 def upload_to_oci(file_path: str, filename: str, project_id: str, file_type="voiceover"):
-    """Upload file to OCI"""
     try:
         if not OCI_UPLOAD_API_URL:
-            print("⚠️ OCI upload skipped - OCI_UPLOAD_API_URL not configured")
-            return {"status": "skipped", "message": "OCI upload disabled"}, None
         url = f"{OCI_UPLOAD_API_URL}/api/upload"
-        print(f"🔗 Attempting upload to: {url}")
         with open(file_path, "rb") as f:
             files = {"file": (filename, f, "audio/wav")}
-            data = {"project_id": project_id, "subfolder": "voiceover"}
             response = requests.post(url, files=files, data=data, timeout=30)
@@ -133,248 +164,484 @@ def upload_to_oci(file_path: str, filename: str, project_id: str, file_type="voi
     except Exception as e:
         return None, f"Upload error: {str(e)}"
 def get_voice_path(voice_name: str):
-    """Get path to voice file"""
     if voice_name == "default":
         return None
     voice_path = Path(f"/tmp/voices/{voice_name}")
     if voice_path.is_dir():
         samples = list(voice_path.glob("sample_*.wav"))
         return str(samples[0]) if samples else None
-    return None
 def clone_voice(voice_name: str, audio_files: List[str], description: str = ""):
-    """Clone a voice from audio samples"""
     try:
         print(f"🎙️ Cloning voice: {voice_name}")
         voice_dir = f"/tmp/voices/{voice_name}"
         os.makedirs(voice_dir, exist_ok=True)
         for i, audio_file in enumerate(audio_files):
-            dest_path = f"{voice_dir}/sample_{i+1}.wav"
             shutil.copy2(audio_file, dest_path)
             print(f"   Copied sample {i+1} to: {dest_path}")
-        print(f"✅ Voice cloning setup completed for {voice_name}")
-        return True, f"Voice {voice_name} is ready for use"
     except Exception as e:
         return False, f"Voice cloning failed: {str(e)}"
 def supports_voice_cloning():
     """Check if the current model supports voice cloning"""
-    return "xtts" in current_model.lower()
-def load_tts_model(voice_style="default_female"):
-    """Load TTS model with lazy loading"""
-    global tts, model_loaded, current_model, model_loading, current_voice_style, voice_cloning_supported
     if model_loading:
         print("⏳ Model is already being loaded...")
         return False
-    if model_loaded and current_voice_style == voice_style:
-        print("✅ Model already loaded with requested voice style")
-        return True
     model_loading = True
     try:
         from TTS.api import TTS
-        model_options = {
-        "default_female": {
-        "name": "tts_models/en/ljspeech/tacotron2-DDC",
-        "description": "Tacotron2 - Default female voice",
-    },
-    "clear_male": {
-        "name": "tts_models/en/ljspeech/glow-tts",
-        "description": "Glow-TTS - Clear male voice",
-    },
-    "voice_clone": {
-        "name": "tts_models/multilingual/multi-dataset/your_tts",
-        "description": "YourTTS - Voice cloning supported",
-    }
-        }
-        selected_model = model_options.get(voice_style, model_options["default_female"])
-        current_voice_style = voice_style
-        print(f"🚀 Loading {selected_model['description']}...")
-        # Initialize TTS with progress updates
-        print(f"📥 Downloading model: {selected_model['name']}")
-        tts = TTS(selected_model["name"]).to(DEVICE)
-        # Quick test with simple text
-        print("🧪 Testing TTS with sample text...")
-        test_path = "/tmp/test.wav"
-        tts.tts_to_file(text="Hello", file_path=test_path)
-        # Clean up test file
-        if os.path.exists(test_path):
-            os.remove(test_path)
-        model_loaded = True
-        current_model = selected_model["name"]
-        voice_cloning_supported = supports_voice_cloning()
-        print(f"✅ Model loaded successfully: {current_model}")
-        print(f"🎙️ Voice cloning supported: {voice_cloning_supported}")
-        return True
     except Exception as e:
         print(f"❌ Failed to initialize TTS: {e}")
-        import traceback
-        traceback.print_exc()
-        model_loading = False
         return False
     finally:
         model_loading = False
-# Health check endpoints
-@app.get("/")
-async def root():
-    """Root endpoint with detailed health info"""
-    return {
-        "status": "healthy",
-        "service": "TTS API",
-        "message": "API is running successfully",
-        "model_loaded": model_loaded,
-        "device": DEVICE,
-        "timestamp": datetime.now().isoformat()
-    }
-@app.get("/health")
-async def health_check():
-    """Health check endpoint"""
-    return {
-        "status": "healthy",
-        "timestamp": datetime.now().isoformat(),
-        "model_loaded": model_loaded,
-        "service": "TTS API"
-    }
-@app.get("/api/health")
-async def api_health_check():
-    """API health check"""
-    return {
-        "status": "healthy",
-        "model_loaded": model_loaded,
-        "current_model": current_model if model_loaded else "none",
-        "device": DEVICE
-    }
-# Hugging Face specific health checks
-@app.get("/health-check")
-async def huggingface_health_check():
-    """Specific health check for Hugging Face Spaces"""
-    return {
-        "status": "healthy",
-        "message": "TTS API is running",
-        "timestamp": datetime.now().isoformat()
-    }
-@app.get("/ready")
-async def ready_check():
-    """Simple readiness check"""
-    return {"status": "ready"}
-# API endpoints
 @app.post("/api/tts")
 async def generate_tts(request: TTSRequest):
-    """Generate TTS for a single text"""
     try:
-        if not model_loaded or current_voice_style != request.voice_style:
-            print("🔄 Lazy loading TTS model...")
-            if not load_tts_model(request.voice_style):
                 return {
                     "status": "error",
-                    "message": "TTS model failed to load. Please try again."
                 }
         print(f"📥 TTS request for project: {request.project_id}")
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         filename = f"voiceover_{timestamp}.wav"
         output_path = f"/tmp/output/{filename}"
         os.makedirs(os.path.dirname(output_path), exist_ok=True)
-        cleaned_text = clean_text(request.text)
         # Get voice path if custom voice is requested
         speaker_wav = None
-        if request.voice_name != "default":
             speaker_wav = get_voice_path(request.voice_name)
             if not speaker_wav:
                 return {
                     "status": "error",
-                    "message": f"Voice '{request.voice_name}' not found."
                 }
-        if speaker_wav and voice_cloning_supported:
-            tts.tts_to_file(text=cleaned_text, file_path=output_path, speaker_wav=speaker_wav)
-        else:
-            tts.tts_to_file(text=cleaned_text, file_path=output_path)
         if not os.path.exists(output_path):
-            raise Exception("TTS failed to create output file")
         file_size = os.path.getsize(output_path)
-        upload_result, error = upload_to_oci(output_path, filename, request.project_id)
         if error:
             return {
-                "status": "success_local",
-                "message": f"TTS generated locally (upload failed: {error})",
                 "local_file": output_path,
                 "filename": filename,
                 "file_size": file_size
             }
         try:
             os.remove(output_path)
-        except:
-            pass
         return {
             "status": "success",
             "message": "TTS generated and uploaded successfully",
             "filename": filename,
-            "oci_path": upload_result.get("path", f"{request.project_id}/voiceover/{filename}")
         }
     except Exception as e:
-        raise HTTPException(status_code=500, detail=f"TTS generation failed: {str(e)}")
 @app.post("/api/clone-voice")
-async def clone_voice_endpoint(
     project_id: str = Form(...),
     voice_name: str = Form(...),
     description: str = Form(""),
-    files: List[UploadFile] = File(...)
 ):
-    """Clone a voice from uploaded audio samples"""
     try:
-        if not files:
-            raise HTTPException(status_code=400, detail="No audio files provided")
         temp_files = []
-        for file in files:
-            if not file.filename.lower().endswith(('.wav', '.mp3', '.flac')):
-                raise HTTPException(status_code=400, detail="Only WAV, MP3, and FLAC files are supported")
             temp_path = f"/tmp/{uuid.uuid4()}_{file.filename}"
             with open(temp_path, "wb") as f:
-                shutil.copyfileobj(file.file, f)
             temp_files.append(temp_path)
         success, message = clone_voice(voice_name, temp_files, description)
         for temp_file in temp_files:
             try:
                 os.remove(temp_file)
@@ -386,55 +653,151 @@ async def clone_voice_endpoint(
                 "status": "success",
                 "message": message,
                 "voice_name": voice_name,
-                "samples_used": len(temp_files)
             }
         else:
             raise HTTPException(status_code=500, detail=message)
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Voice cloning failed: {str(e)}")
 @app.get("/api/voices")
 async def list_voices():
-    """List all available cloned voices"""
     try:
         voices_dir = Path("/tmp/voices")
-        if not voices_dir.exists():
-            return {"voices": []}
         voices = []
-        for voice_dir in voices_dir.iterdir():
-            if voice_dir.is_dir():
-                samples = list(voice_dir.glob("sample_*.wav"))
                 voices.append({
-                    "name": voice_dir.name,
-                    "samples_count": len(samples)
                 })
-        return {"voices": voices}
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Failed to list voices: {str(e)}")
-@app.get("/api/voice-styles")
-async def get_voice_styles():
-    """Get available voice styles"""
-    styles = {
-        "default_female": "Default female voice (Tacotron2)",
-        "clear_male": "Clear male voice (Tacotron2)",
-        "voice_clone": "XTTS v2 - Voice cloning supported"
     }
-    return {"voice_styles": styles}
-@app.get("/api/status")
-async def get_status():
-    """Get detailed application status"""
     return {
-        "status": "running",
         "model_loaded": model_loaded,
-        "current_model": current_model if model_loaded else "none",
-        "device": DEVICE
     }
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8000, access_log=False)

 import time
 import shutil
 from datetime import datetime
+from typing import List, Optional, Dict
 from pathlib import Path
 import requests
 from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 import torch
+import numpy as np
 # Configure environment
 os.makedirs("/tmp/voices", exist_ok=True)
 os.makedirs("/tmp/output", exist_ok=True)
 # Initialize FastAPI app
+app = FastAPI(title="Enhanced TTS API", description="API for text-to-speech with multiple voice styles and voice cloning")
 # Add CORS middleware
 app.add_middleware(
     allow_headers=["*"],
 )
# Configuration
# Base URL of the upload service. Surrounding whitespace and any trailing
# slash are removed so that f"{OCI_UPLOAD_API_URL}/api/upload" never produces
# a "//api/upload" path; an empty value still means "uploads not configured".
OCI_UPLOAD_API_URL = os.getenv(
    "OCI_UPLOAD_API_URL", "https://yukee1992-oci-video-storage.hf.space"
).strip().rstrip("/")
# Use the GPU when torch reports one, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"✅ Using device: {DEVICE}")
# Available models with different voice styles.
# Keys are the `model_type` values accepted by the API; each entry carries:
#   name          - label used in log output and API responses
#   model_name    - identifier passed to the TTS constructor when loading
#   description   - free-text summary
#   languages     - language codes accepted by validate_language()
#   voice_cloning - whether the model is driven with speaker_wav/language
#   default_voice - NOTE(review): not read anywhere in the visible code; confirm usage
AVAILABLE_MODELS = {
    "xtts-v2": {
        "name": "XTTS-v2",
        "model_name": "tts_models/multilingual/multi-dataset/xtts_v2",
        "description": "Multilingual model with voice cloning support",
        "languages": ["en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl", "cs", "ar", "zh-cn", "ja", "hu", "ko"],
        "voice_cloning": True,
        "default_voice": "female_01"
    },
    "tacotron2-ddc": {
        "name": "Tacotron2-DDC",
        "model_name": "tts_models/en/ljspeech/tacotron2-DDC",
        "description": "High-quality English TTS (fast and reliable)",
        "languages": ["en"],
        "voice_cloning": False,
        "default_voice": "default"
    },
    "glow-tts": {
        "name": "Glow-TTS",
        "model_name": "tts_models/en/ljspeech/glow-tts",
        "description": "Fast and high-quality English TTS",
        "languages": ["en"],
        "voice_cloning": False,
        "default_voice": "default"
    }
}
# Built-in voice styles for XTTS-v2.
# get_voice_path() returns None for every name listed here, i.e. no reference
# sample is looked up for them.
# NOTE(review): nothing visible maps these names onto actual model speakers - confirm.
BUILTIN_VOICES = {
    "female_01": {"name": "Female Voice 1", "gender": "female", "language": "multilingual"},
    "female_02": {"name": "Female Voice 2", "gender": "female", "language": "multilingual"},
    "female_03": {"name": "Female Voice 3", "gender": "female", "language": "multilingual"},
    "male_01": {"name": "Male Voice 1", "gender": "male", "language": "multilingual"},
    "male_02": {"name": "Male Voice 2", "gender": "male", "language": "multilingual"},
    "default": {"name": "Default Voice", "gender": "neutral", "language": "multilingual"}
}
# Global state (written by load_tts_model, read by the endpoints)
tts = None                       # loaded TTS instance, or None before the first load
model_loaded = False             # True once a model has been loaded and smoke-tested
current_model = ""               # "model_name" of the loaded model
voice_cloning_supported = False  # "voice_cloning" flag of the loaded model
model_loading = False            # set while a load is in progress
model_load_attempts = 0          # incremented each time a load is started
active_model_config = None       # AVAILABLE_MODELS entry of the loaded model
 # Pydantic models
 class TTSRequest(BaseModel):
     project_id: str
     voice_name: Optional[str] = "default"
     language: Optional[str] = "en"
+    model_type: Optional[str] = "xtts-v2"  # New: allow model selection
# Request body for synthesising several texts in one call.
# NOTE(review): the endpoint consuming this model is not visible in this part of the file.
class BatchTTSRequest(BaseModel):
    texts: List[str]  # texts to synthesise
    project_id: str
    voice_name: Optional[str] = "default"
    language: Optional[str] = "en"
    model_type: Optional[str] = "xtts-v2"  # key into AVAILABLE_MODELS
# JSON description of a voice-clone request.
# NOTE(review): the visible clone endpoint takes Form fields rather than this model - confirm it is used.
class VoiceCloneRequest(BaseModel):
    project_id: str
    voice_name: str  # name the cloned voice is stored under
    description: Optional[str] = ""
    model_type: Optional[str] = "xtts-v2"  # key into AVAILABLE_MODELS
# Request body describing a speaking style to apply to a voice.
# NOTE(review): no consumer of this model is visible in this part of the file.
class VoiceStyleRequest(BaseModel):
    voice_name: str
    style: str  # e.g., "happy", "sad", "excited", "calm"
    intensity: Optional[float] = 1.0
+# Enhanced helper functions
 def clean_text(text):
+    """Clean text for TTS generation with better handling"""
     import re
     if not text or not isinstance(text, str):
+        return "Hello"  # Default fallback text
+    # Remove any problematic characters but keep basic punctuation and multilingual characters
+    text = re.sub(r'[^\w\s\.\,\!\?\-\'\"\:\;\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]', '', text)
+    # Replace multiple spaces with single space
     text = re.sub(r'\s+', ' ', text)
+    # Ensure text ends with punctuation if it's a sentence
     if len(text) > 10 and not re.search(r'[\.\!\?]$', text):
         text = text + '.'
     text = text.strip()
+    # If text is empty after cleaning, use default
     if not text:
         text = "Hello world"
     return text
 def upload_to_oci(file_path: str, filename: str, project_id: str, file_type="voiceover"):
+    """Upload file to OCI using your existing API with subfolder support"""
     try:
         if not OCI_UPLOAD_API_URL:
+            return None, "OCI upload API URL not configured"
         url = f"{OCI_UPLOAD_API_URL}/api/upload"
         with open(file_path, "rb") as f:
             files = {"file": (filename, f, "audio/wav")}
+            data = {
+                "project_id": project_id,
+                "subfolder": "voiceover"
+            }
             response = requests.post(url, files=files, data=data, timeout=30)
     except Exception as e:
         return None, f"Upload error: {str(e)}"
def upload_to_oci_with_retry(file_path: str, filename: str, project_id: str, file_type="voiceover", max_retries=3):
    """Upload a file through upload_to_oci, retrying with exponential backoff.

    Args:
        file_path: Local path of the file to upload.
        filename: Name to upload the file under.
        project_id: Project the file belongs to.
        file_type: Passed through to upload_to_oci unchanged.
        max_retries: Maximum number of attempts; waits are 1s, 2s, 4s, ...

    Returns:
        ``(result, None)`` on success, ``(None, error_message)`` otherwise.
    """
    # A missing URL is a configuration problem, not a transient failure:
    # report it immediately instead of sleeping through pointless retries.
    if not OCI_UPLOAD_API_URL:
        return None, "OCI upload API URL not configured"

    for attempt in range(max_retries):
        print(f"🔄 Upload attempt {attempt + 1} of {max_retries} for {filename}")
        try:
            result, error = upload_to_oci(file_path, filename, project_id, file_type)
            if not error:
                return result, None
            label, final_message = "failed", error
        except Exception as e:
            error = str(e)
            label = "exception"
            final_message = f"Upload failed after {max_retries} attempts: {str(e)}"

        # Out of attempts: surface the last failure to the caller.
        if attempt >= max_retries - 1:
            return None, final_message

        wait_time = 2 ** attempt
        print(f"⏳ Upload {label}, retrying in {wait_time}s: {error}")
        time.sleep(wait_time)

    # Only reached when max_retries <= 0 (the loop body never ran).
    return None, "Upload failed: unexpected error"
 def get_voice_path(voice_name: str):
+    """Get path to voice file with enhanced voice management"""
     if voice_name == "default":
         return None
+    # Check if it's a built-in voice
+    if voice_name in BUILTIN_VOICES:
+        return None  # Built-in voices don't need speaker_wav
     voice_path = Path(f"/tmp/voices/{voice_name}")
     if voice_path.is_dir():
         samples = list(voice_path.glob("sample_*.wav"))
         return str(samples[0]) if samples else None
+    else:
+        voice_file = Path(f"/tmp/voices/{voice_name}.wav")
+        return str(voice_file) if voice_file.exists() else None
 def clone_voice(voice_name: str, audio_files: List[str], description: str = ""):
+    """Enhanced voice cloning with better sample management"""
     try:
         print(f"🎙️ Cloning voice: {voice_name}")
         voice_dir = f"/tmp/voices/{voice_name}"
         os.makedirs(voice_dir, exist_ok=True)
+        # Save metadata about the cloned voice
+        metadata = {
+            "name": voice_name,
+            "description": description,
+            "samples_count": len(audio_files),
+            "created_at": datetime.now().isoformat(),
+            "samples": []
+        }
         for i, audio_file in enumerate(audio_files):
+            dest_path = f"{voice_dir}/sample_{i+1:02d}.wav"
             shutil.copy2(audio_file, dest_path)
+            metadata["samples"].append({
+                "sample_id": i+1,
+                "filename": f"sample_{i+1:02d}.wav",
+                "file_size": os.path.getsize(dest_path)
+            })
             print(f"   Copied sample {i+1} to: {dest_path}")
+        # Save metadata
+        with open(f"{voice_dir}/metadata.json", "w") as f:
+            import json
+            json.dump(metadata, f, indent=2)
+        print(f"✅ Voice cloning completed for {voice_name} with {len(audio_files)} samples")
+        return True, f"Voice '{voice_name}' is ready for use with {len(audio_files)} samples"
     except Exception as e:
         return False, f"Voice cloning failed: {str(e)}"
 def supports_voice_cloning():
     """Check if the current model supports voice cloning"""
+    return voice_cloning_supported
def save_wav(audio, file_path, sample_rate=22050):
    """Write audio samples to a WAV file.

    ``soundfile`` is used when it can be imported and succeeds; otherwise the
    samples are written as mono 16-bit PCM with the standard ``wave`` module.

    Args:
        audio: Sequence or array of float samples, assumed to be in [-1.0, 1.0];
            values outside that range are clipped in the fallback path.
        file_path: Destination path.
        sample_rate: Sample rate written to the file header.

    Returns:
        True if a file was written, False if every method failed.
    """
    try:
        # Try soundfile first
        try:
            import soundfile as sf
        except ImportError:
            sf = None
            print("⚠️ soundfile not available, using fallback method")

        if sf is not None:
            try:
                sf.write(file_path, audio, sample_rate)
                return True
            except Exception as sf_error:
                # A runtime failure in soundfile should not prevent the fallback.
                print(f"⚠️ soundfile write failed ({sf_error}), using fallback method")

        # Fallback: use wave library
        import wave
        import numpy as np

        # Accept lists, arrays and anything else numpy can convert; flatten to mono.
        samples = np.asarray(audio, dtype=np.float32).reshape(-1)

        # Clip before scaling: without it any |sample| > 1 wraps around in
        # int16 and turns into loud clicks. "<i2" keeps the little-endian
        # byte order that WAV requires regardless of the host.
        audio_int16 = (np.clip(samples, -1.0, 1.0) * 32767).astype("<i2")

        with wave.open(file_path, 'wb') as wav_file:
            wav_file.setnchannels(1)  # Mono
            wav_file.setsampwidth(2)  # 16-bit
            wav_file.setframerate(sample_rate)  # Sample rate
            wav_file.writeframes(audio_int16.tobytes())
        return True
    except Exception as e:
        print(f"❌ Failed to save WAV: {e}")
        return False
def _load_and_verify_model(tts_class, model_type):
    """Load one model, smoke-test it, and publish it to the module globals on success.

    Returns True when the model was loaded and produced a test file, False
    otherwise. On failure the previously published model (if any) is left
    untouched.
    """
    global tts, model_loaded, current_model, voice_cloning_supported, active_model_config

    import sys
    from io import StringIO

    model_config = AVAILABLE_MODELS[model_type]

    # Handle TOS acceptance automatically: answer "y" to a stdin prompt that
    # may appear while the model is being loaded.
    old_stdin = sys.stdin
    sys.stdin = StringIO('y\n')
    try:
        print(f"🚀 Loading {model_config['name']}...")

        # Keep the new instance local until it has passed the smoke test so a
        # failed load never replaces a working model.
        candidate = tts_class(model_config["model_name"]).to(DEVICE)

        # Test the model
        test_path = "/tmp/test_output.wav"
        if os.path.exists(test_path):
            os.remove(test_path)  # a leftover file must not fake a passing test

        if model_config["voice_cloning"]:
            # NOTE(review): this passes speaker_wav=None to a voice-cloning
            # model; confirm the model accepts that, otherwise this test
            # always fails and the fallback below is always taken.
            candidate.tts_to_file(
                text="This is a test of the voice system.",
                file_path=test_path,
                speaker_wav=None,  # Use built-in voice
                language="en"
            )
        else:
            # Test without voice cloning for other models
            candidate.tts_to_file(text="This is a test of the voice system.", file_path=test_path)

        if not os.path.exists(test_path):
            raise Exception("Test failed - no file created")
        os.remove(test_path)
        print(f"✅ {model_config['name']} model tested and working!")

        tts = candidate
        model_loaded = True
        current_model = model_config["model_name"]
        voice_cloning_supported = model_config["voice_cloning"]
        active_model_config = model_config

        print(f"✅ {model_config['name']} loaded successfully!")
        print(f"   Voice cloning: {'✅ Supported' if voice_cloning_supported else '❌ Not supported'}")
        print(f"   Languages: {', '.join(model_config['languages'])}")
        return True
    except Exception as e:
        print(f"❌ {model_config['name']} model failed: {e}")
        return False
    finally:
        sys.stdin = old_stdin


def load_tts_model(model_type="xtts-v2"):
    """Load the requested TTS model, falling back to Tacotron2 if XTTS fails.

    Args:
        model_type: Key into AVAILABLE_MODELS.

    Returns:
        True if a model (the requested one or the fallback) is loaded and
        passed its smoke test; False if a load is already in progress, the
        model type is unknown, the TTS package cannot be imported, or loading
        failed.
    """
    global model_loading, model_load_attempts

    if model_loading:
        print("⏳ Model is already being loaded...")
        return False
    if model_type not in AVAILABLE_MODELS:
        print(f"❌ Model type '{model_type}' not found. Available: {list(AVAILABLE_MODELS.keys())}")
        return False

    model_loading = True
    try:
        try:
            from TTS.api import TTS
        except Exception as e:
            print(f"❌ Failed to initialize TTS: {e}")
            return False

        model_load_attempts += 1
        if _load_and_verify_model(TTS, model_type):
            return True

        # Fallback to Tacotron2 if XTTS fails. This is done inline rather than
        # by calling load_tts_model again: a recursive call would see
        # model_loading == True and bail out immediately.
        if model_type == "xtts-v2":
            print("🔄 Falling back to Tacotron2...")
            model_load_attempts += 1
            return _load_and_verify_model(TTS, "tacotron2-ddc")
        return False
    finally:
        model_loading = False
def validate_language(language: str, model_type: str) -> bool:
    """Tell whether ``language`` is listed for ``model_type`` in AVAILABLE_MODELS.

    Unknown model types are reported as unsupported rather than raising.
    """
    model_entry = AVAILABLE_MODELS.get(model_type)
    if model_entry is None:
        return False
    supported_codes = model_entry["languages"]
    return language in supported_codes
+# Enhanced API endpoints
 @app.post("/api/tts")
 async def generate_tts(request: TTSRequest):
+    """Enhanced TTS generation with model selection and better voice handling"""
     try:
+        # Lazy load model on first request or if model changed
+        if not model_loaded or active_model_config is None or request.model_type not in list(AVAILABLE_MODELS.keys())[0]:
+            if not load_tts_model(request.model_type):
                 return {
                     "status": "error",
+                    "message": f"TTS model '{request.model_type}' failed to load. Please check the logs.",
+                    "requires_tos_acceptance": True,
+                    "tos_url": "https://coqui.ai/cpml.txt"
                 }
         print(f"📥 TTS request for project: {request.project_id}")
+        print(f"   Model: {request.model_type}")
+        print(f"   Text length: {len(request.text)} characters")
+        print(f"   Voice: {request.voice_name}")
+        print(f"   Language: {request.language}")
+        # Validate language
+        if not validate_language(request.language, request.model_type):
+            return {
+                "status": "error",
+                "message": f"Language '{request.language}' is not supported by {request.model_type}. Supported languages: {', '.join(active_model_config['languages'])}",
+                "supported_languages": active_model_config['languages']
+            }
+        # Check if voice cloning is requested but not supported
+        if request.voice_name != "default" and not supports_voice_cloning():
+            return {
+                "status": "error",
+                "message": "Voice cloning is not supported with the current model. Please use 'xtts-v2' model for voice cloning.",
+                "model": current_model
+            }
+        # Generate unique filename with sequential naming
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         filename = f"voiceover_{timestamp}.wav"
         output_path = f"/tmp/output/{filename}"
+        # Ensure output directory exists
         os.makedirs(os.path.dirname(output_path), exist_ok=True)
         # Get voice path if custom voice is requested
         speaker_wav = None
+        if request.voice_name != "default" and request.voice_name not in BUILTIN_VOICES:
             speaker_wav = get_voice_path(request.voice_name)
             if not speaker_wav:
                 return {
                     "status": "error",
+                    "message": f"Voice '{request.voice_name}' not found. Available voices: {list(BUILTIN_VOICES.keys()) + [v for v in await list_voices_internal()]}"
                 }
+        print(f"🔊 Generating TTS to: {output_path}")
+        # Clean the text before generation
+        cleaned_text = clean_text(request.text)
+        print(f"📝 Original text: '{request.text}'")
+        print(f"📝 Cleaned text: '{cleaned_text}'")
+        # Generate TTS based on model capabilities
+        try:
+            if supports_voice_cloning():
+                # XTTS model with voice cloning support
+                tts.tts_to_file(
+                    text=cleaned_text,
+                    speaker_wav=speaker_wav,
+                    language=request.language,
+                    file_path=output_path
+                )
+            else:
+                # Models without voice cloning
+                tts.tts_to_file(
+                    text=cleaned_text,
+                    file_path=output_path
+                )
+        except Exception as tts_error:
+            print(f"❌ TTS generation failed: {tts_error}")
+            # Try alternative approach
+            try:
+                print("🔄 Trying alternative TTS generation method...")
+                if supports_voice_cloning():
+                    audio = tts.tts(
+                        text=cleaned_text,
+                        speaker_wav=speaker_wav,
+                        language=request.language
+                    )
+                else:
+                    audio = tts.tts(text=cleaned_text)
+                # Save manually
+                if not save_wav(audio, output_path):
+                    raise Exception("Failed to save audio file")
+            except Exception as alt_error:
+                print(f"❌ Alternative method also failed: {alt_error}")
+                raise alt_error
+        # Verify the file was created
         if not os.path.exists(output_path):
+            raise Exception(f"TTS failed to create output file: {output_path}")
         file_size = os.path.getsize(output_path)
+        print(f"✅ TTS generated: {output_path} ({file_size} bytes)")
+        # Upload to OCI
+        upload_result, error = upload_to_oci_with_retry(
+            output_path, filename, request.project_id, "voiceover"
+        )
         if error:
+            print(f"❌ OCI upload failed: {error}")
             return {
+                "status": "partial_success",
+                "message": f"TTS generated but upload failed: {error}",
                 "local_file": output_path,
                 "filename": filename,
                 "file_size": file_size
             }
+        print(f"✅ Upload successful: {filename}")
+        # Clean up local file
         try:
             os.remove(output_path)
+            print(f"🧹 Cleaned up local file: {output_path}")
+        except Exception as cleanup_error:
+            print(f"⚠️ Could not clean up file: {cleanup_error}")
         return {
             "status": "success",
             "message": "TTS generated and uploaded successfully",
             "filename": filename,
+            "oci_path": upload_result.get("path", f"{request.project_id}/voiceover/{filename}"),
+            "model_used": current_model,
+            "model_type": request.model_type,
+            "voice_cloning_used": supports_voice_cloning() and request.voice_name != "default",
+            "voice_style": request.voice_name
+        }
+    except Exception as e:
+        print(f"❌ TTS generation error: {str(e)}")
+        error_detail = {
+            "error": str(e),
+            "model": current_model,
+            "model_type": request.model_type if 'request' in locals() else "unknown",
+            "voice_cloning_supported": supports_voice_cloning(),
+            "device": DEVICE
+        }
+        raise HTTPException(status_code=500, detail=error_detail)
+async def list_voices_internal() -> List[str]:
+    """Return the names of all voices stored under ``/tmp/voices``.
+
+    A sub-directory contributes its directory name; a top-level ``.wav`` file
+    contributes its file name without the extension. Any other entry is
+    ignored.
+
+    Returns:
+        Voice names in directory-iteration order, or an empty list when the
+        voices directory does not exist.
+    """
+    voices_dir = Path("/tmp/voices")
+    # The directory is created during startup configuration, but /tmp can be
+    # wiped afterwards; an absent directory simply means "no voices".
+    if not voices_dir.is_dir():
+        return []
+    voices = []
+    for item in voices_dir.iterdir():
+        if item.is_dir():
+            voices.append(item.name)
+        elif item.is_file() and item.suffix == ".wav":
+            voices.append(item.stem)
+    return voices
+@app.get("/api/models")
+async def list_models():
+    """Report the configured TTS models and which one, if any, is loaded.
+
+    Returns:
+        dict: ``status``, the ``models`` catalogue, ``current_model`` (``None``
+        while nothing is loaded) and the ``model_loaded`` flag.
+    """
+    # Only expose the model identifier once a model has actually been loaded.
+    loaded_name = None
+    if model_loaded:
+        loaded_name = current_model
+    payload = {"status": "success", "models": AVAILABLE_MODELS}
+    payload["current_model"] = loaded_name
+    payload["model_loaded"] = model_loaded
+    return payload
+@app.post("/api/set-model")
+async def set_model(model_type: str = Form(...)):
+    """Load the requested TTS model and make it the active one.
+
+    Args:
+        model_type: Key into ``AVAILABLE_MODELS`` (form field).
+
+    Returns:
+        dict: Success payload naming the model now in use and whether voice
+        cloning is supported.
+
+    Raises:
+        HTTPException: 400 for an unknown model type, 500 when loading fails.
+    """
+    # Guard: reject unknown model keys before attempting any load.
+    if model_type not in AVAILABLE_MODELS:
+        known_types = list(AVAILABLE_MODELS.keys())
+        raise HTTPException(status_code=400, detail=f"Model type '{model_type}' not found. Available: {known_types}")
+    # Guard: a falsy result from the loader means the switch did not happen.
+    if not load_tts_model(model_type):
+        raise HTTPException(status_code=500, detail=f"Failed to load model: {model_type}")
+    display_name = AVAILABLE_MODELS[model_type]['name']
+    return {
+        "status": "success",
+        "message": f"Model switched to {display_name}",
+        "model": current_model,
+        "voice_cloning_supported": voice_cloning_supported
+    }
+@app.get("/api/builtin-voices")
+async def get_builtin_voices():
+    """Return the catalogue of built-in voice styles.
+
+    Returns:
+        dict: ``status``, the ``voices`` catalogue and the
+        ``voice_cloning_supported`` flag.
+    """
+    response = {"status": "success"}
+    response["voices"] = BUILTIN_VOICES
+    response["voice_cloning_supported"] = voice_cloning_supported
+    return response
+# Batch TTS endpoint with per-request model selection
+@app.post("/api/batch-tts")
+async def batch_generate_tts(request: BatchTTSRequest):
+    """Ensure the requested model is loaded and report the batch status.
+
+    Args:
+        request: Batch request; this handler reads ``model_type``,
+            ``project_id`` and ``voice_name`` from it.
+
+    Returns:
+        dict: ``status``, ``project_id``, the model in use, the requested
+        ``model_type`` and whether voice cloning applies to this request.
+
+    Raises:
+        HTTPException: 500 when the model cannot be loaded or on any other
+            failure.
+    """
+    try:
+        # Lazy load: (re)load when nothing is loaded yet or when the loaded
+        # model is not the one requested. The comparison against the
+        # catalogue's "model_name" mirrors the check in the clone-voice
+        # endpoint. An unknown model_type yields None here and is handed to
+        # the loader as before.
+        wanted_model = AVAILABLE_MODELS.get(request.model_type, {}).get("model_name")
+        if not model_loaded or active_model_config is None or current_model != wanted_model:
+            if not load_tts_model(request.model_type):
+                raise HTTPException(status_code=500, detail=f"TTS model '{request.model_type}' failed to load")
+        # TODO: per-text synthesis and upload are not implemented in this
+        # handler yet; the response below only reflects model selection.
+        return {
+            "status": "completed",
+            "project_id": request.project_id,
+            "model_used": current_model,
+            "model_type": request.model_type,
+            "voice_cloning": supports_voice_cloning() and request.voice_name != "default"
         }
+    except HTTPException:
+        # Keep the status code and detail chosen above instead of re-wrapping
+        # them in a second, generic 500.
+        raise
     except Exception as e:
+        print(f"❌ Batch TTS generation error: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Batch TTS generation failed: {str(e)}")
+# Enhanced voice cloning endpoint
 @app.post("/api/clone-voice")
+async def api_clone_voice(
     project_id: str = Form(...),
     voice_name: str = Form(...),
     description: str = Form(""),
+    files: List[UploadFile] = File(...),
+    model_type: str = Form("xtts-v2")
 ):
+    """Clone a voice from uploaded audio samples using the XTTS-v2 model.
+
+    Args:
+        project_id: Form field; not referenced in the lines visible here.
+        voice_name: Name passed to ``clone_voice`` and echoed in the response.
+        description: Free-text description passed to ``clone_voice``.
+        files: Uploaded samples; each file name must end in .wav, .mp3, .ogg
+            or .flac.
+        model_type: Must be "xtts-v2"; any other value is rejected.
+
+    Returns:
+        dict: On success, ``status``, ``message``, ``voice_name`` and
+        ``model_used``.
+
+    Raises:
+        HTTPException: Always surfaces as 500, see the note on the outer
+            ``except`` below.
+    """
     try:
+        # Ensure we're using a model that supports voice cloning
+        if model_type != "xtts-v2":
+            raise HTTPException(
+                status_code=400,
+                detail="Voice cloning is only supported with the 'xtts-v2' model. Please switch to XTTS-v2 for voice cloning."
+            )
+        # Load XTTS model if not already loaded
+        if not model_loaded or current_model != AVAILABLE_MODELS["xtts-v2"]["model_name"]:
+            if not load_tts_model("xtts-v2"):
+                raise HTTPException(status_code=500, detail="XTTS-v2 model failed to load. Voice cloning requires XTTS-v2.")
+        # Save every uploaded sample to its own temporary file under /tmp.
         temp_files = []
+        for i, file in enumerate(files):
+            # NOTE(review): if a later file fails this check, temp files
+            # already written for earlier files are not removed, because the
+            # cleanup loop below is never reached.
+            if not file.filename.lower().endswith(('.wav', '.mp3', '.ogg', '.flac')):
+                raise HTTPException(status_code=400, detail="Only audio files are allowed")
+            # NOTE(review): file.filename is client-supplied and is embedded
+            # in the path as-is; confirm path separators cannot reach here.
             temp_path = f"/tmp/{uuid.uuid4()}_{file.filename}"
             with open(temp_path, "wb") as f:
+                content = await file.read()
+                f.write(content)
             temp_files.append(temp_path)
         success, message = clone_voice(voice_name, temp_files, description)
+        # Clean up temporary files
+        # NOTE(review): this view omits the lines between the os.remove() call
+        # and the success payload (presumably the except clause and the
+        # `if success:` return) - confirm against the full file.
         for temp_file in temp_files:
             try:
                 os.remove(temp_file)
                 "status": "success",
                 "message": message,
                 "voice_name": voice_name,
+                "model_used": current_model
             }
         else:
             raise HTTPException(status_code=500, detail=message)
+    # NOTE(review): HTTPException derives from Exception, so the 400 responses
+    # raised above are caught here and re-raised as 500.
     except Exception as e:
+        print(f"❌ Voice cloning error: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Voice cloning failed: {str(e)}")
+# Enhanced voices list endpoint
 @app.get("/api/voices")
 async def list_voices():
+    """List every available voice: built-in, cloned, and uploaded.
+
+    Built-in entries come from ``BUILTIN_VOICES``. Each sub-directory of
+    ``/tmp/voices`` is reported as a cloned voice, enriched from its optional
+    ``metadata.json``; each top-level ``.wav`` file is reported as an
+    uploaded voice.
+
+    Returns:
+        dict: ``status``, the ``voices`` list, ``voice_cloning_supported`` and
+        ``current_model``.
+
+    Raises:
+        HTTPException: 500 when the listing fails.
+    """
+    # Function-scope import (the original also imported json lazily), hoisted
+    # so it is not re-executed for every voice directory.
+    import json
     try:
         voices_dir = Path("/tmp/voices")
         voices = []
+        # Add built-in voices
+        for voice_id, voice_info in BUILTIN_VOICES.items():
+            voices.append({
+                "name": voice_id,
+                "display_name": voice_info["name"],
+                "type": "builtin",
+                "gender": voice_info["gender"],
+                "language": voice_info["language"],
+                "samples_count": 0,
+                "created_at": "built-in"
+            })
+        # Add cloned voices (directories) and uploaded samples (loose .wav files)
+        for item in voices_dir.iterdir():
+            if item.is_dir():
+                samples = list(item.glob("sample_*.wav"))
+                # Metadata is optional: fall back to an empty dict when it is
+                # missing, unreadable, or not a JSON object.
+                metadata_path = item / "metadata.json"
+                metadata = {}
+                if metadata_path.exists():
+                    try:
+                        with open(metadata_path, 'r') as f:
+                            loaded = json.load(f)
+                    except (OSError, ValueError) as meta_error:
+                        # json.JSONDecodeError and UnicodeDecodeError are both
+                        # ValueError subclasses; a bad file must not hide the
+                        # voice itself, so log and continue.
+                        print(f"⚠️ Could not read metadata for voice '{item.name}': {meta_error}")
+                    else:
+                        # Only a JSON object supports the .get() lookups below.
+                        if isinstance(loaded, dict):
+                            metadata = loaded
                 voices.append({
+                    "name": item.name,
+                    "display_name": metadata.get("name", item.name),
+                    "type": "cloned",
+                    "gender": "custom",
+                    "language": "multilingual",
+                    "samples_count": len(samples),
+                    "description": metadata.get("description", ""),
+                    "created_at": metadata.get("created_at", datetime.fromtimestamp(item.stat().st_ctime).isoformat())
                 })
+            elif item.is_file() and item.suffix == ".wav":
+                voices.append({
+                    "name": item.stem,
+                    "display_name": item.stem,
+                    "type": "uploaded",
+                    "gender": "custom",
+                    "language": "unknown",
+                    "samples_count": 1,
+                    "created_at": datetime.fromtimestamp(item.stat().st_ctime).isoformat()
+                })
+        return {
+            "status": "success",
+            "voices": voices,
+            "voice_cloning_supported": supports_voice_cloning(),
+            "current_model": current_model
+        }
     except Exception as e:
+        print(f"❌ List voices error: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Failed to list voices: {str(e)}")
+# Health check, model reload, and root endpoints
+@app.get("/api/health")
+async def health_check():
+    """Report service liveness together with the current model state.
+
+    Returns:
+        dict: ``status`` (always "healthy"), model load state and
+        configuration, the compute device, the load-attempt counter and an
+        ISO-8601 timestamp.
+    """
+    model_state = {
+        "tts_loaded": model_loaded,
+        "model": current_model,
+        "model_config": active_model_config,
+        "voice_cloning_supported": voice_cloning_supported,
+    }
+    # Key order matches the documented response: status first, then model
+    # state, then runtime details.
+    report = {"status": "healthy", **model_state}
+    report["device"] = DEVICE
+    report["load_attempts"] = model_load_attempts
+    report["timestamp"] = datetime.now().isoformat()
+    return report
+@app.post("/api/reload-model")
+async def reload_model(model_type: str = Form("xtts-v2")):
+    """Discard the loaded model state and load ``model_type`` from scratch.
+
+    Args:
+        model_type: Key into ``AVAILABLE_MODELS`` (form field).
+
+    Returns:
+        dict: ``status`` ("success" or "error"), a ``message``, and the model
+        state after the reload attempt.
+
+    Raises:
+        HTTPException: 400 for an unknown model type.
+    """
+    global tts, model_loaded, current_model, voice_cloning_supported
+    if model_type not in AVAILABLE_MODELS:
+        raise HTTPException(status_code=400, detail=f"Model type '{model_type}' not found")
+    # Drop all state describing the previously loaded model in one step.
+    tts, model_loaded, current_model, voice_cloning_supported = None, False, "", False
+    # A failed load is reported in the payload rather than raised.
+    if load_tts_model(model_type):
+        outcome = "success"
+        note = f"Model {model_type} reloaded successfully"
+    else:
+        outcome = "error"
+        note = f"Failed to reload model {model_type}"
+    return {
+        "status": outcome,
+        "message": note,
+        "model_loaded": model_loaded,
+        "model": current_model,
+        "voice_cloning_supported": voice_cloning_supported
+    }
+@app.get("/")
+async def root():
+    """Describe the API: endpoint index plus a summary of the model state.
+
+    Returns:
+        dict: A ``message``, the ``endpoints`` index, and the model fields
+        ``model_loaded``, ``model_name``, ``model_type``,
+        ``voice_cloning_supported`` and ``builtin_voices_count``.
+    """
+    # Resolve the catalogue key of the model that is actually loaded by
+    # matching its "model_name" against current_model, rather than always
+    # reporting the first catalogue entry. "None" is kept as the fallback.
+    active_type = "None"
+    if active_model_config:
+        active_type = next(
+            (
+                model_key
+                for model_key, model_cfg in AVAILABLE_MODELS.items()
+                if model_cfg.get("model_name") == current_model
+            ),
+            "None",
+        )
     return {
+        "message": "Enhanced TTS API with Multiple Voice Styles and Voice Cloning",
+        "endpoints": {
+            "POST /api/tts": "Generate TTS for a single text",
+            "POST /api/batch-tts": "Generate TTS for multiple texts",
+            "POST /api/upload-voice": "Upload a voice sample for cloning",
+            "POST /api/clone-voice": "Clone a voice from multiple samples",
+            "GET /api/voices": "List available voices",
+            "GET /api/builtin-voices": "List built-in voice styles",
+            "GET /api/models": "List available TTS models",
+            "POST /api/set-model": "Switch between TTS models",
+            "GET /api/health": "Health check",
+            "POST /api/reload-model": "Reload TTS model"
+        },
         "model_loaded": model_loaded,
+        "model_name": current_model if model_loaded else "None",
+        "model_type": active_type,
+        "voice_cloning_supported": supports_voice_cloning(),
+        "builtin_voices_count": len(BUILTIN_VOICES)
     }
 if __name__ == "__main__":
     import uvicorn
+    # Static startup banner, printed one line at a time.
+    for banner_line in (
+        "🚀 Starting Enhanced TTS API with Multiple Voice Styles and Voice Cloning...",
+        "📊 API endpoints available at: http://localhost:7860/",
+        "💡 Model will be loaded on first request to save memory",
+    ):
+        print(banner_line)
+    # Dynamic part of the banner: catalogue keys known at startup.
+    model_ids = list(AVAILABLE_MODELS.keys())
+    print("🎵 Available models:", model_ids)
+    voice_ids = list(BUILTIN_VOICES.keys())
+    print("🗣️ Built-in voices:", voice_ids)
+    # Serve on all interfaces, on the port advertised in the banner above.
+    uvicorn.run(app, host="0.0.0.0", port=7860)