Spaces:

yukee1992
/

Tts-api

Sleeping

App Files Files Community

yukee1992 committed on Oct 7, 2025

Commit

05428d2

verified ·

1 Parent(s): 2efdbd8

Update app.py

Browse files

Files changed (1) hide show

app.py +151 -638

app.py CHANGED Viewed

@@ -14,12 +14,12 @@ from pydantic import BaseModel
 import torch
 import numpy as np
-# Configure environment
 os.makedirs("/tmp/voices", exist_ok=True)
 os.makedirs("/tmp/output", exist_ok=True)
 # Initialize FastAPI app
-app = FastAPI(title="Enhanced TTS API", description="API for text-to-speech with multiple voice styles and voice cloning")
 # Add CORS middleware
 app.add_middleware(
@@ -36,71 +36,35 @@ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 print(f"✅ Using device: {DEVICE}")
-# Available models with different voice styles
 AVAILABLE_MODELS = {
-    "xtts-v2": {
-        "name": "XTTS-v2",
-        "model_name": "tts_models/multilingual/multi-dataset/xtts_v2",
-        "description": "Multilingual model with voice cloning support",
-        "languages": ["en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl", "cs", "ar", "zh-cn", "ja", "hu", "ko"],
-        "voice_cloning": True,
-        "default_voice": "female_01"
-    },
     "tacotron2-ddc": {
         "name": "Tacotron2-DDC",
         "model_name": "tts_models/en/ljspeech/tacotron2-DDC",
-        "description": "High-quality English TTS (fast and reliable)",
-        "languages": ["en"],
-        "voice_cloning": False,
-        "default_voice": "default"
-    },
-    "glow-tts": {
-        "name": "Glow-TTS",
-        "model_name": "tts_models/en/ljspeech/glow-tts",
-        "description": "Fast and high-quality English TTS",
         "languages": ["en"],
         "voice_cloning": False,
-        "default_voice": "default"
     }
 }
-# Built-in voice styles for XTTS-v2 with better descriptions
-BUILTIN_VOICES = {
-    "female_01": {
-        "name": "Female Voice 1",
-        "gender": "female",
-        "language": "multilingual",
-        "description": "Clear and natural female voice"
-    },
-    "female_02": {
-        "name": "Female Voice 2",
-        "gender": "female",
-        "language": "multilingual",
-        "description": "Warm and friendly female voice"
-    },
-    "female_03": {
-        "name": "Female Voice 3",
-        "gender": "female",
-        "language": "multilingual",
-        "description": "Professional and articulate female voice"
-    },
-    "male_01": {
-        "name": "Male Voice 1",
-        "gender": "male",
-        "language": "multilingual",
-        "description": "Deep and clear male voice"
     },
-    "male_02": {
-        "name": "Male Voice 2",
-        "gender": "male",
-        "language": "multilingual",
-        "description": "Friendly and approachable male voice"
     },
-    "default": {
-        "name": "Default Voice",
-        "gender": "neutral",
-        "language": "multilingual",
-        "description": "Balanced and natural voice"
     }
 }
@@ -108,69 +72,89 @@ BUILTIN_VOICES = {
 tts = None
 model_loaded = False
 current_model = ""
-voice_cloning_supported = False
 model_loading = False
-model_load_attempts = 0
-active_model_config = None
 # Pydantic models
 class TTSRequest(BaseModel):
     text: str
     project_id: str
-    voice_name: Optional[str] = "female_01"
-    language: Optional[str] = "en"
-    model_type: Optional[str] = "xtts-v2"
     speed: Optional[float] = 1.0
-    temperature: Optional[float] = 0.75
 class BatchTTSRequest(BaseModel):
     texts: List[str]
     project_id: str
-    voice_name: Optional[str] = "female_01"
-    language: Optional[str] = "en"
-    model_type: Optional[str] = "xtts-v2"
     speed: Optional[float] = 1.0
-    temperature: Optional[float] = 0.75
-class VoiceCloneRequest(BaseModel):
-    project_id: str
-    voice_name: str
-    description: Optional[str] = ""
-    model_type: Optional[str] = "xtts-v2"
-class VoiceStyleRequest(BaseModel):
-    voice_name: str
-    style: str
-    intensity: Optional[float] = 1.0
-# Enhanced helper functions
 def clean_text(text):
-    """Clean text for TTS generation with better handling"""
     import re
     if not text or not isinstance(text, str):
         return "Hello"
-    # Remove any problematic characters but keep basic punctuation and multilingual characters
-    text = re.sub(r'[^\w\s\.\,\!\?\-\'\"\:\;\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af]', '', text)
-    # Replace multiple spaces with single space
     text = re.sub(r'\s+', ' ', text)
-    # Ensure text ends with punctuation if it's a sentence
     if len(text) > 10 and not re.search(r'[\.\!\?]$', text):
         text = text + '.'
     text = text.strip()
-    # If text is empty after cleaning, use default
     if not text:
         text = "Hello world"
     return text
 def upload_to_oci(file_path: str, filename: str, project_id: str, file_type="voiceover"):
-    """Upload file to OCI using your existing API with subfolder support"""
     try:
         if not OCI_UPLOAD_API_URL:
             return None, "OCI upload API URL not configured"
@@ -198,144 +182,24 @@ def upload_to_oci(file_path: str, filename: str, project_id: str, file_type="voi
     except Exception as e:
         return None, f"Upload error: {str(e)}"
-def upload_to_oci_with_retry(file_path: str, filename: str, project_id: str, file_type="voiceover", max_retries=3):
-    """Upload file to OCI with retry logic"""
-    for attempt in range(max_retries):
-        try:
-            print(f"🔄 Upload attempt {attempt + 1} of {max_retries} for {filename}")
-            result, error = upload_to_oci(file_path, filename, project_id, file_type)
-            if error:
-                if attempt < max_retries - 1:
-                    wait_time = 2 ** attempt
-                    print(f"⏳ Upload failed, retrying in {wait_time}s: {error}")
-                    time.sleep(wait_time)
-                    continue
-                else:
-                    return None, error
-            else:
-                return result, None
-        except Exception as e:
-            if attempt < max_retries - 1:
-                wait_time = 2 ** attempt
-                print(f"⏳ Upload exception, retrying in {wait_time}s: {str(e)}")
-                time.sleep(wait_time)
-                continue
-            else:
-                return None, f"Upload failed after {max_retries} attempts: {str(e)}"
-    return None, "Upload failed: unexpected error"
-def get_voice_path(voice_name: str):
-    """Get path to voice file with enhanced voice management"""
-    if voice_name == "default":
-        return None
-    # Check if it's a built-in voice
-    if voice_name in BUILTIN_VOICES:
-        return None
-    voice_path = Path(f"/tmp/voices/{voice_name}")
-    if voice_path.is_dir():
-        samples = list(voice_path.glob("sample_*.wav"))
-        return str(samples[0]) if samples else None
-    else:
-        voice_file = Path(f"/tmp/voices/{voice_name}.wav")
-        return str(voice_file) if voice_file.exists() else None
-def clone_voice(voice_name: str, audio_files: List[str], description: str = ""):
-    """Enhanced voice cloning with better sample management"""
-    try:
-        print(f"🎙️ Cloning voice: {voice_name}")
-        voice_dir = f"/tmp/voices/{voice_name}"
-        os.makedirs(voice_dir, exist_ok=True)
-        # Save metadata about the cloned voice
-        metadata = {
-            "name": voice_name,
-            "description": description,
-            "samples_count": len(audio_files),
-            "created_at": datetime.now().isoformat(),
-            "samples": []
-        }
-        for i, audio_file in enumerate(audio_files):
-            dest_path = f"{voice_dir}/sample_{i+1:02d}.wav"
-            shutil.copy2(audio_file, dest_path)
-            metadata["samples"].append({
-                "sample_id": i+1,
-                "filename": f"sample_{i+1:02d}.wav",
-                "file_size": os.path.getsize(dest_path)
-            })
-            print(f"   Copied sample {i+1} to: {dest_path}")
-        # Save metadata
-        with open(f"{voice_dir}/metadata.json", "w") as f:
-            import json
-            json.dump(metadata, f, indent=2)
-        print(f"✅ Voice cloning completed for {voice_name} with {len(audio_files)} samples")
-        return True, f"Voice '{voice_name}' is ready for use with {len(audio_files)} samples"
-    except Exception as e:
-        return False, f"Voice cloning failed: {str(e)}"
-def supports_voice_cloning():
-    """Check if the current model supports voice cloning"""
-    return voice_cloning_supported
-def save_wav(audio, file_path, sample_rate=22050):
-    """Save audio to WAV file manually"""
-    try:
-        # Try soundfile first
-        try:
-            import soundfile as sf
-            sf.write(file_path, audio, sample_rate)
-            return True
-        except ImportError:
-            print("⚠️ soundfile not available, using fallback method")
-        # Fallback: use wave library
-        import wave
-        import numpy as np
-        # Ensure audio is numpy array
-        if isinstance(audio, list):
-            audio = np.array(audio)
-        # Convert to 16-bit PCM
-        audio_int16 = (audio * 32767).astype(np.int16)
-        with wave.open(file_path, 'wb') as wav_file:
-            wav_file.setnchannels(1)  # Mono
-            wav_file.setsampwidth(2)  # 16-bit
-            wav_file.setframerate(sample_rate)  # Sample rate
-            wav_file.writeframes(audio_int16.tobytes())
-        return True
-    except Exception as e:
-        print(f"❌ Failed to save WAV: {e}")
-        return False
-def load_tts_model(model_type="xtts-v2"):
-    """ROBUST MODEL LOADING: Proper XTTS-v2 handling"""
-    global tts, model_loaded, current_model, voice_cloning_supported, model_loading, model_load_attempts, active_model_config
     if model_loading:
         print("⏳ Model is already being loaded...")
         return False
     if model_type not in AVAILABLE_MODELS:
-        print(f"❌ Model type '{model_type}' not found. Available: {list(AVAILABLE_MODELS.keys())}")
         return False
     model_loading = True
-    model_load_attempts += 1
     try:
         from TTS.api import TTS
         # Handle TOS acceptance automatically
@@ -352,47 +216,25 @@ def load_tts_model(model_type="xtts-v2"):
             # Load the selected model
             tts = TTS(model_config["model_name"]).to(DEVICE)
-            # Mark as loaded immediately
             model_loaded = True
             current_model = model_config["model_name"]
-            voice_cloning_supported = model_config["voice_cloning"]
-            active_model_config = model_config
             print(f"✅ {model_config['name']} loaded successfully!")
-            print(f"   Voice cloning: {'✅ Supported' if voice_cloning_supported else '❌ Not supported'}")
-            print(f"   Languages: {', '.join(model_config['languages'])}")
-            # Try a simple test but don't fail if it doesn't work
-            try:
-                test_path = "/tmp/test_output.wav"
-                if model_config["voice_cloning"]:
-                    # For XTTS-v2, test without speaker_wav to use built-in voices
-                    tts.tts_to_file(
-                        text="This is a test of the voice system.",
-                        file_path=test_path,
-                        language="en"
-                    )
-                else:
-                    # For non-voice-cloning models
-                    tts.tts_to_file(text="This is a test of the voice system.", file_path=test_path)
-                if os.path.exists(test_path):
-                    os.remove(test_path)
-                    print("✅ Model test completed successfully!")
-                else:
-                    print("⚠️ Test file not created, but model is loaded")
-            except Exception as test_error:
-                print(f"⚠️ Model test failed but model is loaded: {test_error}")
             return True
         except Exception as e:
-            print(f"❌ {model_config['name']} model failed to load: {e}")
-            # Fallback to Tacotron2 if XTTS fails
-            if model_type == "xtts-v2":
-                print("🔄 Falling back to Tacotron2...")
-                model_loading = False  # Reset loading state
-                return load_tts_model("tacotron2-ddc")
             return False
         finally:
@@ -404,51 +246,29 @@ def load_tts_model(model_type="xtts-v2"):
     finally:
         model_loading = False
-def validate_language(language: str, model_type: str) -> bool:
-    """Validate if language is supported by the current model"""
-    if model_type not in AVAILABLE_MODELS:
-        return False
-    return language in AVAILABLE_MODELS[model_type]["languages"]
-# Enhanced API endpoints
 @app.post("/api/tts")
 async def generate_tts(request: TTSRequest):
-    """ENHANCED TTS generation with better voice quality and naturalness"""
     try:
-        # Lazy load model on first request
         if not model_loaded:
-            if not load_tts_model(request.model_type):
                 return {
                     "status": "error",
-                    "message": f"TTS model '{request.model_type}' failed to load. Please check the logs.",
                     "requires_tos_acceptance": True,
                     "tos_url": "https://coqui.ai/cpml.txt"
                 }
         print(f"📥 TTS request for project: {request.project_id}")
-        print(f"   Model: {request.model_type}")
         print(f"   Text length: {len(request.text)} characters")
-        print(f"   Voice: {request.voice_name}")
-        print(f"   Language: {request.language}")
-        print(f"   Speed: {request.speed}")
-        # Validate language
-        if not validate_language(request.language, request.model_type):
-            return {
-                "status": "error",
-                "message": f"Language '{request.language}' is not supported by {request.model_type}. Supported languages: {', '.join(active_model_config['languages'])}",
-                "supported_languages": active_model_config['languages']
-            }
-        # Check if voice cloning is requested but not supported
-        if request.voice_name != "default" and request.voice_name not in BUILTIN_VOICES and not supports_voice_cloning():
-            return {
-                "status": "error",
-                "message": "Voice cloning is not supported with the current model. Please use 'xtts-v2' model for voice cloning.",
-                "model": current_model
-            }
-        # Generate unique filename with sequential naming
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         filename = f"voiceover_{timestamp}.wav"
         output_path = f"/tmp/output/{filename}"
@@ -456,91 +276,29 @@ async def generate_tts(request: TTSRequest):
         # Ensure output directory exists
         os.makedirs(os.path.dirname(output_path), exist_ok=True)
-        # Get voice path - only for custom cloned voices
-        speaker_wav = None
-        if request.voice_name not in BUILTIN_VOICES and request.voice_name != "default":
-            speaker_wav = get_voice_path(request.voice_name)
-            if not speaker_wav:
-                return {
-                    "status": "error",
-                    "message": f"Voice '{request.voice_name}' not found. Available voices: {list(BUILTIN_VOICES.keys()) + [v for v in await list_voices_internal()]}"
-                }
-        print(f"🔊 Generating TTS to: {output_path}")
-        if speaker_wav:
-            print(f"🎙️ Using custom voice: {request.voice_name}")
-        else:
-            print(f"🎙️ Using built-in voice: {request.voice_name}")
-        # Clean the text before generation
         cleaned_text = clean_text(request.text)
-        print(f"📝 Original text: '{request.text}'")
-        print(f"📝 Cleaned text: '{cleaned_text}'")
-        # Generate TTS based on model capabilities - WITH ERROR HANDLING
         try:
-            if supports_voice_cloning():
-                # XTTS model with voice cloning support
-                if speaker_wav:
-                    # Custom voice with speaker file
-                    tts.tts_to_file(
-                        text=cleaned_text,
-                        speaker_wav=speaker_wav,
-                        language=request.language,
-                        file_path=output_path
-                    )
-                else:
-                    # Built-in XTTS voice (no speaker_wav)
-                    tts.tts_to_file(
-                        text=cleaned_text,
-                        language=request.language,
-                        file_path=output_path
-                    )
-            else:
-                # Non-voice-cloning models
-                tts.tts_to_file(
-                    text=cleaned_text,
-                    file_path=output_path
-                )
         except Exception as tts_error:
             print(f"❌ TTS generation failed: {tts_error}")
-            # Try alternative approach
-            try:
-                print("🔄 Trying alternative TTS generation method...")
-                if supports_voice_cloning():
-                    if speaker_wav:
-                        audio = tts.tts(
-                            text=cleaned_text,
-                            speaker_wav=speaker_wav,
-                            language=request.language
-                        )
-                    else:
-                        audio = tts.tts(
-                            text=cleaned_text,
-                            language=request.language
-                        )
-                else:
-                    audio = tts.tts(text=cleaned_text)
-                # Save manually
-                if not save_wav(audio, output_path):
-                    raise Exception("Failed to save audio file")
-            except Exception as alt_error:
-                print(f"❌ Alternative method also failed: {alt_error}")
-                raise alt_error
         # Verify the file was created
         if not os.path.exists(output_path):
-            raise Exception(f"TTS failed to create output file: {output_path}")
         file_size = os.path.getsize(output_path)
         print(f"✅ TTS generated: {output_path} ({file_size} bytes)")
         # Upload to OCI
-        upload_result, error = upload_to_oci_with_retry(
-            output_path, filename, request.project_id, "voiceover"
-        )
         if error:
             print(f"❌ OCI upload failed: {error}")
@@ -554,7 +312,7 @@ async def generate_tts(request: TTSRequest):
         print(f"✅ Upload successful: {filename}")
-        # Clean up local file
         try:
             os.remove(output_path)
             print(f"🧹 Cleaned up local file: {output_path}")
@@ -567,98 +325,38 @@ async def generate_tts(request: TTSRequest):
             "filename": filename,
             "oci_path": upload_result.get("path", f"{request.project_id}/voiceover/{filename}"),
             "model_used": current_model,
-            "model_type": request.model_type,
-            "voice_cloning_used": supports_voice_cloning() and speaker_wav is not None,
-            "voice_style": request.voice_name
         }
     except Exception as e:
         print(f"❌ TTS generation error: {str(e)}")
         return {
             "status": "error",
-            "message": f"TTS generation failed: {str(e)}",
-            "model": current_model,
-            "model_type": request.model_type if 'request' in locals() else "unknown",
-            "voice_cloning_supported": supports_voice_cloning()
         }
-async def list_voices_internal():
-    """Internal function to list available voices"""
-    voices_dir = Path("/tmp/voices")
-    voices = []
-    for item in voices_dir.iterdir():
-        if item.is_dir():
-            samples = list(item.glob("sample_*.wav"))
-            voices.append(item.name)
-        elif item.is_file() and item.suffix == ".wav":
-            voices.append(item.stem)
-    return voices
-@app.get("/api/models")
-async def list_models():
-    """List available TTS models"""
-    return {
-        "status": "success",
-        "models": AVAILABLE_MODELS,
-        "current_model": current_model if model_loaded else None,
-        "model_loaded": model_loaded
-    }
-@app.post("/api/set-model")
-async def set_model(model_type: str = Form(...)):
-    """Switch between different TTS models"""
-    if model_type not in AVAILABLE_MODELS:
-        raise HTTPException(status_code=400, detail=f"Model type '{model_type}' not found. Available: {list(AVAILABLE_MODELS.keys())}")
-    success = load_tts_model(model_type)
-    if success:
-        return {
-            "status": "success",
-            "message": f"Model switched to {AVAILABLE_MODELS[model_type]['name']}",
-            "model": current_model,
-            "voice_cloning_supported": voice_cloning_supported
-        }
-    else:
-        raise HTTPException(status_code=500, detail=f"Failed to load model: {model_type}")
-@app.get("/api/builtin-voices")
-async def get_builtin_voices():
-    """Get list of built-in voice styles"""
-    return {
-        "status": "success",
-        "voices": BUILTIN_VOICES,
-        "voice_cloning_supported": voice_cloning_supported
-    }
 @app.post("/api/batch-tts")
 async def batch_generate_tts(request: BatchTTSRequest):
-    """Enhanced batch TTS with model selection"""
     try:
-        # Lazy load model
         if not model_loaded:
-            if not load_tts_model(request.model_type):
-                raise HTTPException(status_code=500, detail=f"TTS model '{request.model_type}' failed to load")
         print(f"📥 Batch TTS request for {len(request.texts)} texts")
         results = []
         for i, text in enumerate(request.texts):
             try:
-                # Create individual TTS request
                 single_request = TTSRequest(
                     text=text,
                     project_id=request.project_id,
-                    voice_name=request.voice_name,
-                    language=request.language,
-                    model_type=request.model_type,
-                    speed=request.speed,
-                    temperature=request.temperature
                 )
-                # Use the single TTS endpoint
                 result = await generate_tts(single_request)
                 results.append({
                     "text_index": i,
@@ -679,246 +377,61 @@ async def batch_generate_tts(request: BatchTTSRequest):
             "status": "completed",
             "project_id": request.project_id,
             "results": results,
-            "model_used": current_model,
-            "model_type": request.model_type,
-            "voice_cloning": supports_voice_cloning() and request.voice_name != "default"
         }
     except Exception as e:
         print(f"❌ Batch TTS generation error: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Batch TTS generation failed: {str(e)}")
-@app.post("/api/clone-voice")
-async def api_clone_voice(
-    project_id: str = Form(...),
-    voice_name: str = Form(...),
-    description: str = Form(""),
-    files: List[UploadFile] = File(...),
-    model_type: str = Form("xtts-v2")
-):
-    """Enhanced voice cloning with model validation"""
-    try:
-        # Ensure we're using a model that supports voice cloning
-        if model_type != "xtts-v2":
-            raise HTTPException(
-                status_code=400,
-                detail="Voice cloning is only supported with the 'xtts-v2' model. Please switch to XTTS-v2 for voice cloning."
-            )
-        # Load XTTS model if not already loaded
-        if not model_loaded or current_model != AVAILABLE_MODELS["xtts-v2"]["model_name"]:
-            if not load_tts_model("xtts-v2"):
-                raise HTTPException(status_code=500, detail="XTTS-v2 model failed to load. Voice cloning requires XTTS-v2.")
-        # Save uploaded files temporarily
-        temp_files = []
-        for i, file in enumerate(files):
-            if not file.filename.lower().endswith(('.wav', '.mp3', '.ogg', '.flac')):
-                raise HTTPException(status_code=400, detail="Only audio files are allowed")
-            temp_path = f"/tmp/{uuid.uuid4()}_{file.filename}"
-            with open(temp_path, "wb") as f:
-                content = await file.read()
-                f.write(content)
-            temp_files.append(temp_path)
-        success, message = clone_voice(voice_name, temp_files, description)
-        # Clean up temporary files
-        for temp_file in temp_files:
-            try:
-                os.remove(temp_file)
-            except:
-                pass
-        if success:
-            return {
-                "status": "success",
-                "message": message,
-                "voice_name": voice_name,
-                "model_used": current_model
-            }
-        else:
-            raise HTTPException(status_code=500, detail=message)
-    except Exception as e:
-        print(f"❌ Voice cloning error: {str(e)}")
-        raise HTTPException(status_code=500, detail=f"Voice cloning failed: {str(e)}")
-@app.post("/api/upload-voice")
-async def upload_voice_sample(
-    project_id: str = Form(...),
-    voice_name: str = Form(...),
-    file: UploadFile = File(...)
-):
-    """Upload a voice sample for cloning"""
-    try:
-        print(f"📥 Voice upload request: {voice_name} for project {project_id}")
-        # Check if voice cloning is supported
-        if not supports_voice_cloning():
-            raise HTTPException(
-                status_code=400,
-                detail="Voice cloning is not supported with the current model. Please use the XTTS model for voice cloning."
-            )
-        # Validate file type
-        if not file.filename.lower().endswith(('.wav', '.mp3', '.ogg', '.flac')):
-            raise HTTPException(status_code=400, detail="Only audio files are allowed")
-        # Save voice sample
-        voice_path = f"/tmp/voices/{voice_name}.wav"
-        with open(voice_path, "wb") as f:
-            content = await file.read()
-            f.write(content)
-        print(f"✅ Voice sample saved: {voice_path}")
-        return {
-            "status": "success",
-            "message": "Voice sample uploaded successfully",
-            "voice_name": voice_name,
-            "local_path": voice_path
-        }
-    except Exception as e:
-        print(f"❌ Voice upload error: {str(e)}")
-        raise HTTPException(status_code=500, detail=f"Voice upload failed: {str(e)}")
-@app.get("/api/voices")
-async def list_voices():
-    """List available voices with enhanced information"""
-    try:
-        voices_dir = Path("/tmp/voices")
-        voices = []
-        # Add built-in voices
-        for voice_id, voice_info in BUILTIN_VOICES.items():
-            voices.append({
-                "name": voice_id,
-                "display_name": voice_info["name"],
-                "type": "builtin",
-                "gender": voice_info["gender"],
-                "language": voice_info["language"],
-                "samples_count": 0,
-                "created_at": "built-in"
-            })
-        # Add cloned voices
-        for item in voices_dir.iterdir():
-            if item.is_dir():
-                samples = list(item.glob("sample_*.wav"))
-                # Try to load metadata
-                metadata_path = item / "metadata.json"
-                metadata = {}
-                if metadata_path.exists():
-                    try:
-                        with open(metadata_path, 'r') as f:
-                            import json
-                            metadata = json.load(f)
-                    except:
-                        pass
-                voices.append({
-                    "name": item.name,
-                    "display_name": metadata.get("name", item.name),
-                    "type": "cloned",
-                    "gender": "custom",
-                    "language": "multilingual",
-                    "samples_count": len(samples),
-                    "description": metadata.get("description", ""),
-                    "created_at": metadata.get("created_at", datetime.fromtimestamp(item.stat().st_ctime).isoformat())
-                })
-            elif item.is_file() and item.suffix == ".wav":
-                voices.append({
-                    "name": item.stem,
-                    "display_name": item.stem,
-                    "type": "uploaded",
-                    "gender": "custom",
-                    "language": "unknown",
-                    "samples_count": 1,
-                    "created_at": datetime.fromtimestamp(item.stat().st_ctime).isoformat()
-                })
-        return {
-            "status": "success",
-            "voices": voices,
-            "voice_cloning_supported": supports_voice_cloning(),
-            "current_model": current_model
-        }
-    except Exception as e:
-        print(f"❌ List voices error: {str(e)}")
-        raise HTTPException(status_code=500, detail=f"Failed to list voices: {str(e)}")
 @app.get("/api/health")
 async def health_check():
-    """Enhanced health check with model information"""
     return {
-        "status": "healthy" if model_loaded else "loading",
         "tts_loaded": model_loaded,
         "model": current_model,
-        "model_config": active_model_config,
-        "voice_cloning_supported": voice_cloning_supported,
-        "device": DEVICE,
-        "load_attempts": model_load_attempts,
-        "timestamp": datetime.now().isoformat()
     }
-@app.post("/api/reload-model")
-async def reload_model(model_type: str = Form("xtts-v2")):
-    """Enhanced model reload with model selection"""
-    global tts, model_loaded, current_model, voice_cloning_supported
-    if model_type not in AVAILABLE_MODELS:
-        raise HTTPException(status_code=400, detail=f"Model type '{model_type}' not found")
-    # Clear current model
-    tts = None
-    model_loaded = False
-    current_model = ""
-    voice_cloning_supported = False
-    # Try to reload specified model
-    success = load_tts_model(model_type)
-    return {
-        "status": "success" if success else "error",
-        "message": f"Model {model_type} reloaded successfully" if success else f"Failed to reload model {model_type}",
-        "model_loaded": model_loaded,
-        "model": current_model,
-        "voice_cloning_supported": voice_cloning_supported
-    }
 @app.get("/")
 async def root():
-    """Enhanced root endpoint with model information"""
     return {
-        "message": "Enhanced TTS API with Multiple Voice Styles and Voice Cloning",
-        "endpoints": {
-            "POST /api/tts": "Generate TTS for a single text",
-            "POST /api/batch-tts": "Generate TTS for multiple texts",
-            "POST /api/upload-voice": "Upload a voice sample for cloning",
-            "POST /api/clone-voice": "Clone a voice from multiple samples",
-            "GET /api/voices": "List available voices",
-            "GET /api/builtin-voices": "List built-in voice styles",
-            "GET /api/models": "List available TTS models",
-            "POST /api/set-model": "Switch between TTS models",
-            "GET /api/health": "Health check",
-            "POST /api/reload-model": "Reload TTS model"
-        },
         "model_loaded": model_loaded,
-        "model_name": current_model if model_loaded else "None",
-        "model_type": list(AVAILABLE_MODELS.keys())[0] if active_model_config else "None",
-        "voice_cloning_supported": supports_voice_cloning(),
-        "builtin_voices_count": len(BUILTIN_VOICES)
     }
 if __name__ == "__main__":
     import uvicorn
-    print("🚀 Starting Enhanced TTS API with Multiple Voice Styles and Voice Cloning...")
-    print("📊 API endpoints available at: http://localhost:7860/")
-    print("💡 Model will be loaded on first request to save memory")
-    print("🎵 Available models:", list(AVAILABLE_MODELS.keys()))
-    print("🗣️ Built-in voices:", list(BUILTIN_VOICES.keys()))
     uvicorn.run(app, host="0.0.0.0", port=7860)

 import torch
 import numpy as np
+# Configure environment with storage limits
 os.makedirs("/tmp/voices", exist_ok=True)
 os.makedirs("/tmp/output", exist_ok=True)
 # Initialize FastAPI app
+app = FastAPI(title="Storage-Optimized TTS API", description="API for text-to-speech with storage management")
 # Add CORS middleware
 app.add_middleware(
 print(f"✅ Using device: {DEVICE}")
+# STORAGE OPTIMIZATION: Use only ONE high-quality model to save space
 AVAILABLE_MODELS = {
     "tacotron2-ddc": {
         "name": "Tacotron2-DDC",
         "model_name": "tts_models/en/ljspeech/tacotron2-DDC",
+        "description": "High-quality English TTS (Excellent natural voice)",
         "languages": ["en"],
         "voice_cloning": False,
+        "size_mb": 150,  # Approximate size
+        "quality": "excellent"
     }
 }
+# Simple voice styles for the single model
+VOICE_STYLES = {
+    "default": {
+        "name": "Default Voice",
+        "description": "Clear and natural English voice",
+        "gender": "neutral"
     },
+    "clear": {
+        "name": "Clear Voice",
+        "description": "Very clear and articulate voice",
+        "gender": "neutral"
     },
+    "professional": {
+        "name": "Professional Voice",
+        "description": "Professional and authoritative voice",
+        "gender": "neutral"
     }
 }
 tts = None
 model_loaded = False
 current_model = ""
 model_loading = False
 # Pydantic models
 class TTSRequest(BaseModel):
     # Request body for POST /api/tts (single-text synthesis).
     text: str  # raw input text; the handler runs it through clean_text() before synthesis
     project_id: str  # forwarded to upload_to_oci() and used in the returned OCI path
+    voice_style: Optional[str] = "default"  # logged and echoed in the response; NOTE(review): not passed to tts_to_file() in the visible handler
     speed: Optional[float] = 1.0  # NOTE(review): no visible handler line reads this value - confirm whether it is applied anywhere
 class BatchTTSRequest(BaseModel):
     # Request body for POST /api/batch-tts; each entry of `texts` becomes one TTSRequest.
     texts: List[str]  # processed in order; the list index is reported as "text_index"
     project_id: str  # shared by every per-text TTSRequest built from this batch
+    voice_style: Optional[str] = "default"  # copied unchanged into each per-text TTSRequest
     speed: Optional[float] = 1.0  # copied unchanged into each per-text TTSRequest
+# Storage management functions
def cleanup_old_files():
    """Delete stale ``.wav`` files from the temp directories, then log disk usage.

    Two sweeps are performed:

    * ``/tmp/output`` (top level only): files older than 1 hour.
    * ``/tmp/voices`` (recursive): files older than 24 hours.

    Cleanup is best-effort and never raises. A failure on one entry (for
    example a file that disappeared in the meantime, or a directory whose
    name ends in ``.wav``) is logged and skipped, so it cannot prevent the
    remaining files or the second directory from being cleaned.
    """
    try:
        # One reference timestamp keeps the cutoff consistent for the whole run.
        now = time.time()

        # (directory, recurse into subdirectories, max age in seconds, log label)
        sweeps = (
            (Path("/tmp/output"), False, 3600, "file"),
            (Path("/tmp/voices"), True, 86400, "voice file"),
        )

        for directory, recursive, max_age, label in sweeps:
            if not directory.exists():
                continue

            cutoff = now - max_age
            candidates = directory.rglob("*.wav") if recursive else directory.glob("*.wav")
            for file in candidates:
                try:
                    if file.stat().st_mtime < cutoff:
                        file.unlink()
                        print(f"🧹 Cleaned up old {label}: {file}")
                except OSError as e:
                    # Skip just this entry; keep sweeping the rest.
                    print(f"⚠️ Could not clean up {file}: {e}")

        # Check storage usage
        check_storage_usage()

    except Exception as e:
        # Top-level boundary: cleanup must never break request handling.
        print(f"⚠️ Cleanup error: {e}")
def check_storage_usage():
    """Log free/total space on ``/tmp`` and report whether there is enough left.

    Returns ``False`` (after printing a warning) when less than 2 GiB is
    free, ``True`` otherwise. If the disk query itself fails, the error is
    printed and ``True`` is returned so callers are not blocked.
    """
    try:
        import shutil

        # Query the filesystem that holds /tmp.
        total, _used, free = shutil.disk_usage("/tmp")
        print(f"💾 Storage: {free // (2**30)}GB free of {total // (2**30)}GB total")

        low_water_mark = 2 * (2**30)  # 2 GiB
        enough_space = not (free < low_water_mark)
        if not enough_space:
            print("🚨 WARNING: Low storage space!")
        return enough_space

    except Exception as e:
        print(f"⚠️ Storage check error: {e}")
        return True
 def clean_text(text):
+    """Clean text for TTS generation"""
     import re
     if not text or not isinstance(text, str):
         return "Hello"
+    # Remove any problematic characters but keep basic punctuation
+    text = re.sub(r'[^\w\s\.\,\!\?\-\'\"\:\;]', '', text)
     text = re.sub(r'\s+', ' ', text)
     if len(text) > 10 and not re.search(r'[\.\!\?]$', text):
         text = text + '.'
     text = text.strip()
     if not text:
         text = "Hello world"
     return text
 def upload_to_oci(file_path: str, filename: str, project_id: str, file_type="voiceover"):
+    """Upload file to OCI"""
     try:
         if not OCI_UPLOAD_API_URL:
             return None, "OCI upload API URL not configured"
     except Exception as e:
         return None, f"Upload error: {str(e)}"
+def load_tts_model(model_type="tacotron2-ddc"):
+    """Load TTS model with storage optimization"""
+    global tts, model_loaded, current_model, model_loading
     if model_loading:
         print("⏳ Model is already being loaded...")
         return False
     if model_type not in AVAILABLE_MODELS:
+        print(f"❌ Model type '{model_type}' not found.")
         return False
     model_loading = True
     try:
+        # Clean up before loading new model
+        cleanup_old_files()
         from TTS.api import TTS
         # Handle TOS acceptance automatically
             # Load the selected model
             tts = TTS(model_config["model_name"]).to(DEVICE)
+            # Test the model
+            test_path = "/tmp/test_output.wav"
+            tts.tts_to_file(text="Test", file_path=test_path)
+            if os.path.exists(test_path):
+                os.remove(test_path)
+                print("✅ Model tested successfully!")
             model_loaded = True
             current_model = model_config["model_name"]
             print(f"✅ {model_config['name']} loaded successfully!")
+            print(f"   Size: ~{model_config['size_mb']}MB")
+            print(f"   Quality: {model_config['quality']}")
             return True
         except Exception as e:
+            print(f"❌ Model failed to load: {e}")
             return False
         finally:
     finally:
         model_loading = False
+# API endpoints
 @app.post("/api/tts")
 async def generate_tts(request: TTSRequest):
+    """Generate TTS with storage optimization"""
     try:
+        # Clean up before processing
+        cleanup_old_files()
+        # Lazy load model
         if not model_loaded:
+            if not load_tts_model("tacotron2-ddc"):
                 return {
                     "status": "error",
+                    "message": "TTS model failed to load. Please check storage space.",
                     "requires_tos_acceptance": True,
                     "tos_url": "https://coqui.ai/cpml.txt"
                 }
         print(f"📥 TTS request for project: {request.project_id}")
+        print(f"   Voice Style: {request.voice_style}")
         print(f"   Text length: {len(request.text)} characters")
+        # Generate unique filename
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         filename = f"voiceover_{timestamp}.wav"
         output_path = f"/tmp/output/{filename}"
         # Ensure output directory exists
         os.makedirs(os.path.dirname(output_path), exist_ok=True)
+        # Clean the text
         cleaned_text = clean_text(request.text)
+        print(f"📝 Text: '{cleaned_text}'")
+        # Generate TTS
         try:
+            tts.tts_to_file(
+                text=cleaned_text,
+                file_path=output_path
+            )
         except Exception as tts_error:
             print(f"❌ TTS generation failed: {tts_error}")
+            raise tts_error
         # Verify the file was created
         if not os.path.exists(output_path):
+            raise Exception(f"TTS failed to create output file")
         file_size = os.path.getsize(output_path)
         print(f"✅ TTS generated: {output_path} ({file_size} bytes)")
         # Upload to OCI
+        upload_result, error = upload_to_oci(output_path, filename, request.project_id)
         if error:
             print(f"❌ OCI upload failed: {error}")
         print(f"✅ Upload successful: {filename}")
+        # Clean up local file immediately after upload
         try:
             os.remove(output_path)
             print(f"🧹 Cleaned up local file: {output_path}")
             "filename": filename,
             "oci_path": upload_result.get("path", f"{request.project_id}/voiceover/{filename}"),
             "model_used": current_model,
+            "voice_style": request.voice_style
         }
     except Exception as e:
         print(f"❌ TTS generation error: {str(e)}")
         return {
             "status": "error",
+            "message": f"TTS generation failed: {str(e)}"
         }
 @app.post("/api/batch-tts")
 async def batch_generate_tts(request: BatchTTSRequest):
+    """Batch TTS with storage optimization"""
     try:
+        cleanup_old_files()
         if not model_loaded:
+            if not load_tts_model("tacotron2-ddc"):
+                raise HTTPException(status_code=500, detail="TTS model failed to load")
         print(f"📥 Batch TTS request for {len(request.texts)} texts")
         results = []
         for i, text in enumerate(request.texts):
             try:
                 single_request = TTSRequest(
                     text=text,
                     project_id=request.project_id,
+                    voice_style=request.voice_style,
+                    speed=request.speed
                 )
                 result = await generate_tts(single_request)
                 results.append({
                     "text_index": i,
             "status": "completed",
             "project_id": request.project_id,
             "results": results,
+            "model_used": current_model
         }
     except Exception as e:
         print(f"❌ Batch TTS generation error: {str(e)}")
         raise HTTPException(status_code=500, detail=f"Batch TTS generation failed: {str(e)}")
@app.get("/api/voice-styles")
async def get_voice_styles():
    """Return the voice-style catalogue and the currently loaded model name.

    ``current_model`` is ``None`` in the response while no model is loaded.
    """
    active_model = None
    if model_loaded:
        active_model = current_model

    payload = {
        "status": "success",
        "voice_styles": VOICE_STYLES,
        "current_model": active_model,
    }
    return payload
 @app.get("/api/health")
 async def health_check():
+    """Health check with storage info"""
+    storage_ok = check_storage_usage()
     return {
+        "status": "healthy" if model_loaded and storage_ok else "warning",
         "tts_loaded": model_loaded,
         "model": current_model,
+        "storage_ok": storage_ok,
+        "device": DEVICE
     }
@app.post("/api/cleanup")
async def manual_cleanup():
    """Run the temp-file cleanup on demand.

    Returns a success payload once ``cleanup_old_files()`` has returned.
    Any exception escaping the cleanup is converted into an HTTP 500.
    """
    try:
        cleanup_old_files()
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Cleanup failed: {str(e)}")
    else:
        outcome = {}
        outcome["status"] = "success"
        outcome["message"] = "Cleanup completed successfully"
        return outcome
 @app.get("/")
 async def root():
+    """Root endpoint"""
     return {
+        "message": "Storage-Optimized TTS API",
         "model_loaded": model_loaded,
+        "model": current_model if model_loaded else "None",
+        "storage_optimized": True
     }
 if __name__ == "__main__":
     # Script entry point: print a startup banner, log disk usage once, then serve the app.
     import uvicorn
+    print("🚀 Starting Storage-Optimized TTS API...")
+    print("💾 Storage management enabled")
+    print("🔊 Using Tacotron2-DDC for best quality/size ratio")
+    check_storage_usage()  # logs free space on /tmp; the return value is ignored here
     uvicorn.run(app, host="0.0.0.0", port=7860)  # bind on all interfaces, port 7860