Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, HTTPException, Form, BackgroundTasks | |
| from fastapi.responses import FileResponse | |
| from kokoro_onnx import Kokoro | |
| import tempfile | |
| import os | |
| from datetime import datetime | |
| import soundfile as sf | |
# ============== CONFIG ==============
# Request limits. The products keep the derivation of each number visible.
MAX_CHARS = 5 * 900  # ~5 minutes of audio (speaking rate: ~900 chars/min)
MIN_CHARS = 5  # lower bound on accepted text length, in characters
MAX_AUDIO_DURATION = 5 * 60  # 5 minutes of audio, expressed in seconds
# ============== KOKORO TTS MODEL ==============
# Try to load the model eagerly at import time. If that fails for any reason,
# `kokoro` is left as None so that startup() can download the files and retry.
print("🎤 Loading Kokoro TTS model...")
try:
    # Pass the voices *file* -- the same "voices/voices.bin" path that
    # startup() downloads and loads -- rather than the bare "voices" directory.
    kokoro = Kokoro("kokoro-v0_19.onnx", "voices/voices.bin")
    print("✅ Kokoro TTS loaded successfully!")
except Exception as e:
    print("⚠️ Kokoro not found locally. Will download on first use.")
    # Surface the real cause; a corrupt file or bad path would otherwise be
    # indistinguishable from "files not downloaded yet".
    print(f"   Reason: {e}")
    kokoro = None
# FastAPI application object; the metadata below is passed straight to the
# FastAPI constructor.
app = FastAPI(
    version="2.0",
    title="Kokoro TTS API - Fast & Simple",
    description="High-speed text-to-speech with emotional voices",
)
def _download_atomically(url: str, dest: str) -> None:
    """Download *url* to *dest* so that *dest* only ever holds a complete file."""
    import urllib.request

    partial = dest + ".part"
    try:
        urllib.request.urlretrieve(url, partial)
        # os.replace is atomic on the same filesystem: `dest` appears only
        # once the download has fully succeeded.
        os.replace(partial, dest)
    finally:
        # On failure, drop the incomplete file so it is never mistaken for a
        # finished download by a later os.path.exists() check.
        if os.path.exists(partial):
            os.unlink(partial)


def startup():
    """Download the Kokoro model files if missing and initialise the global model.

    Does nothing when the module-level ``kokoro`` is already loaded. Otherwise
    it ensures ``voices/voices.bin`` and ``kokoro-v0_19.onnx`` exist locally
    (downloading whichever is absent) and assigns a new ``Kokoro`` instance to
    the global ``kokoro``.

    Raises:
        Whatever ``urllib.request.urlretrieve`` or ``Kokoro(...)`` raise; no
        error is swallowed here.
    """
    # NOTE(review): nothing in the visible source registers or calls this
    # function (e.g. as a FastAPI startup hook) -- confirm it is wired up,
    # otherwise `kokoro` stays None whenever the eager load fails.
    global kokoro
    if kokoro is not None:
        return

    print("📥 Downloading Kokoro TTS model files...")

    # Create directory for voices
    os.makedirs("voices", exist_ok=True)

    # Download voices file
    voices_file = "voices/voices.bin"
    if not os.path.exists(voices_file):
        print("Downloading voices.bin...")
        _download_atomically(
            "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/voices.bin",
            voices_file,
        )
        print("✅ Voices downloaded!")

    # Download ONNX model
    model_file = "kokoro-v0_19.onnx"
    if not os.path.exists(model_file):
        print("Downloading kokoro-v0_19.onnx...")
        _download_atomically(
            "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/kokoro-v0_19.onnx",
            model_file,
        )
        print("✅ Model downloaded!")

    print("🎤 Initializing Kokoro TTS...")
    kokoro = Kokoro(model_file, voices_file)
    print("✅ Kokoro TTS loaded!")
| # ============== HELPERS ============== | |
def cleanup_file(path: str) -> None:
    """Delete a temporary file after the response is sent.

    Best-effort: filesystem errors never propagate to the caller. A file that
    is already gone is ignored silently; any other ``OSError`` is reported on
    stdout so leaked temp files are visible in the logs.

    Args:
        path: Filesystem path of the file to remove.
    """
    try:
        # EAFP: unlink directly instead of exists()-then-unlink, which can
        # race with another deleter.
        os.unlink(path)
    except FileNotFoundError:
        # Already removed -- nothing left to clean up.
        pass
    except OSError as exc:
        # Only filesystem errors are swallowed; KeyboardInterrupt/SystemExit
        # are no longer caught as they were by the bare `except:`.
        print(f"⚠️ Could not delete temporary file {path}: {exc}")
def generate_speech(text: str, voice: str = "bf_isabella", speed: float = 1.0) -> str:
    """
    Generate speech using Kokoro TTS and write it to a temporary WAV file.

    Available voices: af_heart, af_bella, am_adam, am_michael, bf_emma, bf_isabella

    Args:
        text: Text to synthesise; its length must be within
            [MIN_CHARS, MAX_CHARS].
        voice: Voice identifier passed through to ``kokoro.create``.
        speed: Speed factor passed through to ``kokoro.create``.

    Returns:
        Path of the generated ``.wav`` file. The file is not deleted
        automatically; the caller is responsible for removing it.

    Raises:
        ValueError: If ``text`` is shorter than MIN_CHARS or longer than MAX_CHARS.
        RuntimeError: If the global ``kokoro`` model has not been loaded.
    """
    if len(text) < MIN_CHARS:
        raise ValueError(f"Text too short. Minimum {MIN_CHARS} characters.")
    if len(text) > MAX_CHARS:
        raise ValueError(f"Text too long. Maximum {MAX_CHARS} characters (~5 min audio).")

    # Fail with a clear message instead of "'NoneType' has no attribute 'create'".
    if kokoro is None:
        raise RuntimeError("Kokoro TTS model is not loaded.")

    # Generate audio samples
    samples, sample_rate = kokoro.create(
        text=text,
        voice=voice,
        speed=speed,
        lang="en-us",
    )

    # Reserve a temp path and close our handle before writing, so the file is
    # not written by name while another handle to it is still open.
    fd, output_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        sf.write(output_path, samples, sample_rate)
    except Exception:
        # Do not leak the temp file when encoding/writing fails.
        try:
            os.unlink(output_path)
        except OSError:
            pass  # keep the original write error as the one that propagates
        raise
    return output_path
| # ============== API ENDPOINTS ============== | |
def root():
    """Return a static description of the service, its features and endpoints."""
    feature_summary = {
        "speed": "10x faster than XTTS",
        "voices": 6,
        "max_chars": MAX_CHARS,
        "emotional": True,
    }
    endpoint_index = {
        "health": "/health",
        "generate": "/api/generate (POST)",
        "docs": "/docs",
    }
    # Assemble the payload in the same key order as the nested pieces above.
    service_info = {
        "service": "Kokoro TTS API",
        "status": "running",
        "model": "Kokoro-82M",
        "version": "2.0",
    }
    service_info["features"] = feature_summary
    service_info["endpoints"] = endpoint_index
    return service_info
def health():
    """Return a static health report: status, model name, limits and voice ids."""
    voice_ids = [
        "af_heart",
        "af_bella",
        "am_adam",
        "am_michael",
        "bf_emma",
        "bf_isabella",
    ]
    report = dict(
        status="healthy",
        model="Kokoro TTS 82M",
        speed="10x faster than XTTS",
        max_chars=MAX_CHARS,
        voices=voice_ids,
    )
    return report
async def generate_tts(
    background_tasks: BackgroundTasks,
    text: str = Form(..., description="Text to convert to speech"),
    voice: str = Form("bf_isabella", description="Voice to use"),
    speed: float = Form(1.0, description="Speech speed (0.5-2.0)")
):
    """
    Generate TTS with Kokoro (Fast & Emotional)

    **Performance:**
    - Max audio: 5 minutes (4500 chars)
    - Generation: ~20-30 seconds on CPU
    - Speech rate: ~900 chars/minute

    **Available Voices:**
    - `af_heart`: American Female (warm)
    - `af_bella`: American Female (professional)
    - `am_adam`: American Male (confident)
    - `am_michael`: American Male (friendly)
    - `bf_emma`: British Female (elegant)
    - `bf_isabella`: British Female (storytelling) ⭐ Best for long content

    **Example:**
    ```bash
    curl -X POST https://your-space.hf.space/api/generate \\
    -F "text=Hello world, this is Kokoro TTS!" \\
    -F "voice=bf_isabella" \\
    -F "speed=1.0" \\
    --output audio.wav
    ```
    """
    # Validate speed *outside* the try block: raised inside it, this 400 was
    # caught by `except Exception` below and re-reported as a 500.
    # The chained comparison also rejects NaN, which passes `< 0.5 or > 2.0`.
    if not (0.5 <= speed <= 2.0):
        raise HTTPException(status_code=400, detail="Speed must be between 0.5 and 2.0")

    try:
        # Generate speech.
        # NOTE(review): generate_speech is a synchronous call made from this
        # coroutine, so it presumably blocks the event loop while synthesising
        # -- consider offloading to a thread pool if concurrency matters.
        output_path = generate_speech(text.strip(), voice, speed)

        # Schedule cleanup after response is sent
        background_tasks.add_task(cleanup_file, output_path)

        # Return audio file
        response = FileResponse(
            output_path,
            media_type="audio/wav",
            filename=f"kokoro_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
        )
        response.headers["X-Character-Count"] = str(len(text))
        response.headers["X-Voice-Used"] = voice
        return response
    except HTTPException:
        # Never downgrade an intentional HTTP error into a generic 500.
        raise
    except ValueError as e:
        # Length validation failures from generate_speech are client errors.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"TTS generation failed: {str(e)}") from e
if __name__ == "__main__":
    # Script entry point: serve `app` with uvicorn on all interfaces.
    import uvicorn

    bind_host, bind_port = "0.0.0.0", 7860
    uvicorn.run(app, host=bind_host, port=bind_port)