| | from fastapi import FastAPI, HTTPException, Form, BackgroundTasks |
| | from fastapi.responses import FileResponse |
| | from kokoro_onnx import Kokoro |
| | import tempfile |
| | import os |
| | from datetime import datetime |
| | import soundfile as sf |
| |
|
| | |
# Bounds on the input text length, in characters; generate_speech() rejects
# text outside this range before any synthesis is attempted.
MIN_CHARS: int = 5
MAX_CHARS: int = 4_500

# Upper bound on audio length. Presumably seconds (300 s = 5 min, matching
# the "~5 min audio" wording used elsewhere) -- confirm against its users.
MAX_AUDIO_DURATION: int = 300
| |
|
| | |
# Eagerly try to load a model that is already on disk. When that fails,
# ``kokoro`` is left as ``None`` and the startup() hook downloads the files
# and initializes the model instead.
print("🎤 Loading Kokoro TTS model...")
try:
    # The second argument is the voices *file*; use the same path startup()
    # downloads to, not the bare "voices" directory.
    kokoro = Kokoro("kokoro-v0_19.onnx", "voices/voices.bin")
    print("✅ Kokoro TTS loaded successfully!")
except Exception as e:
    print("⚠️ Kokoro not found locally. Will download on first use.")
    # Surface the real cause so a corrupt file is not mistaken for a missing one.
    print(f"   Load error: {e!r}")
    kokoro = None
| |
|
# ASGI application object; the title, description and version below are the
# metadata handed to FastAPI for this service.
app = FastAPI(
    version="2.0",
    title="Kokoro TTS API - Fast & Simple",
    description="High-speed text-to-speech with emotional voices",
)
| |
|
def _download_if_missing(url: str, dest: str, label: str) -> None:
    """Download *url* to *dest* unless *dest* already exists.

    The payload is first written to ``<dest>.part`` and only renamed onto
    *dest* once the transfer has finished, so an interrupted download never
    leaves a truncated file that a later existence check would accept.

    Args:
        url: Location of the file to fetch.
        dest: Final path of the file on disk.
        label: Human-readable name used in the completion message.
    """
    import urllib.request

    if os.path.exists(dest):
        return

    print(f"Downloading {os.path.basename(dest)}...")
    partial = f"{dest}.part"
    try:
        urllib.request.urlretrieve(url, partial)
        os.replace(partial, dest)
    finally:
        # After a successful rename the partial file is gone; if anything
        # failed, remove whatever was written so far.
        if os.path.exists(partial):
            os.unlink(partial)
    print(f"✅ {label} downloaded!")


@app.on_event("startup")
def startup():
    """Ensure the Kokoro model is loaded before the app serves requests.

    Does nothing when the module-level load already succeeded. Otherwise
    downloads any missing model/voices file and initializes the global
    ``kokoro`` instance from them.
    """
    global kokoro
    if kokoro is not None:
        return

    print("📥 Downloading Kokoro TTS model files...")

    os.makedirs("voices", exist_ok=True)

    voices_file = "voices/voices.bin"
    _download_if_missing(
        "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/voices.bin",
        voices_file,
        "Voices",
    )

    model_file = "kokoro-v0_19.onnx"
    _download_if_missing(
        "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/kokoro-v0_19.onnx",
        model_file,
        "Model",
    )

    print("🎤 Initializing Kokoro TTS...")
    kokoro = Kokoro(model_file, voices_file)
    print("✅ Kokoro TTS loaded!")
| |
|
| | |
def cleanup_file(path: str):
    """Delete the temporary file at *path* after the response is sent.

    Deletion is best-effort: a file that is already gone is ignored, and any
    other filesystem error is reported but not raised, so a failed cleanup
    never breaks the background task that runs it.

    Args:
        path: Path of the file to remove.
    """
    try:
        # Unlink directly instead of checking existence first; the check
        # would only add a window in which the file can disappear.
        os.unlink(path)
    except FileNotFoundError:
        # Already removed -- nothing left to clean up.
        pass
    except (OSError, ValueError) as exc:
        # Only filesystem/path errors are swallowed; KeyboardInterrupt and
        # SystemExit are no longer caught as they were by a bare ``except``.
        print(f"Could not delete temporary file {path}: {exc}")
| |
|
def generate_speech(text: str, voice: str = "bf_isabella", speed: float = 1.0) -> str:
    """Generate speech using Kokoro TTS and write it to a temporary WAV file.

    Available voices: af_heart, af_bella, am_adam, am_michael, bf_emma, bf_isabella

    Args:
        text: Text to synthesize; its length must be within
            ``MIN_CHARS``..``MAX_CHARS``.
        voice: Voice identifier passed through to Kokoro.
        speed: Speech speed multiplier passed through to Kokoro.

    Returns:
        Path of the generated ``.wav`` file. The file is not deleted
        automatically; the caller is responsible for removing it.

    Raises:
        ValueError: If *text* is shorter than ``MIN_CHARS`` or longer than
            ``MAX_CHARS``.
        RuntimeError: If the Kokoro model has not been loaded.
    """
    if len(text) < MIN_CHARS:
        raise ValueError(f"Text too short. Minimum {MIN_CHARS} characters.")
    if len(text) > MAX_CHARS:
        raise ValueError(f"Text too long. Maximum {MAX_CHARS} characters (~5 min audio).")

    # Fail with a clear message instead of an AttributeError on ``None``.
    if kokoro is None:
        raise RuntimeError("Kokoro TTS model is not loaded")

    samples, sample_rate = kokoro.create(
        text=text,
        voice=voice,
        speed=speed,
        lang="en-us",
    )

    # Reserve a unique path, then close the handle before soundfile opens the
    # file by name: reopening a still-open NamedTemporaryFile is not portable.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name

    try:
        sf.write(wav_path, samples, sample_rate)
    except Exception:
        # delete=False means nothing else will ever remove this file.
        cleanup_file(wav_path)
        raise
    return wav_path
| |
|
| | |
@app.get("/")
def root():
    """Return a static description of the service, its features and endpoints."""
    feature_summary = {
        "speed": "10x faster than XTTS",
        "voices": 6,
        "max_chars": MAX_CHARS,
        "emotional": True,
    }
    endpoint_index = {
        "health": "/health",
        "generate": "/api/generate (POST)",
        "docs": "/docs",
    }
    # Key order is kept as-is so the serialized JSON reads the same.
    return {
        "service": "Kokoro TTS API",
        "status": "running",
        "model": "Kokoro-82M",
        "version": "2.0",
        "features": feature_summary,
        "endpoints": endpoint_index,
    }
| |
|
@app.get("/health")
def health():
    """Return a fixed health payload listing the model, limits and voices."""
    voice_ids = [
        "af_heart",
        "af_bella",
        "am_adam",
        "am_michael",
        "bf_emma",
        "bf_isabella",
    ]
    payload = {
        "status": "healthy",
        "model": "Kokoro TTS 82M",
        "speed": "10x faster than XTTS",
        "max_chars": MAX_CHARS,
    }
    payload["voices"] = voice_ids
    return payload
| |
|
@app.post("/api/generate")
async def generate_tts(
    background_tasks: BackgroundTasks,
    text: str = Form(..., description="Text to convert to speech"),
    voice: str = Form("bf_isabella", description="Voice to use"),
    speed: float = Form(1.0, description="Speech speed (0.5-2.0)")
):
    """
    Generate TTS with Kokoro (Fast & Emotional)

    **Performance:**
    - Max audio: 5 minutes (4500 chars)
    - Generation: ~20-30 seconds on CPU
    - Speech rate: ~900 chars/minute

    **Available Voices:**
    - `af_heart`: American Female (warm)
    - `af_bella`: American Female (professional)
    - `am_adam`: American Male (confident)
    - `am_michael`: American Male (friendly)
    - `bf_emma`: British Female (elegant)
    - `bf_isabella`: British Female (storytelling) ⭐ Best for long content

    **Example:**
    ```bash
    curl -X POST https://your-space.hf.space/api/generate \\
      -F "text=Hello world, this is Kokoro TTS!" \\
      -F "voice=bf_isabella" \\
      -F "speed=1.0" \\
      --output audio.wav
    ```
    """
    # Validate outside the try block: an HTTPException raised inside it would
    # be caught by the generic handler below and returned as a 500.
    # The chained comparison also rejects NaN, which passes a plain
    # ``speed < 0.5 or speed > 2.0`` test.
    if not 0.5 <= speed <= 2.0:
        raise HTTPException(status_code=400, detail="Speed must be between 0.5 and 2.0")

    try:
        # NOTE(review): generate_speech() is synchronous and is called directly
        # from this coroutine, so the event loop is blocked while it runs.
        output_path = generate_speech(text.strip(), voice, speed)

        # Remove the temporary WAV once the response has been sent.
        background_tasks.add_task(cleanup_file, output_path)

        response = FileResponse(
            output_path,
            media_type="audio/wav",
            filename=f"kokoro_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
        )
        response.headers["X-Character-Count"] = str(len(text))
        response.headers["X-Voice-Used"] = voice

        return response

    except HTTPException:
        # Keep an intentional HTTP error's status code intact.
        raise
    except ValueError as e:
        # Length-validation failures from generate_speech() are client errors.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"TTS generation failed: {e}") from e
| |
|
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces,
    # port 7860. uvicorn is imported here so that importing this module
    # does not require it.
    import uvicorn

    uvicorn.run(app, port=7860, host="0.0.0.0")
| |
|