# kokoro / app.py
# triflix's picture
# Update app.py
# 6807c58 verified
import os
# Redirect the Hugging Face cache to /tmp/hf_cache so it lands on a writable
# path. Both variables point at the same directory. These assignments sit
# above the remaining imports so the variables are already set by the time
# those libraries are loaded.
# NOTE(review): presumably the default cache location is not writable in the
# hosting container — confirm against the deployment.
os.environ["HF_HOME"] = "/tmp/hf_cache"
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
import io

import soundfile as sf
import torch
from fastapi import FastAPI, Query
from fastapi.responses import JSONResponse, StreamingResponse
from kokoro import KPipeline
# FastAPI application object; the route decorators further down attach to it.
app = FastAPI(title="Kokoro TTS API")

# Build the Kokoro pipeline a single time at import so that every request
# reuses the same instance. lang_code "a" is the original author's
# "default English" selection.
pipeline = KPipeline(lang_code="a")
@app.post("/tts")
async def tts_endpoint(text: str = Query(..., min_length=1)):
    """Synthesize ``text`` with the shared Kokoro pipeline and return WAV audio.

    Args:
        text: Text to speak, supplied as a query parameter; must contain at
            least one character.

    Returns:
        A ``StreamingResponse`` with media type ``audio/wav`` on success, or
        a ``JSONResponse`` with HTTP status 500 and an ``{"error": ...}``
        body when synthesis fails.
    """
    # NOTE(review): synthesis runs synchronously inside this coroutine, so
    # the event loop is occupied for the whole duration of a request.
    try:
        segments = []
        results = pipeline(
            text, voice='af_heart', speed=1.0, split_pattern=r'\n+'
        )
        for result in results:
            # The audio is the LAST field of each yielded item. Star-unpacking
            # keeps this correct however many leading fields precede it.
            # NOTE(review): Kokoro's documented usage unpacks three values
            # (graphemes, phonemes, audio); a fixed four-name unpack raises
            # ValueError on such items — confirm against the installed
            # version.
            *_, audio = result
            if audio is None:
                # Nothing was synthesized for this chunk; skip it.
                continue
            # Detach and move to CPU so the later .numpy() call cannot fail
            # on a tensor that tracks gradients or lives on another device.
            segments.append(torch.as_tensor(audio).detach().cpu())
        if not segments:
            # Report a readable reason instead of letting torch.cat fail on
            # an empty list.
            raise ValueError("no audio was produced for the given text")
        audio_tensor = torch.cat(segments, dim=0)
    except Exception as e:
        # Same JSON body as before, now with a 5xx status so clients do not
        # mistake a failure for a successful audio response.
        return JSONResponse(
            status_code=500,
            content={"error": f"TTS generation failed: {e}"},
        )

    # Encode to WAV entirely in memory; 24000 is the sample rate in Hz
    # handed to soundfile.
    buffer = io.BytesIO()
    sf.write(buffer, audio_tensor.numpy(), 24000, format="WAV")
    buffer.seek(0)
    return StreamingResponse(buffer, media_type="audio/wav")