# kokoro-api / main.py
# Update main.py (a1f9f55, verified) — saidchrf
import hmac
import io
import logging
import os

import numpy as np
from fastapi import FastAPI, HTTPException, Header
from fastapi.responses import StreamingResponse
from kokoro import KPipeline
from pydantic import BaseModel
# --- Logging ---
# Configure the root logger at INFO level, then create this module's named
# logger ("tts_stream").
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("tts_stream")
# FastAPI application object; endpoints are registered on it with decorators.
app = FastAPI()
# Load the accepted API keys from the comma-separated VALID_HF_KEYS
# environment variable.
# Each entry is stripped of surrounding whitespace and empty entries are
# dropped, so an unset/empty variable yields no keys (instead of [''])
# and a value such as "key1, key2" yields usable keys.
VALID_HF_KEYS = [
    key.strip()
    for key in os.environ.get('VALID_HF_KEYS', '').split(',')
    if key.strip()
]
# Initialize the pipeline once at import time; the request handler below
# reuses this single module-level instance on the CPU.
# NOTE(review): lang_code='a' is presumably kokoro's American-English code —
# confirm against the kokoro documentation.
pipeline = KPipeline(lang_code='a', device='cpu')
logger.info("✅ KPipeline loaded successfully.")
class TTSRequest(BaseModel):
    """Request body accepted by the ``/tts/stream`` endpoint."""

    # Text to synthesize (required).
    text: str
    # Voice identifier forwarded to the pipeline.
    voice: str = "af_heart"
    # Speed value forwarded to the pipeline.
    speed: float = 1.0
def _is_valid_key(candidate):
    """Return True when *candidate* equals one of the keys in VALID_HF_KEYS.

    Each comparison uses ``hmac.compare_digest`` and the loop does not stop
    at the first match, to reduce timing side channels compared with a plain
    ``in`` membership test.
    """
    if not candidate:
        return False
    # compare_digest rejects non-ASCII str arguments, so compare as bytes.
    # "surrogateescape" keeps environment-derived values encodable.
    candidate_bytes = candidate.encode("utf-8", "surrogateescape")
    matched = False
    for valid_key in VALID_HF_KEYS:
        valid_bytes = valid_key.encode("utf-8", "surrogateescape")
        if hmac.compare_digest(candidate_bytes, valid_bytes):
            matched = True
    return matched


@app.post("/tts/stream")
async def stream_speech(request: TTSRequest, hf_key: str = Header(None)):
    """Synthesize ``request.text`` and stream it back as raw PCM audio.

    Args:
        request: Text, voice and speed for the synthesis.
        hf_key: API key taken from the request headers (FastAPI maps the
            parameter name to the ``hf-key`` header by default).

    Returns:
        A ``StreamingResponse`` whose body is the concatenation of each
        generated chunk converted to float32 samples; the response headers
        advertise 24000 Hz, 1 channel, 32-bit depth.

    Raises:
        HTTPException: 401 when the key is missing or not in VALID_HF_KEYS.
    """
    logger.info(
        "🚀 Request: text='%s...', voice='%s'", request.text[:30], request.voice
    )
    # Never write any part of the credential to the logs; record presence only.
    logger.info("🔑 Received key: %s", "present" if hf_key else "NONE")

    # Verify the key before doing any synthesis work.
    if not _is_valid_key(hf_key):
        logger.error("❌ Unauthorized: key not in valid list")
        raise HTTPException(
            status_code=401,
            detail="Unauthorized: invalid or missing hf_key",
        )
    logger.info("✅ Key validated, generating audio...")

    def generate():
        """Yield one bytes object of float32 samples per pipeline result."""
        chunk_index = 0
        try:
            for result in pipeline(
                request.text, voice=request.voice, speed=request.speed
            ):
                audio = result.audio
                # NOTE(review): kokoro results can apparently carry no audio;
                # skip those instead of failing mid-stream — confirm.
                if audio is None:
                    continue
                chunk_index += 1
                audio_bytes = audio.numpy().astype(np.float32).tobytes()
                logger.info(
                    "✅ Chunk %d ready, size=%d bytes",
                    chunk_index,
                    len(audio_bytes),
                )
                yield audio_bytes
            logger.info("🏁 Streaming finished: %d chunks", chunk_index)
        except Exception as e:
            # logger.exception keeps the traceback; re-raise so the server
            # still aborts the response.
            logger.exception("❌ Error at chunk %d: %s", chunk_index, e)
            raise

    return StreamingResponse(
        generate(),
        media_type="audio/pcm",
        headers={
            "X-Sample-Rate": "24000",
            "X-Channels": "1",
            "X-Bit-Depth": "32",
        },
    )