"""Streaming text-to-speech HTTP service backed by a Kokoro ``KPipeline``.

Exposes ``POST /tts/stream``, which authenticates the caller through the
``hf-key`` request header and streams the synthesized audio back as raw
float32 PCM chunks.
"""

import hmac
import io
import logging
import os
from typing import List, Optional

import numpy as np
from fastapi import FastAPI, Header, HTTPException
from fastapi.responses import StreamingResponse
from kokoro import KPipeline
from pydantic import BaseModel

# --- Logging ---
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("tts_stream")

app = FastAPI()


def _parse_valid_keys(raw: str) -> List[str]:
    """Split a comma-separated key list, dropping blanks and padding.

    ``"".split(",")`` returns ``[""]``, and entries written as ``"a, b"``
    keep their leading space, so both cases are normalized here.
    """
    return [key.strip() for key in raw.split(",") if key.strip()]


# Load the accepted API keys from the environment (comma-separated).
VALID_HF_KEYS = _parse_valid_keys(os.environ.get("VALID_HF_KEYS", ""))
if not VALID_HF_KEYS:
    logger.warning("⚠️ VALID_HF_KEYS is empty: every request will be rejected.")

# Initialize the pipeline once at startup so requests reuse the loaded model.
pipeline = KPipeline(lang_code='a', device='cpu')
logger.info("✅ KPipeline loaded successfully.")


class TTSRequest(BaseModel):
    """Request body for ``POST /tts/stream``."""

    # Text to synthesize.
    text: str
    # Voice identifier forwarded to the pipeline.
    voice: str = 'af_heart'
    # Speed value forwarded to the pipeline.
    speed: float = 1.0


def _is_valid_key(candidate: Optional[str]) -> bool:
    """Return True when *candidate* equals one of the configured keys.

    Comparison uses ``hmac.compare_digest`` on UTF-8 bytes: the bytes form
    accepts non-ASCII input (the ``str`` form raises ``TypeError`` on it),
    and the loop deliberately does not exit early on a match.
    """
    if not candidate:
        return False
    candidate_bytes = candidate.encode("utf-8")
    matched = False
    for key in VALID_HF_KEYS:
        if hmac.compare_digest(candidate_bytes, key.encode("utf-8")):
            matched = True
    return matched


@app.post("/tts/stream")
async def stream_speech(request: TTSRequest, hf_key: Optional[str] = Header(None)):
    """Synthesize ``request.text`` and stream it back as raw PCM audio.

    Args:
        request: Text, voice and speed for the synthesis.
        hf_key: API key taken from the ``hf-key`` request header.

    Returns:
        A ``StreamingResponse`` whose body is the concatenation of the
        float32 sample buffers produced by the pipeline, one per chunk.

    Raises:
        HTTPException: 401 when the key is missing or not configured.
    """
    logger.info(
        "🚀 Request: text='%s...', voice='%s'", request.text[:30], request.voice
    )
    # Only record whether a key was supplied; never write key material
    # (even a prefix) to the logs.
    logger.info("🔑 Received key: %s", "PRESENT" if hf_key else "NONE")

    # Check the token.
    if not _is_valid_key(hf_key):
        logger.error("❌ Unauthorized: key not in valid list")
        raise HTTPException(
            status_code=401, detail="Unauthorized: invalid or missing hf_key"
        )

    logger.info("✅ Key validated, generating audio...")

    def generate():
        """Yield one float32 byte buffer per pipeline result that has audio."""
        chunk_index = 0
        try:
            for result in pipeline(
                request.text, voice=request.voice, speed=request.speed
            ):
                audio = result.audio
                if audio is None:
                    # A result without a waveform would otherwise crash the
                    # stream mid-response; skip it instead.
                    logger.warning("⚠️ Pipeline result has no audio, skipping")
                    continue
                chunk_index += 1
                audio_bytes = audio.numpy().astype(np.float32).tobytes()
                logger.info(
                    "✅ Chunk %d ready, size=%d bytes", chunk_index, len(audio_bytes)
                )
                yield audio_bytes
            logger.info("🏁 Streaming finished: %d chunks", chunk_index)
        except Exception as e:
            # logger.exception keeps the traceback, which logger.error drops.
            logger.exception("❌ Error at chunk %d: %s", chunk_index, e)
            raise

    return StreamingResponse(
        generate(),
        media_type="audio/pcm",
        headers={
            "X-Sample-Rate": "24000",
            "X-Channels": "1",
            "X-Bit-Depth": "32",
        },
    )