Spaces:
Running
Running
import asyncio
import logging
import threading
from contextlib import asynccontextmanager
from io import BytesIO

import numpy as np
import soundfile as sf
from fastapi import FastAPI, HTTPException
from fastapi.responses import Response
from pydantic import BaseModel, Field
class SynthesizeRequest(BaseModel):
    """Request body for speech synthesis: the text to speak and the voice name."""

    # Required; must be between 1 and 300 characters long.
    text: str = Field(default=..., max_length=300, min_length=1)
    # Optional; falls back to "am_adam". Must be between 2 and 32 characters.
    voice: str = Field("am_adam", max_length=32, min_length=2)
| _ALLOWED_VOICES: frozenset[str] = frozenset({"am_adam", "af_heart"}) | |
class TTSState:
    """Simple holder exposing a ``pipeline`` attribute that starts out unset."""

    # Class-level default: no pipeline has been assigned.
    pipeline = None
@asynccontextmanager
async def lifespan(app: "FastAPI"):
    """Load the Kokoro pipeline on startup and release it on shutdown.

    On entry, ``app.state.tts`` is set to a ``KPipeline`` instance, or to
    ``None`` when the import or construction fails. The failure is logged
    and startup continues, so the service can still report its status.
    On exit, including when an exception is thrown in at the ``yield``,
    ``app.state.tts`` is reset to ``None``.

    Args:
        app: The application whose ``state`` receives the ``tts`` attribute.

    Yields:
        Nothing; control returns to the framework while the app is serving.
    """
    try:
        # Imported lazily so a missing or broken install is caught below.
        from kokoro import KPipeline

        # English voice pipeline; voice selection can be configured later.
        app.state.tts = KPipeline(lang_code="a")
    except Exception:
        # Best-effort startup: keep running without a model, but leave a trace
        # so the failure is diagnosable instead of silently swallowed.
        logging.getLogger(__name__).exception("Failed to load the TTS pipeline")
        app.state.tts = None

    try:
        yield
    finally:
        # Always drop the reference, even if shutdown is triggered by an error.
        app.state.tts = None
# Application instance. The interactive docs pages and the OpenAPI schema
# endpoint are all disabled by passing None for their URLs.
app = FastAPI(
    title="PersonaBot TTS",
    openapi_url=None,
    docs_url=None,
    redoc_url=None,
    lifespan=lifespan,
)
# NOTE(review): no route decorator is visible on this function in this view;
# confirm it is registered on `app`.
async def health() -> dict[str, str]:
    """Report whether a TTS pipeline is currently available.

    Returns:
        ``{"status": "ok"}`` when ``app.state.tts`` is set, otherwise
        ``{"status": "loading"}``. ``app.state.tts`` is also ``None`` after a
        failed model load, so "loading" covers that case as well.
    """
    status = "loading" if app.state.tts is None else "ok"
    return {"status": status}
# Sample rate written into the WAV header.
# NOTE(review): presumably Kokoro's native output rate; confirm against its docs.
_WAV_SAMPLE_RATE = 24000

# Synthesis runs on worker threads; this lock keeps it one-at-a-time, as it was
# when the call blocked the event loop, since nothing here establishes that the
# shared pipeline is safe to call concurrently.
_SYNTH_LOCK = threading.Lock()


def _synthesize_wav(pipeline, text: str, voice: str) -> bytes:
    """Run *pipeline* over *text* and return the result encoded as WAV bytes.

    Returns ``b""`` when the pipeline yields no audio at all.
    """
    with _SYNTH_LOCK:
        chunks = [
            np.asarray(audio, dtype=np.float32)
            for _, _, audio in pipeline(text, voice=voice, speed=1.0)
            # A missing chunk would become a 0-d NaN array and make
            # np.concatenate fail, so skip it.
            if audio is not None
        ]
    if not chunks:
        return b""
    wav_buffer = BytesIO()
    sf.write(wav_buffer, np.concatenate(chunks), _WAV_SAMPLE_RATE, format="WAV")
    return wav_buffer.getvalue()


# NOTE(review): no route decorator is visible on this function in this view;
# confirm it is registered on `app`.
async def synthesize(payload: SynthesizeRequest) -> Response:
    """Synthesize speech for the requested text and return it as a WAV file.

    Args:
        payload: Validated request carrying the text and the voice name.

    Returns:
        A response with media type ``audio/wav`` containing the audio.

    Raises:
        HTTPException: 503 when no pipeline is loaded; 400 when the text is
            blank after stripping; 422 when the voice is not in
            ``_ALLOWED_VOICES``; 500 when synthesis fails or yields no audio.
    """
    pipeline = app.state.tts
    if pipeline is None:
        raise HTTPException(status_code=503, detail="TTS model is not loaded")

    text = payload.text.strip()
    if not text:
        raise HTTPException(status_code=400, detail="text is required")

    voice = payload.voice.strip().lower() or "am_adam"
    if voice not in _ALLOWED_VOICES:
        raise HTTPException(status_code=422, detail="Unsupported voice")

    try:
        # Model inference is blocking; run it off the event loop so other
        # requests (e.g. health checks) are still served meanwhile.
        wav_bytes = await asyncio.to_thread(_synthesize_wav, pipeline, text, voice)
    except Exception as exc:
        # Keep internal error details in the server log, not in the response.
        logging.getLogger(__name__).exception("Synthesis failed")
        raise HTTPException(status_code=500, detail="Synthesis failed") from exc

    if not wav_bytes:
        raise HTTPException(status_code=500, detail="No audio generated")
    return Response(content=wav_bytes, media_type="audio/wav")