Spaces:
Running
Running
| # app/main.py | |
| import json | |
| import httpx | |
| from fastapi import FastAPI, HTTPException | |
| from fastapi.responses import StreamingResponse, Response | |
| from pydantic import BaseModel, Field | |
CONFIG_PATH = "app/config.json"


def load_config(path: str = CONFIG_PATH) -> dict:
    """Read the service configuration from a JSON file.

    Args:
        path: Location of the JSON file. Defaults to ``CONFIG_PATH``.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
# Configuration is read once, at import time; the constants below derive from it.
config = load_config()

# ElevenLabs credentials and voice/model selection come from the
# "elevenlabs" section of the configuration.
EL_API_KEY, VOICE_ID, MODEL_ID = (
    config["elevenlabs"][key] for key in ("api_key", "voice_id", "model_id")
)

# Text-to-speech streaming endpoint for the configured voice.
ELEVENLABS_STREAM_URL = (
    f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"
)

# Headers sent with every ElevenLabs request made by this module.
HEADERS = {
    "xi-api-key": EL_API_KEY,
    "Content-Type": "application/json",
    "Accept": "audio/mpeg",
}
# Application object; the interactive API docs are configured at /docs and /redoc.
app = FastAPI(
    docs_url="/docs",
    redoc_url="/redoc",
    title="Production TTS Service",
    version="1.0.2",
)
# Request body accepted by the text-to-speech handlers in this module.
class TTSRequest(BaseModel):
    # Text to synthesize: required, between 1 and 5000 characters.
    text: str = Field(..., min_length=1, max_length=5000)

    # Voice settings forwarded upstream: each within [0.0, 1.0], default 0.5.
    stability: float = Field(default=0.5, ge=0.0, le=1.0)
    similarity_boost: float = Field(default=0.5, ge=0.0, le=1.0)
async def health():
    """Report that the service is up.

    Returns:
        A dict with the single entry ``"status": "ok"``.
    """
    # NOTE(review): no route decorator is visible in this view — confirm this
    # handler is actually registered on `app`.
    return dict(status="ok")
async def text_to_speech(payload: TTSRequest):
    """Synthesize speech via ElevenLabs and relay the MP3 stream to the caller.

    The upstream request is opened, and its status checked, *before* the
    ``StreamingResponse`` is returned. Once a streaming response has started
    its status line is already on the wire, so an ``HTTPException`` raised
    from inside the body generator can no longer turn into a 502.

    Args:
        payload: Text and voice settings to forward to ElevenLabs.

    Returns:
        A ``StreamingResponse`` of ``audio/mpeg`` chunks, relayed as they
        arrive from upstream.

    Raises:
        HTTPException: 504 if the upstream request times out, 502 if it
            fails for another transport reason or answers with a non-200
            status (the upstream error body is passed through as detail).
    """
    # NOTE(review): no route decorator is visible in this view — confirm this
    # handler is actually registered on `app`.
    body = {
        "text": payload.text,
        "model_id": MODEL_ID,
        "voice_settings": {
            "stability": payload.stability,
            "similarity_boost": payload.similarity_boost,
        },
    }

    # A finite timeout keeps a stalled upstream from pinning this request
    # forever. The client must outlive this function (the body is consumed
    # later by the response), so it is closed manually rather than via `with`.
    client = httpx.AsyncClient(timeout=30.0)

    async def close_upstream(response=None):
        # Release the upstream connection first, then the client itself.
        try:
            if response is not None:
                await response.aclose()
        finally:
            await client.aclose()

    try:
        request = client.build_request(
            "POST",
            ELEVENLABS_STREAM_URL,
            headers=HEADERS,
            json=body,
        )
        upstream = await client.send(request, stream=True)
    except httpx.HTTPError as exc:
        await close_upstream()
        status = 504 if isinstance(exc, httpx.TimeoutException) else 502
        raise HTTPException(
            status_code=status,
            detail=f"Upstream TTS request failed ({type(exc).__name__})",
        ) from exc

    if upstream.status_code != 200:
        try:
            error = await upstream.aread()
        finally:
            await close_upstream(upstream)
        raise HTTPException(
            status_code=502,
            # Error bodies are not guaranteed to be valid UTF-8.
            detail=error.decode(errors="replace"),
        )

    async def audio_stream():
        try:
            async for chunk in upstream.aiter_bytes():
                yield chunk
        finally:
            # Runs on normal completion, on a mid-stream error, and when the
            # generator is closed early (e.g. the caller disconnects).
            await close_upstream(upstream)

    return StreamingResponse(
        audio_stream(),
        media_type="audio/mpeg",
        headers={
            "Content-Disposition": "inline; filename=tts.mp3",
        },
    )
async def text_to_speech_buffered(payload: TTSRequest):
    """Synthesize speech via ElevenLabs and return the whole MP3 in one response.

    Unlike the streaming handler, the complete upstream body is read into
    memory before anything is sent back, so the reply carries an exact
    ``Content-Length`` and is offered as an attachment.

    Args:
        payload: Text and voice settings to forward to ElevenLabs.

    Returns:
        A ``Response`` with ``audio/mpeg`` content.

    Raises:
        HTTPException: 504 if the upstream request times out, 502 if it
            fails for another transport reason or answers with a non-200
            status, 500 if upstream returns an empty body.
    """
    # NOTE(review): no route decorator is visible in this view — confirm this
    # handler is actually registered on `app`.
    body = {
        "text": payload.text,
        "model_id": MODEL_ID,
        "voice_settings": {
            "stability": payload.stability,
            "similarity_boost": payload.similarity_boost,
        },
    }

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            # The streaming URL is used here as well; `post` reads the full
            # body before returning, which is what makes this path buffered.
            response = await client.post(
                ELEVENLABS_STREAM_URL,
                headers=HEADERS,
                json=body,
            )
    except httpx.HTTPError as exc:
        # Without this, timeouts and connection failures would escape as an
        # unexplained 500 instead of a gateway error.
        status = 504 if isinstance(exc, httpx.TimeoutException) else 502
        raise HTTPException(
            status_code=status,
            detail=f"Upstream TTS request failed ({type(exc).__name__})",
        ) from exc

    if response.status_code != 200:
        raise HTTPException(
            status_code=502,
            detail=response.text,
        )

    audio = response.content
    if not audio:
        raise HTTPException(
            status_code=500,
            detail="Received empty audio buffer",
        )

    return Response(
        content=audio,
        media_type="audio/mpeg",
        headers={
            "Content-Disposition": "attachment; filename=tts.mp3",
            "Content-Length": str(len(audio)),
        },
    )