Spaces:
Running
Running
File size: 3,211 Bytes
377c6b8 30c81b8 377c6b8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
# app/main.py
import json
import httpx
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse, Response
from pydantic import BaseModel, Field
# Location of the JSON settings file read by load_config(). The path is
# relative, so it is resolved against the process's current working directory.
CONFIG_PATH = "app/config.json"
def load_config(path: "str | None" = None) -> dict:
    """Read the service configuration from a JSON file.

    Args:
        path: File to read. When omitted, the module-level ``CONFIG_PATH``
            is used, which keeps the original zero-argument call working.

    Returns:
        The parsed JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Resolve the default lazily so the function signature does not depend on
    # CONFIG_PATH being defined before this function is.
    config_file = CONFIG_PATH if path is None else path
    # Decode explicitly as UTF-8: without it, open() falls back to the
    # platform's locale encoding, which can mis-decode non-ASCII values.
    with open(config_file, encoding="utf-8") as f:
        return json.load(f)
# Settings are read once, at import time; every constant below comes from the
# "elevenlabs" section of that document.
config = load_config()
_elevenlabs_cfg = config["elevenlabs"]

EL_API_KEY = _elevenlabs_cfg["api_key"]
VOICE_ID = _elevenlabs_cfg["voice_id"]
MODEL_ID = _elevenlabs_cfg["model_id"]

# Upstream streaming text-to-speech URL for the configured voice.
ELEVENLABS_STREAM_URL = f"https://api.elevenlabs.io/v1/text-to-speech/{VOICE_ID}/stream"

# Headers attached to every upstream request: the API key, a JSON request
# body, and MP3 audio as the requested response format.
HEADERS = {
    "xi-api-key": EL_API_KEY,
    "Content-Type": "application/json",
    "Accept": "audio/mpeg",
}
# Application object that the route decorators below register against.
# docs_url / redoc_url set the paths of the generated documentation pages.
app = FastAPI(
    docs_url="/docs",
    redoc_url="/redoc",
    title="Production TTS Service",
    version="1.0.2",
)
class TTSRequest(BaseModel):
    # Request body shared by the /tts and /tts/buffered endpoints.
    # Documented with comments rather than a docstring so the generated
    # schema for this model stays exactly as it was.

    # Text to synthesize: required, between 1 and 5000 characters long.
    text: str = Field(default=..., min_length=1, max_length=5000)

    # Voice settings forwarded upstream; each defaults to 0.5 and must lie
    # within the closed interval [0.0, 1.0].
    stability: float = Field(default=0.5, ge=0.0, le=1.0)
    similarity_boost: float = Field(default=0.5, ge=0.0, le=1.0)
@app.get("/health")
async def health():
    # Liveness probe: answers with a fixed payload and makes no upstream call.
    status_payload = {"status": "ok"}
    return status_payload
@app.post("/tts")
async def text_to_speech(payload: TTSRequest):
    """Synthesize speech for the submitted text and stream it back as MP3.

    Responds with 502 when the upstream service cannot be reached or answers
    with a non-200 status.
    """
    body = {
        "text": payload.text,
        "model_id": MODEL_ID,
        "voice_settings": {
            "stability": payload.stability,
            "similarity_boost": payload.similarity_boost,
        },
    }

    # The upstream request is opened *before* the StreamingResponse is built.
    # A StreamingResponse sends its status and headers before it pulls the
    # first chunk from its iterator, so an HTTPException raised inside the
    # generator could no longer turn into a 502: the client would already have
    # received a 200 and would just see a truncated body.
    #
    # The timeout applies per operation (connect / read / write / pool), the
    # same value the buffered endpoint uses; it bounds the wait for each chunk
    # rather than the total duration of the stream.
    client = httpx.AsyncClient(timeout=30.0)
    try:
        upstream_request = client.build_request(
            "POST",
            ELEVENLABS_STREAM_URL,
            headers=HEADERS,
            json=body,
        )
        upstream = await client.send(upstream_request, stream=True)
    except httpx.HTTPError as exc:
        # Connection failure or timeout while contacting the upstream service.
        await client.aclose()
        raise HTTPException(
            status_code=502,
            detail="Upstream TTS request failed",
        ) from exc
    except BaseException:
        # Cancellation or an unexpected error: release the client, re-raise.
        await client.aclose()
        raise

    if upstream.status_code != 200:
        try:
            error = await upstream.aread()
        finally:
            await upstream.aclose()
            await client.aclose()
        raise HTTPException(
            status_code=502,
            # errors="replace" keeps a non-UTF-8 error body from raising
            # UnicodeDecodeError while the 502 is being built.
            detail=error.decode(errors="replace"),
        )

    async def audio_stream():
        # Relay upstream chunks as they arrive; always release the upstream
        # response and the client once iteration ends or is abandoned.
        try:
            async for chunk in upstream.aiter_bytes():
                yield chunk
        finally:
            await upstream.aclose()
            await client.aclose()

    return StreamingResponse(
        audio_stream(),
        media_type="audio/mpeg",
        headers={
            "Content-Disposition": "inline; filename=tts.mp3",
        },
    )
@app.post("/tts/buffered")
async def text_to_speech_buffered(payload: TTSRequest):
    """Synthesize speech and return the complete MP3 as a single download.

    Responds with 502 when the upstream service cannot be reached, times out,
    or answers with a non-200 status, and with 500 when it returns an empty
    body.
    """
    body = {
        "text": payload.text,
        "model_id": MODEL_ID,
        "voice_settings": {
            "stability": payload.stability,
            "similarity_boost": payload.similarity_boost,
        },
    }

    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                ELEVENLABS_STREAM_URL,
                headers=HEADERS,
                json=body,
            )
    except httpx.HTTPError as exc:
        # Connection failures and timeouts are upstream faults; report them
        # as 502 like other upstream errors instead of an unhandled 500.
        raise HTTPException(
            status_code=502,
            detail="Upstream TTS request failed",
        ) from exc

    if response.status_code != 200:
        raise HTTPException(
            status_code=502,
            detail=response.text,
        )

    audio = response.content
    if not audio:
        raise HTTPException(
            status_code=500,
            detail="Received empty audio buffer",
        )

    return Response(
        content=audio,
        media_type="audio/mpeg",
        headers={
            "Content-Disposition": "attachment; filename=tts.mp3",
            "Content-Length": str(len(audio)),
        },
    )
|