cpu-tts / app /api /speech.py
grimshaw's picture
Upload current hf-space contents
8986c3d verified
Raw
History Blame Contribute Delete
1.72 kB
from fastapi import APIRouter, Depends, Response
from app.auth import require_bearer_auth
from app.config import Settings, get_settings
from app.errors import OpenAICompatibleError
from app.schemas import SpeechRequest
from app.services.audio_formatter import format_audio
from app.services.engine_registry import EngineRegistry, get_engine_registry
from app.utils.text import ensure_max_length
# Router for this module's endpoints; every route registered on it is
# mounted under the "/v1" path prefix.
router = APIRouter(prefix="/v1")
@router.post("/audio/speech", dependencies=[Depends(require_bearer_auth)])
def create_speech(
    payload: SpeechRequest,
    settings: Settings = Depends(get_settings),
    registry: EngineRegistry = Depends(get_engine_registry),
) -> Response:
    """Handle ``POST /audio/speech``: synthesize the input text and return audio.

    The route runs the ``require_bearer_auth`` dependency before the handler.

    Args:
        payload: Request body; its ``input``, ``model``, ``voice``,
            ``response_format``, ``speed`` and ``quality`` fields are read.
        settings: Injected settings supplying the input-length limit and the
            fallback response format, speed, quality and language.
        registry: Injected registry used to look up the engine for
            ``payload.model``.

    Returns:
        A ``Response`` whose body and media type are whatever
        ``format_audio`` produced for the synthesized waveform.

    Raises:
        OpenAICompatibleError: With status 400 and code ``unsupported_voice``
            when the selected engine reports the requested voice as
            unsupported. Errors raised by ``ensure_max_length``, the registry,
            the engine, or ``format_audio`` propagate unchanged.
    """
    # Length check runs first, before any engine lookup or synthesis work.
    # NOTE(review): presumably raises on over-long input -- confirm in
    # app.utils.text.
    ensure_max_length(payload.input, settings.max_input_length)

    tts_engine = registry.get_engine(payload.model)
    requested_voice = payload.voice
    voice_ok = tts_engine.supports_voice(requested_voice)
    if not voice_ok:
        raise OpenAICompatibleError(
            status_code=400,
            message=f"Unsupported voice '{requested_voice}'.",
            param="voice",
            code="unsupported_voice",
        )

    # Any falsy response_format / quality falls back to the configured default.
    output_format = payload.response_format
    if not output_format:
        output_format = settings.default_response_format

    # Speed falls back only when it is None, so an explicit falsy value
    # supplied by the client is kept as-is.
    playback_speed = payload.speed
    if playback_speed is None:
        playback_speed = settings.default_speed

    render_quality = payload.quality
    if not render_quality:
        render_quality = settings.default_quality

    result = tts_engine.synthesize(
        text=payload.input,
        voice=requested_voice,
        speed=playback_speed,
        quality=render_quality,
        model_name=payload.model,
        # The language always comes from settings; the request is not consulted.
        lang=settings.default_language,
    )

    encoded_audio, mime_type = format_audio(
        waveform=result.waveform,
        sample_rate=result.sample_rate,
        response_format=output_format,
    )
    return Response(content=encoded_audio, media_type=mime_type)