# Spaces: Running
# File size: 2,239 Bytes
# 815b978
from typing import Annotated
from fastapi import APIRouter, Depends, File, Form, HTTPException, Request, UploadFile, status
from app.models.speech import TranscribeResponse
from app.security.jwt_auth import verify_jwt
# Router for this module; the transcription endpoint below is registered on it.
router = APIRouter()
# Media types accepted for upload. Entries are lowercase because the endpoint
# lowercases the incoming content type before the membership test.
_ALLOWED_AUDIO_TYPES: frozenset[str] = frozenset(
    (
        # WebM / Ogg / FLAC
        "audio/webm",
        "audio/ogg",
        "audio/flac",
        # WAV (both spellings seen in the wild)
        "audio/wav",
        "audio/x-wav",
        # MPEG family
        "audio/mpeg",
        "audio/mp3",
        "audio/mp4",
    )
)
@router.post("")
async def transcribe_endpoint(
    request: Request,
    audio: Annotated[UploadFile, File(...)],
    _: Annotated[dict, Depends(verify_jwt)],
    language: Annotated[str | None, Form()] = None,
) -> TranscribeResponse:
    """Validate an uploaded audio file and return its transcript.

    Args:
        request: Incoming request; ``app.state.settings`` and
            ``app.state.transcriber`` are read from it.
        audio: The uploaded audio file (multipart form field).
        _: Result of the ``verify_jwt`` dependency. Unused here; it is
            declared so the dependency runs for every request.
        language: Optional language hint. Blank values are treated as absent;
            otherwise the value is stripped and lowercased.

    Returns:
        A ``TranscribeResponse`` wrapping the transcript produced by the
        configured transcriber.

    Raises:
        HTTPException: 503 if the transcriber is not configured, 415 if the
            media type is not in ``_ALLOWED_AUDIO_TYPES``, 400 if the upload
            is empty, 413 if it exceeds ``TRANSCRIBE_MAX_UPLOAD_BYTES``, and
            422 if the language code is longer than 10 characters.
    """
    settings = request.app.state.settings
    transcriber = request.app.state.transcriber

    if not transcriber.is_configured:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Transcription service is not configured.",
        )

    # Clients often append parameters to the media type (for example
    # "audio/webm;codecs=opus"). Only the bare type/subtype is compared against
    # the allow-list and forwarded to the transcriber.
    raw_content_type = audio.content_type or ""
    content_type = raw_content_type.split(";", 1)[0].strip().lower()
    if content_type not in _ALLOWED_AUDIO_TYPES:
        raise HTTPException(
            status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
            detail="Unsupported audio format.",
        )

    # Read at most one byte past the limit: enough to detect an oversized
    # upload without loading an arbitrarily large file into memory.
    max_bytes = settings.TRANSCRIBE_MAX_UPLOAD_BYTES
    audio_bytes = await audio.read(max(max_bytes, 0) + 1)
    if not audio_bytes:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Audio file is empty.",
        )
    if len(audio_bytes) > max_bytes:
        raise HTTPException(
            status_code=status.HTTP_413_CONTENT_TOO_LARGE,
            detail="Audio file exceeds maximum allowed size.",
        )

    # Blank or whitespace-only hints collapse to None.
    language_code = (language or "").strip().lower() or None
    if language_code and len(language_code) > 10:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail="Invalid language code.",
        )

    transcript = await transcriber.transcribe(
        filename=audio.filename or "audio.webm",
        content_type=content_type,
        audio_bytes=audio_bytes,
        language=language_code,
    )
    return TranscribeResponse(transcript=transcript)
|