from typing import Annotated

from fastapi import (
    APIRouter,
    Depends,
    File,
    Form,
    HTTPException,
    Request,
    UploadFile,
    status,
)

from app.models.speech import TranscribeResponse
from app.security.jwt_auth import verify_jwt

router = APIRouter()

# Base media types (no parameters) accepted for uploaded audio.
_ALLOWED_AUDIO_TYPES: frozenset[str] = frozenset(
    {
        "audio/webm",
        "audio/wav",
        "audio/x-wav",
        "audio/mpeg",
        "audio/mp3",
        "audio/mp4",
        "audio/ogg",
        "audio/flac",
    }
)

# Longest normalized language code accepted by the endpoint.
_MAX_LANGUAGE_CODE_LENGTH = 10


def _base_media_type(raw: str | None) -> str:
    """Return the lowercase ``type/subtype`` part of a Content-Type value.

    Parameters after the first ``;`` (for example ``;codecs=opus``) are
    dropped so that they do not defeat the allow-list comparison. ``None``
    or an empty value yields ``""``.
    """
    return (raw or "").split(";", 1)[0].strip().lower()


@router.post("")
async def transcribe_endpoint(
    request: Request,
    audio: Annotated[UploadFile, File(...)],
    _: Annotated[dict, Depends(verify_jwt)],
    language: Annotated[str | None, Form()] = None,
) -> TranscribeResponse:
    """Validate an uploaded audio file and return its transcript.

    Args:
        request: Incoming request; ``app.state.settings`` and
            ``app.state.transcriber`` are read from it.
        audio: The uploaded audio file (required multipart field).
        _: Result of the ``verify_jwt`` dependency. Unused in the body; it is
            declared so that the dependency runs for every request.
        language: Optional language code form field. Blank or
            whitespace-only values are treated as absent.

    Returns:
        A ``TranscribeResponse`` wrapping the transcriber's output.

    Raises:
        HTTPException: 503 if the transcriber is not configured, 415 if the
            media type is not allowed, 400 if the file is empty, 413 if it
            exceeds ``settings.TRANSCRIBE_MAX_UPLOAD_BYTES``, 422 if the
            language code is longer than 10 characters.
    """
    settings = request.app.state.settings
    transcriber = request.app.state.transcriber

    if not transcriber.is_configured:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Transcription service is not configured.",
        )

    # Compare only the base media type: clients may legitimately append
    # parameters (e.g. "audio/webm;codecs=opus").
    content_type = _base_media_type(audio.content_type)
    if content_type not in _ALLOWED_AUDIO_TYPES:
        raise HTTPException(
            status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
            detail="Unsupported audio format.",
        )

    # Read one byte past the limit at most: enough to detect an oversized
    # upload without pulling the whole file into memory first.
    max_upload_bytes = settings.TRANSCRIBE_MAX_UPLOAD_BYTES
    audio_bytes = await audio.read(max_upload_bytes + 1)
    if not audio_bytes:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Audio file is empty.",
        )
    if len(audio_bytes) > max_upload_bytes:
        raise HTTPException(
            status_code=status.HTTP_413_CONTENT_TOO_LARGE,
            detail="Audio file exceeds maximum allowed size.",
        )

    # Normalize the language code; an empty result means "not specified".
    language_code = (language or "").strip().lower() or None
    if language_code and len(language_code) > _MAX_LANGUAGE_CODE_LENGTH:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail="Invalid language code.",
        )

    transcript = await transcriber.transcribe(
        filename=audio.filename or "audio.webm",
        content_type=content_type,
        audio_bytes=audio_bytes,
        language=language_code,
    )
    return TranscribeResponse(transcript=transcript)