File size: 2,239 Bytes
815b978
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from typing import Annotated

from fastapi import APIRouter, Depends, File, Form, HTTPException, Request, UploadFile, status

from app.models.speech import TranscribeResponse
from app.security.jwt_auth import verify_jwt

# Router that the transcription endpoint in this module registers itself on.
# NOTE(review): the URL prefix is presumably supplied where this router is
# included into the application — confirm against the app setup.
router = APIRouter()

_ALLOWED_AUDIO_TYPES: frozenset[str] = frozenset(
    {
        "audio/webm",
        "audio/wav",
        "audio/x-wav",
        "audio/mpeg",
        "audio/mp3",
        "audio/mp4",
        "audio/ogg",
        "audio/flac",
    }
)


@router.post("")
async def transcribe_endpoint(
    request: Request,
    audio: Annotated[UploadFile, File(...)],
    _: Annotated[dict, Depends(verify_jwt)],
    language: Annotated[str | None, Form()] = None,
) -> TranscribeResponse:
    """Transcribe an uploaded audio file and return the transcript.

    Args:
        request: Incoming request; ``app.state.settings`` and
            ``app.state.transcriber`` are read from it.
        audio: The uploaded audio file (multipart form field ``audio``).
        _: Value produced by the ``verify_jwt`` dependency. It is not used
            in the body; declaring it makes the dependency run for this
            route.
        language: Optional language hint from the form. It is trimmed and
            lowercased; a blank value is treated as absent.

    Returns:
        A ``TranscribeResponse`` wrapping the transcript returned by the
        transcriber.

    Raises:
        HTTPException: 503 if the transcriber reports it is not configured;
            415 if the upload's media type is not in
            ``_ALLOWED_AUDIO_TYPES``; 400 if the upload is empty; 413 if it
            exceeds ``settings.TRANSCRIBE_MAX_UPLOAD_BYTES``; 422 if the
            language code is longer than 10 characters.
    """
    settings = request.app.state.settings
    transcriber = request.app.state.transcriber

    if not transcriber.is_configured:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Transcription service is not configured.",
        )

    # A Content-Type may carry parameters after the media type (browsers
    # commonly send e.g. "audio/webm;codecs=opus"). Only the bare
    # type/subtype is compared against the allow-list and forwarded.
    content_type = (audio.content_type or "").partition(";")[0].strip().lower()
    if content_type not in _ALLOWED_AUDIO_TYPES:
        raise HTTPException(
            status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
            detail="Unsupported audio format.",
        )

    # Read at most one byte past the limit: that is enough to tell an
    # oversized upload apart from an acceptable one without pulling the
    # whole file into memory first.
    max_bytes = settings.TRANSCRIBE_MAX_UPLOAD_BYTES
    audio_bytes = await audio.read(max(max_bytes, 0) + 1)
    if not audio_bytes:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Audio file is empty.",
        )

    if len(audio_bytes) > max_bytes:
        raise HTTPException(
            status_code=status.HTTP_413_CONTENT_TOO_LARGE,
            detail="Audio file exceeds maximum allowed size.",
        )

    # Blank or whitespace-only hints collapse to None.
    language_code = (language or "").strip().lower() or None
    if language_code is not None and len(language_code) > 10:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail="Invalid language code.",
        )

    transcript = await transcriber.transcribe(
        filename=audio.filename or "audio.webm",
        content_type=content_type,
        audio_bytes=audio_bytes,
        language=language_code,
    )

    return TranscribeResponse(transcript=transcript)