File size: 2,860 Bytes
6b408d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
"""

Request models for VoiceAuth API.



Defines Pydantic models for API request validation.

"""

import base64
import re
from typing import Annotated

from pydantic import BaseModel
from pydantic import ConfigDict
from pydantic import Field
from pydantic import field_validator

from app.models.enums import AudioFormat
from app.models.enums import SupportedLanguage


class VoiceDetectionRequest(BaseModel):
    """
    Request model for voice detection endpoint.

    Accepts Base64-encoded MP3 audio in one of 5 supported languages.

    Fields:
        language: Language of the audio content (a ``SupportedLanguage``).
        audioFormat: Format of the audio file; defaults to ``AudioFormat.MP3``.
        audioBase64: Base64-encoded audio payload, at least 100 characters
            long. After validation the stored value has surrounding
            whitespace removed and canonical ``=`` padding.
    """

    # Example payload surfaced in the generated JSON schema / OpenAPI docs.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "language": "Tamil",
                "audioFormat": "mp3",
                "audioBase64": "SUQzBAAAAAAAI1RTU0UAAAAPAAADTGF2ZjU2LjM2LjEwMAAAAAAA...",
            }
        }
    )

    language: Annotated[
        SupportedLanguage,
        Field(
            description="Language of the audio content. Must be one of: Tamil, English, Hindi, Malayalam, Telugu"
        ),
    ]

    audioFormat: Annotated[
        AudioFormat,
        Field(
            default=AudioFormat.MP3,
            description="Format of the audio file. Currently only 'mp3' is supported",
        ),
    ] = AudioFormat.MP3

    audioBase64: Annotated[
        str,
        Field(
            min_length=100,
            description="Base64-encoded MP3 audio data. Minimum 100 characters for valid audio",
        ),
    ]

    @field_validator("audioBase64")
    @classmethod
    def validate_base64(cls, v: str) -> str:
        """
        Validate that the string is valid Base64 and normalise its padding.

        Leading/trailing whitespace is removed, missing ``=`` padding is
        added, and misplaced or excess padding is replaced by the canonical
        amount, so the returned string can be passed directly to a strict
        Base64 decoder.

        Args:
            v: The base64 string to validate

        Returns:
            The validated base64 string with canonical padding

        Raises:
            ValueError: If the string contains characters outside the
                standard Base64 alphabet, cannot be decoded, or decodes to
                fewer than 100 bytes
        """
        # Only surrounding whitespace is tolerated; embedded whitespace is
        # rejected by the alphabet check below.
        v = v.strip()

        # Standard alphabet followed by at most two padding characters.
        if re.fullmatch(r"[A-Za-z0-9+/]*={0,2}", v) is None:
            raise ValueError("Invalid Base64 encoding: contains invalid characters")

        # Rebuild the padding from the data characters alone so the result is
        # canonical regardless of how (or whether) the client padded it.
        payload = v.rstrip("=")
        remainder = len(payload) % 4
        if remainder == 1:
            # A single leftover character carries only 6 bits and can never
            # form a whole byte, so no amount of padding makes this valid.
            raise ValueError(
                "Invalid Base64 encoding: data length cannot be 1 more than a multiple of 4"
            )
        canonical = payload + "=" * (-remainder % 4)

        # Keep the try body minimal: only the decode can raise here.
        # binascii.Error is a ValueError subclass, so this catches decode
        # failures without swallowing unrelated exceptions.
        try:
            decoded = base64.b64decode(canonical, validate=True)
        except ValueError as e:
            raise ValueError(f"Invalid Base64 encoding: {e}") from e

        if len(decoded) < 100:
            raise ValueError("Decoded audio data is too small to be a valid MP3 file")

        return canonical