""" Request models for VoiceAuth API. Defines Pydantic models for API request validation. """ import base64 import re from typing import Annotated from pydantic import BaseModel from pydantic import ConfigDict from pydantic import Field from pydantic import field_validator from app.models.enums import AudioFormat from app.models.enums import SupportedLanguage class VoiceDetectionRequest(BaseModel): """ Request model for voice detection endpoint. Accepts Base64-encoded MP3 audio in one of 5 supported languages. """ model_config = ConfigDict( json_schema_extra={ "example": { "language": "Tamil", "audioFormat": "mp3", "audioBase64": "SUQzBAAAAAAAI1RTU0UAAAAPAAADTGF2ZjU2LjM2LjEwMAAAAAAA...", } } ) language: Annotated[ SupportedLanguage, Field( description="Language of the audio content. Must be one of: Tamil, English, Hindi, Malayalam, Telugu" ), ] audioFormat: Annotated[ AudioFormat, Field( default=AudioFormat.MP3, description="Format of the audio file. Currently only 'mp3' is supported", ), ] = AudioFormat.MP3 audioBase64: Annotated[ str, Field( min_length=100, description="Base64-encoded MP3 audio data. Minimum 100 characters for valid audio", ), ] @field_validator("audioBase64") @classmethod def validate_base64(cls, v: str) -> str: """ Validate that the string is valid Base64. Args: v: The base64 string to validate Returns: The validated base64 string Raises: ValueError: If the string is not valid base64 """ # Remove any whitespace v = v.strip() # Check for valid base64 characters base64_pattern = re.compile(r"^[A-Za-z0-9+/]*={0,2}$") if not base64_pattern.match(v): raise ValueError("Invalid Base64 encoding: contains invalid characters") # Try to decode to verify it's valid base64 try: # Add padding if needed padding = 4 - len(v) % 4 if padding != 4: v += "=" * padding decoded = base64.b64decode(v) if len(decoded) < 100: raise ValueError("Decoded audio data is too small to be a valid MP3 file") except Exception as e: if "Invalid Base64" in str(e) or "too small" in str(e): raise raise ValueError(f"Invalid Base64 encoding: {e}") from e return v.rstrip("=") + "=" * (4 - len(v.rstrip("=")) % 4) if len(v.rstrip("=")) % 4 else v