from pydantic import BaseModel, Field, ConfigDict
from typing import Optional


# ==============================
# SPEECH TO TEXT RESPONSE
# ==============================
class STTResponse(BaseModel):
    """Response schema for a speech-to-text transcription result."""

    # protected_namespaces=() suppresses pydantic v2's UserWarning about the
    # "model_" prefix on the `model_name` field, which otherwise collides with
    # pydantic's protected `model_*` attribute namespace.
    model_config = ConfigDict(
        protected_namespaces=(),
        json_schema_extra={
            "example": {
                "text": "hello how are you",
                "model_name": "openai/whisper-large-v3",
                "language": "en",
                "duration_seconds": 3.2,
            }
        },
    )

    text: str = Field(..., description="Transcribed text from the input audio")
    model_name: str = Field(..., description="STT model used for inference")
    language: Optional[str] = Field(None, description="Detected language")
    duration_seconds: Optional[float] = Field(
        None, description="Approximate audio duration in seconds"
    )


# ==============================
# TEXT TO SPEECH REQUEST / RESPONSE
# ==============================
class TTSRequest(BaseModel):
    """Request schema for text-to-speech synthesis."""

    model_config = ConfigDict(
        json_schema_extra={"example": {"text": "Hello, welcome to our AI system."}}
    )

    # Bounded length keeps synthesis requests to a manageable size.
    text: str = Field(
        ..., min_length=1, max_length=500, description="Text to convert to speech"
    )


class TTSResponse(BaseModel):
    """Response metadata returned after audio generation."""

    # protected_namespaces=() — same `model_` prefix collision as STTResponse.
    model_config = ConfigDict(
        protected_namespaces=(),
        json_schema_extra={
            "example": {
                "message": "Audio generated successfully",
                "audio_format": "wav",
                "length_seconds": 2.5,
                "model_name": "suno/bark",
            }
        },
    )

    message: str          # human-readable status message
    audio_format: str     # e.g. "wav" (see example above)
    length_seconds: Optional[float] = None  # duration of the generated audio, if known
    model_name: str       # TTS model used for generation