from pydantic import BaseModel, Field, field_validator, model_validator
from typing import Union, List, Annotated, Dict
import re

def clean_text(text: str) -> str:
    """Return *text* with URLs and HTML-like tags removed and whitespace collapsed.

    Args:
        text: Raw input string.

    Returns:
        The cleaned string: ``http(s)://`` and ``www.`` URLs removed,
        ``<...>`` spans removed, every run of whitespace (newlines included)
        collapsed to a single space, and leading/trailing whitespace stripped.
        The result may be empty if the input contained nothing else.
    """
    # URLs are removed first, up to the next whitespace character.
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    # ``[^>]*`` matches newlines too (``.`` does not without re.DOTALL), so a
    # tag whose attributes wrap onto another line is removed as well.
    text = re.sub(r'<[^>]*>', '', text)
    # ``\s+`` already covers ``\n``, so no separate newline replacement is needed.
    return re.sub(r'\s+', ' ', text).strip()

class TextInput(BaseModel):
    # Request schema: a single text or a batch of texts.
    #
    # Validation runs in two steps:
    #   1. ``validate_text`` rejects empty / whitespace-only input.
    #   2. ``clean_text_after`` runs ``clean_text`` on the value(s) and rejects
    #      anything that becomes empty once cleaned.
    #
    # NOTE: comments are used instead of a class docstring on purpose; Pydantic
    # copies a class docstring into the generated JSON schema description.
    text: Annotated[
        Union[str, List[str]],
        Field(..., title="Input text(s)", description="Single string or list of strings")
    ]

    @field_validator("text")
    @classmethod
    def validate_text(cls, value):
        """Reject empty input before any cleaning is applied.

        Returns the stripped string for ``str`` input, or the list unchanged
        for list input.

        Raises:
            ValueError: if the string is blank, the list is empty, or a list
                item is not a non-blank string.
            TypeError: if the value is neither a string nor a list.
        """
        if isinstance(value, str):
            value = value.strip()
            if not value:
                raise ValueError("String input cannot be empty.")
        elif isinstance(value, list):
            if not value:
                raise ValueError("List input cannot be empty.")
            for i, v in enumerate(value):
                if not isinstance(v, str) or not v.strip():
                    raise ValueError(f"Item {i} in list is not a valid non-empty string.")
        else:
            # Defensive fallback: in the default "after" mode the value has
            # already been validated against the field annotation.
            raise TypeError("Input must be a string or a list of strings.")
        return value

    @model_validator(mode="after")
    def clean_text_after(self):
        """Clean the validated text and reject values that end up empty.

        Input consisting only of URLs and/or tags is non-empty before cleaning
        but empty afterwards, so emptiness is checked again here.

        Raises:
            ValueError: if the string, or any list item, is empty after cleaning.
        """
        if isinstance(self.text, str):
            cleaned = clean_text(self.text)
            if not cleaned:
                raise ValueError("String input cannot be empty after cleaning.")
        else:
            cleaned = [clean_text(t) for t in self.text]
            for i, item in enumerate(cleaned):
                if not item:
                    raise ValueError(f"Item {i} in list is empty after cleaning.")
        self.text = cleaned
        return self

    model_config = {
        "json_schema_extra": {
            "examples": [
                {"text": "Where can I get a new water connection?"},
                {"text": ["Where can I get a new water connection?", "My streetlight is broken."]}
            ]
        }
    }

# Response schema

class UrgencyClassificationOutput(BaseModel):
    # Response payload for one classified text. All three fields are required.

    # Label with the highest score.
    label: str = Field(
        ...,
        description="Top predicted urgency label",
    )

    # Score of ``label``; constrained to the closed interval [0, 1].
    confidence: float = Field(
        ...,
        description="Confidence score for top label",
        ge=0,
        le=1,
    )

    # Mapping of label name -> score.
    scores: Dict[str, float] = Field(
        ...,
        description="All label confidence scores",
    )