| from typing import Union, List, Annotated, Dict |
| from pydantic import BaseModel, Field, field_validator, model_validator |
| import re |
|
|
|
|
| |
# Patterns are compiled once at import time rather than on every call.
# The tag pattern uses a negated class so a tag may span several lines.
_HTML_TAG_RE = re.compile(r'<[^>]*>')
_URL_RE = re.compile(r'https?://\S+|www\.\S+')
_WHITESPACE_RE = re.compile(r'\s+')


def clean_text(text: str) -> str:
    """Clean grievance text by removing HTML tags, URLs and extra whitespace.

    Args:
        text: Raw input text.

    Returns:
        The text with HTML tags and URLs removed, every run of whitespace
        (including newlines) collapsed to a single space, and leading and
        trailing whitespace stripped. May be an empty string.
    """
    # Tags are stripped before URLs: a URL inside an attribute, e.g.
    # <a href="http://x">link</a>, would otherwise be matched by \S+ together
    # with the closing '">' and the link text, leaving a dangling '<a href="'.
    text = _HTML_TAG_RE.sub('', text)
    text = _URL_RE.sub('', text)
    # \s+ already covers newlines, so no separate newline pass is needed.
    return _WHITESPACE_RE.sub(' ', text).strip()
|
|
|
|
| |
|
|
# Request schema: one text or a list of texts. Input is validated to be
# non-empty, cleaned with clean_text(), and validated again after cleaning.
# (Documented with comments rather than a class docstring so the generated
# JSON schema description stays exactly as it was.)
class TextInput(BaseModel):
    text: Annotated[
        Union[str, List[str]],
        Field(
            ...,
            title="Input text(s)",
            description="A single string or a list of non-empty strings representing user input."
        )
    ]

    @field_validator("text")
    @classmethod
    def validate_text(cls, value):
        """Reject empty input before any cleaning takes place.

        A single string is stripped of surrounding whitespace and must not be
        empty. A list must contain at least one item and every item must be a
        string with non-whitespace content; list items are returned unchanged.

        Raises:
            ValueError: If the string, the list, or any list item is empty.
            TypeError: If the value is neither a string nor a list.
        """
        if isinstance(value, str):
            value = value.strip()
            if not value:
                raise ValueError("String input cannot be empty.")
        elif isinstance(value, list):
            if not value:
                raise ValueError("List input cannot be empty.")
            for i, v in enumerate(value):
                if not isinstance(v, str) or not v.strip():
                    raise ValueError(f"Item {i} in list is not a valid non-empty string.")
        else:
            # Defensive only: the declared Union[str, List[str]] type should
            # already have rejected anything else before this validator runs.
            raise TypeError("Input must be a string or a list of strings.")
        return value

    @model_validator(mode="after")
    def clean_texts(self):
        """Clean every text and reject anything that cleaning leaves empty.

        The earlier emptiness check sees the raw text, so input made up only
        of URLs or HTML tags passes it and would become an empty string here.
        Checking again after cleaning keeps the "non-empty" contract intact.

        Raises:
            ValueError: If a text is empty once URLs and HTML tags are removed.
        """
        if isinstance(self.text, str):
            cleaned = clean_text(self.text)
            if not cleaned:
                raise ValueError("String input is empty after removing URLs and HTML tags.")
            self.text = cleaned
        else:
            cleaned_items = [clean_text(t) for t in self.text]
            for i, item in enumerate(cleaned_items):
                if not item:
                    raise ValueError(
                        f"Item {i} in list is empty after removing URLs and HTML tags."
                    )
            self.text = cleaned_items
        return self

    model_config = {
        "json_schema_extra": {
            "examples": [
                {
                    "text": "Where can I get a new water connection?"
                },
                {
                    "text": [
                        "Where can I get a new water connection?",
                        "My streetlight is broken."
                    ]
                }
            ]
        }
    }
|
|
|
|
|
|
|
|
# Response schema for one classification result. All three fields are
# required. (Comments are used instead of a class docstring so the generated
# JSON schema is not altered.)
class ClassificationOutput(BaseModel):
    # The top predicted label.
    label: Annotated[
        str,
        Field(..., description="Top predicted label"),
    ]
    # Confidence score, constrained to the closed interval [0, 1].
    confidence: Annotated[
        float,
        Field(..., ge=0, le=1, description="Confidence score"),
    ]
    # Mapping of label name to its confidence score.
    scores: Annotated[
        Dict[str, float],
        Field(..., description="All label confidence scores"),
    ]
|
|
|
|