Spaces:

sambodhan
/

urgency_classifier_space

Runtime error

App Files Community

mr-kush committed on Oct 29, 2025

Commit

f412ecf

1 Parent(s): 63c461f

Refactor response schema to enhance text validation and cleaning for urgency classification

Browse files

Files changed (1) hide show

response_schema.py +32 -24

response_schema.py CHANGED Viewed

@@ -1,48 +1,56 @@
-from typing import Dict
 from pydantic import BaseModel, Field, field_validator, model_validator
 import re
-# ---------------------------
-# Text cleaning function
-# ---------------------------
 def clean_text(text: str) -> str:
-    """Clean grievance text by removing URLs, HTML tags, extra whitespace."""
-    text = re.sub(r'https?://\S+|www\.\S+', '', text)  # Remove URLs
-    text = re.sub(r'<.*?>', '', text)  # Remove HTML tags
-    text = re.sub(r'\n', ' ', text)  # Replace newlines with space
-    text = re.sub(r'\s+', ' ', text).strip()  # Reduce multiple spaces
     return text
-# ---------------------------
-# Request schema
-# ---------------------------
 class TextInput(BaseModel):
-    text: str = Field(..., description="Grievance text to classify urgency")
     @field_validator("text")
-    def validate_non_empty(cls, value: str) -> str:
-        value = value.strip()
-        if not value:
-            raise ValueError("Input text cannot be empty")
         return value
     @model_validator(mode="after")
-    def clean_text_after(cls):
-        self.text = clean_text(self.text)
-        return self
     model_config = {
         "json_schema_extra": {
             "examples": [
-                {"text": "The water supply has been cut off for 3 days."},
-                {"text": "Streetlight on my street is not working, please fix urgently."}
             ]
         }
     }
-# ---------------------------
 # Response schema
-# ---------------------------
 class UrgencyClassificationOutput(BaseModel):
     label: str = Field(..., description="Top predicted urgency label")
     confidence: float = Field(..., ge=0, le=1, description="Confidence score for top label")

 from pydantic import BaseModel, Field, field_validator, model_validator
+from typing import Union, List, Annotated
 import re
 def clean_text(text: str) -> str:
+    text = re.sub(r'https?://\S+|www\.\S+', '', text)
+    text = re.sub(r'<.*?>', '', text)
+    text = re.sub(r'\n', ' ', text)
+    text = re.sub(r'\s+', ' ', text).strip()
     return text
 class TextInput(BaseModel):
+    text: Annotated[
+        Union[str, List[str]],
+        Field(..., title="Input text(s)", description="Single string or list of strings")
+    ]
     @field_validator("text")
+    def validate_text(cls, value):
+        if isinstance(value, str):
+            value = value.strip()
+            if not value:
+                raise ValueError("String input cannot be empty.")
+        elif isinstance(value, list):
+            if not value:
+                raise ValueError("List input cannot be empty.")
+            for i, v in enumerate(value):
+                if not isinstance(v, str) or not v.strip():
+                    raise ValueError(f"Item {i} in list is not a valid non-empty string.")
+        else:
+            raise TypeError("Input must be a string or a list of strings.")
         return value
+    # Correct model validator for Pydantic v2
     @model_validator(mode="after")
+    def clean_text_after(model):
+        if isinstance(model.text, str):
+            model.text = clean_text(model.text)
+        else:
+            model.text = [clean_text(t) for t in model.text]
+        return model
     model_config = {
         "json_schema_extra": {
             "examples": [
+                {"text": "Where can I get a new water connection?"},
+                {"text": ["Where can I get a new water connection?", "My streetlight is broken."]}
             ]
         }
     }
 # Response schema
 class UrgencyClassificationOutput(BaseModel):
     label: str = Field(..., description="Top predicted urgency label")
     confidence: float = Field(..., ge=0, le=1, description="Confidence score for top label")