mr-kush committed on
Commit
f412ecf
·
1 Parent(s): 63c461f

Refactor response schema to enhance text validation and cleaning for urgency classification

Browse files
Files changed (1) hide show
  1. response_schema.py +32 -24
response_schema.py CHANGED
@@ -1,48 +1,56 @@
1
- from typing import Dict
2
  from pydantic import BaseModel, Field, field_validator, model_validator
 
3
  import re
4
 
5
- # ---------------------------
6
- # Text cleaning function
7
- # ---------------------------
8
  def clean_text(text: str) -> str:
9
- """Clean grievance text by removing URLs, HTML tags, extra whitespace."""
10
- text = re.sub(r'https?://\S+|www\.\S+', '', text) # Remove URLs
11
- text = re.sub(r'<.*?>', '', text) # Remove HTML tags
12
- text = re.sub(r'\n', ' ', text) # Replace newlines with space
13
- text = re.sub(r'\s+', ' ', text).strip() # Reduce multiple spaces
14
  return text
15
 
16
- # ---------------------------
17
- # Request schema
18
- # ---------------------------
19
  class TextInput(BaseModel):
20
- text: str = Field(..., description="Grievance text to classify urgency")
 
 
 
21
 
22
  @field_validator("text")
23
- def validate_non_empty(cls, value: str) -> str:
24
- value = value.strip()
25
- if not value:
26
- raise ValueError("Input text cannot be empty")
 
 
 
 
 
 
 
 
 
27
  return value
28
 
 
29
  @model_validator(mode="after")
30
- def clean_text_after(cls):
31
- self.text = clean_text(self.text)
32
- return self
 
 
 
33
 
34
  model_config = {
35
  "json_schema_extra": {
36
  "examples": [
37
- {"text": "The water supply has been cut off for 3 days."},
38
- {"text": "Streetlight on my street is not working, please fix urgently."}
39
  ]
40
  }
41
  }
42
 
43
- # ---------------------------
44
  # Response schema
45
- # ---------------------------
46
  class UrgencyClassificationOutput(BaseModel):
47
  label: str = Field(..., description="Top predicted urgency label")
48
  confidence: float = Field(..., ge=0, le=1, description="Confidence score for top label")
 
 
1
  from pydantic import BaseModel, Field, field_validator, model_validator
2
+ from typing import Union, List, Annotated
3
  import re
4
 
 
 
 
# Compiled once at import time so repeated calls do not re-resolve the patterns.
# `[^>]*` (unlike `.*?`) also spans newlines, so tags broken across lines match.
_HTML_TAG_RE = re.compile(r'<[^>]*>')
_URL_RE = re.compile(r'https?://\S+|www\.\S+')
_WHITESPACE_RE = re.compile(r'\s+')


def clean_text(text: str) -> str:
    """Return *text* with HTML tags and URLs removed and whitespace collapsed.

    Args:
        text: Raw input text.

    Returns:
        The cleaned text: tags and URLs deleted, every run of whitespace
        (including newlines and tabs) reduced to a single space, and leading
        and trailing whitespace stripped. May be the empty string.
    """
    # Tags go first: a URL inside an attribute (href="http://...") would
    # otherwise be consumed together with the tag's closing '>' by the URL
    # pattern's \S+, leaving an unmatched '<a href="' fragment behind.
    text = _HTML_TAG_RE.sub('', text)
    text = _URL_RE.sub('', text)
    # \s+ already covers '\n', so no separate newline pass is needed.
    return _WHITESPACE_RE.sub(' ', text).strip()
11
 
 
 
 
class TextInput(BaseModel):
    """Request payload carrying one text or a batch of texts.

    ``text`` accepts either a single string or a list of strings. Each string
    is normalised with ``clean_text`` during validation and must still contain
    text after cleaning.
    """

    text: Annotated[
        Union[str, List[str]],
        Field(..., title="Input text(s)", description="Single string or list of strings")
    ]

    @field_validator("text")
    @classmethod
    def validate_text(cls, value):
        """Clean the input and reject anything that is empty after cleaning.

        Cleaning happens before the emptiness check so that input consisting
        only of URLs, HTML tags or whitespace is rejected instead of being
        stored as an empty string.

        Args:
            value: A string or a list of strings.

        Returns:
            The cleaned string, or a new list of cleaned strings.

        Raises:
            ValueError: If the string is empty after cleaning, the list is
                empty, any list item is not a string or is empty after
                cleaning, or the value is neither a string nor a list.
        """
        if isinstance(value, str):
            cleaned = clean_text(value)
            if not cleaned:
                raise ValueError("String input cannot be empty.")
            return cleaned

        if isinstance(value, list):
            if not value:
                raise ValueError("List input cannot be empty.")
            cleaned_items = []
            for i, v in enumerate(value):
                cleaned = clean_text(v) if isinstance(v, str) else ""
                if not cleaned:
                    raise ValueError(f"Item {i} in list is not a valid non-empty string.")
                cleaned_items.append(cleaned)
            return cleaned_items

        # ValueError rather than TypeError: pydantic only turns ValueError and
        # AssertionError raised by validators into a ValidationError.
        raise ValueError("Input must be a string or a list of strings.")

    model_config = {
        "json_schema_extra": {
            "examples": [
                {"text": "Where can I get a new water connection?"},
                {"text": ["Where can I get a new water connection?", "My streetlight is broken."]}
            ]
        }
    }
51
 
 
52
  # Response schema
53
+
class UrgencyClassificationOutput(BaseModel):
    """Response payload: the top predicted urgency label and its confidence."""

    # Top predicted urgency label (required).
    label: str = Field(..., description="Top predicted urgency label")

    # Confidence for the top label (required); constrained to 0 <= value <= 1.
    confidence: float = Field(
        ...,
        ge=0,
        le=1,
        description="Confidence score for top label",
    )