Spaces:
Sleeping
Sleeping
| import json | |
| import sys | |
| import unicodedata | |
| from pydantic import ValidationError, BaseModel | |
| from tinytroupe.utils import logger | |
| ################################################################################ | |
| # Validation | |
| ################################################################################ | |
def check_valid_fields(obj: dict, valid_fields: list) -> None:
    """
    Verifies that every key of the given dict appears in the list of valid fields.

    Keys are examined in the dict's iteration order and the first key that is not
    listed in `valid_fields` is reported.

    Args:
        obj: the dictionary whose keys are checked.
        valid_fields: the collection of accepted key names.

    Raises:
        ValueError: if a key of `obj` is not present in `valid_fields`.
    """
    # A private sentinel is used because None (or any other value) may itself be a key.
    nothing_wrong = object()
    offender = next(
        (candidate for candidate in obj if candidate not in valid_fields),
        nothing_wrong,
    )

    if offender is not nothing_wrong:
        raise ValueError(f"Invalid key {offender} in dictionary. Valid keys are: {valid_fields}")
def sanitize_raw_string(value: str) -> str:
    """
    Returns a cleaned-up copy of the specified string.

    The cleaning steps are, in order:
      - characters that cannot be encoded as UTF-8 (for example lone surrogates) are dropped;
      - the text is normalized to Unicode NFC form;
      - the result is capped at `sys.maxsize` characters.

    This is a defensive measure, meant to avoid potential security issues with the string.
    """
    # Round-trip through UTF-8; the "ignore" handler silently discards whatever cannot be encoded.
    utf8_bytes = value.encode("utf-8", errors="ignore")
    encodable_text = utf8_bytes.decode("utf-8")

    # Canonical composition, so that equivalent character sequences share one representation.
    composed_text = unicodedata.normalize("NFC", encodable_text)

    # Cap the length at the largest size a Python string may have.
    return composed_text[:sys.maxsize]
def sanitize_dict(value: dict) -> dict:
    """
    Sanitizes, in place, every string value held by the specified dictionary.

    Each string value is replaced by the result of `sanitize_raw_string`. This includes
    strings stored inside nested dictionaries and lists, at any depth, so that parsed
    JSON payloads are cleaned throughout rather than only at the top level.

    Keys, tuples and values of any other type are left untouched. No limit on nesting
    depth is enforced: the traversal is iterative (so deep structures do not hit the
    recursion limit) and each container is visited only once (so self-referencing
    structures terminate).

    Args:
        value: the dictionary to sanitize. It is modified in place.

    Returns:
        The same dictionary object that was passed in.
    """
    visited_ids = set()   # ids of containers already processed, guards against cycles
    pending = [value]     # containers still waiting to be processed

    while pending:
        container = pending.pop()
        if id(container) in visited_ids:
            continue
        visited_ids.add(id(container))

        # Lists are addressed by index, dictionaries by key; both allow item assignment
        # during iteration because the container's size never changes here.
        entries = enumerate(container) if isinstance(container, list) else container.items()
        for key, item in entries:
            if isinstance(item, str):
                container[key] = sanitize_raw_string(item)
            elif isinstance(item, (dict, list)):
                pending.append(item)

    return value
def to_pydantic_or_sanitized_dict(value: dict, model: "type[BaseModel] | None" = None) -> "BaseModel | dict":
    """
    Converts the specified model response dictionary to a Pydantic model instance, or
    returns the sanitized dictionary when that conversion is not possible.

    It is assumed that the dict contains the `content` key, holding a JSON document.

    The sanitized `value` dictionary is returned (with a warning logged, except in the
    first case) when:
      - no model is given, or `model` is not a `BaseModel` subclass;
      - `content` is not valid JSON, or is not a string/bytes value;
      - the parsed JSON is not an object (dictionary);
      - the parsed JSON does not validate against the model.

    Args:
        value: the response dictionary; its `content` entry is parsed as JSON.
        model: the Pydantic model class to validate the parsed content against, if any.

    Returns:
        An instance of `model` on success, otherwise `value` after sanitization.

    Raises:
        KeyError: if a model is given but `value` has no `content` key.
    """
    # Only a BaseModel subclass (the class itself, not an instance) can validate anything.
    if not (isinstance(model, type) and issubclass(model, BaseModel)):
        return sanitize_dict(value)  # If no usable model, just sanitize the dict

    # Parse the content separately so that malformed JSON falls back instead of crashing.
    try:
        payload = json.loads(value['content'])
    except (json.JSONDecodeError, TypeError) as e:
        logger.warning(f"Could not parse content as JSON: {e}")
        return sanitize_dict(value)

    # sanitize_dict works on dictionaries; any other JSON value cannot be processed here.
    if not isinstance(payload, dict):
        logger.warning(f"Expected a JSON object in content, got {type(payload).__name__}")
        return sanitize_dict(value)

    try:
        return model.model_validate(sanitize_dict(payload))
    except ValidationError as e:
        logger.warning(f"Validation error: {e}")
        return sanitize_dict(value)