File size: 4,553 Bytes
037ee88
0bb062b
 
 
 
 
 
 
 
 
 
 
 
037ee88
 
0bb062b
 
 
 
 
 
 
 
 
037ee88
0bb062b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
"""
data_validator.py
-----------------
Validation and normalization utilities for extracted business data.

Used in the orchestrator pipeline between extraction and skill routing:

    extract_fields() → validate_data() → route_to_skill()

The validator normalises:
  - text fields (customer, item, reason)  → stripped, lowercased or title-cased
  - payment_type aliases (gpay → upi, paytm → upi, etc.)
  - numeric fields (amount, quantity)     → int or float; negatives are made
    positive (amount for "payment"; quantity for "order", "credit",
    "preparation")
"""

from __future__ import annotations

import math
import re
from typing import Any


# First signed decimal number in a string: optional "-", one or more digits,
# optional ".digits" fraction.  At least one digit must precede the decimal
# point, so in "rs.500" the match is "500" (not ".500").
_NUMBER_PATTERN = re.compile(r"-?\d+(?:\.\d+)?")


class DataValidator:
    """Validate and normalize extraction-agent output.

    The class keeps no per-instance state; the helpers are static methods and
    ``validate`` only reads its arguments.
    """

    # Payment-method spellings (already lowercased and whitespace-collapsed)
    # mapped to their canonical name.  Built once at class creation instead of
    # on every call.
    _PAYMENT_ALIASES = {
        "gpay":          "upi",
        "google pay":    "upi",
        "phonepe":       "upi",
        "phone pe":      "upi",
        "paytm":         "upi",
        "upi":           "upi",
        "cash":          "cash",
        "online":        "online",
        "bank transfer": "online",
        "neft":          "online",
        "imps":          "online",
        "rtgs":          "online",
        "cheque":        "cheque",
        "check":         "cheque",
    }

    def validate(self, intent: str, data: dict[str, Any]) -> dict[str, Any]:
        """Return a cleaned copy of extracted data for the given intent.

        Only keys already present in ``data`` are normalised; absent keys are
        not added and any other keys are copied through unchanged.

        Args:
            intent: Intent name. It only selects which sign rule is applied:
                a negative ``amount`` is made positive for ``"payment"``; a
                negative ``quantity`` is made positive for ``"order"``,
                ``"credit"`` and ``"preparation"``.
            data: Extracted fields. ``None`` or an empty dict yields ``{}``.

        Returns:
            A new dict (shallow copy); ``data`` itself is not modified.
        """
        cleaned = dict(data or {})

        if "customer" in cleaned:
            cleaned["customer"] = self._clean_text(cleaned["customer"], title=True)
        if "item" in cleaned:
            cleaned["item"] = self._clean_text(cleaned["item"])
        if "reason" in cleaned:
            cleaned["reason"] = self._clean_text(cleaned["reason"])
        if "payment_type" in cleaned:
            cleaned["payment_type"] = self._normalize_payment_type(cleaned["payment_type"])
        if "amount" in cleaned:
            cleaned["amount"] = self._to_number(cleaned["amount"], as_int_if_possible=True)
        if "quantity" in cleaned:
            cleaned["quantity"] = self._to_number(cleaned["quantity"], as_int_if_possible=True)

        # Business rules: the sign is dropped only for the intents listed
        # here; for any other intent a negative value is left as parsed.
        amount = cleaned.get("amount")
        if intent == "payment" and amount is not None and amount < 0:
            cleaned["amount"] = abs(amount)

        quantity = cleaned.get("quantity")
        if (
            intent in {"order", "credit", "preparation"}
            and quantity is not None
            and quantity < 0
        ):
            cleaned["quantity"] = abs(quantity)

        return cleaned

    @staticmethod
    def _clean_text(value: Any, *, title: bool = False) -> str | None:
        """Normalise a free-text field.

        Args:
            value: Any value; it is converted with ``str()``.
            title: If true, return the text title-cased (``str.title``);
                otherwise return it lowercased.

        Returns:
            The stripped text with every whitespace run collapsed to a single
            space, or ``None`` when ``value`` is ``None`` or blank.
        """
        if value is None:
            return None
        text = str(value).strip()
        if not text:
            return None
        text = re.sub(r"\s+", " ", text)
        return text.title() if title else text.lower()

    @staticmethod
    def _normalize_payment_type(value: Any) -> str | None:
        """Map a payment-method spelling to its canonical name.

        Returns:
            The canonical name from ``_PAYMENT_ALIASES``; the cleaned,
            lowercased text itself when it is not a known alias; ``None`` when
            ``value`` is ``None`` or blank.
        """
        text = DataValidator._clean_text(value)
        if text is None:
            return None
        return DataValidator._PAYMENT_ALIASES.get(text, text)

    @staticmethod
    def _to_number(value: Any, *, as_int_if_possible: bool = False) -> int | float | None:
        """Coerce a raw value to a finite number.

        Args:
            value: An ``int``, a ``float``, or anything whose ``str()`` may
                contain a number (for example ``"Rs. 1,200.50"``).  Commas are
                removed before the first match of ``_NUMBER_PATTERN`` is
                parsed.
            as_int_if_possible: If true, whole numbers are returned as ``int``.

        Returns:
            The parsed number, or ``None`` when ``value`` is ``None``, an empty
            string, a ``bool``, contains no number, or is not finite
            (NaN / infinity / too large for a float).
        """
        if value is None or value == "":
            return None
        if isinstance(value, bool):
            # bool is an int subclass but is not a meaningful amount/quantity.
            return None
        if isinstance(value, int):
            if as_int_if_possible:
                # Keep integers exact: a float round-trip corrupts values
                # above 2**53 and raises OverflowError for very large ones.
                return int(value)
            try:
                return float(value)
            except OverflowError:
                return None

        if isinstance(value, float):
            number = float(value)
        else:
            text = str(value).replace(",", "")
            match = _NUMBER_PATTERN.search(text)
            if match is None:
                return None
            number = float(match.group(0))

        # NaN compares false against 0 and infinity survives abs(), so neither
        # would be caught by the sign rules in validate(); reject them here.
        if not math.isfinite(number):
            return None
        if as_int_if_possible and number.is_integer():
            return int(number)
        return number


# ---------------------------------------------------------------------------
# Convenience wrapper — used by the orchestrator
# ---------------------------------------------------------------------------

def validate_data(intent: str, data: dict[str, Any]) -> dict[str, Any]:
    """
    Clean extracted data by delegating to ``DataValidator.validate``.

    A new ``DataValidator`` is created on every call. Intended usage from the
    orchestrator:

        from validators.data_validator import validate_data
        cleaned = validate_data(intent, raw_data)

    Args:
        intent: Detected intent string (e.g. "payment", "order").
        data:   Raw extraction dict from the Extraction Agent.

    Returns:
        The dict returned by ``DataValidator.validate`` — a cleaned,
        normalised copy of ``data``.
    """
    validator = DataValidator()
    cleaned = validator.validate(intent, data)
    return cleaned