File size: 3,795 Bytes
2fe23a7
 
 
8b96875
2fe23a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import re
from typing import List, Dict, Optional, Tuple
from slugify import slugify
from config import get_db

# -------------------------
# Helpers
# -------------------------
def normalize_term(s: str) -> str:
    """Canonicalise a term for lookups and pattern matching.

    The value is trimmed, lower-cased, and every run of whitespace is
    collapsed into a single space. Falsy input (``None``, ``""``) yields
    an empty string.
    """
    if not s:
        return ""
    lowered = s.strip().lower()
    return re.sub(r"\s+", " ", lowered)

def product_id_from_name(name: str) -> str:
    """Derive a product id by slugifying the normalised form of ``name``."""
    cleaned = normalize_term(name)
    return slugify(cleaned)

def load_alias_map() -> Tuple[Dict[str, Dict], Dict[str, Dict]]:
    """
    Reads all docs in 'products' and returns:
      alias2prod: { "amatamatisi": {"id": "tomatoes", "name": "Tomatoes"}, ... }
      prod_by_id: { "tomatoes": {"id": "tomatoes", "name": "Tomatoes"} }

    Every alias key is passed through ``normalize_term``. A product's own
    name is always registered as one of its aliases; when a document has no
    ``name`` the document id is used instead. If two products share an
    alias, the one streamed later wins.

    Malformed alias data is tolerated rather than raising:
      * ``aliases`` missing or null  -> treated as no aliases
      * ``aliases`` given as a single string -> treated as one alias
        (not exploded into characters)
      * non-string entries and entries that normalise to "" are skipped,
        because an empty alias key would match every phrase downstream.
    """
    db = get_db()
    alias2prod: Dict[str, Dict] = {}
    prod_by_id: Dict[str, Dict] = {}
    for snap in db.collection("products").stream():
        pid = snap.id
        data = snap.to_dict() or {}
        name = data.get("name") or pid

        raw_aliases = data.get("aliases") or []
        if isinstance(raw_aliases, str):
            # A bare string is one alias; list("abc") would yield 'a', 'b', 'c'.
            raw_aliases = [raw_aliases]

        for a in [name, *raw_aliases]:
            if not isinstance(a, str):
                continue
            key = normalize_term(a)
            if not key:
                continue
            alias2prod[key] = {"id": pid, "name": name}
        prod_by_id[pid] = {"id": pid, "name": name}
    return alias2prod, prod_by_id

NUM_WORDS_EN = {
    "one":1,"two":2,"three":3,"four":4,"five":5,"six":6,"seven":7,"eight":8,"nine":9,"ten":10,
}
def parse_quantity(text: str) -> Optional[int]:
    """Extract a quantity from a phrase, or return None when there is none.

    Digits take precedence over number words. Commas are removed before
    the digit search, so "2,500" is read as 2500. Only digit runs of one
    to six digits are accepted; a longer run is ignored entirely instead
    of being cut down to its trailing six digits. When no digits qualify,
    the English number word (one..ten) that appears earliest in the text
    is used.
    """
    t = normalize_term(text)
    # isiZulu pattern like "ezi-3" or standalone digits.
    # (?<!\d) stops the match from starting in the middle of a longer digit
    # run, which would otherwise turn "1234567" into 234567.
    m = re.search(r"(?:ezi-)?(?<!\d)(\d{1,6})\b", t.replace(",", ""))
    if m:
        return int(m.group(1))

    # Choose the number word by position in the text, not by dict order,
    # so "ten packs of one-ply" yields 10 rather than 1.
    first_pos: Optional[int] = None
    first_val: Optional[int] = None
    for w, n in NUM_WORDS_EN.items():
        hit = re.search(rf"\b{re.escape(w)}\b", t)
        if hit and (first_pos is None or hit.start() < first_pos):
            first_pos, first_val = hit.start(), n
    return first_val

def guess_product_from_phrase(phrase: str) -> str:
    """Heuristically pull a product name out of a phrase with no known alias.

    If the normalised phrase ends in "of <letters/spaces>", that tail is
    returned. Otherwise the phrase is broken into runs of ASCII letters and
    the last two runs (or the only one) are joined with a space. A phrase
    containing no ASCII letters is returned in its normalised form.
    """
    cleaned = normalize_term(phrase)

    of_tail = re.search(r"\bof\s+([a-zA-Z][a-zA-Z ]+)$", cleaned)  # "loaves of bread" -> bread
    if of_tail is not None:
        return of_tail.group(1).strip()

    words = list(filter(None, re.split(r"[^a-zA-Z]+", cleaned)))
    if words:
        # Slicing the last two also covers the single-word case.
        return " ".join(words[-2:])
    return cleaned

# Verbs that signal a purchase / a sale. Matching is case-insensitive and
# whole-word; the two-word forms allow any amount of whitespace between words.
BUY_PAT = re.compile(
    r"\b(buy|bought|purchase|purchased|ngithenge|ke\s+rekile)\b", re.IGNORECASE
)
SELL_PAT = re.compile(
    r"\b(sell|sold|ngithengise|ke\s+rekisitse)\b", re.IGNORECASE
)

def detect_intent(text: str) -> str:
    """Classify a message as "buy", "sell" or "unknown".

    Buy verbs are checked first, so a message containing both a buy verb
    and a sell verb is reported as "buy". Empty or ``None`` input returns
    "unknown" instead of raising, matching how the other helpers in this
    module tolerate missing text.
    """
    if not text:
        return "unknown"
    if BUY_PAT.search(text):
        return "buy"
    if SELL_PAT.search(text):
        return "sell"
    return "unknown"

def split_phrases(raw: str) -> List[str]:
    """Break a message into item phrases.

    The text is normalised, then cut at commas and at the standalone
    conjunctions listed below. Empty fragments are dropped.
    """
    # 'and' (EN), 'no' (zu), 'le/na' (st)
    separators = r"\s*,\s*|\s+and\s+|\s+no\s+|\s+le\s+|\s+na\s+"
    fragments = re.split(separators, normalize_term(raw))
    return list(filter(None, fragments))

def best_alias_match(part: str, alias2prod: Dict[str, Dict]) -> Optional[Dict]:
    """Find the longest alias that occurs as a whole term inside ``part``.

    Args:
        part: one phrase of the message; it is normalised before matching.
        alias2prod: mapping of alias -> {"id": ..., "name": ...}. Keys are
            matched literally against the normalised phrase.

    Returns:
        {"alias", "id", "name"} for the longest matching alias (the first
        one encountered wins a length tie), or None when nothing matches.

    Blank aliases are ignored: an empty pattern would match any phrase.
    Term boundaries are checked with ``(?<!\\w)`` / ``(?!\\w)`` rather than
    ``\\b`` so aliases that start or end with punctuation can still match;
    for aliases with word characters at both ends the two are equivalent.
    """
    t = normalize_term(part)
    if not t:
        return None
    best = None
    for alias, meta in alias2prod.items():
        if not alias or not alias.strip():
            continue
        # Only a strictly longer alias can replace the current best.
        if best is not None and len(alias) <= len(best["alias"]):
            continue
        # Cheap substring test before paying for a regex compile/search.
        if alias not in t:
            continue
        if re.search(rf"(?<!\w){re.escape(alias)}(?!\w)", t):
            best = {"alias": alias, "id": meta["id"], "name": meta["name"]}
    return best

def extract_items_from_text(text: str, alias2prod: Dict[str, Dict]) -> List[Dict]:
    """
    Returns [{ name, resolvedId (optional), quantity (optional) }]

    The message is normalised, a leading "[i] <buy/sell verb>" is removed,
    and the rest is split into phrases. Each phrase yields one item:
      * if a known alias occurs in it, ``name`` is the product name
        (title-cased) and ``resolvedId`` is the product id;
      * otherwise ``name`` is a title-cased guess and ``resolvedId`` is
        absent.
    ``quantity`` is always present and is None when no quantity was found.

    The stripped verbs cover every verb matched by the module's buy and sell
    intent patterns, so a leading "sold" / "purchase" no longer leaks into
    the guessed product name.
    """
    items = []
    t = normalize_term(text)
    # Text is already lower-cased by normalize_term, so no IGNORECASE needed.
    t = re.sub(
        r"^(?:i\s+)?"
        r"(?:bought|buy|purchased|purchase|sold|sell"
        r"|ngithenge|ngithengise|ke\s+rekile|ke\s+rekisitse)\b[\s,:-]*",
        "",
        t,
    )
    for part in split_phrases(t):
        qty = parse_quantity(part)
        match = best_alias_match(part, alias2prod)
        if match:
            items.append({"name": match["name"].title(), "resolvedId": match["id"], "quantity": qty})
        else:
            guess = guess_product_from_phrase(part)
            items.append({"name": guess.title(), "quantity": qty})
    return items

def interpret_message(text: str, alias2prod: Optional[Dict[str, Dict]] = None) -> Dict:
    """Interpret a free-text message as an intent plus a list of items.

    Args:
        text: the raw message.
        alias2prod: optional pre-loaded alias map (alias -> {"id", "name"}).
            When omitted, the map is loaded via ``load_alias_map()`` on
            every call, which reads the whole 'products' collection;
            callers handling many messages can load it once and pass it in.

    Returns:
        {"intent": "buy" | "sell" | "unknown", "items": [...]}
    """
    intent = detect_intent(text)
    if alias2prod is None:
        alias2prod, _ = load_alias_map()
    items = extract_items_from_text(text, alias2prod)
    return {"intent": intent, "items": items}