Spaces:
Sleeping
Sleeping
| import re | |
| from typing import List, Dict, Optional, Tuple | |
| from slugify import slugify | |
| from config import get_db | |
| # ------------------------- | |
| # Helpers | |
| # ------------------------- | |
def normalize_term(s: str) -> str:
    """Canonicalize free text: lowercase, trim, and collapse runs of whitespace to one space."""
    cleaned = (s or "").strip().lower()
    return re.sub(r"\s+", " ", cleaned)
def product_id_from_name(name: str) -> str:
    """Derive a stable, URL-safe product document id from a display name."""
    normalized = normalize_term(name)
    return slugify(normalized)
def load_alias_map() -> Tuple[Dict[str, Dict], Dict[str, Dict]]:
    """
    Reads all docs in 'products' and returns:
      alias2prod: { "amatamatisi": {"id": "tomatoes", "name": "Tomatoes"}, ... }
      prod_by_id: { "tomatoes": {"id": "tomatoes", "name": "Tomatoes"} }
    """
    db = get_db()
    alias2prod: Dict[str, Dict] = {}
    prod_by_id: Dict[str, Dict] = {}
    for doc in db.collection("products").stream():
        record = doc.to_dict() or {}
        # Fall back to the document id when no display name is stored.
        display = record.get("name") or doc.id
        # The display name itself is always a valid alias; dedupe via a set.
        for alias in {display, *record.get("aliases", [])}:
            alias2prod[normalize_term(alias)] = {"id": doc.id, "name": display}
        prod_by_id[doc.id] = {"id": doc.id, "name": display}
    return alias2prod, prod_by_id
# English number words recognized by parse_quantity; only one..ten are supported.
NUM_WORDS_EN = {
    "one":1,"two":2,"three":3,"four":4,"five":5,"six":6,"seven":7,"eight":8,"nine":9,"ten":10,
}
def parse_quantity(text: str) -> Optional[int]:
    """Extract an integer quantity from free text, or None when absent.

    Recognizes bare digits — optionally prefixed isiZulu-style as "ezi-3",
    with thousands commas stripped — and the English words one..ten.
    """
    normalized = normalize_term(text)
    digit_hit = re.search(r"(?:ezi-)?(\d{1,6})\b", normalized.replace(",", ""))
    if digit_hit is not None:
        return int(digit_hit.group(1))
    # No digits: fall back to whole-word English number words.
    for word, value in NUM_WORDS_EN.items():
        if re.search(rf"\b{word}\b", normalized):
            return value
    return None
def guess_product_from_phrase(phrase: str) -> str:
    """Heuristically pick the product name out of an unmatched item phrase.

    Prefers the tail of an "... of X" construction ("loaves of bread" -> "bread");
    otherwise falls back to the last one or two alphabetic tokens.
    """
    text = normalize_term(phrase)
    of_tail = re.search(r"\bof\s+([a-zA-Z][a-zA-Z ]+)$", text)  # "loaves of bread" -> bread
    if of_tail:
        return of_tail.group(1).strip()
    words = [w for w in re.split(r"[^a-zA-Z]+", text) if w]
    if not words:
        return text
    if len(words) >= 2:
        return " ".join(words[-2:])
    return words[-1]
# Verb patterns marking a purchase / sale in English, isiZulu, and Sesotho.
BUY_PAT = re.compile(
    r"\b(buy|bought|purchase|purchased|ngithenge|ke\s+rekile)\b", re.IGNORECASE
)
SELL_PAT = re.compile(
    r"\b(sell|sold|ngithengise|ke\s+rekisitse)\b", re.IGNORECASE
)
def detect_intent(text: str) -> str:
    """Classify *text* as a "buy" or "sell" message; "unknown" when neither verb appears.

    Buy verbs win when both are present (checked first).
    """
    for pattern, label in ((BUY_PAT, "buy"), (SELL_PAT, "sell")):
        if pattern.search(text):
            return label
    return "unknown"
def split_phrases(raw: str) -> List[str]:
    """Split a message into item phrases on commas and on the conjunctions
    'and' (English), 'no' (isiZulu), and 'le'/'na' (Sesotho); drops empties."""
    normalized = normalize_term(raw)
    pieces = re.split(r"\s*,\s*|\s+and\s+|\s+no\s+|\s+le\s+|\s+na\s+", normalized)  # 'and' (EN), 'no' (zu), 'le/na' (st)
    return [piece for piece in pieces if piece]
def best_alias_match(part: str, alias2prod: Dict[str, Dict]) -> Optional[Dict]:
    """Return the longest known alias occurring in *part* as a whole word.

    Result is {"alias", "id", "name"} for the best hit, or None when no
    alias matches. Ties on length keep the first alias encountered.
    """
    # Pad with spaces so the whole-word search behaves uniformly at the edges.
    haystack = f" {normalize_term(part)} "
    best: Optional[Dict] = None
    for alias, meta in alias2prod.items():
        if not re.search(rf"\b{re.escape(alias)}\b", haystack):
            continue
        if best is None or len(alias) > len(best["alias"]):
            best = {"alias": alias, "id": meta["id"], "name": meta["name"]}
    return best
def extract_items_from_text(text: str, alias2prod: Dict[str, Dict]) -> List[Dict]:
    """
    Returns [{ name, resolvedId (optional), quantity (optional) }]

    Splits the message into item phrases, resolves each against the alias map,
    and attaches a parsed quantity when one is present (None otherwise).
    """
    items: List[Dict] = []
    t = normalize_term(text)
    # Strip a leading intent-verb preamble so it is not mistaken for a product.
    # FIX: also strip sell verbs — this function is called for sell messages
    # too (see interpret_message), and "i sold 3 tomatoes" previously left
    # "sold" in the phrase, polluting the product-name guess.
    t = re.sub(
        r"^\b(i\s+)?(bought|buy|purchased|ngithenge|ke\s+rekile|sold|sell|ngithengise|ke\s+rekisitse)\b[\s,:-]*",
        "",
        t,
    )
    for part in split_phrases(t):
        qty = parse_quantity(part)
        match = best_alias_match(part, alias2prod)
        if match:
            items.append({"name": match["name"].title(), "resolvedId": match["id"], "quantity": qty})
        else:
            # Unknown product: keep a best-effort guess so the caller can confirm.
            guess = guess_product_from_phrase(part)
            items.append({"name": guess.title(), "quantity": qty})
    return items
def interpret_message(text: str) -> Dict:
    """Parse one raw message into {"intent": "buy"|"sell"|"unknown", "items": [...]}."""
    alias2prod, _ = load_alias_map()
    return {
        "intent": detect_intent(text),
        "items": extract_items_from_text(text, alias2prod),
    }