# Qx-Vision / parser.py
# Last update: commit 8b96875 (verified) by yoursdvniel
import re
from typing import List, Dict, Optional, Tuple
from slugify import slugify
from config import get_db
# -------------------------
# Helpers
# -------------------------
def normalize_term(s: str) -> str:
    """Lowercase, trim, and collapse internal whitespace runs to one space.

    None (or any falsy value) is treated as the empty string.
    """
    lowered = (s or "").lower().strip()
    return re.sub(r"\s+", " ", lowered)
def product_id_from_name(name: str) -> str:
    """Derive a stable, URL-safe product document id from a display name."""
    canonical = normalize_term(name)
    return slugify(canonical)
def load_alias_map() -> Tuple[Dict[str, Dict], Dict[str, Dict]]:
    """Build alias and id lookup tables from the Firestore 'products' collection.

    Returns:
        alias2prod: normalized alias -> {"id", "name"} for every alias
            (a product's own display name counts as an alias of itself),
            e.g. {"amatamatisi": {"id": "tomatoes", "name": "Tomatoes"}}.
        prod_by_id: product id -> {"id", "name"},
            e.g. {"tomatoes": {"id": "tomatoes", "name": "Tomatoes"}}.
    """
    alias2prod: Dict[str, Dict] = {}
    prod_by_id: Dict[str, Dict] = {}
    for doc in get_db().collection("products").stream():
        doc_id = doc.id
        payload = doc.to_dict() or {}
        # Fall back to the document id when no display name is stored.
        display_name = payload.get("name") or doc_id
        for alias in {display_name, *payload.get("aliases", [])}:
            alias2prod[normalize_term(alias)] = {"id": doc_id, "name": display_name}
        prod_by_id[doc_id] = {"id": doc_id, "name": display_name}
    return alias2prod, prod_by_id
# Spelled-out English quantities we recognize (one..ten).
NUM_WORDS_EN = {
    "one": 1, "two": 2, "three": 3, "four": 4, "five": 5,
    "six": 6, "seven": 7, "eight": 8, "nine": 9, "ten": 10,
}
def parse_quantity(text: str) -> Optional[int]:
    """Pull an integer quantity out of free text, if one is present.

    Digits are tried first — including the isiZulu prefix form "ezi-3" —
    with thousands-separator commas stripped beforehand. Failing that,
    the spelled-out English words in NUM_WORDS_EN are searched.
    Returns None when nothing matches.
    """
    # Inlined normalize_term: trim, lowercase, collapse whitespace.
    cleaned = re.sub(r"\s+", " ", (text or "").strip().lower())
    digit_hit = re.search(r"(?:ezi-)?(\d{1,6})\b", cleaned.replace(",", ""))
    if digit_hit is not None:
        return int(digit_hit.group(1))
    # First dictionary entry that appears as a whole word wins.
    for word, value in NUM_WORDS_EN.items():
        if re.search(rf"\b{word}\b", cleaned):
            return value
    return None
def guess_product_from_phrase(phrase: str) -> str:
    """Heuristically pick the product words out of an item phrase.

    A trailing "of <words>" wins ("loaves of bread" -> "bread");
    otherwise fall back to the last one or two alphabetic tokens.
    """
    # Inlined normalize_term: trim, lowercase, collapse whitespace.
    cleaned = re.sub(r"\s+", " ", (phrase or "").strip().lower())
    of_tail = re.search(r"\bof\s+([a-zA-Z][a-zA-Z ]+)$", cleaned)
    if of_tail is not None:
        return of_tail.group(1).strip()
    words = [w for w in re.split(r"[^a-zA-Z]+", cleaned) if w]
    if not words:
        return cleaned
    if len(words) >= 2:
        return " ".join(words[-2:])
    return words[0]
# Purchase verbs: English, isiZulu ("ngithenge"), Sesotho ("ke rekile").
BUY_PAT = re.compile(
    r"\b(buy|bought|purchase|purchased|ngithenge|ke\s+rekile)\b", re.IGNORECASE
)
# Sale verbs: English, isiZulu ("ngithengise"), Sesotho ("ke rekisitse").
SELL_PAT = re.compile(
    r"\b(sell|sold|ngithengise|ke\s+rekisitse)\b", re.IGNORECASE
)
def detect_intent(text: str) -> str:
    """Classify a raw message as "buy", "sell", or "unknown".

    Buy phrasing is checked first, so a message containing both kinds
    of verb is reported as a purchase.
    """
    for pattern, label in ((BUY_PAT, "buy"), (SELL_PAT, "sell")):
        if pattern.search(text):
            return label
    return "unknown"
def split_phrases(raw: str) -> List[str]:
    """Split a message into item phrases on list separators.

    Separators are commas plus the conjunctions "and" (English),
    "no" (isiZulu), and "le"/"na" (Sesotho). Empty pieces are dropped.
    """
    # Inlined normalize_term: trim, lowercase, collapse whitespace.
    cleaned = re.sub(r"\s+", " ", (raw or "").strip().lower())
    pieces = re.split(r"\s*,\s*|\s+and\s+|\s+no\s+|\s+le\s+|\s+na\s+", cleaned)
    return [piece for piece in pieces if piece]
def best_alias_match(part: str, alias2prod: Dict[str, Dict]) -> Optional[Dict]:
    """Find the longest known alias occurring as whole words in *part*.

    Returns {"alias", "id", "name"} for the best hit, or None when no
    alias from *alias2prod* appears in the phrase.
    """
    # Inlined normalize_term; pad with spaces so \b behaves at the edges.
    padded = " " + re.sub(r"\s+", " ", (part or "").strip().lower()) + " "
    winner: Optional[Dict] = None
    for alias, meta in alias2prod.items():
        if not re.search(rf"\b{re.escape(alias)}\b", padded):
            continue
        # Prefer the longest matching alias; ties keep the earlier one.
        if winner is None or len(alias) > len(winner["alias"]):
            winner = {"alias": alias, "id": meta["id"], "name": meta["name"]}
    return winner
def extract_items_from_text(text: str, alias2prod: Dict[str, Dict]) -> List[Dict]:
    """Extract purchasable items from a free-text message.

    Returns a list of dicts shaped {name, resolvedId (optional),
    quantity (optional)} — resolvedId only when an alias in
    *alias2prod* matched; quantity is None when none was parsed.
    """
    cleaned = normalize_term(text)
    # Drop a leading buy-verb preamble ("i bought", "ngithenge", ...).
    cleaned = re.sub(
        r"^\b(i\s+)?(bought|buy|purchased|ngithenge|ke\s+rekile)\b[\s,:-]*",
        "",
        cleaned,
    )
    results: List[Dict] = []
    for phrase in split_phrases(cleaned):
        quantity = parse_quantity(phrase)
        hit = best_alias_match(phrase, alias2prod)
        if hit is None:
            # No catalog match — keep a title-cased heuristic guess.
            results.append({
                "name": guess_product_from_phrase(phrase).title(),
                "quantity": quantity,
            })
        else:
            results.append({
                "name": hit["name"].title(),
                "resolvedId": hit["id"],
                "quantity": quantity,
            })
    return results
def interpret_message(text: str) -> Dict:
    """Parse a raw message into {"intent": ..., "items": [...]}.

    Loads the product alias map from Firestore on every call, so each
    invocation performs a database read.
    """
    alias_map, _ = load_alias_map()
    return {
        "intent": detect_intent(text),
        "items": extract_items_from_text(text, alias_map),
    }