"""LLM extraction: messy voice-note text -> structured order records. Runs on the unified Gemma 4 model (see gemma.py). Gemma's `transformers` path has no GBNF grammar, so instead of a hard guarantee we ask for pure JSON, parse it robustly (stripping code fences / surrounding prose), and retry once on failure. The editable confirm screen is the final safety net if a field still comes out wrong. A single note may describe several customers, so the schema is a list of orders. """ from __future__ import annotations import json import re import gemma import llama_backend from config import GEMMA_MAX_NEW_TOKENS # Fields every normalized order dict carries. (Storage keys are kept generic so the # DB schema is stable; their food meaning: services=items ordered, # next_appointment=pickup/delivery, supplies=ingredients to buy.) JOB_FIELDS = ( "customer", "services", "amount_charged", "amount_paid", "payment_method", "next_appointment", "supplies", "notes", ) # JSON schema -> GBNF grammar. All fields required so no key can ever go missing; # unknown numbers become 0, unknown strings "", empty lists []. payment_method is # an enum so the model can't free-type it. EXTRACTION_SCHEMA = { "type": "object", "properties": { "jobs": { "type": "array", "items": { "type": "object", "properties": { "customer": {"type": "string"}, "services": {"type": "array", "items": {"type": "string"}}, "amount_charged": {"type": "number"}, "amount_paid": {"type": "number"}, "payment_method": {"type": "string", "enum": ["cash", "card", "transfer", ""]}, "next_appointment": {"type": "string"}, "supplies": {"type": "array", "items": {"type": "string"}}, "notes": {"type": "string"}, }, "required": list(JOB_FIELDS), }, } }, "required": ["jobs"], } _SYSTEM = """You convert a home-based food maker's messy spoken voice note into structured ORDER records (custom cakes, party trays, festive treats, catering). The JSON keys are generic; fill them with food-order meaning: - customer = who the order is for. - services = the ITEMS ordered, each as a short string with quantity (e.g. "2 dozen pineapple tarts"). - amount_charged = the TOTAL price of the order. - amount_paid = how much they've paid so far (a deposit, or the full amount). • "80, paid 20 deposit" -> amount_charged 80, amount_paid 20. • "45, paid already" / "paid cash 45" -> amount_charged 45, amount_paid 45. • "200, pay on delivery" -> amount_charged 200, amount_paid 0. - payment_method = exactly one of: cash, card, transfer, or "" if not said. - next_appointment = the PICKUP / DELIVERY time in her own words ("Saturday", "tomorrow evening"). - supplies = INGREDIENTS she needs to buy for the order. - notes = short reminders that don't fit other fields. Rules: - A note may contain ONE or SEVERAL orders. Return one entry per order. - Unknown number -> 0. Unknown text -> "". Nothing to buy -> []. - Never invent data that isn't in the note. Return ONLY the JSON object.""" # Few-shot examples: deposit+balance, paid-in-full, unpaid catering, multi-order. _FEWSHOT = [ ( "Mrs Tan ordered two dozen pineapple tarts and a kaya cake, eighty dollars " "total, she paid twenty deposit by transfer, picking up Saturday. I need " "more butter and pandan leaves.", {"jobs": [{ "customer": "Mrs Tan", "services": ["2 dozen pineapple tarts", "Kaya cake"], "amount_charged": 80, "amount_paid": 20, "payment_method": "transfer", "next_appointment": "Saturday", "supplies": ["butter", "pandan leaves"], "notes": "", }]}, ), ( "David wants a chocolate birthday cake for forty-five, paid cash already, " "collecting tomorrow evening.", {"jobs": [{ "customer": "David", "services": ["Chocolate birthday cake"], "amount_charged": 45, "amount_paid": 45, "payment_method": "cash", "next_appointment": "tomorrow evening", "supplies": [], "notes": "", }]}, ), ( "Catering for the Lim office party, fifty curry puffs and three trays of " "nasi lemak, two hundred dollars, they'll pay on delivery Friday. Order more " "chicken and curry powder.", {"jobs": [{ "customer": "Lim office party", "services": ["50 curry puffs", "3 trays nasi lemak"], "amount_charged": 200, "amount_paid": 0, "payment_method": "", "next_appointment": "Friday", "supplies": ["chicken", "curry powder"], "notes": "", }]}, ), ( "Auntie Siti picking up four dozen kueh lapis later, sixty dollars cash done. " "And Mr Goh ordered a durian cake for next Sunday, ninety, paid thirty deposit card.", {"jobs": [ {"customer": "Auntie Siti", "services": ["4 dozen kueh lapis"], "amount_charged": 60, "amount_paid": 60, "payment_method": "cash", "next_appointment": "later today", "supplies": [], "notes": ""}, {"customer": "Mr Goh", "services": ["Durian cake"], "amount_charged": 90, "amount_paid": 30, "payment_method": "card", "next_appointment": "next Sunday", "supplies": [], "notes": ""}, ]}, ), ] def _build_messages(text: str) -> list[dict]: messages = [{"role": "system", "content": _SYSTEM}] for user_text, answer in _FEWSHOT: messages.append({"role": "user", "content": user_text}) messages.append({"role": "assistant", "content": json.dumps(answer, ensure_ascii=False)}) messages.append({"role": "user", "content": text}) return messages def _to_float(x) -> float: try: return round(float(x), 2) except (TypeError, ValueError): return 0.0 def _normalize(job: dict) -> dict: services = [str(s).strip() for s in (job.get("services") or []) if str(s).strip()] supplies = [str(s).strip() for s in (job.get("supplies") or []) if str(s).strip()] method = str(job.get("payment_method") or "").strip().lower() if method not in {"cash", "card", "transfer", ""}: method = "" return { "customer": str(job.get("customer") or "Customer").strip() or "Customer", "services": services, "amount_charged": _to_float(job.get("amount_charged")), "amount_paid": _to_float(job.get("amount_paid")), "payment_method": method, "next_appointment": str(job.get("next_appointment") or "").strip(), "supplies": supplies, "notes": str(job.get("notes") or "").strip(), } def _parse_jobs(content: str) -> list[dict] | None: """Pull the jobs list out of a model reply. Tolerates ```json fences and prose around the object. Returns None if no valid JSON object is found.""" if not content: return None # Strip code fences, then grab the outermost {...} block. cleaned = re.sub(r"^```(?:json)?|```$", "", content.strip(), flags=re.MULTILINE).strip() start, end = cleaned.find("{"), cleaned.rfind("}") if start == -1 or end == -1 or end <= start: return None try: data = json.loads(cleaned[start:end + 1]) except (json.JSONDecodeError, TypeError): return None jobs = data.get("jobs") if isinstance(data, dict) else None return jobs if isinstance(jobs, list) else None def extract(text: str) -> list[dict]: """Extract a list of normalized order dicts from a transcript. Never raises on a bad model response — returns [] so the UI can show a friendly message. Primary path: llama.cpp with a grammar (JSON is guaranteed valid). Fallback: Gemma via transformers (no grammar -> robust parse + one retry).""" text = (text or "").strip() if not text: return [] messages = _build_messages(text) # --- Primary: llama.cpp + GBNF grammar --- try: content = llama_backend.extract_json(messages, EXTRACTION_SCHEMA) jobs = _parse_jobs(content) if jobs is not None: return [_normalize(j) for j in jobs if isinstance(j, dict)] except llama_backend.BackendUnavailable: pass # fall through to the transformers backend # --- Fallback: Gemma (transformers) --- content = gemma.generate_chat(messages, max_new_tokens=GEMMA_MAX_NEW_TOKENS) jobs = _parse_jobs(content) if jobs is None: # one corrective retry — common fix is stray prose around the JSON messages = messages + [ {"role": "assistant", "content": content}, {"role": "user", "content": "Output ONLY the JSON object, no prose, no code fences."}, ] content = gemma.generate_chat(messages, max_new_tokens=GEMMA_MAX_NEW_TOKENS) jobs = _parse_jobs(content) if not jobs: return [] return [_normalize(j) for j in jobs if isinstance(j, dict)]