Spaces:
Running
Running
| """LLM extraction: messy voice-note text -> structured order records. | |
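Primary path: llama.cpp (see llama_backend) constrained by a GBNF grammar built from
EXTRACTION_SCHEMA, so the reply is valid JSON by construction. When that backend is
unavailable (or its reply cannot be parsed) we fall back to the unified Gemma 4 model
(see gemma.py). Gemma's `transformers` path has no GBNF grammar, so instead of a hard
guarantee we ask for pure JSON, parse it robustly (stripping code fences / surrounding
prose), and retry once on failure. The editable confirm screen is the final safety net
if a field still comes out wrong.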
| A single note may describe several customers, so the schema is a list of orders. | |
| """ | |
from __future__ import annotations

import json
import math
import re

import gemma
import llama_backend
from config import GEMMA_MAX_NEW_TOKENS
# Keys carried by every normalized order dict. The storage names are deliberately
# generic so the DB schema stays stable; in food-order terms they mean:
# services = items ordered, next_appointment = pickup/delivery,
# supplies = ingredients to buy.
JOB_FIELDS = (
    "customer",
    "services",
    "amount_charged",
    "amount_paid",
    "payment_method",
    "next_appointment",
    "supplies",
    "notes",
)

# Schema of a single order. Every field is required so no key can go missing:
# unknown numbers become 0, unknown strings "", empty lists []. payment_method is
# an enum so the model cannot free-type it.
_ORDER_SCHEMA = {
    "type": "object",
    "properties": {
        "customer": {"type": "string"},
        "services": {"type": "array", "items": {"type": "string"}},
        "amount_charged": {"type": "number"},
        "amount_paid": {"type": "number"},
        "payment_method": {
            "type": "string",
            "enum": ["cash", "card", "transfer", ""],
        },
        "next_appointment": {"type": "string"},
        "supplies": {"type": "array", "items": {"type": "string"}},
        "notes": {"type": "string"},
    },
    "required": list(JOB_FIELDS),
}

# JSON schema -> GBNF grammar. One voice note can cover several customers, so the
# top-level object wraps a list of orders under "jobs".
EXTRACTION_SCHEMA = {
    "type": "object",
    "properties": {
        "jobs": {"type": "array", "items": _ORDER_SCHEMA},
    },
    "required": ["jobs"],
}
# System prompt for the extraction chat; _build_messages sends it as the first
# ("system") message. It maps the generic storage keys onto their food-order meaning
# and restates the defaults for unknown values (0 / "" / []). The text is sent to the
# model verbatim, so any edit here changes model behaviour.
_SYSTEM = """You convert a home-based food maker's messy spoken voice note into structured ORDER records (custom cakes, party trays, festive treats, catering).
The JSON keys are generic; fill them with food-order meaning:
- customer = who the order is for.
- services = the ITEMS ordered, each as a short string with quantity (e.g. "2 dozen pineapple tarts").
- amount_charged = the TOTAL price of the order.
- amount_paid = how much they've paid so far (a deposit, or the full amount).
• "80, paid 20 deposit" -> amount_charged 80, amount_paid 20.
• "45, paid already" / "paid cash 45" -> amount_charged 45, amount_paid 45.
• "200, pay on delivery" -> amount_charged 200, amount_paid 0.
- payment_method = exactly one of: cash, card, transfer, or "" if not said.
- next_appointment = the PICKUP / DELIVERY time in her own words ("Saturday", "tomorrow evening").
- supplies = INGREDIENTS she needs to buy for the order.
- notes = short reminders that don't fit other fields.
Rules:
- A note may contain ONE or SEVERAL orders. Return one entry per order.
- Unknown number -> 0. Unknown text -> "". Nothing to buy -> [].
- Never invent data that isn't in the note.
Return ONLY the JSON object."""
# Few-shot examples as (voice-note text, expected answer) pairs. _build_messages replays
# each pair as a user turn followed by an assistant turn (the answer serialized with
# json.dumps), so every answer must already have the {"jobs": [...]} shape.
# Cases covered, in order: deposit + balance, paid in full, unpaid catering, multi-order.
# NOTE(review): in the multi-order example the note says "later" but the expected
# next_appointment is "later today" — confirm this paraphrase is intended, since the
# system prompt asks for the time "in her own words" and forbids inventing data.
_FEWSHOT = [
    (
        "Mrs Tan ordered two dozen pineapple tarts and a kaya cake, eighty dollars "
        "total, she paid twenty deposit by transfer, picking up Saturday. I need "
        "more butter and pandan leaves.",
        {"jobs": [{
            "customer": "Mrs Tan",
            "services": ["2 dozen pineapple tarts", "Kaya cake"],
            "amount_charged": 80, "amount_paid": 20,
            "payment_method": "transfer", "next_appointment": "Saturday",
            "supplies": ["butter", "pandan leaves"], "notes": "",
        }]},
    ),
    (
        "David wants a chocolate birthday cake for forty-five, paid cash already, "
        "collecting tomorrow evening.",
        {"jobs": [{
            "customer": "David",
            "services": ["Chocolate birthday cake"],
            "amount_charged": 45, "amount_paid": 45,
            "payment_method": "cash", "next_appointment": "tomorrow evening",
            "supplies": [], "notes": "",
        }]},
    ),
    (
        "Catering for the Lim office party, fifty curry puffs and three trays of "
        "nasi lemak, two hundred dollars, they'll pay on delivery Friday. Order more "
        "chicken and curry powder.",
        {"jobs": [{
            "customer": "Lim office party",
            "services": ["50 curry puffs", "3 trays nasi lemak"],
            "amount_charged": 200, "amount_paid": 0,
            "payment_method": "", "next_appointment": "Friday",
            "supplies": ["chicken", "curry powder"], "notes": "",
        }]},
    ),
    (
        "Auntie Siti picking up four dozen kueh lapis later, sixty dollars cash done. "
        "And Mr Goh ordered a durian cake for next Sunday, ninety, paid thirty deposit card.",
        {"jobs": [
            {"customer": "Auntie Siti", "services": ["4 dozen kueh lapis"],
             "amount_charged": 60, "amount_paid": 60, "payment_method": "cash",
             "next_appointment": "later today", "supplies": [], "notes": ""},
            {"customer": "Mr Goh", "services": ["Durian cake"],
             "amount_charged": 90, "amount_paid": 30, "payment_method": "card",
             "next_appointment": "next Sunday", "supplies": [], "notes": ""},
        ]},
    ),
]
def _build_messages(text: str) -> list[dict]:
    """Assemble the chat prompt for one voice note.

    The conversation is: the system instructions, then every few-shot pair replayed
    as a user turn plus an assistant turn (the answer serialized as JSON), and
    finally the note to extract as the last user turn.
    """
    chat = [{"role": "system", "content": _SYSTEM}]
    for note, expected in _FEWSHOT:
        chat.extend((
            {"role": "user", "content": note},
            # ensure_ascii=False keeps non-ASCII characters readable in the prompt.
            {"role": "assistant", "content": json.dumps(expected, ensure_ascii=False)},
        ))
    return [*chat, {"role": "user", "content": text}]
| def _to_float(x) -> float: | |
| try: | |
| return round(float(x), 2) | |
| except (TypeError, ValueError): | |
| return 0.0 | |
def _clean_str_list(value) -> list[str]:
    """Turn a model-supplied list field into a list of non-empty, stripped strings."""
    if not value:
        return []
    if not isinstance(value, (list, tuple)):
        # Without a grammar the model may emit a single string or number instead of
        # a list; wrap it so a string is not split into characters and a number
        # does not raise TypeError on iteration.
        value = [value]
    # Skip JSON nulls so they do not turn into the literal text "None".
    stripped = (str(item).strip() for item in value if item is not None)
    return [item for item in stripped if item]


def _normalize(job: dict) -> dict:
    """Return a clean order dict with exactly the keys listed in JOB_FIELDS.

    Missing or malformed values fall back to safe defaults rather than raising:
    - customer: stripped text, "Customer" when absent or blank.
    - services / supplies: list of non-empty stripped strings; a scalar becomes
      a one-element list and null items are dropped.
    - amount_charged / amount_paid: coerced with _to_float.
    - payment_method: lower-cased; anything other than cash / card / transfer
      becomes "".
    - next_appointment / notes: stripped text, "" when absent.
    """
    method = str(job.get("payment_method") or "").strip().lower()
    if method not in {"cash", "card", "transfer", ""}:
        method = ""
    return {
        "customer": str(job.get("customer") or "Customer").strip() or "Customer",
        "services": _clean_str_list(job.get("services")),
        "amount_charged": _to_float(job.get("amount_charged")),
        "amount_paid": _to_float(job.get("amount_paid")),
        "payment_method": method,
        "next_appointment": str(job.get("next_appointment") or "").strip(),
        "supplies": _clean_str_list(job.get("supplies")),
        "notes": str(job.get("notes") or "").strip(),
    }
| def _parse_jobs(content: str) -> list[dict] | None: | |
| """Pull the jobs list out of a model reply. Tolerates ```json fences and prose | |
| around the object. Returns None if no valid JSON object is found.""" | |
| if not content: | |
| return None | |
| # Strip code fences, then grab the outermost {...} block. | |
| cleaned = re.sub(r"^```(?:json)?|```$", "", content.strip(), flags=re.MULTILINE).strip() | |
| start, end = cleaned.find("{"), cleaned.rfind("}") | |
| if start == -1 or end == -1 or end <= start: | |
| return None | |
| try: | |
| data = json.loads(cleaned[start:end + 1]) | |
| except (json.JSONDecodeError, TypeError): | |
| return None | |
| jobs = data.get("jobs") if isinstance(data, dict) else None | |
| return jobs if isinstance(jobs, list) else None | |
def extract(text: str) -> list[dict]:
    """Turn a transcript into a list of normalized order dicts.

    A bad model response never raises; the result is [] so the UI can show a
    friendly message. Blank input short-circuits to [] without calling a model.

    Primary path: llama.cpp with a grammar (JSON is guaranteed valid).
    Fallback: Gemma via transformers (no grammar -> robust parse + one retry).
    """
    transcript = (text or "").strip()
    if not transcript:
        return []

    chat = _build_messages(transcript)

    # --- Primary: llama.cpp + GBNF grammar ---
    try:
        reply = llama_backend.extract_json(chat, EXTRACTION_SCHEMA)
        parsed = _parse_jobs(reply)
        if parsed is not None:
            return [_normalize(entry) for entry in parsed if isinstance(entry, dict)]
    except llama_backend.BackendUnavailable:
        pass  # fall through to the transformers backend

    # --- Fallback: Gemma (transformers) ---
    reply = gemma.generate_chat(chat, max_new_tokens=GEMMA_MAX_NEW_TOKENS)
    parsed = _parse_jobs(reply)
    if parsed is None:
        # One corrective retry: show the model its own reply and ask again. The
        # usual culprit is stray prose around the JSON.
        retry_chat = [
            *chat,
            {"role": "assistant", "content": reply},
            {"role": "user", "content": "Output ONLY the JSON object, no prose, no code fences."},
        ]
        reply = gemma.generate_chat(retry_chat, max_new_tokens=GEMMA_MAX_NEW_TOKENS)
        parsed = _parse_jobs(reply)

    # None (still unparseable) and [] (no orders) both collapse to an empty result.
    return [_normalize(entry) for entry in parsed or [] if isinstance(entry, dict)]