Spaces:
Sleeping
Sleeping
| """ | |
| NLU — Hybrid Hausa intent + entity extraction. | |
| Three-tier architecture: | |
| 1. Rule-based keyword matcher (fast path, ~80% of demo utterances) | |
| 2. Qwen2.5-1.5B-Instruct zero-shot JSON extractor (paraphrases, novel phrasings) | |
| 3. Rule-based fallback (if LLM fails or returns unparseable output) | |
| The LLM is lazy-loaded on first non-matched utterance so the Space boots fast. | |
| In production this would be replaced with a fine-tuned classifier on | |
| PlotWeaver's Hausa intent corpus. | |
| """ | |
| from __future__ import annotations | |
| import re | |
| import json | |
| import logging | |
| from typing import Optional | |
| logger = logging.getLogger("plotweaver.nlu") | |
# ---------------------------------------------------------------------------
# Layer 1: rule-based fast path (covers common demo phrases)
# ---------------------------------------------------------------------------

# Intent → keyword lists consumed by _match_intent_kw. Matching iterates this
# dict in insertion order and returns on the FIRST keyword hit, so earlier
# intents shadow later ones (e.g. "kudi" resolves to check_balance even inside
# a transfer phrase — NOTE(review): confirm this ordering is intentional).
# Keywords are substring-matched against the lowercased utterance padded with
# spaces by _norm(); entries that carry their own spaces ("i ", " no") rely on
# that padding to approximate whole-word matching.
INTENT_KEYWORDS = {
    "check_balance": ["duba", "ma'auni", "balance", "kudi", "asusu"],
    "block_card": ["toshe", "kati", "block"],
    "transfer_money": ["tura", "canji", "canjin", "aika", "transfer"],
    "buy_airtime": ["airtime", "caji"],
    "buy_bundle": ["bundle", "data", "intanet"],
    "complaint": ["korafi", "matsala", "complain"],
    "check_order": ["bincika", "order", "oda"],
    "reschedule": ["sake tsara", "reschedule", "canja lokaci"],
    "return_item": ["mayar", "mayarwa", "return"],
    "human_agent": ["mutum", "wakili", "agent", "human"],
    "yes": ["i ", " i", "eh", "haka ne", "yes", "ok", "okay"],
    "no": ["a'a", "a'aa", "ba haka", " no", "no "],
}

# Hausa number words → single decimal digits. Both ASCII and hooked-letter
# spellings are listed ("daya"/"ɗaya", "hudu"/"huɗu") because ASR output may
# use either. Used by _extract_digits to transcribe spoken digit sequences.
WORD_DIGITS = {
    "sifili": "0", "daya": "1", "ɗaya": "1", "biyu": "2", "uku": "3",
    "hudu": "4", "huɗu": "4", "biyar": "5", "shida": "6", "bakwai": "7",
    "takwas": "8", "tara": "9",
}

# Spoken Hausa amount phrases → integer amounts. Longer phrases must be tried
# before their prefixes ("dubu goma" before "dubu"); _extract_amount enforces
# this by sorting keys by length, descending.
WORD_AMOUNTS = {
    "dubu goma": 10000, "dubu biyar": 5000, "dubu biyu": 2000,
    "dubu": 1000, "ɗari biyar": 500, "dari biyar": 500,
    "ɗari": 100, "dari": 100,
}
| def _norm(t: str) -> str: | |
| return " " + t.lower().strip() + " " | |
def _match_intent_kw(text: str) -> Optional[str]:
    """Return the first intent (in INTENT_KEYWORDS insertion order) with a
    keyword occurring in the normalized *text*, or None when nothing hits."""
    padded = _norm(text)
    return next(
        (
            intent
            for intent, keywords in INTENT_KEYWORDS.items()
            if any(kw in padded for kw in keywords)
        ),
        None,
    )
| def _extract_digits(text: str) -> Optional[str]: | |
| m = re.findall(r"\d+", text) | |
| if m: | |
| return "".join(m) | |
| tokens = text.lower().split() | |
| d = [WORD_DIGITS[tok] for tok in tokens if tok in WORD_DIGITS] | |
| return "".join(d) if d else None | |
| def _extract_amount(text: str) -> Optional[int]: | |
| m = re.search(r"\d+", text) | |
| if m: | |
| return int(m.group()) | |
| t = text.lower() | |
| for phrase in sorted(WORD_AMOUNTS.keys(), key=len, reverse=True): | |
| if phrase in t: | |
| return WORD_AMOUNTS[phrase] | |
| return None | |
def _rule_based_parse(text: str, expected: Optional[str]) -> tuple[str, dict]:
    """Layers 1 + 3: deterministic keyword + slot matcher.

    Args:
        text: raw user utterance (may be empty or whitespace-only).
        expected: slot type the dialog manager is waiting for
            ("digits", "amount", "name", "date", "bundle", "text",
            "yesno"), or None when any top-level intent is acceptable.

    Returns:
        (intent, entities) — intent is "unknown" when nothing matched.
    """
    entities: dict = {}
    if not text or not text.strip():
        return "unknown", entities

    # _match_intent_kw is pure, so compute it once and reuse it for the
    # human-agent escape, the yes/no branch, and the final generic match
    # (the original recomputed it up to three times per call).
    kw_intent = _match_intent_kw(text)

    # Universal escape: the user can always demand a human agent.
    if kw_intent == "human_agent":
        return "human_agent", entities

    # Slot-typed branches. When the expected slot cannot be extracted we
    # deliberately fall through to the generic intent match below, so a
    # mid-slot intent switch ("a'a, ina son korafi") still works.
    # NOTE(review): fall-through reading reconstructed from a whitespace-
    # mangled source — confirm against the original indentation.
    if expected == "digits":
        d = _extract_digits(text)
        if d:
            entities["digits"] = d
            return "provide_digits", entities
    if expected == "amount":
        a = _extract_amount(text)
        if a is not None:
            entities["amount"] = a
            return "provide_amount", entities
    if expected == "name":
        # Heuristic: take the last whitespace token ("sunana Musa" -> "Musa").
        name = text.strip().split()[-1] if text.strip() else ""
        if name:
            entities["name"] = name
            return "provide_name", entities
    if expected == "date":
        # Free-form: downstream code interprets the raw date phrase.
        entities["date"] = text.strip()
        return "provide_date", entities
    if expected == "bundle":
        t = text.lower()
        for b in ("rana", "mako", "wata"):  # daily / weekly / monthly
            if b in t:
                entities["bundle"] = b
                return "provide_bundle", entities
    if expected == "text":
        entities["text"] = text.strip()
        return "provide_text", entities
    if expected == "yesno" and kw_intent in ("yes", "no"):
        return kw_intent, entities

    # Generic keyword intent, or give up.
    if kw_intent:
        return kw_intent, entities
    return "unknown", entities
# ---------------------------------------------------------------------------
# Layer 2: Qwen2.5-1.5B-Instruct zero-shot NLU
# ---------------------------------------------------------------------------

# Module-level cache for the lazily-loaded LLM; populated by _load_llm().
_llm_model = None
_llm_tokenizer = None
_llm_failed = False  # set to True after any load failure, to prevent retries

def _load_llm():
    """Lazy-load Qwen2.5-1.5B-Instruct. Called only when rule-based misses.

    Returns:
        (model, tokenizer) on success — cached in module globals so the
        download/initialization cost is paid at most once — or
        (None, None) when loading failed now or on a previous call.
    """
    global _llm_model, _llm_tokenizer, _llm_failed
    if _llm_failed:
        # A previous attempt failed; don't pay the import/download cost again.
        return None, None
    if _llm_model is not None:
        return _llm_model, _llm_tokenizer
    try:
        # Imported lazily so the module (and the Space) boots without torch.
        import torch
        from transformers import AutoModelForCausalLM, AutoTokenizer
        logger.info("Loading Qwen2.5-1.5B-Instruct for NLU…")
        model_id = "Qwen/Qwen2.5-1.5B-Instruct"
        _llm_tokenizer = AutoTokenizer.from_pretrained(model_id)
        _llm_model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float32,  # CPU — bfloat16 not broadly supported
            low_cpu_mem_usage=True,
        )
        _llm_model.eval()
        logger.info("Qwen2.5-1.5B-Instruct ready.")
        return _llm_model, _llm_tokenizer
    except Exception as e:
        # Broad catch is deliberate: any failure (missing deps, download,
        # OOM) downgrades NLU to rules-only instead of crashing the app.
        logger.warning(f"LLM load failed: {e}")
        _llm_failed = True
        return None, None
# Candidate intents per expected-slot context. Keeps the LLM prompt small
# and constrains output to valid options only. Keyed by the same `expected`
# values _rule_based_parse uses; None and "intent" share the full open set,
# slot-typed keys narrow to the matching provide_* intent plus the escapes.
CANDIDATE_INTENTS = {
    None: ["check_balance", "block_card", "transfer_money",
           "buy_airtime", "buy_bundle", "complaint",
           "check_order", "reschedule", "return_item",
           "human_agent", "unknown"],
    "intent": ["check_balance", "block_card", "transfer_money",
               "buy_airtime", "buy_bundle", "complaint",
               "check_order", "reschedule", "return_item",
               "human_agent", "unknown"],
    "yesno": ["yes", "no", "human_agent", "unknown"],
    "digits": ["provide_digits", "human_agent", "unknown"],
    "amount": ["provide_amount", "human_agent", "unknown"],
    "name": ["provide_name", "human_agent", "unknown"],
    "date": ["provide_date", "human_agent", "unknown"],
    "bundle": ["provide_bundle", "human_agent", "unknown"],
    "text": ["provide_text", "human_agent", "unknown"],
}

# System prompt for the zero-shot classifier. Runtime string — the exact
# wording (and the "JSON only" instruction) is what _llm_parse's output
# parsing depends on; do not edit casually.
SYSTEM_PROMPT = """You are an intent classifier for a Hausa-language customer service voice agent.
Analyze the user's Hausa utterance and return a JSON object with:
- "intent": one of the candidate intents provided
- "entities": a dict of extracted values (may be empty)
Intent meanings:
- check_balance: user wants to check their account balance
- block_card: user wants to block or freeze their bank card
- transfer_money: user wants to transfer or send money
- buy_airtime: user wants to buy phone airtime
- buy_bundle: user wants to buy a data bundle
- complaint: user wants to file a complaint
- check_order: user wants to check an order status
- reschedule: user wants to reschedule a delivery
- return_item: user wants to return an item
- human_agent: user wants to speak to a human
- yes / no: affirmative or negative response
- provide_digits / provide_amount / provide_name / provide_date / provide_bundle / provide_text: user is providing specific information
- unknown: cannot determine the intent
Return ONLY a valid JSON object, no explanation. Example: {"intent": "check_balance", "entities": {}}"""
def _llm_parse(text: str, expected: Optional[str]) -> Optional[tuple[str, dict]]:
    """Layer 2: zero-shot LLM classification. Returns None on any failure.

    Builds a candidate-constrained prompt, greedily decodes up to 80 new
    tokens, then parses the first JSON object in the output and validates
    the intent against the candidate list. Any failure (no model, decode
    error, malformed JSON, out-of-candidate intent) yields None so the
    caller can fall back to the rule layer.
    """
    model, tokenizer = _load_llm()
    if model is None:
        return None
    candidates = CANDIDATE_INTENTS.get(expected, CANDIDATE_INTENTS[None])
    user_prompt = (
        f'Hausa utterance: "{text}"\n'
        f'Expected slot type: {expected or "any"}\n'
        f'Candidate intents: {", ".join(candidates)}\n\n'
        'Respond with JSON only.'
    )
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_prompt},
    ]
    try:
        import torch
        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        inputs = tokenizer(prompt, return_tensors="pt")
        with torch.no_grad():
            out = model.generate(
                **inputs,
                max_new_tokens=80,
                do_sample=False,  # deterministic: classification, not creativity
                pad_token_id=tokenizer.eos_token_id,
            )
        # Decode only the newly generated tokens, not the echoed prompt.
        generated = tokenizer.decode(out[0][inputs.input_ids.shape[1]:], skip_special_tokens=True).strip()
        logger.info(f"LLM raw output: {generated}")
        # Extract the first complete JSON object. The previous non-greedy
        # regex (r"\{.*?\}") stopped at the FIRST closing brace, which
        # truncated any object containing a nested "entities" dict (even
        # "{}") and made json.loads fail on every valid response.
        # raw_decode parses one balanced JSON value and ignores trailing
        # prose or markdown fences.
        start = generated.find("{")
        if start == -1:
            return None
        parsed, _ = json.JSONDecoder().raw_decode(generated[start:])
        if not isinstance(parsed, dict):
            return None
        intent = parsed.get("intent", "unknown")
        entities = parsed.get("entities", {}) or {}
        if not isinstance(entities, dict):
            entities = {}
        # Validate intent is in candidate list
        if intent not in candidates:
            logger.info(f"LLM returned out-of-candidate intent: {intent}")
            return None
        return intent, entities
    except Exception as e:
        # Broad catch: LLM output is best-effort; the rule layer is the net.
        logger.warning(f"LLM inference failed: {e}")
        return None
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------

def parse(text: str, expected: Optional[str] = None,
          use_llm: bool = True) -> tuple[str, dict, str]:
    """
    Hybrid NLU. Returns (intent, entities, source) where source is one of
    'rule', 'llm', or 'rule_fallback'.

    Flow:
      1. Rule-based keyword/slot matcher (fast, deterministic).
      2. If it yields 'unknown' and use_llm is True, try Qwen2.5 zero-shot.
      3. If the LLM fails or returns invalid output, keep the rule-based
         'unknown' and tag it 'rule_fallback'.
    """
    rule_intent, rule_entities = _rule_based_parse(text, expected)

    # Rule layer answered, or the LLM is disabled — report the rule result.
    if rule_intent != "unknown" or not use_llm:
        return rule_intent, rule_entities, "rule"

    llm_result = _llm_parse(text, expected)
    if llm_result is None:
        # LLM unavailable or unparseable: surface the rule-based 'unknown'.
        return rule_intent, rule_entities, "rule_fallback"

    intent, entities = llm_result
    # The LLM may hallucinate strict-format slots; when our deterministic
    # extractors find a value in the raw text, they win.
    if expected == "digits":
        digits = _extract_digits(text)
        if digits:
            entities["digits"] = digits
    elif expected == "amount":
        amount = _extract_amount(text)
        if amount is not None:
            entities["amount"] = amount
    return intent, entities, "llm"