Spaces:

PlotweaverAI
/

Voice-AI-Agent-Clean

Sleeping

App Files Files Community

Toadoum committed 4 days ago

Commit

49b1566

verified ·

1 Parent(s): ae6619f

Update nlu.py

Browse files

Files changed (1) hide show

nlu.py +262 -255

nlu.py CHANGED Viewed

@@ -1,32 +1,29 @@
 """
-NLU — NLLB + Qwen pivot-through-English architecture with keyword fast-path.
-Flow:
-  1. Deterministic structural extractors run FIRST on the original Hausa
-     text (digits, amounts, yes/no keywords). These MUST be deterministic
-     because "1234" → "provide_digits" with digits="1234" is non-negotiable
-     for banks, and regex is faster + more reliable than any model for
-     this sub-task.
-  2. Keyword fast-path for common Hausa + English intent phrases. Matches
-     "check balance", "duba ma'auni", "canjin kuɗi", etc. in <10ms without
-     loading any model. This is what real voice bots use for 90% of turns.
-  3. If structural + keyword layers don't match, the text is translated
-     Hausa → English via NLLB-200 (skipped if input is already English),
-     then classified by Qwen2.5-1.5B in English (where it is strong) into
-     one of a small fixed set of intent labels.
-  4. If NLLB or Qwen fails, we return "unknown" cleanly — the dialogue
-     manager routes to a vertical-specific fallback prompt.
-All heavy models are lazy-loaded on first use. Cold-start downloads:
-  - NLLB-200-distilled-600M: ~2.4 GB
-  - Qwen2.5-1.5B-Instruct: ~3 GB
 """
 from __future__ import annotations
 import re
-import json
 import logging
 from typing import Optional
@@ -48,11 +45,9 @@ WORD_AMOUNTS = {
     "ɗari": 100, "dari": 100,
 }
-# Hausa yes/no keywords for the sole case where we short-circuit Qwen
 HAUSA_YES = {"i", "eh", "haka ne", "haka", "ok", "okay", "yes"}
 HAUSA_NO = {"a'a", "a'aa", "ba haka", "ba", "no"}
-# Human-agent escape hatch
 HUMAN_KEYWORDS = {"mutum", "wakili", "agent", "human"}
@@ -92,10 +87,9 @@ def _contains_human_keyword(text: str) -> bool:
     return any(kw in t for kw in HUMAN_KEYWORDS)
-# Keyword fast-path for common intents. Runs BEFORE NLLB+Qwen so that the
-# scripted demo flows don't require a 6GB LLM load. Phrases are Hausa and
-# English pairs that customers actually use. When none match, we fall
-# through to NLLB+Qwen for paraphrases.
 INTENT_KEYWORDS = {
     "check_balance": [
         "duba ma'auni", "ma'auni", "balance", "check balance",
@@ -136,10 +130,7 @@ INTENT_KEYWORDS = {
 def _match_intent_keyword(text: str) -> Optional[str]:
-    """Keyword fast-path for common customer-service intents.
-    Returns the intent name if a keyword matches, else None."""
     t = text.lower().strip()
-    # Check longer phrases first so "check balance" wins over "check order"
     all_kw = [(intent, kw) for intent, kws in INTENT_KEYWORDS.items() for kw in kws]
     all_kw.sort(key=lambda x: len(x[1]), reverse=True)
     for intent, kw in all_kw:
@@ -148,204 +139,231 @@ def _match_intent_keyword(text: str) -> Optional[str]:
     return None
-def _looks_english(text: str) -> bool:
-    """Heuristic: if text contains no Hausa-specific characters and is majority
-    ASCII, treat as English and skip NLLB translation. Hausa uses ɓ, ɗ, ƙ, ƴ
-    and the apostrophe in 'a'a', 'ma'auni', 'jumma'a' etc."""
-    hausa_chars = set("ɓɗƙƴƁƊƘƳ")
-    if any(c in hausa_chars for c in text):
-        return False
-    # Common Hausa words — if any match, treat as Hausa
-    hausa_markers = {
-        "duba", "ma'auni", "toshe", "kati", "canjin", "kuɗi", "kudi",
-        "saya", "airtime", "bundle", "korafi", "bincika", "oda",
-        "sake", "tsara", "mayar", "kaya", "wakili", "mutum",
-        "sannu", "nagode", "don", "allah", "ka", "yana", "tana",
-        "dubu", "ɗari", "dari", "biyar", "biyu", "uku", "hudu", "huɗu",
-    }
-    tokens = set(text.lower().split())
-    return not bool(tokens & hausa_markers)
 # ---------------------------------------------------------------------------
-# NLLB-200 Ha → En translation (lazy-loaded)
 # ---------------------------------------------------------------------------
-_nllb_model = None
-_nllb_tokenizer = None
-_nllb_failed = False
-def _load_nllb():
-    """Lazy-load NLLB-200-distilled-600M."""
-    global _nllb_model, _nllb_tokenizer, _nllb_failed
-    if _nllb_failed:
-        return None, None
-    if _nllb_model is not None:
-        return _nllb_model, _nllb_tokenizer
-    try:
-        import torch
-        from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
-        logger.info("Loading NLLB-200-distilled-600M…")
-        model_id = "facebook/nllb-200-distilled-600M"
-        _nllb_tokenizer = AutoTokenizer.from_pretrained(model_id)
-        _nllb_model = AutoModelForSeq2SeqLM.from_pretrained(
-            model_id,
-            torch_dtype=torch.float32,
-            low_cpu_mem_usage=True,
-        )
-        _nllb_model.eval()
-        logger.info("NLLB-200 ready.")
-        return _nllb_model, _nllb_tokenizer
-    except Exception as e:
-        logger.warning(f"NLLB load failed: {e}")
-        _nllb_failed = True
-        return None, None
-def translate_ha_to_en(text: str) -> Optional[str]:
-    """Translate Hausa to English via NLLB. Returns None on failure."""
-    model, tokenizer = _load_nllb()
-    if model is None or not text.strip():
-        return None
-    try:
-        import torch
-        # NLLB requires source language token set on tokenizer
-        tokenizer.src_lang = "hau_Latn"
-        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
-        # Force English output via forced_bos_token_id
-        forced_bos_id = tokenizer.convert_tokens_to_ids("eng_Latn")
-        with torch.no_grad():
-            out = model.generate(
-                **inputs,
-                forced_bos_token_id=forced_bos_id,
-                max_new_tokens=128,
-                num_beams=2,
-            )
-        translated = tokenizer.batch_decode(out, skip_special_tokens=True)[0].strip()
-        logger.info(f"NLLB Ha→En: {text!r} → {translated!r}")
-        return translated
-    except Exception as e:
-        logger.warning(f"NLLB translate failed: {e}")
-        return None
 # ---------------------------------------------------------------------------
-# Qwen2.5-1.5B intent classifier (operates on English text)
 # ---------------------------------------------------------------------------
-_llm_model = None
-_llm_tokenizer = None
-_llm_failed = False
-def _load_llm():
-    global _llm_model, _llm_tokenizer, _llm_failed
-    if _llm_failed:
-        return None, None
-    if _llm_model is not None:
-        return _llm_model, _llm_tokenizer
     try:
-        import torch
-        from transformers import AutoModelForCausalLM, AutoTokenizer
-        logger.info("Loading Qwen2.5-1.5B-Instruct…")
-        model_id = "Qwen/Qwen2.5-1.5B-Instruct"
-        _llm_tokenizer = AutoTokenizer.from_pretrained(model_id)
-        _llm_model = AutoModelForCausalLM.from_pretrained(
-            model_id,
-            torch_dtype=torch.float32,
-            low_cpu_mem_usage=True,
-        )
-        _llm_model.eval()
-        logger.info("Qwen2.5-1.5B ready.")
-        return _llm_model, _llm_tokenizer
     except Exception as e:
-        logger.warning(f"Qwen load failed: {e}")
-        _llm_failed = True
-        return None, None
-CANDIDATE_INTENTS = {
-    None: ["check_balance", "block_card", "transfer_money",
-           "buy_airtime", "buy_bundle", "complaint",
-           "check_order", "reschedule", "return_item",
-           "human_agent", "unknown"],
-    "intent": ["check_balance", "block_card", "transfer_money",
-               "buy_airtime", "buy_bundle", "complaint",
-               "check_order", "reschedule", "return_item",
-               "human_agent", "unknown"],
-    "yesno": ["yes", "no", "human_agent", "unknown"],
-    "name": ["provide_name", "human_agent", "unknown"],
-    "date": ["provide_date", "human_agent", "unknown"],
-    "bundle": ["provide_bundle", "human_agent", "unknown"],
-    "text": ["provide_text", "human_agent", "unknown"],
-}
-SYSTEM_PROMPT = """You are an intent classifier for a customer-service voice bot.
-You will be given an English-language utterance (translated from Hausa) and a list of candidate intents. Return JSON with the single best-matching intent and any entities you can extract.
-Intent meanings:
-- check_balance: user wants to check an account balance
-- block_card: user wants to block, freeze, or cancel a bank card
-- transfer_money: user wants to send or transfer money
-- buy_airtime: user wants to buy phone airtime / top-up
-- buy_bundle: user wants to buy a data bundle / internet package
-- complaint: user wants to file a complaint or report a problem
-- check_order: user wants to check the status of an order
-- reschedule: user wants to reschedule a delivery
-- return_item: user wants to return an item
-- human_agent: user wants to speak to a human person
-- yes / no: affirmative or negative reply
-- provide_name / provide_date / provide_bundle / provide_text: user is supplying information
-- unknown: cannot determine intent
-Return ONLY valid JSON. No explanation, no markdown. Example: {"intent": "check_balance", "entities": {}}"""
-def _qwen_classify(english_text: str, expected: Optional[str]) -> Optional[tuple[str, dict]]:
-    """Classify an English utterance into an intent. Returns None on failure."""
-    model, tokenizer = _load_llm()
-    if model is None:
         return None
-    candidates = CANDIDATE_INTENTS.get(expected, CANDIDATE_INTENTS[None])
-    user_prompt = (
-        f'Utterance: "{english_text}"\n'
-        f'Candidate intents: {", ".join(candidates)}\n\n'
-        'Return JSON only.'
-    )
-    messages = [
-        {"role": "system", "content": SYSTEM_PROMPT},
-        {"role": "user", "content": user_prompt},
-    ]
     try:
-        import torch
-        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-        inputs = tokenizer(prompt, return_tensors="pt")
-        with torch.no_grad():
-            out = model.generate(
-                **inputs,
-                max_new_tokens=60,
-                do_sample=False,
-                pad_token_id=tokenizer.eos_token_id,
-            )
-        generated = tokenizer.decode(out[0][inputs.input_ids.shape[1]:], skip_special_tokens=True).strip()
-        logger.info(f"Qwen raw: {generated}")
-        m = re.search(r"\{.*?\}", generated, re.DOTALL)
-        if not m:
-            return None
-        parsed = json.loads(m.group())
-        intent = parsed.get("intent", "unknown")
-        entities = parsed.get("entities", {}) or {}
-        if not isinstance(entities, dict):
-            entities = {}
-        if intent not in candidates:
-            logger.info(f"Qwen returned out-of-candidate intent: {intent}")
-            return None
-        return intent, entities
     except Exception as e:
-        logger.warning(f"Qwen inference failed: {e}")
         return None
@@ -355,23 +373,26 @@ def _qwen_classify(english_text: str, expected: Optional[str]) -> Optional[tuple
 def parse(text: str, expected: Optional[str] = None,
           use_llm: bool = True) -> tuple[str, dict, str]:
     """
-    NLU. Returns (intent, entities, source) where source is one of:
-      - 'structural': deterministic extractor caught digits/amount/yes-no
-      - 'keyword': fast-path keyword matcher caught a common intent
-      - 'qwen_en': input was English, classified directly by Qwen
-      - 'nllb+qwen': translated via NLLB then classified via Qwen
-      - 'human_keyword': caught human-agent escape hatch by keyword
-      - 'unknown': nothing matched
     """
     entities: dict = {}
     if not text or not text.strip():
         return "unknown", entities, "unknown"
-    # Always-on human-agent escape (safety)
     if _contains_human_keyword(text):
         return "human_agent", entities, "human_keyword"
-    # Layer 1: deterministic structural extractors for strict-format slots
     if expected == "digits":
         d = _extract_digits(text)
         if d:
@@ -390,7 +411,6 @@ def parse(text: str, expected: Optional[str] = None,
             return yn, entities, "structural"
     if expected == "name":
-        # Name is free-form; take the last token as a quick heuristic.
         name = text.strip().split()[-1] if text.strip() else ""
         if name:
             entities["name"] = name
@@ -400,51 +420,38 @@ def parse(text: str, expected: Optional[str] = None,
         entities["date"] = text.strip()
         return "provide_date", entities, "structural"
-    # Layer 1.5: Keyword fast-path for common intents (Hausa + English).
-    # Runs in ANY state so users can pivot intent mid-flow ("actually I want
-    # to transfer money instead"). Structural extractors above already
-    # claimed strict-slot cases, so if we're in a slot-filling state and
-    # the text didn't match the slot, it's fair game to re-interpret as a
-    # new intent.
     kw_intent = _match_intent_keyword(text)
     if kw_intent:
-        logger.info(f"NLU: keyword matched {text!r} → {kw_intent}")
         return kw_intent, entities, "keyword"
-    # Layer 2: NLLB Ha → En (skip if input already English), then Qwen
     if not use_llm:
         logger.info(f"NLU: use_llm=False, returning unknown for {text!r}")
         return "unknown", entities, "unknown"
-    if _looks_english(text):
-        logger.info(f"NLU: input looks English, skipping NLLB: {text!r}")
-        english_text = text
-        source_tag = "qwen_en"
-    else:
-        logger.info(f"NLU: translating Hausa via NLLB: {text!r}")
-        english_text = translate_ha_to_en(text)
-        if english_text is None:
-            logger.warning("NLU: NLLB failed, returning unknown")
-            return "unknown", entities, "unknown"
-        source_tag = "nllb+qwen"
-    qwen_result = _qwen_classify(english_text, expected)
-    if qwen_result is None:
-        logger.warning(f"NLU: Qwen returned no valid intent for {english_text!r}")
         return "unknown", entities, "unknown"
-    intent, llm_entities = qwen_result
-    logger.info(f"NLU: Qwen classified {english_text!r} → intent={intent}")
-    # For free-text slots, pass the original Hausa text through
     if expected == "bundle":
         t = text.lower()
         for b in ("rana", "mako", "wata"):
             if b in t:
-                llm_entities["bundle"] = b
                 break
     if expected == "text":
-        llm_entities["text"] = text.strip()
-    return intent, llm_entities, source_tag

 """
+NLU — Embedding similarity architecture.
+=========================================
+Replaces the legacy NLLB+Qwen pipeline (preserved in nlu_legacy.py).
+Why embeddings?
+  - Latency: ~200ms vs ~10s on CPU for the legacy stack
+  - Memory:  ~420MB vs ~8GB
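+  - No translation step: paraphrase-multilingual-MiniLM-L12-v2 is a
+    multilingual encoder (50+ languages), so Hausa input is embedded directly.
+    NOTE(review): Hausa does not appear in the model's published list of
+    training languages — confirm against the model card and validate on real
+    Hausa utterances before relying on this.
+  - Confidence comes for free: cosine similarity gives a usable ranking score,
+    but it is NOT a calibrated probability — the acceptance threshold has to
+    be tuned empirically.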
+Pipeline (in order):
+  Layer 0: Human-keyword escape ("wakili", "agent") → always wins
+  Layer 1: Structural extractors (digits, amounts, yes/no, name, date)
+           when the dialogue state has expected_slot set
+  Layer 1.5: Keyword fast-path for ultra-common phrases ("duba ma'auni")
+             — sub-millisecond, no model call
+  Layer 2: Sentence-embedding similarity vs per-intent centroids
+           — cosine sim ≥ threshold (0.4) → that intent, else unknown
+The dialogue manager receives the same (intent, entities, source) tuple
+as before, so app.py needs no changes.
 """
 from __future__ import annotations
 import re
 import logging
 from typing import Optional
     "ɗari": 100, "dari": 100,
 }
 HAUSA_YES = {"i", "eh", "haka ne", "haka", "ok", "okay", "yes"}
 HAUSA_NO = {"a'a", "a'aa", "ba haka", "ba", "no"}
 HUMAN_KEYWORDS = {"mutum", "wakili", "agent", "human"}
     return any(kw in t for kw in HUMAN_KEYWORDS)
+# ---------------------------------------------------------------------------
+# Keyword fast-path — instant matches for common scripted phrases
+# ---------------------------------------------------------------------------
 INTENT_KEYWORDS = {
     "check_balance": [
         "duba ma'auni", "ma'auni", "balance", "check balance",
 def _match_intent_keyword(text: str) -> Optional[str]:
     t = text.lower().strip()
     all_kw = [(intent, kw) for intent, kws in INTENT_KEYWORDS.items() for kw in kws]
     all_kw.sort(key=lambda x: len(x[1]), reverse=True)
     for intent, kw in all_kw:
     return None
 # ---------------------------------------------------------------------------
+# Intent example dataset — the heart of the embedding NLU.
+# These phrases are encoded once into centroids; at inference, user input is
+# compared (cosine similarity) against each centroid. More examples = better
+# coverage of paraphrases. Hausa + English mixed deliberately so cross-lingual
+# matches work via the multilingual encoder.
 # ---------------------------------------------------------------------------
+INTENT_EXAMPLES = {
+    "check_balance": [
+        # Hausa
+        "duba ma'auni",
+        "ina son sanin kuɗin asusuna",
+        "nawa ne a asusuna",
+        "menene ma'aunin asusuna",
+        "yi mini bayanin asusuna",
+        "ina son ganin kuɗina",
+        # English
+        "check my balance",
+        "what is my account balance",
+        "how much money do I have",
+        "show me my balance",
+        "tell me my balance",
+        "how much is in my account",
+    ],
+    "block_card": [
+        "toshe kati",
+        "ina son toshe katina",
+        "ɓatar da kati na",
+        "katina ya ɓace",
+        "yi mini taimako, kati na ya ɓace",
+        "in toshe ATM card",
+        "block my card",
+        "I lost my card",
+        "freeze my debit card",
+        "I need to cancel my card",
+        "my card was stolen",
+        "please block my ATM card",
+    ],
+    "transfer_money": [
+        "canjin kuɗi",
+        "ina son aika kuɗi",
+        "tura kuɗi zuwa wani",
+        "yi canji",
+        "in turawa abokina kuɗi",
+        "aiki kuɗi ga abokina",
+        "transfer money",
+        "send money to someone",
+        "I want to make a transfer",
+        "wire money to my friend",
+        "send naira to another account",
+        "make a payment",
+    ],
+    "buy_airtime": [
+        "saya airtime",
+        "ina son saya airtime",
+        "kunna waya",
+        "in saya credit",
+        "saya credit na waya",
+        "recharge waya na",
+        "buy airtime",
+        "top up my phone",
+        "recharge my phone",
+        "I need airtime",
+        "load credit",
+        "add credit to my phone",
+    ],
+    "buy_bundle": [
+        "saya bundle",
+        "ina son saya data",
+        "kunna intanet",
+        "in saya data bundle",
+        "saya megabyte",
+        "buy data",
+        "buy internet bundle",
+        "I want a data plan",
+        "purchase data bundle",
+        "get me a megabyte plan",
+        "subscribe to data",
+        "renew my data",
+    ],
+    "complaint": [
+        "yin korafi",
+        "ina da matsala",
+        "in yi koka",
+        "akwai matsala da hidima",
+        "ina son in kawo matsala",
+        "ba na gamsuwa",
+        "I want to file a complaint",
+        "I have a problem",
+        "report an issue",
+        "something is wrong",
+        "the service is bad",
+        "I'm not satisfied",
+    ],
+    "check_order": [
+        "bincika oda",
+        "ina oda na yake",
+        "tabbatar oda",
+        "yaushe za a kawo oda na",
+        "in san halin oda na",
+        "track order",
+        "where is my order",
+        "check order status",
+        "when will my order arrive",
+        "is my order ready",
+        "I want to know about my order",
+    ],
+    "reschedule": [
+        "sake tsara",
+        "ina son sake tsara lokaci",
+        "canjin ranar isar",
+        "in canza ranar kawowa",
+        "rana ta dabam",
+        "reschedule delivery",
+        "change delivery date",
+        "I want a different day",
+        "deliver tomorrow instead",
+        "postpone the delivery",
+        "move the delivery to later",
+    ],
+    "return_item": [
+        "mayar da kaya",
+        "ina son mayar da kaya",
+        "ba na son kaya",
+        "ina son mayarwa",
+        "kaya ba shi da kyau",
+        "return this item",
+        "I want to return my order",
+        "send it back",
+        "I want a refund",
+        "I don't want this anymore",
+        "the item is broken",
+    ],
+    "human_agent": [
+        "ina son magana da mutum",
+        "ka kawo mutum",
+        "wakili",
+        "in yi magana da wakilin",
+        "ba zan iya da bot ba",
+        "I want to speak to a human",
+        "connect me to an agent",
+        "transfer me to a person",
+        "I need to talk to someone",
+        "real person please",
+        "agent please",
+    ],
+}
+# Confidence threshold: cosine similarities below this become 'unknown'.
+# Tuned by hand at 0.4; lower if too many things are routed to 'unknown',
+# raise if too many incorrect intents get through. See nlu/tests for the
+# validation methodology.
+CONFIDENCE_THRESHOLD = 0.4
+# Embedding model. Multilingual (50+ languages), 420MB, CPU-fast.
+EMBEDDING_MODEL_ID = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
 # ---------------------------------------------------------------------------
+# Embedding model + centroid cache (lazy-loaded)
 # ---------------------------------------------------------------------------
+_encoder = None
+_intent_centroids: Optional[dict] = None  # intent_name -> np.ndarray
+_embed_failed = False
+def _load_encoder():
+    """Lazy-load the sentence encoder and build one centroid per intent.
+
+    On the first successful call this instantiates SentenceTransformer for
+    EMBEDDING_MODEL_ID, encodes every phrase list in INTENT_EXAMPLES, and
+    stores the re-normalized mean vector of each list in _intent_centroids.
+    Later calls return the cached encoder without recomputing anything.
+
+    Returns:
+        The encoder, or None if loading or centroid computation raised.
+        A failure is remembered in _embed_failed, so it is never retried.
+    """
+    global _encoder, _intent_centroids, _embed_failed
+    # A previous attempt failed: do not retry the load.
+    if _embed_failed:
+        return None
+    # Already loaded: reuse the cached encoder.
+    if _encoder is not None:
+        return _encoder
     try:
+        # Deferred imports: only needed once the embedding layer is used.
+        import numpy as np
+        from sentence_transformers import SentenceTransformer
+        logger.info(f"Loading embedding model {EMBEDDING_MODEL_ID}…")
+        # NOTE(review): _encoder and _intent_centroids are assigned before the
+        # centroid loop finishes. If the loop raises, both stay partially set
+        # (later calls are still blocked by _embed_failed). A concurrent caller
+        # could also observe the partial state — confirm whether parse() can
+        # be invoked from multiple threads.
+        _encoder = SentenceTransformer(EMBEDDING_MODEL_ID)
+        logger.info("Computing intent centroids…")
+        _intent_centroids = {}
+        for intent, phrases in INTENT_EXAMPLES.items():
+            # normalize_embeddings=True ⇒ unit vectors ⇒ dot product = cosine sim
+            embeddings = _encoder.encode(phrases, normalize_embeddings=True)
+            # Centroid = element-wise mean over this intent's example vectors.
+            # Assumes every intent has at least one example phrase.
+            centroid = embeddings.mean(axis=0)
+            # Re-normalize the centroid so cosine math stays clean
+            centroid = centroid / np.linalg.norm(centroid)
+            _intent_centroids[intent] = centroid
+        logger.info(f"Encoder ready, {len(_intent_centroids)} intents.")
+        return _encoder
     except Exception as e:
+        # Any failure (import, model load, encoding) disables the embedding
+        # layer until the flag is reset.
+        logger.warning(f"Encoder load failed: {e}")
+        _embed_failed = True
+        return None
+def _classify_with_embedding(text: str, expected: Optional[str]) -> Optional[tuple[str, float]]:
+    """Score `text` against every intent centroid by cosine similarity.
+
+    Args:
+        text: User utterance to classify.
+        expected: Slot the dialogue currently expects. Currently unused:
+            every intent in _intent_centroids is always a candidate.
+
+    Returns:
+        (best_intent, best_score) for the highest-scoring centroid, with no
+        threshold applied here, or None if the encoder is unavailable or
+        scoring raises.
+    """
+    encoder = _load_encoder()
+    if encoder is None or _intent_centroids is None:
         return None
     try:
+        import numpy as np
+        # Unit-normalized query, so the dot products below are cosine scores.
+        query = encoder.encode(text, normalize_embeddings=True)
+        # No filtering by expected slot happens here: all intents compete.
+        # For 'yesno', embedding NLU shouldn't fire — yes/no is handled by
+        # the structural layer. If we get here with yesno expected, it means
+        # the user said something non-standard; we treat that as a possible
+        # intent pivot (any intent is fair game).
+        valid_intents = list(_intent_centroids.keys())
+        scores = {}
+        for intent in valid_intents:
+            centroid = _intent_centroids[intent]
+            scores[intent] = float(np.dot(query, centroid))
+        # An empty centroid table makes max() raise; the except below turns
+        # that into a None return.
+        best_intent = max(scores, key=scores.get)
+        best_score = scores[best_intent]
+        # Despite the "all scores" label, only the three highest are logged.
+        logger.info(f"NLU embedding: top match {best_intent}@{best_score:.3f}, "
+                    f"all scores: { {k: round(v,3) for k,v in sorted(scores.items(), key=lambda x: -x[1])[:3]} }")
+        return best_intent, best_score
     except Exception as e:
+        logger.warning(f"Embedding classification failed: {e}")
         return None
 def parse(text: str, expected: Optional[str] = None,
           use_llm: bool = True) -> tuple[str, dict, str]:
     """
+    NLU entry point. Returns (intent, entities, source) where source is:
+      - 'structural': digit/amount/yes-no/name/date regex matched
+      - 'keyword':    keyword fast-path matched
+      - 'embedding':  sentence encoder matched above threshold
+      - 'human_keyword': escape-hatch keyword caught
+      - 'unknown':    nothing matched
+    `use_llm` is a misnomer kept for backward compat with the legacy module's
+    signature — here it means "use the embedding layer". Set False to test
+    rule-only behavior.
     """
     entities: dict = {}
     if not text or not text.strip():
         return "unknown", entities, "unknown"
+    # Layer 0: Always-on human-agent escape
     if _contains_human_keyword(text):
         return "human_agent", entities, "human_keyword"
+    # Layer 1: Structural extractors for strict-format slots
     if expected == "digits":
         d = _extract_digits(text)
         if d:
             return yn, entities, "structural"
     if expected == "name":
         name = text.strip().split()[-1] if text.strip() else ""
         if name:
             entities["name"] = name
         entities["date"] = text.strip()
         return "provide_date", entities, "structural"
+    # Layer 1.5: Keyword fast-path (cheap, runs in any state so users can
+    # pivot intent mid-flow).
     kw_intent = _match_intent_keyword(text)
     if kw_intent:
+        logger.info(f"NLU keyword: matched {text!r} → {kw_intent}")
         return kw_intent, entities, "keyword"
+    # Layer 2: Embedding similarity
     if not use_llm:
         logger.info(f"NLU: use_llm=False, returning unknown for {text!r}")
         return "unknown", entities, "unknown"
+    embed_result = _classify_with_embedding(text, expected)
+    if embed_result is None:
+        logger.warning(f"NLU embedding unavailable, returning unknown for {text!r}")
         return "unknown", entities, "unknown"
+    intent, confidence = embed_result
+    if confidence < CONFIDENCE_THRESHOLD:
+        logger.info(f"NLU embedding: {intent}@{confidence:.3f} below threshold "
+                    f"{CONFIDENCE_THRESHOLD}, returning unknown")
+        return "unknown", entities, "unknown"
+    # Free-text slot pass-through (preserve original Hausa)
     if expected == "bundle":
         t = text.lower()
         for b in ("rana", "mako", "wata"):
             if b in t:
+                entities["bundle"] = b
                 break
     if expected == "text":
+        entities["text"] = text.strip()
+    logger.info(f"NLU embedding accepted: {text!r} → {intent} (conf={confidence:.3f})")
+    return intent, entities, "embedding"