anriltine committed on
Commit
4339a77
·
verified ·
1 Parent(s): 61ff229

Deploy TinyModel1Space from GitHub Actions

Browse files
scripts/eval_report_routing.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Read the Phase 2 **`routing`** object from a classifier checkpoint's **`eval_report.json`**.
3
+
4
+ Used by Horizon 1 glue, **rag_faq_smoke**, **embeddings_smoke_test**, **routing_policy** (**`--from-checkpoint`**), **horizon1_route_then_retrieve**, and training/report CLIs so training notes and runtime gates stay aligned."""
5
+
6
+ from __future__ import annotations
7
+
8
+ import json
9
+ import sys
10
+ from pathlib import Path
11
+
12
+
13
def load_routing_from_eval_report(model_path: str | Path) -> dict | None:
    """Return the top-level ``routing`` dict from ``<model_path>/eval_report.json``.

    Args:
        model_path: Candidate checkpoint directory. Anything that is not an
            existing directory (for example a Hub model id) yields ``None``.

    Returns:
        The ``routing`` object when the report exists, is readable UTF-8 JSON,
        has a JSON object at the top level and its ``routing`` entry is itself
        an object; ``None`` in every other case.
    """
    checkpoint_dir = Path(model_path)
    if not checkpoint_dir.is_dir():
        return None
    report = checkpoint_dir / "eval_report.json"
    if not report.is_file():
        return None
    try:
        data = json.loads(report.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError;
        # OSError covers a file that vanished or is unreadable after the check.
        return None
    # A syntactically valid report may still be a list/str/number at the top
    # level; calling ``.get`` on those would raise AttributeError.
    if not isinstance(data, dict):
        return None
    routing = data.get("routing")
    return routing if isinstance(routing, dict) else None
27
+
28
+
29
def format_checkpoint_tip_path(
    output_dir: str | Path,
    *,
    cwd: Path | None = None,
) -> str:
    """Render ``output_dir`` as a POSIX-style path, relative to ``cwd`` when possible.

    Both paths are resolved first. If the resolved ``output_dir`` lies under the
    resolved base (``cwd``, or the process working directory when ``cwd`` is
    ``None``), the relative path is returned; otherwise the absolute one.
    """
    target = Path(output_dir).resolve()
    anchor = Path.cwd() if cwd is None else cwd
    anchor = anchor.resolve()
    try:
        relative = target.relative_to(anchor)
    except ValueError:
        # ``target`` is not located under ``anchor``: fall back to the full path.
        return target.as_posix()
    return relative.as_posix()
41
+
42
+
43
def format_routing_policy_from_checkpoint_command(
    output_dir: str | Path,
    *,
    cwd: Path | None = None,
) -> str:
    """Build the ``python scripts/routing_policy.py --from-checkpoint …`` command line.

    The checkpoint argument is whatever ``format_checkpoint_tip_path`` returns for
    ``output_dir``/``cwd``; it is appended as-is, without any shell quoting.
    """
    checkpoint = format_checkpoint_tip_path(output_dir, cwd=cwd)
    return "python scripts/routing_policy.py --from-checkpoint " + checkpoint
51
+
52
+
53
def print_routing_policy_from_checkpoint_tip(
    output_dir: str | Path,
    *,
    headline: str = "Tip: dump Phase 2 `routing` JSON (no model load):",
    cwd: Path | None = None,
) -> None:
    """Print ``headline`` followed by the ``routing_policy`` command on the next line.

    The command comes from ``format_routing_policy_from_checkpoint_command`` and is
    indented by one space; stdout is flushed after printing.
    """
    command = format_routing_policy_from_checkpoint_command(output_dir, cwd=cwd)
    print(headline, f" {command}", sep="\n", flush=True)
62
+
63
+
64
def maybe_print_routing_section(model_path: str, *, enabled: bool, prog: str) -> None:
    """Print the checkpoint's ``routing`` JSON when ``enabled``; do nothing otherwise.

    When no ``routing`` object can be loaded for ``model_path``, a one-line hint
    prefixed with ``prog`` is written to stderr instead.
    """
    if not enabled:
        return
    routing = load_routing_from_eval_report(model_path)
    if routing is not None:
        print("=== eval_report.json routing (Phase 2 training notes) ===\n")
        print(json.dumps(routing, indent=2))
        print()
        return
    hint = (
        f"{prog}: no eval_report.json with top-level `routing` "
        "(Hub id or missing artifact)."
    )
    print(hint, file=sys.stderr)
scripts/horizon2_core.py CHANGED
@@ -173,9 +173,26 @@ def load_causal_lm(
173
  model_id: str,
174
  device: str,
175
  ) -> LoadedLM:
 
 
 
 
 
 
 
 
 
 
176
  import torch
177
  from transformers import AutoModelForCausalLM, AutoTokenizer
178
 
 
 
 
 
 
 
 
179
  d = device if device in ("cpu", "cuda", "mps") else "cpu"
180
  tok = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
181
  if tok.pad_token is None and tok.eos_token is not None:
@@ -187,15 +204,42 @@ def load_causal_lm(
187
  )
188
  else:
189
  dt = torch.float32
190
- # Prefer `dtype` (newer Transformers); fall back to `torch_dtype` (older).
191
- try:
192
- model = AutoModelForCausalLM.from_pretrained(
193
- model_id, trust_remote_code=True, dtype=dt
194
- )
195
- except TypeError:
196
- model = AutoModelForCausalLM.from_pretrained(
197
- model_id, trust_remote_code=True, torch_dtype=dt
 
 
 
 
 
 
 
 
 
 
198
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
  model.eval()
200
  model = model.to(d)
201
  return LoadedLM(model=model, tokenizer=tok, device=d)
 
173
  model_id: str,
174
  device: str,
175
  ) -> LoadedLM:
176
+ import os
177
+ import sys
178
+
179
+ # Must run before `import torch` on first use (e.g. horizon2_server on Windows).
180
+ if sys.platform == "win32":
181
+ os.environ.setdefault("OMP_NUM_THREADS", "1")
182
+ os.environ.setdefault("MKL_NUM_THREADS", "1")
183
+ os.environ.setdefault("KMP_DUPLICATE_LIB_OK", "TRUE")
184
+ os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
185
+
186
  import torch
187
  from transformers import AutoModelForCausalLM, AutoTokenizer
188
 
189
+ if sys.platform == "win32":
190
+ torch.set_num_threads(1)
191
+ try:
192
+ torch.set_num_interop_threads(1)
193
+ except RuntimeError:
194
+ pass
195
+
196
  d = device if device in ("cpu", "cuda", "mps") else "cpu"
197
  tok = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
198
  if tok.pad_token is None and tok.eos_token is not None:
 
204
  )
205
  else:
206
  dt = torch.float32
207
+
208
+ def _from_pretrained(extra: dict[str, Any]) -> Any:
209
+ # Prefer `dtype` (newer Transformers); fall back to `torch_dtype` (older).
210
+ try:
211
+ return AutoModelForCausalLM.from_pretrained(
212
+ model_id, trust_remote_code=True, dtype=dt, **extra
213
+ )
214
+ except TypeError:
215
+ return AutoModelForCausalLM.from_pretrained(
216
+ model_id, trust_remote_code=True, torch_dtype=dt, **extra
217
+ )
218
+
219
+ # Retry with progressively fewer options (compat + stability on Windows CPU).
220
+ if d == "cpu":
221
+ extras: tuple[dict[str, Any], ...] = (
222
+ {"low_cpu_mem_usage": True, "attn_implementation": "eager"},
223
+ {"low_cpu_mem_usage": True},
224
+ {},
225
  )
226
+ else:
227
+ extras = ({"low_cpu_mem_usage": True}, {})
228
+
229
+ model = None
230
+ last_err: BaseException | None = None
231
+ for extra in extras:
232
+ try:
233
+ model = _from_pretrained(extra)
234
+ break
235
+ except (TypeError, ValueError, OSError) as e:
236
+ last_err = e
237
+ continue
238
+ if model is None:
239
+ raise RuntimeError(
240
+ f"Failed to load causal LM {model_id!r}; last error: {last_err!r}"
241
+ ) from last_err
242
+
243
  model.eval()
244
  model = model.to(d)
245
  return LoadedLM(model=model, tokenizer=tok, device=d)
scripts/nl_controls.py ADDED
@@ -0,0 +1,652 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Natural-language control phrases for Universal Brain chat.
2
+
3
+ This is a lightweight, deterministic pre-router for actions that should not depend on
4
+ LLM JSON routing (and should work without requiring users to remember slash commands).
5
+
6
+ It is intentionally conservative: it only triggers on fairly explicit phrasing.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from dataclasses import dataclass
12
+ import re
13
+
14
+
15
@dataclass(frozen=True)
class ControlAction:
    """A control request recognised in a chat message.

    ``name`` identifies the action (for example ``"set_scope"``); ``value`` carries
    its optional argument (for example a scope id, ``"on"``/``"off"`` or a mode name).
    """

    name: str
    value: str | None = None


_WS = re.compile(r"\s+")

# Every exact-phrase pattern is "<prefix><phrase><tail>": an optional leading
# "please", the phrase itself, then only trailing whitespace / . ! ? characters.
_PLEASE = r"^(please\s+)?"
_TAIL = r"[\s.!?]*$"


def _norm(s: str) -> str:
    """Lower-case ``s``, trim it and collapse whitespace runs to single spaces."""
    return _WS.sub(" ", (s or "").strip().lower())


def _phrase_rule(max_len, name, value, *phrases, allow_please=True):
    """Build one exact-phrase rule: ``(max_len, ControlAction, compiled patterns)``."""
    prefix = _PLEASE if allow_please else "^"
    compiled = tuple(re.compile(prefix + phrase + _TAIL) for phrase in phrases)
    return max_len, ControlAction(name, value), compiled


# Ordered table of short, fully anchored control phrases. A rule fires when the
# normalized message is at most ``max_len`` characters long and any of its
# patterns matches; the first rule that fires wins, so order is significant.
_PHRASE_RULES = (
    # Explanation depth (who the answer is for).
    _phrase_rule(72, "set_audience", "technical", r"assume i'?m\s+technical", r"use jargon freely"),
    _phrase_rule(
        72, "set_audience", "technical",
        r"expert\s+mode", r"technical audience", r"for experts",
        allow_please=False,
    ),
    _phrase_rule(
        78, "set_audience", "normal",
        r"(default explanation level|normal explanation level|general audience)",
        r"(reset|default)\s+audience",
    ),
    # Answer lead: whether to front-load a TL;DR line (orthogonal to verbosity).
    _phrase_rule(
        88, "set_answer_lead", "tldr_first",
        r"(tl;|tl)dr\s+first\b",
        r"(lead|start)\s+with\s+(a\s+)?(short\s+)?summary\b",
        r"summary\s+first\b",
    ),
    _phrase_rule(
        92, "set_answer_lead", "direct",
        r"no\s+tl;?dr\b",
        r"skip (the\s+)?summary\b",
        r"answer directly\b",
        r"direct answer\s+only\b",
        r"without\s+a\s+tl;?dr\b",  # accepts "tl;dr" like the "no tl;dr" form above
    ),
    _phrase_rule(
        64, "set_answer_lead", "normal",
        r"(default answer structure|normal answer opening|usual\s+opening)",
        r"reset\s+(answer\s+)?opening",
    ),
    # Procedures: numbered steps vs continuous prose (orthogonal to bullets).
    _phrase_rule(
        88, "set_step_style", "numbered",
        r"(step by step|step-by-step)",
        r"use numbered steps",
        r"numbered steps\b",
        r"walk me through( the)? steps\b",
        r"break it into steps",
    ),
    _phrase_rule(
        92, "set_step_style", "continuous",
        r"(no numbered steps|don'?t number steps|skip step numbers)",
        r"(continuous prose|prose without steps)",
    ),
    _phrase_rule(64, "set_step_style", "normal", r"(default step style|normal steps|reset steps)"),
    # How hard to hedge / flag limits (orthogonal to FAQ strictness).
    _phrase_rule(
        94, "set_confidence_tone", "transparent",
        r"flag your assumptions",
        r"be explicit about uncertainty",
        r"say if you don'?t know",
        r"tell me when you(?:'?re|\s+are)\s+unsure",
        r"say when you(?:'?re|\s+are)\s+unsure",
    ),
    _phrase_rule(
        72, "set_confidence_tone", "assertive",
        r"be decisive", r"don'?t hedge", r"give firm answers",
    ),
    _phrase_rule(
        80, "set_confidence_tone", "normal",
        r"(default confidence tone|normal confidence|reset uncertainty)",
    ),
    # Whether to offer follow-ups / next steps at the end of answers.
    _phrase_rule(
        96, "set_followup_close", "suggest",
        r"suggest next steps",
        r"offer follow[- ]up questions",
        r"end with (optional )?next steps",
    ),
    _phrase_rule(
        100, "set_followup_close", "minimal",
        r"no follow[- ]up questions",
        r"don'?t ask follow[- ]up questions",
        r"no questions at the end",
    ),
    _phrase_rule(
        78, "set_followup_close", "normal",
        r"(default follow[- ]ups?|reset follow[- ]ups?|normal follow[- ]ups?)",
    ),
    # Teach order: define terms vs motivate first (orthogonal to TL;DR / steps).
    _phrase_rule(
        80, "set_exposition_order", "definitions_first",
        r"definitions first", r"start with definitions", r"define terms first",
    ),
    _phrase_rule(
        96, "set_exposition_order", "intuition_first",
        r"intuition first", r"big picture first", r"start with the big picture",
    ),
    _phrase_rule(
        88, "set_exposition_order", "normal",
        r"(default explanation order|reset explanation order|normal explanation order)",
    ),
    # Examples vs terse explanations when comparing or teaching.
    _phrase_rule(
        76, "set_example_density", "rich",
        r"include examples", r"use concrete examples", r"illustrate with examples",
    ),
    _phrase_rule(
        92, "set_example_density", "sparse",
        r"skip examples", r"don'?t add examples", r"no examples unless i ask",
    ),
    _phrase_rule(
        68, "set_example_density", "normal",
        r"(default examples|normal examples|reset examples)",
    ),
    # Compare/contrast presentation.
    _phrase_rule(
        96, "set_comparison_frame", "pros_cons",
        r"use pros and cons", r"pros and cons sections", r"compare with pros and cons",
    ),
    _phrase_rule(
        100, "set_comparison_frame", "narrative",
        r"compare in flowing prose", r"prose comparison only", r"no pros and cons sections",
    ),
    _phrase_rule(
        82, "set_comparison_frame", "normal",
        r"(default comparison style|normal comparison|reset comparison)",
    ),
    # Professional vs conversational wording (orthogonal to verbosity).
    _phrase_rule(
        92, "set_register_tone", "formal",
        r"formal tone", r"professional register", r"business writing style",
    ),
    _phrase_rule(
        96, "set_register_tone", "casual",
        r"casual tone", r"friendly casual style", r"speak casually",
    ),
    _phrase_rule(76, "set_register_tone", "normal", r"(default tone|neutral tone|reset tone)"),
    # Markdown code snippet layout.
    _phrase_rule(
        100, "set_code_block_style", "fenced",
        r"use code fences", r"fenced code blocks", r"markdown code fences",
    ),
    _phrase_rule(
        104, "set_code_block_style", "inline",
        r"inline code only", r"no triple backticks", r"no fenced code blocks",
    ),
    _phrase_rule(
        96, "set_code_block_style", "normal",
        r"(default code formatting|reset code style|normal code blocks)",
    ),
    # Analogies / metaphors vs literal explanations only.
    _phrase_rule(
        92, "set_analogy_use", "prefer",
        r"use analogies", r"analogies when helpful", r"metaphors are ok",
    ),
    _phrase_rule(
        100, "set_analogy_use", "avoid",
        r"no analogies", r"skip metaphors", r"literal explanations only",
    ),
    _phrase_rule(
        82, "set_analogy_use", "normal",
        r"(default analogy style|reset analogies|normal analogies)",
    ),
    # Expand vs terse acronym handling on first introduction.
    _phrase_rule(
        112, "set_acronym_style", "spell_out",
        r"spell out acronyms",
        r"expand acronyms on first use",
        r"define acronyms when you use them",
    ),
    _phrase_rule(
        112, "set_acronym_style", "terse",
        r"assume i know acronyms", r"don'?t expand acronyms", r"keep acronyms as is",
    ),
    _phrase_rule(
        92, "set_acronym_style", "normal",
        r"(default acronym style|reset acronyms|normal acronyms)",
    ),
    # Clarify-first: ask brief questions before answering if key info is missing.
    _phrase_rule(
        110, "set_clarify_first", "on",
        r"ask clarifying questions first",
        r"clarify first",
        r"ask me questions before answering",
    ),
    _phrase_rule(
        110, "set_clarify_first", "off",
        r"no clarifying questions",
        r"just answer without questions",
        r"answer without asking questions",
    ),
    _phrase_rule(
        96, "set_clarify_first", "normal",
        r"(default clarify mode|reset clarify mode|normal clarify mode)",
    ),
    # Speculation level: strict factual vs brainstorming.
    _phrase_rule(
        110, "set_speculation", "strict",
        r"no speculation", r"stick to high confidence only", r"avoid guessing",
    ),
    _phrase_rule(
        110, "set_speculation", "creative",
        r"brainstorm freely", r"speculate freely", r"wild ideas ok",
    ),
    _phrase_rule(
        100, "set_speculation", "normal",
        r"(default speculation|normal speculation|reset speculation)",
    ),
    # Math/explanations: show work vs final-only.
    _phrase_rule(
        110, "set_math_detail", "show_work",
        r"show your work", r"show the derivation", r"include steps in math",
    ),
    _phrase_rule(
        110, "set_math_detail", "final_only",
        r"final answer only", r"no derivation", r"skip the steps",
    ),
    _phrase_rule(
        110, "set_math_detail", "normal",
        r"(default math detail|normal math detail|reset math detail)",
    ),
    # Output structure: JSON-shaped vs normal prose.
    _phrase_rule(
        110, "set_output_format", "json",
        r"answer in json", r"json output", r"structured json",
    ),
    _phrase_rule(
        110, "set_output_format", "plain",
        r"plain text only", r"no json", r"no structured output",
    ),
    _phrase_rule(
        110, "set_output_format", "normal",
        r"(default output format|normal output format|reset output format)",
    ),
    # Safety/risk posture for recommendations.
    _phrase_rule(
        110, "set_risk_posture", "conservative",
        r"be risk averse", r"be conservative", r"err on the side of safety",
    ),
    _phrase_rule(
        110, "set_risk_posture", "pragmatic",
        r"be pragmatic", r"optimize for speed", r"good enough is fine",
    ),
    _phrase_rule(
        110, "set_risk_posture", "normal",
        r"(default risk posture|normal risk posture|reset risk posture)",
    ),
    # Actionability: runnable steps vs conceptual explanation.
    _phrase_rule(
        110, "set_actionability", "commands",
        r"give me runnable commands", r"include commands", r"make it actionable",
    ),
    _phrase_rule(
        110, "set_actionability", "conceptual",
        r"no commands", r"conceptual only", r"high level only",
    ),
    _phrase_rule(
        110, "set_actionability", "normal",
        r"(default actionability|normal actionability|reset actionability)",
    ),
    # Quote/citation preference when using supplied excerpts.
    _phrase_rule(
        110, "set_quote_style", "quote",
        r"quote the faq excerpts", r"use direct quotes", r"cite with quotes",
    ),
    _phrase_rule(
        110, "set_quote_style", "paraphrase",
        r"no quotes", r"don'?t quote excerpts", r"paraphrase only",
    ),
    _phrase_rule(
        110, "set_quote_style", "normal",
        r"(default quote style|normal quote style|reset quote style)",
    ),
    # Tables: prefer markdown tables vs avoid.
    _phrase_rule(
        110, "set_table_style", "prefer",
        r"use tables", r"markdown tables", r"tabular format",
    ),
    _phrase_rule(
        110, "set_table_style", "avoid",
        r"no tables", r"avoid tables", r"no markdown tables",
    ),
    _phrase_rule(
        110, "set_table_style", "normal",
        r"(default table style|normal tables|reset tables)",
    ),
    # Emoji in assistant replies.
    _phrase_rule(
        110, "set_emoji_style", "include",
        r"(use emoji|emoji ok|emoji welcome|include emoji)",
        r"add (a few )?emoji",
    ),
    _phrase_rule(
        110, "set_emoji_style", "avoid",
        r"no emojis?", r"avoid emoji", r"don'?t use emoji",
    ),
    _phrase_rule(
        110, "set_emoji_style", "normal",
        r"(default emoji style|normal emoji|reset emoji)",
    ),
    # Markdown section headings (## / ###) vs flat prose.
    _phrase_rule(
        110, "set_section_headings", "prefer",
        r"use section headings", r"organize with headings", r"use markdown headings",
    ),
    _phrase_rule(
        110, "set_section_headings", "avoid",
        r"no section headings",
        r"avoid markdown headings",
        r"flat (answer|prose)( please)?",
    ),
    _phrase_rule(
        110, "set_section_headings", "normal",
        r"(default section headings|normal headings|reset headings)",
    ),
    # Inline emphasis: bold a few key terms vs keep markdown minimal.
    _phrase_rule(
        110, "set_term_emphasis", "highlight",
        r"bold key terms", r"highlight important terms", r"emphasize keywords",
    ),
    _phrase_rule(
        110, "set_term_emphasis", "minimal",
        r"minimal bold", r"don'?t overuse bold", r"avoid excessive bold",
    ),
    _phrase_rule(
        110, "set_term_emphasis", "normal",
        r"(default emphasis|normal bold|reset emphasis)",
    ),
    # Counterpoint tone: supportive vs challenge assumptions.
    _phrase_rule(
        110, "set_counterpoint_tone", "challenge",
        r"challenge my assumptions",
        r"play devil'?s advocate",  # apostrophe optional, like the don'?t patterns
        r"push back on weak points",
    ),
    _phrase_rule(
        110, "set_counterpoint_tone", "supportive",
        r"be supportive", r"assume good intent", r"encourage my ideas",
    ),
    _phrase_rule(
        110, "set_counterpoint_tone", "normal",
        r"(default counterpoints|normal pushback|reset counterpoints)",
    ),
)


def parse_control_action(message: str) -> ControlAction | None:
    """Return a ControlAction if the message is a natural-language control request.

    The message is normalized (trimmed, lower-cased, whitespace collapsed,
    typographic apostrophes mapped to ``'``) and then checked against an ordered
    list of rules; the first rule that matches decides the result.

    Args:
        message: Raw chat message. ``None`` or an empty/blank string is accepted.

    Returns:
        The matching ``ControlAction``, or ``None`` when the message is empty or
        no rule matches. For ``set_scope`` the value is the scope id as it appears
        in the normalized (lower-cased) message.
    """
    # U+2019 is what many mobile keyboards emit for "don't"; fold it to ASCII so
    # the `'?` patterns below can match. One char for one char, so the length
    # limits are unaffected.
    m = _norm(message).replace("\u2019", "'")
    if not m:
        return None

    def has(pattern: str) -> bool:
        """True when ``pattern`` occurs anywhere in the normalized message."""
        return re.search(pattern, m) is not None

    # "What mode is this? What session/scope am I in?"
    if has(r"\b(what|show)\b.*\b(my )?(session|scope|settings|mode|status)\b") or has(
        r"\bwhich\b.*\b(scope|session)\b"
    ):
        return ControlAction("show_session")

    # Start a fresh private session (new scope key).
    if has(r"\b(new|fresh)\b.*\b(private )?(session|scope)\b") or has(
        r"\b(start|begin)\b.*\b(private )?(session|scope)\b"
    ):
        return ControlAction("new_private_session")

    # Switch to a named scope in chat, e.g. "use scope abc-123" / "switch to session foo".
    # The optional "to " lets "set scope to abc-123" yield "abc-123" rather than
    # capturing the word "to" as the scope id.
    scope_match = re.search(
        r"\b(use|switch to|set)\b.*\b(scope|session)\b\s*[:=]?\s*(?:to\s+)?([a-z0-9][a-z0-9_.:-]{1,63})\b",
        m,
    )
    if scope_match:
        return ControlAction("set_scope", scope_match.group(3))

    # Memory controls (order matters: list/show before export/download).
    if has(r"\b(show|list)\b.*\b(my )?(data|memory|memories|notes)\b"):
        return ControlAction("list_memories")
    if has(r"\b(export|download)\b.*\b(my )?(data|memory|memories|notes)\b"):
        return ControlAction("export_memory")
    if has(r"\b(clear|wipe|delete|forget)\b.*\b(session)\b.*\b(memory|memories|notes)?\b"):
        return ControlAction("clear_session")
    if has(
        r"\b(forget|delete|erase|wipe)\b.*\b(all|everything)\b.*\b(memory|memories|notes|data)\b"
    ) or has(r"\b(delete|erase)\b.*\b(my )?(data|account data|data for this chat)\b"):
        return ControlAction("forget_scope")

    # Session toggles (chat UX).
    if has(r"\b(turn on|enable|show)\b.*\b(trace|brain trace|debug)\b"):
        return ControlAction("set_trace", "on")
    if has(r"\b(turn off|disable|hide)\b.*\b(trace|brain trace|debug)\b"):
        return ControlAction("set_trace", "off")

    if has(r"\b(turn on|enable)\b.*\b(smart routing|auto routing|router)\b"):
        return ControlAction("set_smart_route", "on")
    if has(r"\b(turn off|disable)\b.*\b(smart routing|auto routing|router)\b"):
        return ControlAction("set_smart_route", "off")

    if has(r"\b(turn on|enable)\b.*\b(faq|rag|retrieval)\b"):
        return ControlAction("set_rag", "on")
    if has(r"\b(turn off|disable)\b.*\b(faq|rag|retrieval)\b"):
        return ControlAction("set_rag", "off")

    size = len(m)

    # Reply style for the generative model (short lines only to avoid hijacking
    # real questions). Require "reply"/"answer" before style|format|length so
    # phrases like "default quote style" / "reset tables" reach the narrower
    # exact-phrase rules in _PHRASE_RULES.
    if size <= 140 and (
        has(r"\breset\b.*\b(reply|answer)\s+(style|format|length)\b")
        or has(r"\b(default|normal)\b.*\b(reply|answer)\s+(style|format|length)\b")
    ):
        return ControlAction("reset_reply_style")

    if size <= 96 and has(
        r"\b(be brief|stay brief|keep it short|short answers|answer briefly|concise replies)\b"
    ):
        return ControlAction("set_verbosity", "brief")

    if size <= 120 and has(
        r"\b(more detail|go deeper|in greater detail|explain thoroughly|longer answers|detailed answers)\b"
    ):
        return ControlAction("set_verbosity", "detailed")

    # NOTE(review): "normal answer length" already matches the reset_reply_style
    # rule above, so the "(answer )?" alternative here never fires — confirm
    # which action is intended for that phrase.
    if size <= 100 and has(r"\b(normal (answer )?length|default length|balanced length)\b"):
        return ControlAction("set_verbosity", "normal")

    if size <= 110 and has(r"\b(use|prefer)\b") and has(r"\b(bullet points?|numbered lists?)\b"):
        return ControlAction("set_reply_format", "bullets")

    if size <= 100 and has(r"\b(no bullets|plain paragraphs?|prose only|stop using lists)\b"):
        return ControlAction("set_reply_format", "prose")

    # FAQ / RAG grounding hints for the assistant (short control lines).
    if size <= 100 and has(
        r"\b(strict faq|faq only|stick to (the )?faq|only use (the )?faq|only trust (the )?faq)\b"
    ):
        return ControlAction("set_faq_grounding", "strict")

    if size <= 115 and has(
        r"\b(balanced faq|normal faq|default faq(\s+grounding)?|default faq mode)\b"
    ):
        return ControlAction("set_faq_grounding", "normal")

    if size <= 130 and has(
        r"\b(relaxed faq|faq plus general knowledge|general knowledge(\s+is)?\s+ok|mix faq and general knowledge)\b"
    ):
        return ControlAction("set_faq_grounding", "relaxed")

    # Explanation depth, "simple" audience: each alternative has its own length
    # limit and anchoring, so it stays outside the uniform rule table.
    if (
        (size <= 40 and re.match(r"^(please\s+)?explain simply[\s.!?]*$", m))
        or re.match(r"^(please\s+)?eli5\b[\s.!?]*$", m)
        or (size <= 56 and has(r"\b(i'?m\s+a\s+beginner|beginner\s+here)\b"))
        or re.match(r"^(please\s+)?assume i'?m\s+new\b[\s.!?]*$", m)
        or (size <= 56 and has(r"\bi\s+need\s+(the\s+)?basics\b"))
    ):
        return ControlAction("set_audience", "simple")

    # Remaining controls are short, fully anchored phrases: first matching rule wins.
    for max_len, action, patterns in _PHRASE_RULES:
        if size <= max_len and any(p.match(m) for p in patterns):
            return action

    return None
652
+
scripts/rag_faq_smoke.py CHANGED
@@ -3,7 +3,9 @@
3
 
4
  Chunks a FAQ markdown corpus by `##` sections, embeds with TinyModelRuntime, retrieves top
5
  matches for a query, and reports **keyword overlap** in the top hit as a cheap faithfulness
6
- proxy (not neural entailment)."""
 
 
7
 
8
  from __future__ import annotations
9
 
@@ -11,12 +13,15 @@ import argparse
11
  import re
12
  import sys
13
  from pathlib import Path
 
14
 
15
  _scripts = Path(__file__).resolve().parent
16
  if str(_scripts) not in sys.path:
17
  sys.path.insert(0, str(_scripts))
18
 
19
- from tinymodel_runtime import TinyModelRuntime
 
 
20
 
21
  _STOP = frozenset(
22
  "a an the to of and or for in on at is are was be as it with from by not"
@@ -58,8 +63,22 @@ def _pick_model(explicit: str | None) -> str:
58
  return explicit # Hub id, e.g. HyperlinksSpace/TinyModel1
59
 
60
 
61
- def parse_args() -> argparse.Namespace:
62
- p = argparse.ArgumentParser(description=__doc__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  p.add_argument(
64
  "--model",
65
  type=str,
@@ -82,7 +101,25 @@ def parse_args() -> argparse.Namespace:
82
  action="store_true",
83
  help="Use only TinyModelRuntime.retrieve (stricter; tiny encoders may fail on short FAQ chunks).",
84
  )
85
- return p.parse_args()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
 
88
  def load_chunks(corpus: Path) -> list[str]:
@@ -164,7 +201,29 @@ def main() -> None:
164
  raise SystemExit(1)
165
 
166
  chunks = load_chunks(corpus)
 
 
 
 
 
167
  rt = TinyModelRuntime(model_id, device="cpu", max_length=128)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  print("=== RAG FAQ smoke (retrieval) ===\n")
169
  # (query, substring that must appear in top-1 chunk for a pass — citation-style check)
170
  samples: list[tuple[str, str]] = [
 
3
 
4
  Chunks a FAQ markdown corpus by `##` sections, embeds with TinyModelRuntime, retrieves top
5
  matches for a query, and reports **keyword overlap** in the top hit as a cheap faithfulness
6
+ proxy (not neural entailment). Optional **--show-train-routing** prints Phase 2 **`routing`**
7
+ notes from the checkpoint's **eval_report.json** (same helper as **embeddings_smoke_test** /
8
+ **horizon1_route_then_retrieve**)."""
9
 
10
  from __future__ import annotations
11
 
 
13
  import re
14
  import sys
15
  from pathlib import Path
16
+ from typing import Any
17
 
18
  _scripts = Path(__file__).resolve().parent
19
  if str(_scripts) not in sys.path:
20
  sys.path.insert(0, str(_scripts))
21
 
22
+ from eval_report_routing import maybe_print_routing_section
23
+
24
+ _PROG = "rag_faq_smoke"
25
 
26
  _STOP = frozenset(
27
  "a an the to of and or for in on at is are was be as it with from by not"
 
63
  return explicit # Hub id, e.g. HyperlinksSpace/TinyModel1
64
 
65
 
66
+ def build_parser() -> argparse.ArgumentParser:
67
+ epilog = (
68
+ "Examples:\n"
69
+ " python scripts/rag_faq_smoke.py\n"
70
+ " python scripts/rag_faq_smoke.py --query \"How do I get a refund?\" --top-k 3\n"
71
+ " python scripts/rag_faq_smoke.py --model artifacts/phase1/runs/smoke/ag_news/scratch "
72
+ "--show-train-routing\n"
73
+ "If --model is omitted, the first default checkpoint dir with config.json is used, "
74
+ f"else {_DEFAULT_HUB!r} (see --model above)."
75
+ )
76
+ p = argparse.ArgumentParser(
77
+ prog=_PROG,
78
+ description=__doc__,
79
+ formatter_class=argparse.RawDescriptionHelpFormatter,
80
+ epilog=epilog,
81
+ )
82
  p.add_argument(
83
  "--model",
84
  type=str,
 
101
  action="store_true",
102
  help="Use only TinyModelRuntime.retrieve (stricter; tiny encoders may fail on short FAQ chunks).",
103
  )
104
+ p.add_argument(
105
+ "--query",
106
+ type=str,
107
+ default=None,
108
+ help=(
109
+ "If set, run a single retrieval for this query and print top-k chunks with scores "
110
+ "(citation-style index into the chunk list). Skips the built-in smoke assertions."
111
+ ),
112
+ )
113
+ p.add_argument(
114
+ "--show-train-routing",
115
+ action="store_true",
116
+ help="Print eval_report.json top-level routing (Phase 2 notes) before retrieval output.",
117
+ )
118
+ return p
119
+
120
+
121
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse command-line options with the parser from ``build_parser``.

    Args:
        argv: Explicit argument list to parse. When ``None`` (the default),
            argparse falls back to ``sys.argv[1:]``, which is what existing
            zero-argument callers get.

    Returns:
        The populated ``argparse.Namespace``.
    """
    # Forwarding argv lets callers (and tests) drive the CLI without
    # mutating the process-wide sys.argv.
    return build_parser().parse_args(argv)
123
 
124
 
125
  def load_chunks(corpus: Path) -> list[str]:
 
201
  raise SystemExit(1)
202
 
203
  chunks = load_chunks(corpus)
204
+ maybe_print_routing_section(
205
+ model_id, enabled=args.show_train_routing, prog=_PROG,
206
+ )
207
+ from tinymodel_runtime import TinyModelRuntime
208
+
209
  rt = TinyModelRuntime(model_id, device="cpu", max_length=128)
210
+
211
+ if args.query:
212
+ q = args.query.strip()
213
+ print("=== RAG FAQ (single query) ===\n")
214
+ print(f"model={model_id!r}\ncorpus={corpus}\nquery={q!r}\n")
215
+ if args.semantic_only:
216
+ hits = rt.retrieve(q, chunks, top_k=args.top_k)
217
+ for rank, h in enumerate(hits, 1):
218
+ prev = h.text[:240].replace("\n", " ")
219
+ print(f" #{rank} idx={h.index} score={h.score:.4f} {prev!r}...")
220
+ else:
221
+ hr = hybrid_retrieve(rt, q, chunks, top_k=args.top_k)
222
+ for rank, (score, idx, text) in enumerate(hr, 1):
223
+ prev = text[:240].replace("\n", " ")
224
+ print(f" #{rank} idx={idx} hybrid_score={score:.4f} {prev!r}...")
225
+ return
226
+
227
  print("=== RAG FAQ smoke (retrieval) ===\n")
228
  # (query, substring that must appear in top-1 chunk for a pass — citation-style check)
229
  samples: list[tuple[str, str]] = [
scripts/universal_brain_chat.py CHANGED
@@ -25,8 +25,27 @@ import json
25
  import os
26
  import sqlite3
27
  import sys
 
28
  import warnings
29
  from pathlib import Path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  _scripts = Path(__file__).resolve().parent
32
  _REPO = _scripts.parent
@@ -46,13 +65,59 @@ from horizon2_core import ( # noqa: E402
46
  load_causal_lm,
47
  pick_device,
48
  )
49
- from horizon3_store import clear_session, connect, init_schema, list_for_scope, put # noqa: E402
 
 
 
 
 
 
 
 
 
50
  from rag_faq_smoke import _pick_model, hybrid_retrieve, load_chunks # noqa: E402
51
  from tinymodel_runtime import TinyModelRuntime # noqa: E402
52
 
53
  HELP_TEXT = """**How to use**
54
  - **Normal language:** ask in plain English (or mixed); the app **infers** what you want (summarize, search FAQ, save a note, etc.).
55
- - **Shortcuts:** slash commands still work (`/help`, `/status`, ).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  **Intents the router understands** (examples, not exact wording):
58
  - Ordinary chat / questions
@@ -61,6 +126,9 @@ HELP_TEXT = """**How to use**
61
  - **Answer using only** these facts — include both facts and question
62
  - **Search** the FAQ / **find** in the knowledge base
63
  - **Classify** (topic model) this paragraph
 
 
 
64
  - **Remember** / note / store: **long-term** vs **this session only**
65
  - **Show** saved notes; **clear** session notes
66
  - **Status** of loaded models
@@ -81,6 +149,9 @@ intent must be one of:
81
  - grounded — answer only from given facts; put QUESTION in "question", FACTS in "context" (if user mixes both in one blob, split sensibly)
82
  - retrieve — search FAQ/knowledge; put search query in "text"
83
  - classify — show topic-classifier probabilities; put passage in "text"
 
 
 
84
  - remember — save a durable note; put note body in "text"
85
  - session_note — save a session-only note; put note in "text"
86
  - list_memories — user wants to see saved notes
@@ -101,6 +172,9 @@ VALID_INTENTS = frozenset(
101
  "grounded",
102
  "retrieve",
103
  "classify",
 
 
 
104
  "remember",
105
  "session_note",
106
  "list_memories",
@@ -117,9 +191,69 @@ _INTENT_ALIASES = {
117
  "search": "retrieve",
118
  "faq": "retrieve",
119
  "lookup": "retrieve",
 
 
 
 
 
 
 
120
  }
121
 
122
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  def _classifier_result_markdown(probs: dict[str, float]) -> str:
124
  ranked = sorted(probs.items(), key=lambda x: -x[1])
125
  top_lab, top_p = ranked[0]
@@ -323,6 +457,45 @@ def run_routed_tool(
323
  out.append(f"**#{i}** score={sc:.4f}\n{_clip(txt, 700)}\n")
324
  return "\n".join(out)
325
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
  if intent in ("summarize", "reformulate", "grounded"):
327
  if intent == "grounded":
328
  qn = question or text
@@ -387,6 +560,798 @@ def run_routed_tool(
387
  return ""
388
 
389
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
390
  def handle_slash(
391
  msg: str,
392
  *,
@@ -442,6 +1407,39 @@ def handle_slash(
442
  out.append(f"**#{i}** score={sc:.4f}\n{_clip(txt, 700)}\n")
443
  return "\n".join(out)
444
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
445
  if cmd in ("/summarize", "/reformulate", "/grounded"):
446
  if lm is None:
447
  return "Generative model not loaded."
@@ -665,27 +1663,61 @@ def main() -> None:
665
  print(f"Loading generative model {mid!r} on {dev!r} ...", flush=True)
666
  lm = load_causal_lm(mid, dev)
667
  turn_counter = {"n": 0}
668
- show_trace = not args.no_trace and (
669
- encoder is not None or mem_conn is not None or (rag_chunks is not None)
670
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
671
 
672
  def respond(
673
  message: str,
674
  history: list[dict],
675
- ) -> tuple[str, list[dict]]:
 
676
  msg = (message or "").strip()
677
  hist = list(history or [])
678
  if not msg:
679
- return "", hist
680
 
681
  turn_counter["n"] += 1
682
  seed = (args.seed + turn_counter["n"]) % (2**31)
683
 
 
 
684
  slash_out = handle_slash(
685
  msg,
686
  lm=lm,
687
  mem_conn=mem_conn,
688
- scope_key=args.memory_scope,
689
  encoder=encoder,
690
  rag_chunks=rag_chunks,
691
  rag_top_k=args.rag_top_k,
@@ -699,10 +1731,28 @@ def main() -> None:
699
  if slash_out is not None:
700
  hist.append({"role": "user", "content": msg})
701
  hist.append({"role": "assistant", "content": slash_out})
702
- return "", hist
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
703
 
704
  chat_line = msg
705
- if not args.no_smart_route:
706
  try:
707
  route = infer_route(
708
  lm,
@@ -719,9 +1769,9 @@ def main() -> None:
719
  msg=msg,
720
  lm=lm,
721
  mem_conn=mem_conn,
722
- scope_key=args.memory_scope,
723
  encoder=encoder,
724
- rag_chunks=rag_chunks,
725
  rag_top_k=args.rag_top_k,
726
  task_max_new_tokens=args.task_max_new_tokens,
727
  seed=(seed + 11) % (2**31),
@@ -734,12 +1784,13 @@ def main() -> None:
734
  foot = f"\n\n---\n*Routed intent:* `{route['intent']}`"
735
  hist.append({"role": "user", "content": msg})
736
  hist.append({"role": "assistant", "content": tool_reply + foot})
737
- return "", hist
738
 
739
  chat_line = route["text"] or msg
740
 
741
  trace: list[str] = []
742
  extras: list[str] = []
 
743
 
744
  if encoder:
745
  probs = encoder.classify([chat_line])[0]
@@ -752,8 +1803,8 @@ def main() -> None:
752
  )
753
 
754
  rag_block = ""
755
- if encoder and rag_chunks:
756
- hr = hybrid_retrieve(encoder, chat_line, rag_chunks, top_k=args.rag_top_k)
757
  if hr:
758
  trace.append(f"RAG:{len(hr)}chunk(s)")
759
  pieces = []
@@ -767,7 +1818,7 @@ def main() -> None:
767
  )
768
 
769
  if mem_conn:
770
- items = list_for_scope(mem_conn, args.memory_scope)
771
  if items:
772
  trace.append(f"mem:{len(items)}item(s)")
773
  mem_lines = []
@@ -796,12 +1847,21 @@ def main() -> None:
796
  do_sample=True,
797
  )
798
  out = reply or "(empty generation)"
799
- if show_trace and trace:
 
 
 
 
 
 
 
 
 
800
  out += "\n\n---\n*Brain trace:* " + " · ".join(trace)
801
 
802
  hist.append({"role": "user", "content": msg})
803
  hist.append({"role": "assistant", "content": out})
804
- return "", hist
805
 
806
  brain_bits = []
807
  if encoder:
@@ -812,33 +1872,67 @@ def main() -> None:
812
  brain_bits.append("memory")
813
  brain_label = "+".join(brain_bits) if brain_bits else "LM only"
814
 
815
- with gr.Blocks(title="Universal Brain (chat prototype)") as demo:
 
 
 
 
816
  gr.Markdown(
817
  "### Universal Brain — chat prototype\n"
818
  f"**Generative:** `{mid}` ({lm.device}) · **Brain layers:** {brain_label}\n\n"
819
  "**NL routing:** the model infers what you want (summarize, FAQ search, save note, …). "
820
  "Use **`--no-smart-route`** for plain chat-only + slash shortcuts. "
821
  "`/help` lists slash commands.\n\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
822
  "Encoder topics (Hub TinyModel1 ≈ AG News) still feed context and an optional *Brain trace* line; "
823
  "use `/classify` or ask naturally to see the full probability table in chat."
824
  )
825
  chat = gr.Chatbot(type="messages", height=520, label="Conversation", allow_tags=False)
 
826
  with gr.Row():
827
  inp = gr.Textbox(
828
- lines=1,
829
- max_lines=1,
830
  show_label=False,
831
  placeholder="Ask in plain language, or use /help …",
832
  scale=9,
 
833
  )
834
  go = gr.Button("Send", variant="primary", scale=1)
835
  gr.ClearButton([chat, inp])
836
 
837
- def _submit(m: str, h: list[dict]) -> tuple[str, list[dict]]:
838
- return respond(m, h)
839
-
840
- go.click(_submit, [inp, chat], [inp, chat])
841
- inp.submit(_submit, [inp, chat], [inp, chat])
 
 
 
 
 
 
 
 
 
 
842
 
843
  demo.queue(default_concurrency_limit=2)
844
  share = args.share
@@ -850,6 +1944,7 @@ def main() -> None:
850
  server_port=args.port,
851
  share=share,
852
  ssr_mode=False,
 
853
  )
854
  except ValueError as e:
855
  err = str(e)
 
25
  import os
26
  import sqlite3
27
  import sys
28
+ import uuid
29
  import warnings
30
  from pathlib import Path
31
+ from typing import Any
32
+
33
+ # Windows: avoid OpenMP/MKL oversubscription and duplicate CRT issues that can
34
+ # segfault during large `from_pretrained` CPU loads (common with torch+transformers).
35
+ if sys.platform == "win32":
36
+ os.environ.setdefault("OMP_NUM_THREADS", "1")
37
+ os.environ.setdefault("MKL_NUM_THREADS", "1")
38
+ os.environ.setdefault("KMP_DUPLICATE_LIB_OK", "TRUE")
39
+ os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
40
+
41
+ import torch
42
+
43
+ if sys.platform == "win32":
44
+ torch.set_num_threads(1)
45
+ try:
46
+ torch.set_num_interop_threads(1)
47
+ except RuntimeError:
48
+ pass
49
 
50
  _scripts = Path(__file__).resolve().parent
51
  _REPO = _scripts.parent
 
65
  load_causal_lm,
66
  pick_device,
67
  )
68
+ from horizon3_store import ( # noqa: E402
69
+ clear_session,
70
+ connect,
71
+ export_scope_json,
72
+ forget_scope,
73
+ init_schema,
74
+ list_for_scope,
75
+ put,
76
+ )
77
+ from nl_controls import parse_control_action # noqa: E402
78
  from rag_faq_smoke import _pick_model, hybrid_retrieve, load_chunks # noqa: E402
79
  from tinymodel_runtime import TinyModelRuntime # noqa: E402
80
 
81
  HELP_TEXT = """**How to use**
82
  - **Normal language:** ask in plain English (or mixed); the app **infers** what you want (summarize, search FAQ, save a note, etc.).
83
+ - **Session controls (say it in chat, no slash command):**
84
+ - *What is my current scope?*, *Show my session settings* -> prints scope + toggles (FAQ context, routing, trace)
85
+ - *Start a new private session*, *Begin a fresh scope* -> generates a **new memory scope key** so notes are isolated from the shared default demo scope
86
+ - *Switch to scope my-team-123* / *Use session demo-key* -> set the Horizon 3 **`scope_key`** from chat (ASCII id)
87
+ - *Be brief* / *More detail please* / *Use bullet points* / *No bullets, plain paragraphs* -> soft **reply-style** hints (injected into the assistant system context; short control lines only)
88
+ - *Strict FAQ* / *FAQ only* / *Stick to the FAQ* vs *Relaxed FAQ* / *FAQ plus general knowledge* vs *Balanced FAQ* / *Normal FAQ* -> **FAQ grounding** hints for how tightly to treat injected FAQ excerpts vs general knowledge
89
+ - *Explain simply* / *ELI5* / *I'm a beginner* vs *Expert mode* / *Assume I'm technical* vs *Normal explanation level* -> **audience depth** hints (simple vs technical vs default)
90
+ - *TLDR first* / *Lead with a summary* vs *No TLDR* / *Answer directly* vs *Default answer structure* -> **answer opening** style (short upfront summary vs dive straight in)
91
+ - *Step by step* / *Numbered steps* vs *No numbered steps* / *Continuous prose* vs *Default step style* -> **procedure layout** (numbered steps vs flowing paragraphs)
92
+ - *Flag your assumptions* / *Be explicit about uncertainty* vs *Be decisive* / *Don't hedge* vs *Reset uncertainty* -> **confidence tone** hints
93
+ - *Suggest next steps* / *Offer follow-up questions* vs *No follow-up questions* / *No questions at the end* vs *Default follow-ups* -> **closing** style at end of answers
94
+ - *Definitions first* / *Define terms first* vs *Intuition first* / *Big picture first* vs *Default explanation order* -> **concept order** in explanations
95
+ - *Include examples* / *Use concrete examples* vs *Skip examples* / *No examples unless I ask* vs *Default examples* -> **example density**
96
+ - *Use pros and cons* / *Pros and cons sections* vs *Compare in flowing prose* / *No pros and cons sections* vs *Default comparison style* -> **comparison layout** for trade-offs
97
+ - *Formal tone* / *Professional register* vs *Casual tone* / *Speak casually* vs *Default tone* -> **writing register**
98
+ - *Use code fences* / *Fenced code blocks* vs *Inline code only* / *No fenced code blocks* vs *Default code formatting* -> **markdown code layout**
99
+ - *Use analogies* / *Analogies when helpful* vs *No analogies* / *Literal explanations only* vs *Default analogy style* -> **analogy / metaphor** usage
100
+ - *Spell out acronyms* / *Expand acronyms on first use* vs *Assume I know acronyms* / *Don't expand acronyms* vs *Default acronym style* -> **acronym verbosity**
101
+ - *Ask clarifying questions first* / *Clarify first* vs *No clarifying questions* / *Just answer without questions* vs *Default clarify mode* -> whether the assistant should ask for missing info before answering
102
+ - *No speculation* / *Stick to high confidence only* vs *Brainstorm freely* / *Wild ideas ok* vs *Default speculation* -> how strictly to avoid guessing vs allow ideation
103
+ - *Show your work* / *Show the derivation* vs *Final answer only* / *No derivation* vs *Default math detail* -> how much intermediate reasoning to show for math-like answers
104
+ - *Answer in JSON* / *JSON output* vs *Plain text only* / *No JSON* vs *Default output format* -> structured output preference
105
+ - *Be risk averse* / *Err on the side of safety* vs *Be pragmatic* / *Optimize for speed* vs *Default risk posture* -> conservative vs practical recommendations
106
+ - *Give me runnable commands* / *Make it actionable* vs *No commands* / *Conceptual only* vs *Default actionability* -> how command-heavy responses should be
107
+ - *Quote the FAQ excerpts* / *Use direct quotes* vs *Paraphrase only* / *Don't quote excerpts* vs *Default quote style* -> quoting vs paraphrasing when relying on injected excerpts
108
+ - *Use tables* / *Tabular format* vs *No tables* / *Avoid tables* vs *Default table style* -> whether markdown tables are preferred
109
+ - *Use emoji* / *Emoji ok* vs *No emoji* / *Avoid emoji* vs *Default emoji style* -> light **emoji** usage in answers
110
+ - *Use section headings* / *Organize with headings* vs *No section headings* / *Flat answer* vs *Default section headings* -> **markdown headings** vs flat prose
111
+ - *Bold key terms* / *Highlight important terms* vs *Minimal bold* / *Don't overuse bold* vs *Default emphasis* -> **inline bold** for key phrases vs sparse formatting
112
+ - *Challenge my assumptions* / *Play devils advocate* vs *Be supportive* / *Assume good intent* vs *Default counterpoints* -> how much to **push back** vs stay encouraging
113
+ - *Reset reply style* -> back to defaults for length + prose + balanced FAQ grounding + audience + opening + steps + confidence tone + follow-ups + concept order + examples + comparisons + register + code layout + analogy + acronym style + clarify + speculation + math detail + output format + risk posture + actionability + quote style + table style + emoji + section headings + term emphasis + counterpoints
114
+ - *Export my memories*, *Download my notes as JSON* -> returns a Horizon 3 export blob for **this Space session scope**
115
+ - *Delete all my memories for this chat* / *Erase everything you stored about me here* -> **forget-scope** wipe for this scope (**long-term + session** rows)
116
+ - *Clear my session notes* -> wipes **session** notes only
117
+ - *Turn off the FAQ context*, *Disable RAG snippets*, *Turn FAQ back on* -> toggles whether FAQ excerpts are injected into the chat system context
118
+ - *Turn off smart routing*, *Go back to normal chat only* -> disables the JSON intent router (slash commands still work)
119
+ - *Show the brain trace*, *Hide debug trace* -> toggles the optional *Brain trace* footer on replies
120
+ - **Shortcuts:** `/help`, `/status`, `/classify`, `/retrieve`, `/summarize`, `/reformulate`, `/grounded q ||| ctx`, `/remember`, `/session`, `/memories`, `/clear-session`, **`/similarity a ||| b`**, **`/embed` / `/embedding`**, **`/nearest q ||| c1 ||| c2`**.
121
 
122
  **Intents the router understands** (examples, not exact wording):
123
  - Ordinary chat / questions
 
126
  - **Answer using only** these facts — include both facts and question
127
  - **Search** the FAQ / **find** in the knowledge base
128
  - **Classify** (topic model) this paragraph
129
+ - **Similarity:** are these two snippets close in meaning? (encoder cosine)
130
+ - **Embedding** stats for a passage (dimension, norm, preview)
131
+ - **Nearest** among several options: which candidate is closest to a query? (`query ||| opt1 ||| opt2 …`)
132
  - **Remember** / note / store: **long-term** vs **this session only**
133
  - **Show** saved notes; **clear** session notes
134
  - **Status** of loaded models
 
149
  - grounded — answer only from given facts; put QUESTION in "question", FACTS in "context" (if user mixes both in one blob, split sensibly)
150
  - retrieve — search FAQ/knowledge; put search query in "text"
151
  - classify — show topic-classifier probabilities; put passage in "text"
152
+ - similarity — cosine similarity between two texts; put "text_a ||| text_b" in "text"
153
+ - embedding — embedding vector summary for one passage; put passage in "text"
154
+ - nearest — encoder top-k over candidates; put "query ||| candidate1 ||| candidate2 ||| …" in "text" (at least one candidate)
155
  - remember — save a durable note; put note body in "text"
156
  - session_note — save a session-only note; put note in "text"
157
  - list_memories — user wants to see saved notes
 
172
  "grounded",
173
  "retrieve",
174
  "classify",
175
+ "similarity",
176
+ "embedding",
177
+ "nearest",
178
  "remember",
179
  "session_note",
180
  "list_memories",
 
191
  "search": "retrieve",
192
  "faq": "retrieve",
193
  "lookup": "retrieve",
194
+ "similar": "similarity",
195
+ "cosine": "similarity",
196
+ "embed": "embedding",
197
+ "embeddings": "embedding",
198
+ "knn": "nearest",
199
+ "triage": "nearest",
200
+ "encoder_retrieve": "nearest",
201
  }
202
 
203
 
204
+ def _parse_two_segments(blob: str) -> tuple[str, str]:
205
+ if "|||" not in blob:
206
+ raise ValueError("Need two segments separated by `|||` (e.g. `text A ||| text B`).")
207
+ a, _, b = blob.partition("|||")
208
+ a, b = a.strip(), b.strip()
209
+ if not a or not b:
210
+ raise ValueError("Both sides of `|||` must be non-empty.")
211
+ return a, b
212
+
213
+
214
+ def _parse_nearest_blob(blob: str) -> tuple[str, list[str]]:
215
+ parts = [p.strip() for p in blob.split("|||") if p.strip()]
216
+ if len(parts) < 2:
217
+ raise ValueError(
218
+ "Need `query ||| candidate1 ||| candidate2` (at least one candidate after `|||`)."
219
+ )
220
+ return parts[0], parts[1:]
221
+
222
+
223
+ def _embedding_summary_markdown(encoder: TinyModelRuntime, passage: str) -> str:
224
+ vec = encoder.embed([passage], normalize=False)[0]
225
+ dim = int(vec.shape[0])
226
+ norm = float(torch.linalg.vector_norm(vec))
227
+ k = min(8, dim)
228
+ head = ", ".join(f"{float(vec[i]):.4f}" for i in range(k))
229
+ return "\n".join(
230
+ [
231
+ "### Encoder embedding (raw [CLS], not L2-normalized)\n",
232
+ f"- **dim:** {dim}",
233
+ f"- **L2 norm:** {norm:.4f}",
234
+ f"- **first {k} values:** {head}",
235
+ ]
236
+ )
237
+
238
+
239
def _nearest_markdown(
    encoder: TinyModelRuntime,
    query: str,
    candidates: list[str],
    *,
    top_k: int,
) -> str:
    """Format the results of ``encoder.retrieve`` for ``query`` as markdown.

    Each returned hit contributes one entry showing its 1-based rank, its
    ``score`` (four decimals), its ``index``, and its ``text`` passed through
    ``_clip(..., 700)``. When ``retrieve`` returns nothing, a placeholder line
    is returned instead.
    """
    ranked = encoder.retrieve(query, candidates, top_k=top_k)
    if not ranked:
        return "(No candidates.)"
    heading = "### Encoder nearest neighbors (cosine on pooled embeddings)\n"
    entries = [
        f"**#{position}** score={hit.score:.4f} · index={hit.index}\n{_clip(hit.text, 700)}\n"
        for position, hit in enumerate(ranked, start=1)
    ]
    return "\n".join([heading, *entries])
255
+
256
+
257
  def _classifier_result_markdown(probs: dict[str, float]) -> str:
258
  ranked = sorted(probs.items(), key=lambda x: -x[1])
259
  top_lab, top_p = ranked[0]
 
457
  out.append(f"**#{i}** score={sc:.4f}\n{_clip(txt, 700)}\n")
458
  return "\n".join(out)
459
 
460
+ if intent == "similarity":
461
+ if not encoder:
462
+ return "Similarity needs the encoder (drop `--lm-only` / `--no-encoder`)."
463
+ blob = (text or msg).strip()
464
+ if not blob:
465
+ return "Provide two texts: `first ||| second`."
466
+ try:
467
+ ta, tb = _parse_two_segments(blob)
468
+ except ValueError as e:
469
+ return str(e)
470
+ score = encoder.similarity(ta, tb)
471
+ return (
472
+ "### Similarity (encoder cosine)\n"
473
+ f"**Score:** {score:.4f}\n\n"
474
+ f"**A:** {_clip(ta, 480)}\n\n"
475
+ f"**B:** {_clip(tb, 480)}"
476
+ )
477
+
478
+ if intent == "embedding":
479
+ if not encoder:
480
+ return "Embedding stats need the encoder (drop `--lm-only` / `--no-encoder`)."
481
+ passage = (text or msg).strip()
482
+ if not passage:
483
+ return "What text should I embed?"
484
+ return _embedding_summary_markdown(encoder, passage)
485
+
486
+ if intent == "nearest":
487
+ if not encoder:
488
+ return "Nearest-neighbor search needs the encoder (drop `--lm-only` / `--no-encoder`)."
489
+ blob = (text or msg).strip()
490
+ if not blob:
491
+ return "Usage: `query ||| option1 ||| option2 ...`"
492
+ try:
493
+ query, cands = _parse_nearest_blob(blob)
494
+ except ValueError as e:
495
+ return str(e)
496
+ k = max(1, min(rag_top_k, len(cands)))
497
+ return _nearest_markdown(encoder, query, cands, top_k=k)
498
+
499
  if intent in ("summarize", "reformulate", "grounded"):
500
  if intent == "grounded":
501
  qn = question or text
 
560
  return ""
561
 
562
 
563
+ def handle_nl_control(
564
+ msg: str,
565
+ session: dict[str, Any],
566
+ *,
567
+ mem_conn: sqlite3.Connection | None,
568
+ scope_key: str,
569
+ rag_chunks_base: list[str] | None,
570
+ locked_no_smart_route: bool,
571
+ ) -> str | None:
572
+ act = parse_control_action(msg)
573
+ if act is None:
574
+ return None
575
+
576
+ if act.name == "show_session":
577
+ bits = [
578
+ f"- scope: `{scope_key}`",
579
+ f"- smart routing: **{'on' if session.get('smart_route') and not locked_no_smart_route else 'off'}**",
580
+ f"- FAQ context: **{'on' if session.get('rag') and rag_chunks_base is not None else 'off'}**",
581
+ f"- brain trace footer: **{'on' if session.get('trace') else 'off'}**",
582
+ f"- memory store: **{'on' if mem_conn is not None else 'off'}**",
583
+ f"- reply length: **{session.get('verbosity', 'normal')}**",
584
+ f"- lists: **{'bullets when helpful' if session.get('reply_format') == 'bullets' else 'prose'}**",
585
+ f"- FAQ grounding: **{session.get('faq_grounding', 'normal')}**",
586
+ f"- audience: **{session.get('audience', 'normal')}**",
587
+ f"- answer opening: **{session.get('answer_lead', 'normal')}**",
588
+ f"- procedure steps: **{session.get('step_style', 'normal')}**",
589
+ f"- confidence tone: **{session.get('confidence_tone', 'normal')}**",
590
+ f"- follow-up ending: **{session.get('followup_close', 'normal')}**",
591
+ f"- concept order: **{session.get('exposition_order', 'normal')}**",
592
+ f"- examples: **{session.get('example_density', 'normal')}**",
593
+ f"- comparisons: **{session.get('comparison_frame', 'normal')}**",
594
+ f"- register: **{session.get('register_tone', 'normal')}**",
595
+ f"- code blocks: **{session.get('code_block_style', 'normal')}**",
596
+ f"- analogies: **{session.get('analogy_use', 'normal')}**",
597
+ f"- acronyms: **{session.get('acronym_style', 'normal')}**",
598
+ f"- clarify-first: **{session.get('clarify_first', 'normal')}**",
599
+ f"- speculation: **{session.get('speculation', 'normal')}**",
600
+ f"- math detail: **{session.get('math_detail', 'normal')}**",
601
+ f"- output format: **{session.get('output_format', 'normal')}**",
602
+ f"- risk posture: **{session.get('risk_posture', 'normal')}**",
603
+ f"- actionability: **{session.get('actionability', 'normal')}**",
604
+ f"- quote style: **{session.get('quote_style', 'normal')}**",
605
+ f"- tables: **{session.get('table_style', 'normal')}**",
606
+ f"- emoji: **{session.get('emoji_style', 'normal')}**",
607
+ f"- section headings: **{session.get('section_headings', 'normal')}**",
608
+ f"- term emphasis: **{session.get('term_emphasis', 'normal')}**",
609
+ f"- counterpoints: **{session.get('counterpoint_tone', 'normal')}**",
610
+ ]
611
+ return "### Session settings\n" + "\n".join(bits)
612
+
613
+ if act.name == "new_private_session":
614
+ # Keep it readable and low-collision; not a secret, just a scope id.
615
+ new_scope = f"ub-{uuid.uuid4().hex[:8]}"
616
+ session["scope_key"] = new_scope
617
+ return (
618
+ f"**Started a new private session scope.**\n\n"
619
+ f"Current scope is now `{new_scope}`.\n"
620
+ "Memory operations (remember/export/forget) will apply to this new scope."
621
+ )
622
+
623
+ if act.name == "set_scope":
624
+ if not act.value:
625
+ return "Tell me the scope key, e.g. `Switch to scope demo-123`."
626
+ session["scope_key"] = act.value
627
+ return f"Switched session scope to `{act.value}`."
628
+
629
+ if act.name == "export_memory":
630
+ if mem_conn is None:
631
+ return "Memory is off for this Space (no SQLite store); nothing to export."
632
+ blob = export_scope_json(mem_conn, scope_key)
633
+ js = json.dumps(blob, indent=2, ensure_ascii=False)
634
+ max_chars = 48_000
635
+ if len(js) > max_chars:
636
+ js = js[:max_chars] + "\n…(truncated for chat; schema is horizon3_export/1.0)…"
637
+ return f"### Memory export (`{scope_key}`)\nPaste/save externally if needed.\n\n```json\n{js}\n```"
638
+
639
+ if act.name == "forget_scope":
640
+ if mem_conn is None:
641
+ return "Memory is off; nothing to delete."
642
+ n = forget_scope(mem_conn, scope_key)
643
+ return (
644
+ f"**Erased stored memory for this Space session.**\n\n"
645
+ f"Deleted **{n}** row(s) (**session + long-term**) for `{scope_key}`."
646
+ )
647
+
648
+ if act.name == "list_memories":
649
+ if mem_conn is None:
650
+ return "Memory is off."
651
+ items = list_for_scope(mem_conn, scope_key)
652
+ if not items:
653
+ return "(No saved notes for this scope.)"
654
+ lines = [f"- **{it.kind}** · {_clip(it.content, 320)}" for it in items[:24]]
655
+ extra = f"\n\n… {len(items) - 24} more" if len(items) > 24 else ""
656
+ return "**Saved notes:**\n" + "\n".join(lines) + extra
657
+
658
+ if act.name == "clear_session":
659
+ if mem_conn is None:
660
+ return "Memory is off."
661
+ n = clear_session(mem_conn, scope_key)
662
+ return f"Cleared **{n}** session note(s). Long-term notes unchanged."
663
+
664
+ if act.name == "set_trace":
665
+ session["trace"] = act.value == "on"
666
+ return f"**Brain trace** is now **{'on' if session['trace'] else 'off'}** (footer on assistant replies)."
667
+
668
+ if act.name == "set_smart_route":
669
+ if locked_no_smart_route:
670
+ return "Smart routing is **locked off** for this server (`--no-smart-route`)."
671
+ session["smart_route"] = act.value == "on"
672
+ return (
673
+ f"**Smart routing** is now **{'on' if session['smart_route'] else 'off'}** "
674
+ "(off = plain chat + FAQ context injection + slash shortcuts only)."
675
+ )
676
+
677
+ if act.name == "set_rag":
678
+ if rag_chunks_base is None:
679
+ return "FAQ/RAG corpus is **not loaded** on this deployment; nothing to toggle."
680
+ session["rag"] = act.value == "on"
681
+ return (
682
+ f"**FAQ/RAG excerpts in prompts** are now **{'on' if session['rag'] else 'off'}**."
683
+ )
684
+
685
+ if act.name == "reset_reply_style":
686
+ session["verbosity"] = "normal"
687
+ session["reply_format"] = "prose"
688
+ session["faq_grounding"] = "normal"
689
+ session["audience"] = "normal"
690
+ session["answer_lead"] = "normal"
691
+ session["step_style"] = "normal"
692
+ session["confidence_tone"] = "normal"
693
+ session["followup_close"] = "normal"
694
+ session["exposition_order"] = "normal"
695
+ session["example_density"] = "normal"
696
+ session["comparison_frame"] = "normal"
697
+ session["register_tone"] = "normal"
698
+ session["code_block_style"] = "normal"
699
+ session["analogy_use"] = "normal"
700
+ session["acronym_style"] = "normal"
701
+ session["clarify_first"] = "normal"
702
+ session["speculation"] = "normal"
703
+ session["math_detail"] = "normal"
704
+ session["output_format"] = "normal"
705
+ session["risk_posture"] = "normal"
706
+ session["actionability"] = "normal"
707
+ session["quote_style"] = "normal"
708
+ session["table_style"] = "normal"
709
+ session["emoji_style"] = "normal"
710
+ session["section_headings"] = "normal"
711
+ session["term_emphasis"] = "normal"
712
+ session["counterpoint_tone"] = "normal"
713
+ return (
714
+ "**Reply style reset:** normal length, prose, balanced FAQ grounding, general audience, "
715
+ "default opening, default steps, normal confidence tone, default follow-ups, default concept order, "
716
+ "default examples, default comparisons, default register, default code blocks, default analogies, "
717
+ "default acronyms, default clarify mode, default speculation, default math detail, default output format, "
718
+ "default risk posture, default actionability, default quote style, default tables, default emoji, "
719
+ "default section headings, default term emphasis, default counterpoints."
720
+ )
721
+
722
+ if act.name == "set_verbosity":
723
+ v = (act.value or "normal").lower()
724
+ if v not in ("brief", "normal", "detailed"):
725
+ v = "normal"
726
+ session["verbosity"] = v
727
+ return f"**Reply length** is now **{v}** (applies to assistant chat replies)."
728
+
729
+ if act.name == "set_reply_format":
730
+ f = (act.value or "prose").lower()
731
+ if f not in ("prose", "bullets"):
732
+ f = "prose"
733
+ session["reply_format"] = f
734
+ return f"**List formatting** is now **{f}** (how the assistant structures multi-point answers)."
735
+
736
+ if act.name == "set_faq_grounding":
737
+ mode = (act.value or "normal").lower()
738
+ if mode not in ("strict", "normal", "relaxed"):
739
+ mode = "normal"
740
+ session["faq_grounding"] = mode
741
+ extra = ""
742
+ if rag_chunks_base is None or not session.get("rag", True):
743
+ extra = (
744
+ "\n\n**Note:** FAQ excerpt injection is currently **off** in this chat session "
745
+ "(or no FAQ corpus loaded). Grounding hints apply whenever FAQ snippets are present."
746
+ )
747
+ return f"**FAQ grounding** is now **{mode}**.{extra}"
748
+
749
+ if act.name == "set_audience":
750
+ aud = (act.value or "normal").lower()
751
+ if aud not in ("simple", "normal", "technical"):
752
+ aud = "normal"
753
+ session["audience"] = aud
754
+ label = {"simple": "beginner-friendly", "normal": "general", "technical": "technical"}.get(aud, aud)
755
+ return f"**Audience** is now **{label}** (how deep or jargon-heavy explanations should feel)."
756
+
757
+ if act.name == "set_answer_lead":
758
+ lead = (act.value or "normal").lower()
759
+ if lead not in ("tldr_first", "direct", "normal"):
760
+ lead = "normal"
761
+ session["answer_lead"] = lead
762
+ human = {"tldr_first": "TL;DR first line", "direct": "straight in (no TL;DR line)", "normal": "default"}.get(
763
+ lead, lead
764
+ )
765
+ return f"**Answer opening** is now **{human}**."
766
+
767
+ if act.name == "set_step_style":
768
+ st = (act.value or "normal").lower()
769
+ if st not in ("numbered", "continuous", "normal"):
770
+ st = "normal"
771
+ session["step_style"] = st
772
+ human = {
773
+ "numbered": "numbered steps when explaining procedures",
774
+ "continuous": "continuous prose (avoid numbered step lists)",
775
+ "normal": "default",
776
+ }.get(st, st)
777
+ return f"**Procedure layout** is now **{human}**."
778
+
779
+ if act.name == "set_confidence_tone":
780
+ ct = (act.value or "normal").lower()
781
+ if ct not in ("transparent", "assertive", "normal"):
782
+ ct = "normal"
783
+ session["confidence_tone"] = ct
784
+ human = {
785
+ "transparent": "flag limits and assumptions",
786
+ "assertive": "decisive, minimal hedging",
787
+ "normal": "default",
788
+ }.get(ct, ct)
789
+ return f"**Confidence tone** is now **{human}**."
790
+
791
+ if act.name == "set_followup_close":
792
+ fu = (act.value or "normal").lower()
793
+ if fu not in ("suggest", "minimal", "normal"):
794
+ fu = "normal"
795
+ session["followup_close"] = fu
796
+ human = {
797
+ "suggest": "offer brief next steps / follow-ups when useful",
798
+ "minimal": "no rhetorical closing questions",
799
+ "normal": "default",
800
+ }.get(fu, fu)
801
+ return f"**Follow-up closing** is now **{human}**."
802
+
803
+ if act.name == "set_exposition_order":
804
+ eo = (act.value or "normal").lower()
805
+ if eo not in ("definitions_first", "intuition_first", "normal"):
806
+ eo = "normal"
807
+ session["exposition_order"] = eo
808
+ human = {
809
+ "definitions_first": "definitions and terms before intuition",
810
+ "intuition_first": "big-picture intuition before formal detail",
811
+ "normal": "default",
812
+ }.get(eo, eo)
813
+ return f"**Concept order** is now **{human}**."
814
+
815
+ if act.name == "set_example_density":
816
+ ed = (act.value or "normal").lower()
817
+ if ed not in ("rich", "sparse", "normal"):
818
+ ed = "normal"
819
+ session["example_density"] = ed
820
+ human = {
821
+ "rich": "include concrete examples when they help",
822
+ "sparse": "minimal examples unless asked",
823
+ "normal": "default",
824
+ }.get(ed, ed)
825
+ return f"**Examples** preference is now **{human}**."
826
+
827
+ if act.name == "set_comparison_frame":
828
+ cf = (act.value or "normal").lower()
829
+ if cf not in ("pros_cons", "narrative", "normal"):
830
+ cf = "normal"
831
+ session["comparison_frame"] = cf
832
+ human = {
833
+ "pros_cons": "explicit Pros / Cons sections for trade-offs",
834
+ "narrative": "flowing prose comparisons (no rigid Pros/Cons headings)",
835
+ "normal": "default",
836
+ }.get(cf, cf)
837
+ return f"**Comparison layout** is now **{human}**."
838
+
839
+ if act.name == "set_register_tone":
840
+ rt = (act.value or "normal").lower()
841
+ if rt not in ("formal", "casual", "normal"):
842
+ rt = "normal"
843
+ session["register_tone"] = rt
844
+ human = {
845
+ "formal": "professional / polished wording",
846
+ "casual": "friendly conversational wording",
847
+ "normal": "default",
848
+ }.get(rt, rt)
849
+ return f"**Register** is now **{human}**."
850
+
851
+ if act.name == "set_code_block_style":
852
+ cs = (act.value or "normal").lower()
853
+ if cs not in ("fenced", "inline", "normal"):
854
+ cs = "normal"
855
+ session["code_block_style"] = cs
856
+ human = {
857
+ "fenced": "use ``` fenced blocks for multi-line code",
858
+ "inline": "prefer inline `backticks`, avoid large fences",
859
+ "normal": "default",
860
+ }.get(cs, cs)
861
+ return f"**Code markdown** is now **{human}**."
862
+
863
+ if act.name == "set_analogy_use":
864
+ au = (act.value or "normal").lower()
865
+ if au not in ("prefer", "avoid", "normal"):
866
+ au = "normal"
867
+ session["analogy_use"] = au
868
+ human = {
869
+ "prefer": "use concise analogies when they clarify",
870
+ "avoid": "literal wording; skip analogies and metaphors",
871
+ "normal": "default",
872
+ }.get(au, au)
873
+ return f"**Analogy usage** is now **{human}**."
874
+
875
+ if act.name == "set_acronym_style":
876
+ ac = (act.value or "normal").lower()
877
+ if ac not in ("spell_out", "terse", "normal"):
878
+ ac = "normal"
879
+ session["acronym_style"] = ac
880
+ human = {
881
+ "spell_out": "expand unfamiliar acronyms on first mention",
882
+ "terse": "keep acronym forms without spelling them out first",
883
+ "normal": "default",
884
+ }.get(ac, ac)
885
+ return f"**Acronym style** is now **{human}**."
886
+
887
+ if act.name == "set_clarify_first":
888
+ cf = (act.value or "normal").lower()
889
+ if cf not in ("on", "off", "normal"):
890
+ cf = "normal"
891
+ session["clarify_first"] = cf
892
+ human = {
893
+ "on": "ask 1–3 targeted clarifying questions before answering when info is missing",
894
+ "off": "answer immediately; do not ask clarifying questions first",
895
+ "normal": "default",
896
+ }.get(cf, cf)
897
+ return f"**Clarify-first** is now **{human}**."
898
+
899
+ if act.name == "set_speculation":
900
+ sp = (act.value or "normal").lower()
901
+ if sp not in ("strict", "creative", "normal"):
902
+ sp = "normal"
903
+ session["speculation"] = sp
904
+ human = {
905
+ "strict": "avoid guessing; stick to high-confidence statements",
906
+ "creative": "brainstorm and speculate (label assumptions clearly)",
907
+ "normal": "default",
908
+ }.get(sp, sp)
909
+ return f"**Speculation level** is now **{human}**."
910
+
911
+ if act.name == "set_math_detail":
912
+ md = (act.value or "normal").lower()
913
+ if md not in ("show_work", "final_only", "normal"):
914
+ md = "normal"
915
+ session["math_detail"] = md
916
+ human = {
917
+ "show_work": "show intermediate steps/derivation when doing math-like reasoning",
918
+ "final_only": "final results only (no derivation/steps)",
919
+ "normal": "default",
920
+ }.get(md, md)
921
+ return f"**Math detail** is now **{human}**."
922
+
923
+ if act.name == "set_output_format":
924
+ of = (act.value or "normal").lower()
925
+ if of not in ("json", "plain", "normal"):
926
+ of = "normal"
927
+ session["output_format"] = of
928
+ human = {
929
+ "json": "reply in a JSON-shaped object when possible",
930
+ "plain": "plain text (no forced JSON structure)",
931
+ "normal": "default",
932
+ }.get(of, of)
933
+ return f"**Output format** is now **{human}**."
934
+
935
+ if act.name == "set_risk_posture":
936
+ rp = (act.value or "normal").lower()
937
+ if rp not in ("conservative", "pragmatic", "normal"):
938
+ rp = "normal"
939
+ session["risk_posture"] = rp
940
+ human = {
941
+ "conservative": "risk-averse / safety-first recommendations",
942
+ "pragmatic": "practical, speed-oriented recommendations",
943
+ "normal": "default",
944
+ }.get(rp, rp)
945
+ return f"**Risk posture** is now **{human}**."
946
+
947
+ if act.name == "set_actionability":
948
+ ac = (act.value or "normal").lower()
949
+ if ac not in ("commands", "conceptual", "normal"):
950
+ ac = "normal"
951
+ session["actionability"] = ac
952
+ human = {
953
+ "commands": "include runnable commands/snippets when possible",
954
+ "conceptual": "avoid commands; stay conceptual/high-level",
955
+ "normal": "default",
956
+ }.get(ac, ac)
957
+ return f"**Actionability** is now **{human}**."
958
+
959
+ if act.name == "set_quote_style":
960
+ qs = (act.value or "normal").lower()
961
+ if qs not in ("quote", "paraphrase", "normal"):
962
+ qs = "normal"
963
+ session["quote_style"] = qs
964
+ human = {
965
+ "quote": "prefer short direct quotes when relying on FAQ excerpts",
966
+ "paraphrase": "paraphrase excerpts; avoid quoting",
967
+ "normal": "default",
968
+ }.get(qs, qs)
969
+ return f"**Quote style** is now **{human}**."
970
+
971
+ if act.name == "set_table_style":
972
+ ts = (act.value or "normal").lower()
973
+ if ts not in ("prefer", "avoid", "normal"):
974
+ ts = "normal"
975
+ session["table_style"] = ts
976
+ human = {
977
+ "prefer": "use markdown tables when presenting structured comparisons",
978
+ "avoid": "avoid tables; use bullets/prose instead",
979
+ "normal": "default",
980
+ }.get(ts, ts)
981
+ return f"**Tables** preference is now **{human}**."
982
+
983
+ if act.name == "set_emoji_style":
984
+ es = (act.value or "normal").lower()
985
+ if es not in ("include", "avoid", "normal"):
986
+ es = "normal"
987
+ session["emoji_style"] = es
988
+ human = {
989
+ "include": "a few tasteful emoji are welcome when they aid scanning",
990
+ "avoid": "no emoji unless the user uses them first",
991
+ "normal": "default",
992
+ }.get(es, es)
993
+ return f"**Emoji style** is now **{human}**."
994
+
995
+ if act.name == "set_section_headings":
996
+ sh = (act.value or "normal").lower()
997
+ if sh not in ("prefer", "avoid", "normal"):
998
+ sh = "normal"
999
+ session["section_headings"] = sh
1000
+ human = {
1001
+ "prefer": "use markdown ##/### headings to structure longer answers",
1002
+ "avoid": "avoid markdown heading lines; keep flowing paragraphs/lists",
1003
+ "normal": "default",
1004
+ }.get(sh, sh)
1005
+ return f"**Section headings** preference is now **{human}**."
1006
+
1007
+ if act.name == "set_term_emphasis":
1008
+ te = (act.value or "normal").lower()
1009
+ if te not in ("highlight", "minimal", "normal"):
1010
+ te = "normal"
1011
+ session["term_emphasis"] = te
1012
+ human = {
1013
+ "highlight": "bold a few crucial terms/phrases for scanability",
1014
+ "minimal": "avoid decorative bold; use it sparingly",
1015
+ "normal": "default",
1016
+ }.get(te, te)
1017
+ return f"**Term emphasis** is now **{human}**."
1018
+
1019
+ if act.name == "set_counterpoint_tone":
1020
+ cp = (act.value or "normal").lower()
1021
+ if cp not in ("challenge", "supportive", "normal"):
1022
+ cp = "normal"
1023
+ session["counterpoint_tone"] = cp
1024
+ human = {
1025
+ "challenge": "look for gaps; name risks and counterarguments respectfully",
1026
+ "supportive": "prioritize encouragement and constructive framing",
1027
+ "normal": "default",
1028
+ }.get(cp, cp)
1029
+ return f"**Counterpoint tone** is now **{human}**."
1030
+
1031
+ return None
1032
+
1033
+
1034
# Ordered table of reply-style preferences: (session key, {value: hint line}).
#
# Only values that produce a prompt hint are listed. Each preference's default
# ("normal", or "prose" for ``reply_format``) and any unrecognized value are
# intentionally absent, so they contribute no line. Entry order is the order in
# which hint lines are emitted; ``faq_grounding`` is deliberately last.
_REPLY_STYLE_HINTS: tuple[tuple[str, dict[str, str]], ...] = (
    (
        "verbosity",
        {
            "brief": (
                "Keep replies concise (about a short paragraph or less) unless the user explicitly asks for depth."
            ),
            "detailed": "Prefer fuller, well-structured explanations when they help the user.",
        },
    ),
    (
        "reply_format",
        {
            "bullets": "When listing multiple points, use markdown bullet or numbered lists.",
        },
    ),
    (
        "audience",
        {
            "simple": (
                "Assume the reader is new to the topic: define jargon when you use it, prefer plain language and small steps."
            ),
            "technical": (
                "Assume a technical reader: standard domain terms and shorthand are fine; prioritize precision over hand-holding."
            ),
        },
    ),
    (
        "answer_lead",
        {
            "tldr_first": (
                "Start substantive answers with one short **TL;DR:** line (one sentence), then elaborate."
            ),
            "direct": (
                "Do not add a standalone TL;DR/summary prelude; answer immediately in-flow (still use lists if configured)."
            ),
        },
    ),
    (
        "step_style",
        {
            "numbered": (
                "When explaining procedures or multi-part how-tos, structure the answer with clear **numbered steps** "
                "(1. 2. 3.) and one action per step when practical."
            ),
            "continuous": (
                "Avoid numbered step lists; explain procedures as **connected paragraphs** unless the user explicitly "
                "asks for steps."
            ),
        },
    ),
    (
        "confidence_tone",
        {
            "transparent": (
                "Be explicit about uncertainty: say when you are guessing, label key assumptions, and avoid overstating "
                "facts you cannot support from the prompt or supplied excerpts."
            ),
            "assertive": (
                "Answer in a direct, confident tone: minimize throat-clearing and hedging unless a short disclaimer is "
                "truly necessary for safety or policy."
            ),
        },
    ),
    (
        "followup_close",
        {
            "suggest": (
                "When helpful, end with concise **optional next steps** or a short **follow-up invitation** "
                '(e.g., one line like "Want me to drill into X?" — optional, not repetitive).'
            ),
            "minimal": (
                "Avoid stock closers such as prompting whether the user needs anything else unless they explicitly invite it; "
                "finish crisply after the core answer."
            ),
        },
    ),
    (
        "exposition_order",
        {
            "definitions_first": (
                "Prefer stating **definitions and key terms upfront**, then intuition, analogies, and examples."
            ),
            "intuition_first": (
                "Prefer a short **motivation / big-picture intuition** section first, then formal definitions and details."
            ),
        },
    ),
    (
        "example_density",
        {
            "rich": (
                "When it clarifies the answer, include at least one **short concrete example** or miniature scenario."
            ),
            "sparse": (
                "Unless the user explicitly requests an example, keep answers **example-free** (no illustrative stories)."
            ),
        },
    ),
    (
        "comparison_frame",
        {
            "pros_cons": (
                "For trade-offs or comparing options, use markdown subheadings **Pros** and **Cons** (short bullets under each)."
            ),
            "narrative": (
                "For trade-offs or comparing options, weave pros/cons into **continuous prose** rather than labeled sections."
            ),
        },
    ),
    (
        "register_tone",
        {
            "formal": (
                "Use a **polished professional register**: clear sentences, minimal slang/emoji unless the topic demands it."
            ),
            "casual": (
                "**Conversational register** is preferred: contractions and light phrasing are fine; sound like a helpful teammate."
            ),
        },
    ),
    (
        "code_block_style",
        {
            "fenced": (
                "For multi-line commands or code, use **markdown fenced code blocks** with a language hint when recognizable."
            ),
            "inline": (
                "Prefer **inline backticks** for short snippets; **avoid triple-backtick fences** unless the user pastes a block."
            ),
        },
    ),
    (
        "analogy_use",
        {
            "prefer": (
                "When stuck on an abstract concept, optionally add **one tight analogy/metaphor** (label it plainly; keep it respectful)."
            ),
            "avoid": (
                "Keep explanations **literal and direct**: do **not** use analogies, metaphors, or cute comparisons."
            ),
        },
    ),
    (
        "acronym_style",
        {
            "spell_out": (
                'On **first substantive mention** of a non-obvious acronym/title-case initialism (e.g. API, SLA), '
                'write the **expanded form once** (`Long Form (ACRONYM)`), then use the acronym afterwards.'
            ),
            "terse": (
                "Assume the reader is acronym-literate: **reuse acronyms** as written without mandatory expansion."
            ),
        },
    ),
    (
        "clarify_first",
        {
            "on": (
                "If the request is underspecified, ask **1–3 short clarifying questions first** (only the minimum needed), "
                "then wait for the user's answers before giving a full solution."
            ),
            "off": (
                "Do not pause to ask clarifying questions first; provide the best answer immediately and note assumptions briefly."
            ),
        },
    ),
    (
        "speculation",
        {
            "strict": (
                "Avoid speculation: prefer high-confidence statements, and say when something is unknown or not supported by the prompt."
            ),
            "creative": (
                "Brainstorming is allowed: you may propose speculative ideas, but label assumptions and uncertainty clearly."
            ),
        },
    ),
    (
        "math_detail",
        {
            "show_work": (
                "When the user asks for math/derivations, show concise intermediate steps and explain symbols briefly."
            ),
            "final_only": (
                "When the user asks for math/derivations, give the final result directly (no intermediate derivation)."
            ),
        },
    ),
    (
        "output_format",
        {
            "json": (
                "When appropriate, format the answer as a single JSON object with stable keys; avoid extra prose outside the JSON."
            ),
            "plain": "Do not force JSON or rigid schemas; answer in normal plain text.",
        },
    ),
    (
        "risk_posture",
        {
            "conservative": (
                "Prefer safer, low-risk recommendations; call out risks and choose options that minimize downside."
            ),
            "pragmatic": (
                "Prefer practical, time-efficient recommendations; avoid over-engineering unless clearly needed."
            ),
        },
    ),
    (
        "actionability",
        {
            "commands": (
                "When proposing a solution, include runnable commands/snippets/checklists where appropriate."
            ),
            "conceptual": "Avoid command dumps; focus on concepts, rationale, and decision points.",
        },
    ),
    (
        "quote_style",
        {
            "quote": (
                "When you rely on an injected **[FAQ excerpt N]**, include a short verbatim quote (a sentence or clause) "
                "before paraphrasing."
            ),
            "paraphrase": (
                "Prefer paraphrasing FAQ excerpts; avoid quoting unless the user asks for exact wording."
            ),
        },
    ),
    (
        "table_style",
        {
            "prefer": (
                "When comparing several options, prefer a **markdown table** if it makes the structure clearer."
            ),
            "avoid": "Avoid markdown tables; use bullets or short sections instead.",
        },
    ),
    (
        "emoji_style",
        {
            "include": (
                "You may use a few tasteful emoji in replies when they help readability (keep it sparse and professional)."
            ),
            "avoid": "Do not use emoji in replies unless the user explicitly uses emoji first.",
        },
    ),
    (
        "section_headings",
        {
            "prefer": (
                "For multi-part answers, organize with short **markdown headings** (## / ###) before each major block."
            ),
            "avoid": (
                "Avoid leading lines that look like markdown headings (no `#` / `##` title lines); use bold inline labels or paragraphs instead."
            ),
        },
    ),
    (
        "term_emphasis",
        {
            "highlight": (
                "Use **bold** on a handful of key terms or short phrases (not whole sentences) to help the reader scan."
            ),
            "minimal": (
                "Keep inline **bold** rare; prefer plain text unless emphasis is truly needed for clarity."
            ),
        },
    ),
    (
        "counterpoint_tone",
        {
            "challenge": (
                "Briefly stress-test the user's plan: note plausible failure modes, missing constraints, or stronger "
                "alternatives—stay respectful and specific."
            ),
            "supportive": (
                "Lean supportive: acknowledge effort, frame improvements as next steps, and avoid needless harsh critique."
            ),
        },
    ),
    (
        # "normal": default product behavior --- rely on FAQ block wording without duplicating instructions.
        "faq_grounding",
        {
            "strict": (
                "FAQ grounding (strict): Treat product/process/policy claims as supported only when clearly stated in "
                "the FAQ excerpts provided in this turn. If not stated there, say you are unsure or that it is outside "
                "the provided FAQ. When you rely on an excerpt, cite it as **[FAQ excerpt N]** matching the numbered "
                "excerpt headings you were given."
            ),
            "relaxed": (
                "FAQ grounding (relaxed): Prefer the supplied FAQ excerpts for product/support specifics, but you may add "
                "brief general-knowledge context if you clearly separate it from anything implied by FAQ text."
            ),
        },
    ),
)


def _append_reply_style_hints(extras: list[str], session: dict[str, Any]) -> None:
    """Append one "preferred reply style" block to ``extras`` if any preference is non-default.

    Each preference key in ``_REPLY_STYLE_HINTS`` is read from ``session``,
    stringified and lower-cased, then looked up in that key's hint mapping.
    Missing, falsy, default and unrecognized values all yield no hint line.

    Args:
        extras: List of extra prompt fragments; mutated in place. At most one
            string is appended, and nothing is appended when no hint applies.
        session: Per-chat settings dict holding the preference values.

    Returns:
        None. The result is communicated through ``extras``.
    """
    lines: list[str] = []
    for key, hints in _REPLY_STYLE_HINTS:
        # A falsy value collapses to "", which (like the default or any
        # unknown value) has no entry in the mapping and emits nothing.
        value = str(session.get(key) or "").lower()
        hint = hints.get(value)
        if hint is not None:
            lines.append(hint)
    if lines:
        extras.append(
            "Preferred reply style for this chat session:\n" + "\n".join(f"- {ln}" for ln in lines)
        )
1353
+
1354
+
1355
  def handle_slash(
1356
  msg: str,
1357
  *,
 
1407
  out.append(f"**#{i}** score={sc:.4f}\n{_clip(txt, 700)}\n")
1408
  return "\n".join(out)
1409
 
1410
+ if cmd == "/similarity":
1411
+ if not encoder:
1412
+ return "Encoder off. Drop `--lm-only` / `--no-encoder`."
1413
+ if "|||" not in rest:
1414
+ return "Usage: `/similarity text A ||| text B`"
1415
+ try:
1416
+ ta, tb = _parse_two_segments(rest)
1417
+ except ValueError as e:
1418
+ return str(e)
1419
+ score = encoder.similarity(ta, tb)
1420
+ return (
1421
+ f"**Similarity:** {score:.4f}\n\n**A:** {_clip(ta, 480)}\n\n**B:** {_clip(tb, 480)}"
1422
+ )
1423
+
1424
+ if cmd in ("/embedding", "/embed"):
1425
+ if not encoder:
1426
+ return "Encoder off. Drop `--lm-only` / `--no-encoder`."
1427
+ if not rest:
1428
+ return f"Usage: `{cmd} <text>`"
1429
+ return _embedding_summary_markdown(encoder, rest)
1430
+
1431
+ if cmd == "/nearest":
1432
+ if not encoder:
1433
+ return "Encoder off. Drop `--lm-only` / `--no-encoder`."
1434
+ if "|||" not in rest:
1435
+ return "Usage: `/nearest query ||| cand1 ||| cand2 ...`"
1436
+ try:
1437
+ qn, cands = _parse_nearest_blob(rest)
1438
+ except ValueError as e:
1439
+ return str(e)
1440
+ k = max(1, min(rag_top_k, len(cands)))
1441
+ return _nearest_markdown(encoder, qn, cands, top_k=k)
1442
+
1443
  if cmd in ("/summarize", "/reformulate", "/grounded"):
1444
  if lm is None:
1445
  return "Generative model not loaded."
 
1663
  print(f"Loading generative model {mid!r} on {dev!r} ...", flush=True)
1664
  lm = load_causal_lm(mid, dev)
1665
  turn_counter = {"n": 0}
1666
+ initial_ub_session = {
1667
+ "trace": not args.no_trace
1668
+ and (encoder is not None or mem_conn is not None or (rag_chunks is not None)),
1669
+ "smart_route": not args.no_smart_route,
1670
+ "rag": rag_chunks is not None,
1671
+ "scope_key": args.memory_scope,
1672
+ "verbosity": "normal",
1673
+ "reply_format": "prose",
1674
+ "faq_grounding": "normal",
1675
+ "audience": "normal",
1676
+ "answer_lead": "normal",
1677
+ "step_style": "normal",
1678
+ "confidence_tone": "normal",
1679
+ "followup_close": "normal",
1680
+ "exposition_order": "normal",
1681
+ "example_density": "normal",
1682
+ "comparison_frame": "normal",
1683
+ "register_tone": "normal",
1684
+ "code_block_style": "normal",
1685
+ "analogy_use": "normal",
1686
+ "acronym_style": "normal",
1687
+ "clarify_first": "normal",
1688
+ "speculation": "normal",
1689
+ "math_detail": "normal",
1690
+ "output_format": "normal",
1691
+ "risk_posture": "normal",
1692
+ "actionability": "normal",
1693
+ "quote_style": "normal",
1694
+ "table_style": "normal",
1695
+ "emoji_style": "normal",
1696
+ "section_headings": "normal",
1697
+ "term_emphasis": "normal",
1698
+ "counterpoint_tone": "normal",
1699
+ }
1700
 
1701
  def respond(
1702
  message: str,
1703
  history: list[dict],
1704
+ ub_session: dict[str, Any],
1705
+ ) -> tuple[str, list[dict], dict[str, Any]]:
1706
  msg = (message or "").strip()
1707
  hist = list(history or [])
1708
  if not msg:
1709
+ return "", hist, ub_session
1710
 
1711
  turn_counter["n"] += 1
1712
  seed = (args.seed + turn_counter["n"]) % (2**31)
1713
 
1714
+ cur_scope = str(ub_session.get("scope_key") or args.memory_scope)
1715
+
1716
  slash_out = handle_slash(
1717
  msg,
1718
  lm=lm,
1719
  mem_conn=mem_conn,
1720
+ scope_key=cur_scope,
1721
  encoder=encoder,
1722
  rag_chunks=rag_chunks,
1723
  rag_top_k=args.rag_top_k,
 
1731
  if slash_out is not None:
1732
  hist.append({"role": "user", "content": msg})
1733
  hist.append({"role": "assistant", "content": slash_out})
1734
+ return "", hist, ub_session
1735
+
1736
+ nl_out = handle_nl_control(
1737
+ msg,
1738
+ ub_session,
1739
+ mem_conn=mem_conn,
1740
+ scope_key=cur_scope,
1741
+ rag_chunks_base=rag_chunks,
1742
+ locked_no_smart_route=args.no_smart_route,
1743
+ )
1744
+ if nl_out is not None:
1745
+ hist.append({"role": "user", "content": msg})
1746
+ hist.append({"role": "assistant", "content": nl_out})
1747
+ return "", hist, ub_session
1748
+
1749
+ effective_rag = (
1750
+ rag_chunks if rag_chunks is not None and ub_session.get("rag") else None
1751
+ )
1752
+ use_smart = bool(ub_session.get("smart_route")) and not args.no_smart_route
1753
 
1754
  chat_line = msg
1755
+ if use_smart:
1756
  try:
1757
  route = infer_route(
1758
  lm,
 
1769
  msg=msg,
1770
  lm=lm,
1771
  mem_conn=mem_conn,
1772
+ scope_key=cur_scope,
1773
  encoder=encoder,
1774
+ rag_chunks=effective_rag,
1775
  rag_top_k=args.rag_top_k,
1776
  task_max_new_tokens=args.task_max_new_tokens,
1777
  seed=(seed + 11) % (2**31),
 
1784
  foot = f"\n\n---\n*Routed intent:* `{route['intent']}`"
1785
  hist.append({"role": "user", "content": msg})
1786
  hist.append({"role": "assistant", "content": tool_reply + foot})
1787
+ return "", hist, ub_session
1788
 
1789
  chat_line = route["text"] or msg
1790
 
1791
  trace: list[str] = []
1792
  extras: list[str] = []
1793
+ _append_reply_style_hints(extras, ub_session)
1794
 
1795
  if encoder:
1796
  probs = encoder.classify([chat_line])[0]
 
1803
  )
1804
 
1805
  rag_block = ""
1806
+ if encoder and effective_rag:
1807
+ hr = hybrid_retrieve(encoder, chat_line, effective_rag, top_k=args.rag_top_k)
1808
  if hr:
1809
  trace.append(f"RAG:{len(hr)}chunk(s)")
1810
  pieces = []
 
1818
  )
1819
 
1820
  if mem_conn:
1821
+ items = list_for_scope(mem_conn, cur_scope)
1822
  if items:
1823
  trace.append(f"mem:{len(items)}item(s)")
1824
  mem_lines = []
 
1847
  do_sample=True,
1848
  )
1849
  out = reply or "(empty generation)"
1850
+ show_trace_footer = (
1851
+ (not args.no_trace)
1852
+ and bool(ub_session.get("trace"))
1853
+ and (
1854
+ encoder is not None
1855
+ or mem_conn is not None
1856
+ or effective_rag is not None
1857
+ )
1858
+ )
1859
+ if show_trace_footer and trace:
1860
  out += "\n\n---\n*Brain trace:* " + " · ".join(trace)
1861
 
1862
  hist.append({"role": "user", "content": msg})
1863
  hist.append({"role": "assistant", "content": out})
1864
+ return "", hist, ub_session
1865
 
1866
  brain_bits = []
1867
  if encoder:
 
1872
  brain_bits.append("memory")
1873
  brain_label = "+".join(brain_bits) if brain_bits else "LM only"
1874
 
1875
+ _css = """
1876
+ /* Space UX: keep the input compact and predictable. */
1877
+ #ub_input textarea { height: 120px !important; }
1878
+ """
1879
+ with gr.Blocks(title="Universal Brain (chat prototype)", css=_css) as demo:
1880
  gr.Markdown(
1881
  "### Universal Brain — chat prototype\n"
1882
  f"**Generative:** `{mid}` ({lm.device}) · **Brain layers:** {brain_label}\n\n"
1883
  "**NL routing:** the model infers what you want (summarize, FAQ search, save note, …). "
1884
  "Use **`--no-smart-route`** for plain chat-only + slash shortcuts. "
1885
  "`/help` lists slash commands.\n\n"
1886
+ "**NL session controls:** say things like "
1887
+ "**`What is my current scope?`**, **`Start a new private session`**, **`Switch to scope my-key`**, "
1888
+ "**`Be brief`**, **`More detail please`**, **`Use bullet points`**, **`Reset reply style`**, "
1889
+ "**`Strict FAQ`** / **`Relaxed FAQ`** / **`Balanced FAQ`**, "
1890
+ "**`ELI5`** / **`Expert mode`**, **`TLDR first`** / **`Answer directly`**, "
1891
+ "**`Step by step`** / **`No numbered steps`**, **`Flag your assumptions`** / **`Be decisive`**, "
1892
+ "**`Suggest next steps`** / **`No follow-up questions`**, **`Definitions first`** / **`Intuition first`**, "
1893
+ "**`Include examples`** / **`Skip examples`**, **`Use pros and cons`** / **`Compare in flowing prose`**, **`Formal tone`** / **`Casual tone`**, **`Use code fences`** / **`Inline code only`**, "
1894
+ "**`Use analogies`** / **`No analogies`**, **`Spell out acronyms`** / **`Don't expand acronyms`**, "
1895
+ "**`Clarify first`** / **`No clarifying questions`**, **`No speculation`** / **`Brainstorm freely`**, "
1896
+ "**`Show your work`** / **`Final answer only`**, **`Answer in JSON`** / **`Plain text only`**, "
1897
+ "**`Be risk averse`** / **`Be pragmatic`**, **`Give me runnable commands`** / **`No commands`**, "
1898
+ "**`Quote the FAQ excerpts`** / **`Paraphrase only`**, **`Use tables`** / **`No tables`**, "
1899
+ "**`Use emoji`** / **`No emoji`**, **`Use section headings`** / **`Flat answer`**, "
1900
+ "**`Bold key terms`** / **`Minimal bold`**, **`Challenge my assumptions`** / **`Be supportive`**, "
1901
+ "**`Export my memories`**, **`Delete all my memories for this chat`**, **`Clear my session notes`**, "
1902
+ "**`Turn off FAQ context`**, **`Turn off smart routing`**, **`Show the brain trace`** "
1903
+ "(no slash command required). See the repo `README` for more example phrases.\n\n"
1904
  "Encoder topics (Hub TinyModel1 ≈ AG News) still feed context and an optional *Brain trace* line; "
1905
  "use `/classify` or ask naturally to see the full probability table in chat."
1906
  )
1907
  chat = gr.Chatbot(type="messages", height=520, label="Conversation", allow_tags=False)
1908
+ ub_state = gr.State(initial_ub_session)
1909
  with gr.Row():
1910
  inp = gr.Textbox(
1911
+ lines=4,
1912
+ max_lines=8,
1913
  show_label=False,
1914
  placeholder="Ask in plain language, or use /help …",
1915
  scale=9,
1916
+ elem_id="ub_input",
1917
  )
1918
  go = gr.Button("Send", variant="primary", scale=1)
1919
  gr.ClearButton([chat, inp])
1920
 
1921
+ def _submit(
1922
+ m: str,
1923
+ h: list[dict],
1924
+ s: dict[str, Any],
1925
+ ) -> tuple[str, list[dict], dict[str, Any]]:
1926
+ return respond(m, h, s)
1927
+
1928
+ go.click(
1929
+ _submit,
1930
+ [inp, chat, ub_state],
1931
+ [inp, chat, ub_state],
1932
+ api_name="chat",
1933
+ api_description="Universal Brain chat endpoint (routing + optional RAG + memory + classifier context).",
1934
+ )
1935
+ inp.submit(_submit, [inp, chat, ub_state], [inp, chat, ub_state])
1936
 
1937
  demo.queue(default_concurrency_limit=2)
1938
  share = args.share
 
1944
  server_port=args.port,
1945
  share=share,
1946
  ssr_mode=False,
1947
+ show_api=True,
1948
  )
1949
  except ValueError as e:
1950
  err = str(e)