Spaces:
Sleeping
Sleeping
Deploy TinyModel1Space from GitHub Actions
Browse files- scripts/eval_report_routing.py +78 -0
- scripts/horizon2_core.py +52 -8
- scripts/nl_controls.py +652 -0
- scripts/rag_faq_smoke.py +64 -5
- scripts/universal_brain_chat.py +1121 -26
scripts/eval_report_routing.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Read the Phase 2 **`routing`** object from a classifier checkpoint's **`eval_report.json`**.
|
| 3 |
+
|
| 4 |
+
Used by Horizon 1 glue, **rag_faq_smoke**, **embeddings_smoke_test**, **routing_policy** (**`--from-checkpoint`**), **horizon1_route_then_retrieve**, and training/report CLIs so training notes and runtime gates stay aligned."""
|
| 5 |
+
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
import json
|
| 9 |
+
import sys
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def load_routing_from_eval_report(model_path: str | Path) -> dict | None:
    """Return the top-level ``routing`` dict from ``<model_path>/eval_report.json``.

    Args:
        model_path: Candidate checkpoint directory. Anything that is not an
            existing directory yields ``None``.

    Returns:
        The value stored under the top-level ``"routing"`` key when it is a
        dict. ``None`` when the directory or report is missing, the report
        cannot be read or decoded, the JSON is malformed, the top-level JSON
        value is not an object, or ``"routing"`` is absent or not a dict.
    """
    checkpoint_dir = Path(model_path)
    if not checkpoint_dir.is_dir():
        return None
    report_path = checkpoint_dir / "eval_report.json"
    try:
        data = json.loads(report_path.read_text(encoding="utf-8"))
    except (OSError, UnicodeDecodeError, json.JSONDecodeError):
        # Missing/unreadable file, non-UTF-8 bytes, or invalid JSON: all mean
        # "no usable report", which callers already handle as None.
        return None
    # A report whose top level is a list/scalar has no ``.get``; treat it as
    # "no routing section" rather than raising AttributeError.
    if not isinstance(data, dict):
        return None
    routing = data.get("routing")
    return routing if isinstance(routing, dict) else None
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def format_checkpoint_tip_path(
    output_dir: str | Path,
    *,
    cwd: Path | None = None,
) -> str:
    """Render ``output_dir`` as a POSIX-style path, relative to ``cwd`` when possible.

    Args:
        output_dir: Checkpoint directory to display.
        cwd: Base directory for relativisation; the process working directory
            is used when omitted.

    Returns:
        The resolved ``output_dir`` relative to the resolved base if it lies
        under it, otherwise the resolved absolute path. Forward slashes are
        always used.
    """
    target = Path(output_dir).resolve()
    anchor = Path.cwd() if cwd is None else cwd
    anchor = anchor.resolve()
    try:
        relative = target.relative_to(anchor)
    except ValueError:
        # ``target`` is not located under ``anchor``: fall back to absolute.
        return target.as_posix()
    return relative.as_posix()
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def format_routing_policy_from_checkpoint_command(
    output_dir: str | Path,
    *,
    cwd: Path | None = None,
) -> str:
    """Build the ``python scripts/routing_policy.py --from-checkpoint …`` command line.

    The checkpoint path comes from ``format_checkpoint_tip_path`` and is
    appended verbatim; no shell quoting is applied.
    """
    checkpoint = format_checkpoint_tip_path(output_dir, cwd=cwd)
    return "python scripts/routing_policy.py --from-checkpoint " + checkpoint
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def print_routing_policy_from_checkpoint_tip(
    output_dir: str | Path,
    *,
    headline: str = "Tip: dump Phase 2 `routing` JSON (no model load):",
    cwd: Path | None = None,
) -> None:
    """Print a copy-paste tip showing the ``routing_policy`` command for a checkpoint.

    Args:
        output_dir: Checkpoint directory to mention in the command.
        headline: First line of the tip, printed above the command.
        cwd: Forwarded to the command formatter for path relativisation.
    """
    command = format_routing_policy_from_checkpoint_command(output_dir, cwd=cwd)
    message = f"{headline}\n {command}"
    # Flush immediately so the tip is not held back in a buffered stdout.
    print(message, flush=True)
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def maybe_print_routing_section(model_path: str, *, enabled: bool, prog: str) -> None:
    """Optionally print the checkpoint's ``routing`` section.

    Args:
        model_path: Checkpoint location handed to ``load_routing_from_eval_report``.
        enabled: When false, nothing is printed.
        prog: Caller label used as the prefix of the stderr hint.

    When ``enabled`` and a routing dict is found, a banner and the
    pretty-printed JSON go to stdout; otherwise a one-line hint goes to stderr.
    """
    if not enabled:
        return
    routing = load_routing_from_eval_report(model_path)
    if routing is not None:
        print("=== eval_report.json routing (Phase 2 training notes) ===\n")
        print(json.dumps(routing, indent=2))
        print()
        return
    hint = f"{prog}: no eval_report.json with top-level `routing` (Hub id or missing artifact)."
    print(hint, file=sys.stderr)
|
scripts/horizon2_core.py
CHANGED
|
@@ -173,9 +173,26 @@ def load_causal_lm(
|
|
| 173 |
model_id: str,
|
| 174 |
device: str,
|
| 175 |
) -> LoadedLM:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
import torch
|
| 177 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 178 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
d = device if device in ("cpu", "cuda", "mps") else "cpu"
|
| 180 |
tok = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
|
| 181 |
if tok.pad_token is None and tok.eos_token is not None:
|
|
@@ -187,15 +204,42 @@ def load_causal_lm(
|
|
| 187 |
)
|
| 188 |
else:
|
| 189 |
dt = torch.float32
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
model.eval()
|
| 200 |
model = model.to(d)
|
| 201 |
return LoadedLM(model=model, tokenizer=tok, device=d)
|
|
|
|
| 173 |
model_id: str,
|
| 174 |
device: str,
|
| 175 |
) -> LoadedLM:
|
| 176 |
+
import os
|
| 177 |
+
import sys
|
| 178 |
+
|
| 179 |
+
# Must run before `import torch` on first use (e.g. horizon2_server on Windows).
|
| 180 |
+
if sys.platform == "win32":
|
| 181 |
+
os.environ.setdefault("OMP_NUM_THREADS", "1")
|
| 182 |
+
os.environ.setdefault("MKL_NUM_THREADS", "1")
|
| 183 |
+
os.environ.setdefault("KMP_DUPLICATE_LIB_OK", "TRUE")
|
| 184 |
+
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
|
| 185 |
+
|
| 186 |
import torch
|
| 187 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 188 |
|
| 189 |
+
if sys.platform == "win32":
|
| 190 |
+
torch.set_num_threads(1)
|
| 191 |
+
try:
|
| 192 |
+
torch.set_num_interop_threads(1)
|
| 193 |
+
except RuntimeError:
|
| 194 |
+
pass
|
| 195 |
+
|
| 196 |
d = device if device in ("cpu", "cuda", "mps") else "cpu"
|
| 197 |
tok = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
|
| 198 |
if tok.pad_token is None and tok.eos_token is not None:
|
|
|
|
| 204 |
)
|
| 205 |
else:
|
| 206 |
dt = torch.float32
|
| 207 |
+
|
| 208 |
+
def _from_pretrained(extra: dict[str, Any]) -> Any:
|
| 209 |
+
# Prefer `dtype` (newer Transformers); fall back to `torch_dtype` (older).
|
| 210 |
+
try:
|
| 211 |
+
return AutoModelForCausalLM.from_pretrained(
|
| 212 |
+
model_id, trust_remote_code=True, dtype=dt, **extra
|
| 213 |
+
)
|
| 214 |
+
except TypeError:
|
| 215 |
+
return AutoModelForCausalLM.from_pretrained(
|
| 216 |
+
model_id, trust_remote_code=True, torch_dtype=dt, **extra
|
| 217 |
+
)
|
| 218 |
+
|
| 219 |
+
# Retry with progressively fewer options (compat + stability on Windows CPU).
|
| 220 |
+
if d == "cpu":
|
| 221 |
+
extras: tuple[dict[str, Any], ...] = (
|
| 222 |
+
{"low_cpu_mem_usage": True, "attn_implementation": "eager"},
|
| 223 |
+
{"low_cpu_mem_usage": True},
|
| 224 |
+
{},
|
| 225 |
)
|
| 226 |
+
else:
|
| 227 |
+
extras = ({"low_cpu_mem_usage": True}, {})
|
| 228 |
+
|
| 229 |
+
model = None
|
| 230 |
+
last_err: BaseException | None = None
|
| 231 |
+
for extra in extras:
|
| 232 |
+
try:
|
| 233 |
+
model = _from_pretrained(extra)
|
| 234 |
+
break
|
| 235 |
+
except (TypeError, ValueError, OSError) as e:
|
| 236 |
+
last_err = e
|
| 237 |
+
continue
|
| 238 |
+
if model is None:
|
| 239 |
+
raise RuntimeError(
|
| 240 |
+
f"Failed to load causal LM {model_id!r}; last error: {last_err!r}"
|
| 241 |
+
) from last_err
|
| 242 |
+
|
| 243 |
model.eval()
|
| 244 |
model = model.to(d)
|
| 245 |
return LoadedLM(model=model, tokenizer=tok, device=d)
|
scripts/nl_controls.py
ADDED
|
@@ -0,0 +1,652 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Natural-language control phrases for Universal Brain chat.
|
| 2 |
+
|
| 3 |
+
This is a lightweight, deterministic pre-router for actions that should not depend on
|
| 4 |
+
LLM JSON routing (and should work without requiring users to remember slash commands).
|
| 5 |
+
|
| 6 |
+
It is intentionally conservative: it only triggers on fairly explicit phrasing.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
from dataclasses import dataclass
|
| 12 |
+
import re
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@dataclass(frozen=True)
class ControlAction:
    """Immutable result of recognising a control request in a chat message."""

    # Action identifier, e.g. "show_session" or "set_trace".
    name: str
    # Optional argument for the action (e.g. "on"/"off" or a scope id);
    # None when the action carries no argument.
    value: str | None = None
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# Matches any run of whitespace so it can be collapsed to a single space.
_WS = re.compile(r"\s+")


def _norm(s: str) -> str:
    """Return ``s`` stripped, lower-cased, with whitespace runs collapsed to one space.

    A falsy ``s`` (empty string or ``None``) normalises to ``""``.
    """
    text = s or ""
    lowered = text.strip().lower()
    return _WS.sub(" ", lowered)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def parse_control_action(message: str) -> ControlAction | None:
|
| 29 |
+
"""Return a ControlAction if the message is a natural-language control request."""
|
| 30 |
+
m = _norm(message)
|
| 31 |
+
if not m:
|
| 32 |
+
return None
|
| 33 |
+
|
| 34 |
+
# "What mode is this? What session/scope am I in?"
|
| 35 |
+
if re.search(r"\b(what|show)\b.*\b(my )?(session|scope|settings|mode|status)\b", m) or re.search(
|
| 36 |
+
r"\bwhich\b.*\b(scope|session)\b", m
|
| 37 |
+
):
|
| 38 |
+
return ControlAction("show_session")
|
| 39 |
+
|
| 40 |
+
# Start a fresh private session (new scope key).
|
| 41 |
+
if re.search(r"\b(new|fresh)\b.*\b(private )?(session|scope)\b", m) or re.search(
|
| 42 |
+
r"\b(start|begin)\b.*\b(private )?(session|scope)\b", m
|
| 43 |
+
):
|
| 44 |
+
return ControlAction("new_private_session")
|
| 45 |
+
|
| 46 |
+
# Switch to a named scope in chat, e.g. "use scope abc-123" / "switch to session foo".
|
| 47 |
+
m2 = re.search(r"\b(use|switch to|set)\b.*\b(scope|session)\b\s*[:=]?\s*([a-z0-9][a-z0-9_.:-]{1,63})\b", m)
|
| 48 |
+
if m2:
|
| 49 |
+
return ControlAction("set_scope", m2.group(3))
|
| 50 |
+
|
| 51 |
+
# Memory controls (order matters: list/show before export/download)
|
| 52 |
+
if re.search(
|
| 53 |
+
r"\b(show|list)\b.*\b(my )?(data|memory|memories|notes)\b",
|
| 54 |
+
m,
|
| 55 |
+
):
|
| 56 |
+
return ControlAction("list_memories")
|
| 57 |
+
if re.search(
|
| 58 |
+
r"\b(export|download)\b.*\b(my )?(data|memory|memories|notes)\b",
|
| 59 |
+
m,
|
| 60 |
+
):
|
| 61 |
+
return ControlAction("export_memory")
|
| 62 |
+
if re.search(r"\b(clear|wipe|delete|forget)\b.*\b(session)\b.*\b(memory|memories|notes)?\b", m):
|
| 63 |
+
return ControlAction("clear_session")
|
| 64 |
+
if re.search(r"\b(forget|delete|erase|wipe)\b.*\b(all|everything)\b.*\b(memory|memories|notes|data)\b", m) or re.search(
|
| 65 |
+
r"\b(delete|erase)\b.*\b(my )?(data|account data|data for this chat)\b", m
|
| 66 |
+
):
|
| 67 |
+
return ControlAction("forget_scope")
|
| 68 |
+
|
| 69 |
+
# Session toggles (chat UX)
|
| 70 |
+
if re.search(r"\b(turn on|enable|show)\b.*\b(trace|brain trace|debug)\b", m):
|
| 71 |
+
return ControlAction("set_trace", "on")
|
| 72 |
+
if re.search(r"\b(turn off|disable|hide)\b.*\b(trace|brain trace|debug)\b", m):
|
| 73 |
+
return ControlAction("set_trace", "off")
|
| 74 |
+
|
| 75 |
+
if re.search(r"\b(turn on|enable)\b.*\b(smart routing|auto routing|router)\b", m):
|
| 76 |
+
return ControlAction("set_smart_route", "on")
|
| 77 |
+
if re.search(r"\b(turn off|disable)\b.*\b(smart routing|auto routing|router)\b", m):
|
| 78 |
+
return ControlAction("set_smart_route", "off")
|
| 79 |
+
|
| 80 |
+
if re.search(r"\b(turn on|enable)\b.*\b(faq|rag|retrieval)\b", m):
|
| 81 |
+
return ControlAction("set_rag", "on")
|
| 82 |
+
if re.search(r"\b(turn off|disable)\b.*\b(faq|rag|retrieval)\b", m):
|
| 83 |
+
return ControlAction("set_rag", "off")
|
| 84 |
+
|
| 85 |
+
# Reply style for the generative model (short lines only to avoid hijacking real questions).
|
| 86 |
+
# Require "reply"/"answer" before style|format|length so phrases like "default quote style" / "reset tables"
|
| 87 |
+
# are handled by narrower matchers below.
|
| 88 |
+
if len(m) <= 140 and (
|
| 89 |
+
re.search(r"\breset\b.*\b(reply|answer)\s+(style|format|length)\b", m)
|
| 90 |
+
or re.search(r"\b(default|normal)\b.*\b(reply|answer)\s+(style|format|length)\b", m)
|
| 91 |
+
):
|
| 92 |
+
return ControlAction("reset_reply_style")
|
| 93 |
+
|
| 94 |
+
if len(m) <= 96 and re.search(
|
| 95 |
+
r"\b(be brief|stay brief|keep it short|short answers|answer briefly|concise replies)\b",
|
| 96 |
+
m,
|
| 97 |
+
):
|
| 98 |
+
return ControlAction("set_verbosity", "brief")
|
| 99 |
+
|
| 100 |
+
if len(m) <= 120 and re.search(
|
| 101 |
+
r"\b(more detail|go deeper|in greater detail|explain thoroughly|longer answers|detailed answers)\b",
|
| 102 |
+
m,
|
| 103 |
+
):
|
| 104 |
+
return ControlAction("set_verbosity", "detailed")
|
| 105 |
+
|
| 106 |
+
if len(m) <= 100 and re.search(
|
| 107 |
+
r"\b(normal (answer )?length|default length|balanced length)\b",
|
| 108 |
+
m,
|
| 109 |
+
):
|
| 110 |
+
return ControlAction("set_verbosity", "normal")
|
| 111 |
+
|
| 112 |
+
if len(m) <= 110 and re.search(r"\b(use|prefer)\b", m) and re.search(
|
| 113 |
+
r"\b(bullet points?|numbered lists?)\b",
|
| 114 |
+
m,
|
| 115 |
+
):
|
| 116 |
+
return ControlAction("set_reply_format", "bullets")
|
| 117 |
+
|
| 118 |
+
if len(m) <= 100 and re.search(
|
| 119 |
+
r"\b(no bullets|plain paragraphs?|prose only|stop using lists)\b",
|
| 120 |
+
m,
|
| 121 |
+
):
|
| 122 |
+
return ControlAction("set_reply_format", "prose")
|
| 123 |
+
|
| 124 |
+
# FAQ / RAG grounding hints for the assistant (short control lines).
|
| 125 |
+
if len(m) <= 100 and re.search(
|
| 126 |
+
r"\b(strict faq|faq only|stick to (the )?faq|only use (the )?faq|only trust (the )?faq)\b",
|
| 127 |
+
m,
|
| 128 |
+
):
|
| 129 |
+
return ControlAction("set_faq_grounding", "strict")
|
| 130 |
+
|
| 131 |
+
if len(m) <= 115 and re.search(
|
| 132 |
+
r"\b(balanced faq|normal faq|default faq(\s+grounding)?|default faq mode)\b",
|
| 133 |
+
m,
|
| 134 |
+
):
|
| 135 |
+
return ControlAction("set_faq_grounding", "normal")
|
| 136 |
+
|
| 137 |
+
if len(m) <= 130 and re.search(
|
| 138 |
+
r"\b(relaxed faq|faq plus general knowledge|general knowledge(\s+is)?\s+ok|mix faq and general knowledge)\b",
|
| 139 |
+
m,
|
| 140 |
+
):
|
| 141 |
+
return ControlAction("set_faq_grounding", "relaxed")
|
| 142 |
+
|
| 143 |
+
# Explanation depth (who the answer is for) — short control lines only.
|
| 144 |
+
if (
|
| 145 |
+
(len(m) <= 40 and re.match(r"^(please\s+)?explain simply[\s.!?]*$", m))
|
| 146 |
+
or re.match(r"^(please\s+)?eli5\b[\s.!?]*$", m)
|
| 147 |
+
or (len(m) <= 56 and re.search(r"\b(i'?m\s+a\s+beginner|beginner\s+here)\b", m))
|
| 148 |
+
or re.match(r"^(please\s+)?assume i'?m\s+new\b[\s.!?]*$", m)
|
| 149 |
+
or (len(m) <= 56 and re.search(r"\bi\s+need\s+(the\s+)?basics\b", m))
|
| 150 |
+
):
|
| 151 |
+
return ControlAction("set_audience", "simple")
|
| 152 |
+
|
| 153 |
+
if len(m) <= 72 and (
|
| 154 |
+
re.match(r"^(please\s+)?assume i'?m\s+technical[\s.!?]*$", m)
|
| 155 |
+
or re.match(r"^expert\s+mode[\s.!?]*$", m)
|
| 156 |
+
or re.match(r"^(please\s+)?use jargon freely[\s.!?]*$", m)
|
| 157 |
+
or re.match(r"^technical audience[\s.!?]*$", m)
|
| 158 |
+
or re.match(r"^for experts[\s.!?]*$", m)
|
| 159 |
+
):
|
| 160 |
+
return ControlAction("set_audience", "technical")
|
| 161 |
+
|
| 162 |
+
if len(m) <= 78 and (
|
| 163 |
+
re.match(r"^(please\s+)?(default explanation level|normal explanation level|general audience)[\s.!?]*$", m)
|
| 164 |
+
or re.match(r"^(please\s+)?(reset|default)\s+audience[\s.!?]*$", m)
|
| 165 |
+
):
|
| 166 |
+
return ControlAction("set_audience", "normal")
|
| 167 |
+
|
| 168 |
+
# Answer lead — whether to front-load a TL;DR line (orthogonal to verbosity).
|
| 169 |
+
if len(m) <= 88 and (
|
| 170 |
+
re.match(r"^(please\s+)?(tl;|tl)dr\s+first\b[\s.!?]*$", m)
|
| 171 |
+
or re.match(r"^(please\s+)?(lead|start)\s+with\s+(a\s+)?(short\s+)?summary\b[\s.!?]*$", m)
|
| 172 |
+
or re.match(r"^(please\s+)?summary\s+first\b[\s.!?]*$", m)
|
| 173 |
+
):
|
| 174 |
+
return ControlAction("set_answer_lead", "tldr_first")
|
| 175 |
+
|
| 176 |
+
if len(m) <= 92 and (
|
| 177 |
+
re.match(r"^(please\s+)?no\s+tl;?dr\b[\s.!?]*$", m)
|
| 178 |
+
or re.match(r"^(please\s+)?skip (the\s+)?summary\b[\s.!?]*$", m)
|
| 179 |
+
or re.match(r"^(please\s+)?answer directly\b[\s.!?]*$", m)
|
| 180 |
+
or re.match(r"^(please\s+)?direct answer\s+only\b[\s.!?]*$", m)
|
| 181 |
+
or re.match(r"^(please\s+)?without\s+a\s+tldr\b[\s.!?]*$", m)
|
| 182 |
+
):
|
| 183 |
+
return ControlAction("set_answer_lead", "direct")
|
| 184 |
+
|
| 185 |
+
if len(m) <= 64 and (
|
| 186 |
+
re.match(r"^(please\s+)?(default answer structure|normal answer opening|usual\s+opening)[\s.!?]*$", m)
|
| 187 |
+
or re.match(r"^(please\s+)?reset\s+(answer\s+)?opening[\s.!?]*$", m)
|
| 188 |
+
):
|
| 189 |
+
return ControlAction("set_answer_lead", "normal")
|
| 190 |
+
|
| 191 |
+
# Procedures: numbered steps vs continuous prose (orthogonal to bullets).
|
| 192 |
+
if len(m) <= 88 and (
|
| 193 |
+
re.match(r"^(please\s+)?(step by step|step-by-step)[\s.!?]*$", m)
|
| 194 |
+
or re.match(r"^(please\s+)?use numbered steps[\s.!?]*$", m)
|
| 195 |
+
or re.match(r"^(please\s+)?numbered steps\b[\s.!?]*$", m)
|
| 196 |
+
or re.match(r"^(please\s+)?walk me through( the)? steps\b[\s.!?]*$", m)
|
| 197 |
+
or re.match(r"^(please\s+)?break it into steps[\s.!?]*$", m)
|
| 198 |
+
):
|
| 199 |
+
return ControlAction("set_step_style", "numbered")
|
| 200 |
+
|
| 201 |
+
if len(m) <= 92 and (
|
| 202 |
+
re.match(r"^(please\s+)?(no numbered steps|don'?t number steps|skip step numbers)[\s.!?]*$", m)
|
| 203 |
+
or re.match(r"^(please\s+)?(continuous prose|prose without steps)[\s.!?]*$", m)
|
| 204 |
+
):
|
| 205 |
+
return ControlAction("set_step_style", "continuous")
|
| 206 |
+
|
| 207 |
+
if len(m) <= 64 and re.match(r"^(please\s+)?(default step style|normal steps|reset steps)[\s.!?]*$", m):
|
| 208 |
+
return ControlAction("set_step_style", "normal")
|
| 209 |
+
|
| 210 |
+
# How hard to hedge / flag limits (orthogonal to FAQ strictness).
|
| 211 |
+
if len(m) <= 94 and (
|
| 212 |
+
re.match(r"^(please\s+)?flag your assumptions[\s.!?]*$", m)
|
| 213 |
+
or re.match(r"^(please\s+)?be explicit about uncertainty[\s.!?]*$", m)
|
| 214 |
+
or re.match(r"^(please\s+)?say if you don'?t know[\s.!?]*$", m)
|
| 215 |
+
or re.match(r"^(please\s+)?tell me when you(?:'?re|\s+are)\s+unsure[\s.!?]*$", m)
|
| 216 |
+
or re.match(r"^(please\s+)?say when you(?:'?re|\s+are)\s+unsure[\s.!?]*$", m)
|
| 217 |
+
):
|
| 218 |
+
return ControlAction("set_confidence_tone", "transparent")
|
| 219 |
+
|
| 220 |
+
if len(m) <= 72 and (
|
| 221 |
+
re.match(r"^(please\s+)?be decisive[\s.!?]*$", m)
|
| 222 |
+
or re.match(r"^(please\s+)?don'?t hedge[\s.!?]*$", m)
|
| 223 |
+
or re.match(r"^(please\s+)?give firm answers[\s.!?]*$", m)
|
| 224 |
+
):
|
| 225 |
+
return ControlAction("set_confidence_tone", "assertive")
|
| 226 |
+
|
| 227 |
+
if len(m) <= 80 and re.match(
|
| 228 |
+
r"^(please\s+)?(default confidence tone|normal confidence|reset uncertainty)[\s.!?]*$",
|
| 229 |
+
m,
|
| 230 |
+
):
|
| 231 |
+
return ControlAction("set_confidence_tone", "normal")
|
| 232 |
+
|
| 233 |
+
# Whether to offer follow-ups / next steps at the end of answers.
|
| 234 |
+
if len(m) <= 96 and (
|
| 235 |
+
re.match(r"^(please\s+)?suggest next steps[\s.!?]*$", m)
|
| 236 |
+
or re.match(r"^(please\s+)?offer follow[- ]up questions[\s.!?]*$", m)
|
| 237 |
+
or re.match(r"^(please\s+)?end with (optional )?next steps[\s.!?]*$", m)
|
| 238 |
+
):
|
| 239 |
+
return ControlAction("set_followup_close", "suggest")
|
| 240 |
+
|
| 241 |
+
if len(m) <= 100 and (
|
| 242 |
+
re.match(r"^(please\s+)?no follow[- ]up questions[\s.!?]*$", m)
|
| 243 |
+
or re.match(r"^(please\s+)?don'?t ask follow[- ]up questions[\s.!?]*$", m)
|
| 244 |
+
or re.match(r"^(please\s+)?no questions at the end[\s.!?]*$", m)
|
| 245 |
+
):
|
| 246 |
+
return ControlAction("set_followup_close", "minimal")
|
| 247 |
+
|
| 248 |
+
if len(m) <= 78 and (
|
| 249 |
+
re.match(r"^(please\s+)?(default follow[- ]ups?|reset follow[- ]ups?|normal follow[- ]ups?)[\s.!?]*$", m)
|
| 250 |
+
):
|
| 251 |
+
return ControlAction("set_followup_close", "normal")
|
| 252 |
+
|
| 253 |
+
# Teach order: define terms vs motivate first (orthogonal to TL;DR / steps).
|
| 254 |
+
if len(m) <= 80 and (
|
| 255 |
+
re.match(r"^(please\s+)?definitions first[\s.!?]*$", m)
|
| 256 |
+
or re.match(r"^(please\s+)?start with definitions[\s.!?]*$", m)
|
| 257 |
+
or re.match(r"^(please\s+)?define terms first[\s.!?]*$", m)
|
| 258 |
+
):
|
| 259 |
+
return ControlAction("set_exposition_order", "definitions_first")
|
| 260 |
+
|
| 261 |
+
if len(m) <= 96 and (
|
| 262 |
+
re.match(r"^(please\s+)?intuition first[\s.!?]*$", m)
|
| 263 |
+
or re.match(r"^(please\s+)?big picture first[\s.!?]*$", m)
|
| 264 |
+
or re.match(r"^(please\s+)?start with the big picture[\s.!?]*$", m)
|
| 265 |
+
):
|
| 266 |
+
return ControlAction("set_exposition_order", "intuition_first")
|
| 267 |
+
|
| 268 |
+
if len(m) <= 88 and re.match(
|
| 269 |
+
r"^(please\s+)?(default explanation order|reset explanation order|normal explanation order)[\s.!?]*$",
|
| 270 |
+
m,
|
| 271 |
+
):
|
| 272 |
+
return ControlAction("set_exposition_order", "normal")
|
| 273 |
+
|
| 274 |
+
# Examples vs terse explanations when comparing or teaching.
|
| 275 |
+
if len(m) <= 76 and (
|
| 276 |
+
re.match(r"^(please\s+)?include examples[\s.!?]*$", m)
|
| 277 |
+
or re.match(r"^(please\s+)?use concrete examples[\s.!?]*$", m)
|
| 278 |
+
or re.match(r"^(please\s+)?illustrate with examples[\s.!?]*$", m)
|
| 279 |
+
):
|
| 280 |
+
return ControlAction("set_example_density", "rich")
|
| 281 |
+
|
| 282 |
+
if len(m) <= 92 and (
|
| 283 |
+
re.match(r"^(please\s+)?skip examples[\s.!?]*$", m)
|
| 284 |
+
or re.match(r"^(please\s+)?don'?t add examples[\s.!?]*$", m)
|
| 285 |
+
or re.match(r"^(please\s+)?no examples unless i ask[\s.!?]*$", m)
|
| 286 |
+
):
|
| 287 |
+
return ControlAction("set_example_density", "sparse")
|
| 288 |
+
|
| 289 |
+
if len(m) <= 68 and re.match(
|
| 290 |
+
r"^(please\s+)?(default examples|normal examples|reset examples)[\s.!?]*$",
|
| 291 |
+
m,
|
| 292 |
+
):
|
| 293 |
+
return ControlAction("set_example_density", "normal")
|
| 294 |
+
|
| 295 |
+
# Compare/contrast presentation.
|
| 296 |
+
if len(m) <= 96 and (
|
| 297 |
+
re.match(r"^(please\s+)?use pros and cons[\s.!?]*$", m)
|
| 298 |
+
or re.match(r"^(please\s+)?pros and cons sections[\s.!?]*$", m)
|
| 299 |
+
or re.match(r"^(please\s+)?compare with pros and cons[\s.!?]*$", m)
|
| 300 |
+
):
|
| 301 |
+
return ControlAction("set_comparison_frame", "pros_cons")
|
| 302 |
+
|
| 303 |
+
if len(m) <= 100 and (
|
| 304 |
+
re.match(r"^(please\s+)?compare in flowing prose[\s.!?]*$", m)
|
| 305 |
+
or re.match(r"^(please\s+)?prose comparison only[\s.!?]*$", m)
|
| 306 |
+
or re.match(r"^(please\s+)?no pros and cons sections[\s.!?]*$", m)
|
| 307 |
+
):
|
| 308 |
+
return ControlAction("set_comparison_frame", "narrative")
|
| 309 |
+
|
| 310 |
+
if len(m) <= 82 and re.match(
|
| 311 |
+
r"^(please\s+)?(default comparison style|normal comparison|reset comparison)[\s.!?]*$",
|
| 312 |
+
m,
|
| 313 |
+
):
|
| 314 |
+
return ControlAction("set_comparison_frame", "normal")
|
| 315 |
+
|
| 316 |
+
# Professional vs conversational wording (orthogonal to verbosity).
|
| 317 |
+
if len(m) <= 92 and (
|
| 318 |
+
re.match(r"^(please\s+)?formal tone[\s.!?]*$", m)
|
| 319 |
+
or re.match(r"^(please\s+)?professional register[\s.!?]*$", m)
|
| 320 |
+
or re.match(r"^(please\s+)?business writing style[\s.!?]*$", m)
|
| 321 |
+
):
|
| 322 |
+
return ControlAction("set_register_tone", "formal")
|
| 323 |
+
|
| 324 |
+
if len(m) <= 96 and (
|
| 325 |
+
re.match(r"^(please\s+)?casual tone[\s.!?]*$", m)
|
| 326 |
+
or re.match(r"^(please\s+)?friendly casual style[\s.!?]*$", m)
|
| 327 |
+
or re.match(r"^(please\s+)?speak casually[\s.!?]*$", m)
|
| 328 |
+
):
|
| 329 |
+
return ControlAction("set_register_tone", "casual")
|
| 330 |
+
|
| 331 |
+
if len(m) <= 76 and re.match(
|
| 332 |
+
r"^(please\s+)?(default tone|neutral tone|reset tone)[\s.!?]*$",
|
| 333 |
+
m,
|
| 334 |
+
):
|
| 335 |
+
return ControlAction("set_register_tone", "normal")
|
| 336 |
+
|
| 337 |
+
# Markdown code snippet layout.
|
| 338 |
+
if len(m) <= 100 and (
|
| 339 |
+
re.match(r"^(please\s+)?use code fences[\s.!?]*$", m)
|
| 340 |
+
or re.match(r"^(please\s+)?fenced code blocks[\s.!?]*$", m)
|
| 341 |
+
or re.match(r"^(please\s+)?markdown code fences[\s.!?]*$", m)
|
| 342 |
+
):
|
| 343 |
+
return ControlAction("set_code_block_style", "fenced")
|
| 344 |
+
|
| 345 |
+
if len(m) <= 104 and (
|
| 346 |
+
re.match(r"^(please\s+)?inline code only[\s.!?]*$", m)
|
| 347 |
+
or re.match(r"^(please\s+)?no triple backticks[\s.!?]*$", m)
|
| 348 |
+
or re.match(r"^(please\s+)?no fenced code blocks[\s.!?]*$", m)
|
| 349 |
+
):
|
| 350 |
+
return ControlAction("set_code_block_style", "inline")
|
| 351 |
+
|
| 352 |
+
if len(m) <= 96 and re.match(
|
| 353 |
+
r"^(please\s+)?(default code formatting|reset code style|normal code blocks)[\s.!?]*$",
|
| 354 |
+
m,
|
| 355 |
+
):
|
| 356 |
+
return ControlAction("set_code_block_style", "normal")
|
| 357 |
+
|
| 358 |
+
# Analogies / metaphors vs literal explanations only.
|
| 359 |
+
if len(m) <= 92 and (
|
| 360 |
+
re.match(r"^(please\s+)?use analogies[\s.!?]*$", m)
|
| 361 |
+
or re.match(r"^(please\s+)?analogies when helpful[\s.!?]*$", m)
|
| 362 |
+
or re.match(r"^(please\s+)?metaphors are ok[\s.!?]*$", m)
|
| 363 |
+
):
|
| 364 |
+
return ControlAction("set_analogy_use", "prefer")
|
| 365 |
+
|
| 366 |
+
if len(m) <= 100 and (
|
| 367 |
+
re.match(r"^(please\s+)?no analogies[\s.!?]*$", m)
|
| 368 |
+
or re.match(r"^(please\s+)?skip metaphors[\s.!?]*$", m)
|
| 369 |
+
or re.match(r"^(please\s+)?literal explanations only[\s.!?]*$", m)
|
| 370 |
+
):
|
| 371 |
+
return ControlAction("set_analogy_use", "avoid")
|
| 372 |
+
|
| 373 |
+
if len(m) <= 82 and re.match(
|
| 374 |
+
r"^(please\s+)?(default analogy style|reset analogies|normal analogies)[\s.!?]*$",
|
| 375 |
+
m,
|
| 376 |
+
):
|
| 377 |
+
return ControlAction("set_analogy_use", "normal")
|
| 378 |
+
|
| 379 |
+
# Expand vs terse acronym handling on first introduce.
|
| 380 |
+
if len(m) <= 112 and (
|
| 381 |
+
re.match(r"^(please\s+)?spell out acronyms[\s.!?]*$", m)
|
| 382 |
+
or re.match(r"^(please\s+)?expand acronyms on first use[\s.!?]*$", m)
|
| 383 |
+
or re.match(r"^(please\s+)?define acronyms when you use them[\s.!?]*$", m)
|
| 384 |
+
):
|
| 385 |
+
return ControlAction("set_acronym_style", "spell_out")
|
| 386 |
+
|
| 387 |
+
if len(m) <= 112 and (
|
| 388 |
+
re.match(r"^(please\s+)?assume i know acronyms[\s.!?]*$", m)
|
| 389 |
+
or re.match(r"^(please\s+)?don'?t expand acronyms[\s.!?]*$", m)
|
| 390 |
+
or re.match(r"^(please\s+)?keep acronyms as is[\s.!?]*$", m)
|
| 391 |
+
):
|
| 392 |
+
return ControlAction("set_acronym_style", "terse")
|
| 393 |
+
|
| 394 |
+
if len(m) <= 92 and re.match(
|
| 395 |
+
r"^(please\s+)?(default acronym style|reset acronyms|normal acronyms)[\s.!?]*$",
|
| 396 |
+
m,
|
| 397 |
+
):
|
| 398 |
+
return ControlAction("set_acronym_style", "normal")
|
| 399 |
+
|
| 400 |
+
# Clarify-first: ask brief questions before answering if key info is missing.
|
| 401 |
+
if len(m) <= 110 and (
|
| 402 |
+
re.match(r"^(please\s+)?ask clarifying questions first[\s.!?]*$", m)
|
| 403 |
+
or re.match(r"^(please\s+)?clarify first[\s.!?]*$", m)
|
| 404 |
+
or re.match(r"^(please\s+)?ask me questions before answering[\s.!?]*$", m)
|
| 405 |
+
):
|
| 406 |
+
return ControlAction("set_clarify_first", "on")
|
| 407 |
+
|
| 408 |
+
if len(m) <= 110 and (
|
| 409 |
+
re.match(r"^(please\s+)?no clarifying questions[\s.!?]*$", m)
|
| 410 |
+
or re.match(r"^(please\s+)?just answer without questions[\s.!?]*$", m)
|
| 411 |
+
or re.match(r"^(please\s+)?answer without asking questions[\s.!?]*$", m)
|
| 412 |
+
):
|
| 413 |
+
return ControlAction("set_clarify_first", "off")
|
| 414 |
+
|
| 415 |
+
if len(m) <= 96 and re.match(
|
| 416 |
+
r"^(please\s+)?(default clarify mode|reset clarify mode|normal clarify mode)[\s.!?]*$",
|
| 417 |
+
m,
|
| 418 |
+
):
|
| 419 |
+
return ControlAction("set_clarify_first", "normal")
|
| 420 |
+
|
| 421 |
+
# Speculation level: strict factual vs brainstorming.
|
| 422 |
+
if len(m) <= 110 and (
|
| 423 |
+
re.match(r"^(please\s+)?no speculation[\s.!?]*$", m)
|
| 424 |
+
or re.match(r"^(please\s+)?stick to high confidence only[\s.!?]*$", m)
|
| 425 |
+
or re.match(r"^(please\s+)?avoid guessing[\s.!?]*$", m)
|
| 426 |
+
):
|
| 427 |
+
return ControlAction("set_speculation", "strict")
|
| 428 |
+
|
| 429 |
+
if len(m) <= 110 and (
|
| 430 |
+
re.match(r"^(please\s+)?brainstorm freely[\s.!?]*$", m)
|
| 431 |
+
or re.match(r"^(please\s+)?speculate freely[\s.!?]*$", m)
|
| 432 |
+
or re.match(r"^(please\s+)?wild ideas ok[\s.!?]*$", m)
|
| 433 |
+
):
|
| 434 |
+
return ControlAction("set_speculation", "creative")
|
| 435 |
+
|
| 436 |
+
if len(m) <= 100 and re.match(
|
| 437 |
+
r"^(please\s+)?(default speculation|normal speculation|reset speculation)[\s.!?]*$",
|
| 438 |
+
m,
|
| 439 |
+
):
|
| 440 |
+
return ControlAction("set_speculation", "normal")
|
| 441 |
+
|
| 442 |
+
# Math/explanations: show work vs final-only.
|
| 443 |
+
if len(m) <= 110 and (
|
| 444 |
+
re.match(r"^(please\s+)?show your work[\s.!?]*$", m)
|
| 445 |
+
or re.match(r"^(please\s+)?show the derivation[\s.!?]*$", m)
|
| 446 |
+
or re.match(r"^(please\s+)?include steps in math[\s.!?]*$", m)
|
| 447 |
+
):
|
| 448 |
+
return ControlAction("set_math_detail", "show_work")
|
| 449 |
+
|
| 450 |
+
if len(m) <= 110 and (
|
| 451 |
+
re.match(r"^(please\s+)?final answer only[\s.!?]*$", m)
|
| 452 |
+
or re.match(r"^(please\s+)?no derivation[\s.!?]*$", m)
|
| 453 |
+
or re.match(r"^(please\s+)?skip the steps[\s.!?]*$", m)
|
| 454 |
+
):
|
| 455 |
+
return ControlAction("set_math_detail", "final_only")
|
| 456 |
+
|
| 457 |
+
if len(m) <= 110 and re.match(
|
| 458 |
+
r"^(please\s+)?(default math detail|normal math detail|reset math detail)[\s.!?]*$",
|
| 459 |
+
m,
|
| 460 |
+
):
|
| 461 |
+
return ControlAction("set_math_detail", "normal")
|
| 462 |
+
|
| 463 |
+
# Output structure: JSON-shaped vs normal prose.
|
| 464 |
+
if len(m) <= 110 and (
|
| 465 |
+
re.match(r"^(please\s+)?answer in json[\s.!?]*$", m)
|
| 466 |
+
or re.match(r"^(please\s+)?json output[\s.!?]*$", m)
|
| 467 |
+
or re.match(r"^(please\s+)?structured json[\s.!?]*$", m)
|
| 468 |
+
):
|
| 469 |
+
return ControlAction("set_output_format", "json")
|
| 470 |
+
|
| 471 |
+
if len(m) <= 110 and (
|
| 472 |
+
re.match(r"^(please\s+)?plain text only[\s.!?]*$", m)
|
| 473 |
+
or re.match(r"^(please\s+)?no json[\s.!?]*$", m)
|
| 474 |
+
or re.match(r"^(please\s+)?no structured output[\s.!?]*$", m)
|
| 475 |
+
):
|
| 476 |
+
return ControlAction("set_output_format", "plain")
|
| 477 |
+
|
| 478 |
+
if len(m) <= 110 and re.match(
|
| 479 |
+
r"^(please\s+)?(default output format|normal output format|reset output format)[\s.!?]*$",
|
| 480 |
+
m,
|
| 481 |
+
):
|
| 482 |
+
return ControlAction("set_output_format", "normal")
|
| 483 |
+
|
| 484 |
+
# Safety/risk posture for recommendations.
|
| 485 |
+
if len(m) <= 110 and (
|
| 486 |
+
re.match(r"^(please\s+)?be risk averse[\s.!?]*$", m)
|
| 487 |
+
or re.match(r"^(please\s+)?be conservative[\s.!?]*$", m)
|
| 488 |
+
or re.match(r"^(please\s+)?err on the side of safety[\s.!?]*$", m)
|
| 489 |
+
):
|
| 490 |
+
return ControlAction("set_risk_posture", "conservative")
|
| 491 |
+
|
| 492 |
+
if len(m) <= 110 and (
|
| 493 |
+
re.match(r"^(please\s+)?be pragmatic[\s.!?]*$", m)
|
| 494 |
+
or re.match(r"^(please\s+)?optimize for speed[\s.!?]*$", m)
|
| 495 |
+
or re.match(r"^(please\s+)?good enough is fine[\s.!?]*$", m)
|
| 496 |
+
):
|
| 497 |
+
return ControlAction("set_risk_posture", "pragmatic")
|
| 498 |
+
|
| 499 |
+
if len(m) <= 110 and re.match(
|
| 500 |
+
r"^(please\s+)?(default risk posture|normal risk posture|reset risk posture)[\s.!?]*$",
|
| 501 |
+
m,
|
| 502 |
+
):
|
| 503 |
+
return ControlAction("set_risk_posture", "normal")
|
| 504 |
+
|
| 505 |
+
# Actionability: runnable steps vs conceptual explanation.
|
| 506 |
+
if len(m) <= 110 and (
|
| 507 |
+
re.match(r"^(please\s+)?give me runnable commands[\s.!?]*$", m)
|
| 508 |
+
or re.match(r"^(please\s+)?include commands[\s.!?]*$", m)
|
| 509 |
+
or re.match(r"^(please\s+)?make it actionable[\s.!?]*$", m)
|
| 510 |
+
):
|
| 511 |
+
return ControlAction("set_actionability", "commands")
|
| 512 |
+
|
| 513 |
+
if len(m) <= 110 and (
|
| 514 |
+
re.match(r"^(please\s+)?no commands[\s.!?]*$", m)
|
| 515 |
+
or re.match(r"^(please\s+)?conceptual only[\s.!?]*$", m)
|
| 516 |
+
or re.match(r"^(please\s+)?high level only[\s.!?]*$", m)
|
| 517 |
+
):
|
| 518 |
+
return ControlAction("set_actionability", "conceptual")
|
| 519 |
+
|
| 520 |
+
if len(m) <= 110 and re.match(
|
| 521 |
+
r"^(please\s+)?(default actionability|normal actionability|reset actionability)[\s.!?]*$",
|
| 522 |
+
m,
|
| 523 |
+
):
|
| 524 |
+
return ControlAction("set_actionability", "normal")
|
| 525 |
+
|
| 526 |
+
# Quote/citation preference when using supplied excerpts.
|
| 527 |
+
if len(m) <= 110 and (
|
| 528 |
+
re.match(r"^(please\s+)?quote the faq excerpts[\s.!?]*$", m)
|
| 529 |
+
or re.match(r"^(please\s+)?use direct quotes[\s.!?]*$", m)
|
| 530 |
+
or re.match(r"^(please\s+)?cite with quotes[\s.!?]*$", m)
|
| 531 |
+
):
|
| 532 |
+
return ControlAction("set_quote_style", "quote")
|
| 533 |
+
|
| 534 |
+
if len(m) <= 110 and (
|
| 535 |
+
re.match(r"^(please\s+)?no quotes[\s.!?]*$", m)
|
| 536 |
+
or re.match(r"^(please\s+)?don'?t quote excerpts[\s.!?]*$", m)
|
| 537 |
+
or re.match(r"^(please\s+)?paraphrase only[\s.!?]*$", m)
|
| 538 |
+
):
|
| 539 |
+
return ControlAction("set_quote_style", "paraphrase")
|
| 540 |
+
|
| 541 |
+
if len(m) <= 110 and re.match(
|
| 542 |
+
r"^(please\s+)?(default quote style|normal quote style|reset quote style)[\s.!?]*$",
|
| 543 |
+
m,
|
| 544 |
+
):
|
| 545 |
+
return ControlAction("set_quote_style", "normal")
|
| 546 |
+
|
| 547 |
+
# Tables: prefer markdown tables vs avoid.
|
| 548 |
+
if len(m) <= 110 and (
|
| 549 |
+
re.match(r"^(please\s+)?use tables[\s.!?]*$", m)
|
| 550 |
+
or re.match(r"^(please\s+)?markdown tables[\s.!?]*$", m)
|
| 551 |
+
or re.match(r"^(please\s+)?tabular format[\s.!?]*$", m)
|
| 552 |
+
):
|
| 553 |
+
return ControlAction("set_table_style", "prefer")
|
| 554 |
+
|
| 555 |
+
if len(m) <= 110 and (
|
| 556 |
+
re.match(r"^(please\s+)?no tables[\s.!?]*$", m)
|
| 557 |
+
or re.match(r"^(please\s+)?avoid tables[\s.!?]*$", m)
|
| 558 |
+
or re.match(r"^(please\s+)?no markdown tables[\s.!?]*$", m)
|
| 559 |
+
):
|
| 560 |
+
return ControlAction("set_table_style", "avoid")
|
| 561 |
+
|
| 562 |
+
if len(m) <= 110 and re.match(
|
| 563 |
+
r"^(please\s+)?(default table style|normal tables|reset tables)[\s.!?]*$",
|
| 564 |
+
m,
|
| 565 |
+
):
|
| 566 |
+
return ControlAction("set_table_style", "normal")
|
| 567 |
+
|
| 568 |
+
# Emoji in assistant replies (short lines; conservative wording).
|
| 569 |
+
if len(m) <= 110 and (
|
| 570 |
+
re.match(r"^(please\s+)?(use emoji|emoji ok|emoji welcome|include emoji)[\s.!?]*$", m)
|
| 571 |
+
or re.match(r"^(please\s+)?add (a few )?emoji[\s.!?]*$", m)
|
| 572 |
+
):
|
| 573 |
+
return ControlAction("set_emoji_style", "include")
|
| 574 |
+
|
| 575 |
+
if len(m) <= 110 and (
|
| 576 |
+
re.match(r"^(please\s+)?no emojis?[\s.!?]*$", m)
|
| 577 |
+
or re.match(r"^(please\s+)?avoid emoji[\s.!?]*$", m)
|
| 578 |
+
or re.match(r"^(please\s+)?don'?t use emoji[\s.!?]*$", m)
|
| 579 |
+
):
|
| 580 |
+
return ControlAction("set_emoji_style", "avoid")
|
| 581 |
+
|
| 582 |
+
if len(m) <= 110 and re.match(
|
| 583 |
+
r"^(please\s+)?(default emoji style|normal emoji|reset emoji)[\s.!?]*$",
|
| 584 |
+
m,
|
| 585 |
+
):
|
| 586 |
+
return ControlAction("set_emoji_style", "normal")
|
| 587 |
+
|
| 588 |
+
# Markdown section headings (## / ###) vs flat prose.
|
| 589 |
+
if len(m) <= 110 and (
|
| 590 |
+
re.match(r"^(please\s+)?use section headings[\s.!?]*$", m)
|
| 591 |
+
or re.match(r"^(please\s+)?organize with headings[\s.!?]*$", m)
|
| 592 |
+
or re.match(r"^(please\s+)?use markdown headings[\s.!?]*$", m)
|
| 593 |
+
):
|
| 594 |
+
return ControlAction("set_section_headings", "prefer")
|
| 595 |
+
|
| 596 |
+
if len(m) <= 110 and (
|
| 597 |
+
re.match(r"^(please\s+)?no section headings[\s.!?]*$", m)
|
| 598 |
+
or re.match(r"^(please\s+)?avoid markdown headings[\s.!?]*$", m)
|
| 599 |
+
or re.match(r"^(please\s+)?flat (answer|prose)( please)?[\s.!?]*$", m)
|
| 600 |
+
):
|
| 601 |
+
return ControlAction("set_section_headings", "avoid")
|
| 602 |
+
|
| 603 |
+
if len(m) <= 110 and re.match(
|
| 604 |
+
r"^(please\s+)?(default section headings|normal headings|reset headings)[\s.!?]*$",
|
| 605 |
+
m,
|
| 606 |
+
):
|
| 607 |
+
return ControlAction("set_section_headings", "normal")
|
| 608 |
+
|
| 609 |
+
# Inline emphasis: bold a few key terms vs keep markdown minimal.
|
| 610 |
+
if len(m) <= 110 and (
|
| 611 |
+
re.match(r"^(please\s+)?bold key terms[\s.!?]*$", m)
|
| 612 |
+
or re.match(r"^(please\s+)?highlight important terms[\s.!?]*$", m)
|
| 613 |
+
or re.match(r"^(please\s+)?emphasize keywords[\s.!?]*$", m)
|
| 614 |
+
):
|
| 615 |
+
return ControlAction("set_term_emphasis", "highlight")
|
| 616 |
+
|
| 617 |
+
if len(m) <= 110 and (
|
| 618 |
+
re.match(r"^(please\s+)?minimal bold[\s.!?]*$", m)
|
| 619 |
+
or re.match(r"^(please\s+)?don'?t overuse bold[\s.!?]*$", m)
|
| 620 |
+
or re.match(r"^(please\s+)?avoid excessive bold[\s.!?]*$", m)
|
| 621 |
+
):
|
| 622 |
+
return ControlAction("set_term_emphasis", "minimal")
|
| 623 |
+
|
| 624 |
+
if len(m) <= 110 and re.match(
|
| 625 |
+
r"^(please\s+)?(default emphasis|normal bold|reset emphasis)[\s.!?]*$",
|
| 626 |
+
m,
|
| 627 |
+
):
|
| 628 |
+
return ControlAction("set_term_emphasis", "normal")
|
| 629 |
+
|
| 630 |
+
# Counterpoint tone: supportive vs challenge assumptions (short lines).
|
| 631 |
+
if len(m) <= 110 and (
|
| 632 |
+
re.match(r"^(please\s+)?challenge my assumptions[\s.!?]*$", m)
|
| 633 |
+
or re.match(r"^(please\s+)?play devils advocate[\s.!?]*$", m)
|
| 634 |
+
or re.match(r"^(please\s+)?push back on weak points[\s.!?]*$", m)
|
| 635 |
+
):
|
| 636 |
+
return ControlAction("set_counterpoint_tone", "challenge")
|
| 637 |
+
|
| 638 |
+
if len(m) <= 110 and (
|
| 639 |
+
re.match(r"^(please\s+)?be supportive[\s.!?]*$", m)
|
| 640 |
+
or re.match(r"^(please\s+)?assume good intent[\s.!?]*$", m)
|
| 641 |
+
or re.match(r"^(please\s+)?encourage my ideas[\s.!?]*$", m)
|
| 642 |
+
):
|
| 643 |
+
return ControlAction("set_counterpoint_tone", "supportive")
|
| 644 |
+
|
| 645 |
+
if len(m) <= 110 and re.match(
|
| 646 |
+
r"^(please\s+)?(default counterpoints|normal pushback|reset counterpoints)[\s.!?]*$",
|
| 647 |
+
m,
|
| 648 |
+
):
|
| 649 |
+
return ControlAction("set_counterpoint_tone", "normal")
|
| 650 |
+
|
| 651 |
+
return None
|
| 652 |
+
|
scripts/rag_faq_smoke.py
CHANGED
|
@@ -3,7 +3,9 @@
|
|
| 3 |
|
| 4 |
Chunks a FAQ markdown corpus by `##` sections, embeds with TinyModelRuntime, retrieves top
|
| 5 |
matches for a query, and reports **keyword overlap** in the top hit as a cheap faithfulness
|
| 6 |
-
proxy (not neural entailment).
|
|
|
|
|
|
|
| 7 |
|
| 8 |
from __future__ import annotations
|
| 9 |
|
|
@@ -11,12 +13,15 @@ import argparse
|
|
| 11 |
import re
|
| 12 |
import sys
|
| 13 |
from pathlib import Path
|
|
|
|
| 14 |
|
| 15 |
_scripts = Path(__file__).resolve().parent
|
| 16 |
if str(_scripts) not in sys.path:
|
| 17 |
sys.path.insert(0, str(_scripts))
|
| 18 |
|
| 19 |
-
from
|
|
|
|
|
|
|
| 20 |
|
| 21 |
_STOP = frozenset(
|
| 22 |
"a an the to of and or for in on at is are was be as it with from by not"
|
|
@@ -58,8 +63,22 @@ def _pick_model(explicit: str | None) -> str:
|
|
| 58 |
return explicit # Hub id, e.g. HyperlinksSpace/TinyModel1
|
| 59 |
|
| 60 |
|
| 61 |
-
def
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
p.add_argument(
|
| 64 |
"--model",
|
| 65 |
type=str,
|
|
@@ -82,7 +101,25 @@ def parse_args() -> argparse.Namespace:
|
|
| 82 |
action="store_true",
|
| 83 |
help="Use only TinyModelRuntime.retrieve (stricter; tiny encoders may fail on short FAQ chunks).",
|
| 84 |
)
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
|
| 88 |
def load_chunks(corpus: Path) -> list[str]:
|
|
@@ -164,7 +201,29 @@ def main() -> None:
|
|
| 164 |
raise SystemExit(1)
|
| 165 |
|
| 166 |
chunks = load_chunks(corpus)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 167 |
rt = TinyModelRuntime(model_id, device="cpu", max_length=128)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
print("=== RAG FAQ smoke (retrieval) ===\n")
|
| 169 |
# (query, substring that must appear in top-1 chunk for a pass — citation-style check)
|
| 170 |
samples: list[tuple[str, str]] = [
|
|
|
|
| 3 |
|
| 4 |
Chunks a FAQ markdown corpus by `##` sections, embeds with TinyModelRuntime, retrieves top
|
| 5 |
matches for a query, and reports **keyword overlap** in the top hit as a cheap faithfulness
|
| 6 |
+
proxy (not neural entailment). Optional **--show-train-routing** prints Phase 2 **`routing`**
|
| 7 |
+
notes from the checkpoint's **eval_report.json** (same helper as **embeddings_smoke_test** /
|
| 8 |
+
**horizon1_route_then_retrieve**)."""
|
| 9 |
|
| 10 |
from __future__ import annotations
|
| 11 |
|
|
|
|
| 13 |
import re
|
| 14 |
import sys
|
| 15 |
from pathlib import Path
|
| 16 |
+
from typing import Any
|
| 17 |
|
| 18 |
_scripts = Path(__file__).resolve().parent
|
| 19 |
if str(_scripts) not in sys.path:
|
| 20 |
sys.path.insert(0, str(_scripts))
|
| 21 |
|
| 22 |
+
from eval_report_routing import maybe_print_routing_section
|
| 23 |
+
|
| 24 |
+
_PROG = "rag_faq_smoke"
|
| 25 |
|
| 26 |
_STOP = frozenset(
|
| 27 |
"a an the to of and or for in on at is are was be as it with from by not"
|
|
|
|
| 63 |
return explicit # Hub id, e.g. HyperlinksSpace/TinyModel1
|
| 64 |
|
| 65 |
|
| 66 |
+
def build_parser() -> argparse.ArgumentParser:
|
| 67 |
+
epilog = (
|
| 68 |
+
"Examples:\n"
|
| 69 |
+
" python scripts/rag_faq_smoke.py\n"
|
| 70 |
+
" python scripts/rag_faq_smoke.py --query \"How do I get a refund?\" --top-k 3\n"
|
| 71 |
+
" python scripts/rag_faq_smoke.py --model artifacts/phase1/runs/smoke/ag_news/scratch "
|
| 72 |
+
"--show-train-routing\n"
|
| 73 |
+
"If --model is omitted, the first default checkpoint dir with config.json is used, "
|
| 74 |
+
f"else {_DEFAULT_HUB!r} (see --model above)."
|
| 75 |
+
)
|
| 76 |
+
p = argparse.ArgumentParser(
|
| 77 |
+
prog=_PROG,
|
| 78 |
+
description=__doc__,
|
| 79 |
+
formatter_class=argparse.RawDescriptionHelpFormatter,
|
| 80 |
+
epilog=epilog,
|
| 81 |
+
)
|
| 82 |
p.add_argument(
|
| 83 |
"--model",
|
| 84 |
type=str,
|
|
|
|
| 101 |
action="store_true",
|
| 102 |
help="Use only TinyModelRuntime.retrieve (stricter; tiny encoders may fail on short FAQ chunks).",
|
| 103 |
)
|
| 104 |
+
p.add_argument(
|
| 105 |
+
"--query",
|
| 106 |
+
type=str,
|
| 107 |
+
default=None,
|
| 108 |
+
help=(
|
| 109 |
+
"If set, run a single retrieval for this query and print top-k chunks with scores "
|
| 110 |
+
"(citation-style index into the chunk list). Skips the built-in smoke assertions."
|
| 111 |
+
),
|
| 112 |
+
)
|
| 113 |
+
p.add_argument(
|
| 114 |
+
"--show-train-routing",
|
| 115 |
+
action="store_true",
|
| 116 |
+
help="Print eval_report.json top-level routing (Phase 2 notes) before retrieval output.",
|
| 117 |
+
)
|
| 118 |
+
return p
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def parse_args() -> argparse.Namespace:
|
| 122 |
+
return build_parser().parse_args()
|
| 123 |
|
| 124 |
|
| 125 |
def load_chunks(corpus: Path) -> list[str]:
|
|
|
|
| 201 |
raise SystemExit(1)
|
| 202 |
|
| 203 |
chunks = load_chunks(corpus)
|
| 204 |
+
maybe_print_routing_section(
|
| 205 |
+
model_id, enabled=args.show_train_routing, prog=_PROG,
|
| 206 |
+
)
|
| 207 |
+
from tinymodel_runtime import TinyModelRuntime
|
| 208 |
+
|
| 209 |
rt = TinyModelRuntime(model_id, device="cpu", max_length=128)
|
| 210 |
+
|
| 211 |
+
if args.query:
|
| 212 |
+
q = args.query.strip()
|
| 213 |
+
print("=== RAG FAQ (single query) ===\n")
|
| 214 |
+
print(f"model={model_id!r}\ncorpus={corpus}\nquery={q!r}\n")
|
| 215 |
+
if args.semantic_only:
|
| 216 |
+
hits = rt.retrieve(q, chunks, top_k=args.top_k)
|
| 217 |
+
for rank, h in enumerate(hits, 1):
|
| 218 |
+
prev = h.text[:240].replace("\n", " ")
|
| 219 |
+
print(f" #{rank} idx={h.index} score={h.score:.4f} {prev!r}...")
|
| 220 |
+
else:
|
| 221 |
+
hr = hybrid_retrieve(rt, q, chunks, top_k=args.top_k)
|
| 222 |
+
for rank, (score, idx, text) in enumerate(hr, 1):
|
| 223 |
+
prev = text[:240].replace("\n", " ")
|
| 224 |
+
print(f" #{rank} idx={idx} hybrid_score={score:.4f} {prev!r}...")
|
| 225 |
+
return
|
| 226 |
+
|
| 227 |
print("=== RAG FAQ smoke (retrieval) ===\n")
|
| 228 |
# (query, substring that must appear in top-1 chunk for a pass — citation-style check)
|
| 229 |
samples: list[tuple[str, str]] = [
|
scripts/universal_brain_chat.py
CHANGED
|
@@ -25,8 +25,27 @@ import json
|
|
| 25 |
import os
|
| 26 |
import sqlite3
|
| 27 |
import sys
|
|
|
|
| 28 |
import warnings
|
| 29 |
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
_scripts = Path(__file__).resolve().parent
|
| 32 |
_REPO = _scripts.parent
|
|
@@ -46,13 +65,59 @@ from horizon2_core import ( # noqa: E402
|
|
| 46 |
load_causal_lm,
|
| 47 |
pick_device,
|
| 48 |
)
|
| 49 |
-
from horizon3_store import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
from rag_faq_smoke import _pick_model, hybrid_retrieve, load_chunks # noqa: E402
|
| 51 |
from tinymodel_runtime import TinyModelRuntime # noqa: E402
|
| 52 |
|
| 53 |
HELP_TEXT = """**How to use**
|
| 54 |
- **Normal language:** ask in plain English (or mixed); the app **infers** what you want (summarize, search FAQ, save a note, etc.).
|
| 55 |
-
- **
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
**Intents the router understands** (examples, not exact wording):
|
| 58 |
- Ordinary chat / questions
|
|
@@ -61,6 +126,9 @@ HELP_TEXT = """**How to use**
|
|
| 61 |
- **Answer using only** these facts — include both facts and question
|
| 62 |
- **Search** the FAQ / **find** in the knowledge base
|
| 63 |
- **Classify** (topic model) this paragraph
|
|
|
|
|
|
|
|
|
|
| 64 |
- **Remember** / note / store: **long-term** vs **this session only**
|
| 65 |
- **Show** saved notes; **clear** session notes
|
| 66 |
- **Status** of loaded models
|
|
@@ -81,6 +149,9 @@ intent must be one of:
|
|
| 81 |
- grounded — answer only from given facts; put QUESTION in "question", FACTS in "context" (if user mixes both in one blob, split sensibly)
|
| 82 |
- retrieve — search FAQ/knowledge; put search query in "text"
|
| 83 |
- classify — show topic-classifier probabilities; put passage in "text"
|
|
|
|
|
|
|
|
|
|
| 84 |
- remember — save a durable note; put note body in "text"
|
| 85 |
- session_note — save a session-only note; put note in "text"
|
| 86 |
- list_memories — user wants to see saved notes
|
|
@@ -101,6 +172,9 @@ VALID_INTENTS = frozenset(
|
|
| 101 |
"grounded",
|
| 102 |
"retrieve",
|
| 103 |
"classify",
|
|
|
|
|
|
|
|
|
|
| 104 |
"remember",
|
| 105 |
"session_note",
|
| 106 |
"list_memories",
|
|
@@ -117,9 +191,69 @@ _INTENT_ALIASES = {
|
|
| 117 |
"search": "retrieve",
|
| 118 |
"faq": "retrieve",
|
| 119 |
"lookup": "retrieve",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
}
|
| 121 |
|
| 122 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
def _classifier_result_markdown(probs: dict[str, float]) -> str:
|
| 124 |
ranked = sorted(probs.items(), key=lambda x: -x[1])
|
| 125 |
top_lab, top_p = ranked[0]
|
|
@@ -323,6 +457,45 @@ def run_routed_tool(
|
|
| 323 |
out.append(f"**#{i}** score={sc:.4f}\n{_clip(txt, 700)}\n")
|
| 324 |
return "\n".join(out)
|
| 325 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 326 |
if intent in ("summarize", "reformulate", "grounded"):
|
| 327 |
if intent == "grounded":
|
| 328 |
qn = question or text
|
|
@@ -387,6 +560,798 @@ def run_routed_tool(
|
|
| 387 |
return ""
|
| 388 |
|
| 389 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 390 |
def handle_slash(
|
| 391 |
msg: str,
|
| 392 |
*,
|
|
@@ -442,6 +1407,39 @@ def handle_slash(
|
|
| 442 |
out.append(f"**#{i}** score={sc:.4f}\n{_clip(txt, 700)}\n")
|
| 443 |
return "\n".join(out)
|
| 444 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 445 |
if cmd in ("/summarize", "/reformulate", "/grounded"):
|
| 446 |
if lm is None:
|
| 447 |
return "Generative model not loaded."
|
|
@@ -665,27 +1663,61 @@ def main() -> None:
|
|
| 665 |
print(f"Loading generative model {mid!r} on {dev!r} ...", flush=True)
|
| 666 |
lm = load_causal_lm(mid, dev)
|
| 667 |
turn_counter = {"n": 0}
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 671 |
|
| 672 |
def respond(
|
| 673 |
message: str,
|
| 674 |
history: list[dict],
|
| 675 |
-
|
|
|
|
| 676 |
msg = (message or "").strip()
|
| 677 |
hist = list(history or [])
|
| 678 |
if not msg:
|
| 679 |
-
return "", hist
|
| 680 |
|
| 681 |
turn_counter["n"] += 1
|
| 682 |
seed = (args.seed + turn_counter["n"]) % (2**31)
|
| 683 |
|
|
|
|
|
|
|
| 684 |
slash_out = handle_slash(
|
| 685 |
msg,
|
| 686 |
lm=lm,
|
| 687 |
mem_conn=mem_conn,
|
| 688 |
-
scope_key=
|
| 689 |
encoder=encoder,
|
| 690 |
rag_chunks=rag_chunks,
|
| 691 |
rag_top_k=args.rag_top_k,
|
|
@@ -699,10 +1731,28 @@ def main() -> None:
|
|
| 699 |
if slash_out is not None:
|
| 700 |
hist.append({"role": "user", "content": msg})
|
| 701 |
hist.append({"role": "assistant", "content": slash_out})
|
| 702 |
-
return "", hist
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 703 |
|
| 704 |
chat_line = msg
|
| 705 |
-
if
|
| 706 |
try:
|
| 707 |
route = infer_route(
|
| 708 |
lm,
|
|
@@ -719,9 +1769,9 @@ def main() -> None:
|
|
| 719 |
msg=msg,
|
| 720 |
lm=lm,
|
| 721 |
mem_conn=mem_conn,
|
| 722 |
-
scope_key=
|
| 723 |
encoder=encoder,
|
| 724 |
-
rag_chunks=
|
| 725 |
rag_top_k=args.rag_top_k,
|
| 726 |
task_max_new_tokens=args.task_max_new_tokens,
|
| 727 |
seed=(seed + 11) % (2**31),
|
|
@@ -734,12 +1784,13 @@ def main() -> None:
|
|
| 734 |
foot = f"\n\n---\n*Routed intent:* `{route['intent']}`"
|
| 735 |
hist.append({"role": "user", "content": msg})
|
| 736 |
hist.append({"role": "assistant", "content": tool_reply + foot})
|
| 737 |
-
return "", hist
|
| 738 |
|
| 739 |
chat_line = route["text"] or msg
|
| 740 |
|
| 741 |
trace: list[str] = []
|
| 742 |
extras: list[str] = []
|
|
|
|
| 743 |
|
| 744 |
if encoder:
|
| 745 |
probs = encoder.classify([chat_line])[0]
|
|
@@ -752,8 +1803,8 @@ def main() -> None:
|
|
| 752 |
)
|
| 753 |
|
| 754 |
rag_block = ""
|
| 755 |
-
if encoder and
|
| 756 |
-
hr = hybrid_retrieve(encoder, chat_line,
|
| 757 |
if hr:
|
| 758 |
trace.append(f"RAG:{len(hr)}chunk(s)")
|
| 759 |
pieces = []
|
|
@@ -767,7 +1818,7 @@ def main() -> None:
|
|
| 767 |
)
|
| 768 |
|
| 769 |
if mem_conn:
|
| 770 |
-
items = list_for_scope(mem_conn,
|
| 771 |
if items:
|
| 772 |
trace.append(f"mem:{len(items)}item(s)")
|
| 773 |
mem_lines = []
|
|
@@ -796,12 +1847,21 @@ def main() -> None:
|
|
| 796 |
do_sample=True,
|
| 797 |
)
|
| 798 |
out = reply or "(empty generation)"
|
| 799 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 800 |
out += "\n\n---\n*Brain trace:* " + " · ".join(trace)
|
| 801 |
|
| 802 |
hist.append({"role": "user", "content": msg})
|
| 803 |
hist.append({"role": "assistant", "content": out})
|
| 804 |
-
return "", hist
|
| 805 |
|
| 806 |
brain_bits = []
|
| 807 |
if encoder:
|
|
@@ -812,33 +1872,67 @@ def main() -> None:
|
|
| 812 |
brain_bits.append("memory")
|
| 813 |
brain_label = "+".join(brain_bits) if brain_bits else "LM only"
|
| 814 |
|
| 815 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 816 |
gr.Markdown(
|
| 817 |
"### Universal Brain — chat prototype\n"
|
| 818 |
f"**Generative:** `{mid}` ({lm.device}) · **Brain layers:** {brain_label}\n\n"
|
| 819 |
"**NL routing:** the model infers what you want (summarize, FAQ search, save note, …). "
|
| 820 |
"Use **`--no-smart-route`** for plain chat-only + slash shortcuts. "
|
| 821 |
"`/help` lists slash commands.\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 822 |
"Encoder topics (Hub TinyModel1 ≈ AG News) still feed context and an optional *Brain trace* line; "
|
| 823 |
"use `/classify` or ask naturally to see the full probability table in chat."
|
| 824 |
)
|
| 825 |
chat = gr.Chatbot(type="messages", height=520, label="Conversation", allow_tags=False)
|
|
|
|
| 826 |
with gr.Row():
|
| 827 |
inp = gr.Textbox(
|
| 828 |
-
lines=
|
| 829 |
-
max_lines=
|
| 830 |
show_label=False,
|
| 831 |
placeholder="Ask in plain language, or use /help …",
|
| 832 |
scale=9,
|
|
|
|
| 833 |
)
|
| 834 |
go = gr.Button("Send", variant="primary", scale=1)
|
| 835 |
gr.ClearButton([chat, inp])
|
| 836 |
|
| 837 |
-
def _submit(
|
| 838 |
-
|
| 839 |
-
|
| 840 |
-
|
| 841 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 842 |
|
| 843 |
demo.queue(default_concurrency_limit=2)
|
| 844 |
share = args.share
|
|
@@ -850,6 +1944,7 @@ def main() -> None:
|
|
| 850 |
server_port=args.port,
|
| 851 |
share=share,
|
| 852 |
ssr_mode=False,
|
|
|
|
| 853 |
)
|
| 854 |
except ValueError as e:
|
| 855 |
err = str(e)
|
|
|
|
| 25 |
import os
|
| 26 |
import sqlite3
|
| 27 |
import sys
|
| 28 |
+
import uuid
|
| 29 |
import warnings
|
| 30 |
from pathlib import Path
|
| 31 |
+
from typing import Any
|
| 32 |
+
|
| 33 |
+
# Windows: avoid OpenMP/MKL oversubscription and duplicate OpenMP runtime conflicts that can
|
| 34 |
+
# segfault during large `from_pretrained` CPU loads (common with torch+transformers).
|
| 35 |
+
if sys.platform == "win32":
|
| 36 |
+
os.environ.setdefault("OMP_NUM_THREADS", "1")
|
| 37 |
+
os.environ.setdefault("MKL_NUM_THREADS", "1")
|
| 38 |
+
os.environ.setdefault("KMP_DUPLICATE_LIB_OK", "TRUE")
|
| 39 |
+
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
|
| 40 |
+
|
| 41 |
+
import torch
|
| 42 |
+
|
| 43 |
+
if sys.platform == "win32":
|
| 44 |
+
torch.set_num_threads(1)
|
| 45 |
+
try:
|
| 46 |
+
torch.set_num_interop_threads(1)
|
| 47 |
+
except RuntimeError:
|
| 48 |
+
pass
|
| 49 |
|
| 50 |
_scripts = Path(__file__).resolve().parent
|
| 51 |
_REPO = _scripts.parent
|
|
|
|
| 65 |
load_causal_lm,
|
| 66 |
pick_device,
|
| 67 |
)
|
| 68 |
+
from horizon3_store import ( # noqa: E402
|
| 69 |
+
clear_session,
|
| 70 |
+
connect,
|
| 71 |
+
export_scope_json,
|
| 72 |
+
forget_scope,
|
| 73 |
+
init_schema,
|
| 74 |
+
list_for_scope,
|
| 75 |
+
put,
|
| 76 |
+
)
|
| 77 |
+
from nl_controls import parse_control_action # noqa: E402
|
| 78 |
from rag_faq_smoke import _pick_model, hybrid_retrieve, load_chunks # noqa: E402
|
| 79 |
from tinymodel_runtime import TinyModelRuntime # noqa: E402
|
| 80 |
|
| 81 |
HELP_TEXT = """**How to use**
|
| 82 |
- **Normal language:** ask in plain English (or mixed); the app **infers** what you want (summarize, search FAQ, save a note, etc.).
|
| 83 |
+
- **Session controls (say it in chat, no slash command):**
|
| 84 |
+
- *What is my current scope?*, *Show my session settings* -> prints scope + toggles (FAQ context, routing, trace)
|
| 85 |
+
- *Start a new private session*, *Begin a fresh scope* -> generates a **new memory scope key** so notes are isolated from the shared default demo scope
|
| 86 |
+
- *Switch to scope my-team-123* / *Use session demo-key* -> set the Horizon 3 **`scope_key`** from chat (ASCII id)
|
| 87 |
+
- *Be brief* / *More detail please* / *Use bullet points* / *No bullets, plain paragraphs* -> soft **reply-style** hints (injected into the assistant system context; short control lines only)
|
| 88 |
+
- *Strict FAQ* / *FAQ only* / *Stick to the FAQ* vs *Relaxed FAQ* / *FAQ plus general knowledge* vs *Balanced FAQ* / *Normal FAQ* -> **FAQ grounding** hints for how tightly to treat injected FAQ excerpts vs general knowledge
|
| 89 |
+
- *Explain simply* / *ELI5* / *I'm a beginner* vs *Expert mode* / *Assume I'm technical* vs *Normal explanation level* -> **audience depth** hints (simple vs technical vs default)
|
| 90 |
+
- *TLDR first* / *Lead with a summary* vs *No TLDR* / *Answer directly* vs *Default answer structure* -> **answer opening** style (short upfront summary vs dive straight in)
|
| 91 |
+
- *Step by step* / *Numbered steps* vs *No numbered steps* / *Continuous prose* vs *Default step style* -> **procedure layout** (numbered steps vs flowing paragraphs)
|
| 92 |
+
- *Flag your assumptions* / *Be explicit about uncertainty* vs *Be decisive* / *Don't hedge* vs *Reset uncertainty* -> **confidence tone** hints
|
| 93 |
+
- *Suggest next steps* / *Offer follow-up questions* vs *No follow-up questions* / *No questions at the end* vs *Default follow-ups* -> **closing** style at end of answers
|
| 94 |
+
- *Definitions first* / *Define terms first* vs *Intuition first* / *Big picture first* vs *Default explanation order* -> **concept order** in explanations
|
| 95 |
+
- *Include examples* / *Use concrete examples* vs *Skip examples* / *No examples unless I ask* vs *Default examples* -> **example density**
|
| 96 |
+
- *Use pros and cons* / *Pros and cons sections* vs *Compare in flowing prose* / *No pros and cons sections* vs *Default comparison style* -> **comparison layout** for trade-offs
|
| 97 |
+
- *Formal tone* / *Professional register* vs *Casual tone* / *Speak casually* vs *Default tone* -> **writing register**
|
| 98 |
+
- *Use code fences* / *Fenced code blocks* vs *Inline code only* / *No fenced code blocks* vs *Default code formatting* -> **markdown code layout**
|
| 99 |
+
- *Use analogies* / *Analogies when helpful* vs *No analogies* / *Literal explanations only* vs *Default analogy style* -> **analogy / metaphor** usage
|
| 100 |
+
- *Spell out acronyms* / *Expand acronyms on first use* vs *Assume I know acronyms* / *Don't expand acronyms* vs *Default acronym style* -> **acronym verbosity**
|
| 101 |
+
- *Ask clarifying questions first* / *Clarify first* vs *No clarifying questions* / *Just answer without questions* vs *Default clarify mode* -> whether the assistant should ask for missing info before answering
|
| 102 |
+
- *No speculation* / *Stick to high confidence only* vs *Brainstorm freely* / *Wild ideas ok* vs *Default speculation* -> how strictly to avoid guessing vs allow ideation
|
| 103 |
+
- *Show your work* / *Show the derivation* vs *Final answer only* / *No derivation* vs *Default math detail* -> how much intermediate reasoning to show for math-like answers
|
| 104 |
+
- *Answer in JSON* / *JSON output* vs *Plain text only* / *No JSON* vs *Default output format* -> structured output preference
|
| 105 |
+
- *Be risk averse* / *Err on the side of safety* vs *Be pragmatic* / *Optimize for speed* vs *Default risk posture* -> conservative vs practical recommendations
|
| 106 |
+
- *Give me runnable commands* / *Make it actionable* vs *No commands* / *Conceptual only* vs *Default actionability* -> how command-heavy responses should be
|
| 107 |
+
- *Quote the FAQ excerpts* / *Use direct quotes* vs *Paraphrase only* / *Don't quote excerpts* vs *Default quote style* -> quoting vs paraphrasing when relying on injected excerpts
|
| 108 |
+
- *Use tables* / *Tabular format* vs *No tables* / *Avoid tables* vs *Default table style* -> whether markdown tables are preferred
|
| 109 |
+
- *Use emoji* / *Emoji ok* vs *No emoji* / *Avoid emoji* vs *Default emoji style* -> light **emoji** usage in answers
|
| 110 |
+
- *Use section headings* / *Organize with headings* vs *No section headings* / *Flat answer* vs *Default section headings* -> **markdown headings** vs flat prose
|
| 111 |
+
- *Bold key terms* / *Highlight important terms* vs *Minimal bold* / *Don't overuse bold* vs *Default emphasis* -> **inline bold** for key phrases vs sparse formatting
|
| 112 |
+
- *Challenge my assumptions* / *Play devils advocate* vs *Be supportive* / *Assume good intent* vs *Default counterpoints* -> how much to **push back** vs stay encouraging
|
| 113 |
+
- *Reset reply style* -> back to defaults for length + prose + balanced FAQ grounding + audience + opening + steps + confidence tone + follow-ups + concept order + examples + comparisons + register + code layout + analogy + acronym style + clarify + speculation + math detail + output format + risk posture + actionability + quote style + table style + emoji + section headings + term emphasis + counterpoints
|
| 114 |
+
- *Export my memories*, *Download my notes as JSON* -> returns a Horizon 3 export blob for **this Space session scope**
|
| 115 |
+
- *Delete all my memories for this chat* / *Erase everything you stored about me here* -> **forget-scope** wipe for this scope (**long-term + session** rows)
|
| 116 |
+
- *Clear my session notes* -> wipes **session** notes only
|
| 117 |
+
- *Turn off the FAQ context*, *Disable RAG snippets*, *Turn FAQ back on* -> toggles whether FAQ excerpts are injected into the chat system context
|
| 118 |
+
- *Turn off smart routing*, *Go back to normal chat only* -> disables the JSON intent router (slash commands still work)
|
| 119 |
+
- *Show the brain trace*, *Hide debug trace* -> toggles the optional *Brain trace* footer on replies
|
| 120 |
+
- **Shortcuts:** `/help`, `/status`, `/classify`, `/retrieve`, `/summarize`, `/reformulate`, `/grounded q ||| ctx`, `/remember`, `/session`, `/memories`, `/clear-session`, **`/similarity a ||| b`**, **`/embed` / `/embedding`**, **`/nearest q ||| c1 ||| c2`**.
|
| 121 |
|
| 122 |
**Intents the router understands** (examples, not exact wording):
|
| 123 |
- Ordinary chat / questions
|
|
|
|
| 126 |
- **Answer using only** these facts — include both facts and question
|
| 127 |
- **Search** the FAQ / **find** in the knowledge base
|
| 128 |
- **Classify** (topic model) this paragraph
|
| 129 |
+
- **Similarity:** are these two snippets close in meaning? (encoder cosine)
|
| 130 |
+
- **Embedding** stats for a passage (dimension, norm, preview)
|
| 131 |
+
- **Nearest** among several options: which candidate is closest to a query? (`query ||| opt1 ||| opt2 …`)
|
| 132 |
- **Remember** / note / store: **long-term** vs **this session only**
|
| 133 |
- **Show** saved notes; **clear** session notes
|
| 134 |
- **Status** of loaded models
|
|
|
|
| 149 |
- grounded — answer only from given facts; put QUESTION in "question", FACTS in "context" (if user mixes both in one blob, split sensibly)
|
| 150 |
- retrieve — search FAQ/knowledge; put search query in "text"
|
| 151 |
- classify — show topic-classifier probabilities; put passage in "text"
|
| 152 |
+
- similarity — cosine similarity between two texts; put "text_a ||| text_b" in "text"
|
| 153 |
+
- embedding — embedding vector summary for one passage; put passage in "text"
|
| 154 |
+
- nearest — encoder top-k over candidates; put "query ||| candidate1 ||| candidate2 ||| …" in "text" (at least one candidate)
|
| 155 |
- remember — save a durable note; put note body in "text"
|
| 156 |
- session_note — save a session-only note; put note in "text"
|
| 157 |
- list_memories — user wants to see saved notes
|
|
|
|
| 172 |
"grounded",
|
| 173 |
"retrieve",
|
| 174 |
"classify",
|
| 175 |
+
"similarity",
|
| 176 |
+
"embedding",
|
| 177 |
+
"nearest",
|
| 178 |
"remember",
|
| 179 |
"session_note",
|
| 180 |
"list_memories",
|
|
|
|
| 191 |
"search": "retrieve",
|
| 192 |
"faq": "retrieve",
|
| 193 |
"lookup": "retrieve",
|
| 194 |
+
"similar": "similarity",
|
| 195 |
+
"cosine": "similarity",
|
| 196 |
+
"embed": "embedding",
|
| 197 |
+
"embeddings": "embedding",
|
| 198 |
+
"knn": "nearest",
|
| 199 |
+
"triage": "nearest",
|
| 200 |
+
"encoder_retrieve": "nearest",
|
| 201 |
}
|
| 202 |
|
| 203 |
|
| 204 |
+
def _parse_two_segments(blob: str) -> tuple[str, str]:
|
| 205 |
+
if "|||" not in blob:
|
| 206 |
+
raise ValueError("Need two segments separated by `|||` (e.g. `text A ||| text B`).")
|
| 207 |
+
a, _, b = blob.partition("|||")
|
| 208 |
+
a, b = a.strip(), b.strip()
|
| 209 |
+
if not a or not b:
|
| 210 |
+
raise ValueError("Both sides of `|||` must be non-empty.")
|
| 211 |
+
return a, b
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
def _parse_nearest_blob(blob: str) -> tuple[str, list[str]]:
|
| 215 |
+
parts = [p.strip() for p in blob.split("|||") if p.strip()]
|
| 216 |
+
if len(parts) < 2:
|
| 217 |
+
raise ValueError(
|
| 218 |
+
"Need `query ||| candidate1 ||| candidate2` (at least one candidate after `|||`)."
|
| 219 |
+
)
|
| 220 |
+
return parts[0], parts[1:]
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
def _embedding_summary_markdown(encoder: TinyModelRuntime, passage: str) -> str:
|
| 224 |
+
vec = encoder.embed([passage], normalize=False)[0]
|
| 225 |
+
dim = int(vec.shape[0])
|
| 226 |
+
norm = float(torch.linalg.vector_norm(vec))
|
| 227 |
+
k = min(8, dim)
|
| 228 |
+
head = ", ".join(f"{float(vec[i]):.4f}" for i in range(k))
|
| 229 |
+
return "\n".join(
|
| 230 |
+
[
|
| 231 |
+
"### Encoder embedding (raw [CLS], not L2-normalized)\n",
|
| 232 |
+
f"- **dim:** {dim}",
|
| 233 |
+
f"- **L2 norm:** {norm:.4f}",
|
| 234 |
+
f"- **first {k} values:** {head}",
|
| 235 |
+
]
|
| 236 |
+
)
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
def _nearest_markdown(
    encoder: TinyModelRuntime,
    query: str,
    candidates: list[str],
    *,
    top_k: int,
) -> str:
    """Format the encoder's top matches for ``query`` among ``candidates`` as markdown.

    Calls ``encoder.retrieve(query, candidates, top_k=top_k)`` and emits one
    entry per hit, ranked from 1, showing the hit's ``score`` (four decimals),
    its ``index``, and its ``text`` passed through ``_clip(..., 700)``.
    Returns the placeholder ``"(No candidates.)"`` when nothing comes back.

    NOTE(review): ``_clip`` is defined elsewhere in this file; presumably it
    truncates the text to the given length, so confirm there.
    """
    ranked = encoder.retrieve(query, candidates, top_k=top_k)
    if not ranked:
        return "(No candidates.)"
    heading = "### Encoder nearest neighbors (cosine on pooled embeddings)\n"
    entries = [
        f"**#{position}** score={hit.score:.4f} · index={hit.index}\n{_clip(hit.text, 700)}\n"
        for position, hit in enumerate(ranked, start=1)
    ]
    return "\n".join([heading, *entries])
|
| 255 |
+
|
| 256 |
+
|
| 257 |
def _classifier_result_markdown(probs: dict[str, float]) -> str:
|
| 258 |
ranked = sorted(probs.items(), key=lambda x: -x[1])
|
| 259 |
top_lab, top_p = ranked[0]
|
|
|
|
| 457 |
out.append(f"**#{i}** score={sc:.4f}\n{_clip(txt, 700)}\n")
|
| 458 |
return "\n".join(out)
|
| 459 |
|
| 460 |
+
if intent == "similarity":
|
| 461 |
+
if not encoder:
|
| 462 |
+
return "Similarity needs the encoder (drop `--lm-only` / `--no-encoder`)."
|
| 463 |
+
blob = (text or msg).strip()
|
| 464 |
+
if not blob:
|
| 465 |
+
return "Provide two texts: `first ||| second`."
|
| 466 |
+
try:
|
| 467 |
+
ta, tb = _parse_two_segments(blob)
|
| 468 |
+
except ValueError as e:
|
| 469 |
+
return str(e)
|
| 470 |
+
score = encoder.similarity(ta, tb)
|
| 471 |
+
return (
|
| 472 |
+
"### Similarity (encoder cosine)\n"
|
| 473 |
+
f"**Score:** {score:.4f}\n\n"
|
| 474 |
+
f"**A:** {_clip(ta, 480)}\n\n"
|
| 475 |
+
f"**B:** {_clip(tb, 480)}"
|
| 476 |
+
)
|
| 477 |
+
|
| 478 |
+
if intent == "embedding":
|
| 479 |
+
if not encoder:
|
| 480 |
+
return "Embedding stats need the encoder (drop `--lm-only` / `--no-encoder`)."
|
| 481 |
+
passage = (text or msg).strip()
|
| 482 |
+
if not passage:
|
| 483 |
+
return "What text should I embed?"
|
| 484 |
+
return _embedding_summary_markdown(encoder, passage)
|
| 485 |
+
|
| 486 |
+
if intent == "nearest":
|
| 487 |
+
if not encoder:
|
| 488 |
+
return "Nearest-neighbor search needs the encoder (drop `--lm-only` / `--no-encoder`)."
|
| 489 |
+
blob = (text or msg).strip()
|
| 490 |
+
if not blob:
|
| 491 |
+
return "Usage: `query ||| option1 ||| option2 ...`"
|
| 492 |
+
try:
|
| 493 |
+
query, cands = _parse_nearest_blob(blob)
|
| 494 |
+
except ValueError as e:
|
| 495 |
+
return str(e)
|
| 496 |
+
k = max(1, min(rag_top_k, len(cands)))
|
| 497 |
+
return _nearest_markdown(encoder, query, cands, top_k=k)
|
| 498 |
+
|
| 499 |
if intent in ("summarize", "reformulate", "grounded"):
|
| 500 |
if intent == "grounded":
|
| 501 |
qn = question or text
|
|
|
|
| 560 |
return ""
|
| 561 |
|
| 562 |
|
| 563 |
+
def handle_nl_control(
|
| 564 |
+
msg: str,
|
| 565 |
+
session: dict[str, Any],
|
| 566 |
+
*,
|
| 567 |
+
mem_conn: sqlite3.Connection | None,
|
| 568 |
+
scope_key: str,
|
| 569 |
+
rag_chunks_base: list[str] | None,
|
| 570 |
+
locked_no_smart_route: bool,
|
| 571 |
+
) -> str | None:
|
| 572 |
+
act = parse_control_action(msg)
|
| 573 |
+
if act is None:
|
| 574 |
+
return None
|
| 575 |
+
|
| 576 |
+
if act.name == "show_session":
|
| 577 |
+
bits = [
|
| 578 |
+
f"- scope: `{scope_key}`",
|
| 579 |
+
f"- smart routing: **{'on' if session.get('smart_route') and not locked_no_smart_route else 'off'}**",
|
| 580 |
+
f"- FAQ context: **{'on' if session.get('rag') and rag_chunks_base is not None else 'off'}**",
|
| 581 |
+
f"- brain trace footer: **{'on' if session.get('trace') else 'off'}**",
|
| 582 |
+
f"- memory store: **{'on' if mem_conn is not None else 'off'}**",
|
| 583 |
+
f"- reply length: **{session.get('verbosity', 'normal')}**",
|
| 584 |
+
f"- lists: **{'bullets when helpful' if session.get('reply_format') == 'bullets' else 'prose'}**",
|
| 585 |
+
f"- FAQ grounding: **{session.get('faq_grounding', 'normal')}**",
|
| 586 |
+
f"- audience: **{session.get('audience', 'normal')}**",
|
| 587 |
+
f"- answer opening: **{session.get('answer_lead', 'normal')}**",
|
| 588 |
+
f"- procedure steps: **{session.get('step_style', 'normal')}**",
|
| 589 |
+
f"- confidence tone: **{session.get('confidence_tone', 'normal')}**",
|
| 590 |
+
f"- follow-up ending: **{session.get('followup_close', 'normal')}**",
|
| 591 |
+
f"- concept order: **{session.get('exposition_order', 'normal')}**",
|
| 592 |
+
f"- examples: **{session.get('example_density', 'normal')}**",
|
| 593 |
+
f"- comparisons: **{session.get('comparison_frame', 'normal')}**",
|
| 594 |
+
f"- register: **{session.get('register_tone', 'normal')}**",
|
| 595 |
+
f"- code blocks: **{session.get('code_block_style', 'normal')}**",
|
| 596 |
+
f"- analogies: **{session.get('analogy_use', 'normal')}**",
|
| 597 |
+
f"- acronyms: **{session.get('acronym_style', 'normal')}**",
|
| 598 |
+
f"- clarify-first: **{session.get('clarify_first', 'normal')}**",
|
| 599 |
+
f"- speculation: **{session.get('speculation', 'normal')}**",
|
| 600 |
+
f"- math detail: **{session.get('math_detail', 'normal')}**",
|
| 601 |
+
f"- output format: **{session.get('output_format', 'normal')}**",
|
| 602 |
+
f"- risk posture: **{session.get('risk_posture', 'normal')}**",
|
| 603 |
+
f"- actionability: **{session.get('actionability', 'normal')}**",
|
| 604 |
+
f"- quote style: **{session.get('quote_style', 'normal')}**",
|
| 605 |
+
f"- tables: **{session.get('table_style', 'normal')}**",
|
| 606 |
+
f"- emoji: **{session.get('emoji_style', 'normal')}**",
|
| 607 |
+
f"- section headings: **{session.get('section_headings', 'normal')}**",
|
| 608 |
+
f"- term emphasis: **{session.get('term_emphasis', 'normal')}**",
|
| 609 |
+
f"- counterpoints: **{session.get('counterpoint_tone', 'normal')}**",
|
| 610 |
+
]
|
| 611 |
+
return "### Session settings\n" + "\n".join(bits)
|
| 612 |
+
|
| 613 |
+
if act.name == "new_private_session":
|
| 614 |
+
# Keep it readable and low-collision; not a secret, just a scope id.
|
| 615 |
+
new_scope = f"ub-{uuid.uuid4().hex[:8]}"
|
| 616 |
+
session["scope_key"] = new_scope
|
| 617 |
+
return (
|
| 618 |
+
f"**Started a new private session scope.**\n\n"
|
| 619 |
+
f"Current scope is now `{new_scope}`.\n"
|
| 620 |
+
"Memory operations (remember/export/forget) will apply to this new scope."
|
| 621 |
+
)
|
| 622 |
+
|
| 623 |
+
if act.name == "set_scope":
|
| 624 |
+
if not act.value:
|
| 625 |
+
return "Tell me the scope key, e.g. `Switch to scope demo-123`."
|
| 626 |
+
session["scope_key"] = act.value
|
| 627 |
+
return f"Switched session scope to `{act.value}`."
|
| 628 |
+
|
| 629 |
+
if act.name == "export_memory":
|
| 630 |
+
if mem_conn is None:
|
| 631 |
+
return "Memory is off for this Space (no SQLite store); nothing to export."
|
| 632 |
+
blob = export_scope_json(mem_conn, scope_key)
|
| 633 |
+
js = json.dumps(blob, indent=2, ensure_ascii=False)
|
| 634 |
+
max_chars = 48_000
|
| 635 |
+
if len(js) > max_chars:
|
| 636 |
+
js = js[:max_chars] + "\n…(truncated for chat; schema is horizon3_export/1.0)…"
|
| 637 |
+
return f"### Memory export (`{scope_key}`)\nPaste/save externally if needed.\n\n```json\n{js}\n```"
|
| 638 |
+
|
| 639 |
+
if act.name == "forget_scope":
|
| 640 |
+
if mem_conn is None:
|
| 641 |
+
return "Memory is off; nothing to delete."
|
| 642 |
+
n = forget_scope(mem_conn, scope_key)
|
| 643 |
+
return (
|
| 644 |
+
f"**Erased stored memory for this Space session.**\n\n"
|
| 645 |
+
f"Deleted **{n}** row(s) (**session + long-term**) for `{scope_key}`."
|
| 646 |
+
)
|
| 647 |
+
|
| 648 |
+
if act.name == "list_memories":
|
| 649 |
+
if mem_conn is None:
|
| 650 |
+
return "Memory is off."
|
| 651 |
+
items = list_for_scope(mem_conn, scope_key)
|
| 652 |
+
if not items:
|
| 653 |
+
return "(No saved notes for this scope.)"
|
| 654 |
+
lines = [f"- **{it.kind}** · {_clip(it.content, 320)}" for it in items[:24]]
|
| 655 |
+
extra = f"\n\n… {len(items) - 24} more" if len(items) > 24 else ""
|
| 656 |
+
return "**Saved notes:**\n" + "\n".join(lines) + extra
|
| 657 |
+
|
| 658 |
+
if act.name == "clear_session":
|
| 659 |
+
if mem_conn is None:
|
| 660 |
+
return "Memory is off."
|
| 661 |
+
n = clear_session(mem_conn, scope_key)
|
| 662 |
+
return f"Cleared **{n}** session note(s). Long-term notes unchanged."
|
| 663 |
+
|
| 664 |
+
if act.name == "set_trace":
|
| 665 |
+
session["trace"] = act.value == "on"
|
| 666 |
+
return f"**Brain trace** is now **{'on' if session['trace'] else 'off'}** (footer on assistant replies)."
|
| 667 |
+
|
| 668 |
+
if act.name == "set_smart_route":
|
| 669 |
+
if locked_no_smart_route:
|
| 670 |
+
return "Smart routing is **locked off** for this server (`--no-smart-route`)."
|
| 671 |
+
session["smart_route"] = act.value == "on"
|
| 672 |
+
return (
|
| 673 |
+
f"**Smart routing** is now **{'on' if session['smart_route'] else 'off'}** "
|
| 674 |
+
"(off = plain chat + FAQ context injection + slash shortcuts only)."
|
| 675 |
+
)
|
| 676 |
+
|
| 677 |
+
if act.name == "set_rag":
|
| 678 |
+
if rag_chunks_base is None:
|
| 679 |
+
return "FAQ/RAG corpus is **not loaded** on this deployment; nothing to toggle."
|
| 680 |
+
session["rag"] = act.value == "on"
|
| 681 |
+
return (
|
| 682 |
+
f"**FAQ/RAG excerpts in prompts** are now **{'on' if session['rag'] else 'off'}**."
|
| 683 |
+
)
|
| 684 |
+
|
| 685 |
+
if act.name == "reset_reply_style":
|
| 686 |
+
session["verbosity"] = "normal"
|
| 687 |
+
session["reply_format"] = "prose"
|
| 688 |
+
session["faq_grounding"] = "normal"
|
| 689 |
+
session["audience"] = "normal"
|
| 690 |
+
session["answer_lead"] = "normal"
|
| 691 |
+
session["step_style"] = "normal"
|
| 692 |
+
session["confidence_tone"] = "normal"
|
| 693 |
+
session["followup_close"] = "normal"
|
| 694 |
+
session["exposition_order"] = "normal"
|
| 695 |
+
session["example_density"] = "normal"
|
| 696 |
+
session["comparison_frame"] = "normal"
|
| 697 |
+
session["register_tone"] = "normal"
|
| 698 |
+
session["code_block_style"] = "normal"
|
| 699 |
+
session["analogy_use"] = "normal"
|
| 700 |
+
session["acronym_style"] = "normal"
|
| 701 |
+
session["clarify_first"] = "normal"
|
| 702 |
+
session["speculation"] = "normal"
|
| 703 |
+
session["math_detail"] = "normal"
|
| 704 |
+
session["output_format"] = "normal"
|
| 705 |
+
session["risk_posture"] = "normal"
|
| 706 |
+
session["actionability"] = "normal"
|
| 707 |
+
session["quote_style"] = "normal"
|
| 708 |
+
session["table_style"] = "normal"
|
| 709 |
+
session["emoji_style"] = "normal"
|
| 710 |
+
session["section_headings"] = "normal"
|
| 711 |
+
session["term_emphasis"] = "normal"
|
| 712 |
+
session["counterpoint_tone"] = "normal"
|
| 713 |
+
return (
|
| 714 |
+
"**Reply style reset:** normal length, prose, balanced FAQ grounding, general audience, "
|
| 715 |
+
"default opening, default steps, normal confidence tone, default follow-ups, default concept order, "
|
| 716 |
+
"default examples, default comparisons, default register, default code blocks, default analogies, "
|
| 717 |
+
"default acronyms, default clarify mode, default speculation, default math detail, default output format, "
|
| 718 |
+
"default risk posture, default actionability, default quote style, default tables, default emoji, "
|
| 719 |
+
"default section headings, default term emphasis, default counterpoints."
|
| 720 |
+
)
|
| 721 |
+
|
| 722 |
+
if act.name == "set_verbosity":
|
| 723 |
+
v = (act.value or "normal").lower()
|
| 724 |
+
if v not in ("brief", "normal", "detailed"):
|
| 725 |
+
v = "normal"
|
| 726 |
+
session["verbosity"] = v
|
| 727 |
+
return f"**Reply length** is now **{v}** (applies to assistant chat replies)."
|
| 728 |
+
|
| 729 |
+
if act.name == "set_reply_format":
|
| 730 |
+
f = (act.value or "prose").lower()
|
| 731 |
+
if f not in ("prose", "bullets"):
|
| 732 |
+
f = "prose"
|
| 733 |
+
session["reply_format"] = f
|
| 734 |
+
return f"**List formatting** is now **{f}** (how the assistant structures multi-point answers)."
|
| 735 |
+
|
| 736 |
+
if act.name == "set_faq_grounding":
|
| 737 |
+
mode = (act.value or "normal").lower()
|
| 738 |
+
if mode not in ("strict", "normal", "relaxed"):
|
| 739 |
+
mode = "normal"
|
| 740 |
+
session["faq_grounding"] = mode
|
| 741 |
+
extra = ""
|
| 742 |
+
if rag_chunks_base is None or not session.get("rag", True):
|
| 743 |
+
extra = (
|
| 744 |
+
"\n\n**Note:** FAQ excerpt injection is currently **off** in this chat session "
|
| 745 |
+
"(or no FAQ corpus loaded). Grounding hints apply whenever FAQ snippets are present."
|
| 746 |
+
)
|
| 747 |
+
return f"**FAQ grounding** is now **{mode}**.{extra}"
|
| 748 |
+
|
| 749 |
+
if act.name == "set_audience":
|
| 750 |
+
aud = (act.value or "normal").lower()
|
| 751 |
+
if aud not in ("simple", "normal", "technical"):
|
| 752 |
+
aud = "normal"
|
| 753 |
+
session["audience"] = aud
|
| 754 |
+
label = {"simple": "beginner-friendly", "normal": "general", "technical": "technical"}.get(aud, aud)
|
| 755 |
+
return f"**Audience** is now **{label}** (how deep or jargon-heavy explanations should feel)."
|
| 756 |
+
|
| 757 |
+
if act.name == "set_answer_lead":
|
| 758 |
+
lead = (act.value or "normal").lower()
|
| 759 |
+
if lead not in ("tldr_first", "direct", "normal"):
|
| 760 |
+
lead = "normal"
|
| 761 |
+
session["answer_lead"] = lead
|
| 762 |
+
human = {"tldr_first": "TL;DR first line", "direct": "straight in (no TL;DR line)", "normal": "default"}.get(
|
| 763 |
+
lead, lead
|
| 764 |
+
)
|
| 765 |
+
return f"**Answer opening** is now **{human}**."
|
| 766 |
+
|
| 767 |
+
if act.name == "set_step_style":
|
| 768 |
+
st = (act.value or "normal").lower()
|
| 769 |
+
if st not in ("numbered", "continuous", "normal"):
|
| 770 |
+
st = "normal"
|
| 771 |
+
session["step_style"] = st
|
| 772 |
+
human = {
|
| 773 |
+
"numbered": "numbered steps when explaining procedures",
|
| 774 |
+
"continuous": "continuous prose (avoid numbered step lists)",
|
| 775 |
+
"normal": "default",
|
| 776 |
+
}.get(st, st)
|
| 777 |
+
return f"**Procedure layout** is now **{human}**."
|
| 778 |
+
|
| 779 |
+
if act.name == "set_confidence_tone":
|
| 780 |
+
ct = (act.value or "normal").lower()
|
| 781 |
+
if ct not in ("transparent", "assertive", "normal"):
|
| 782 |
+
ct = "normal"
|
| 783 |
+
session["confidence_tone"] = ct
|
| 784 |
+
human = {
|
| 785 |
+
"transparent": "flag limits and assumptions",
|
| 786 |
+
"assertive": "decisive, minimal hedging",
|
| 787 |
+
"normal": "default",
|
| 788 |
+
}.get(ct, ct)
|
| 789 |
+
return f"**Confidence tone** is now **{human}**."
|
| 790 |
+
|
| 791 |
+
if act.name == "set_followup_close":
|
| 792 |
+
fu = (act.value or "normal").lower()
|
| 793 |
+
if fu not in ("suggest", "minimal", "normal"):
|
| 794 |
+
fu = "normal"
|
| 795 |
+
session["followup_close"] = fu
|
| 796 |
+
human = {
|
| 797 |
+
"suggest": "offer brief next steps / follow-ups when useful",
|
| 798 |
+
"minimal": "no rhetorical closing questions",
|
| 799 |
+
"normal": "default",
|
| 800 |
+
}.get(fu, fu)
|
| 801 |
+
return f"**Follow-up closing** is now **{human}**."
|
| 802 |
+
|
| 803 |
+
if act.name == "set_exposition_order":
|
| 804 |
+
eo = (act.value or "normal").lower()
|
| 805 |
+
if eo not in ("definitions_first", "intuition_first", "normal"):
|
| 806 |
+
eo = "normal"
|
| 807 |
+
session["exposition_order"] = eo
|
| 808 |
+
human = {
|
| 809 |
+
"definitions_first": "definitions and terms before intuition",
|
| 810 |
+
"intuition_first": "big-picture intuition before formal detail",
|
| 811 |
+
"normal": "default",
|
| 812 |
+
}.get(eo, eo)
|
| 813 |
+
return f"**Concept order** is now **{human}**."
|
| 814 |
+
|
| 815 |
+
if act.name == "set_example_density":
|
| 816 |
+
ed = (act.value or "normal").lower()
|
| 817 |
+
if ed not in ("rich", "sparse", "normal"):
|
| 818 |
+
ed = "normal"
|
| 819 |
+
session["example_density"] = ed
|
| 820 |
+
human = {
|
| 821 |
+
"rich": "include concrete examples when they help",
|
| 822 |
+
"sparse": "minimal examples unless asked",
|
| 823 |
+
"normal": "default",
|
| 824 |
+
}.get(ed, ed)
|
| 825 |
+
return f"**Examples** preference is now **{human}**."
|
| 826 |
+
|
| 827 |
+
if act.name == "set_comparison_frame":
|
| 828 |
+
cf = (act.value or "normal").lower()
|
| 829 |
+
if cf not in ("pros_cons", "narrative", "normal"):
|
| 830 |
+
cf = "normal"
|
| 831 |
+
session["comparison_frame"] = cf
|
| 832 |
+
human = {
|
| 833 |
+
"pros_cons": "explicit Pros / Cons sections for trade-offs",
|
| 834 |
+
"narrative": "flowing prose comparisons (no rigid Pros/Cons headings)",
|
| 835 |
+
"normal": "default",
|
| 836 |
+
}.get(cf, cf)
|
| 837 |
+
return f"**Comparison layout** is now **{human}**."
|
| 838 |
+
|
| 839 |
+
if act.name == "set_register_tone":
|
| 840 |
+
rt = (act.value or "normal").lower()
|
| 841 |
+
if rt not in ("formal", "casual", "normal"):
|
| 842 |
+
rt = "normal"
|
| 843 |
+
session["register_tone"] = rt
|
| 844 |
+
human = {
|
| 845 |
+
"formal": "professional / polished wording",
|
| 846 |
+
"casual": "friendly conversational wording",
|
| 847 |
+
"normal": "default",
|
| 848 |
+
}.get(rt, rt)
|
| 849 |
+
return f"**Register** is now **{human}**."
|
| 850 |
+
|
| 851 |
+
if act.name == "set_code_block_style":
|
| 852 |
+
cs = (act.value or "normal").lower()
|
| 853 |
+
if cs not in ("fenced", "inline", "normal"):
|
| 854 |
+
cs = "normal"
|
| 855 |
+
session["code_block_style"] = cs
|
| 856 |
+
human = {
|
| 857 |
+
"fenced": "use ``` fenced blocks for multi-line code",
|
| 858 |
+
"inline": "prefer inline `backticks`, avoid large fences",
|
| 859 |
+
"normal": "default",
|
| 860 |
+
}.get(cs, cs)
|
| 861 |
+
return f"**Code markdown** is now **{human}**."
|
| 862 |
+
|
| 863 |
+
if act.name == "set_analogy_use":
|
| 864 |
+
au = (act.value or "normal").lower()
|
| 865 |
+
if au not in ("prefer", "avoid", "normal"):
|
| 866 |
+
au = "normal"
|
| 867 |
+
session["analogy_use"] = au
|
| 868 |
+
human = {
|
| 869 |
+
"prefer": "use concise analogies when they clarify",
|
| 870 |
+
"avoid": "literal wording; skip analogies and metaphors",
|
| 871 |
+
"normal": "default",
|
| 872 |
+
}.get(au, au)
|
| 873 |
+
return f"**Analogy usage** is now **{human}**."
|
| 874 |
+
|
| 875 |
+
if act.name == "set_acronym_style":
|
| 876 |
+
ac = (act.value or "normal").lower()
|
| 877 |
+
if ac not in ("spell_out", "terse", "normal"):
|
| 878 |
+
ac = "normal"
|
| 879 |
+
session["acronym_style"] = ac
|
| 880 |
+
human = {
|
| 881 |
+
"spell_out": "expand unfamiliar acronyms on first mention",
|
| 882 |
+
"terse": "keep acronym forms without spelling them out first",
|
| 883 |
+
"normal": "default",
|
| 884 |
+
}.get(ac, ac)
|
| 885 |
+
return f"**Acronym style** is now **{human}**."
|
| 886 |
+
|
| 887 |
+
if act.name == "set_clarify_first":
|
| 888 |
+
cf = (act.value or "normal").lower()
|
| 889 |
+
if cf not in ("on", "off", "normal"):
|
| 890 |
+
cf = "normal"
|
| 891 |
+
session["clarify_first"] = cf
|
| 892 |
+
human = {
|
| 893 |
+
"on": "ask 1–3 targeted clarifying questions before answering when info is missing",
|
| 894 |
+
"off": "answer immediately; do not ask clarifying questions first",
|
| 895 |
+
"normal": "default",
|
| 896 |
+
}.get(cf, cf)
|
| 897 |
+
return f"**Clarify-first** is now **{human}**."
|
| 898 |
+
|
| 899 |
+
if act.name == "set_speculation":
|
| 900 |
+
sp = (act.value or "normal").lower()
|
| 901 |
+
if sp not in ("strict", "creative", "normal"):
|
| 902 |
+
sp = "normal"
|
| 903 |
+
session["speculation"] = sp
|
| 904 |
+
human = {
|
| 905 |
+
"strict": "avoid guessing; stick to high-confidence statements",
|
| 906 |
+
"creative": "brainstorm and speculate (label assumptions clearly)",
|
| 907 |
+
"normal": "default",
|
| 908 |
+
}.get(sp, sp)
|
| 909 |
+
return f"**Speculation level** is now **{human}**."
|
| 910 |
+
|
| 911 |
+
if act.name == "set_math_detail":
|
| 912 |
+
md = (act.value or "normal").lower()
|
| 913 |
+
if md not in ("show_work", "final_only", "normal"):
|
| 914 |
+
md = "normal"
|
| 915 |
+
session["math_detail"] = md
|
| 916 |
+
human = {
|
| 917 |
+
"show_work": "show intermediate steps/derivation when doing math-like reasoning",
|
| 918 |
+
"final_only": "final results only (no derivation/steps)",
|
| 919 |
+
"normal": "default",
|
| 920 |
+
}.get(md, md)
|
| 921 |
+
return f"**Math detail** is now **{human}**."
|
| 922 |
+
|
| 923 |
+
if act.name == "set_output_format":
|
| 924 |
+
of = (act.value or "normal").lower()
|
| 925 |
+
if of not in ("json", "plain", "normal"):
|
| 926 |
+
of = "normal"
|
| 927 |
+
session["output_format"] = of
|
| 928 |
+
human = {
|
| 929 |
+
"json": "reply in a JSON-shaped object when possible",
|
| 930 |
+
"plain": "plain text (no forced JSON structure)",
|
| 931 |
+
"normal": "default",
|
| 932 |
+
}.get(of, of)
|
| 933 |
+
return f"**Output format** is now **{human}**."
|
| 934 |
+
|
| 935 |
+
if act.name == "set_risk_posture":
|
| 936 |
+
rp = (act.value or "normal").lower()
|
| 937 |
+
if rp not in ("conservative", "pragmatic", "normal"):
|
| 938 |
+
rp = "normal"
|
| 939 |
+
session["risk_posture"] = rp
|
| 940 |
+
human = {
|
| 941 |
+
"conservative": "risk-averse / safety-first recommendations",
|
| 942 |
+
"pragmatic": "practical, speed-oriented recommendations",
|
| 943 |
+
"normal": "default",
|
| 944 |
+
}.get(rp, rp)
|
| 945 |
+
return f"**Risk posture** is now **{human}**."
|
| 946 |
+
|
| 947 |
+
if act.name == "set_actionability":
|
| 948 |
+
ac = (act.value or "normal").lower()
|
| 949 |
+
if ac not in ("commands", "conceptual", "normal"):
|
| 950 |
+
ac = "normal"
|
| 951 |
+
session["actionability"] = ac
|
| 952 |
+
human = {
|
| 953 |
+
"commands": "include runnable commands/snippets when possible",
|
| 954 |
+
"conceptual": "avoid commands; stay conceptual/high-level",
|
| 955 |
+
"normal": "default",
|
| 956 |
+
}.get(ac, ac)
|
| 957 |
+
return f"**Actionability** is now **{human}**."
|
| 958 |
+
|
| 959 |
+
if act.name == "set_quote_style":
|
| 960 |
+
qs = (act.value or "normal").lower()
|
| 961 |
+
if qs not in ("quote", "paraphrase", "normal"):
|
| 962 |
+
qs = "normal"
|
| 963 |
+
session["quote_style"] = qs
|
| 964 |
+
human = {
|
| 965 |
+
"quote": "prefer short direct quotes when relying on FAQ excerpts",
|
| 966 |
+
"paraphrase": "paraphrase excerpts; avoid quoting",
|
| 967 |
+
"normal": "default",
|
| 968 |
+
}.get(qs, qs)
|
| 969 |
+
return f"**Quote style** is now **{human}**."
|
| 970 |
+
|
| 971 |
+
if act.name == "set_table_style":
|
| 972 |
+
ts = (act.value or "normal").lower()
|
| 973 |
+
if ts not in ("prefer", "avoid", "normal"):
|
| 974 |
+
ts = "normal"
|
| 975 |
+
session["table_style"] = ts
|
| 976 |
+
human = {
|
| 977 |
+
"prefer": "use markdown tables when presenting structured comparisons",
|
| 978 |
+
"avoid": "avoid tables; use bullets/prose instead",
|
| 979 |
+
"normal": "default",
|
| 980 |
+
}.get(ts, ts)
|
| 981 |
+
return f"**Tables** preference is now **{human}**."
|
| 982 |
+
|
| 983 |
+
if act.name == "set_emoji_style":
|
| 984 |
+
es = (act.value or "normal").lower()
|
| 985 |
+
if es not in ("include", "avoid", "normal"):
|
| 986 |
+
es = "normal"
|
| 987 |
+
session["emoji_style"] = es
|
| 988 |
+
human = {
|
| 989 |
+
"include": "a few tasteful emoji are welcome when they aid scanning",
|
| 990 |
+
"avoid": "no emoji unless the user uses them first",
|
| 991 |
+
"normal": "default",
|
| 992 |
+
}.get(es, es)
|
| 993 |
+
return f"**Emoji style** is now **{human}**."
|
| 994 |
+
|
| 995 |
+
if act.name == "set_section_headings":
|
| 996 |
+
sh = (act.value or "normal").lower()
|
| 997 |
+
if sh not in ("prefer", "avoid", "normal"):
|
| 998 |
+
sh = "normal"
|
| 999 |
+
session["section_headings"] = sh
|
| 1000 |
+
human = {
|
| 1001 |
+
"prefer": "use markdown ##/### headings to structure longer answers",
|
| 1002 |
+
"avoid": "avoid markdown heading lines; keep flowing paragraphs/lists",
|
| 1003 |
+
"normal": "default",
|
| 1004 |
+
}.get(sh, sh)
|
| 1005 |
+
return f"**Section headings** preference is now **{human}**."
|
| 1006 |
+
|
| 1007 |
+
if act.name == "set_term_emphasis":
|
| 1008 |
+
te = (act.value or "normal").lower()
|
| 1009 |
+
if te not in ("highlight", "minimal", "normal"):
|
| 1010 |
+
te = "normal"
|
| 1011 |
+
session["term_emphasis"] = te
|
| 1012 |
+
human = {
|
| 1013 |
+
"highlight": "bold a few crucial terms/phrases for scanability",
|
| 1014 |
+
"minimal": "avoid decorative bold; use it sparingly",
|
| 1015 |
+
"normal": "default",
|
| 1016 |
+
}.get(te, te)
|
| 1017 |
+
return f"**Term emphasis** is now **{human}**."
|
| 1018 |
+
|
| 1019 |
+
if act.name == "set_counterpoint_tone":
|
| 1020 |
+
cp = (act.value or "normal").lower()
|
| 1021 |
+
if cp not in ("challenge", "supportive", "normal"):
|
| 1022 |
+
cp = "normal"
|
| 1023 |
+
session["counterpoint_tone"] = cp
|
| 1024 |
+
human = {
|
| 1025 |
+
"challenge": "look for gaps; name risks and counterarguments respectfully",
|
| 1026 |
+
"supportive": "prioritize encouragement and constructive framing",
|
| 1027 |
+
"normal": "default",
|
| 1028 |
+
}.get(cp, cp)
|
| 1029 |
+
return f"**Counterpoint tone** is now **{human}**."
|
| 1030 |
+
|
| 1031 |
+
return None
|
| 1032 |
+
|
| 1033 |
+
|
| 1034 |
+
def _append_reply_style_hints(extras: list[str], session: dict[str, Any]) -> None:
|
| 1035 |
+
verbosity = str(session.get("verbosity") or "normal").lower()
|
| 1036 |
+
rformat = str(session.get("reply_format") or "prose").lower()
|
| 1037 |
+
if verbosity not in ("brief", "normal", "detailed"):
|
| 1038 |
+
verbosity = "normal"
|
| 1039 |
+
if rformat not in ("prose", "bullets"):
|
| 1040 |
+
rformat = "prose"
|
| 1041 |
+
lines: list[str] = []
|
| 1042 |
+
if verbosity == "brief":
|
| 1043 |
+
lines.append(
|
| 1044 |
+
"Keep replies concise (about a short paragraph or less) unless the user explicitly asks for depth."
|
| 1045 |
+
)
|
| 1046 |
+
elif verbosity == "detailed":
|
| 1047 |
+
lines.append("Prefer fuller, well-structured explanations when they help the user.")
|
| 1048 |
+
if rformat == "bullets":
|
| 1049 |
+
lines.append("When listing multiple points, use markdown bullet or numbered lists.")
|
| 1050 |
+
audience = str(session.get("audience") or "normal").lower()
|
| 1051 |
+
if audience not in ("simple", "normal", "technical"):
|
| 1052 |
+
audience = "normal"
|
| 1053 |
+
if audience == "simple":
|
| 1054 |
+
lines.append(
|
| 1055 |
+
"Assume the reader is new to the topic: define jargon when you use it, prefer plain language and small steps."
|
| 1056 |
+
)
|
| 1057 |
+
elif audience == "technical":
|
| 1058 |
+
lines.append(
|
| 1059 |
+
"Assume a technical reader: standard domain terms and shorthand are fine; prioritize precision over hand-holding."
|
| 1060 |
+
)
|
| 1061 |
+
lead = str(session.get("answer_lead") or "normal").lower()
|
| 1062 |
+
if lead not in ("tldr_first", "direct", "normal"):
|
| 1063 |
+
lead = "normal"
|
| 1064 |
+
if lead == "tldr_first":
|
| 1065 |
+
lines.append(
|
| 1066 |
+
"Start substantive answers with one short **TL;DR:** line (one sentence), then elaborate."
|
| 1067 |
+
)
|
| 1068 |
+
elif lead == "direct":
|
| 1069 |
+
lines.append(
|
| 1070 |
+
"Do not add a standalone TL;DR/summary prelude; answer immediately in-flow (still use lists if configured)."
|
| 1071 |
+
)
|
| 1072 |
+
steps = str(session.get("step_style") or "normal").lower()
|
| 1073 |
+
if steps not in ("numbered", "continuous", "normal"):
|
| 1074 |
+
steps = "normal"
|
| 1075 |
+
if steps == "numbered":
|
| 1076 |
+
lines.append(
|
| 1077 |
+
"When explaining procedures or multi-part how-tos, structure the answer with clear **numbered steps** "
|
| 1078 |
+
"(1. 2. 3.) and one action per step when practical."
|
| 1079 |
+
)
|
| 1080 |
+
elif steps == "continuous":
|
| 1081 |
+
lines.append(
|
| 1082 |
+
"Avoid numbered step lists; explain procedures as **connected paragraphs** unless the user explicitly "
|
| 1083 |
+
"asks for steps."
|
| 1084 |
+
)
|
| 1085 |
+
conf = str(session.get("confidence_tone") or "normal").lower()
|
| 1086 |
+
if conf not in ("transparent", "assertive", "normal"):
|
| 1087 |
+
conf = "normal"
|
| 1088 |
+
if conf == "transparent":
|
| 1089 |
+
lines.append(
|
| 1090 |
+
"Be explicit about uncertainty: say when you are guessing, label key assumptions, and avoid overstating "
|
| 1091 |
+
"facts you cannot support from the prompt or supplied excerpts."
|
| 1092 |
+
)
|
| 1093 |
+
elif conf == "assertive":
|
| 1094 |
+
lines.append(
|
| 1095 |
+
"Answer in a direct, confident tone: minimize throat-clearing and hedging unless a short disclaimer is "
|
| 1096 |
+
"truly necessary for safety or policy."
|
| 1097 |
+
)
|
| 1098 |
+
fu = str(session.get("followup_close") or "normal").lower()
|
| 1099 |
+
if fu not in ("suggest", "minimal", "normal"):
|
| 1100 |
+
fu = "normal"
|
| 1101 |
+
if fu == "suggest":
|
| 1102 |
+
lines.append(
|
| 1103 |
+
"When helpful, end with concise **optional next steps** or a short **follow-up invitation** "
|
| 1104 |
+
'(e.g., one line like "Want me to drill into X?" — optional, not repetitive).'
|
| 1105 |
+
)
|
| 1106 |
+
elif fu == "minimal":
|
| 1107 |
+
lines.append(
|
| 1108 |
+
"Avoid stock closers such as prompting whether the user needs anything else unless they explicitly invite it; "
|
| 1109 |
+
"finish crisply after the core answer."
|
| 1110 |
+
)
|
| 1111 |
+
expo = str(session.get("exposition_order") or "normal").lower()
|
| 1112 |
+
if expo not in ("definitions_first", "intuition_first", "normal"):
|
| 1113 |
+
expo = "normal"
|
| 1114 |
+
if expo == "definitions_first":
|
| 1115 |
+
lines.append(
|
| 1116 |
+
"Prefer stating **definitions and key terms upfront**, then intuition, analogies, and examples."
|
| 1117 |
+
)
|
| 1118 |
+
elif expo == "intuition_first":
|
| 1119 |
+
lines.append(
|
| 1120 |
+
"Prefer a short **motivation / big-picture intuition** section first, then formal definitions and details."
|
| 1121 |
+
)
|
| 1122 |
+
ex_density = str(session.get("example_density") or "normal").lower()
|
| 1123 |
+
if ex_density not in ("rich", "sparse", "normal"):
|
| 1124 |
+
ex_density = "normal"
|
| 1125 |
+
if ex_density == "rich":
|
| 1126 |
+
lines.append(
|
| 1127 |
+
"When it clarifies the answer, include at least one **short concrete example** or miniature scenario."
|
| 1128 |
+
)
|
| 1129 |
+
elif ex_density == "sparse":
|
| 1130 |
+
lines.append(
|
| 1131 |
+
"Unless the user explicitly requests an example, keep answers **example-free** (no illustrative stories)."
|
| 1132 |
+
)
|
| 1133 |
+
comp = str(session.get("comparison_frame") or "normal").lower()
|
| 1134 |
+
if comp not in ("pros_cons", "narrative", "normal"):
|
| 1135 |
+
comp = "normal"
|
| 1136 |
+
if comp == "pros_cons":
|
| 1137 |
+
lines.append(
|
| 1138 |
+
"For trade-offs or comparing options, use markdown subheadings **Pros** and **Cons** (short bullets under each)."
|
| 1139 |
+
)
|
| 1140 |
+
elif comp == "narrative":
|
| 1141 |
+
lines.append(
|
| 1142 |
+
"For trade-offs or comparing options, weave pros/cons into **continuous prose** rather than labeled sections."
|
| 1143 |
+
)
|
| 1144 |
+
reg = str(session.get("register_tone") or "normal").lower()
|
| 1145 |
+
if reg not in ("formal", "casual", "normal"):
|
| 1146 |
+
reg = "normal"
|
| 1147 |
+
if reg == "formal":
|
| 1148 |
+
lines.append(
|
| 1149 |
+
"Use a **polished professional register**: clear sentences, minimal slang/emoji unless the topic demands it."
|
| 1150 |
+
)
|
| 1151 |
+
elif reg == "casual":
|
| 1152 |
+
lines.append(
|
| 1153 |
+
"**Conversational register** is preferred: contractions and light phrasing are fine; sound like a helpful teammate."
|
| 1154 |
+
)
|
| 1155 |
+
cb = str(session.get("code_block_style") or "normal").lower()
|
| 1156 |
+
if cb not in ("fenced", "inline", "normal"):
|
| 1157 |
+
cb = "normal"
|
| 1158 |
+
if cb == "fenced":
|
| 1159 |
+
lines.append(
|
| 1160 |
+
"For multi-line commands or code, use **markdown fenced code blocks** with a language hint when recognizable."
|
| 1161 |
+
)
|
| 1162 |
+
elif cb == "inline":
|
| 1163 |
+
lines.append(
|
| 1164 |
+
"Prefer **inline backticks** for short snippets; **avoid triple-backtick fences** unless the user pastes a block."
|
| 1165 |
+
)
|
| 1166 |
+
an = str(session.get("analogy_use") or "normal").lower()
|
| 1167 |
+
if an not in ("prefer", "avoid", "normal"):
|
| 1168 |
+
an = "normal"
|
| 1169 |
+
if an == "prefer":
|
| 1170 |
+
lines.append(
|
| 1171 |
+
"When stuck on an abstract concept, optionally add **one tight analogy/metaphor** (label it plainly; keep it respectful)."
|
| 1172 |
+
)
|
| 1173 |
+
elif an == "avoid":
|
| 1174 |
+
lines.append(
|
| 1175 |
+
"Keep explanations **literal and direct**: do **not** use analogies, metaphors, or cute comparisons."
|
| 1176 |
+
)
|
| 1177 |
+
acr = str(session.get("acronym_style") or "normal").lower()
|
| 1178 |
+
if acr not in ("spell_out", "terse", "normal"):
|
| 1179 |
+
acr = "normal"
|
| 1180 |
+
if acr == "spell_out":
|
| 1181 |
+
lines.append(
|
| 1182 |
+
'On **first substantive mention** of a non-obvious acronym/title-case initialism (e.g. API, SLA), '
|
| 1183 |
+
'write the **expanded form once** (`Long Form (ACRONYM)`), then use the acronym afterwards.'
|
| 1184 |
+
)
|
| 1185 |
+
elif acr == "terse":
|
| 1186 |
+
lines.append(
|
| 1187 |
+
"Assume the reader is acronym-literate: **reuse acronyms** as written without mandatory expansion."
|
| 1188 |
+
)
|
| 1189 |
+
|
| 1190 |
+
clarify = str(session.get("clarify_first") or "normal").lower()
|
| 1191 |
+
if clarify not in ("on", "off", "normal"):
|
| 1192 |
+
clarify = "normal"
|
| 1193 |
+
if clarify == "on":
|
| 1194 |
+
lines.append(
|
| 1195 |
+
"If the request is underspecified, ask **1–3 short clarifying questions first** (only the minimum needed), "
|
| 1196 |
+
"then wait for the user's answers before giving a full solution."
|
| 1197 |
+
)
|
| 1198 |
+
elif clarify == "off":
|
| 1199 |
+
lines.append(
|
| 1200 |
+
"Do not pause to ask clarifying questions first; provide the best answer immediately and note assumptions briefly."
|
| 1201 |
+
)
|
| 1202 |
+
|
| 1203 |
+
spec = str(session.get("speculation") or "normal").lower()
|
| 1204 |
+
if spec not in ("strict", "creative", "normal"):
|
| 1205 |
+
spec = "normal"
|
| 1206 |
+
if spec == "strict":
|
| 1207 |
+
lines.append(
|
| 1208 |
+
"Avoid speculation: prefer high-confidence statements, and say when something is unknown or not supported by the prompt."
|
| 1209 |
+
)
|
| 1210 |
+
elif spec == "creative":
|
| 1211 |
+
lines.append(
|
| 1212 |
+
"Brainstorming is allowed: you may propose speculative ideas, but label assumptions and uncertainty clearly."
|
| 1213 |
+
)
|
| 1214 |
+
|
| 1215 |
+
md = str(session.get("math_detail") or "normal").lower()
|
| 1216 |
+
if md not in ("show_work", "final_only", "normal"):
|
| 1217 |
+
md = "normal"
|
| 1218 |
+
if md == "show_work":
|
| 1219 |
+
lines.append(
|
| 1220 |
+
"When the user asks for math/derivations, show concise intermediate steps and explain symbols briefly."
|
| 1221 |
+
)
|
| 1222 |
+
elif md == "final_only":
|
| 1223 |
+
lines.append(
|
| 1224 |
+
"When the user asks for math/derivations, give the final result directly (no intermediate derivation)."
|
| 1225 |
+
)
|
| 1226 |
+
|
| 1227 |
+
of = str(session.get("output_format") or "normal").lower()
|
| 1228 |
+
if of not in ("json", "plain", "normal"):
|
| 1229 |
+
of = "normal"
|
| 1230 |
+
if of == "json":
|
| 1231 |
+
lines.append(
|
| 1232 |
+
"When appropriate, format the answer as a single JSON object with stable keys; avoid extra prose outside the JSON."
|
| 1233 |
+
)
|
| 1234 |
+
elif of == "plain":
|
| 1235 |
+
lines.append("Do not force JSON or rigid schemas; answer in normal plain text.")
|
| 1236 |
+
|
| 1237 |
+
rp = str(session.get("risk_posture") or "normal").lower()
|
| 1238 |
+
if rp not in ("conservative", "pragmatic", "normal"):
|
| 1239 |
+
rp = "normal"
|
| 1240 |
+
if rp == "conservative":
|
| 1241 |
+
lines.append(
|
| 1242 |
+
"Prefer safer, low-risk recommendations; call out risks and choose options that minimize downside."
|
| 1243 |
+
)
|
| 1244 |
+
elif rp == "pragmatic":
|
| 1245 |
+
lines.append(
|
| 1246 |
+
"Prefer practical, time-efficient recommendations; avoid over-engineering unless clearly needed."
|
| 1247 |
+
)
|
| 1248 |
+
|
| 1249 |
+
actz = str(session.get("actionability") or "normal").lower()
|
| 1250 |
+
if actz not in ("commands", "conceptual", "normal"):
|
| 1251 |
+
actz = "normal"
|
| 1252 |
+
if actz == "commands":
|
| 1253 |
+
lines.append(
|
| 1254 |
+
"When proposing a solution, include runnable commands/snippets/checklists where appropriate."
|
| 1255 |
+
)
|
| 1256 |
+
elif actz == "conceptual":
|
| 1257 |
+
lines.append(
|
| 1258 |
+
"Avoid command dumps; focus on concepts, rationale, and decision points."
|
| 1259 |
+
)
|
| 1260 |
+
|
| 1261 |
+
qs = str(session.get("quote_style") or "normal").lower()
|
| 1262 |
+
if qs not in ("quote", "paraphrase", "normal"):
|
| 1263 |
+
qs = "normal"
|
| 1264 |
+
if qs == "quote":
|
| 1265 |
+
lines.append(
|
| 1266 |
+
"When you rely on an injected **[FAQ excerpt N]**, include a short verbatim quote (a sentence or clause) "
|
| 1267 |
+
"before paraphrasing."
|
| 1268 |
+
)
|
| 1269 |
+
elif qs == "paraphrase":
|
| 1270 |
+
lines.append(
|
| 1271 |
+
"Prefer paraphrasing FAQ excerpts; avoid quoting unless the user asks for exact wording."
|
| 1272 |
+
)
|
| 1273 |
+
|
| 1274 |
+
ts = str(session.get("table_style") or "normal").lower()
|
| 1275 |
+
if ts not in ("prefer", "avoid", "normal"):
|
| 1276 |
+
ts = "normal"
|
| 1277 |
+
if ts == "prefer":
|
| 1278 |
+
lines.append(
|
| 1279 |
+
"When comparing several options, prefer a **markdown table** if it makes the structure clearer."
|
| 1280 |
+
)
|
| 1281 |
+
elif ts == "avoid":
|
| 1282 |
+
lines.append(
|
| 1283 |
+
"Avoid markdown tables; use bullets or short sections instead."
|
| 1284 |
+
)
|
| 1285 |
+
|
| 1286 |
+
es = str(session.get("emoji_style") or "normal").lower()
|
| 1287 |
+
if es not in ("include", "avoid", "normal"):
|
| 1288 |
+
es = "normal"
|
| 1289 |
+
if es == "include":
|
| 1290 |
+
lines.append(
|
| 1291 |
+
"You may use a few tasteful emoji in replies when they help readability (keep it sparse and professional)."
|
| 1292 |
+
)
|
| 1293 |
+
elif es == "avoid":
|
| 1294 |
+
lines.append("Do not use emoji in replies unless the user explicitly uses emoji first.")
|
| 1295 |
+
|
| 1296 |
+
sh = str(session.get("section_headings") or "normal").lower()
|
| 1297 |
+
if sh not in ("prefer", "avoid", "normal"):
|
| 1298 |
+
sh = "normal"
|
| 1299 |
+
if sh == "prefer":
|
| 1300 |
+
lines.append(
|
| 1301 |
+
"For multi-part answers, organize with short **markdown headings** (## / ###) before each major block."
|
| 1302 |
+
)
|
| 1303 |
+
elif sh == "avoid":
|
| 1304 |
+
lines.append(
|
| 1305 |
+
"Avoid leading lines that look like markdown headings (no `#` / `##` title lines); use bold inline labels or paragraphs instead."
|
| 1306 |
+
)
|
| 1307 |
+
|
| 1308 |
+
te = str(session.get("term_emphasis") or "normal").lower()
|
| 1309 |
+
if te not in ("highlight", "minimal", "normal"):
|
| 1310 |
+
te = "normal"
|
| 1311 |
+
if te == "highlight":
|
| 1312 |
+
lines.append(
|
| 1313 |
+
"Use **bold** on a handful of key terms or short phrases (not whole sentences) to help the reader scan."
|
| 1314 |
+
)
|
| 1315 |
+
elif te == "minimal":
|
| 1316 |
+
lines.append(
|
| 1317 |
+
"Keep inline **bold** rare; prefer plain text unless emphasis is truly needed for clarity."
|
| 1318 |
+
)
|
| 1319 |
+
|
| 1320 |
+
cp = str(session.get("counterpoint_tone") or "normal").lower()
|
| 1321 |
+
if cp not in ("challenge", "supportive", "normal"):
|
| 1322 |
+
cp = "normal"
|
| 1323 |
+
if cp == "challenge":
|
| 1324 |
+
lines.append(
|
| 1325 |
+
"Briefly stress-test the user's plan: note plausible failure modes, missing constraints, or stronger "
|
| 1326 |
+
"alternatives—stay respectful and specific."
|
| 1327 |
+
)
|
| 1328 |
+
elif cp == "supportive":
|
| 1329 |
+
lines.append(
|
| 1330 |
+
"Lean supportive: acknowledge effort, frame improvements as next steps, and avoid needless harsh critique."
|
| 1331 |
+
)
|
| 1332 |
+
|
| 1333 |
+
g = str(session.get("faq_grounding") or "normal").lower()
|
| 1334 |
+
if g not in ("strict", "normal", "relaxed"):
|
| 1335 |
+
g = "normal"
|
| 1336 |
+
if g == "strict":
|
| 1337 |
+
lines.append(
|
| 1338 |
+
"FAQ grounding (strict): Treat product/process/policy claims as supported only when clearly stated in "
|
| 1339 |
+
"the FAQ excerpts provided in this turn. If not stated there, say you are unsure or that it is outside "
|
| 1340 |
+
"the provided FAQ. When you rely on an excerpt, cite it as **[FAQ excerpt N]** matching the numbered "
|
| 1341 |
+
"excerpt headings you were given."
|
| 1342 |
+
)
|
| 1343 |
+
elif g == "relaxed":
|
| 1344 |
+
lines.append(
|
| 1345 |
+
"FAQ grounding (relaxed): Prefer the supplied FAQ excerpts for product/support specifics, but you may add "
|
| 1346 |
+
"brief general-knowledge context if you clearly separate it from anything implied by FAQ text."
|
| 1347 |
+
)
|
| 1348 |
+
# "normal": default product behavior --- rely on FAQ block wording without duplicating instructions.
|
| 1349 |
+
if lines:
|
| 1350 |
+
extras.append(
|
| 1351 |
+
"Preferred reply style for this chat session:\n" + "\n".join(f"- {ln}" for ln in lines)
|
| 1352 |
+
)
|
| 1353 |
+
|
| 1354 |
+
|
| 1355 |
def handle_slash(
|
| 1356 |
msg: str,
|
| 1357 |
*,
|
|
|
|
| 1407 |
out.append(f"**#{i}** score={sc:.4f}\n{_clip(txt, 700)}\n")
|
| 1408 |
return "\n".join(out)
|
| 1409 |
|
| 1410 |
+
if cmd == "/similarity":
|
| 1411 |
+
if not encoder:
|
| 1412 |
+
return "Encoder off. Drop `--lm-only` / `--no-encoder`."
|
| 1413 |
+
if "|||" not in rest:
|
| 1414 |
+
return "Usage: `/similarity text A ||| text B`"
|
| 1415 |
+
try:
|
| 1416 |
+
ta, tb = _parse_two_segments(rest)
|
| 1417 |
+
except ValueError as e:
|
| 1418 |
+
return str(e)
|
| 1419 |
+
score = encoder.similarity(ta, tb)
|
| 1420 |
+
return (
|
| 1421 |
+
f"**Similarity:** {score:.4f}\n\n**A:** {_clip(ta, 480)}\n\n**B:** {_clip(tb, 480)}"
|
| 1422 |
+
)
|
| 1423 |
+
|
| 1424 |
+
if cmd in ("/embedding", "/embed"):
|
| 1425 |
+
if not encoder:
|
| 1426 |
+
return "Encoder off. Drop `--lm-only` / `--no-encoder`."
|
| 1427 |
+
if not rest:
|
| 1428 |
+
return f"Usage: `{cmd} <text>`"
|
| 1429 |
+
return _embedding_summary_markdown(encoder, rest)
|
| 1430 |
+
|
| 1431 |
+
if cmd == "/nearest":
|
| 1432 |
+
if not encoder:
|
| 1433 |
+
return "Encoder off. Drop `--lm-only` / `--no-encoder`."
|
| 1434 |
+
if "|||" not in rest:
|
| 1435 |
+
return "Usage: `/nearest query ||| cand1 ||| cand2 ...`"
|
| 1436 |
+
try:
|
| 1437 |
+
qn, cands = _parse_nearest_blob(rest)
|
| 1438 |
+
except ValueError as e:
|
| 1439 |
+
return str(e)
|
| 1440 |
+
k = max(1, min(rag_top_k, len(cands)))
|
| 1441 |
+
return _nearest_markdown(encoder, qn, cands, top_k=k)
|
| 1442 |
+
|
| 1443 |
if cmd in ("/summarize", "/reformulate", "/grounded"):
|
| 1444 |
if lm is None:
|
| 1445 |
return "Generative model not loaded."
|
|
|
|
| 1663 |
print(f"Loading generative model {mid!r} on {dev!r} ...", flush=True)
|
| 1664 |
lm = load_causal_lm(mid, dev)
|
| 1665 |
turn_counter = {"n": 0}
|
| 1666 |
+
initial_ub_session = {
|
| 1667 |
+
"trace": not args.no_trace
|
| 1668 |
+
and (encoder is not None or mem_conn is not None or (rag_chunks is not None)),
|
| 1669 |
+
"smart_route": not args.no_smart_route,
|
| 1670 |
+
"rag": rag_chunks is not None,
|
| 1671 |
+
"scope_key": args.memory_scope,
|
| 1672 |
+
"verbosity": "normal",
|
| 1673 |
+
"reply_format": "prose",
|
| 1674 |
+
"faq_grounding": "normal",
|
| 1675 |
+
"audience": "normal",
|
| 1676 |
+
"answer_lead": "normal",
|
| 1677 |
+
"step_style": "normal",
|
| 1678 |
+
"confidence_tone": "normal",
|
| 1679 |
+
"followup_close": "normal",
|
| 1680 |
+
"exposition_order": "normal",
|
| 1681 |
+
"example_density": "normal",
|
| 1682 |
+
"comparison_frame": "normal",
|
| 1683 |
+
"register_tone": "normal",
|
| 1684 |
+
"code_block_style": "normal",
|
| 1685 |
+
"analogy_use": "normal",
|
| 1686 |
+
"acronym_style": "normal",
|
| 1687 |
+
"clarify_first": "normal",
|
| 1688 |
+
"speculation": "normal",
|
| 1689 |
+
"math_detail": "normal",
|
| 1690 |
+
"output_format": "normal",
|
| 1691 |
+
"risk_posture": "normal",
|
| 1692 |
+
"actionability": "normal",
|
| 1693 |
+
"quote_style": "normal",
|
| 1694 |
+
"table_style": "normal",
|
| 1695 |
+
"emoji_style": "normal",
|
| 1696 |
+
"section_headings": "normal",
|
| 1697 |
+
"term_emphasis": "normal",
|
| 1698 |
+
"counterpoint_tone": "normal",
|
| 1699 |
+
}
|
| 1700 |
|
| 1701 |
def respond(
|
| 1702 |
message: str,
|
| 1703 |
history: list[dict],
|
| 1704 |
+
ub_session: dict[str, Any],
|
| 1705 |
+
) -> tuple[str, list[dict], dict[str, Any]]:
|
| 1706 |
msg = (message or "").strip()
|
| 1707 |
hist = list(history or [])
|
| 1708 |
if not msg:
|
| 1709 |
+
return "", hist, ub_session
|
| 1710 |
|
| 1711 |
turn_counter["n"] += 1
|
| 1712 |
seed = (args.seed + turn_counter["n"]) % (2**31)
|
| 1713 |
|
| 1714 |
+
cur_scope = str(ub_session.get("scope_key") or args.memory_scope)
|
| 1715 |
+
|
| 1716 |
slash_out = handle_slash(
|
| 1717 |
msg,
|
| 1718 |
lm=lm,
|
| 1719 |
mem_conn=mem_conn,
|
| 1720 |
+
scope_key=cur_scope,
|
| 1721 |
encoder=encoder,
|
| 1722 |
rag_chunks=rag_chunks,
|
| 1723 |
rag_top_k=args.rag_top_k,
|
|
|
|
| 1731 |
if slash_out is not None:
|
| 1732 |
hist.append({"role": "user", "content": msg})
|
| 1733 |
hist.append({"role": "assistant", "content": slash_out})
|
| 1734 |
+
return "", hist, ub_session
|
| 1735 |
+
|
| 1736 |
+
nl_out = handle_nl_control(
|
| 1737 |
+
msg,
|
| 1738 |
+
ub_session,
|
| 1739 |
+
mem_conn=mem_conn,
|
| 1740 |
+
scope_key=cur_scope,
|
| 1741 |
+
rag_chunks_base=rag_chunks,
|
| 1742 |
+
locked_no_smart_route=args.no_smart_route,
|
| 1743 |
+
)
|
| 1744 |
+
if nl_out is not None:
|
| 1745 |
+
hist.append({"role": "user", "content": msg})
|
| 1746 |
+
hist.append({"role": "assistant", "content": nl_out})
|
| 1747 |
+
return "", hist, ub_session
|
| 1748 |
+
|
| 1749 |
+
effective_rag = (
|
| 1750 |
+
rag_chunks if rag_chunks is not None and ub_session.get("rag") else None
|
| 1751 |
+
)
|
| 1752 |
+
use_smart = bool(ub_session.get("smart_route")) and not args.no_smart_route
|
| 1753 |
|
| 1754 |
chat_line = msg
|
| 1755 |
+
if use_smart:
|
| 1756 |
try:
|
| 1757 |
route = infer_route(
|
| 1758 |
lm,
|
|
|
|
| 1769 |
msg=msg,
|
| 1770 |
lm=lm,
|
| 1771 |
mem_conn=mem_conn,
|
| 1772 |
+
scope_key=cur_scope,
|
| 1773 |
encoder=encoder,
|
| 1774 |
+
rag_chunks=effective_rag,
|
| 1775 |
rag_top_k=args.rag_top_k,
|
| 1776 |
task_max_new_tokens=args.task_max_new_tokens,
|
| 1777 |
seed=(seed + 11) % (2**31),
|
|
|
|
| 1784 |
foot = f"\n\n---\n*Routed intent:* `{route['intent']}`"
|
| 1785 |
hist.append({"role": "user", "content": msg})
|
| 1786 |
hist.append({"role": "assistant", "content": tool_reply + foot})
|
| 1787 |
+
return "", hist, ub_session
|
| 1788 |
|
| 1789 |
chat_line = route["text"] or msg
|
| 1790 |
|
| 1791 |
trace: list[str] = []
|
| 1792 |
extras: list[str] = []
|
| 1793 |
+
_append_reply_style_hints(extras, ub_session)
|
| 1794 |
|
| 1795 |
if encoder:
|
| 1796 |
probs = encoder.classify([chat_line])[0]
|
|
|
|
| 1803 |
)
|
| 1804 |
|
| 1805 |
rag_block = ""
|
| 1806 |
+
if encoder and effective_rag:
|
| 1807 |
+
hr = hybrid_retrieve(encoder, chat_line, effective_rag, top_k=args.rag_top_k)
|
| 1808 |
if hr:
|
| 1809 |
trace.append(f"RAG:{len(hr)}chunk(s)")
|
| 1810 |
pieces = []
|
|
|
|
| 1818 |
)
|
| 1819 |
|
| 1820 |
if mem_conn:
|
| 1821 |
+
items = list_for_scope(mem_conn, cur_scope)
|
| 1822 |
if items:
|
| 1823 |
trace.append(f"mem:{len(items)}item(s)")
|
| 1824 |
mem_lines = []
|
|
|
|
| 1847 |
do_sample=True,
|
| 1848 |
)
|
| 1849 |
out = reply or "(empty generation)"
|
| 1850 |
+
show_trace_footer = (
|
| 1851 |
+
(not args.no_trace)
|
| 1852 |
+
and bool(ub_session.get("trace"))
|
| 1853 |
+
and (
|
| 1854 |
+
encoder is not None
|
| 1855 |
+
or mem_conn is not None
|
| 1856 |
+
or effective_rag is not None
|
| 1857 |
+
)
|
| 1858 |
+
)
|
| 1859 |
+
if show_trace_footer and trace:
|
| 1860 |
out += "\n\n---\n*Brain trace:* " + " · ".join(trace)
|
| 1861 |
|
| 1862 |
hist.append({"role": "user", "content": msg})
|
| 1863 |
hist.append({"role": "assistant", "content": out})
|
| 1864 |
+
return "", hist, ub_session
|
| 1865 |
|
| 1866 |
brain_bits = []
|
| 1867 |
if encoder:
|
|
|
|
| 1872 |
brain_bits.append("memory")
|
| 1873 |
brain_label = "+".join(brain_bits) if brain_bits else "LM only"
|
| 1874 |
|
| 1875 |
+
_css = """
|
| 1876 |
+
/* Space UX: keep the input compact and predictable. */
|
| 1877 |
+
#ub_input textarea { height: 120px !important; }
|
| 1878 |
+
"""
|
| 1879 |
+
with gr.Blocks(title="Universal Brain (chat prototype)", css=_css) as demo:
|
| 1880 |
gr.Markdown(
|
| 1881 |
"### Universal Brain — chat prototype\n"
|
| 1882 |
f"**Generative:** `{mid}` ({lm.device}) · **Brain layers:** {brain_label}\n\n"
|
| 1883 |
"**NL routing:** the model infers what you want (summarize, FAQ search, save note, …). "
|
| 1884 |
"Use **`--no-smart-route`** for plain chat-only + slash shortcuts. "
|
| 1885 |
"`/help` lists slash commands.\n\n"
|
| 1886 |
+
"**NL session controls:** say things like "
|
| 1887 |
+
"**`What is my current scope?`**, **`Start a new private session`**, **`Switch to scope my-key`**, "
|
| 1888 |
+
"**`Be brief`**, **`More detail please`**, **`Use bullet points`**, **`Reset reply style`**, "
|
| 1889 |
+
"**`Strict FAQ`** / **`Relaxed FAQ`** / **`Balanced FAQ`**, "
|
| 1890 |
+
"**`ELI5`** / **`Expert mode`**, **`TLDR first`** / **`Answer directly`**, "
|
| 1891 |
+
"**`Step by step`** / **`No numbered steps`**, **`Flag your assumptions`** / **`Be decisive`**, "
|
| 1892 |
+
"**`Suggest next steps`** / **`No follow-up questions`**, **`Definitions first`** / **`Intuition first`**, "
|
| 1893 |
+
"**`Include examples`** / **`Skip examples`**, **`Use pros and cons`** / **`Compare in flowing prose`**, **`Formal tone`** / **`Casual tone`**, **`Use code fences`** / **`Inline code only`**, "
|
| 1894 |
+
"**`Use analogies`** / **`No analogies`**, **`Spell out acronyms`** / **`Don't expand acronyms`**, "
|
| 1895 |
+
"**`Clarify first`** / **`No clarifying questions`**, **`No speculation`** / **`Brainstorm freely`**, "
|
| 1896 |
+
"**`Show your work`** / **`Final answer only`**, **`Answer in JSON`** / **`Plain text only`**, "
|
| 1897 |
+
"**`Be risk averse`** / **`Be pragmatic`**, **`Give me runnable commands`** / **`No commands`**, "
|
| 1898 |
+
"**`Quote the FAQ excerpts`** / **`Paraphrase only`**, **`Use tables`** / **`No tables`**, "
|
| 1899 |
+
"**`Use emoji`** / **`No emoji`**, **`Use section headings`** / **`Flat answer`**, "
|
| 1900 |
+
"**`Bold key terms`** / **`Minimal bold`**, **`Challenge my assumptions`** / **`Be supportive`**, "
|
| 1901 |
+
"**`Export my memories`**, **`Delete all my memories for this chat`**, **`Clear my session notes`**, "
|
| 1902 |
+
"**`Turn off FAQ context`**, **`Turn off smart routing`**, **`Show the brain trace`** "
|
| 1903 |
+
"(no slash command required). See the repo `README` for more example phrases.\n\n"
|
| 1904 |
"Encoder topics (Hub TinyModel1 ≈ AG News) still feed context and an optional *Brain trace* line; "
|
| 1905 |
"use `/classify` or ask naturally to see the full probability table in chat."
|
| 1906 |
)
|
| 1907 |
chat = gr.Chatbot(type="messages", height=520, label="Conversation", allow_tags=False)
|
| 1908 |
+
ub_state = gr.State(initial_ub_session)
|
| 1909 |
with gr.Row():
|
| 1910 |
inp = gr.Textbox(
|
| 1911 |
+
lines=4,
|
| 1912 |
+
max_lines=8,
|
| 1913 |
show_label=False,
|
| 1914 |
placeholder="Ask in plain language, or use /help …",
|
| 1915 |
scale=9,
|
| 1916 |
+
elem_id="ub_input",
|
| 1917 |
)
|
| 1918 |
go = gr.Button("Send", variant="primary", scale=1)
|
| 1919 |
gr.ClearButton([chat, inp])
|
| 1920 |
|
| 1921 |
+
def _submit(
|
| 1922 |
+
m: str,
|
| 1923 |
+
h: list[dict],
|
| 1924 |
+
s: dict[str, Any],
|
| 1925 |
+
) -> tuple[str, list[dict], dict[str, Any]]:
|
| 1926 |
+
return respond(m, h, s)
|
| 1927 |
+
|
| 1928 |
+
go.click(
|
| 1929 |
+
_submit,
|
| 1930 |
+
[inp, chat, ub_state],
|
| 1931 |
+
[inp, chat, ub_state],
|
| 1932 |
+
api_name="chat",
|
| 1933 |
+
api_description="Universal Brain chat endpoint (routing + optional RAG + memory + classifier context).",
|
| 1934 |
+
)
|
| 1935 |
+
inp.submit(_submit, [inp, chat, ub_state], [inp, chat, ub_state])
|
| 1936 |
|
| 1937 |
demo.queue(default_concurrency_limit=2)
|
| 1938 |
share = args.share
|
|
|
|
| 1944 |
server_port=args.port,
|
| 1945 |
share=share,
|
| 1946 |
ssr_mode=False,
|
| 1947 |
+
show_api=True,
|
| 1948 |
)
|
| 1949 |
except ValueError as e:
|
| 1950 |
err = str(e)
|