Spaces:

build-small-hackathon
/

sidechat

Sleeping

App Files Files Community

Port steganacrostics to a Gradio app; retarget to MiniCPM5-1B

by lsb - opened 12 days ago

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+1479

-59

Files changed (12) hide show

.gitignore +16 -0
README.md +50 -5
app.py +334 -54
classifier.py +124 -0
crossing_search.py +292 -0
eval_classifier.py +185 -0
grammar.py +169 -0
logits.py +84 -0
masking.py +53 -0
requirements.txt +5 -0
sweep_minicpm.py +121 -0
tokinfo.py +46 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,16 @@

+# Python venv (huge: torch, transformers, …) — never commit
+.venv/
+venv/
+# Byte-compiled / caches
+__pycache__/
+*.py[cod]
+*.egg-info/
+.ipynb_checkpoints/
+# Local model / HF caches (models are downloaded at runtime)
+.cache/
+hf_cache/
+# OS cruft
+.DS_Store

README.md CHANGED Viewed

@@ -4,14 +4,59 @@ emoji: 💬
 colorFrom: yellow
 colorTo: purple
 sdk: gradio
-sdk_version: 6.5.1
 app_file: app.py
 pinned: false
-hf_oauth: true
-hf_oauth_scopes:
-- inference-api
 license: apache-2.0
 short_description: Completely normal text assistant, with talking on the side
 ---
-An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).

 colorFrom: yellow
 colorTo: purple
 sdk: gradio
+sdk_version: 6.18.0
 app_file: app.py
 pinned: false
 license: apache-2.0
 short_description: Completely normal text assistant, with talking on the side
+tags:
+  - track:wood
+  - sponsor:openbmb
+  - achievement:offgrid
+  - achievement:sharing
+  - achievement:fieldnotes
 ---
+# Side chat
+A Gradio port of the browser steganacrostics app. A completely normal text
+assistant — except every line of the answer secretly starts with the next
+letter of a hidden **secret** word (an acrostic). It does this with
+grammar-constrained decoding over a small local model (`openbmb/MiniCPM5-1B` by
+default; set `SIDECHAT_MODEL=LiquidAI/LFM2.5-350M` for the smaller, faster
+original), running on **CPU** via PyTorch `transformers`.
+What's ported from the JavaScript original (`../../src/`):
+- **Grammar engine** (`grammar.py`) — a tiny NFA that pins each line to its
+  forced first letter, with optional ` * ` bullets and a max line length.
+- **Constrained generation** (`logits.py` + `masking.py`) — a `LogitsProcessor`
+  that masks every token that would break the acrostic; EOS only at an accept
+  state. A state-keyed cache makes the per-step vocab scan cheap.
+- **List-vs-prose classifier** (`classifier.py`) — an optimized prompt,
+  grammar-constrained to `list.` / `story.`, that auto-picks the render mode.
+  The prompt is tuned per model: failure modes are model-specific, so
+  `eval_classifier.py` (50 list + 50 prose prompts) and `sweep_minicpm.py`
+  re-optimize it for whatever model is in use.
+- **Local-crossing search** (`crossing_search.py`) — the "extra attention at
+  the constraint": generate each prose line greedily, then choose where to
+  break it so a short window straddling the crossing (last *k* tokens + forced
+  letter + next *j* tokens) reads best. Plus stealth lowercase casing and a
+  minimum line length.
+Run locally:
+```
+pip install -r requirements.txt
+python app.py
+```
+Then open the printed URL, type a prompt, set a secret in ⚙️ Settings, and click
+Generate. The list-vs-prose classifier runs automatically on each Generate (turn
+it off in ⚙️ Settings to set the render mode by hand, or use 🔎 Detect to preview
+it). Because everything runs on CPU, generation takes seconds (more for the
+larger model); the crossing search trades extra time for smoother prose.
+The model is downloaded from the Hugging Face Hub on first run. Custom logits
+processing requires the model to run in-process, so this app does not use the
+remote Inference API.

app.py CHANGED Viewed

@@ -1,69 +1,349 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-def respond(
-    message,
-    history: list[dict[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-    hf_token: gr.OAuthToken,
-):
-    """
-    For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-    """
-    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
-    messages = [{"role": "system", "content": system_message}]
-    messages.extend(history)
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        choices = message.choices
-        token = ""
-        if len(choices) and choices[0].delta.content:
-            token = choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-chatbot = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-with gr.Blocks() as demo:
-    with gr.Sidebar():
-        gr.LoginButton()
-    chatbot.render()
 if __name__ == "__main__":
-    demo.launch()

+"""Side chat — a Gradio port of the browser steganacrostics app.
+Completely normal text assistant, with a secret talking on the side: every line
+of the answer starts with successive letters of a hidden "secret" word (an
+acrostic), produced by grammar-constrained decoding. A list-vs-prose classifier
+auto-picks the render mode, and an optional local-crossing search spends extra
+attention at each constraint cliff so the forced letters read as the natural
+next word.
+Runs the model locally on CPU with PyTorch transformers (the remote Inference
+API can't do custom logits processing, which is the whole point here).
+"""
+from __future__ import annotations
+import os
+import re
+import threading
+import queue
+import time
+import torch
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    LogitsProcessorList,
+    TextIteratorStreamer,
+)
 import gradio as gr
+from grammar import compile_acrostic, union_grammars
+from logits import GrammarLogitsProcessor, build_token_text_table
+from tokinfo import build_tok_info
+from classifier import classify, DEFAULT_VARIANT
+from crossing_search import generate_crossing_search
+# Default to MiniCPM5-1B (OpenBMB); override with SIDECHAT_MODEL, e.g.
+# SIDECHAT_MODEL=LiquidAI/LFM2.5-350M for the smaller, faster original.
+# The environment variable is read once, when this module is imported.
+MODEL_ID = os.environ.get("SIDECHAT_MODEL", "openbmb/MiniCPM5-1B")
+DEVICE = "cpu"  # pure CPU by request
+# System prompts: generate_fn uses LIST_SYSTEM when list mode is on and
+# PROSE_SYSTEM otherwise.
+LIST_SYSTEM = (
+    "You are a helpful assistant. Answer as a plain bulleted list — one short "
+    "item per line. Do not use markdown, bold text, headings, code, or numbered "
+    "lists."
+)
+PROSE_SYSTEM = (
+    "You are a helpful assistant. Answer in plain prose. Do not use markdown, "
+    "bold text, headings, code, or bulleted/numbered lists."
+)
+class Context:
+    """Everything the generation + classifier code needs, built once at startup.
+    Attributes set by __init__:
+      tokenizer, model: loaded from MODEL_ID; the model is moved to DEVICE and
+        switched to eval mode.
+      token_text: result of build_token_text_table(tokenizer, vocab_size).
+      eos_token_ids: sorted, de-duplicated EOS ids gathered from the tokenizer,
+        the model config and the generation config.
+      pad_token_id: the tokenizer's pad id, else the generation config's pad
+        id, else the smallest EOS id.
+      tok_info: result of build_tok_info(token_text, eos_token_ids).
+    """
+    def __init__(self):
+        print(f"loading {MODEL_ID} on {DEVICE}…", flush=True)
+        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+        self.model = (
+            AutoModelForCausalLM.from_pretrained(MODEL_ID, dtype=torch.float32)
+            .to(DEVICE)
+            .eval()
+        )
+        self.model.device  # noqa: B018 (touch to confirm)
+        # The table is sized from the model config's vocab_size, not from the
+        # tokenizer.
+        vocab = self.model.config.vocab_size
+        t0 = time.perf_counter()
+        self.token_text = build_token_text_table(self.tokenizer, vocab)
+        print(f"token table built in {time.perf_counter() - t0:.1f}s ({vocab} tokens)", flush=True)
+        # Collect every EOS id the three sources know about; each source may
+        # hold None, a single id, or a (possibly nested) list/tuple of ids.
+        eos = set()
+        def add_eos(x):
+            if x is None:
+                return
+            if isinstance(x, (list, tuple)):
+                for y in x:
+                    add_eos(y)
+            else:
+                eos.add(int(x))
+        add_eos(self.tokenizer.eos_token_id)
+        add_eos(getattr(self.model.config, "eos_token_id", None))
+        add_eos(getattr(self.model.generation_config, "eos_token_id", None))
+        self.eos_token_ids = sorted(eos)
+        # Pad id fallback chain: tokenizer -> generation config -> first EOS id.
+        # NOTE(review): the last fallback indexes eos_token_ids[0] and would
+        # raise IndexError if no EOS id was found anywhere.
+        pad = self.tokenizer.pad_token_id
+        if pad is None:
+            pad = getattr(self.model.generation_config, "pad_token_id", None)
+        if pad is None:
+            pad = self.eos_token_ids[0]
+        self.pad_token_id = int(pad)
+        self.tok_info = build_tok_info(self.token_text, self.eos_token_ids)
+        print("context ready.", flush=True)
+# Built at import time, so importing this module loads the model.
+CTX = Context()
+# The acrostic is matched case-insensitively. In list mode the ` * ` prefix on
+# the very first line is optional (some models start with a preamble-free
+# letter, others don't), so both shapes are accepted.
+def build_grammar(secret, list_mode, max_line):
+    """Compile the acrostic grammar for `secret`.
+    Prose mode yields a single grammar with no line prefix. List mode yields the
+    union of two ` * `-prefixed grammars: first line with the prefix, then
+    first line without it.
+    """
+    if list_mode:
+        first_line_variants = [
+            compile_acrostic(
+                secret,
+                list_prefix=" * ",
+                max_line=max_line,
+                case_insensitive=True,
+                first_line_prefix=has_prefix,
+            )
+            for has_prefix in (True, False)
+        ]
+        return union_grammars(first_line_variants)
+    return compile_acrostic(secret, list_prefix="", max_line=max_line, case_insensitive=True)
+def check_acrostic(output, secret):
+    """Check whether consecutive line-initial letters of `output` spell `secret`.
+    Blank lines are ignored, an optional leading `*` bullet plus whitespace is
+    stripped from each line, and letters are compared case-insensitively.
+    Returns (True, matched initials) for the first matching window, otherwise
+    (False, all initials)."""
+    bullet = re.compile(r"^\*?\s*")
+    initials = []
+    for raw_line in output.split("\n"):
+        line = raw_line.strip()
+        if not line:
+            continue
+        # A line that is only a bullet contributes an empty initial.
+        initials.append(bullet.sub("", line)[:1])
+    wanted = [ch.lower() for ch in secret]
+    seen = [ch.lower() for ch in initials]
+    width = len(wanted)
+    for start in range(len(seen) - width + 1):
+        if seen[start:start + width] == wanted:
+            return True, "".join(initials[start:start + width])
+    return False, "".join(initials)
+# --- Classifier: drive the list/prose checkbox ------------------------------
+def classify_fn(prompt):
+    """Preview handler: classify `prompt` and report the result.
+    Returns (checkbox value, status text). A blank prompt leaves the checkbox
+    untouched and returns a hint instead."""
+    has_text = bool((prompt or "").strip())
+    if not has_text:
+        return gr.update(), "enter a prompt to detect list vs. prose"
+    is_list, raw_answer = classify(CTX, prompt, DEFAULT_VARIANT)
+    if is_list:
+        kind = "list"
+    else:
+        kind = "prose"
+    return is_list, f"detected **{kind}** (classifier raw: {raw_answer!r})"
+def maybe_detect(prompt, list_mode, auto_detect):
+    """Pre-Generate step. With auto-detect on and a non-blank prompt, classify
+    the prompt and return (prediction, status text) so the list/prose checkbox
+    follows the classifier. Otherwise return the manual `list_mode` unchanged
+    and leave the status alone."""
+    if not auto_detect or not (prompt or "").strip():
+        return list_mode, gr.update()
+    is_list, raw_answer = classify(CTX, prompt, DEFAULT_VARIANT)
+    kind = "list" if is_list else "prose"
+    return is_list, f"detected **{kind}** (raw {raw_answer!r}) — generating…"
+# --- Generation -------------------------------------------------------------
+def _run_in_thread(target):
+    """Start `target(queue)` on a daemon thread and hand back that queue.
+    `target` pushes its results onto the queue and must finish by pushing a
+    None sentinel so the consumer knows when to stop reading."""
+    results = queue.Queue()
+    worker = threading.Thread(target=target, args=(results,), daemon=True)
+    worker.start()
+    return results
+def _int_setting(value, default):
+    """Return `value` as an int, falling back to `default` only when it is None.
+    `value or default` would also discard an explicit 0, and 0 is a documented
+    setting in the UI (R: "0 = greedy", min line: "0 = off")."""
+    return default if value is None else int(value)
+def generate_fn(prompt, secret, list_mode, max_line, crossing, k, j, R, min_line):
+    """Generate the acrostic answer, streaming progress to the UI.
+    A generator: every item is an (output text, metrics, status) triple. Runs
+    the local-crossing search when `crossing` is set and `list_mode` is off,
+    otherwise token-streamed grammar-constrained greedy decoding. If the
+    background generation fails, the error is the final item (it is not
+    followed by a "done" status).
+    """
+    # Strip spaces: a multi-word secret spells its letters across lines; spaces
+    # would force odd punctuation-prefixed "word-break" lines. The field still
+    # shows the spaced version; the acrostic uses only the letters.
+    secret = re.sub(r"\s+", "", (secret or "").strip())
+    if not secret:
+        yield "(secret is empty — open ⚙️ Settings and set one)", "", ""
+        return
+    list_mode = bool(list_mode)
+    max_line = max(1, int(max_line or 80))
+    system_prompt = LIST_SYSTEM if list_mode else PROSE_SYSTEM
+    try:
+        grammar = build_grammar(secret, list_mode, max_line)
+    except Exception as e:  # noqa: BLE001
+        yield f"grammar build error: {e}", "", ""
+        return
+    # --- Local-crossing search (prose only) ---------------------------------
+    if crossing and not list_mode:
+        # Defaults apply only to unset fields; an explicit 0 is honoured.
+        k = max(0, _int_setting(k, 4))
+        j = max(0, _int_setting(j, 3))
+        R = max(0, _int_setting(R, 4))
+        min_line = min(max_line, max(0, _int_setting(min_line, 30)))
+        status = f"generating (local-crossing search · k={k}, j={j}, R={R}, minLine={min_line})…"
+        committed = [""]
+        t0 = time.perf_counter()
+        def worker(q):
+            def on_line(line_text, info):
+                committed[0] += line_text
+                q.put(committed[0])
+            try:
+                res = generate_crossing_search(
+                    CTX, grammar, secret, max_line, prompt, system_prompt,
+                    k=k, j=j, R=R, min_line=min_line, on_line=on_line,
+                )
+                q.put(("done", res))
+            except Exception as e:  # noqa: BLE001
+                q.put(("error", str(e)))
+            q.put(None)
+        q = _run_in_thread(worker)
+        result = None
+        failure = None
+        yield "", "", status
+        while True:
+            item = q.get()
+            if item is None:
+                break
+            if isinstance(item, tuple) and item[0] == "done":
+                result = item[1]
+            elif isinstance(item, tuple) and item[0] == "error":
+                failure = item[1]
+            else:
+                yield item, "", status
+        if failure is not None:
+            # Stop here so the error is the last thing the UI shows, rather
+            # than being overwritten by the "done" summary below.
+            yield committed[0], f"error: {failure}", "error"
+            return
+        elapsed = time.perf_counter() - t0
+        text = result["text"] if result else committed[0]
+        per_line = result["per_line"] if result else []
+        n_moved = sum(1 for p in per_line if p.get("r", 0) > 0)
+        ok, firsts = check_acrostic(text, secret)
+        metrics = (
+            f"local-crossing · {elapsed:.2f}s · {len(per_line)} lines · "
+            f"{n_moved} breaks moved · acrostic {'OK' if ok else 'MISS'} ({firsts})"
+        )
+        yield text, metrics, "done (local-crossing search)."
+        return
+    # --- Plain grammar-constrained greedy (token-streamed) ------------------
+    proc = GrammarLogitsProcessor(grammar, CTX.tokenizer, CTX.token_text, CTX.eos_token_ids)
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": prompt},
+    ]
+    enc = CTX.tokenizer.apply_chat_template(
+        messages, add_generation_prompt=True, return_tensors="pt", return_dict=True
+    ).to(DEVICE)
+    streamer = TextIteratorStreamer(CTX.tokenizer, skip_prompt=True, skip_special_tokens=True)
+    gen_kwargs = dict(
+        **enc,
+        max_new_tokens=400,
+        do_sample=False,
+        logits_processor=LogitsProcessorList([proc]),
+        streamer=streamer,
+        pad_token_id=CTX.pad_token_id,
+    )
+    failures = []
+    def run_generate():
+        try:
+            CTX.model.generate(**gen_kwargs)
+        except Exception as e:  # noqa: BLE001
+            failures.append(e)
+            # generate() did not reach its own end-of-stream signal; without
+            # this the consumer loop below would block on the streamer forever.
+            streamer.end()
+    t0 = time.perf_counter()
+    thread = threading.Thread(target=run_generate, daemon=True)
+    thread.start()
+    acc = ""
+    t_first = None
+    tokens = 0
+    chars = 0
+    yield "", "", "generating (grammar-constrained)…"
+    for chunk in streamer:
+        if not chunk:
+            continue
+        if t_first is None:
+            t_first = time.perf_counter()
+        # NOTE(review): this counts non-empty streamer chunks; the streamer may
+        # merge several tokens into one chunk, so "tokens" is an approximation.
+        tokens += 1
+        chars += len(chunk)
+        acc += chunk
+        gen_s = max(0.001, time.perf_counter() - t_first)
+        tps = tokens / gen_s
+        ttft = (t_first - t0)
+        yield acc, f"TTFT {ttft:.2f}s · ~{tps:.1f} tok/s · {tokens} tokens · {chars} chars", "generating…"
+    thread.join()
+    if failures:
+        yield acc, f"error: {failures[0]}", "error"
+        return
+    wall = time.perf_counter() - t0
+    s = proc.stats
+    proc_ms = s["total_ms"]
+    ttft = (t_first - t0) if t_first else 0.0
+    ok, firsts = check_acrostic(acc, secret)
+    metrics = (
+        f"TTFT {ttft:.2f}s · {tokens} tokens · {chars} chars · wall {wall:.2f}s · "
+        f"mask {proc_ms:.0f}ms ({(proc_ms/1000)/wall*100:.0f}%) · "
+        f"acrostic {'OK' if ok else 'MISS'} ({firsts})"
+    )
+    yield acc, metrics, "done. edit the secret and/or prompt and click Generate again."
+# --- UI ---------------------------------------------------------------------
+# Layout: prompt + examples + Generate button + output/metrics on top, with all
+# tuning knobs (secret, render mode, crossing search) inside a collapsed
+# Settings accordion. Event wiring follows the layout.
+with gr.Blocks(title="Side chat") as demo:
+    gr.Markdown("# Side chat")
+    gr.Markdown(
+        "Completely normal text assistant, with talking on the side. Each line "
+        "of the answer secretly starts with the next letter of your **secret** "
+        f"word — grammar-constrained decoding on `{MODEL_ID}`, running locally on CPU."
+    )
+    prompt = gr.Textbox(label="Prompt", value="what are some easy-to-make home recipes?", lines=2)
+    gr.Examples(
+        examples=[
+            ["what are some easy-to-make home recipes?"],
+            ["please write a few sentences about regular expressions"],
+        ],
+        inputs=prompt,
+        label="Demo prompts (one detects as a list, one as prose)",
+    )
+    run = gr.Button("Generate", variant="primary")
+    output = gr.Textbox(label="Output", lines=10, interactive=False)
+    metrics = gr.Markdown("")
+    with gr.Accordion("⚙️ Settings", open=False):
+        secret = gr.Textbox(
+            label="Secret (each line will start with these letters)", value="subtle"
+        )
+        auto_detect = gr.Checkbox(
+            label="auto-detect list vs. prose on Generate (LLM classifier)",
+            value=True,
+        )
+        list_mode = gr.Checkbox(
+            label="render as bulleted list (each line prefixed with ` * `) — "
+            "set by auto-detect; uncheck auto-detect to set it manually",
+            value=True,
+        )
+        # Manual preview: run the classifier without generating (debug aid).
+        detect = gr.Button("🔎 Detect list / prose (preview only)", size="sm")
+        max_line = gr.Number(label="Max chars per line (after the prefix + letter)", value=80, precision=0)
+        gr.Markdown("**Local-crossing search** (prose only) — extra attention at each constraint cliff")
+        crossing = gr.Checkbox(
+            label="enable local-crossing search (greedy line, then pick the break "
+            "that makes the crossing read best; list mode stays greedy)",
+            value=False,
+        )
+        # The four numbers below are passed to generate_fn as k, j, R and
+        # min_line, in that order.
+        win_k = gr.Number(label="↳ window before the break (k content tokens)", value=4, precision=0)
+        win_j = gr.Number(label="↳ window after the forced letter (j content tokens)", value=3, precision=0)
+        max_rewind = gr.Number(label="↳ max tokens to trim the break earlier (R; 0 = greedy)", value=4, precision=0)
+        min_line = gr.Number(label="↳ min chars per line (avoid stubby lines; 0 = off)", value=30, precision=0)
+        status = gr.Markdown("ready.")
+    # Manual preview: detect list vs. prose without generating.
+    # Submitting the prompt box triggers the same preview; it does not generate.
+    detect.click(classify_fn, [prompt], [list_mode, status])
+    prompt.submit(classify_fn, [prompt], [list_mode, status])
+    # Generate: auto-detect first (updates the checkbox), then generate using it.
+    run.click(
+        maybe_detect, [prompt, list_mode, auto_detect], [list_mode, status]
+    ).then(
+        generate_fn,
+        [prompt, secret, list_mode, max_line, crossing, win_k, win_j, max_rewind, min_line],
+        [output, metrics, status],
+    )
 if __name__ == "__main__":
+    demo.queue().launch(theme=gr.themes.Soft())

classifier.py ADDED Viewed

	@@ -0,0 +1,124 @@

+"""List-vs-prose classifier (Python port of the shipped part of src/eval.js).
+The classifier reads the user's prompt and decides whether the answer is best
+rendered as a bulleted list or as narrative prose. It is itself an LLM call,
+grammar-constrained to exactly one of two literal completions: apply the chat
+template, append a partial assistant response (the `prefill`), constrain
+generation to one of `branches`, parse the result.
+Failure modes are model-specific, so the prompt is tuned per model. The default
+here is the MiniCPM5-1B winner (`minicpm_intent_write_sp`, 96% on the 100-prompt
+suite) found by re-running the sweep (eval_classifier.py / sweep_minicpm.py) on
+that model. The LFM2.5-350M winner (`r6_c1_v2_single_plural`, 97.5% dev / 85%
+val) is kept as an alternate — it is *prose-biased* on MiniCPM (~75%), so don't
+reuse it there. See CLASSIFIER_PROMPT_OPTIMIZATION.md for the original JS sweep.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Callable, List
+import torch
+from transformers import LogitsProcessorList
+from grammar import compile_literal, union_grammars
+from logits import GrammarLogitsProcessor
+@dataclass
+class Variant:
+    """One classifier prompt configuration, consumed by classify()."""
+    # Identifier for the variant (e.g. "minicpm_intent_write_sp").
+    name: str
+    # Text sent as the system message.
+    system: str
+    # Partial assistant response appended after the chat template.
+    prefill: str
+    # Literal completions the generation is constrained to.
+    branches: List[str]
+    parse: Callable[[str], bool]  # raw generated text -> True (list) / False (prose)
+# Shared trigger-rule strings.
+# They are concatenated into the `system` prompts below; _WRITE_FORMS and
+# _SINGLE_PLURAL start with a space so they can be appended directly.
+_INTENT_BASE = (
+    "Classify the user's intent. Use \"list\" when the answer is a set of "
+    "separate items the user can scan. Use \"story\" when the answer flows as "
+    "one narrative, single fact, or short paragraph."
+)
+_WRITE_FORMS = (
+    " Whenever the user asks to \"write\" or \"compose\" a haiku, poem, letter, "
+    "cover letter, email, joke, story, essay, or limerick, the answer is a story."
+)
+_SINGLE_PLURAL = (
+    " \"What is X\" (a single fact) is a story; \"What are the/some Xs\" (plural "
+    "enumeration) is a list; \"what are the steps/differences/causes/symptoms\" "
+    "is a list."
+)
+# --- The shipped MiniCPM5-1B winner -----------------------------------------
+# On MiniCPM, every "Default to list" framing collapses to all-story (list 0/50)
+# and the LFM2 winner is prose-biased. A neutral *intent* framing nails list
+# recall; adding the write-forms rule (catches "write a haiku/email") and the
+# single-vs-plural rule (catches "what is X" single facts) fixes the residual
+# prose misses. 96% on the 100-prompt suite (list 49/50, prose 47/50).
+DEFAULT_VARIANT = Variant(
+    name="minicpm_intent_write_sp",
+    system=_INTENT_BASE + _WRITE_FORMS + _SINGLE_PLURAL,
+    prefill="The intent is to get a ",
+    branches=["list.", "story."],
+    parse=lambda s: s.startswith("list"),
+)
+# --- Reference alternates (other strong variants; useful when re-tuning) -----
+ALTERNATES = [
+    # The LFM2.5-350M winner (97.5% dev / 85% val on LFM2; ~75% on MiniCPM).
+    Variant(
+        name="r6_c1_v2_single_plural",
+        system=(
+            "Classify the user's request. Use \"list\" when the user wants "
+            "enumerated items. Use \"story\" for everything else. \"What is X\" "
+            "(a single fact) is a story; \"What are the/some Xs\" (plural "
+            "enumeration) is a list; \"what are the steps/differences/causes/"
+            "symptoms\" is a list."
+        ),
+        prefill="The user is asking for a ",
+        branches=["list.", "story."],
+        parse=lambda s: s.startswith("list"),
+    ),
+    # Intent base + single-plural only (100% screen, 93% full on MiniCPM;
+    # perfect list recall but misses some "write a X" prose prompts).
+    Variant(
+        name="minicpm_intent_sp",
+        system=_INTENT_BASE + _SINGLE_PLURAL,
+        prefill="The intent is to get a ",
+        branches=["list.", "story."],
+        parse=lambda s: s.startswith("list"),
+    ),
+]
+# Every known variant, with the shipped default first.
+VARIANTS = [DEFAULT_VARIANT, *ALTERNATES]
+def classify(ctx, prompt, variant=DEFAULT_VARIANT):
+    """Run a single grammar-constrained classifier call.
+    `ctx` is a Context (see app.py); this reads its .model, .tokenizer,
+    .token_text, .eos_token_ids and .pad_token_id. The chat-templated prompt is
+    extended with `variant.prefill` and decoding is restricted to the literals
+    in `variant.branches`. Returns (variant.parse(raw), raw), where raw is the
+    decoded continuation."""
+    tokenizer = ctx.tokenizer
+    chat = [
+        {"role": "system", "content": variant.system},
+        {"role": "user", "content": prompt},
+    ]
+    # Render the template to text so the prefill can be appended verbatim.
+    prompt_text = (
+        tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
+        + variant.prefill
+    )
+    branch_grammars = [compile_literal(branch) for branch in variant.branches]
+    processor = GrammarLogitsProcessor(
+        union_grammars(branch_grammars), tokenizer, ctx.token_text, ctx.eos_token_ids
+    )
+    enc = tokenizer(prompt_text, return_tensors="pt", add_special_tokens=False).to(ctx.model.device)
+    n_prompt_tokens = enc["input_ids"].shape[1]
+    with torch.no_grad():
+        generated = ctx.model.generate(
+            **enc,
+            max_new_tokens=16,
+            do_sample=False,
+            logits_processor=LogitsProcessorList([processor]),
+            pad_token_id=ctx.pad_token_id,
+        )
+    # Keep only the newly generated tokens; the prompt is sliced off.
+    raw = tokenizer.decode(generated[0][n_prompt_tokens:], skip_special_tokens=True)
+    return variant.parse(raw), raw

crossing_search.py ADDED Viewed

	@@ -0,0 +1,292 @@

+"""Local-crossing-objective search for acrostics (Python port of
+src/crossingSearch.js + the LineMaskScore / NewlineStop bits of
+src/surprisalLookahead.js).
+This is the "extra attention when we come to a constraint": the forced first
+letter of each line is a cliff where the constraint fights what the model wants
+to say. Greedy is a strong baseline; search only beats it when the *objective*
+is right. So we change only the objective:
+  1. Score a SHORT fixed window straddling the crossing — the last `k` content
+     tokens before the break, plus the forced letter and the next `j` tokens.
+     Length-neutral; the structural newline is never scored, so there's no
+     run-to-the-wall bias.
+  2. Look `j` tokens PAST the forced letter (does the next line *continue*
+     well?), not just at it.
+  3. Make the break point a search variable, snapped to word boundaries:
+     generate the line greedily, then consider ending it 0..R tokens earlier.
+     r=0 (greedy) is always a candidate, so this can only match or beat greedy.
+Plus two stealth touches carried in LineMaskScore: lowercase the forced letter
+mid-sentence (so the acrostic hides), and a minimum line length (no stubby
+lines). Public-API only: each line/rollout is a fresh model.generate()
+continuation of the chat-templated prompt + committed text fed back as a string.
+"""
+from __future__ import annotations
+import re
+import numpy as np
+import torch
+from transformers import LogitsProcessor, LogitsProcessorList, StoppingCriteria, StoppingCriteriaList
+from masking import LegalCache
+# Text is "mid-sentence" when it does not end a sentence, and then the next
+# forced letter should be lowercase. An empty prefix (line 0) counts as a
+# sentence start.
+_SENTENCE_END = re.compile(r"[.!?][\"'”’)\]]?$")
+_WORD_START = re.compile(r"^[\s.,;:!?)\]\"'’”]")
+def mid_sentence(t):
+    """Return True when `t`, ignoring trailing whitespace, is non-empty and does
+    not end with sentence-final punctuation (optionally followed by one closing
+    quote or bracket)."""
+    trimmed = re.sub(r"\s+$", "", t or "")
+    if not trimmed:
+        return False
+    return _SENTENCE_END.search(trimmed) is None
+class LineMaskScore(LogitsProcessor):
+    """Grammar-masking processor that also accumulates the greedy chosen-token
+    log-prob per step, applies stealth casing + minimum line length, and (when
+    capture_top_n > 0, at the first step) records the top-N legal openings and a
+    surprise signal.
+    Results are read back from the instance after generation: `step_logprobs`
+    (one float per call), `top_n` and `surprise` (None until captured). Only
+    row 0 of the batch is inspected and masked."""
+    def __init__(self, grammar, start_state, tokenizer, token_text, tok_info, cache,
+                 capture_top_n=0, force_lower_first=False, min_line=0):
+        super().__init__()
+        self.grammar = grammar
+        self.start_state = start_state
+        self.tokenizer = tokenizer
+        self.token_text = token_text
+        self.info = tok_info
+        self.cache = cache
+        self.capture_top_n = capture_top_n
+        self.force_lower_first = force_lower_first
+        self.min_line = min_line
+        # Learned on the first call: the input length at that point is treated
+        # as the prompt, everything after it as generated text.
+        self.prompt_length = None
+        self.step_logprobs = []   # chosen (argmax) log-prob, one per generated step
+        self.top_n = None
+        self.surprise = None
+    def __call__(self, input_ids, scores):
+        ids = input_ids[0]
+        if self.prompt_length is None:
+            self.prompt_length = ids.shape[0]
+        generated = ids[self.prompt_length:].tolist()
+        gen = (
+            self.tokenizer.decode(generated, skip_special_tokens=True)
+            if generated
+            else ""
+        )
+        # Re-derive the grammar state from the decoded text generated so far.
+        state = self.grammar.advance(self.start_state, gen)
+        # Row 0 of the scores; the masking below writes into it in place and
+        # `scores` itself is what gets returned.
+        data = scores[0]
+        if state == -1:
+            # Dead grammar state: leave the scores unmasked and log a neutral 0.
+            self.step_logprobs.append(0.0)
+            return scores
+        # True until a signal has been captured. Not strictly "step 0": the
+        # early returns leave both fields unset, so capture can happen later.
+        first_step = self.top_n is None and self.surprise is None
+        want_signal = self.capture_top_n and first_step
+        lse_all = None
+        if want_signal:
+            # Log-partition over the whole vocabulary, taken before masking.
+            lse_all = torch.logsumexp(data, dim=0).item()
+        # Grammar-legal set (shared, state-cached), then stealth + minLine on top.
+        # Copied because the mask is edited in place below.
+        legal = self.cache.legal_np(state).copy()
+        info = self.info
+        if self.force_lower_first and len(gen) == 0:
+            # Stealth casing applies to the first token of the line only, and
+            # only if a token flagged alpha_lower remains legal.
+            # NOTE(review): alpha_lower / alpha_upper presumably flag tokens that
+            # start with a lowercase / uppercase letter — confirm in tokinfo.py.
+            if np.any(legal & info.alpha_lower):
+                legal &= ~info.alpha_upper
+        if self.min_line and len(gen) < self.min_line:
+            # Below the minimum length (in decoded characters): forbid EOS and
+            # newline tokens, but only when some other non-empty token is legal.
+            body = legal & ~info.eos_mask & info.nonempty & ~info.has_newline
+            if np.any(body):
+                legal &= ~(info.eos_mask | info.has_newline)
+        illegal = torch.from_numpy(~legal).to(data.device)
+        data[illegal] = float("-inf")
+        max_legal = torch.max(data)
+        if max_legal.item() == float("-inf"):
+            # Nothing legal is left; log a neutral 0 and return.
+            self.step_logprobs.append(0.0)
+            return scores
+        # Log-prob of the argmax token under the masked (renormalised) scores.
+        lse_masked = torch.logsumexp(data, dim=0)
+        self.step_logprobs.append((max_legal - lse_masked).item())
+        if want_signal:
+            # Surprise: unmasked log-partition minus the best legal logit.
+            self.surprise = lse_all - max_legal.item()
+            legal_idx = np.nonzero(legal)[0]
+            logits = data[torch.from_numpy(legal_idx).to(data.device)]
+            order = torch.argsort(logits, descending=True)[: self.capture_top_n]
+            lse_m = lse_masked.item()
+            self.top_n = [
+                {"id": int(legal_idx[int(o)]), "logit": float(logits[int(o)]),
+                 "logprob": float(logits[int(o)]) - lse_m}
+                for o in order
+            ]
+        return scores
+class NewlineStop(StoppingCriteria):
+    """Stopping criterion that fires for a sequence once it has grown past the
+    prompt and its latest token is flagged in `has_newline`."""
+    def __init__(self, prompt_length, has_newline):
+        super().__init__()
+        self.has_newline = has_newline
+        self.prompt_length = prompt_length
+    def __call__(self, input_ids, scores, **kwargs):
+        # One flag per sequence; a sequence no longer than the prompt never stops.
+        flags = [
+            ids.shape[0] > self.prompt_length and bool(self.has_newline[int(ids[-1])])
+            for ids in input_ids
+        ]
+        return torch.tensor(flags, dtype=torch.bool, device=input_ids.device)
+def _mean(a):
+    """Arithmetic mean of `a`, or 0.0 when `a` is empty."""
+    if not a:
+        return 0.0
+    return sum(a) / len(a)
+def generate_crossing_search(ctx, grammar, secret, max_line, prompt, system_prompt,
+                             k=4, j=3, R=4, min_line=30, on_line=None):
+    """Generate acrostic text with the local-crossing search.
+    One line is produced per element of `secret`. Every non-final line is first
+    generated greedily; then break points that drop the last r tokens
+    (r = 0..R) are scored by the mean of the last k per-step scores kept before
+    the break plus the per-step scores of a short rollout (at most 1 + j new
+    tokens) that opens the next line. The best-scoring break is committed.
+    Args:
+        ctx: provides model, tokenizer, token_text, tok_info, eos_token_ids and
+            pad_token_id.
+        grammar: exposes `initial` and `advance(state, text)`; -1 is dead.
+        secret: only its length is used here (the letters are presumably
+            already encoded in `grammar`).
+        max_line: a greedy line may generate up to max_line + 8 new tokens.
+        prompt, system_prompt: user and system messages for the chat template.
+        k: number of trailing pre-break scores that enter a candidate's score.
+        j: extra tokens rolled out after the next line's first token.
+        R: largest number of trailing tokens to trim; R <= 0 disables the search.
+        min_line: passed to LineMaskScore for greedy lines; also the minimum
+            character length of a trimmed (r > 0) line.
+        on_line: optional callback on_line(line_text, info), called as each
+            line is committed (for incremental display); info is {"line": i}.
+    Returns:
+        {"text": str, "per_line": [...]}; each per_line entry has the keys
+        "line", "chosen", "r" and "candidates".
+    """
+    model = ctx.model
+    tok = ctx.tokenizer
+    token_text = ctx.token_text
+    info = ctx.tok_info
+    cache = LegalCache(grammar, token_text, ctx.eos_token_ids)  # shared across rollouts
+    has_newline = info.has_newline
+    # Render the chat prompt once; each generation call appends the acrostic
+    # text committed so far to this string.
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": prompt},
+    ]
+    prompt_string = tok.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+    # Token ids for `text`, with no special tokens added.
+    def enc_ids(text):
+        return tok(text, add_special_tokens=False).input_ids
+    # Greedy-decode up to max_new_tokens after `text` under logits processor
+    # `proc`, optionally stopping at the first newline-bearing token. Returns
+    # only the newly generated text.
+    def generate_from(text, max_new_tokens, proc, stop_newline):
+        enc = tok(text, return_tensors="pt", add_special_tokens=False).to(model.device)
+        stops = None
+        if stop_newline:
+            stops = StoppingCriteriaList(
+                [NewlineStop(enc["input_ids"].shape[1], has_newline)]
+            )
+        with torch.no_grad():
+            out = model.generate(
+                **enc,
+                max_new_tokens=max_new_tokens,
+                do_sample=False,
+                logits_processor=LogitsProcessorList([proc]),
+                stopping_criteria=stops,
+                pad_token_id=ctx.pad_token_id,
+            )
+        # Slice off the prompt tokens so only the continuation is decoded.
+        gen_ids = out[0][enc["input_ids"].shape[1]:]
+        return tok.decode(gen_ids, skip_special_tokens=True)
+    # Greedy line from prefix_text (acrostic text so far).
+    def gen_line(prefix_text, is_last):
+        start_state = grammar.advance(grammar.initial, prefix_text)
+        ctx_str = prompt_string + prefix_text
+        proc = LineMaskScore(
+            grammar, start_state, tok, token_text, info, cache,
+            force_lower_first=mid_sentence(prefix_text), min_line=min_line,
+        )
+        # NOTE(review): the + 8 slack over max_line is not explained here —
+        # presumably headroom for the closing newline/EOS; confirm.
+        text = generate_from(ctx_str, max_line + 8, proc, stop_newline=not is_last)
+        if not is_last:
+            # Keep only the text up to and including the first newline.
+            nl = text.find("\n")
+            if nl != -1:
+                text = text[: nl + 1]
+        # Recover the line's token ids by re-encoding with and without it.
+        # NOTE(review): assumes the ids of ctx_str are a prefix of the ids of
+        # ctx_str + text — confirm for this tokenizer.
+        base_n = len(enc_ids(ctx_str))
+        line_ids = enc_ids(ctx_str + text)[base_n:]
+        return text, line_ids, proc.step_logprobs
+    # Roll the NEXT line's opening: forced letter + up to n-1 content tokens.
+    def roll_open(prefix_text, n):
+        start_state = grammar.advance(grammar.initial, prefix_text)
+        if start_state == -1:
+            return []
+        proc = LineMaskScore(
+            grammar, start_state, tok, token_text, info, cache,
+            force_lower_first=mid_sentence(prefix_text),
+        )
+        # Only the processor's recorded scores are needed; the text is discarded.
+        generate_from(prompt_string + prefix_text, n, proc, stop_newline=True)
+        return proc.step_logprobs
+    n_lines = len(secret)
+    committed = ""
+    per_line = []
+    for i in range(n_lines):
+        is_last = i == n_lines - 1
+        text, line_ids, logps = gen_line(committed, is_last)
+        # Last line, or no break search: commit the greedy line as-is.
+        if is_last or R <= 0:
+            committed += text
+            per_line.append({"line": i, "chosen": text, "r": 0, "candidates": None})
+            if on_line:
+                on_line(text, {"line": i})
+            continue
+        # Align token ids with per-step scores from the end; the two lists may
+        # differ in length, so keep only the common tail of length m.
+        m = min(len(line_ids), len(logps))
+        ids = line_ids[-m:] if m else []
+        lps = logps[-m:] if m else []
+        has_nl = text.endswith("\n")
+        line_start_state = grammar.advance(grammar.initial, committed)
+        candidates = []
+        # Never trim the whole line: at most m - 1 tokens are dropped.
+        for r in range(0, min(R, m - 1) + 1):
+            # r tokens trimmed -> break after (m-r) tokens. Require the first
+            # trimmed token to begin a new word/punctuation (clean boundary).
+            if r > 0:
+                first_trimmed = token_text[ids[m - r]]
+                if not first_trimmed or not _WORD_START.match(first_trimmed):
+                    continue
+            kept_ids = ids[: m - r]
+            if not kept_ids:
+                continue
+            # Normalise the kept text to end in exactly one newline.
+            prefix_text = re.sub(r"\n+$", "", tok.decode(kept_ids, skip_special_tokens=True))
+            broke_line = prefix_text + "\n"
+            # The trimmed line must still be grammar-legal (keep the forced letter).
+            if grammar.advance(line_start_state, broke_line) == -1:
+                continue
+            # The minimum length only applies to trimmed lines, not the greedy one.
+            if r > 0 and len(prefix_text) < min_line:
+                continue
+            before_lps = lps[: m - r]
+            # Untrimmed line that ended in a newline: drop the final score
+            # (presumably the newline token's) before taking the window.
+            if r == 0 and has_nl:
+                before_lps = before_lps[:-1]
+            before_lps = before_lps[-k:]
+            after_lps = roll_open(committed + broke_line, 1 + j)
+            candidates.append({
+                "r": r,
+                "broke_line": broke_line,
+                "score": _mean(before_lps + after_lps),
+                "n_before": len(before_lps),
+                "n_after": len(after_lps),
+                "preview": broke_line[-28:],
+            })
+        # Fall back to the greedy line when no candidate survived the filters.
+        chosen, r = text, 0
+        if candidates:
+            # Sorted in place, so per_line reports candidates best-first.
+            candidates.sort(key=lambda c: c["score"], reverse=True)
+            chosen = candidates[0]["broke_line"]
+            r = candidates[0]["r"]
+        committed += chosen
+        per_line.append({"line": i, "chosen": chosen, "r": r, "candidates": candidates})
+        if on_line:
+            on_line(chosen, {"line": i})
+    return {"text": committed, "per_line": per_line}

eval_classifier.py ADDED Viewed

	@@ -0,0 +1,185 @@

+"""List-vs-prose classifier eval harness (Python port of the dataset + runner
+from src/eval.js).
+50 list-style + 50 prose-style hand-picked prompts, split 10+10 validation /
+40+40 dev. Run this as a script to sweep candidate classifier variants on the
+current model and pick the best one for it:
+    SIDECHAT_MODEL=openbmb/MiniCPM5-1B .venv/bin/python eval_classifier.py
+It prints a ranking table (dev accuracy, list-recall, prose-recall) and then
+validates the top variants on the held-out set. The winner becomes
+classifier.DEFAULT_VARIANT.
+"""
+from __future__ import annotations
+import time
+from classifier import Variant, classify
+# ---------------------------------------------------------------------------
+# Datasets (ported verbatim from src/eval.js)
+# ---------------------------------------------------------------------------
+# Prompts whose expected label is "list" (make_labelled marks them
+# expected=True). Order matters: the first 10 entries form the validation
+# split and the remaining 40 the dev split, via the slices after these lists.
+LIST_PROMPTS = [
+    # --- validation (first 10) ---
+    "list 10 ways to improve morale at work",
+    "give me five reasons to learn Rust",
+    "what are the main benefits of meditation?",
+    "suggest some names for my new puppy",
+    "name three famous jazz musicians",
+    "list the ingredients for guacamole",
+    "what are the steps to change a tire?",
+    "give me ideas for weekend activities with kids",
+    "tips for packing light when traveling",
+    "what are some common Italian desserts?",
+    # --- dev (next 40) ---
+    "list popular video game consoles from the 1990s",
+    "suggest questions to ask at a job interview",
+    "what are the symptoms of dehydration?",
+    "name ten countries in Africa",
+    "list some movies directed by Christopher Nolan",
+    "give me seven examples of onomatopoeia",
+    "what tools do I need to build a raised garden bed?",
+    "suggest some icebreaker activities for a team meeting",
+    "ways to reduce food waste at home",
+    "list the planets in order from the sun",
+    "what are the main differences between Python 2 and Python 3?",
+    "give me 5 good podcast recommendations about history",
+    "name three types of dance",
+    "top tourist attractions in Kyoto",
+    "list common symptoms of the flu",
+    "what are some healthy snack ideas for kids?",
+    "suggest some books similar to The Hobbit",
+    "name five spices commonly used in Indian cooking",
+    "list programming languages that compile to WebAssembly",
+    "give me a list of yoga poses for beginners",
+    "what are some good stretches before running?",
+    "name the colors of the rainbow",
+    "list the months of the year in French",
+    "what are common causes of burnout?",
+    "suggest some romantic date ideas in New York",
+    "give me a bullet list of home safety tips",
+    "list the bones in the human hand",
+    "ways to learn a new language quickly",
+    "name five mammals native to Australia",
+    "what are some highlights of the French Revolution?",
+    "list common pitfalls of distributed systems",
+    "top 10 songs from the 1980s",
+    "suggest some hobbies for introverts",
+    "name the original members of The Beatles",
+    "what are the primary colors?",
+    "list reasons to adopt a cat",
+    "give me 6 tips for better sleep hygiene",
+    "name the Great Lakes",
+    "list programming concepts every developer should know",
+    "suggest some vegan dinner recipes",
+]
+# Prompts whose expected label is "prose" (make_labelled marks them
+# expected=False). Same layout as LIST_PROMPTS: the first 10 entries are the
+# validation split, the remaining 40 the dev split.
+PROSE_PROMPTS = [
+    # --- validation (first 10) ---
+    "tell me a short story about a lighthouse keeper",
+    "write a haiku about autumn",
+    "explain how a solar panel works in a paragraph",
+    "summarize the plot of Pride and Prejudice",
+    'what does the word "quixotic" mean?',
+    'translate "good morning" to Japanese',
+    "write a professional email declining a meeting",
+    "describe the taste of a ripe mango",
+    "compose a poem about loneliness",
+    "what is the capital of Australia?",
+    # --- dev (next 40) ---
+    "tell me about the invention of the printing press",
+    "write a cover letter for a software engineering role",
+    "explain the theory of relativity to a 10-year-old",
+    "who was Marie Curie?",
+    "describe a sunset over the ocean",
+    "what is photosynthesis?",
+    "write a bedtime story for a 4-year-old",
+    "explain how blockchain works",
+    "tell me about the history of tea in China",
+    "describe the plot of Inception",
+    "write a haiku about the sea",
+    "what is the meaning of life according to Camus?",
+    "tell me a joke about programming",
+    "explain why the sky is blue",
+    "describe what it feels like to run a marathon",
+    "write a love letter in the style of Shakespeare",
+    "what year did the Berlin Wall fall?",
+    "tell me about the architecture of the Sagrada Familia",
+    "write a persuasive essay on renewable energy",
+    "describe the personality of a golden retriever",
+    "who was the first person on the moon?",
+    "tell me about quantum entanglement briefly",
+    "write a one-paragraph synopsis of The Great Gatsby",
+    'what is the etymology of the word "sandwich"?',
+    "explain why we dream",
+    "tell me a myth about the origin of fire",
+    "describe the feeling of nostalgia",
+    "write a toast for a wedding",
+    'what does "serendipity" mean?',
+    "tell me about your favorite season",
+    "explain the difference between empathy and sympathy",
+    "who wrote Hamlet?",
+    "write a limerick about cats",
+    "tell me a ghost story",
+    "describe Mount Fuji in winter",
+    "what happened in the Cuban Missile Crisis?",
+    "explain how a car engine works",
+    "tell me a folk tale from Ireland",
+    "write an essay on the importance of libraries",
+    "describe a perfect day",
+]
+VALIDATION_LIST = LIST_PROMPTS[:10]
+VALIDATION_PROSE = PROSE_PROMPTS[:10]
+DEV_LIST = LIST_PROMPTS[10:]
+DEV_PROSE = PROSE_PROMPTS[10:]
+def make_labelled(list_prompts, prose_prompts):
+    return [{"prompt": p, "expected": True} for p in list_prompts] + [
+        {"prompt": p, "expected": False} for p in prose_prompts
+    ]
+def run_variant_on(ctx, variant, labelled, on_progress=None):
+    results = []
+    for i, item in enumerate(labelled):
+        pred, raw = classify(ctx, item["prompt"], variant)
+        results.append({**item, "prediction": pred, "raw": raw, "correct": pred == item["expected"]})
+        if on_progress:
+            on_progress(i + 1, len(labelled))
+    correct = sum(1 for r in results if r["correct"])
+    list_total = sum(1 for r in results if r["expected"])
+    prose_total = len(results) - list_total
+    list_hit = sum(1 for r in results if r["expected"] and r["correct"])
+    prose_hit = sum(1 for r in results if not r["expected"] and r["correct"])
+    return {
+        "variant": variant.name,
+        "accuracy": correct / len(results),
+        "correct": correct,
+        "total": len(results),
+        "list_recall": (list_hit, list_total),
+        "prose_recall": (prose_hit, prose_total),
+        "results": results,
+    }
+def sweep(ctx, variants, labelled, label=""):
+    summaries = []
+    for v in variants:
+        t0 = time.time()
+        res = run_variant_on(ctx, v, labelled)
+        res["wall_s"] = time.time() - t0
+        lh, lt = res["list_recall"]
+        ph, pt = res["prose_recall"]
+        print(
+            f"  [{label}] {v.name:30} {res['correct']:>2}/{res['total']} "
+            f"= {res['accuracy']*100:5.1f}%  list {lh}/{lt}  prose {ph}/{pt}  "
+            f"({res['wall_s']:.0f}s)",
+            flush=True,
+        )
+        summaries.append(res)
+    return summaries

grammar.py ADDED Viewed

	@@ -0,0 +1,169 @@

+"""Tiny grammar engine for acrostic-style constraints.
+A faithful Python port of src/grammar.js.
+Primitives:
+  - Atoms: {"kind": "lit", "allowed": set[str]} (consumes exactly one char from
+    `allowed`) or {"kind": "body", "max": int} (consumes 0..max non-newline
+    chars).
+  - Atom sequences are concatenation-only; with the body/newline structure we
+    use, transitions are deterministic, so state packs into one int:
+    atom_idx * stride + count.
+Builders:
+  - compile_acrostic(secret, ...) — list-mode or prose-mode acrostic.
+  - compile_literal(text) — exact-text matcher (used by the classifier).
+  - union_grammars([g1, g2, ...]) — accept if any branch is alive.
+The dead-state sentinel is -1 everywhere, matching the JS original.
+"""
+from __future__ import annotations
+# Spaces in the secret are treated as "word breaks" — they don't pin the line to
+# any particular letter, but they still produce a line, and the line must start
+# with a punctuation character so the acrostic reads naturally
+# ("HI WORLD" -> H... / I... / <punct>... / W... / O... / R... / L... / D...).
+PUNCT_FOR_SPACE = set(
+    list(".,;:!?-")
+    + list("()[]{}")
+    + list("~<>")
+    + ['"', "'", "`"]
+    + list("@#$%&+=/\\|_^")
+)
+class AtomGrammar:
+    """A single concatenation-only atom-sequence grammar (an NFA packed into ints)."""
+    def __init__(self, atoms):
+        self.atoms = atoms
+        max_body_max = 0
+        for a in atoms:
+            if a["kind"] == "body" and a["max"] > max_body_max:
+                max_body_max = a["max"]
+        self.stride = max_body_max + 2
+        self.PAST_END = len(atoms) * self.stride
+        self.state_count = self.PAST_END + 1
+        # Precompute accepting states: (a, c) accepts iff atom `a` can be
+        # epsilon-skipped at count `c` AND (a+1, 0) is accepting.
+        accepting = bytearray(self.state_count)
+        accepting[self.PAST_END] = 1
+        next_accepting = True
+        for a in range(len(atoms) - 1, -1, -1):
+            atom = atoms[a]
+            if next_accepting:
+                mn = 1 if atom["kind"] == "lit" else 0
+                mx = 1 if atom["kind"] == "lit" else atom["max"]
+                for c in range(mn, mx + 1):
+                    accepting[a * self.stride + c] = 1
+            next_accepting = accepting[a * self.stride + 0] == 1
+        self.accepting = accepting
+        self.initial = 0
+    def _consume_at(self, a, ch):
+        atoms = self.atoms
+        while a < len(atoms):
+            atom = atoms[a]
+            if atom["kind"] == "lit":
+                if ch in atom["allowed"]:
+                    return self.PAST_END if a + 1 >= len(atoms) else (a + 1) * self.stride
+                return -1
+            if ch != "\n":
+                return a * self.stride + 1
+            a += 1
+        return -1
+    def advance(self, state, s):
+        stride = self.stride
+        atoms = self.atoms
+        cur = state
+        for ch in s:
+            if cur == self.PAST_END:
+                return -1
+            a = cur // stride
+            c = cur - a * stride
+            atom = atoms[a]
+            if atom["kind"] == "lit":
+                if c < 1 and ch in atom["allowed"]:
+                    nxt = self.PAST_END if a + 1 >= len(atoms) else (a + 1) * stride
+                else:
+                    return -1
+            else:
+                if c < atom["max"] and ch != "\n":
+                    nxt = a * stride + (c + 1)
+                else:
+                    nxt = self._consume_at(a + 1, ch)
+            if nxt == -1:
+                return -1
+            cur = nxt
+        return cur
+    def accepts(self, state):
+        return state is not None and 0 <= state < self.state_count and self.accepting[state] == 1
+def compile_acrostic(secret, list_prefix=" * ", max_line=80, case_insensitive=False, first_line_prefix=True):
+    if not secret:
+        raise ValueError("secret must be non-empty")
+    atoms = []
+    for i, letter in enumerate(secret):
+        want_prefix = i > 0 or first_line_prefix
+        if want_prefix:
+            for c in list_prefix:
+                atoms.append({"kind": "lit", "allowed": {c}})
+        if letter == " ":
+            allowed = set(PUNCT_FOR_SPACE)
+        elif case_insensitive:
+            allowed = {letter.upper(), letter.lower()}
+        else:
+            allowed = {letter}
+        atoms.append({"kind": "lit", "allowed": allowed})
+        atoms.append({"kind": "body", "max": max_line})
+        if i < len(secret) - 1:
+            atoms.append({"kind": "lit", "allowed": {"\n"}})
+    return AtomGrammar(atoms)
+def compile_literal(text):
+    if not text:
+        raise ValueError("literal must be non-empty")
+    atoms = [{"kind": "lit", "allowed": {c}} for c in text]
+    return AtomGrammar(atoms)
+class UnionGrammar:
+    """Run several grammars in parallel; a token is alive iff at least one branch
+    is alive. State is a list of per-branch ints (-1 = dead branch). When every
+    branch is dead, advance returns -1 (the single-grammar dead sentinel)."""
+    def __init__(self, grammars):
+        self.grammars = grammars
+        self.initial = [g.initial for g in grammars]
+    def advance(self, state, s):
+        nxt = [-1] * len(self.grammars)
+        any_live = False
+        for i, g in enumerate(self.grammars):
+            if state[i] == -1:
+                nxt[i] = -1
+                continue
+            r = g.advance(state[i], s)
+            nxt[i] = r
+            if r != -1:
+                any_live = True
+        return nxt if any_live else -1
+    def accepts(self, state):
+        if state == -1:
+            return False
+        for i, g in enumerate(self.grammars):
+            if state[i] != -1 and g.accepts(state[i]):
+                return True
+        return False
+def union_grammars(grammars):
+    return UnionGrammar(grammars)

logits.py ADDED Viewed

	@@ -0,0 +1,84 @@

+"""Grammar-constrained LogitsProcessor (Python port of src/logits.js).
+At each generation step:
+  1. Decode the generated suffix back to text.
+  2. Advance the grammar NFA by that text.
+  3. For every candidate token id, check whether appending its decoded text
+     keeps the NFA alive; mask losers to -inf (via the shared LegalCache).
+  4. EOS is allowed only once the NFA has reached an accept state.
+Per-token decoding can disagree with BPE sequence-decoding in edge cases
+(merged punctuation, etc.); for the acrostic patterns we care about this
+approximation is fine.
+"""
+from __future__ import annotations
+import time
+from transformers import LogitsProcessor
+from masking import LegalCache
+def build_token_text_table(tokenizer, vocab_size):
+    """One-shot build of tokenId -> text, using per-token decode. Special tokens
+    decode to '' under skip_special_tokens=True, which we treat as
+    "disallowed" (empty string)."""
+    texts = tokenizer.batch_decode(
+        [[i] for i in range(vocab_size)], skip_special_tokens=True
+    )
+    return [t if isinstance(t, str) else "" for t in texts]
+class GrammarLogitsProcessor(LogitsProcessor):
+    def __init__(self, grammar, tokenizer, token_text, eos_token_ids=(), legal_cache=None):
+        super().__init__()
+        self.grammar = grammar
+        self.tokenizer = tokenizer
+        self.token_text = token_text
+        self.cache = legal_cache or LegalCache(grammar, token_text, eos_token_ids)
+        self.prompt_length = None
+        self.stats = _fresh_stats()
+    def reset(self):
+        self.prompt_length = None
+        self.stats = _fresh_stats()
+    def __call__(self, input_ids, scores):
+        t_entry = time.perf_counter()
+        ids = input_ids[0]
+        if self.prompt_length is None:
+            self.prompt_length = ids.shape[0]
+        generated = ids[self.prompt_length:].tolist()
+        text = (
+            self.tokenizer.decode(generated, skip_special_tokens=True)
+            if generated
+            else ""
+        )
+        state = self.grammar.advance(self.grammar.initial, text)
+        data = scores[0]
+        if state == -1:
+            # Already violated; nothing useful to do without rewinding. Let the
+            # original logits through so generation at least terminates.
+            self._record(time.perf_counter() - t_entry, -1)
+            return scores
+        illegal = self.cache.illegal_tensor(state)
+        data[illegal.to(data.device)] = float("-inf")
+        self._record(time.perf_counter() - t_entry, int((~illegal).sum().item()))
+        return scores
+    def _record(self, dt, survivors):
+        st = self.stats
+        st["calls"] += 1
+        st["total_ms"] += dt * 1000.0
+        st["per_step"].append({"ms": dt * 1000.0, "survivors": survivors})
+def _fresh_stats():
+    return {"calls": 0, "total_ms": 0.0, "per_step": []}

masking.py ADDED Viewed

	@@ -0,0 +1,53 @@

+"""Shared grammar-legality computation + a per-state cache.
+The set of grammar-legal next tokens is a pure function of the grammar state, so
+we cache the boolean legal mask by state. This is what makes the crossing search
+affordable: its many short rollouts all start from the same handful of
+line-start states and reuse one (expensive) full-vocab scan.
+"""
+from __future__ import annotations
+import numpy as np
+import torch
+class LegalCache:
+    def __init__(self, grammar, token_text, eos_token_ids=()):
+        self.grammar = grammar
+        self.token_text = token_text
+        self.eos_token_ids = [int(x) for x in eos_token_ids]
+        # Special tokens decode to '' and are always illegal — never probe them.
+        self._scan_ids = [i for i, t in enumerate(token_text) if t]
+        self._legal_cache = {}      # state-key -> np.bool_ array
+        self._illegal_cache = {}    # state-key -> torch.BoolTensor
+    @staticmethod
+    def _key(state):
+        return state if isinstance(state, int) else tuple(state)
+    def legal_np(self, state):
+        key = self._key(state)
+        cached = self._legal_cache.get(key)
+        if cached is not None:
+            return cached
+        advance = self.grammar.advance
+        token_text = self.token_text
+        at_accept = self.grammar.accepts(state)
+        legal = np.zeros(len(token_text), dtype=bool)
+        for i in self._scan_ids:
+            if advance(state, token_text[i]) != -1:
+                legal[i] = True
+        for eid in self.eos_token_ids:
+            legal[eid] = at_accept
+        self._legal_cache[key] = legal
+        return legal
+    def illegal_tensor(self, state):
+        key = self._key(state)
+        cached = self._illegal_cache.get(key)
+        if cached is not None:
+            return cached
+        t = torch.from_numpy(~self.legal_np(state))
+        self._illegal_cache[key] = t
+        return t

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+torch
+transformers>=5.12
+accelerate
+numpy
+gradio>=6

sweep_minicpm.py ADDED Viewed

	@@ -0,0 +1,121 @@

+"""Sweep candidate list/prose classifier variants on the current model, pick a
+winner. CPU-conscious: screen all candidates on a 20-prompt subset, then run the
+top few on the full 80-prompt dev set + 20-prompt validation set.
+    SIDECHAT_MODEL=openbmb/MiniCPM5-1B .venv/bin/python sweep_minicpm.py
+"""
+from __future__ import annotations
+import time
+import app  # loads the model + CTX
+from classifier import Variant
+from eval_classifier import (
+    DEV_LIST, DEV_PROSE, VALIDATION_LIST, VALIDATION_PROSE, make_labelled, sweep,
+)
+# Shared model context; importing `app` loads the model and builds it.
+CTX = app.CTX
+parse_list = lambda s: s.startswith("list")
+parse_items = lambda s: s.startswith("items")
+# Candidate variants spanning the axes that mattered in the JS sweep: intent vs.
+# request framing, default-to-list vs. default-to-story polarity, list/story
+# branch vocab, and the single-vs-plural rule. (true/false and CAPS branches are
+# known-bad and omitted.)
+# The constants below are the text fragments CANDIDATES is assembled from:
+# stand-alone base instructions plus rule suffixes that are concatenated onto
+# a base (each suffix therefore starts with a space).
+# Base: "list" only for enumerations, "story" for everything else.
+C1_BASE = (
+    "Classify the user's request. Use \"list\" when the user wants enumerated "
+    "items. Use \"story\" for everything else."
+)
+# Rule suffix: single fact -> story, plural enumeration -> list.
+SINGLE_PLURAL = (
+    " \"What is X\" (a single fact) is a story; \"What are the/some Xs\" (plural "
+    "enumeration) is a list; \"what are the steps/differences/causes/symptoms\" "
+    "is a list."
+)
+# Rule suffix: write/compose requests are always a story.
+WRITE_FORMS = (
+    " Whenever the user asks to \"write\" or \"compose\" a haiku, poem, letter, "
+    "cover letter, email, joke, story, essay, or limerick, the answer is a story."
+)
+# Base: default to "list", with a long built-in list of story triggers.
+EXTENDED_TRIGGERS = (
+    "Classify the user's request. Default to \"list\". Use \"story\" only when the "
+    "user asks for narrative/prose: \"tell me a story\", \"write a poem/haiku/"
+    "limerick/email/essay/letter\", \"describe\", \"explain\", \"translate\", "
+    "\"summarize\", \"what does X mean\", \"who was/is\", \"what is X\", \"when "
+    "did\", \"why does\", \"how does (concept)\", \"compose\"."
+)
+# Base: default to "story", with explicit list triggers.
+# NOTE(review): STORY_DEFAULT is not referenced by CANDIDATES below.
+STORY_DEFAULT = (
+    "Classify the user's request. Default to \"story\". Use \"list\" only when the "
+    "user clearly asks for multiple discrete items: \"list N\", \"name N\", "
+    "\"give N\", \"top N\", \"suggest some\", \"ways to\", \"tips\", \"steps\", "
+    "\"reasons\", \"examples of\"."
+)
+# Base: classify by the shape of the expected answer rather than the wording.
+INTENT_BASE = (
+    "Classify the user's intent. Use \"list\" when the answer is a set of "
+    "separate items the user can scan. Use \"story\" when the answer flows as "
+    "one narrative, single fact, or short paragraph."
+)
+# Base: default to "list", with a short list of story triggers.
+DEFAULT_LIST_BASE = (
+    "Classify the user's request. Default to \"list\". Use \"story\" only when "
+    "the user clearly asks for narrative: \"tell me a story\", \"write a "
+    "poem/haiku/email\", \"describe X\", \"explain X\", \"translate X\", "
+    "\"what does X mean\", \"who was/what is/when did\"."
+)
+# Baseline reveals MiniCPM's split failure: intent/list-default bases nail list
+# recall but miss prose "write a haiku" (write-forms) and "capital of Australia"
+# (single fact); the c1 base is the opposite. So pair list-favoring bases with
+# the WRITE_FORMS + SINGLE_PLURAL rules that target exactly those prose misses.
+# Each Variant is built positionally from: a unique name, the instruction text,
+# a lead-in string, the two branch strings, and the parser for the result.
+# NOTE(review): argument meanings are inferred from usage here — confirm
+# against classifier.Variant.
+CANDIDATES = [
+    # Baselines (carried from the LFM2 sweep).
+    Variant("c1_single_plural", C1_BASE + SINGLE_PLURAL, "The user is asking for a ", ["list.", "story."], parse_list),
+    Variant("intent_two_rules", INTENT_BASE, "The intent is to get a ", ["list.", "story."], parse_list),
+    # Intent base + targeted prose rules.
+    Variant("intent_write", INTENT_BASE + WRITE_FORMS, "The intent is to get a ", ["list.", "story."], parse_list),
+    Variant("intent_sp", INTENT_BASE + SINGLE_PLURAL, "The intent is to get a ", ["list.", "story."], parse_list),
+    Variant("intent_write_sp", INTENT_BASE + WRITE_FORMS + SINGLE_PLURAL, "The intent is to get a ", ["list.", "story."], parse_list),
+    # Default-to-list base + targeted prose rules.
+    Variant("default_list", DEFAULT_LIST_BASE, "The user wants the answer as a ", ["list.", "story."], parse_list),
+    Variant("default_list_write_sp", DEFAULT_LIST_BASE + WRITE_FORMS + SINGLE_PLURAL, "The user wants the answer as a ", ["list.", "story."], parse_list),
+    # c1 base + write-forms (complementary to single_plural).
+    Variant("c1_write_sp", C1_BASE + WRITE_FORMS + SINGLE_PLURAL, "The user is asking for a ", ["list.", "story."], parse_list),
+    # Long built-in trigger list (no separate rules).
+    Variant("extended_triggers", EXTENDED_TRIGGERS, "The user wants the answer as a ", ["list.", "story."], parse_list),
+    # Alternate branch vocab.
+    Variant(
+        "items_text",
+        "Classify the user's intent. Use \"items\" when the user wants enumerated "
+        "items. Use \"text\" for everything else (narrative, single answer, "
+        "explanation, translation, story, poem).",
+        "The intent is to get ", ["items.", "text."], parse_items,
+    ),
+]
+def main():
+    print(f"model: {app.MODEL_ID} · {len(CTX.token_text)} tokens", flush=True)
+    # Fast screen on a 20-prompt subset (first 10 of each dev class).
+    screen = make_labelled(DEV_LIST[:10], DEV_PROSE[:10])
+    print(f"\n=== SCREEN ({len(screen)} prompts) ===", flush=True)
+    t0 = time.time()
+    screen_res = sweep(CTX, CANDIDATES, screen, label="screen")
+    screen_res.sort(key=lambda r: r["accuracy"], reverse=True)
+    print(f"screen done in {(time.time()-t0)/60:.1f} min", flush=True)
+    top = [next(c for c in CANDIDATES if c.name == r["variant"]) for r in screen_res[:3]]
+    print(f"\ntop 3 on screen: {[c.name for c in top]}", flush=True)
+    full = make_labelled(DEV_LIST, DEV_PROSE) + make_labelled(VALIDATION_LIST, VALIDATION_PROSE)
+    print(f"\n=== FULL ({len(full)} prompts: 50 list + 50 prose) ===", flush=True)
+    full_res = sweep(CTX, top, full, label="full")
+    full_res.sort(key=lambda r: r["accuracy"], reverse=True)
+    print("\n=== RANKING (full) ===", flush=True)
+    for r in full_res:
+        lh, lt = r["list_recall"]; ph, pt = r["prose_recall"]
+        print(f"  {r['variant']:30} {r['accuracy']*100:5.1f}%  list {lh}/{lt}  prose {ph}/{pt}", flush=True)
+    print(f"\nWINNER: {full_res[0]['variant']} @ {full_res[0]['accuracy']*100:.1f}%", flush=True)
+if __name__ == "__main__":
+    main()

tokinfo.py ADDED Viewed

	@@ -0,0 +1,46 @@

+"""Precomputed per-token boolean arrays used by the crossing search's stealth
+casing and minimum-line-length masking. Built once at startup."""
+from __future__ import annotations
+from dataclasses import dataclass
+import numpy as np
+def _is_ascii_alpha(ch):
+    return ("a" <= ch <= "z") or ("A" <= ch <= "Z")
+@dataclass
+class TokInfo:
+    """Per-token boolean lookup arrays, one entry per token id, as filled in
+    by build_tok_info()."""
+    has_newline: np.ndarray   # token text contains '\n'
+    alpha_lower: np.ndarray   # first ASCII letter is lowercase
+    alpha_upper: np.ndarray   # first ASCII letter is uppercase
+    nonempty: np.ndarray      # token decodes to a non-empty string
+    eos_mask: np.ndarray      # token is an EOS id
+def build_tok_info(token_text, eos_token_ids):
+    n = len(token_text)
+    has_newline = np.zeros(n, dtype=bool)
+    alpha_lower = np.zeros(n, dtype=bool)
+    alpha_upper = np.zeros(n, dtype=bool)
+    nonempty = np.zeros(n, dtype=bool)
+    for i, t in enumerate(token_text):
+        if not t:
+            continue
+        nonempty[i] = True
+        if "\n" in t:
+            has_newline[i] = True
+        for ch in t:
+            if _is_ascii_alpha(ch):
+                if ch.islower():
+                    alpha_lower[i] = True
+                else:
+                    alpha_upper[i] = True
+                break
+    eos_mask = np.zeros(n, dtype=bool)
+    for e in eos_token_ids:
+        eos_mask[int(e)] = True
+    return TokInfo(has_newline, alpha_lower, alpha_upper, nonempty, eos_mask)