the-puzzler committed on
Commit
22b6693
·
1 Parent(s): 6fb6b5d
Files changed (1) hide show
  1. app.py +248 -46
app.py CHANGED
@@ -119,7 +119,7 @@ class CNA(nn.Module):
119
  return self.proj(h)
120
 
121
  # -----------------------------
122
- # Helpers (trimmed to Strategy 1)
123
  # -----------------------------
124
  def infer_expansion_factor_from_state(state, embed_dim):
125
  for key in ("blocks.0.mlp.0.weight", "blocks.0.mlp.2.weight"):
@@ -132,18 +132,84 @@ def infer_expansion_factor_from_state(state, embed_dim):
132
  return 4
133
 
134
@torch.no_grad()
def decode(ids, tokenizer, max_chars=220):
    """Decode token ids into a single-line string, truncated to max_chars."""
    text = tokenizer.decode(ids.tolist(), skip_special_tokens=True)
    text = text.replace("\n", " ")
    if len(text) > max_chars:
        return text[:max_chars] + "…"
    return text
139
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  @torch.no_grad()
141
  def model_logits(model, x):
142
  return model(x)
143
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  # -----------------------------
145
  # Load checkpoint & build model
146
  # -----------------------------
 
 
 
147
  def load_model(ckpt_path: str):
148
  if not os.path.exists(ckpt_path):
149
  raise FileNotFoundError(
@@ -196,68 +262,204 @@ def load_model(ckpt_path: str):
196
  model.eval()
197
  return model, tokenizer, int(radius)
198
 
 
 
 
 
 
199
  # -----------------------------
200
- # Simplest sampling: Strategy 1
201
  # -----------------------------
202
@torch.no_grad()
def strategy_random_argmax(model, tokenizer, seqlen=100, steps=200, snap_every=20, seed=0, max_chars=220):
    """Iteratively refine a random sequence via Strategy 1.

    Each step picks one uniformly random position and replaces it with the
    argmax token from the model's logits there. Returns a list of
    (step, decoded text) snapshots, including the initial state at step 0.
    """
    random.seed(seed)
    torch.manual_seed(seed)
    vocab = tokenizer.vocab_size
    x = torch.randint(0, vocab, (1, seqlen))
    snapshots = [(0, decode(x[0].cpu(), tokenizer, max_chars))]
    for step in range(1, steps + 1):
        pos = int(torch.randint(0, seqlen, (1,)))
        token_logits = model_logits(model, x)[0, pos]  # logits for that position
        x[0, pos] = int(torch.argmax(token_logits).item())
        if step % snap_every == 0 or step == steps:
            snapshots.append((step, decode(x[0].cpu(), tokenizer, max_chars)))
    return snapshots
215
 
216
  # -----------------------------
217
- # Gradio UI
218
  # -----------------------------
219
- DEFAULT_CKPT = os.environ.get("CKPT_PATH", "ckpt_latest.pt")
 
 
 
 
 
 
220
 
221
- model_cache = {"model": None, "tokenizer": None, "radius": None, "ckpt": None}
222
def ensure_model(ckpt_path):
    """(Re)load the model into the module cache when it is missing or the
    requested checkpoint path differs from the cached one."""
    stale = model_cache["model"] is None or model_cache["ckpt"] != ckpt_path
    if stale:
        m, tok, rad = load_model(ckpt_path)
        model_cache.update({"model": m, "tokenizer": tok, "radius": rad, "ckpt": ckpt_path})
 
226
 
227
def run_demo(ckpt_path, seqlen, steps, snap_every, seed, max_chars):
    """Run Strategy-1 sampling and return (snapshot log, final text)."""
    ensure_model(ckpt_path or DEFAULT_CKPT)
    snaps = strategy_random_argmax(
        model_cache["model"],
        model_cache["tokenizer"],
        seqlen=seqlen,
        steps=steps,
        snap_every=snap_every,
        seed=seed,
        max_chars=max_chars,
    )
    # One line per snapshot, right-aligned step counter for readability.
    log = "\n".join(f"t={t:>3}: {txt}" for (t, txt) in snaps)
    final_text = snaps[-1][1] if snaps else ""
    return log, final_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
 
239
with gr.Blocks(title="CNA — Simple Sampling (Random Position • Argmax)") as demo:
    gr.Markdown(
        """
        # CNA — Simple Sampling (Strategy 1)
        This Space loads your checkpoint and runs the **random position → argmax** update for a fixed-length sequence.
        - Put your checkpoint at `ckpt_latest.pt` (repo root), or set a custom path below.
        """
    )
    # Checkpoint selection
    with gr.Row():
        ckpt = gr.Textbox(value=DEFAULT_CKPT, label="Checkpoint path", placeholder="ckpt_latest.pt")
    # Sampling controls
    with gr.Row():
        seqlen = gr.Slider(10, 512, value=100, step=1, label="Sequence length (S)")
        steps = gr.Slider(10, 1000, value=200, step=1, label="Steps")
        snap_every = gr.Slider(1, 200, value=20, step=1, label="Snapshot every N steps")
    with gr.Row():
        seed = gr.Slider(0, 10_000, value=0, step=1, label="Seed")
        max_chars = gr.Slider(32, 1000, value=220, step=1, label="Max chars per snapshot")
    run_btn = gr.Button("Run")
    # Outputs
    with gr.Row():
        log_out = gr.Textbox(lines=18, label="Snapshots")
        final_out = gr.Textbox(lines=6, label="Final text (last snapshot)")

    run_btn.click(run_demo, [ckpt, seqlen, steps, snap_every, seed, max_chars], [log_out, final_out])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
 
263
  demo.queue(concurrency_count=1).launch()
 
119
  return self.proj(h)
120
 
121
  # -----------------------------
122
+ # Helpers
123
  # -----------------------------
124
  def infer_expansion_factor_from_state(state, embed_dim):
125
  for key in ("blocks.0.mlp.0.weight", "blocks.0.mlp.2.weight"):
 
132
  return 4
133
 
134
@torch.no_grad()
def decode(ids, tokenizer, max_chars=1000):
    """Decode token ids into a single-line string, truncated to max_chars."""
    text = tokenizer.decode(ids.tolist(), skip_special_tokens=True)
    text = text.replace("\n", " ")
    if len(text) > max_chars:
        return text[:max_chars] + "…"
    return text
139
 
140
def to_fixed_len_ids(text, tokenizer, seqlen, pad_mode="random", rnd=None):
    """Encode *text* and coerce the result to exactly ``seqlen`` tokens.

    Longer encodings are truncated; shorter ones are padded with the EOS
    token (when ``pad_mode == "eos"`` and the tokenizer defines one) or
    with uniformly random token ids. Returns a [1, seqlen] long tensor.
    """
    if rnd is None:
        rnd = random.Random()
    token_ids = tokenizer.encode(text, add_special_tokens=False)
    vocab = tokenizer.vocab_size
    if len(token_ids) >= seqlen:
        token_ids = token_ids[:seqlen]
    elif pad_mode == "eos" and tokenizer.eos_token_id is not None:
        token_ids = token_ids + [tokenizer.eos_token_id] * (seqlen - len(token_ids))
    else:
        # Random padding — also the fallback when no EOS token exists.
        token_ids = token_ids + [rnd.randrange(vocab) for _ in range(seqlen - len(token_ids))]
    return torch.tensor(token_ids, dtype=torch.long).unsqueeze(0)
155
+
156
@torch.no_grad()
def model_logits(model, x):
    """Forward pass with gradient tracking disabled; returns the model output."""
    logits = model(x)
    return logits
159
 
160
def _parse_positions(indices_csv):
    """Parse a brush spec like ``"0, 5, 6-10"`` into a set of int positions.

    Accepts None/empty input; malformed entries are skipped rather than
    failing the whole request (best-effort, mirrors the lenient UI input).
    """
    positions = set()
    for part in (indices_csv or "").split(","):
        part = part.strip()
        if not part:
            continue
        try:
            if "-" in part:
                a, b = part.split("-", 1)
                lo, hi = sorted((int(a), int(b)))
                positions.update(range(lo, hi + 1))
            else:
                positions.add(int(part))
        except ValueError:
            # Non-numeric entry: ignore it (was a bare `except:` before,
            # which also swallowed KeyboardInterrupt/SystemExit).
            continue
    return positions


def apply_noise_ops(x, tokenizer, indices_csv, add_noise_left, add_noise_right, seqlen, seed=0):
    """Noise selected positions and optionally prepend/append random tokens.

    Parameters
    ----------
    x : long tensor of shape [1, S] — token ids; the input is not mutated.
    tokenizer : provides ``vocab_size`` used to sample random tokens.
    indices_csv : str or None — positions to randomize, e.g. ``"0, 5, 6-10"``.
    add_noise_left / add_noise_right : int — random tokens to prepend/append
        before re-fitting to ``seqlen``.
    seqlen : int — target length of the returned tensor.
    seed : int — seeds a local RNG so results are reproducible.

    Returns a new [1, seqlen] long tensor.
    """
    rnd = random.Random(seed)
    V = tokenizer.vocab_size
    x = x.clone()

    # Noise brush: randomize the requested in-range positions.
    for j in _parse_positions(indices_csv):
        if 0 <= j < seqlen:
            x[0, j] = rnd.randrange(V)

    # Optionally surround the sequence with fresh random tokens.
    if add_noise_left > 0:
        prefix = torch.tensor([rnd.randrange(V) for _ in range(add_noise_left)], dtype=torch.long).unsqueeze(0)
        x = torch.cat([prefix, x], dim=1)
    if add_noise_right > 0:
        suffix = torch.tensor([rnd.randrange(V) for _ in range(add_noise_right)], dtype=torch.long).unsqueeze(0)
        x = torch.cat([x, suffix], dim=1)

    # Re-fit to the target length: trim the tail, or pad with random tokens.
    if x.shape[1] > seqlen:
        x = x[:, :seqlen]
    elif x.shape[1] < seqlen:
        need = seqlen - x.shape[1]
        pad = torch.tensor([rnd.randrange(V) for _ in range(need)], dtype=torch.long).unsqueeze(0)
        x = torch.cat([x, pad], dim=1)
    return x
206
+
207
# -----------------------------
# Load checkpoint & build model
# -----------------------------
# Checkpoint location; overridable via the CKPT_PATH environment variable.
DEFAULT_CKPT = os.environ.get("CKPT_PATH", "ckpt_latest.pt")
# Process-wide cache so repeated UI callbacks reuse one loaded model/tokenizer.
model_cache = {"model": None, "tokenizer": None, "radius": None, "ckpt": None}
212
+
213
  def load_model(ckpt_path: str):
214
  if not os.path.exists(ckpt_path):
215
  raise FileNotFoundError(
 
262
  model.eval()
263
  return model, tokenizer, int(radius)
264
 
265
def ensure_model(ckpt_path):
    """(Re)load the model into the module cache when it is missing or the
    requested checkpoint path differs from the cached one."""
    stale = model_cache["model"] is None or model_cache["ckpt"] != ckpt_path
    if stale:
        m, tok, rad = load_model(ckpt_path)
        model_cache.update({"model": m, "tokenizer": tok, "radius": rad, "ckpt": ckpt_path})
269
+
270
  # -----------------------------
271
+ # Strategy 1 core step
272
  # -----------------------------
273
@torch.no_grad()
def step_strategy1(model, x):
    """One iteration: pick a uniformly random position and overwrite it with
    the argmax token from the model's logits there. Mutates and returns x."""
    seq_len = x.shape[1]
    pos = int(torch.randint(0, seq_len, (1,)).item())
    position_logits = model_logits(model, x)[0, pos]  # logits over the vocab
    best_token = int(torch.argmax(position_logits).item())
    x[0, pos] = best_token
    return x
 
 
 
 
 
281
 
282
  # -----------------------------
283
+ # Gradio logic
284
  # -----------------------------
285
def init_random(ckpt_path, seqlen, seed):
    """Create a fresh uniformly-random sequence of length seqlen.

    Returns (ids, decoded text, status) for the Gradio outputs."""
    ensure_model(ckpt_path or DEFAULT_CKPT)
    random.seed(seed)
    torch.manual_seed(seed)
    tok = model_cache["tokenizer"]
    x = torch.randint(0, tok.vocab_size, (1, seqlen))
    return x.tolist(), decode(x[0], tok), f"Initialized random sequence (len={seqlen})"
292
 
293
def init_from_text(ckpt_path, seqlen, text, seed, pad_mode):
    """Initialize the sequence from user text, padded/truncated to seqlen.

    Returns (ids, decoded text, status) for the Gradio outputs."""
    ensure_model(ckpt_path or DEFAULT_CKPT)
    tok = model_cache["tokenizer"]
    x = to_fixed_len_ids(text or "", tok, seqlen, pad_mode=pad_mode, rnd=random.Random(seed))
    return x.tolist(), decode(x[0], tok), "Initialized from text"
299
 
300
def append_text(ckpt_path, state_ids, seqlen, text_to_append, seed):
    """Append encoded user text to the current sequence, then resize to seqlen.

    If there is no current sequence, one is initialized from the text with
    random padding. Returns (ids, decoded text, status) for the Gradio outputs.
    """
    ensure_model(ckpt_path or DEFAULT_CKPT)
    tok = model_cache["tokenizer"]
    rnd = random.Random(seed)
    if state_ids is None or len(state_ids) == 0:
        x = to_fixed_len_ids(text_to_append or "", tok, seqlen, pad_mode="random", rnd=rnd)
    else:
        # BUG FIX: state is stored via Tensor.tolist() on a [1, S] tensor, i.e.
        # a nested [[...]] list, but the old code unconditionally unsqueezed,
        # producing a [1, 1, S] tensor. Accept both flat and nested forms.
        x = torch.tensor(state_ids, dtype=torch.long)
        if x.dim() == 1:
            x = x.unsqueeze(0)
        # Append the encoded text.
        extra = tok.encode(text_to_append or "", add_special_tokens=False)
        x = torch.cat([x, torch.tensor(extra, dtype=torch.long).unsqueeze(0)], dim=1)
        # Force back to the target length: trim, or pad with random tokens.
        if x.shape[1] > seqlen:
            x = x[:, :seqlen]
        elif x.shape[1] < seqlen:
            need = seqlen - x.shape[1]
            pad = torch.tensor([rnd.randrange(tok.vocab_size) for _ in range(need)], dtype=torch.long).unsqueeze(0)
            x = torch.cat([x, pad], dim=1)
    return x.tolist(), decode(x[0], tok), "Appended text and resized to target length"
321
+
322
def apply_noise(ckpt_path, state_ids, seqlen, indices_csv, add_left, add_right, seed):
    """Apply the noise brush and/or prepend/append random tokens.

    Starts from a fresh random sequence when no state exists. Returns
    (ids, decoded text, status) for the Gradio outputs.
    """
    ensure_model(ckpt_path or DEFAULT_CKPT)
    tok = model_cache["tokenizer"]
    if state_ids is None or len(state_ids) == 0:
        # No current sequence: start from uniform random ids.
        base = torch.randint(0, tok.vocab_size, (1, seqlen))
    else:
        # BUG FIX: state is stored via Tensor.tolist() on a [1, S] tensor
        # (nested [[...]]); the old unconditional unsqueeze made it [1, 1, S].
        # Accept both flat and nested forms.
        base = torch.tensor(state_ids, dtype=torch.long)
        if base.dim() == 1:
            base = base.unsqueeze(0)
    x = apply_noise_ops(base, tok, indices_csv, int(add_left), int(add_right), seqlen, seed=seed)
    return x.tolist(), decode(x[0], tok), "Applied noise brush / prepend / append"
334
+
335
def step_once(ckpt_path, state_ids):
    """Run a single Strategy-1 update on the current sequence.

    Returns (ids, decoded text, status); reports a message instead of
    stepping when no sequence has been initialized.
    """
    ensure_model(ckpt_path or DEFAULT_CKPT)
    tok = model_cache["tokenizer"]
    if state_ids is None or len(state_ids) == 0:
        return None, "", "No sequence to step — initialize first."
    # BUG FIX: state is stored via Tensor.tolist() on a [1, S] tensor
    # (nested [[...]]); the old unconditional unsqueeze made it [1, 1, S].
    # Accept both flat and nested forms.
    x = torch.tensor(state_ids, dtype=torch.long)
    if x.dim() == 1:
        x = x.unsqueeze(0)
    x = step_strategy1(model_cache["model"], x)
    return x.tolist(), decode(x[0], tok), "Stepped 1 iteration"
344
+
345
def live_denoise(ckpt_path, state_ids, steps, snap_every, seed):
    """Generator for live updates.

    Yields (ids, text, status) every ``snap_every`` steps and on the final
    step so Gradio can stream intermediate results.
    """
    ensure_model(ckpt_path or DEFAULT_CKPT)
    tok = model_cache["tokenizer"]
    if state_ids is None or len(state_ids) == 0:
        # BUG FIX: the old bare `return` yielded nothing, leaving the user
        # with no feedback; surface an explicit status message instead.
        yield state_ids, "", "No sequence to denoise — initialize first."
        return
    random.seed(seed)
    torch.manual_seed(seed)
    # BUG FIX: state is stored via Tensor.tolist() on a [1, S] tensor
    # (nested [[...]]); the old unconditional unsqueeze made it [1, 1, S].
    # Accept both flat and nested forms.
    x = torch.tensor(state_ids, dtype=torch.long)
    if x.dim() == 1:
        x = x.unsqueeze(0)
    total = int(steps)
    snap = max(1, int(snap_every))
    for t in range(1, total + 1):
        x = step_strategy1(model_cache["model"], x)
        if (t % snap == 0) or (t == total):
            # Final state is always emitted (t == total), so no trailing yield needed.
            yield x.tolist(), decode(x[0], tok), f"Live denoise… step {t}/{total}"
363
 
364
# -----------------------------
# UI
# -----------------------------
with gr.Blocks(title="CNA — Interactive Denoising (Strategy 1)") as demo:
    gr.Markdown(
        """
        # CNA — Interactive Denoising (Strategy 1)
        - **Mode 1:** Randomize then watch it **denoise live** (random-position → argmax).
        - **Mode 2:** Initialize from **your text**.
        - **Noise Brush:** Select positions (e.g., `0, 5, 10-20`), and/or add random noise tokens at **start**/**end**.
        - **Append:** Add your text to the current sequence.
        """
    )

    # Global settings shared by every mode.
    with gr.Row():
        ckpt = gr.Textbox(value=DEFAULT_CKPT, label="Checkpoint path")
        seqlen = gr.Slider(10, 512, value=100, step=1, label="Sequence length (S)")
        seed = gr.Slider(0, 10000, value=0, step=1, label="Seed")

    # Hidden session state holding the current token ids.
    ids_state = gr.State(value=None)

    # Shared displays updated by every action.
    with gr.Row():
        current_text = gr.Textbox(lines=8, label="Current text", interactive=False)
        status = gr.Markdown("Ready.")

    gr.Markdown("## Mode 1 · Random → Denoise Live")
    with gr.Row():
        btn_random = gr.Button("Initialize Random")
        steps = gr.Slider(1, 2000, value=200, step=1, label="Denoise steps (N)")
        snap_every = gr.Slider(1, 100, value=5, step=1, label="Update every K steps")
    with gr.Row():
        btn_step_once = gr.Button("Step Once")
        btn_live = gr.Button("Denoise Live (streaming)")

    gr.Markdown("## Mode 2 · Initialize From Your Text")
    with gr.Row():
        init_text = gr.Textbox(lines=4, label="Initial text")
    with gr.Row():
        pad_mode = gr.Radio(choices=["random", "eos"], value="random", label="Pad mode (if text shorter than S)")
        btn_init_text = gr.Button("Initialize From Text")

    gr.Markdown("## Noise Brush · Select Positions + Prepend/Append Noise")
    with gr.Row():
        indices_csv = gr.Textbox(label="Positions to noise (e.g., 0, 5, 10-20)", placeholder="Leave empty to skip")
    with gr.Row():
        add_left = gr.Number(value=0, precision=0, label="Noise tokens to add at START")
        add_right = gr.Number(value=0, precision=0, label="Noise tokens to add at END")
        btn_apply_noise = gr.Button("Apply Noise Brush / Prepend / Append")

    gr.Markdown("## Append Text")
    with gr.Row():
        append_box = gr.Textbox(lines=3, label="Text to append")
        btn_append = gr.Button("Append to Current Sequence")

    # --- Wiring: every action writes (ids, text, status) ---
    shared_outputs = [ids_state, current_text, status]

    btn_random.click(init_random, [ckpt, seqlen, seed], shared_outputs)
    btn_init_text.click(init_from_text, [ckpt, seqlen, init_text, seed, pad_mode], shared_outputs)
    btn_apply_noise.click(apply_noise, [ckpt, ids_state, seqlen, indices_csv, add_left, add_right, seed], shared_outputs)
    btn_append.click(append_text, [ckpt, ids_state, seqlen, append_box, seed], shared_outputs)
    btn_step_once.click(step_once, [ckpt, ids_state], shared_outputs)
    # live_denoise is a generator, so this click streams intermediate states.
    btn_live.click(live_denoise, [ckpt, ids_state, steps, snap_every, seed], shared_outputs, show_progress=True)

demo.queue(concurrency_count=1).launch()