Spaces:

basilboy
/

SelfOrganisingText

Sleeping

App Files Files Community

the-puzzler committed on Sep 7, 2025

Commit

515a8b4

1 Parent(s): fbcf0db

added different argmax or sampling logits

Browse files

Files changed (1) hide show

app.py +83 -12

app.py CHANGED Viewed

@@ -121,6 +121,32 @@ class CNA(nn.Module):
 # -----------------------------
 # Helpers
 # -----------------------------
 def infer_expansion_factor_from_state(state, embed_dim):
     for key in ("blocks.0.mlp.0.weight", "blocks.0.mlp.2.weight"):
         if key in state:
@@ -271,14 +297,34 @@ def ensure_model(ckpt_path):
 # Strategy 1 core step
 # -----------------------------
 @torch.no_grad()
-def step_strategy1(model, x):
-    """One iteration: choose random position, set to argmax(logits)."""
     S = x.shape[1]
     pos = int(torch.randint(0, S, (1,)).item())
     logits_pos = model_logits(model, x)[0, pos]  # [V]
-    x[0, pos] = int(torch.argmax(logits_pos).item())
     return x
 # -----------------------------
 # Gradio logic
 # -----------------------------
@@ -332,17 +378,23 @@ def apply_noise(ckpt_path, state_ids, seqlen, indices_csv, add_left, add_right,
     txt = decode(x[0], tok)
     return x.tolist(), txt, "Applied noise brush / prepend / append"
-def step_once(ckpt_path, state_ids):
     ensure_model(ckpt_path or DEFAULT_CKPT)
     tok = model_cache["tokenizer"]
     if state_ids is None or len(state_ids) == 0:
         return None, "", "No sequence to step — initialize first."
     x = torch.tensor(state_ids, dtype=torch.long).unsqueeze(0)
-    x = step_strategy1(model_cache["model"], x)
     txt = decode(x[0], tok)
-    return x.tolist(), txt, "Stepped 1 iteration"
-def live_denoise(ckpt_path, state_ids, steps, snap_every, seed):
     """
     Generator for live updates. Yields (ids, text, status) every snap_every steps and on completion.
     """
@@ -355,11 +407,16 @@ def live_denoise(ckpt_path, state_ids, steps, snap_every, seed):
     total = int(steps)
     snap = max(1, int(snap_every))
     for t in range(1, total + 1):
-        x = step_strategy1(model_cache["model"], x)
         if (t % snap == 0) or (t == total):
             txt = decode(x[0], tok)
-            yield x.tolist(), txt, f"Live denoise… step {t}/{total}"
-    # final yield already done in loop
 # -----------------------------
 # UI
@@ -390,6 +447,20 @@ with gr.Blocks(title="CNA — Interactive Denoising (Strategy 1)") as demo:
     status = gr.Markdown("Ready.")
     gr.Markdown("## Mode 1 · Random → Denoise Live")
     with gr.Row():
         btn_random = gr.Button("Initialize Random")
         steps = gr.Slider(1, 2000, value=200, step=1, label="Denoise steps (N)")
@@ -450,14 +521,14 @@ with gr.Blocks(title="CNA — Interactive Denoising (Strategy 1)") as demo:
     # Single step
     btn_step_once.click(
         step_once,
-        [ckpt, ids_state],
         [ids_state, current_text, status]
     )
     # Live denoise (streaming)
     btn_live.click(
         live_denoise,
-        [ckpt, ids_state, steps, snap_every, seed],
         [ids_state, current_text, status],
         show_progress=True
     )

 # -----------------------------
 # Helpers
 # -----------------------------
+@torch.no_grad()
+def sample_from_logits(logits_row: torch.Tensor, temperature: float = 1.0,
+                       current_token: int | None = None, exclude_current: bool = True) -> int:
+    """
+    Sample a token from logits_row using softmax with temperature.
+    If exclude_current=True and current_token is provided, set its prob to 0 (then renormalize).
+    """
+    if temperature <= 0:
+        # safety: treat as argmax
+        return int(torch.argmax(logits_row).item())
+    scaled = logits_row / float(temperature)
+    probs = torch.softmax(scaled, dim=-1)
+    if exclude_current and current_token is not None:
+        probs = probs.clone()
+        probs[current_token] = 0.0
+        s = probs.sum()
+        if s.item() <= 0:
+            # fallback to argmax if everything got zeroed
+            return int(torch.argmax(logits_row).item())
+        probs = probs / s
+    return int(torch.multinomial(probs, num_samples=1).item())
 def infer_expansion_factor_from_state(state, embed_dim):
     for key in ("blocks.0.mlp.0.weight", "blocks.0.mlp.2.weight"):
         if key in state:
 # Strategy 1 core step
 # -----------------------------
 @torch.no_grad()
+def step_strategy1(model, x, mode: str = "argmax",
+                   temperature: float = 1.0,
+                   exclude_current: bool = True):
+    """
+    One iteration: choose random position, then update via:
+      - mode="argmax": set token to argmax(logits)
+      - mode="sample": sample from softmax(logits / temperature)
+                       (optionally excluding current token)
+
+    Any mode other than "sample" takes the argmax branch.
+    Only row 0 of x is read and written; x is modified in place and the
+    same tensor is returned.
+    """
     S = x.shape[1]
     # one position drawn from [0, S) per call
     pos = int(torch.randint(0, S, (1,)).item())
     logits_pos = model_logits(model, x)[0, pos]  # [V]
+    if mode == "sample":
+        # the token currently at pos is handed to the sampler so it can be excluded
+        cur_tok = int(x[0, pos].item())
+        new_tok = sample_from_logits(
+            logits_pos,
+            temperature=float(temperature),
+            current_token=cur_tok,
+            exclude_current=bool(exclude_current)
+        )
+        x[0, pos] = new_tok
+    else:
+        # default / fallback: argmax
+        x[0, pos] = int(torch.argmax(logits_pos).item())
     return x
 # -----------------------------
 # Gradio logic
 # -----------------------------
     txt = decode(x[0], tok)
     return x.tolist(), txt, "Applied noise brush / prepend / append"
+def step_once(ckpt_path, state_ids, mode, temperature, exclude_current):
+    """
+    Run a single step_strategy1 update on the stored token ids.
+
+    Returns a (ids, text, status) tuple. When state_ids is None or empty the
+    result is (None, "", <message>) and no step is taken.
+    mode, temperature and exclude_current are forwarded to step_strategy1
+    unchanged.
+    """
     ensure_model(ckpt_path or DEFAULT_CKPT)
     tok = model_cache["tokenizer"]
     if state_ids is None or len(state_ids) == 0:
         return None, "", "No sequence to step — initialize first."
     # add a leading dimension in front of the stored ids
     x = torch.tensor(state_ids, dtype=torch.long).unsqueeze(0)
+    x = step_strategy1(
+        model_cache["model"], x,
+        mode=mode,
+        temperature=temperature,
+        exclude_current=exclude_current
+    )
     txt = decode(x[0], tok)
     # NOTE(review): x.tolist() keeps the leading dimension added above, so the
     # returned ids are nested one level deeper than a flat id list; if this
     # value is fed back in as state_ids unchanged, unsqueeze(0) would add yet
     # another dimension — confirm against how ids_state is produced/consumed.
+    return x.tolist(), txt, f"Stepped 1 iteration ({mode})"
+def live_denoise(ckpt_path, state_ids, steps, snap_every, seed,
+                 mode, temperature, exclude_current):
     """
     Generator for live updates. Yields (ids, text, status) every snap_every steps and on completion.
     """
     total = int(steps)
     snap = max(1, int(snap_every))
     for t in range(1, total + 1):
+        x = step_strategy1(
+            model_cache["model"], x,
+            mode=mode,
+            temperature=temperature,
+            exclude_current=exclude_current
+        )
         if (t % snap == 0) or (t == total):
             txt = decode(x[0], tok)
+            yield x.tolist(), txt, f"Live denoise… step {t}/{total} ({mode})"
 # -----------------------------
 # UI
     status = gr.Markdown("Ready.")
     gr.Markdown("## Mode 1 · Random → Denoise Live")
+    with gr.Row():
+        update_mode = gr.Radio(
+            choices=["argmax", "sample"],
+            value="argmax",
+            label="Update rule"
+        )
+        temperature = gr.Slider(
+            minimum=0.0, maximum=5.0, value=1.0, step=0.05,
+            label="Temperature (sampling)"
+        )
+        exclude_current = gr.Checkbox(
+            value=True,
+            label="Exclude current token when sampling"
+        )
     with gr.Row():
         btn_random = gr.Button("Initialize Random")
         steps = gr.Slider(1, 2000, value=200, step=1, label="Denoise steps (N)")
     # Single step
     btn_step_once.click(
         step_once,
+        [ckpt, ids_state, update_mode, temperature, exclude_current],
         [ids_state, current_text, status]
     )
     # Live denoise (streaming)
     btn_live.click(
         live_denoise,
+        [ckpt, ids_state, steps, snap_every, seed, update_mode, temperature, exclude_current],
         [ids_state, current_text, status],
         show_progress=True
     )