the-puzzler committed
Commit b9b97d8 · Parent(s): 515a8b4
test

Files changed:
- app.py +210 -176
- requirements.txt +2 -0

app.py CHANGED
@@ -1,14 +1,16 @@
 # app.py
-import os, re, math, random
+import os, re, math, random, json
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 import gradio as gr
 from transformers import AutoTokenizer
+from safetensors.torch import load_file as load_sft
+from huggingface_hub import snapshot_download
 
-#
+# ============================================================
 # Minimal CNA (inference-ready)
-#
+# ============================================================
 class AttnBlock(nn.Module):
     def __init__(self, embed_dim, num_heads, expansion_factor):
         super().__init__()
@@ -28,7 +30,7 @@ class AttnBlock(nn.Module):
             nn.Linear(embed_dim * expansion_factor, embed_dim),
         )
 
-        #
+        # zero-init on residual branches (to match training behavior)
         nn.init.zeros_(self.Wo.weight); nn.init.zeros_(self.Wo.bias)
         nn.init.zeros_(self.mlp[-1].weight); nn.init.zeros_(self.mlp[-1].bias)
 
@@ -118,35 +120,9 @@ class CNA(nn.Module):
         h = blk(h, rope=(cos, sin), radius=self.radius)
         return self.proj(h)
 
-#
+# ============================================================
 # Helpers
-#
-@torch.no_grad()
-def sample_from_logits(logits_row: torch.Tensor, temperature: float = 1.0,
-                       current_token: int | None = None, exclude_current: bool = True) -> int:
-    """
-    Sample a token from logits_row using softmax with temperature.
-    If exclude_current=True and current_token is provided, set its prob to 0 (then renormalize).
-    """
-    if temperature <= 0:
-        # safety: treat as argmax
-        return int(torch.argmax(logits_row).item())
-
-    scaled = logits_row / float(temperature)
-    probs = torch.softmax(scaled, dim=-1)
-
-    if exclude_current and current_token is not None:
-        probs = probs.clone()
-        probs[current_token] = 0.0
-        s = probs.sum()
-        if s.item() <= 0:
-            # fallback to argmax if everything got zeroed
-            return int(torch.argmax(logits_row).item())
-        probs = probs / s
-
-    return int(torch.multinomial(probs, num_samples=1).item())
-
-
+# ============================================================
 def infer_expansion_factor_from_state(state, embed_dim):
     for key in ("blocks.0.mlp.0.weight", "blocks.0.mlp.2.weight"):
         if key in state:
@@ -191,7 +167,7 @@ def apply_noise_ops(x, tokenizer, indices_csv, add_noise_left, add_noise_right,
 
     # noise brush (indices like "0, 5, 6-10")
     idxs = set()
-    if indices_csv.strip():
+    if indices_csv and indices_csv.strip():
        for part in indices_csv.split(","):
            part = part.strip()
            if not part:
@@ -210,15 +186,15 @@ def apply_noise_ops(x, tokenizer, indices_csv, add_noise_left, add_noise_right,
            except:
                continue
    for j in idxs:
-        if 0 <= j <
+        if 0 <= j < x.shape[1]:
            x[0, j] = rnd.randrange(V)
 
    # prepend/append random noise
    if add_noise_left > 0:
-        prefix = torch.tensor([rnd.randrange(V) for _ in range(add_noise_left)], dtype=torch.long).unsqueeze(0)
+        prefix = torch.tensor([rnd.randrange(V) for _ in range(int(add_noise_left))], dtype=torch.long).unsqueeze(0)
        x = torch.cat([prefix, x], dim=1)
    if add_noise_right > 0:
-        suffix = torch.tensor([rnd.randrange(V) for _ in range(add_noise_right)], dtype=torch.long).unsqueeze(0)
+        suffix = torch.tensor([rnd.randrange(V) for _ in range(int(add_noise_right))], dtype=torch.long).unsqueeze(0)
        x = torch.cat([x, suffix], dim=1)
 
    # force length back to seqlen (trim or pad random)
@@ -230,53 +206,164 @@ def apply_noise_ops(x, tokenizer, indices_csv, add_noise_left, add_noise_right,
        x = torch.cat([x, pad], dim=1)
    return x
 
-
-
-
-
-
+@torch.no_grad()
+def sample_from_logits(logits_row, temperature=1.0, current_token=None, exclude_current=True):
+    """Temperature sampling; optionally exclude current token to force change."""
+    if temperature <= 0:
+        return int(torch.argmax(logits_row).item())
+    scaled = logits_row / float(temperature)
+    probs = torch.softmax(scaled, dim=-1)
+    if exclude_current and current_token is not None:
+        probs = probs.clone()
+        probs[current_token] = 0.0
+        s = probs.sum()
+        if s.item() <= 0:
+            return int(torch.argmax(logits_row).item())
+        probs = probs / s
+    return int(torch.multinomial(probs, 1).item())
 
-
-
-
-
-
-    )
-    payload = torch.load(ckpt_path, map_location="cpu")
-    state = payload["model"]
-    cfg = payload.get("config", {}) or {}
+# ============================================================
+# Weight loading: file, folder, or Hub repo
+# ============================================================
+DEFAULT_CKPT = os.environ.get("CKPT_PATH", "ckpt_latest.pt")
+DEFAULT_WEIGHTS_DIR = os.environ.get("WEIGHTS_DIR", "weights_latest")
 
-
+def _read_config_from_dict_or_infer(state, cfg):
    embed_dim = cfg.get("embed_dim")
    num_heads = cfg.get("num_heads")
    num_blocks = cfg.get("num_blocks")
    radius = cfg.get("radius")
    expansion_factor = cfg.get("expansion_factor")
+    tokenizer_name = cfg.get("tokenizer_name", cfg.get("tokenizer") or "gpt2")
 
-    if embed_dim is None:
+    if embed_dim is None:
+        embed_dim = state["tok_emb.weight"].shape[1]
    if num_blocks is None:
        block_idxs = [int(m.group(1)) for k in state.keys() for m in [re.match(r"blocks\.(\d+)\.", k)] if m]
        num_blocks = max(block_idxs) + 1 if block_idxs else 1
-    if num_heads is None:
-
+    if num_heads is None:
+        num_heads = 8
+    if radius is None:
+        radius = 16
    if expansion_factor is None:
        expansion_factor = infer_expansion_factor_from_state(state, embed_dim)
+
+    return {
+        "embed_dim": int(embed_dim),
+        "num_heads": int(num_heads),
+        "num_blocks": int(num_blocks),
+        "radius": int(radius),
+        "expansion_factor": int(expansion_factor),
+        "tokenizer_name": tokenizer_name,
+    }
+
+def _load_state_from_pt(payload_path: str):
+    payload = torch.load(payload_path, map_location="cpu")
+    state = payload["model"]
+    cfg = payload.get("config", {}) or {}
+    if "tokenizer_name" in payload:
+        cfg = {**cfg, "tokenizer_name": payload["tokenizer_name"]}
+    return state, cfg
+
+def _merge_state_dicts(dicts):
+    merged = {}
+    for d in dicts:
+        for k, v in d.items():
+            merged[k] = v
+    return merged
+
+def _load_state_from_folder(weights_dir: str):
+    if not os.path.isdir(weights_dir):
+        raise FileNotFoundError(f"Folder not found: {weights_dir}")
+
+    cfg_path = os.path.join(weights_dir, "config.json")
+    cfg = {}
+    if os.path.exists(cfg_path):
+        with open(cfg_path, "r") as f:
+            cfg = json.load(f)
+
+    files = sorted(os.listdir(weights_dir))
+    sft_files = [f for f in files if f.endswith(".safetensors")]
+    pt_files = [f for f in files if f.endswith(".pt") or f.endswith(".bin")]
+
+    state = None
+    if "model.safetensors" in sft_files:
+        state = load_sft(os.path.join(weights_dir, "model.safetensors"))
+    elif sft_files:
+        parts = [load_sft(os.path.join(weights_dir, f)) for f in sft_files]
+        state = _merge_state_dicts(parts)
+    elif pt_files:
+        parts = []
+        for f in pt_files:
+            part = torch.load(os.path.join(weights_dir, f), map_location="cpu")
+            if isinstance(part, dict) and "model" in part and isinstance(part["model"], dict):
+                parts.append(part["model"])
+                if "config" in part and isinstance(part["config"], dict):
+                    cfg = {**cfg, **part["config"]}
+                if "tokenizer_name" in part:
+                    cfg.setdefault("tokenizer_name", part["tokenizer_name"])
+            else:
+                parts.append(part)
+        state = _merge_state_dicts(parts)
    else:
-
+        raise FileNotFoundError(
+            f"No weights found in {weights_dir}. Expected .safetensors or .pt files."
+        )
+
+    return state, cfg
+
+def _load_state_from_hub(repo_id: str, subfolder: str | None = None, revision: str | None = None):
+    cache_dir = snapshot_download(repo_id=repo_id, revision=revision, allow_patterns=None)
+    path = os.path.join(cache_dir, subfolder) if subfolder else cache_dir
+    return _load_state_from_folder(path)
 
-
-
+def load_model(source: str):
+    """
+    `source` can be:
+      - Path to single-file checkpoint: 'ckpt_latest.pt'
+      - Path to folder of shards: 'weights_latest'
+      - HF Hub repo id: 'org/model'
+    """
+    # Resolve source
+    src = source or ""
+    state, cfg = None, {}
+
+    if os.path.isfile(src) and src.endswith(".pt"):
+        state, cfg = _load_state_from_pt(src)
+    elif os.path.isdir(src):
+        state, cfg = _load_state_from_folder(src)
+    elif "/" in src:  # probably a hub repo id
+        subfolder = os.environ.get("WEIGHTS_SUBFOLDER") or None
+        revision = os.environ.get("WEIGHTS_REVISION") or None
+        state, cfg = _load_state_from_hub(src, subfolder=subfolder, revision=revision)
+    else:
+        # fallbacks
+        if os.path.isfile(DEFAULT_CKPT):
+            state, cfg = _load_state_from_pt(DEFAULT_CKPT)
+        elif os.path.isdir(DEFAULT_WEIGHTS_DIR):
+            state, cfg = _load_state_from_folder(DEFAULT_WEIGHTS_DIR)
+        else:
+            raise FileNotFoundError(
+                f"Could not resolve weights from '{src}'. Tried file (.pt), folder, hub repo id, "
+                f"then defaults ('{DEFAULT_CKPT}', '{DEFAULT_WEIGHTS_DIR}')."
+            )
+
+    conf = _read_config_from_dict_or_infer(state, cfg)
+
+    # Tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(conf["tokenizer_name"], use_fast=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    tokenizer.model_max_length = 1_000_000_000
    vocab_size = tokenizer.vocab_size
 
+    # Build model
    model = CNA(
-
-
+        conf["embed_dim"], conf["num_heads"], conf["expansion_factor"],
+        conf["num_blocks"], conf["radius"], vocab_size
    )
 
-    # Load
+    # Load state (tolerate projection size mismatch)
    missing, unexpected = model.load_state_dict(state, strict=False)
    if any(k.startswith("proj.") for k in missing):
        with torch.no_grad():
@@ -286,155 +373,132 @@ def load_model(ckpt_path: str):
    model.load_state_dict(state, strict=True)
 
    model.eval()
-    return model, tokenizer,
+    return model, tokenizer, conf["radius"]
 
-
-    if model_cache["model"] is None or model_cache["ckpt"] != ckpt_path:
-        m, tok, rad = load_model(ckpt_path)
-        model_cache.update({"model": m, "tokenizer": tok, "radius": rad, "ckpt": ckpt_path})
+model_cache = {"model": None, "tokenizer": None, "radius": None, "ckpt": None}
 
-
-
-
+def ensure_model(source_path_or_repo):
+    src = source_path_or_repo or os.environ.get("WEIGHTS_SOURCE") or DEFAULT_WEIGHTS_DIR
+    if model_cache["model"] is None or model_cache["ckpt"] != src:
+        m, tok, rad = load_model(src)
+        model_cache.update({"model": m, "tokenizer": tok, "radius": rad, "ckpt": src})
+
+# ============================================================
+# Strategy 1 core step (with argmax / sample toggle)
+# ============================================================
 @torch.no_grad()
-def step_strategy1(model, x, mode
-
-                   exclude_current: bool = True):
-    """
-    One iteration: choose random position, then update via:
-      - mode="argmax": set token to argmax(logits)
-      - mode="sample": sample from softmax(logits / temperature)
-        (optionally excluding current token)
-    """
+def step_strategy1(model, x, mode="argmax", temperature=1.0, exclude_current=True):
+    """One iteration: choose random position, update via argmax or sampling."""
    S = x.shape[1]
    pos = int(torch.randint(0, S, (1,)).item())
    logits_pos = model_logits(model, x)[0, pos]  # [V]
-
    if mode == "sample":
        cur_tok = int(x[0, pos].item())
        new_tok = sample_from_logits(
            logits_pos,
            temperature=float(temperature),
            current_token=cur_tok,
-            exclude_current=bool(exclude_current)
+            exclude_current=bool(exclude_current),
        )
        x[0, pos] = new_tok
    else:
-        # default / fallback: argmax
        x[0, pos] = int(torch.argmax(logits_pos).item())
    return x
 
-
-#
-#
-
-
-    ensure_model(ckpt_path or DEFAULT_CKPT)
+# ============================================================
+# Gradio callbacks
+# ============================================================
+def init_random(src, seqlen, seed):
+    ensure_model(src)
    random.seed(seed); torch.manual_seed(seed)
    V = model_cache["tokenizer"].vocab_size
-    x = torch.randint(0, V, (1, seqlen))
+    x = torch.randint(0, V, (1, int(seqlen)))
    txt = decode(x[0], model_cache["tokenizer"])
-    return x.tolist(), txt, f"Initialized random sequence (len={seqlen})"
+    return x.tolist(), txt, f"Initialized random sequence (len={int(seqlen)})"
 
-def init_from_text(
-    ensure_model(
+def init_from_text(src, seqlen, text, seed, pad_mode):
+    ensure_model(src)
    rnd = random.Random(seed)
-    x = to_fixed_len_ids(text or "", model_cache["tokenizer"], seqlen, pad_mode=pad_mode, rnd=rnd)
+    x = to_fixed_len_ids(text or "", model_cache["tokenizer"], int(seqlen), pad_mode=pad_mode, rnd=rnd)
    txt = decode(x[0], model_cache["tokenizer"])
    return x.tolist(), txt, "Initialized from text"
 
-def append_text(
-    ensure_model(
+def append_text(src, state_ids, seqlen, text_to_append, seed):
+    ensure_model(src)
    tok = model_cache["tokenizer"]
    rnd = random.Random(seed)
+    S = int(seqlen)
    if state_ids is None or len(state_ids) == 0:
-        x = to_fixed_len_ids(text_to_append or "", tok,
+        x = to_fixed_len_ids(text_to_append or "", tok, S, pad_mode="random", rnd=rnd)
    else:
        x = torch.tensor(state_ids, dtype=torch.long).unsqueeze(0)
-    # append
    extra = tok.encode(text_to_append or "", add_special_tokens=False)
    x = torch.cat([x, torch.tensor(extra, dtype=torch.long).unsqueeze(0)], dim=1)
-
-
-
-
-        need = seqlen - x.shape[1]
+    if x.shape[1] > S:
+        x = x[:, :S]
+    elif x.shape[1] < S:
+        need = S - x.shape[1]
        V = tok.vocab_size
        pad = torch.tensor([rnd.randrange(V) for _ in range(need)], dtype=torch.long).unsqueeze(0)
        x = torch.cat([x, pad], dim=1)
    txt = decode(x[0], tok)
    return x.tolist(), txt, "Appended text and resized to target length"
 
-def apply_noise(
-    ensure_model(
+def apply_noise(src, state_ids, seqlen, indices_csv, add_left, add_right, seed):
+    ensure_model(src)
    tok = model_cache["tokenizer"]
+    S = int(seqlen)
    if state_ids is None or len(state_ids) == 0:
-        # create an empty base (random) then apply ops
        V = tok.vocab_size
-        base = torch.randint(0, V, (1,
+        base = torch.randint(0, V, (1, S))
    else:
        base = torch.tensor(state_ids, dtype=torch.long).unsqueeze(0)
-    x = apply_noise_ops(base, tok, indices_csv, int(add_left), int(add_right),
+    x = apply_noise_ops(base, tok, indices_csv, int(add_left or 0), int(add_right or 0), S, seed=seed)
    txt = decode(x[0], tok)
    return x.tolist(), txt, "Applied noise brush / prepend / append"
 
-def step_once(
-    ensure_model(
+def step_once(src, state_ids, mode, temperature, exclude_current):
+    ensure_model(src)
    tok = model_cache["tokenizer"]
    if state_ids is None or len(state_ids) == 0:
        return None, "", "No sequence to step — initialize first."
    x = torch.tensor(state_ids, dtype=torch.long).unsqueeze(0)
-    x = step_strategy1(
-        model_cache["model"], x,
-        mode=mode,
-        temperature=temperature,
-        exclude_current=exclude_current
-    )
+    x = step_strategy1(model_cache["model"], x, mode=mode, temperature=temperature, exclude_current=exclude_current)
    txt = decode(x[0], tok)
    return x.tolist(), txt, f"Stepped 1 iteration ({mode})"
 
-def live_denoise(
-
-
-    Generator for live updates. Yields (ids, text, status) every snap_every steps and on completion.
-    """
-    ensure_model(ckpt_path or DEFAULT_CKPT)
+def live_denoise(src, state_ids, steps, snap_every, seed, mode, temperature, exclude_current):
+    """Generator: yields (ids, text, status) every snap_every steps & on completion."""
+    ensure_model(src)
    tok = model_cache["tokenizer"]
    if state_ids is None or len(state_ids) == 0:
        return
    random.seed(seed); torch.manual_seed(seed)
    x = torch.tensor(state_ids, dtype=torch.long).unsqueeze(0)
-    total = int(steps)
-    snap = max(1, int(snap_every))
+    total = int(steps); snap = max(1, int(snap_every))
    for t in range(1, total + 1):
-        x = step_strategy1(
-            model_cache["model"], x,
-            mode=mode,
-            temperature=temperature,
-            exclude_current=exclude_current
-        )
+        x = step_strategy1(model_cache["model"], x, mode=mode, temperature=temperature, exclude_current=exclude_current)
        if (t % snap == 0) or (t == total):
            txt = decode(x[0], tok)
            yield x.tolist(), txt, f"Live denoise… step {t}/{total} ({mode})"
 
-
-# -----------------------------
+# ============================================================
 # UI
-#
-with gr.Blocks(title="CNA — Interactive Denoising
+# ============================================================
+with gr.Blocks(title="CNA — Interactive Denoising") as demo:
    gr.Markdown(
        """
 # CNA — Interactive Denoising (Strategy 1)
-- **
--
--
-- **Append:** Add your text to the current sequence.
+- **Weights source** can be: a `.pt` file, a folder like `weights_latest/` (safetensors or .pt shards), or a **Hub repo id** `org/model`.
+- Update rule per step: **argmax** or **sample** (temperature + option to exclude current token).
+- Tools: Random init, Init from text, Noise brush (select indices, prepend/append noise), Append text, Live denoise.
    """
    )
 
    # Global settings
+    default_source = os.environ.get("WEIGHTS_SOURCE", DEFAULT_WEIGHTS_DIR if os.path.isdir(DEFAULT_WEIGHTS_DIR) else DEFAULT_CKPT)
    with gr.Row():
-
+        src = gr.Textbox(value=default_source, label="Weights (file / folder / HF repo id)")
        seqlen = gr.Slider(10, 512, value=100, step=1, label="Sequence length (S)")
        seed = gr.Slider(0, 10000, value=0, step=1, label="Seed")
 
@@ -447,24 +511,14 @@ with gr.Blocks(title="CNA — Interactive Denoising (Strategy 1)") as demo:
    status = gr.Markdown("Ready.")
 
    gr.Markdown("## Mode 1 · Random → Denoise Live")
-    with gr.Row():
-        update_mode = gr.Radio(
-            choices=["argmax", "sample"],
-            value="argmax",
-            label="Update rule"
-        )
-        temperature = gr.Slider(
-            minimum=0.0, maximum=5.0, value=1.0, step=0.05,
-            label="Temperature (sampling)"
-        )
-        exclude_current = gr.Checkbox(
-            value=True,
-            label="Exclude current token when sampling"
-        )
    with gr.Row():
        btn_random = gr.Button("Initialize Random")
        steps = gr.Slider(1, 2000, value=200, step=1, label="Denoise steps (N)")
        snap_every = gr.Slider(1, 100, value=5, step=1, label="Update every K steps")
+    with gr.Row():
+        update_mode = gr.Radio(choices=["argmax", "sample"], value="argmax", label="Update rule")
+        temperature = gr.Slider(minimum=0.0, maximum=5.0, value=1.0, step=0.05, label="Temperature (sampling)")
+        exclude_current = gr.Checkbox(value=True, label="Exclude current token when sampling")
    with gr.Row():
        btn_step_once = gr.Button("Step Once")
        btn_live = gr.Button("Denoise Live (streaming)")
@@ -490,48 +544,28 @@ with gr.Blocks(title="CNA — Interactive Denoising (Strategy 1)") as demo:
        btn_append = gr.Button("Append to Current Sequence")
 
    # --- Wiring ---
-
-    out = btn_random.click(
-        init_random,
-        [ckpt, seqlen, seed],
-        [ids_state, current_text, status]
-    )
+    btn_random.click(init_random, [src, seqlen, seed], [ids_state, current_text, status])
 
-
-    btn_init_text.click(
-        init_from_text,
-        [ckpt, seqlen, init_text, seed, pad_mode],
-        [ids_state, current_text, status]
-    )
+    btn_init_text.click(init_from_text, [src, seqlen, init_text, seed, pad_mode], [ids_state, current_text, status])
 
-    # Apply noise
    btn_apply_noise.click(
-        apply_noise,
-        [ckpt, ids_state, seqlen, indices_csv, add_left, add_right, seed],
+        apply_noise, [src, ids_state, seqlen, indices_csv, add_left, add_right, seed],
        [ids_state, current_text, status]
    )
 
-
-    btn_append.click(
-        append_text,
-        [ckpt, ids_state, seqlen, append_box, seed],
-        [ids_state, current_text, status]
-    )
+    btn_append.click(append_text, [src, ids_state, seqlen, append_box, seed], [ids_state, current_text, status])
 
-    # Single step
    btn_step_once.click(
        step_once,
-        [
+        [src, ids_state, update_mode, temperature, exclude_current],
        [ids_state, current_text, status]
    )
 
-    # Live denoise (streaming)
    btn_live.click(
        live_denoise,
-        [
+        [src, ids_state, steps, snap_every, seed, update_mode, temperature, exclude_current],
        [ids_state, current_text, status],
        show_progress=True
    )
 
 demo.queue().launch()
-
requirements.txt CHANGED
@@ -1,3 +1,5 @@
 torch --extra-index-url https://download.pytorch.org/whl/cpu
 transformers>=4.41.0
 gradio>=4.31.0
+safetensors>=0.4.2
+huggingface_hub>=0.23.0
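
Usage note (not part of the commit): with the new loader plus the safetensors and huggingface_hub dependencies above, the weights source shown in the UI textbox can be a single .pt checkpoint, a folder of shards (config.json plus .safetensors or .pt files), or a Hub repo id, and load_model / ensure_model also read a few environment variables. A minimal launch sketch under that assumption; the repo id below is a placeholder, not a real model:

import os, subprocess

# Environment variables read by app.py in this commit:
#   WEIGHTS_SOURCE              default source shown in the UI (file, folder, or repo id)
#   CKPT_PATH / WEIGHTS_DIR     fallbacks when no source is given
#   WEIGHTS_SUBFOLDER / WEIGHTS_REVISION   narrow a Hub repo download
os.environ["WEIGHTS_SOURCE"] = "org/model"            # placeholder Hub repo id
os.environ["WEIGHTS_SUBFOLDER"] = "weights_latest"    # hypothetical subfolder inside that repo
subprocess.run(["python", "app.py"])                  # launches the Gradio demo against that source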
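
Usage note (not part of the commit): the re-added sample_from_logits implements plain temperature sampling with an optional "exclude the current token" mask, which is what the new Update rule radio, Temperature slider, and Exclude-current checkbox drive. A standalone sketch of that update rule on dummy logits; the function and tensor below are illustrative, not taken from app.py:

import torch

def demo_sample(logits, temperature=1.0, current_token=None, exclude_current=True):
    # mirrors the commit's sample_from_logits: softmax(logits / T), optionally
    # zeroing the current token's probability and renormalizing before sampling
    if temperature <= 0:
        return int(torch.argmax(logits).item())
    probs = torch.softmax(logits / float(temperature), dim=-1)
    if exclude_current and current_token is not None:
        probs = probs.clone()
        probs[current_token] = 0.0
        s = probs.sum()
        if s.item() <= 0:
            return int(torch.argmax(logits).item())
        probs = probs / s
    return int(torch.multinomial(probs, 1).item())

logits = torch.tensor([2.0, 1.0, 0.5, -1.0])
print(demo_sample(logits, temperature=0.7, current_token=0))  # never returns index 0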