Spaces:
Sleeping
Sleeping
trixyL committed on
Commit ·
776256b
1
Parent(s): d4ac762
refactor: single sample, intermediate steps
Browse files
app.py
CHANGED
|
@@ -2,7 +2,7 @@ import torch
|
|
| 2 |
import gradio as gr
|
| 3 |
import spaces
|
| 4 |
|
| 5 |
-
from model import
|
| 6 |
|
| 7 |
MODEL_READY = False
|
| 8 |
|
|
@@ -16,31 +16,35 @@ def ensure_model_loaded():
|
|
| 16 |
|
| 17 |
@spaces.GPU
|
| 18 |
@torch.inference_mode()
|
| 19 |
-
def predict(label: int, steps: int
|
| 20 |
ensure_model_loaded()
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
|
| 24 |
with gr.Blocks(title="MNIST Diffusion") as demo:
|
| 25 |
gr.Markdown("# MNIST Diffusion")
|
| 26 |
gr.Markdown(
|
| 27 |
"Discrete diffusion model for MNIST digits. "
|
| 28 |
-
"
|
|
|
|
| 29 |
)
|
| 30 |
|
| 31 |
-
grid = gr.Image(label="
|
|
|
|
| 32 |
|
| 33 |
with gr.Row():
|
| 34 |
label = gr.Dropdown([str(i) for i in range(10)], value="4", label="Label")
|
| 35 |
-
steps = gr.Slider(
|
| 36 |
-
num_samples = gr.Slider(1, 36, value=16, step=1, label="Samples")
|
| 37 |
|
| 38 |
generate_btn = gr.Button("Generate")
|
| 39 |
|
| 40 |
generate_btn.click(
|
| 41 |
fn=predict,
|
| 42 |
-
inputs=[label, steps
|
| 43 |
-
outputs=grid,
|
| 44 |
scroll_to_output=True,
|
| 45 |
)
|
| 46 |
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
import spaces
|
| 4 |
|
| 5 |
+
from model import iter_trajectory_frames, load_model
|
| 6 |
|
| 7 |
MODEL_READY = False
|
| 8 |
|
|
|
|
| 16 |
|
| 17 |
@spaces.GPU
|
| 18 |
@torch.inference_mode()
|
| 19 |
+
def predict(label: int, steps: int):
|
| 20 |
ensure_model_loaded()
|
| 21 |
+
for idx, (image, step_idx, total_steps, total) in enumerate(
|
| 22 |
+
iter_trajectory_frames(label=label, steps=steps), start=1
|
| 23 |
+
):
|
| 24 |
+
yield image, f"trajectory checkpoint {idx}/{total} | denoising step {step_idx}/{total_steps}"
|
| 25 |
|
| 26 |
|
| 27 |
with gr.Blocks(title="MNIST Diffusion") as demo:
|
| 28 |
gr.Markdown("# MNIST Diffusion")
|
| 29 |
gr.Markdown(
|
| 30 |
"Discrete diffusion model for MNIST digits. "
|
| 31 |
+
"The demo streams one sample as masked tokens are resolved with fixed CFG=2.0, "
|
| 32 |
+
"temperature=0.6, and top_p=0.99."
|
| 33 |
)
|
| 34 |
|
| 35 |
+
grid = gr.Image(label="Trajectory", show_label=True)
|
| 36 |
+
status = gr.Textbox(label="Status")
|
| 37 |
|
| 38 |
with gr.Row():
|
| 39 |
label = gr.Dropdown([str(i) for i in range(10)], value="4", label="Label")
|
| 40 |
+
steps = gr.Slider(32, 784, value=784, step=1, label="Steps")
|
|
|
|
| 41 |
|
| 42 |
generate_btn = gr.Button("Generate")
|
| 43 |
|
| 44 |
generate_btn.click(
|
| 45 |
fn=predict,
|
| 46 |
+
inputs=[label, steps],
|
| 47 |
+
outputs=[grid, status],
|
| 48 |
scroll_to_output=True,
|
| 49 |
)
|
| 50 |
|
model.py
CHANGED
|
@@ -40,6 +40,7 @@ INFER_CONFIG = {
|
|
| 40 |
"top_p": 0.99,
|
| 41 |
"cfg_scale": 2.0,
|
| 42 |
"remasking": "random",
|
|
|
|
| 43 |
}
|
| 44 |
|
| 45 |
DTYPES = {
|
|
@@ -779,6 +780,18 @@ def generate_images(label: int, steps: int, num_samples: int) -> List[Image.Imag
|
|
| 779 |
return images
|
| 780 |
|
| 781 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 782 |
def _grid_dims(num_samples: int) -> Tup[int, int]:
|
| 783 |
cols = int(np.ceil(np.sqrt(num_samples)))
|
| 784 |
rows = int(np.ceil(num_samples / cols))
|
|
@@ -799,3 +812,83 @@ def generate_grid_image(label: int, steps: int, num_samples: int) -> Image.Image
|
|
| 799 |
c = idx % cols
|
| 800 |
grid.paste(img, (c * w, r * h))
|
| 801 |
return grid
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
"top_p": 0.99,
|
| 41 |
"cfg_scale": 2.0,
|
| 42 |
"remasking": "random",
|
| 43 |
+
"trajectory_checkpoints": 32,
|
| 44 |
}
|
| 45 |
|
| 46 |
DTYPES = {
|
|
|
|
| 780 |
return images
|
| 781 |
|
| 782 |
|
| 783 |
+
def _to_image(tokens: torch.Tensor) -> Image.Image:
|
| 784 |
+
h = int(MODEL_CONFIG["image_height"])
|
| 785 |
+
w = int(MODEL_CONFIG["image_width"])
|
| 786 |
+
pixel_bins = int(MODEL_CONFIG["pixel_bins"])
|
| 787 |
+
scale = 10
|
| 788 |
+
arr = tokens.detach().cpu().to(torch.int32).numpy().reshape(h, w)
|
| 789 |
+
img = Image.fromarray(dequantize_tokens_to_uint8(arr, pixel_bins=pixel_bins), mode="L")
|
| 790 |
+
if scale > 1:
|
| 791 |
+
img = img.resize((w * scale, h * scale), resample=Image.NEAREST)
|
| 792 |
+
return img
|
| 793 |
+
|
| 794 |
+
|
| 795 |
def _grid_dims(num_samples: int) -> Tup[int, int]:
|
| 796 |
cols = int(np.ceil(np.sqrt(num_samples)))
|
| 797 |
rows = int(np.ceil(num_samples / cols))
|
|
|
|
| 812 |
c = idx % cols
|
| 813 |
grid.paste(img, (c * w, r * h))
|
| 814 |
return grid
|
| 815 |
+
|
| 816 |
+
|
| 817 |
+
@torch.inference_mode()
|
| 818 |
+
def iter_trajectory_frames(label: int, steps: int):
|
| 819 |
+
model, device, _ = load_model()
|
| 820 |
+
label = int(label)
|
| 821 |
+
steps = max(32, int(steps))
|
| 822 |
+
batch_size = 1
|
| 823 |
+
context = torch.full((batch_size,), label, device=device, dtype=torch.long)
|
| 824 |
+
prompt_len = 0
|
| 825 |
+
gen_length = int(MODEL_CONFIG["context_length"])
|
| 826 |
+
block_length = int(INFER_CONFIG["block_length"])
|
| 827 |
+
total_len = prompt_len + gen_length
|
| 828 |
+
blocks = max(1, int(np.ceil(gen_length / block_length)))
|
| 829 |
+
if steps < blocks:
|
| 830 |
+
steps = blocks
|
| 831 |
+
base_steps = steps // blocks
|
| 832 |
+
extra_steps = steps % blocks
|
| 833 |
+
|
| 834 |
+
x = torch.full((batch_size, total_len), fill_value=int(MODEL_CONFIG["mask_token_id"]), device=device, dtype=torch.long)
|
| 835 |
+
uncond_context = torch.full((batch_size,), int(MODEL_CONFIG["null_label_id"]), device=device, dtype=torch.long)
|
| 836 |
+
|
| 837 |
+
checkpoint_count = max(32, int(INFER_CONFIG["trajectory_checkpoints"]))
|
| 838 |
+
checkpoint_indices = np.linspace(1, steps, num=checkpoint_count, dtype=int).tolist()
|
| 839 |
+
checkpoint_indices = sorted(set(max(1, min(steps, idx)) for idx in checkpoint_indices))
|
| 840 |
+
checkpoint_set = set(checkpoint_indices)
|
| 841 |
+
captured = 0
|
| 842 |
+
global_step = 0
|
| 843 |
+
|
| 844 |
+
for block_idx in range(blocks):
|
| 845 |
+
block_start = prompt_len + block_idx * block_length
|
| 846 |
+
block_end = min(block_start + block_length, total_len)
|
| 847 |
+
block_steps = base_steps + (1 if block_idx < extra_steps else 0)
|
| 848 |
+
if block_steps <= 0:
|
| 849 |
+
block_steps = 1
|
| 850 |
+
block_mask = x[:, block_start:block_end] == int(MODEL_CONFIG["mask_token_id"])
|
| 851 |
+
transfer_counts = compute_transfer_schedule(block_mask, block_steps)
|
| 852 |
+
|
| 853 |
+
for step_idx in range(block_steps):
|
| 854 |
+
global_step += 1
|
| 855 |
+
mask_index = x == int(MODEL_CONFIG["mask_token_id"])
|
| 856 |
+
cfg_scale = float(INFER_CONFIG["cfg_scale"])
|
| 857 |
+
if cfg_scale > 0.0:
|
| 858 |
+
cond_logits = model(x, context=context)
|
| 859 |
+
uncond_logits = model(x, context=uncond_context)
|
| 860 |
+
logits = uncond_logits + (cfg_scale + 1.0) * (cond_logits - uncond_logits)
|
| 861 |
+
else:
|
| 862 |
+
logits = model(x, context=context)
|
| 863 |
+
|
| 864 |
+
probs = softmax(logits, dim=-1)
|
| 865 |
+
probs = top_p_filter(probs, float(INFER_CONFIG["top_p"]))
|
| 866 |
+
logits = torch.where(probs > 0, logits, torch.full_like(logits, float("-inf")))
|
| 867 |
+
|
| 868 |
+
logits_with_noise = add_gumbel_noise(logits, float(INFER_CONFIG["temperature"]), generator=None)
|
| 869 |
+
predictions = torch.argmax(logits_with_noise, dim=-1)
|
| 870 |
+
predictions = torch.where(mask_index, predictions, x)
|
| 871 |
+
|
| 872 |
+
confidence = torch.rand((batch_size, total_len), device=device, dtype=torch.float32)
|
| 873 |
+
confidence[:, block_end:] = float("-inf")
|
| 874 |
+
confidence = torch.where(mask_index, confidence, torch.full_like(confidence, float("-inf")))
|
| 875 |
+
|
| 876 |
+
transfer_mask = torch.zeros_like(mask_index)
|
| 877 |
+
for b in range(batch_size):
|
| 878 |
+
k = int(transfer_counts[b, step_idx].item())
|
| 879 |
+
if k <= 0:
|
| 880 |
+
continue
|
| 881 |
+
available = confidence[b] > float("-inf")
|
| 882 |
+
available_count = int(available.sum().item())
|
| 883 |
+
if available_count == 0:
|
| 884 |
+
continue
|
| 885 |
+
if available_count < k:
|
| 886 |
+
k = available_count
|
| 887 |
+
topk_indices = torch.topk(confidence[b], k=k, dim=-1).indices
|
| 888 |
+
transfer_mask[b, topk_indices] = True
|
| 889 |
+
|
| 890 |
+
x = torch.where(transfer_mask, predictions, x)
|
| 891 |
+
|
| 892 |
+
if global_step in checkpoint_set:
|
| 893 |
+
captured += 1
|
| 894 |
+
yield _to_image(x[0]), global_step, steps, len(checkpoint_indices)
|