W8Yi committed on
Commit
3e24f5d
·
verified ·
1 Parent(s): 1f1004e

Upload distilled WSI diffusion model package

Browse files
Files changed (3) hide show
  1. .gitattributes +1 -0
  2. README.md +121 -0
  3. compare.png +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ compare.png filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -76,6 +76,127 @@ latents = sample_student_trajectory(
76
  img = decode_latents_to_images(pipeline, latents)[0]
77
  ```
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  ## Notes
80
 
81
  - This is a distilled student checkpoint intended for research.
 
76
  img = decode_latents_to_images(pipeline, latents)[0]
77
  ```
78
 
79
+ ## Generate In 3 Steps
80
+
81
+ 1. Load base PixCell pipeline + this distilled student.
82
+ 2. Feed one UNI feature (`[1,1,1536]`) as condition.
83
+ 3. Sample with a small step count (for example, 4) and decode.
84
+
85
+ ## Teacher vs Student (Visualization + Timing)
86
+
87
+ `compare.png` (left = teacher, right = student):
88
+
89
+ ![Teacher vs Student](./compare.png)
90
+
91
+ Use the following snippet to reproduce side-by-side image and speedup numbers:
92
+
93
+ ```python
94
+ import time
95
+ import random
96
+ import torch
97
+ import numpy as np
98
+ from PIL import Image
99
+ from IPython.display import display
100
+
101
+ from models.diffusion import (
102
+ make_uncond_embedding,
103
+ scheduler_rollout,
104
+ decode_latents_to_images,
105
+ )
106
+
107
+ idx = random.randrange(len(test_ds))
108
+ uni_feat = test_ds[idx] # [1536]
109
+ cond = uni_feat.unsqueeze(0).unsqueeze(1).to(device=device, dtype=torch.float32) # [1,1,1536]
110
+
111
+ # cond: [1,1,1536] from test manifest (as in previous cell)
112
+ # student, teacher, pipeline already loaded
113
+ student.eval()
114
+ teacher.eval()
115
+
116
+ latent_channels = int(pipeline.vae.config.latent_channels)
117
+ latent_size = 32
118
+ steps_student = 4
119
+ steps_teacher = 35
120
+ guidance_student = 1.0
121
+ guidance_teacher = 3.0
122
+
123
+ # fixed noise for fair comparison
124
+ g = torch.Generator(device=device)
125
+ g.manual_seed(1234)
126
+ xT = torch.randn(
127
+ (1, latent_channels, latent_size, latent_size),
128
+ generator=g,
129
+ device=device,
130
+ dtype=torch.float32, # base noise dtype
131
+ )
132
+
133
+ def sync_if_cuda(dev):
134
+ if dev.type == "cuda":
135
+ torch.cuda.synchronize(dev)
136
+
137
+ with torch.no_grad():
138
+ # teacher/original PixCell timing
139
+ sync_if_cuda(device)
140
+ t0 = time.perf_counter()
141
+ _, teacher_states = scheduler_rollout(
142
+ model=teacher,
143
+ pipeline=pipeline,
144
+ xT=xT.to(dtype=next(teacher.parameters()).dtype),
145
+ cond=cond.to(dtype=next(teacher.parameters()).dtype),
146
+ num_steps=steps_teacher,
147
+ guidance_scale=guidance_teacher,
148
+ )
149
+ sync_if_cuda(device)
150
+ t_teacher_rollout = time.perf_counter() - t0
151
+ lat_teacher = teacher_states[-1]
152
+
153
+ # student timing
154
+ sync_if_cuda(device)
155
+ t0 = time.perf_counter()
156
+ _, student_states = scheduler_rollout(
157
+ model=student,
158
+ pipeline=pipeline,
159
+ xT=xT.to(dtype=next(student.parameters()).dtype),
160
+ cond=cond.to(dtype=next(student.parameters()).dtype),
161
+ num_steps=steps_student,
162
+ guidance_scale=guidance_student,
163
+ )
164
+ sync_if_cuda(device)
165
+ t_student_rollout = time.perf_counter() - t0
166
+ lat_student = student_states[-1]
167
+
168
+ # teacher decode timing
169
+ sync_if_cuda(device)
170
+ t0 = time.perf_counter()
171
+ img_teacher = decode_latents_to_images(pipeline, lat_teacher)[0]
172
+ sync_if_cuda(device)
173
+ t_teacher_decode = time.perf_counter() - t0
174
+
175
+ # student decode timing
176
+ sync_if_cuda(device)
177
+ t0 = time.perf_counter()
178
+ img_student = decode_latents_to_images(pipeline, lat_student)[0]
179
+ sync_if_cuda(device)
180
+ t_student_decode = time.perf_counter() - t0
181
+
182
+ arr_t = (img_teacher.permute(1,2,0).cpu().numpy() * 255).astype(np.uint8)
183
+ arr_s = (img_student.permute(1,2,0).cpu().numpy() * 255).astype(np.uint8)
184
+
185
+ display(Image.fromarray(np.concatenate([arr_t, arr_s], axis=1))) # left=teacher, right=student
186
+
187
+ teacher_total = t_teacher_rollout + t_teacher_decode
188
+ student_total = t_student_rollout + t_student_decode
189
+
190
+ print(f"Teacher rollout ({steps_teacher} steps): {t_teacher_rollout:.4f}s")
191
+ print(f"Student rollout ({steps_student} steps): {t_student_rollout:.4f}s")
192
+ print(f"Teacher decode: {t_teacher_decode:.4f}s")
193
+ print(f"Student decode: {t_student_decode:.4f}s")
194
+ print(f"Teacher total: {teacher_total:.4f}s")
195
+ print(f"Student total: {student_total:.4f}s")
196
+ print(f"Rollout speedup: {t_teacher_rollout / max(t_student_rollout, 1e-9):.2f}x")
197
+ print(f"End-to-end speedup: {teacher_total / max(student_total, 1e-9):.2f}x")
198
+ ```
199
+
200
  ## Notes
201
 
202
  - This is a distilled student checkpoint intended for research.
compare.png ADDED

Git LFS Details

  • SHA256: ce7868abb9ca039e024b10da7dc309532b47d7223e32828187642d194ec1a68d
  • Pointer size: 131 Bytes
  • Size of remote file: 281 kB