Add files using upload-large-folder tool

Browse files

Files changed (10) hide show

README.md +25 -0
conditioning_encoder/config.json +11 -8
demo_data/0_ssl_feat.npy +3 -0
demo_images/input.jpeg +0 -0
demo_images/output.jpeg +0 -0
model_index.json +1 -2
pipeline_zoomldm.py +21 -2
run_demo_inference.py +62 -0
unet/config.json +25 -22
vae/config.json +24 -19

README.md CHANGED Viewed

@@ -9,12 +9,23 @@ tags:
   - latent-diffusion
   - custom-pipeline
   - arxiv:2411.16969
 ---
 # BiliSakura/ZoomLDM-naip
 Diffusers-format **NAIP** variant of ZoomLDM with a bundled custom pipeline and local `ldm` modules.
 ## Model Description
 - **Architecture:** ZoomLDM latent diffusion pipeline (`UNet + VAE + conditioning encoder`)
@@ -60,6 +71,20 @@ out = pipe(
 images = out.images
 ```
 ## Limitations
 - Requires correctly precomputed NAIP conditioning features.

   - latent-diffusion
   - custom-pipeline
   - arxiv:2411.16969
+widget:
+  - src: demo_images/input.jpeg
+    prompt: NAIP sample conditioned on demo SSL feature (mag=2)
+    output:
+      url: demo_images/output.jpeg
 ---
 # BiliSakura/ZoomLDM-naip
 Diffusers-format **NAIP** variant of ZoomLDM with a bundled custom pipeline and local `ldm` modules.
+## Known Issue
+- Current NAIP generations may look incorrect (BRCA-like) even with valid NAIP demo inputs.
+- Root cause: the upstream raw checkpoints currently available for `naip` and `brca` are byte-identical, so conversion reproduces the same model weights.
+- This repo will be updated once a distinct NAIP checkpoint is available.
 ## Model Description
 - **Architecture:** ZoomLDM latent diffusion pipeline (`UNet + VAE + conditioning encoder`)
 images = out.images
 ```
+## Demo Generation (dataset-backed)
+This repo includes `run_demo_inference.py`, which uses local repo assets only:
+- image: `demo_images/input.jpeg`
+- SSL feature: `demo_data/0_ssl_feat.npy`
+- magnification label: `2` (3x level)
+Run:
+```bash
+python run_demo_inference.py
+```
 ## Limitations
 - Requires correctly precomputed NAIP conditioning features.

conditioning_encoder/config.json CHANGED Viewed

@@ -1,10 +1,13 @@
 {
-  "feat_key": "ssl_feat",
-  "mag_key": "mag",
-  "num_layers": 12,
-  "input_channels": 1024,
-  "hidden_channels": 512,
-  "vit_mlp_dim": 2048,
-  "p_uncond": 0.1,
-  "mag_levels": 8
 }

 {
+  "target": "ldm.modules.encoders.modules.EmbeddingViT2_5",
+  "params": {
+    "feat_key": "ssl_feat",
+    "mag_key": "mag",
+    "num_layers": 12,
+    "input_channels": 1024,
+    "hidden_channels": 512,
+    "vit_mlp_dim": 2048,
+    "p_uncond": 0.1,
+    "mag_levels": 8
+  }
 }

demo_data/0_ssl_feat.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:faf910fc04b10c4e1014da8c3b2b3c8375b7955b7d39cf94de2c5398af8756d4
+size 65664

demo_images/input.jpeg ADDED Viewed

demo_images/output.jpeg ADDED Viewed

model_index.json CHANGED Viewed

@@ -18,6 +18,5 @@
     "ZoomLDMPipeline"
   ],
   "scale_factor": 1.0,
-  "conditioning_key": "crossattn",
-  "variant": "naip"
 }

     "ZoomLDMPipeline"
   ],
   "scale_factor": 1.0,
+  "conditioning_key": "crossattn"
 }

pipeline_zoomldm.py CHANGED Viewed

@@ -44,6 +44,10 @@ def _ensure_local_ldm_on_path():
 _ensure_local_ldm_on_path()
 def _get_class(target: str):
@@ -299,6 +303,17 @@ class ZoomLDMPipeline(DiffusionPipeline):
             path = Path(snapshot_download(pretrained_model_name_or_path))
         path = path.resolve()
         def _is_diffusers_model_dir(candidate: Path) -> bool:
             required = [
@@ -333,8 +348,6 @@ class ZoomLDMPipeline(DiffusionPipeline):
                 )
             model_dir = candidate_dirs[0]
-        scheduler = DDIMScheduler.from_pretrained(model_dir / "scheduler")
         _TARGETS = {
             "unet": "ldm.modules.diffusionmodules.openaimodel.UNetModel",
             "vae": "ldm.models.autoencoder.VQModelInterface",
@@ -381,6 +394,12 @@ class ZoomLDMPipeline(DiffusionPipeline):
             component.eval()
             return component
         unet = load_custom_component("unet")
         vae = load_custom_component("vae")
         conditioning_encoder = load_custom_component("conditioning_encoder")

 _ensure_local_ldm_on_path()
+# Register module alias so diffusers component loading can resolve
+# model_index entries like "pipeline_zoomldm" even when this file is loaded
+# under a dynamic module name (e.g. diffusers_modules.local.*).
+sys.modules["pipeline_zoomldm"] = sys.modules[__name__]
 def _get_class(target: str):
             path = Path(snapshot_download(pretrained_model_name_or_path))
         path = path.resolve()
+        component_names = {"unet", "vae", "conditioning_encoder"}
+        # When diffusers loads components, it may call this class with a path like ".../unet".
+        requested_component = None
+        if path.name in component_names and (path / "config.json").exists():
+            requested_component = path.name
+            path = path.parent
+        # Also support explicit component requests via subfolder.
+        subfolder = kwargs.pop("subfolder", None)
+        if requested_component is None and subfolder in component_names:
+            requested_component = subfolder
         def _is_diffusers_model_dir(candidate: Path) -> bool:
             required = [
                 )
             model_dir = candidate_dirs[0]
         _TARGETS = {
             "unet": "ldm.modules.diffusionmodules.openaimodel.UNetModel",
             "vae": "ldm.models.autoencoder.VQModelInterface",
             component.eval()
             return component
+        # Diffusers component-loading path: return a single module.
+        if requested_component is not None:
+            return load_custom_component(requested_component)
+        scheduler = DDIMScheduler.from_pretrained(model_dir / "scheduler")
         unet = load_custom_component("unet")
         vae = load_custom_component("vae")
         conditioning_encoder = load_custom_component("conditioning_encoder")

run_demo_inference.py ADDED Viewed

	@@ -0,0 +1,62 @@

+#!/usr/bin/env python3
+"""Run ZoomLDM-NAIP demo inference using local demo assets."""
+from pathlib import Path
+import numpy as np
+import torch
+from diffusers import DiffusionPipeline
def preprocess_naip_ssl(npy_path: Path) -> torch.Tensor:
    """Load a saved SSL feature array and lay it out as a ``(C, h, h)`` tensor.

    The stored array is either ``(n_embed, C)`` (one row per embedding, with
    ``n_embed`` a perfect square) or a single ``(C,)`` embedding. Every feature
    column is standardized across the embeddings, then the rows are folded
    row-major into an ``h x h`` grid with ``h = sqrt(n_embed)``.

    Args:
        npy_path: Path to the ``.npy`` feature file.

    Returns:
        A float32 tensor of shape ``(C, h, h)``.

    Raises:
        ValueError: If the array is not 1-D or 2-D, is empty, or ``n_embed``
            is not a perfect square.
    """
    feat = np.load(npy_path).astype(np.float32)  # (n_embed, C) or (C,)

    if feat.ndim == 1:
        # A lone embedding is one row of C features, i.e. (1, C). Adding the
        # axis at the end instead would treat every feature as its own
        # embedding and produce a (1, sqrt(C), sqrt(C)) result.
        # NOTE(review): with a single row the per-feature standardization
        # below maps every value to 0 -- confirm which statistics the
        # dataset material intends for this case.
        feat = feat[None, :]
    if feat.ndim != 2:
        raise ValueError(
            f"Expected a 1-D or 2-D feature array, got shape {feat.shape}"
        )

    n_embed, n_feat = feat.shape
    h = int(round(float(np.sqrt(n_embed))))
    if n_embed == 0 or n_feat == 0 or h * h != n_embed:
        raise ValueError(
            f"Cannot arrange {n_embed} embeddings of width {n_feat} "
            "into a non-empty square grid"
        )

    # Standardize each feature column across the embeddings; the epsilon
    # keeps constant columns from dividing by zero.
    mean = feat.mean(axis=0, keepdims=True)
    std = feat.std(axis=0, keepdims=True)
    feat = (feat - mean) / (std + 1e-8)

    # (n_embed, C) -> (h, h, C) -> (C, h, h)
    feat = feat.reshape(h, h, n_feat).transpose(2, 0, 1)
    return torch.from_numpy(feat).float()
def main() -> None:
    """Run one demo generation from the assets shipped next to this script.

    Reads ``demo_images/input.jpeg`` (only checked for existence) and
    ``demo_data/0_ssl_feat.npy``, loads the pipeline from this directory with
    the bundled ``pipeline_zoomldm.py``, samples one image on CUDA and writes
    it to ``demo_images/output.jpeg``.

    Raises:
        FileNotFoundError: If either demo asset is missing.
    """
    repo = Path(__file__).resolve().parent
    demo_dir = repo / "demo_images"
    demo_data = repo / "demo_data"
    demo_dir.mkdir(exist_ok=True)

    # Use repo-local demo assets only (3x sample -> magnification label 2).
    src_img = demo_dir / "input.jpeg"
    src_feat = demo_data / "0_ssl_feat.npy"
    if not src_img.exists():
        raise FileNotFoundError(f"Missing demo input image: {src_img}")
    if not src_feat.exists():
        raise FileNotFoundError(f"Missing demo SSL feature: {src_feat}")

    # Add a batch axis: (1024, h, h) -> (1, 1024, h, h).
    ssl_feat = preprocess_naip_ssl(src_feat).unsqueeze(0).to("cuda")
    magnification = torch.tensor([2], device="cuda", dtype=torch.long)

    pipe = DiffusionPipeline.from_pretrained(
        str(repo),
        custom_pipeline=str(repo / "pipeline_zoomldm.py"),
        trust_remote_code=True,
        local_files_only=True,
    ).to("cuda")

    out = pipe(
        ssl_features=ssl_feat,
        magnification=magnification,
        num_inference_steps=50,
        guidance_scale=2.0,
        # Fixed seed so repeated runs sample from the same noise.
        generator=torch.Generator(device="cuda").manual_seed(42),
    )

    out_path = demo_dir / "output.jpeg"
    out.images[0].save(out_path)
    # The input image is only read, never written, so it is reported as the
    # input rather than as a saved file.
    print(f"Input {src_img}")
    print(f"Saved {out_path}")


if __name__ == "__main__":
    main()

unet/config.json CHANGED Viewed

@@ -1,24 +1,27 @@
 {
-  "use_checkpoint": true,
-  "use_fp16": true,
-  "image_size": 64,
-  "in_channels": 3,
-  "out_channels": 3,
-  "model_channels": 192,
-  "attention_resolutions": [
-    8,
-    4,
-    2
-  ],
-  "num_res_blocks": 2,
-  "channel_mult": [
-    1,
-    2,
-    3,
-    5
-  ],
-  "num_heads": 1,
-  "use_spatial_transformer": true,
-  "transformer_depth": 1,
-  "context_dim": 512
 }

 {
+  "target": "ldm.modules.diffusionmodules.openaimodel.UNetModel",
+  "params": {
+    "use_checkpoint": true,
+    "use_fp16": true,
+    "image_size": 64,
+    "in_channels": 3,
+    "out_channels": 3,
+    "model_channels": 192,
+    "attention_resolutions": [
+      8,
+      4,
+      2
+    ],
+    "num_res_blocks": 2,
+    "channel_mult": [
+      1,
+      2,
+      3,
+      5
+    ],
+    "num_heads": 1,
+    "use_spatial_transformer": true,
+    "transformer_depth": 1,
+    "context_dim": 512
+  }
 }

vae/config.json CHANGED Viewed

@@ -1,21 +1,26 @@
 {
-  "embed_dim": 3,
-  "n_embed": 8192,
-  "ddconfig": {
-    "double_z": false,
-    "z_channels": 3,
-    "resolution": 256,
-    "in_channels": 3,
-    "out_ch": 3,
-    "ch": 128,
-    "ch_mult": [
-      1,
-      2,
-      4
-    ],
-    "num_res_blocks": 2,
-    "attn_resolutions": [],
-    "dropout": 0.0
-  },
-  "lossconfig": {}
 }

 {
+  "target": "ldm.models.autoencoder.VQModelInterface",
+  "params": {
+    "embed_dim": 3,
+    "n_embed": 8192,
+    "ddconfig": {
+      "double_z": false,
+      "z_channels": 3,
+      "resolution": 256,
+      "in_channels": 3,
+      "out_ch": 3,
+      "ch": 128,
+      "ch_mult": [
+        1,
+        2,
+        4
+      ],
+      "num_res_blocks": 2,
+      "attn_resolutions": [],
+      "dropout": 0.0
+    },
+    "lossconfig": {
+      "target": "torch.nn.Identity"
+    }
+  }
 }