BiliSakura committed · Commit 241472e · verified · 1 Parent(s): e1991a4

Add files using upload-large-folder tool

README.md CHANGED
@@ -9,12 +9,23 @@ tags:
  - latent-diffusion
  - custom-pipeline
  - arxiv:2411.16969
+ widget:
+ - src: demo_images/input.jpeg
+   prompt: NAIP sample conditioned on demo SSL feature (mag=0)
+   output:
+     url: demo_images/output.jpeg
  ---

  # BiliSakura/ZoomLDM-naip

  Diffusers-format **NAIP** variant of ZoomLDM with a bundled custom pipeline and local `ldm` modules.

+ ## Known Issue
+
+ - Current NAIP generations may look incorrect (BRCA-like) even with valid NAIP demo inputs.
+ - Root cause: the upstream raw checkpoints currently available for `naip` and `brca` are byte-identical, so conversion reproduces the same model weights.
+ - This repo will be updated once a distinct NAIP checkpoint is available.
+
  ## Model Description

  - **Architecture:** ZoomLDM latent diffusion pipeline (`UNet + VAE + conditioning encoder`)
@@ -60,6 +71,20 @@ out = pipe(
  images = out.images
  ```

+ ## Demo Generation (dataset-backed)
+
+ This repo includes `run_demo_inference.py`, which uses local repo assets only:
+
+ - image: `demo_images/input.jpeg`
+ - SSL feature: `demo_data/0_ssl_feat.npy`
+ - magnification label: `2` (3x level)
+
+ Run:
+
+ ```bash
+ python run_demo_inference.py
+ ```
+
  ## Limitations

  - Requires correctly precomputed NAIP conditioning features.
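The Known Issue above rests on the claim that the upstream `naip` and `brca` raw checkpoints are byte-identical. A minimal sketch for checking that locally by comparing SHA-256 digests; the checkpoint paths below are placeholders, not the upstream release file names:

```python
# Hypothetical check for the Known Issue: are two raw checkpoints
# byte-identical? Paths are placeholders for the upstream downloads.
import hashlib
from pathlib import Path

def sha256_of(path: Path) -> str:
    digest = hashlib.sha256()
    with path.open("rb") as f:
        # Stream in 1 MiB chunks so large checkpoints don't load into RAM.
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest()

naip = sha256_of(Path("naip.ckpt"))  # placeholder path
brca = sha256_of(Path("brca.ckpt"))  # placeholder path
print("byte-identical" if naip == brca else "distinct checkpoints")
```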
conditioning_encoder/config.json CHANGED
@@ -1,10 +1,13 @@
  {
- "feat_key": "ssl_feat",
- "mag_key": "mag",
- "num_layers": 12,
- "input_channels": 1024,
- "hidden_channels": 512,
- "vit_mlp_dim": 2048,
- "p_uncond": 0.1,
- "mag_levels": 8
+ "target": "ldm.modules.encoders.modules.EmbeddingViT2_5",
+ "params": {
+   "feat_key": "ssl_feat",
+   "mag_key": "mag",
+   "num_layers": 12,
+   "input_channels": 1024,
+   "hidden_channels": 512,
+   "vit_mlp_dim": 2048,
+   "p_uncond": 0.1,
+   "mag_levels": 8
+ }
  }
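The `target`/`params` shape this config now carries is the same convention that `pipeline_zoomldm.py` resolves through its `_get_class` helper. A minimal sketch of that resolution pattern, as an illustration rather than the repo's exact loader:

```python
# Minimal sketch of resolving a {"target": ..., "params": ...} config
# into an instance; illustrative, not the repo's own _get_class code.
import importlib
import json
from pathlib import Path

def instantiate_from_config(config_path: Path):
    config = json.loads(config_path.read_text())
    # Split "pkg.module.ClassName" into module path and class name.
    module_name, _, class_name = config["target"].rpartition(".")
    cls = getattr(importlib.import_module(module_name), class_name)
    return cls(**config.get("params", {}))

# e.g. instantiate_from_config(Path("conditioning_encoder/config.json"))
# would import ldm.modules.encoders.modules.EmbeddingViT2_5 and call it
# with feat_key="ssl_feat", mag_key="mag", and the other params above.
```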
demo_data/0_ssl_feat.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:faf910fc04b10c4e1014da8c3b2b3c8375b7955b7d39cf94de2c5398af8756d4
+ size 65664
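Since this file is stored via Git LFS, a clone made without LFS support leaves the three-line pointer above on disk instead of the 65,664-byte array, and `np.load` will fail. A small sketch that distinguishes the two cases before running the demo:

```python
# Check that demo_data/0_ssl_feat.npy is the real LFS payload
# (65664 bytes per the pointer above) and not an unfetched pointer.
from pathlib import Path

feat_path = Path("demo_data/0_ssl_feat.npy")
# An LFS pointer starts with "version https://git-lfs...";
# a real .npy file starts with the b"\x93NUMPY" magic instead.
if feat_path.read_bytes()[:7] == b"version":
    raise RuntimeError(
        "Got a Git LFS pointer; fetch the real file first (e.g. `git lfs pull`)."
    )
assert feat_path.stat().st_size == 65664, "unexpected payload size"
```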
demo_images/input.jpeg ADDED
demo_images/output.jpeg ADDED
model_index.json CHANGED
@@ -18,6 +18,5 @@
    "ZoomLDMPipeline"
  ],
  "scale_factor": 1.0,
- "conditioning_key": "crossattn",
- "variant": "naip"
+ "conditioning_key": "crossattn"
  }
pipeline_zoomldm.py CHANGED
@@ -44,6 +44,10 @@ def _ensure_local_ldm_on_path():


  _ensure_local_ldm_on_path()
+ # Register module alias so diffusers component loading can resolve
+ # model_index entries like "pipeline_zoomldm" even when this file is loaded
+ # under a dynamic module name (e.g. diffusers_modules.local.*).
+ sys.modules["pipeline_zoomldm"] = sys.modules[__name__]


  def _get_class(target: str):
@@ -299,6 +303,17 @@ class ZoomLDMPipeline(DiffusionPipeline):
          path = Path(snapshot_download(pretrained_model_name_or_path))

          path = path.resolve()
+         component_names = {"unet", "vae", "conditioning_encoder"}
+         # When diffusers loads components, it may call this class with a path like ".../unet".
+         requested_component = None
+         if path.name in component_names and (path / "config.json").exists():
+             requested_component = path.name
+             path = path.parent
+
+         # Also support explicit component requests via subfolder.
+         subfolder = kwargs.pop("subfolder", None)
+         if requested_component is None and subfolder in component_names:
+             requested_component = subfolder

          def _is_diffusers_model_dir(candidate: Path) -> bool:
              required = [
@@ -333,8 +348,6 @@ class ZoomLDMPipeline(DiffusionPipeline):
          )
          model_dir = candidate_dirs[0]

-         scheduler = DDIMScheduler.from_pretrained(model_dir / "scheduler")
-
          _TARGETS = {
              "unet": "ldm.modules.diffusionmodules.openaimodel.UNetModel",
              "vae": "ldm.models.autoencoder.VQModelInterface",
@@ -381,6 +394,12 @@ class ZoomLDMPipeline(DiffusionPipeline):
              component.eval()
              return component

+         # Diffusers component-loading path: return a single module.
+         if requested_component is not None:
+             return load_custom_component(requested_component)
+
+         scheduler = DDIMScheduler.from_pretrained(model_dir / "scheduler")
+
          unet = load_custom_component("unet")
          vae = load_custom_component("vae")
          conditioning_encoder = load_custom_component("conditioning_encoder")
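Taken together, the patched `from_pretrained` now accepts three call shapes: a path ending in a component directory, an explicit `subfolder` request, and the original full-pipeline load. A sketch of all three, with placeholder paths for a local clone:

```python
# Illustration of the call shapes the patched from_pretrained handles;
# "./ZoomLDM-naip" is a placeholder path to a local clone of this repo.
from pipeline_zoomldm import ZoomLDMPipeline

# 1. Path ending in a component directory containing a config.json:
#    the patch strips the component name and returns just that module.
unet = ZoomLDMPipeline.from_pretrained("./ZoomLDM-naip/unet")

# 2. Explicit subfolder request, as diffusers component loading may issue.
vae = ZoomLDMPipeline.from_pretrained("./ZoomLDM-naip", subfolder="vae")

# 3. No component request: the full pipeline is assembled as before.
pipe = ZoomLDMPipeline.from_pretrained("./ZoomLDM-naip")
```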
run_demo_inference.py ADDED
@@ -0,0 +1,62 @@
+ #!/usr/bin/env python3
+ """Run ZoomLDM-NAIP demo inference using local demo assets."""
+
+ from pathlib import Path
+
+ import numpy as np
+ import torch
+ from diffusers import DiffusionPipeline
+
+
+ def preprocess_naip_ssl(npy_path: Path) -> torch.Tensor:
+     # Copied from dataset material:
+     # rearrange (n_embed, 1024) -> (1024, h, h), normalize per-feature.
+     feat = np.load(npy_path).astype(np.float32)  # (n_embed, 1024) or (1024,)
+     if feat.ndim == 1:
+         feat = feat[:, None]
+     mean = feat.mean(axis=0, keepdims=True)
+     std = feat.std(axis=0, keepdims=True)
+     feat = (feat - mean) / (std + 1e-8)
+     h = int(np.sqrt(feat.shape[0]))
+     feat = feat.reshape(h, h, feat.shape[1]).transpose(2, 0, 1)  # (1024, h, h)
+     return torch.from_numpy(feat).float()
+
+
+ def main() -> None:
+     repo = Path(__file__).resolve().parent
+     demo_dir = repo / "demo_images"
+     demo_data = repo / "demo_data"
+     demo_dir.mkdir(exist_ok=True)
+
+     # Use repo-local demo assets only (3x sample -> magnification label 2).
+     src_img = demo_dir / "input.jpeg"
+     src_feat = demo_data / "0_ssl_feat.npy"
+     if not src_img.exists():
+         raise FileNotFoundError(f"Missing demo input image: {src_img}")
+     if not src_feat.exists():
+         raise FileNotFoundError(f"Missing demo SSL feature: {src_feat}")
+
+     ssl_feat = preprocess_naip_ssl(src_feat).unsqueeze(0).to("cuda")  # (1, 1024, h, h)
+     magnification = torch.tensor([2], device="cuda", dtype=torch.long)
+
+     pipe = DiffusionPipeline.from_pretrained(
+         str(repo),
+         custom_pipeline=str(repo / "pipeline_zoomldm.py"),
+         trust_remote_code=True,
+         local_files_only=True,
+     ).to("cuda")
+
+     out = pipe(
+         ssl_features=ssl_feat,
+         magnification=magnification,
+         num_inference_steps=50,
+         guidance_scale=2.0,
+         generator=torch.Generator(device="cuda").manual_seed(42),
+     )
+     out.images[0].save(demo_dir / "output.jpeg")
+     print(f"Saved {demo_dir / 'input.jpeg'}")
+     print(f"Saved {demo_dir / 'output.jpeg'}")
+
+
+ if __name__ == "__main__":
+     main()
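Before committing to a full GPU run, the conditioning tensor can be checked in isolation; the sketch below imports the script's own `preprocess_naip_ssl` and needs only the demo feature file (run from the repo root so the import and path resolve):

```python
# Standalone shape check of the demo conditioning tensor, reusing the
# demo script's own preprocessing; requires only the feature file, no GPU.
from pathlib import Path

from run_demo_inference import preprocess_naip_ssl

cond = preprocess_naip_ssl(Path("demo_data/0_ssl_feat.npy")).unsqueeze(0)
print(cond.shape)  # (1, C, h, h) -- the ssl_features layout the pipeline expects
```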
unet/config.json CHANGED
@@ -1,24 +1,27 @@
  {
- "use_checkpoint": true,
- "use_fp16": true,
- "image_size": 64,
- "in_channels": 3,
- "out_channels": 3,
- "model_channels": 192,
- "attention_resolutions": [
-   8,
-   4,
-   2
- ],
- "num_res_blocks": 2,
- "channel_mult": [
-   1,
-   2,
-   3,
-   5
- ],
- "num_heads": 1,
- "use_spatial_transformer": true,
- "transformer_depth": 1,
- "context_dim": 512
+ "target": "ldm.modules.diffusionmodules.openaimodel.UNetModel",
+ "params": {
+   "use_checkpoint": true,
+   "use_fp16": true,
+   "image_size": 64,
+   "in_channels": 3,
+   "out_channels": 3,
+   "model_channels": 192,
+   "attention_resolutions": [
+     8,
+     4,
+     2
+   ],
+   "num_res_blocks": 2,
+   "channel_mult": [
+     1,
+     2,
+     3,
+     5
+   ],
+   "num_heads": 1,
+   "use_spatial_transformer": true,
+   "transformer_depth": 1,
+   "context_dim": 512
+ }
  }
vae/config.json CHANGED
@@ -1,21 +1,26 @@
  {
- "embed_dim": 3,
- "n_embed": 8192,
- "ddconfig": {
-   "double_z": false,
-   "z_channels": 3,
-   "resolution": 256,
-   "in_channels": 3,
-   "out_ch": 3,
-   "ch": 128,
-   "ch_mult": [
-     1,
-     2,
-     4
-   ],
-   "num_res_blocks": 2,
-   "attn_resolutions": [],
-   "dropout": 0.0
- },
- "lossconfig": {}
+ "target": "ldm.models.autoencoder.VQModelInterface",
+ "params": {
+   "embed_dim": 3,
+   "n_embed": 8192,
+   "ddconfig": {
+     "double_z": false,
+     "z_channels": 3,
+     "resolution": 256,
+     "in_channels": 3,
+     "out_ch": 3,
+     "ch": 128,
+     "ch_mult": [
+       1,
+       2,
+       4
+     ],
+     "num_res_blocks": 2,
+     "attn_resolutions": [],
+     "dropout": 0.0
+   },
+   "lossconfig": {
+     "target": "torch.nn.Identity"
+   }
+ }
  }
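A note on the `lossconfig` change: the old empty `{}` gives a target-based instantiator no class to resolve, while `torch.nn.Identity` instantiates to a module that passes inputs through unchanged, which is presumably all an inference-only checkpoint needs. A quick illustration:

```python
# What the new lossconfig instantiates to: a no-op stand-in loss module.
import torch

loss = torch.nn.Identity()
x = torch.randn(2, 3)
assert torch.equal(loss(x), x)  # passes inputs through unchanged
```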