BiliSakura committed 20 days ago

Commit

01176da

verified ·

1 Parent(s): 1af9de2

Add files using upload-large-folder tool

Browse files

Files changed (31) hide show

README.md +82 -0
cd_head/cdd-50-100-400-650/config.json +25 -0
cd_head/cdd-50-100-400-650/diffusion_pytorch_model.safetensors +3 -0
cd_head/cdd-50-100-400/config.json +24 -0
cd_head/cdd-50-100-400/diffusion_pytorch_model.safetensors +3 -0
cd_head/cdd-50-100/config.json +23 -0
cd_head/cdd-50-100/diffusion_pytorch_model.safetensors +3 -0
cd_head/dsifn-50-100-400-650/config.json +25 -0
cd_head/dsifn-50-100-400-650/diffusion_pytorch_model.safetensors +3 -0
cd_head/dsifn-50-100-400/config.json +24 -0
cd_head/dsifn-50-100-400/diffusion_pytorch_model.safetensors +3 -0
cd_head/dsifn-50-100/config.json +23 -0
cd_head/dsifn-50-100/diffusion_pytorch_model.safetensors +3 -0
cd_head/levir-50-100-400-650/config.json +25 -0
cd_head/levir-50-100-400-650/diffusion_pytorch_model.safetensors +3 -0
cd_head/levir-50-100-400/config.json +24 -0
cd_head/levir-50-100-400/diffusion_pytorch_model.safetensors +3 -0
cd_head/levir-50-100/config.json +23 -0
cd_head/levir-50-100/diffusion_pytorch_model.safetensors +3 -0
cd_head/whu-50-100-400-650/config.json +25 -0
cd_head/whu-50-100-400-650/diffusion_pytorch_model.safetensors +3 -0
cd_head/whu-50-100-400/config.json +24 -0
cd_head/whu-50-100-400/diffusion_pytorch_model.safetensors +3 -0
cd_head/whu-50-100/config.json +23 -0
cd_head/whu-50-100/diffusion_pytorch_model.safetensors +3 -0
model_index.json +12 -0
pipeline.py +518 -0
scheduler/scheduler_config.json +19 -0
unet/config.json +23 -0
unet/diffusion_pytorch_model.safetensors +3 -0
unet/unet.py +495 -0

README.md ADDED Viewed

	@@ -0,0 +1,82 @@

+---
+license: mit
+tags:
+  - diffusers
+  - ddpm-cd
+  - change-detection
+  - remote-sensing
+---
+# BiliSakura/ddpm-cd
+**Consolidated DDPM-CD change detection** — Single repo with shared UNet backbone and multiple cd_head variants (trained on different datasets and timestep configs).
+## Model Structure
+- **Backbone**: Shared SR3-style UNet (same across all variants)
+- **cd_head**: Dataset-specific change detection heads in `cd_head/{variant}/`
+### Available cd_head Variants
+| Variant | Dataset | Timesteps | Path |
+|---------|---------|-----------|------|
+| cdd-50-100 | CDD | [50, 100] | `cd_head/cdd-50-100/` |
+| cdd-50-100-400 | CDD | [50, 100, 400] | `cd_head/cdd-50-100-400/` |
+| cdd-50-100-400-650 | CDD | [50, 100, 400, 650] | `cd_head/cdd-50-100-400-650/` |
+| dsifn-50-100 | DSIFN | [50, 100] | `cd_head/dsifn-50-100/` |
+| dsifn-50-100-400 | DSIFN | [50, 100, 400] | `cd_head/dsifn-50-100-400/` |
+| dsifn-50-100-400-650 | DSIFN | [50, 100, 400, 650] | `cd_head/dsifn-50-100-400-650/` |
+| levir-50-100 | LEVIR | [50, 100] | `cd_head/levir-50-100/` |
+| levir-50-100-400 | LEVIR | [50, 100, 400] | `cd_head/levir-50-100-400/` |
+| levir-50-100-400-650 | LEVIR | [50, 100, 400, 650] | `cd_head/levir-50-100-400-650/` |
+| whu-50-100 | WHU | [50, 100] | `cd_head/whu-50-100/` |
+| whu-50-100-400 | WHU | [50, 100, 400] | `cd_head/whu-50-100-400/` |
+| whu-50-100-400-650 | WHU | [50, 100, 400, 650] | `cd_head/whu-50-100-400-650/` |
+## Usage
+Load the pipeline by passing `custom_pipeline="pipeline"` (the repo ships its own `pipeline.py`, referenced by its relative name) together with the `cd_head_subfolder` of the variant you want:
+```python
+from diffusers import DiffusionPipeline
+pipe = DiffusionPipeline.from_pretrained(
+    "BiliSakura/ddpm-cd",
+    custom_pipeline="pipeline",
+    trust_remote_code=True,
+    cd_head_subfolder="levir-50-100",
+)
+# Images in [-1, 1], shape (B, 3, H, W)
+change_map = pipe(image_A, image_B, timesteps=[50, 100])
+pred = change_map.argmax(1)  # (B, H, W), 0=no-change, 1=change
+```
+**Important**: For each variant, pass the same `timesteps` it was trained with (see the Timesteps column in the table above).
+### Switching cd_head at Runtime
+```python
+pipe = DiffusionPipeline.from_pretrained(
+    "BiliSakura/ddpm-cd",
+    custom_pipeline="pipeline",
+    trust_remote_code=True,
+    cd_head_subfolder="levir-50-100",
+)
+# Load different cd_head
+pipe.load_cd_head(subfolder="whu-50-100-400")
+change_map = pipe(image_A, image_B, timesteps=[50, 100, 400])
+```
+## Citation
+```bibtex
+@misc{bandara2024ddpmcdv3,
+    title={DDPM-CD: Denoising Diffusion Probabilistic Models as Feature Extractors for Change Detection},
+    author={Wele Gedara Chaminda Bandara and Nithin Gopalakrishnan Nair and Vishal M. Patel},
+    year={2024},
+    eprint={2206.11892},
+    archivePrefix={arXiv},
+    primaryClass={cs.CV},
+}
+```

cd_head/cdd-50-100-400-650/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "feat_scales": [
+    2,
+    5,
+    8,
+    11,
+    14
+  ],
+  "inner_channel": 128,
+  "channel_multiplier": [
+    1,
+    2,
+    4,
+    8,
+    8
+  ],
+  "out_channels": 2,
+  "img_size": 256,
+  "time_steps": [
+    50,
+    100,
+    400,
+    650
+  ]
+}

cd_head/cdd-50-100-400-650/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:088be206470c3e764d1e766754319d243d7a4e1c078fa21edca0a3658de3834e
+size 195390880

cd_head/cdd-50-100-400/config.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "feat_scales": [
+    2,
+    5,
+    8,
+    11,
+    14
+  ],
+  "inner_channel": 128,
+  "channel_multiplier": [
+    1,
+    2,
+    4,
+    8,
+    8
+  ],
+  "out_channels": 2,
+  "img_size": 256,
+  "time_steps": [
+    50,
+    100,
+    400
+  ]
+}

cd_head/cdd-50-100-400/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bd3786cca5e126438d6d6676504c260d3d058e27afc8d43dda0627199eb4dc9e
+size 185626008

cd_head/cdd-50-100/config.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "feat_scales": [
+    2,
+    5,
+    8,
+    11,
+    14
+  ],
+  "inner_channel": 128,
+  "channel_multiplier": [
+    1,
+    2,
+    4,
+    8,
+    8
+  ],
+  "out_channels": 2,
+  "img_size": 256,
+  "time_steps": [
+    50,
+    100
+  ]
+}

cd_head/cdd-50-100/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cf58a46ac8449df67802ec45c5346ae183766951c54328f43b1d018f794f7ed2
+size 175861136

cd_head/dsifn-50-100-400-650/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "feat_scales": [
+    2,
+    5,
+    8,
+    11,
+    14
+  ],
+  "inner_channel": 128,
+  "channel_multiplier": [
+    1,
+    2,
+    4,
+    8,
+    8
+  ],
+  "out_channels": 2,
+  "img_size": 256,
+  "time_steps": [
+    50,
+    100,
+    400,
+    650
+  ]
+}

cd_head/dsifn-50-100-400-650/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c35c9945b1dab746cc7e5eb1cb940dfb1829783ca4dbb033bf44bf27e7a6cc87
+size 195390880

cd_head/dsifn-50-100-400/config.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "feat_scales": [
+    2,
+    5,
+    8,
+    11,
+    14
+  ],
+  "inner_channel": 128,
+  "channel_multiplier": [
+    1,
+    2,
+    4,
+    8,
+    8
+  ],
+  "out_channels": 2,
+  "img_size": 256,
+  "time_steps": [
+    50,
+    100,
+    400
+  ]
+}

cd_head/dsifn-50-100-400/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:df8e5ea2901fb902568aef6beaeff4e77516d83aef6c24c2a7e5455e393e13a7
+size 185626008

cd_head/dsifn-50-100/config.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "feat_scales": [
+    2,
+    5,
+    8,
+    11,
+    14
+  ],
+  "inner_channel": 128,
+  "channel_multiplier": [
+    1,
+    2,
+    4,
+    8,
+    8
+  ],
+  "out_channels": 2,
+  "img_size": 256,
+  "time_steps": [
+    50,
+    100
+  ]
+}

cd_head/dsifn-50-100/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8b2214ccc1aa96b62bff1e810a64d81225495dd0b6500e26c11cc41c388d43b3
+size 175861136

cd_head/levir-50-100-400-650/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "feat_scales": [
+    2,
+    5,
+    8,
+    11,
+    14
+  ],
+  "inner_channel": 128,
+  "channel_multiplier": [
+    1,
+    2,
+    4,
+    8,
+    8
+  ],
+  "out_channels": 2,
+  "img_size": 256,
+  "time_steps": [
+    50,
+    100,
+    400,
+    650
+  ]
+}

cd_head/levir-50-100-400-650/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7712ce94b7c16adb675590a0b0909555de650b9b7dd4481e2f25557f643594fa
+size 195390880

cd_head/levir-50-100-400/config.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "feat_scales": [
+    2,
+    5,
+    8,
+    11,
+    14
+  ],
+  "inner_channel": 128,
+  "channel_multiplier": [
+    1,
+    2,
+    4,
+    8,
+    8
+  ],
+  "out_channels": 2,
+  "img_size": 256,
+  "time_steps": [
+    50,
+    100,
+    400
+  ]
+}

cd_head/levir-50-100-400/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:395683768fe2a31d2d10b3231c118a20e4b7107247fe7808d125c29635c8a419
+size 185626008

cd_head/levir-50-100/config.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "feat_scales": [
+    2,
+    5,
+    8,
+    11,
+    14
+  ],
+  "inner_channel": 128,
+  "channel_multiplier": [
+    1,
+    2,
+    4,
+    8,
+    8
+  ],
+  "out_channels": 2,
+  "img_size": 256,
+  "time_steps": [
+    50,
+    100
+  ]
+}

cd_head/levir-50-100/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5624b511362aab2ae735fc2e670c4e4dd3d2405e51c3d3e608d3027a4288ad85
+size 175861136

cd_head/whu-50-100-400-650/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "feat_scales": [
+    2,
+    5,
+    8,
+    11,
+    14
+  ],
+  "inner_channel": 128,
+  "channel_multiplier": [
+    1,
+    2,
+    4,
+    8,
+    8
+  ],
+  "out_channels": 2,
+  "img_size": 256,
+  "time_steps": [
+    50,
+    100,
+    400,
+    650
+  ]
+}

cd_head/whu-50-100-400-650/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:36d6d97b2839c6cb19b66a5aa709d43bb6fd8cf508341d7f0cc4d041ac7e8d76
+size 195390880

cd_head/whu-50-100-400/config.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "feat_scales": [
+    2,
+    5,
+    8,
+    11,
+    14
+  ],
+  "inner_channel": 128,
+  "channel_multiplier": [
+    1,
+    2,
+    4,
+    8,
+    8
+  ],
+  "out_channels": 2,
+  "img_size": 256,
+  "time_steps": [
+    50,
+    100,
+    400
+  ]
+}

cd_head/whu-50-100-400/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:29d7151abaed8eb0280fb92beb9cd5ef73329fb269f7b77de0b612dcdbc8cd21
+size 185626008

cd_head/whu-50-100/config.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "feat_scales": [
+    2,
+    5,
+    8,
+    11,
+    14
+  ],
+  "inner_channel": 128,
+  "channel_multiplier": [
+    1,
+    2,
+    4,
+    8,
+    8
+  ],
+  "out_channels": 2,
+  "img_size": 256,
+  "time_steps": [
+    50,
+    100
+  ]
+}

cd_head/whu-50-100/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3fe09d7c0df23f0d790e8d2d1fcbf080c8f835e044e70df7d40f38a7112891
+size 175861136

model_index.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+  "_class_name": ["pipeline", "DDPMCDPipeline"],
+  "_diffusers_version": "0.36.0",
+  "scheduler": [
+    "diffusers",
+    "DDPMScheduler"
+  ],
+  "unet": [
+    "unet",
+    "UNet"
+  ]
+}

pipeline.py ADDED Viewed

	@@ -0,0 +1,518 @@

+"""
+DDPMCDPipeline for change detection.
+pipeline.py is in the repo — use custom_pipeline="pipeline" (relative path).
+Usage::
+    from diffusers import DiffusionPipeline
+    pipe = DiffusionPipeline.from_pretrained(
+        "BiliSakura/ddpm-cd",
+        custom_pipeline="pipeline",
+        trust_remote_code=True,
+        cd_head_subfolder="levir-50-100",
+    )
+    change_map = pipe(image_A, image_B, timesteps=[50, 100])
+"""
+import json
+import math
+import os
+from inspect import isfunction
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from diffusers import DDPMScheduler
+from diffusers.configuration_utils import ConfigMixin, register_to_config
+from diffusers.models.modeling_utils import ModelMixin  # ModelMixin subclasses nn.Module
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline
+from tqdm.auto import tqdm
+# ===========================================================================
+# UNet (SR3-style) - all components inlined
+# ===========================================================================
+def _exists(x):
+    return x is not None
+def _default(val, d):
+    if _exists(val):
+        return val
+    return d() if isfunction(d) else d
+class PositionalEncoding(nn.Module):
+    def __init__(self, dim):
+        super().__init__()
+        self.dim = dim
+    def forward(self, noise_level):
+        count = self.dim // 2
+        step = torch.arange(count, dtype=noise_level.dtype, device=noise_level.device) / count
+        encoding = noise_level.unsqueeze(1) * torch.exp(-math.log(1e4) * step.unsqueeze(0))
+        return torch.cat([torch.sin(encoding), torch.cos(encoding)], dim=-1)
+class FeatureWiseAffine(nn.Module):
+    """Inject a noise-level embedding into a feature map.
+    With ``use_affine_level=True`` the embedding is projected to a scale and a shift
+    and applied as ``(1 + gamma) * x + beta``; otherwise it is projected to one value
+    per channel and simply added to ``x``."""
+    def __init__(self, in_channels, out_channels, use_affine_level=False):
+        super().__init__()
+        self.use_affine_level = use_affine_level
+        # The bool is used as 0/1: the projection is twice as wide in affine mode (gamma and beta).
+        self.noise_func = nn.Sequential(nn.Linear(in_channels, out_channels * (1 + self.use_affine_level)))
+    def forward(self, x, noise_embed):
+        batch = x.shape[0]
+        if self.use_affine_level:
+            # Reshape to (batch, 2*C, 1, 1) and split along channels into scale and shift.
+            gamma, beta = self.noise_func(noise_embed).view(batch, -1, 1, 1).chunk(2, dim=1)
+            x = (1 + gamma) * x + beta
+        else:
+            # Reshape to (batch, C, 1, 1) so the embedding broadcasts over the spatial axes.
+            x = x + self.noise_func(noise_embed).view(batch, -1, 1, 1)
+        return x
+class Swish(nn.Module):
+    def forward(self, x):
+        return x * torch.sigmoid(x)
+class Upsample(nn.Module):
+    """Double the spatial size with nearest-neighbour interpolation, then apply a 3x3 convolution that keeps ``dim`` channels."""
+    def __init__(self, dim):
+        super().__init__()
+        self.up = nn.Upsample(scale_factor=2, mode="nearest")
+        self.conv = nn.Conv2d(dim, dim, 3, padding=1)
+    def forward(self, x):
+        return self.conv(self.up(x))
+class Downsample(nn.Module):
+    """Downsample with a single 3x3 convolution (stride 2, padding 1) that keeps ``dim`` channels."""
+    def __init__(self, dim):
+        super().__init__()
+        # Positional Conv2d arguments: kernel_size=3, stride=2, padding=1.
+        self.conv = nn.Conv2d(dim, dim, 3, 2, 1)
+    def forward(self, x):
+        return self.conv(x)
+class Block(nn.Module):
+    """GroupNorm -> Swish -> (optional Dropout) -> 3x3 convolution from ``dim`` to ``dim_out`` channels."""
+    def __init__(self, dim, dim_out, groups=32, dropout=0):
+        super().__init__()
+        self.block = nn.Sequential(
+            nn.GroupNorm(groups, dim),
+            Swish(),
+            # An Identity placeholder keeps the Sequential indices the same whether or not dropout is used.
+            nn.Dropout(dropout) if dropout != 0 else nn.Identity(),
+            nn.Conv2d(dim, dim_out, 3, padding=1),
+        )
+    def forward(self, x):
+        return self.block(x)
+class ResnetBlock(nn.Module):
+    """Residual block conditioned on a noise-level embedding.
+    Two ``Block`` stages with the embedding injected between them; the skip path uses a
+    1x1 convolution when ``dim != dim_out`` and is an identity otherwise."""
+    def __init__(self, dim, dim_out, noise_level_emb_dim=None, dropout=0, use_affine_level=False, norm_groups=32):
+        super().__init__()
+        # NOTE(review): noise_level_emb_dim is forwarded unchanged as the Linear input size,
+        # so the default of None presumably cannot be used in practice - confirm.
+        self.noise_func = FeatureWiseAffine(noise_level_emb_dim, dim_out, use_affine_level)
+        self.block1 = Block(dim, dim_out, groups=norm_groups)
+        # Dropout is only applied in the second stage.
+        self.block2 = Block(dim_out, dim_out, groups=norm_groups, dropout=dropout)
+        self.res_conv = nn.Conv2d(dim, dim_out, 1) if dim != dim_out else nn.Identity()
+    def forward(self, x, time_emb):
+        h = self.block1(x)
+        h = self.noise_func(h, time_emb)
+        h = self.block2(h)
+        return h + self.res_conv(x)
+class SelfAttention(nn.Module):
+    """Self-attention over all spatial positions of a feature map, with a residual connection.
+    Queries, keys and values come from one bias-free 1x1 convolution applied to the
+    group-normalised input; the attended values pass through a second 1x1 convolution
+    and are added back to the input."""
+    def __init__(self, in_channel, n_head=1, norm_groups=32):
+        super().__init__()
+        self.n_head = n_head
+        self.norm = nn.GroupNorm(norm_groups, in_channel)
+        self.qkv = nn.Conv2d(in_channel, in_channel * 3, 1, bias=False)
+        self.out = nn.Conv2d(in_channel, in_channel, 1)
+    def forward(self, input):
+        batch, channel, height, width = input.shape
+        n_head, head_dim = self.n_head, channel // self.n_head
+        norm = self.norm(input)
+        # Group the 3*C projected channels per head, then split each head into q, k, v.
+        qkv = self.qkv(norm).view(batch, n_head, head_dim * 3, height, width)
+        query, key, value = qkv.chunk(3, dim=2)
+        # Similarity of every query position (h, w) with every key position (y, x);
+        # the scale uses the full channel count, not head_dim.
+        attn = torch.einsum("bnchw, bncyx -> bnhwyx", query, key).contiguous() / math.sqrt(channel)
+        # Flatten the key positions so softmax normalises over all of them at once.
+        attn = torch.softmax(attn.view(batch, n_head, height, width, -1), -1)
+        attn = attn.view(batch, n_head, height, width, height, width)
+        out = torch.einsum("bnhwyx, bncyx -> bnchw", attn, value).contiguous()
+        return self.out(out.view(batch, channel, height, width)) + input
+class ResnetBlocWithAttn(nn.Module):
+    """A ``ResnetBlock`` optionally followed by ``SelfAttention`` (when ``with_attn`` is true)."""
+    def __init__(self, dim, dim_out, *, noise_level_emb_dim=None, norm_groups=32, dropout=0, with_attn=False):
+        super().__init__()
+        self.with_attn = with_attn
+        self.res_block = ResnetBlock(dim, dim_out, noise_level_emb_dim, norm_groups=norm_groups, dropout=dropout)
+        # No attention module is created at all when attention is disabled.
+        self.attn = SelfAttention(dim_out, norm_groups=norm_groups) if with_attn else None
+    def forward(self, x, time_emb):
+        x = self.res_block(x, time_emb)
+        if self.with_attn:
+            x = self.attn(x)
+        return x
+class UNet(ModelMixin, ConfigMixin):
+    """SR3-style UNet with noise-level conditioning. Supports feat_need=True for intermediate features.
+    ``forward(x, time)`` returns the output of the final convolution. With ``feat_need=True``
+    it instead returns ``(fe, fd)``: ``fe`` lists the encoder activations (initial convolution
+    plus every down layer) and ``fd`` lists the outputs of the decoder residual blocks in
+    reverse order of computation."""
+    @register_to_config
+    def __init__(
+        self,
+        in_channel=6,
+        out_channel=3,
+        inner_channel=32,
+        norm_groups=32,
+        channel_mults=(1, 2, 4, 8, 8),
+        attn_res=(8,),
+        res_blocks=3,
+        dropout=0,
+        with_noise_level_emb=True,
+        image_size=128,
+    ):
+        super().__init__()
+        noise_level_channel = inner_channel if with_noise_level_emb else None
+        # Noise level -> sinusoidal encoding -> 2-layer MLP; None when conditioning is disabled.
+        self.noise_level_mlp = (
+            nn.Sequential(
+                PositionalEncoding(inner_channel),
+                nn.Linear(inner_channel, inner_channel * 4),
+                Swish(),
+                nn.Linear(inner_channel * 4, inner_channel),
+            )
+            if with_noise_level_emb
+            else None
+        )
+        num_mults = len(channel_mults)
+        # feat_channels mirrors the skip stack built in forward(): one entry per tensor pushed there.
+        # now_res tracks the current spatial resolution so attention can be enabled per resolution.
+        pre_channel, feat_channels, now_res = inner_channel, [inner_channel], image_size
+        self.init_conv = nn.Conv2d(in_channel, inner_channel, 3, padding=1)
+        downs = []
+        for ind in range(num_mults):
+            use_attn = now_res in attn_res
+            channel_mult = inner_channel * channel_mults[ind]
+            for _ in range(res_blocks):
+                downs.append(
+                    ResnetBlocWithAttn(
+                        pre_channel, channel_mult,
+                        noise_level_emb_dim=noise_level_channel, norm_groups=norm_groups,
+                        dropout=dropout, with_attn=use_attn,
+                    )
+                )
+                feat_channels.append(channel_mult)
+                pre_channel = channel_mult
+            # Every level except the last ends with a downsampling step.
+            if ind < num_mults - 1:
+                downs.append(Downsample(pre_channel))
+                feat_channels.append(pre_channel)
+                now_res = now_res // 2
+        self.downs = nn.ModuleList(downs)
+        # Bottleneck: one block with attention, one without.
+        self.mid = nn.ModuleList([
+            ResnetBlocWithAttn(pre_channel, pre_channel, noise_level_emb_dim=noise_level_channel,
+                              norm_groups=norm_groups, dropout=dropout, with_attn=True),
+            ResnetBlocWithAttn(pre_channel, pre_channel, noise_level_emb_dim=noise_level_channel,
+                              norm_groups=norm_groups, dropout=dropout, with_attn=False),
+        ])
+        ups = []
+        for ind in reversed(range(num_mults)):
+            use_attn = now_res in attn_res
+            channel_mult = inner_channel * channel_mults[ind]
+            # One more block per level than on the way down, so every recorded skip entry is consumed.
+            for _ in range(res_blocks + 1):
+                ups.append(
+                    ResnetBlocWithAttn(
+                        # Input is the running tensor concatenated with the matching skip tensor.
+                        pre_channel + feat_channels.pop(), channel_mult,
+                        noise_level_emb_dim=noise_level_channel, norm_groups=norm_groups,
+                        dropout=dropout, with_attn=use_attn,
+                    )
+                )
+                pre_channel = channel_mult
+            if ind > 0:
+                ups.append(Upsample(pre_channel))
+                now_res = now_res * 2
+        self.ups = nn.ModuleList(ups)
+        # Output channels fall back to in_channel when out_channel is None.
+        self.final_conv = Block(pre_channel, _default(out_channel, lambda: in_channel), groups=norm_groups)
+    def forward(self, x, time, feat_need=False):
+        t = self.noise_level_mlp(time) if _exists(self.noise_level_mlp) else None
+        x = self.init_conv(x)
+        # Skip stack: the initial features plus the output of every down layer.
+        feats = [x]
+        for layer in self.downs:
+            # Only residual blocks take the noise embedding; Downsample layers do not.
+            x = layer(x, t) if isinstance(layer, ResnetBlocWithAttn) else layer(x)
+            feats.append(x)
+        # Copy the list before the decoder pops from it, so the encoder features survive.
+        fe = feats.copy() if feat_need else None
+        for layer in self.mid:
+            x = layer(x, t) if isinstance(layer, ResnetBlocWithAttn) else layer(x)
+        fd = [] if feat_need else None
+        for layer in self.ups:
+            if isinstance(layer, ResnetBlocWithAttn):
+                x = layer(torch.cat((x, feats.pop()), dim=1), t)
+                if feat_need:
+                    fd.append(x)
+            else:
+                x = layer(x)
+        # The final convolution still runs when feat_need is set; its result is discarded in that case.
+        x = self.final_conv(x)
+        return (fe, list(reversed(fd))) if feat_need else x
+# ===========================================================================
+# Change detection head
+# ===========================================================================
+class ChannelSELayer(nn.Module):
+    """Channel squeeze-and-excitation: rescale each channel by a gate computed from its spatial mean."""
+    def __init__(self, num_channels, reduction_ratio=2):
+        super().__init__()
+        # Width of the hidden layer between the two fully connected layers.
+        reduced = num_channels // reduction_ratio
+        self.fc1 = nn.Linear(num_channels, reduced, bias=True)
+        self.fc2 = nn.Linear(reduced, num_channels, bias=True)
+        self.relu, self.sigmoid = nn.ReLU(), nn.Sigmoid()
+    def forward(self, x):
+        b, c, _, _ = x.size()
+        # Squeeze: average every channel over all spatial positions -> (b, c).
+        s = x.view(b, c, -1).mean(dim=2)
+        # Excite: fc1 -> ReLU -> fc2 -> sigmoid, reshaped to broadcast over the spatial axes.
+        s = self.sigmoid(self.fc2(self.relu(self.fc1(s)))).view(b, c, 1, 1)
+        return x * s
+class SpatialSELayer(nn.Module):
+    """Spatial squeeze-and-excitation: rescale each spatial position by a sigmoid gate from a 1x1 convolution."""
+    def __init__(self, num_channels):
+        super().__init__()
+        self.conv = nn.Conv2d(num_channels, 1, 1)
+        self.sigmoid = nn.Sigmoid()
+    def forward(self, x, weights=None):
+        b, c, h, w = x.size()
+        # When ``weights`` is given it is used as the 1x1 kernel (no bias) instead of the learned conv.
+        out = F.conv2d(x, weights.view(1, c, 1, 1)) if weights is not None else self.conv(x)
+        return x * self.sigmoid(out).view(b, 1, h, w)
+class ChannelSpatialSELayer(nn.Module):
+    """Sum of the channel and the spatial squeeze-and-excitation recalibrations of the same input."""
+    def __init__(self, num_channels, reduction_ratio=2):
+        super().__init__()
+        self.cSE = ChannelSELayer(num_channels, reduction_ratio)
+        self.sSE = SpatialSELayer(num_channels)
+    def forward(self, x):
+        return self.cSE(x) + self.sSE(x)
+def _get_in_channels(feat_scales, inner_channel, channel_multiplier):
+    m, cm = inner_channel, channel_multiplier
+    r = 0
+    for s in feat_scales:
+        if s < 3: r += m * cm[0]
+        elif s < 6: r += m * cm[1]
+        elif s < 9: r += m * cm[2]
+        elif s < 12: r += m * cm[3]
+        elif s < 15: r += m * cm[4]
+        else: raise ValueError("feat_scales 0<=s<=14")
+    return r
+class AttentionBlock(nn.Module):
+    """3x3 convolution from ``dim`` to ``dim_out`` channels, ReLU, then channel+spatial squeeze-and-excitation."""
+    def __init__(self, dim, dim_out):
+        super().__init__()
+        self.block = nn.Sequential(
+            nn.Conv2d(dim, dim_out, 3, padding=1),
+            nn.ReLU(),
+            ChannelSpatialSELayer(dim_out, 2),
+        )
+    def forward(self, x):
+        return self.block(x)
+class CDBlock(nn.Module):
+    """Convolutional block applied to the features of one scale.
+    With more than one time step the input is expected to hold ``dim * len(time_steps)``
+    channels; a 1x1 convolution first reduces them to ``dim`` before the 3x3 convolution.
+    With a single time step only the 3x3 convolution is used."""
+    def __init__(self, dim, dim_out, time_steps):
+        super().__init__()
+        if len(time_steps) > 1:
+            self.block = nn.Sequential(
+                nn.Conv2d(dim * len(time_steps), dim, 1), nn.ReLU(),
+                nn.Conv2d(dim, dim_out, 3, padding=1), nn.ReLU(),
+            )
+        else:
+            self.block = nn.Sequential(nn.Conv2d(dim, dim_out, 3, padding=1), nn.ReLU())
+    def forward(self, x):
+        return self.block(x)
+class cd_head_v2(nn.Module):
+    """Change detection head (version 2).
+    Takes the multi-timestep features of two images, compares them scale by scale
+    (from the largest feature index to the smallest) and produces ``out_channels`` logits.
+    ``forward`` indexes its inputs as ``feats[i][scale]``: first the position of the time
+    step, then the feature index."""
+    def __init__(self, feat_scales, out_channels=2, inner_channel=None, channel_multiplier=None, img_size=256, time_steps=None):
+        super().__init__()
+        # Process scales from the largest feature index down to the smallest.
+        self.feat_scales = sorted(list(feat_scales), reverse=True)
+        self.in_channels = _get_in_channels(self.feat_scales, inner_channel, channel_multiplier)
+        # NOTE(review): time_steps defaults to None but len(time_steps) is taken below and in
+        # CDBlock, so a real sequence presumably has to be supplied - confirm.
+        self.img_size, self.time_steps = img_size, time_steps
+        # Decoder layout: CDBlock, AttentionBlock, CDBlock, ..., CDBlock (one CDBlock per scale).
+        self.decoder = nn.ModuleList()
+        for i in range(len(self.feat_scales)):
+            dim = _get_in_channels([self.feat_scales[i]], inner_channel, channel_multiplier)
+            self.decoder.append(CDBlock(dim, dim, time_steps))
+            if i < len(self.feat_scales) - 1:
+                dim_out = _get_in_channels([self.feat_scales[i + 1]], inner_channel, channel_multiplier)
+                self.decoder.append(AttentionBlock(dim, dim_out))
+        # NOTE(review): dim_out is only assigned inside the loop when there are at least two
+        # scales; with a single scale this line would fail on an undefined name.
+        self.clfr_stg1 = nn.Conv2d(dim_out, 64, 3, padding=1)
+        self.clfr_stg2 = nn.Conv2d(64, out_channels, 3, padding=1)
+        self.relu = nn.ReLU()
+    def forward(self, feats_A, feats_B):
+        lvl, x = 0, None
+        for layer in self.decoder:
+            if isinstance(layer, CDBlock):
+                f_A = feats_A[0][self.feat_scales[lvl]]
+                f_B = feats_B[0][self.feat_scales[lvl]]
+                if len(self.time_steps) > 1:
+                    # Stack the same scale from every further time step along the channel axis.
+                    for i in range(1, len(self.time_steps)):
+                        f_A = torch.cat((f_A, feats_A[i][self.feat_scales[lvl]]), dim=1)
+                        f_B = torch.cat((f_B, feats_B[i][self.feat_scales[lvl]]), dim=1)
+                # The same block processes both images; the change signal is their absolute difference.
+                diff = torch.abs(layer(f_A) - layer(f_B))
+                if lvl > 0:
+                    # Add the upsampled result carried over from the previous scale.
+                    diff = diff + x
+                lvl += 1
+            else:
+                diff = layer(diff)
+                x = F.interpolate(diff, scale_factor=2, mode="bilinear")
+        # NOTE(review): the classifier consumes ``x`` (the upsampled output of the last
+        # AttentionBlock); the ``diff`` produced by the final CDBlock is not used. Left as is
+        # because the published weights were presumably trained with this behaviour - confirm.
+        return self.clfr_stg2(self.relu(self.clfr_stg1(x)))
+# ===========================================================================
+# Diffusion utilities
+# ===========================================================================
+def _precompute_alpha_tables(scheduler):
+    ac = scheduler.alphas_cumprod.numpy()
+    return np.sqrt(np.append(1.0, ac))
+def _q_sample(x_start, continuous_sqrt_alpha_cumprod, noise=None):
+    if noise is None:
+        noise = torch.randn_like(x_start)
+    return continuous_sqrt_alpha_cumprod * x_start + (1 - continuous_sqrt_alpha_cumprod ** 2).sqrt() * noise
+@torch.no_grad()
+def _extract_features(model, x, t, sqrt_alphas):
+    b = x.shape[0]
+    lvl = torch.FloatTensor(
+        np.random.uniform(sqrt_alphas[t - 1], sqrt_alphas[t], size=b)
+    ).to(x.device).view(b, -1)
+    noise = torch.randn_like(x)
+    x_noisy = _q_sample(x, lvl.view(-1, 1, 1, 1), noise)
+    return model(x_noisy, lvl, feat_need=True)
+# ===========================================================================
+# Pipeline
+# ===========================================================================
+class DDPMCDPipeline(DiffusionPipeline):
+    """DDPM-based change detection. Load with trust_remote_code=True.
+    For consolidated ddpm-cd repo with multiple cd_head variants, pass cd_head_subfolder
+    (e.g. 'levir-50-100', 'whu-50-100-400', 'cdd-50-100', etc.) when loading."""
+    def __init__(self, unet, scheduler, cd_head=None, cd_head_subfolder=None):
+        super().__init__()
+        self.register_modules(unet=unet, scheduler=scheduler)
+        self.cd_head = cd_head
+        self._cd_head_subfolder = cd_head_subfolder
+        # Infer base path from unet config (dirname of unet subfolder = model root)
+        unet_path = getattr(getattr(unet, "config", None), "_name_or_path", None)
+        self._cd_head_base_path = os.path.dirname(unet_path) if unet_path else None
+    def _load_cd_head_if_needed(self):
+        """Lazy-load cd_head from disk when first needed (path inferred from unet)."""
+        if self.cd_head is not None:
+            return
+        base = self._cd_head_base_path
+        if base is None:
+            cfg = getattr(self.unet, "config", None)
+            base = os.path.dirname(getattr(cfg, "_name_or_path", "")) if cfg else None
+        if not base or not os.path.isdir(base):
+            return  # no cd_head (e.g. pretrained-only model)
+        subfolder = self._cd_head_subfolder
+        if subfolder:
+            cd_dir = os.path.join(base, "cd_head", subfolder)
+        else:
+            cd_dir = os.path.join(base, "cd_head")
+            if not os.path.isfile(os.path.join(cd_dir, "config.json")):
+                # Consolidated repo: cd_head_subfolder is required
+                subdirs = sorted([d for d in os.listdir(cd_dir) if os.path.isdir(os.path.join(cd_dir, d))])
+                raise RuntimeError(
+                    "DDPMCDPipeline requires cd_head_subfolder when loading from consolidated ddpm-cd repo. "
+                    f"Available: {subdirs}. Example: from_pretrained(..., cd_head_subfolder='levir-50-100')"
+                )
+        if not os.path.isdir(cd_dir):
+            return  # no cd_head (e.g. pretrained-only model)
+        with open(os.path.join(cd_dir, "config.json")) as f:
+            cfg = json.load(f)
+        ch = cd_head_v2(**cfg)
+        for name in ("diffusion_pytorch_model.safetensors", "diffusion_pytorch_model.bin"):
+            p = os.path.join(cd_dir, name)
+            if os.path.exists(p):
+                if p.endswith(".safetensors"):
+                    from safetensors.torch import load_file
+                    ch.load_state_dict(load_file(p, device="cpu"))
+                else:
+                    try:
+                        s = torch.load(p, map_location="cpu", weights_only=True)
+                    except TypeError:
+                        s = torch.load(p, map_location="cpu")
+                    ch.load_state_dict(s.state_dict() if hasattr(s, "state_dict") else s)
+                break
+        self.cd_head = ch
+    def load_cd_head(self, pretrained_model_name_or_path=None, subfolder=None):
+        """Manually load cd_head from the given path (or infer from unet).
+        subfolder: e.g. 'levir-50-100', 'whu-50-100-400' for consolidated ddpm-cd repo."""
+        if pretrained_model_name_or_path:
+            self._cd_head_base_path = pretrained_model_name_or_path
+        if subfolder is not None:
+            self._cd_head_subfolder = subfolder
+        self._load_cd_head_if_needed()
+    @classmethod
+    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
+        cd_head_subfolder = kwargs.pop("cd_head_subfolder", None)
+        pipe = super().from_pretrained(pretrained_model_name_or_path, **kwargs)
+        pipe._cd_head_base_path = pretrained_model_name_or_path if os.path.isdir(pretrained_model_name_or_path) else None
+        pipe._cd_head_subfolder = cd_head_subfolder
+        pipe._load_cd_head_if_needed()
+        return pipe
+    @torch.no_grad()
+    def __call__(self, image_A, image_B, timesteps=None, feat_type="dec"):
+        self._load_cd_head_if_needed()
+        if self.cd_head is None:
+            raise RuntimeError("DDPMCDPipeline requires cd_head. Could not load from disk.")
+        timesteps = timesteps or [50, 100]
+        sqrt_a = _precompute_alpha_tables(self.scheduler)
+        feats_A, feats_B = [], []
+        for t in timesteps:
+            fe_A, fd_A = _extract_features(self.unet, image_A, t, sqrt_a)
+            fe_B, fd_B = _extract_features(self.unet, image_B, t, sqrt_a)
+            feats_A.append(fd_A if feat_type == "dec" else fe_A)
+            feats_B.append(fd_B if feat_type == "dec" else fe_B)
+        return self.cd_head(feats_A, feats_B)
+    @torch.no_grad()
+    def generate(self, batch_size=1, in_channels=3, image_size=256, num_inference_steps=None, generator=None):
+        device = next(self.unet.parameters()).device
+        steps = num_inference_steps or self.scheduler.config.num_train_timesteps
+        sqrt_a = _precompute_alpha_tables(self.scheduler)
+        image = torch.randn((batch_size, in_channels, image_size, image_size), device=device, generator=generator)
+        self.scheduler.set_timesteps(steps)
+        for t in tqdm(self.scheduler.timesteps, desc="Sampling"):
+            idx = min(int(t) + 1, len(sqrt_a) - 1)
+            lvl = torch.FloatTensor([sqrt_a[idx]]).repeat(batch_size, 1).to(device)
+            noise_pred = self.unet(image, lvl)
+            image = self.scheduler.step(noise_pred, t, image).prev_sample
+        return image

scheduler/scheduler_config.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+  "_class_name": "DDPMScheduler",
+  "_diffusers_version": "0.36.0",
+  "beta_end": 0.01,
+  "beta_schedule": "squaredcos_cap_v2",
+  "beta_start": 1e-06,
+  "clip_sample": false,
+  "clip_sample_range": 1.0,
+  "dynamic_thresholding_ratio": 0.995,
+  "num_train_timesteps": 2000,
+  "prediction_type": "epsilon",
+  "rescale_betas_zero_snr": false,
+  "sample_max_value": 1.0,
+  "steps_offset": 0,
+  "thresholding": false,
+  "timestep_spacing": "leading",
+  "trained_betas": null,
+  "variance_type": "fixed_small"
+}

unet/config.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "_class_name": "UNet",
+  "_diffusers_version": "0.36.0",
+  "_name_or_path": "D:\\sakura-project\\ddpm-cd-diffusers\\models\\BiliSakura\\BiliSakura\\ddpm-cd-pretrained-256\\unet",
+  "attn_res": [
+    16
+  ],
+  "channel_mults": [
+    1,
+    2,
+    4,
+    8,
+    8
+  ],
+  "dropout": 0.2,
+  "image_size": 256,
+  "in_channel": 3,
+  "inner_channel": 128,
+  "norm_groups": 32,
+  "out_channel": 3,
+  "res_blocks": 2,
+  "with_noise_level_emb": true
+}

unet/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:92980ede4037dcfec88f4626dd0353d74fa8e303fd867c3d426a6bb5cd416649
+size 1564231460

unet/unet.py ADDED Viewed

	@@ -0,0 +1,495 @@

+"""
+Self-contained DDPMCDPipeline for change detection.
+All custom code (UNet, cd_head, diffusion utils) in one file - no external repo needed.
+Usage::
+    from diffusers import DiffusionPipeline
+    pipe = DiffusionPipeline.from_pretrained(
+        "path/to/ddpm-cd-levir-50-100",
+        trust_remote_code=True,
+    )
+    change_map = pipe(image_A, image_B, timesteps=[50, 100])
+"""
+import json
+import math
+import os
+from inspect import isfunction
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from diffusers import DDPMScheduler
+from diffusers.configuration_utils import ConfigMixin, register_to_config
+from diffusers.models.modeling_utils import ModelMixin  # ModelMixin subclasses nn.Module
+from diffusers.pipelines.pipeline_utils import DiffusionPipeline
+from tqdm.auto import tqdm
+# ===========================================================================
+# UNet (SR3-style) - all components inlined
+# ===========================================================================
+def _exists(x):
+    return x is not None
+def _default(val, d):
+    if _exists(val):
+        return val
+    return d() if isfunction(d) else d
+class PositionalEncoding(nn.Module):
+    def __init__(self, dim):
+        super().__init__()
+        self.dim = dim
+    def forward(self, noise_level):
+        count = self.dim // 2
+        step = torch.arange(count, dtype=noise_level.dtype, device=noise_level.device) / count
+        encoding = noise_level.unsqueeze(1) * torch.exp(-math.log(1e4) * step.unsqueeze(0))
+        return torch.cat([torch.sin(encoding), torch.cos(encoding)], dim=-1)
+class FeatureWiseAffine(nn.Module):
+    def __init__(self, in_channels, out_channels, use_affine_level=False):
+        super().__init__()
+        self.use_affine_level = use_affine_level
+        self.noise_func = nn.Sequential(nn.Linear(in_channels, out_channels * (1 + self.use_affine_level)))
+    def forward(self, x, noise_embed):
+        batch = x.shape[0]
+        if self.use_affine_level:
+            gamma, beta = self.noise_func(noise_embed).view(batch, -1, 1, 1).chunk(2, dim=1)
+            x = (1 + gamma) * x + beta
+        else:
+            x = x + self.noise_func(noise_embed).view(batch, -1, 1, 1)
+        return x
+class Swish(nn.Module):
+    def forward(self, x):
+        return x * torch.sigmoid(x)
+class Upsample(nn.Module):
+    def __init__(self, dim):
+        super().__init__()
+        self.up = nn.Upsample(scale_factor=2, mode="nearest")
+        self.conv = nn.Conv2d(dim, dim, 3, padding=1)
+    def forward(self, x):
+        return self.conv(self.up(x))
+class Downsample(nn.Module):
+    def __init__(self, dim):
+        super().__init__()
+        self.conv = nn.Conv2d(dim, dim, 3, 2, 1)
+    def forward(self, x):
+        return self.conv(x)
+class Block(nn.Module):
+    def __init__(self, dim, dim_out, groups=32, dropout=0):
+        super().__init__()
+        self.block = nn.Sequential(
+            nn.GroupNorm(groups, dim),
+            Swish(),
+            nn.Dropout(dropout) if dropout != 0 else nn.Identity(),
+            nn.Conv2d(dim, dim_out, 3, padding=1),
+        )
+    def forward(self, x):
+        return self.block(x)
+class ResnetBlock(nn.Module):
+    def __init__(self, dim, dim_out, noise_level_emb_dim=None, dropout=0, use_affine_level=False, norm_groups=32):
+        super().__init__()
+        self.noise_func = FeatureWiseAffine(noise_level_emb_dim, dim_out, use_affine_level)
+        self.block1 = Block(dim, dim_out, groups=norm_groups)
+        self.block2 = Block(dim_out, dim_out, groups=norm_groups, dropout=dropout)
+        self.res_conv = nn.Conv2d(dim, dim_out, 1) if dim != dim_out else nn.Identity()
+    def forward(self, x, time_emb):
+        h = self.block1(x)
+        h = self.noise_func(h, time_emb)
+        h = self.block2(h)
+        return h + self.res_conv(x)
+class SelfAttention(nn.Module):
+    def __init__(self, in_channel, n_head=1, norm_groups=32):
+        super().__init__()
+        self.n_head = n_head
+        self.norm = nn.GroupNorm(norm_groups, in_channel)
+        self.qkv = nn.Conv2d(in_channel, in_channel * 3, 1, bias=False)
+        self.out = nn.Conv2d(in_channel, in_channel, 1)
+    def forward(self, input):
+        batch, channel, height, width = input.shape
+        n_head, head_dim = self.n_head, channel // self.n_head
+        norm = self.norm(input)
+        qkv = self.qkv(norm).view(batch, n_head, head_dim * 3, height, width)
+        query, key, value = qkv.chunk(3, dim=2)
+        attn = torch.einsum("bnchw, bncyx -> bnhwyx", query, key).contiguous() / math.sqrt(channel)
+        attn = torch.softmax(attn.view(batch, n_head, height, width, -1), -1)
+        attn = attn.view(batch, n_head, height, width, height, width)
+        out = torch.einsum("bnhwyx, bncyx -> bnchw", attn, value).contiguous()
+        return self.out(out.view(batch, channel, height, width)) + input
+class ResnetBlocWithAttn(nn.Module):
+    def __init__(self, dim, dim_out, *, noise_level_emb_dim=None, norm_groups=32, dropout=0, with_attn=False):
+        super().__init__()
+        self.with_attn = with_attn
+        self.res_block = ResnetBlock(dim, dim_out, noise_level_emb_dim, norm_groups=norm_groups, dropout=dropout)
+        self.attn = SelfAttention(dim_out, norm_groups=norm_groups) if with_attn else None
+    def forward(self, x, time_emb):
+        x = self.res_block(x, time_emb)
+        if self.with_attn:
+            x = self.attn(x)
+        return x
+class UNet(ModelMixin, ConfigMixin):
+    """SR3-style UNet with noise-level conditioning. Supports feat_need=True for intermediate features.
+    The network is stored as three module lists: ``downs`` (encoder), ``mid``
+    and ``ups`` (decoder). Encoder outputs are kept as skip connections and
+    concatenated onto the decoder inputs.
+    """
+    @register_to_config
+    def __init__(
+        self,
+        in_channel=6,
+        out_channel=3,
+        inner_channel=32,
+        norm_groups=32,
+        channel_mults=(1, 2, 4, 8, 8),
+        attn_res=(8,),
+        res_blocks=3,
+        dropout=0,
+        with_noise_level_emb=True,
+        image_size=128,
+    ):
+        """Build the encoder, middle and decoder stacks.
+        Args:
+            in_channel: Input channels of the first convolution.
+            out_channel: Output channels of the final block; falls back to
+                ``in_channel`` when None.
+            inner_channel: Base channel width, also the size of the noise embedding.
+            norm_groups: Group count handed to the normalisation layers.
+            channel_mults: Per-level multipliers applied to ``inner_channel``.
+            attn_res: Resolutions at which residual blocks get self-attention.
+            res_blocks: Residual blocks per encoder level (the decoder uses one more).
+            dropout: Dropout rate handed to every residual block.
+            with_noise_level_emb: Whether to build the noise-level embedding MLP.
+            image_size: Starting value of the resolution tracker that is compared
+                against ``attn_res``.
+        """
+        super().__init__()
+        noise_level_channel = inner_channel if with_noise_level_emb else None
+        self.noise_level_mlp = (
+            nn.Sequential(
+                PositionalEncoding(inner_channel),
+                nn.Linear(inner_channel, inner_channel * 4),
+                Swish(),
+                nn.Linear(inner_channel * 4, inner_channel),
+            )
+            if with_noise_level_emb
+            else None
+        )
+        num_mults = len(channel_mults)
+        # feat_channels records the channel count of every skip connection the
+        # encoder will produce; the decoder pops them in reverse order below.
+        pre_channel, feat_channels, now_res = inner_channel, [inner_channel], image_size
+        self.init_conv = nn.Conv2d(in_channel, inner_channel, 3, padding=1)
+        downs = []
+        for ind in range(num_mults):
+            use_attn = now_res in attn_res
+            channel_mult = inner_channel * channel_mults[ind]
+            for _ in range(res_blocks):
+                downs.append(
+                    ResnetBlocWithAttn(
+                        pre_channel, channel_mult,
+                        noise_level_emb_dim=noise_level_channel, norm_groups=norm_groups,
+                        dropout=dropout, with_attn=use_attn,
+                    )
+                )
+                feat_channels.append(channel_mult)
+                pre_channel = channel_mult
+            # Every level except the last ends with a downsampling layer, whose
+            # output is recorded as a skip connection as well.
+            if ind < num_mults - 1:
+                downs.append(Downsample(pre_channel))
+                feat_channels.append(pre_channel)
+                now_res = now_res // 2
+        self.downs = nn.ModuleList(downs)
+        # Middle stage: one block with attention followed by one without.
+        self.mid = nn.ModuleList([
+            ResnetBlocWithAttn(pre_channel, pre_channel, noise_level_emb_dim=noise_level_channel,
+                              norm_groups=norm_groups, dropout=dropout, with_attn=True),
+            ResnetBlocWithAttn(pre_channel, pre_channel, noise_level_emb_dim=noise_level_channel,
+                              norm_groups=norm_groups, dropout=dropout, with_attn=False),
+        ])
+        ups = []
+        for ind in reversed(range(num_mults)):
+            use_attn = now_res in attn_res
+            channel_mult = inner_channel * channel_mults[ind]
+            # res_blocks + 1 blocks per level, each consuming one recorded skip
+            # (its input width is the running width plus the popped skip width).
+            for _ in range(res_blocks + 1):
+                ups.append(
+                    ResnetBlocWithAttn(
+                        pre_channel + feat_channels.pop(), channel_mult,
+                        noise_level_emb_dim=noise_level_channel, norm_groups=norm_groups,
+                        dropout=dropout, with_attn=use_attn,
+                    )
+                )
+                pre_channel = channel_mult
+            if ind > 0:
+                ups.append(Upsample(pre_channel))
+                now_res = now_res * 2
+        self.ups = nn.ModuleList(ups)
+        self.final_conv = Block(pre_channel, _default(out_channel, lambda: in_channel), groups=norm_groups)
+    def forward(self, x, time, feat_need=False):
+        """Run the UNet.
+        Args:
+            x: Input tensor for the first convolution.
+            time: Noise level, embedded by ``noise_level_mlp`` when that exists.
+            feat_need: When True, return intermediate features instead of the output.
+        Returns:
+            The final block's output when ``feat_need`` is False. Otherwise a
+            tuple ``(fe, fd)``: ``fe`` lists the first convolution's output and
+            every encoder layer output in order, ``fd`` lists the outputs of the
+            decoder residual blocks in reverse order of computation.
+        """
+        t = self.noise_level_mlp(time) if _exists(self.noise_level_mlp) else None
+        x = self.init_conv(x)
+        feats = [x]
+        for layer in self.downs:
+            # Only residual blocks take the noise embedding; Downsample does not.
+            x = layer(x, t) if isinstance(layer, ResnetBlocWithAttn) else layer(x)
+            feats.append(x)
+        # Copy before the decoder starts popping skips off ``feats``.
+        fe = feats.copy() if feat_need else None
+        for layer in self.mid:
+            x = layer(x, t) if isinstance(layer, ResnetBlocWithAttn) else layer(x)
+        fd = [] if feat_need else None
+        for layer in self.ups:
+            if isinstance(layer, ResnetBlocWithAttn):
+                # Concatenate the most recent unused skip along the channel axis.
+                x = layer(torch.cat((x, feats.pop()), dim=1), t)
+                if feat_need:
+                    fd.append(x)
+            else:
+                x = layer(x)
+        # The final block runs even when only the features are returned.
+        x = self.final_conv(x)
+        return (fe, list(reversed(fd))) if feat_need else x
+# ===========================================================================
+# Change detection head
+# ===========================================================================
+class ChannelSELayer(nn.Module):
+    def __init__(self, num_channels, reduction_ratio=2):
+        super().__init__()
+        reduced = num_channels // reduction_ratio
+        self.fc1 = nn.Linear(num_channels, reduced, bias=True)
+        self.fc2 = nn.Linear(reduced, num_channels, bias=True)
+        self.relu, self.sigmoid = nn.ReLU(), nn.Sigmoid()
+    def forward(self, x):
+        b, c, _, _ = x.size()
+        s = x.view(b, c, -1).mean(dim=2)
+        s = self.sigmoid(self.fc2(self.relu(self.fc1(s)))).view(b, c, 1, 1)
+        return x * s
+class SpatialSELayer(nn.Module):
+    def __init__(self, num_channels):
+        super().__init__()
+        self.conv = nn.Conv2d(num_channels, 1, 1)
+        self.sigmoid = nn.Sigmoid()
+    def forward(self, x, weights=None):
+        b, c, h, w = x.size()
+        out = F.conv2d(x, weights.view(1, c, 1, 1)) if weights is not None else self.conv(x)
+        return x * self.sigmoid(out).view(b, 1, h, w)
+class ChannelSpatialSELayer(nn.Module):
+    def __init__(self, num_channels, reduction_ratio=2):
+        super().__init__()
+        self.cSE = ChannelSELayer(num_channels, reduction_ratio)
+        self.sSE = SpatialSELayer(num_channels)
+    def forward(self, x):
+        return self.cSE(x) + self.sSE(x)
+def _get_in_channels(feat_scales, inner_channel, channel_multiplier):
+    m, cm = inner_channel, channel_multiplier
+    r = 0
+    for s in feat_scales:
+        if s < 3: r += m * cm[0]
+        elif s < 6: r += m * cm[1]
+        elif s < 9: r += m * cm[2]
+        elif s < 12: r += m * cm[3]
+        elif s < 15: r += m * cm[4]
+        else: raise ValueError("feat_scales 0<=s<=14")
+    return r
+class AttentionBlock(nn.Module):
+    def __init__(self, dim, dim_out):
+        super().__init__()
+        self.block = nn.Sequential(
+            nn.Conv2d(dim, dim_out, 3, padding=1),
+            nn.ReLU(),
+            ChannelSpatialSELayer(dim_out, 2),
+        )
+    def forward(self, x):
+        return self.block(x)
+class CDBlock(nn.Module):
+    def __init__(self, dim, dim_out, time_steps):
+        super().__init__()
+        if len(time_steps) > 1:
+            self.block = nn.Sequential(
+                nn.Conv2d(dim * len(time_steps), dim, 1), nn.ReLU(),
+                nn.Conv2d(dim, dim_out, 3, padding=1), nn.ReLU(),
+            )
+        else:
+            self.block = nn.Sequential(nn.Conv2d(dim, dim_out, 3, padding=1), nn.ReLU())
+    def forward(self, x):
+        return self.block(x)
+class cd_head_v2(nn.Module):
+    """Change detection head (version 2)."""
+    def __init__(self, feat_scales, out_channels=2, inner_channel=None, channel_multiplier=None, img_size=256, time_steps=None):
+        super().__init__()
+        self.feat_scales = sorted(list(feat_scales), reverse=True)
+        self.in_channels = _get_in_channels(self.feat_scales, inner_channel, channel_multiplier)
+        self.img_size, self.time_steps = img_size, time_steps
+        self.decoder = nn.ModuleList()
+        for i in range(len(self.feat_scales)):
+            dim = _get_in_channels([self.feat_scales[i]], inner_channel, channel_multiplier)
+            self.decoder.append(CDBlock(dim, dim, time_steps))
+            if i < len(self.feat_scales) - 1:
+                dim_out = _get_in_channels([self.feat_scales[i + 1]], inner_channel, channel_multiplier)
+                self.decoder.append(AttentionBlock(dim, dim_out))
+        self.clfr_stg1 = nn.Conv2d(dim_out, 64, 3, padding=1)
+        self.clfr_stg2 = nn.Conv2d(64, out_channels, 3, padding=1)
+        self.relu = nn.ReLU()
+    def forward(self, feats_A, feats_B):
+        lvl, x = 0, None
+        for layer in self.decoder:
+            if isinstance(layer, CDBlock):
+                f_A = feats_A[0][self.feat_scales[lvl]]
+                f_B = feats_B[0][self.feat_scales[lvl]]
+                if len(self.time_steps) > 1:
+                    for i in range(1, len(self.time_steps)):
+                        f_A = torch.cat((f_A, feats_A[i][self.feat_scales[lvl]]), dim=1)
+                        f_B = torch.cat((f_B, feats_B[i][self.feat_scales[lvl]]), dim=1)
+                diff = torch.abs(layer(f_A) - layer(f_B))
+                if lvl > 0:
+                    diff = diff + x
+                lvl += 1
+            else:
+                diff = layer(diff)
+                x = F.interpolate(diff, scale_factor=2, mode="bilinear")
+        return self.clfr_stg2(self.relu(self.clfr_stg1(x)))
+# ===========================================================================
+# Diffusion utilities
+# ===========================================================================
+def _precompute_alpha_tables(scheduler):
+    ac = scheduler.alphas_cumprod.numpy()
+    return np.sqrt(np.append(1.0, ac))
+def _q_sample(x_start, continuous_sqrt_alpha_cumprod, noise=None):
+    if noise is None:
+        noise = torch.randn_like(x_start)
+    return continuous_sqrt_alpha_cumprod * x_start + (1 - continuous_sqrt_alpha_cumprod ** 2).sqrt() * noise
+@torch.no_grad()
+def _extract_features(model, x, t, sqrt_alphas):
+    b = x.shape[0]
+    lvl = torch.FloatTensor(
+        np.random.uniform(sqrt_alphas[t - 1], sqrt_alphas[t], size=b)
+    ).to(x.device).view(b, -1)
+    noise = torch.randn_like(x)
+    x_noisy = _q_sample(x, lvl.view(-1, 1, 1, 1), noise)
+    return model(x_noisy, lvl, feat_need=True)
+# ===========================================================================
+# Pipeline
+# ===========================================================================
+class DDPMCDPipeline(DiffusionPipeline):
+    """DDPM-based change detection. Load with trust_remote_code=True."""
+    def __init__(self, unet, scheduler, cd_head=None):
+        super().__init__()
+        self.register_modules(unet=unet, scheduler=scheduler)
+        self.cd_head = cd_head
+        self._cd_head_base_path = None  # set when loaded via from_pretrained
+    def _load_cd_head_if_needed(self):
+        """Lazy-load cd_head from disk when first needed (path inferred from unet)."""
+        if self.cd_head is not None:
+            return
+        base = self._cd_head_base_path
+        if base is None:
+            cfg = getattr(self.unet, "config", None)
+            base = os.path.dirname(getattr(cfg, "_name_or_path", "")) if cfg else None
+        if not base or not os.path.isdir(base):
+            raise RuntimeError("Cannot find model path to load cd_head. Use load_cd_head(path) or load from a full pipeline directory.")
+        cd_dir = os.path.join(base, "cd_head")
+        if not os.path.isdir(cd_dir):
+            raise RuntimeError(f"cd_head directory not found at {cd_dir}")
+        with open(os.path.join(cd_dir, "config.json")) as f:
+            cfg = json.load(f)
+        ch = cd_head_v2(**cfg)
+        for name in ("diffusion_pytorch_model.safetensors", "diffusion_pytorch_model.bin"):
+            p = os.path.join(cd_dir, name)
+            if os.path.exists(p):
+                if p.endswith(".safetensors"):
+                    from safetensors.torch import load_file
+                    ch.load_state_dict(load_file(p, device="cpu"))
+                else:
+                    try:
+                        s = torch.load(p, map_location="cpu", weights_only=True)
+                    except TypeError:
+                        s = torch.load(p, map_location="cpu")
+                    ch.load_state_dict(s.state_dict() if hasattr(s, "state_dict") else s)
+                break
+        self.cd_head = ch
+    def load_cd_head(self, pretrained_model_name_or_path=None):
+        """Manually load cd_head from the given path (or infer from unet)."""
+        if pretrained_model_name_or_path:
+            self._cd_head_base_path = pretrained_model_name_or_path
+        self._load_cd_head_if_needed()
+    @classmethod
+    def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
+        pipe = super().from_pretrained(pretrained_model_name_or_path, **kwargs)
+        pipe._cd_head_base_path = pretrained_model_name_or_path if os.path.isdir(pretrained_model_name_or_path) else None
+        pipe._load_cd_head_if_needed()
+        return pipe
+    @torch.no_grad()
+    def __call__(self, image_A, image_B, timesteps=None, feat_type="dec"):
+        self._load_cd_head_if_needed()
+        if self.cd_head is None:
+            raise RuntimeError("DDPMCDPipeline requires cd_head. Could not load from disk.")
+        timesteps = timesteps or [50, 100]
+        sqrt_a = _precompute_alpha_tables(self.scheduler)
+        feats_A, feats_B = [], []
+        for t in timesteps:
+            fe_A, fd_A = _extract_features(self.unet, image_A, t, sqrt_a)
+            fe_B, fd_B = _extract_features(self.unet, image_B, t, sqrt_a)
+            feats_A.append(fd_A if feat_type == "dec" else fe_A)
+            feats_B.append(fd_B if feat_type == "dec" else fe_B)
+        return self.cd_head(feats_A, feats_B)
+    @torch.no_grad()
+    def generate(self, batch_size=1, in_channels=3, image_size=256, num_inference_steps=None, generator=None):
+        device = next(self.unet.parameters()).device
+        steps = num_inference_steps or self.scheduler.config.num_train_timesteps
+        sqrt_a = _precompute_alpha_tables(self.scheduler)
+        image = torch.randn((batch_size, in_channels, image_size, image_size), device=device, generator=generator)
+        self.scheduler.set_timesteps(steps)
+        for t in tqdm(self.scheduler.timesteps, desc="Sampling"):
+            idx = min(int(t) + 1, len(sqrt_a) - 1)
+            lvl = torch.FloatTensor([sqrt_a[idx]]).repeat(batch_size, 1).to(device)
+            noise_pred = self.unet(image, lvl)
+            image = self.scheduler.step(noise_pred, t, image).prev_sample
+        return image