Donnyll committed on
Commit
658e26c
·
verified ·
1 Parent(s): bbd3c90

first commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +4 -0
  2. .gitignore +7 -0
  3. README.md +92 -3
  4. diagram.png +3 -0
  5. overwrite_attack/attack_with_stegastamp.py +165 -0
  6. overwrite_attack/utils_img.py +176 -0
  7. requirements.txt +119 -0
  8. watermarker/LaWa/configs/SD14_LaWa.yaml +108 -0
  9. watermarker/LaWa/configs/SD14_LaWa_dlwt.yaml +108 -0
  10. watermarker/LaWa/configs/SD14_LaWa_inference.yaml +57 -0
  11. watermarker/LaWa/configs/SD14_LaWa_inference_dlwt.yaml +58 -0
  12. watermarker/LaWa/configs/SD14_LaWa_ldm.yaml +107 -0
  13. watermarker/LaWa/configs/SD14_LaWa_modified.yaml +103 -0
  14. watermarker/LaWa/dlwt.py +251 -0
  15. watermarker/LaWa/ecc.py +281 -0
  16. watermarker/LaWa/examples/gen_wmimgs_EW-LoRA_dlwt.ipynb +267 -0
  17. watermarker/LaWa/examples/gen_wmimgs_EW-LoRA_fix_weights.ipynb +275 -0
  18. watermarker/LaWa/examples/gen_wmimgs_SS_dlwt.ipynb +225 -0
  19. watermarker/LaWa/examples/gen_wmimgs_SS_fix_weights.ipynb +236 -0
  20. watermarker/LaWa/examples/gen_wmimgs_WMA_dlwt.ipynb +233 -0
  21. watermarker/LaWa/examples/gen_wmimgs_WMA_fix_weights.ipynb +225 -0
  22. watermarker/LaWa/gen_wm_imgs.py +177 -0
  23. watermarker/LaWa/lawa_dataset/train_100k.csv +0 -0
  24. watermarker/LaWa/lawa_dataset/train_200k.csv +0 -0
  25. watermarker/LaWa/lawa_dataset/val_10k.csv +0 -0
  26. watermarker/LaWa/lawa_dataset/val_1k.csv +1001 -0
  27. watermarker/LaWa/ldm/__pycache__/util.cpython-38.pyc +0 -0
  28. watermarker/LaWa/ldm/data/__init__.py +0 -0
  29. watermarker/LaWa/ldm/data/util.py +24 -0
  30. watermarker/LaWa/ldm/models/__pycache__/autoencoder.cpython-38.pyc +0 -0
  31. watermarker/LaWa/ldm/models/autoencoder.py +492 -0
  32. watermarker/LaWa/ldm/models/diffusion/__init__.py +0 -0
  33. watermarker/LaWa/ldm/models/diffusion/__pycache__/__init__.cpython-38.pyc +0 -0
  34. watermarker/LaWa/ldm/models/diffusion/__pycache__/ddim.cpython-38.pyc +0 -0
  35. watermarker/LaWa/ldm/models/diffusion/__pycache__/ddpm.cpython-38.pyc +0 -0
  36. watermarker/LaWa/ldm/models/diffusion/__pycache__/plms.cpython-38.pyc +0 -0
  37. watermarker/LaWa/ldm/models/diffusion/__pycache__/sampling_util.cpython-38.pyc +0 -0
  38. watermarker/LaWa/ldm/models/diffusion/ddim.py +339 -0
  39. watermarker/LaWa/ldm/models/diffusion/ddpm.py +1798 -0
  40. watermarker/LaWa/ldm/models/diffusion/dpm_solver/__init__.py +1 -0
  41. watermarker/LaWa/ldm/models/diffusion/dpm_solver/__pycache__/__init__.cpython-38.pyc +0 -0
  42. watermarker/LaWa/ldm/models/diffusion/dpm_solver/__pycache__/dpm_solver.cpython-38.pyc +0 -0
  43. watermarker/LaWa/ldm/models/diffusion/dpm_solver/__pycache__/sampler.cpython-38.pyc +0 -0
  44. watermarker/LaWa/ldm/models/diffusion/dpm_solver/dpm_solver.py +1154 -0
  45. watermarker/LaWa/ldm/models/diffusion/dpm_solver/sampler.py +87 -0
  46. watermarker/LaWa/ldm/models/diffusion/plms.py +244 -0
  47. watermarker/LaWa/ldm/models/diffusion/sampling_util.py +22 -0
  48. watermarker/LaWa/ldm/modules/__pycache__/attention.cpython-38.pyc +0 -0
  49. watermarker/LaWa/ldm/modules/__pycache__/ema.cpython-38.pyc +0 -0
  50. watermarker/LaWa/ldm/modules/__pycache__/x_transformer.cpython-38.pyc +0 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ diagram.png filter=lfs diff=lfs merge=lfs -text
37
+ watermarker/LaWa/ldm/modules/image_degradation/utils/test.png filter=lfs diff=lfs merge=lfs -text
38
+ watermarker/LaWa/stable-diffusion/ldm/modules/image_degradation/utils/test.png filter=lfs diff=lfs merge=lfs -text
39
+ watermarker/stable_signature/ldm/modules/image_degradation/utils/test.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ watermark_attacker
2
+ watermarker/stable_signature/outputs
3
+ watermarker/LaWa/outputs
4
+ experiments
5
+ scripts
6
+ optimizers
7
+ plots
README.md CHANGED
@@ -1,3 +1,92 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # An efficient watermarking method for latent diffusion models via low-rank adaptation
3
+
4
+ Code for our paper "An efficient watermarking method for latent diffusion models via low-rank adaptation".
5
+
6
+ You can download the paper via: [[ArXiv]](https://arxiv.org/abs/2410.20202)
7
+
8
+
9
+ ## 😀Summary
10
+
11
+ A lightweight parameter fine-tuning strategy with low-rank adaptation and dynamic loss weight adjustment enables efficient watermark embedding in large-scale models while minimizing impact on image quality and maintaining robustness.
12
+
13
+ ![image](diagram.png)
14
+
15
+ ## 🍉Requirement
16
+
17
+ ```shell
18
+ pip install -r requirements.txt
19
+ ```
20
+
21
+ ## 🐬Preparation
22
+
23
+ ### Clone
24
+
25
+ ```shell
26
+ git clone https://github.com/MrDongdongLin/EW-LoRA
27
+ ```
28
+
29
+ ### Create an anaconda environment [Optional]:
30
+
31
+ ```shell
32
+ conda create -n ewlora python==3.8.18
33
+ conda activate ewlora
34
+ pip install -r requirements.txt
35
+ ```
36
+
37
+ ### Prepare the training data:
38
+
39
+ * Download the dataset files [here](https://cocodataset.org/).
40
+ * Extract them to the `data` folder.
41
+ * The directory structure will be as follows:
42
+
43
+ ```shell
44
+ coco2017
45
+ └── train
46
+ ├── img1.jpg
47
+ ├── img2.jpg
48
+ └── img3.jpg
49
+ └── test
50
+ ├── img4.jpg
51
+ ├── img5.jpg
52
+ └── img6.jpg
53
+ ```
54
+
55
+ ### Usage
56
+
57
+ #### Training
58
+
59
+ ```shell
60
+ cd ./watermarker/stable_signature
61
+ CUDA_VISIBLE_DEVICES=0 python train_SS.py --num_keys 1 \
62
+ --train_dir ./Datasets/coco2017/train2017 \
63
+ --val_dir ./Datasets/coco2017/val2017 \
64
+ --ldm_config ./watermarker/stable_signature/configs/stable-diffusion/v1-inference.yaml \
65
+ --ldm_ckpt ../models/ldm_ckpts/sd-v1-4-full-ema.ckpt \
66
+ --msg_decoder_path ../models/wm_encdec/hidden/ckpts/dec_48b_whit.torchscript.pt \
67
+ --output_dir ./watermarker/stable_signature/outputs/ \
68
+ --task_name train_SS_fix_weights \
69
+ --do_validation \
70
+ --val_frep 50 \
71
+ --batch_size 4 \
72
+ --lambda_i 1.0 --lambda_w 0.2 \
73
+ --steps 20000 --val_size 100 \
74
+ --warmup_steps 20 \
75
+ --save_img_freq 100 \
76
+ --log_freq 1 --debug
77
+ ```
78
+
79
+ ## Citation
80
+
81
+ If this work is helpful, please cite as:
82
+
83
+ ```latex
84
+ @article{linEfficientWatermarkingMethod2024,
85
+ title = {An Efficient Watermarking Method for Latent Diffusion Models via Low-Rank Adaptation},
86
+ author = {Lin, Dongdong and Li, Yue and Tondi, Benedetta and Li, Bin and Barni, Mauro},
87
+ year = {2024},
88
+ month = oct,
89
+ number = {arXiv:2410.20202},
90
+ eprint = {2410.20202},
91
+ }
92
+ ```
diagram.png ADDED

Git LFS Details

  • SHA256: 5238de8ba3840e411e5e3abf475213bbfa19d78dc8df766f2ebf6a8b8645722f
  • Pointer size: 131 Bytes
  • Size of remote file: 123 kB
overwrite_attack/attack_with_stegastamp.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import importlib.util
import sys
import os
import torch
import numpy as np

import argparse
from torch.utils.data import DataLoader
from torchvision import transforms
from PIL import Image

from tqdm import tqdm
import pandas as pd
from torchvision.utils import save_image
from accelerate.utils import set_seed

from utils_img import normalize_vqgan, unnormalize_vqgan, psnr

# Default preprocessing: PIL image -> tensor in [0, 1], then VQGAN-style
# normalization ((x - 0.5) / 0.5), i.e. values roughly in [-1, 1].
default_transform = transforms.Compose([
    transforms.ToTensor(),
    normalize_vqgan,
])
class CustomImageDataset(torch.utils.data.Dataset):
    """Flat-directory image dataset (no class subfolders expected).

    Yields ``(image, 0)`` pairs; the constant label keeps the interface
    compatible with ``(image, target)`` style consumers.
    """

    def __init__(self, image_dir, transform=None):
        self.image_dir = image_dir
        # Sort the listing so iteration order is deterministic across runs.
        entries = (os.path.join(image_dir, name) for name in os.listdir(image_dir))
        self.image_paths = sorted(p for p in entries if p.endswith(('.png', '.jpg', '.jpeg')))
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        sample = Image.open(self.image_paths[idx]).convert('RGB')
        if self.transform:
            sample = self.transform(sample)
        return sample, 0
def get_dataloader(data_dir, transform=default_transform, batch_size=128, shuffle=False, num_workers=4):
    """
    Custom dataloader that loads images from a directory without expecting class subfolders.
    """
    # Wrap the flat directory in the custom dataset, then hand it to DataLoader.
    dataset = CustomImageDataset(data_dir, transform=transform)
    return DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
    )
def get_parser():
    """Build the command-line parser for the StegaStamp overwrite attack."""
    parser = argparse.ArgumentParser(description='StegaStamp Attack')
    # String-valued options: (flag, default, help).
    string_options = [
        ('--file_path',
         '/pubdata/ldd/models/wm_encdec/stegastamp/models.py',
         'Path to the stegastamp models.py file'),
        ('--encoder_path',
         '/pubdata/ldd/models/wm_encdec/stegastamp/ckpts/stegastamp_coco_256_onefactor/stegastamp/checkpoints/stegastamp_encoder_099.pth',
         'Path to the encoder weights'),
        ('--decoder_path',
         '/pubdata/ldd/models/wm_encdec/stegastamp/ckpts/stegastamp_coco_256_onefactor/stegastamp/checkpoints/stegastamp_decoder_099.pth',
         'Path to the decoder weights'),
        ('--data_dir',
         '/pubdata/ldd/projects/sd-lora-wm/smattacks/from_222/smattacks/gen_with_prompt_lawa_test/019-exps/images',
         'Path to the dataset'),
        ('--images_dir', '', 'Path to save the images'),
    ]
    for flag, default, help_text in string_options:
        parser.add_argument(flag, type=str, default=default, help=help_text)
    parser.add_argument('--batch_size', type=int, default=1, help='Batch size')
    parser.add_argument('--seed', type=int, default=1337, help='Random seed')
    return parser
def main(args):
    """Run the StegaStamp overwrite attack over several checkpoint variants.

    Dynamically loads the StegaStamp encoder/decoder from ``args.file_path``
    and the weights at ``args.encoder_path`` / ``args.decoder_path``, then for
    every ``save_imgs_<prefix>`` folder under ``args.data_dir`` re-embeds one
    fixed random 200-bit fingerprint into each image, saves the overwritten
    images, and logs per-image bit accuracy and PSNR to
    ``overwrite_att_<prefix>/bit_acc_stegastamp.csv``.
    """
    # Make the directory containing models.py importable, then load the
    # module directly from its file path.
    module_dir = os.path.dirname(args.file_path)
    sys.path.append(module_dir)

    spec = importlib.util.spec_from_file_location("stagastamp_models", args.file_path)
    stagastamp_models = importlib.util.module_from_spec(spec)
    sys.modules["stagastamp_models"] = stagastamp_models
    spec.loader.exec_module(stagastamp_models)

    # 256x256 RGB images, 200-bit fingerprints.
    encoder = stagastamp_models.StegaStampEncoder(256, 3, 200, return_residual=False)
    decoder = stagastamp_models.StegaStampDecoder(256, 3, 200)

    # Load weights
    encoder.load_state_dict(torch.load(args.encoder_path, map_location='cuda'))
    decoder.load_state_dict(torch.load(args.decoder_path, map_location='cuda'))
    encoder = encoder.to('cuda')
    decoder = decoder.to('cuda')

    # NOTE(review): the batch size is forced to 1 regardless of the CLI value.
    args.batch_size = 1

    # StegaStamp expects inputs in [0, 1]; VQGAN normalization is deliberately
    # not applied here (unlike the module-level default_transform).
    default_transform = transforms.Compose([
        transforms.ToTensor(),
    ])
    args.seed = 1337
    set_seed(args.seed)

    def generate_random_fingerprints(fingerprint_length, batch_size=4, size=(400, 400)):
        # Uniform random bits in {0, 1}. `size` is unused but kept for
        # interface compatibility with the original StegaStamp code.
        z = torch.zeros((batch_size, fingerprint_length), dtype=torch.float).random_(0, 2)
        return z

    # Fixed seed so every checkpoint variant is attacked with the same message.
    args.seed = 42
    torch.manual_seed(args.seed)
    fingerprints = generate_random_fingerprints(200, batch_size=1, size=(256, 3))

    # Watermarker variants whose generated images are attacked.
    ckpt_prefixes = [
        "SS_fix_weights",
        "SS_dlwt",
        "WMA_fix_weights",
        "WMA_dlwt",
        "LaWa_fix_weights",
        "LaWa_dlwt",
        "EW-LoRA_fix_weights",
        "EW-LoRA_dlwt"
    ]

    for ckpt_prefix in ckpt_prefixes:
        dataloader = get_dataloader(os.path.join(args.data_dir, 'save_imgs_' + ckpt_prefix),
                                    transform=default_transform, batch_size=args.batch_size)

        # Output directories (hoisted out of the loop — they are per-prefix,
        # not per-image). FIX: the CSV directory was never created before,
        # so to_csv crashed when it did not already exist.
        save_image_path = os.path.join(args.data_dir, 'overwrite_stegastamp_' + ckpt_prefix)
        os.makedirs(save_image_path, exist_ok=True)
        csv_dir = os.path.join(args.data_dir, 'overwrite_att_' + ckpt_prefix)
        os.makedirs(csv_dir, exist_ok=True)

        rows = []  # accumulated per-image stats; FIX: replaces private df._append
        bit_accs_avg_list = []
        psnr_avg_list = []

        for i, (images, _) in enumerate(tqdm(dataloader)):
            fingerprints = fingerprints.to('cuda')
            images = images.to('cuda')
            fingerprinted_images = encoder(fingerprints, images)
            decoder_output = decoder(fingerprinted_images)

            save_image(fingerprinted_images, os.path.join(save_image_path, f'overwrite_img_w_{i:07}.png'))

            # msg stats: compare sign bits of the embedded vs. decoded message.
            ori_msgs = torch.sign(fingerprints) > 0
            decoded_msgs = torch.sign(decoder_output) > 0  # b k -> b k
            diff = (~torch.logical_xor(ori_msgs, decoded_msgs))  # b k -> b k
            bit_accs = torch.sum(diff, dim=-1) / diff.shape[-1]  # b k -> b
            bit_accs_avg = torch.mean(bit_accs).item()

            # NOTE(review): psnr() defaults to img_space='vqgan' but these
            # images are in [0, 1] here — confirm this is intended.
            psnr_avg = psnr(fingerprinted_images, images).mean().item()
            psnr_avg_list.append(psnr_avg)
            bit_accs_avg_list.append(bit_accs_avg)

            # Persist progressively so partial results survive interruption.
            rows.append({"iteration": i, "bit_acc_avg": bit_accs_avg, "psnr_avg": psnr_avg})
            df = pd.DataFrame(rows, columns=["iteration", "bit_acc_avg", "psnr_avg"])
            df.to_csv(os.path.join(csv_dir, "bit_acc_stegastamp.csv"), index=False)

        if not bit_accs_avg_list:
            # FIX: avoid ZeroDivisionError when the image folder is empty.
            print(f"Model: {ckpt_prefix}, no images found — skipped")
            continue

        overall_avg_bit_accs = sum(bit_accs_avg_list) / len(bit_accs_avg_list)
        overall_avg_psnr = sum(psnr_avg_list) / len(psnr_avg_list)

        print(f"Model: {ckpt_prefix}, ACC: {overall_avg_bit_accs}, PSNR: {overall_avg_psnr}")
if __name__ == '__main__':
    # generate parser / parse parameters, then run the overwrite attack
    parser = get_parser()
    args = parser.parse_args()
    main(args)
overwrite_attack/utils_img.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

# pyright: reportMissingModuleSource=false

import numpy as np
from augly.image import functional as aug_functional
import torch
from torchvision import transforms
from torchvision.transforms import functional
from torch.autograd.variable import Variable
import torch.nn.functional as F

# Module-wide default device; CUDA when available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Default preprocessing: PIL -> tensor in [0, 1], then ImageNet normalization.
default_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Paired normalize/unnormalize transforms for the two image spaces used here.
normalize_vqgan = transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])  # Normalize (x - 0.5) / 0.5
unnormalize_vqgan = transforms.Normalize(mean=[-1, -1, -1], std=[1/0.5, 1/0.5, 1/0.5])  # Unnormalize (x * 0.5) + 0.5
normalize_img = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize (x - mean) / std
unnormalize_img = transforms.Normalize(mean=[-0.485/0.229, -0.456/0.224, -0.406/0.225], std=[1/0.229, 1/0.224, 1/0.225])  # Unnormalize (x * std) + mean
def psnr(x, y, img_space='vqgan'):
    """
    Return the per-image PSNR (dB) between two image batches.
    Args:
        x: Image tensor with values approx. between [-1,1]
        y: Image tensor with values approx. between [-1,1], ex: original image
        img_space: 'vqgan' or 'img' selects the un-normalization applied
            before comparing; any other value compares the tensors as-is.
    """
    if img_space == 'vqgan':
        a = torch.clamp(unnormalize_vqgan(x), 0, 1)
        b = torch.clamp(unnormalize_vqgan(y), 0, 1)
        delta = a - b
    elif img_space == 'img':
        a = torch.clamp(unnormalize_img(x), 0, 1)
        b = torch.clamp(unnormalize_img(y), 0, 1)
        delta = a - b
    else:
        delta = x - y
    # Scale to 8-bit range and flatten leading dims to BxCxHxW.
    delta = (255 * delta).reshape(-1, x.shape[-3], x.shape[-2], x.shape[-1])
    mse = torch.mean(delta ** 2, dim=(1, 2, 3))  # per-image MSE, shape B
    return 20 * np.log10(255) - 10 * torch.log10(mse)
def center_crop(x, scale):
    """ Center-crop so the cropped area is `scale` times the original area
    Args:
        x: PIL image
        scale: target area scale
    """
    # `scale` applies to the area, so each edge shrinks by sqrt(scale).
    edge_scale = np.sqrt(scale)
    new_size = [int(edge * edge_scale) for edge in x.shape[-2:]][::-1]
    return functional.center_crop(x, new_size)
def resize(x, scale):
    """ Resize so the resulting area is `scale` times the original area
    Args:
        x: PIL image
        scale: target area scale
    """
    # `scale` applies to the area, so each edge shrinks by sqrt(scale).
    edge_scale = np.sqrt(scale)
    new_size = [int(edge * edge_scale) for edge in x.shape[-2:]][::-1]
    return functional.resize(x, new_size)
def rotate(x, angle):
    """ Rotate an image by the given angle
    Args:
        x: image (PIL or tensor)
        angle: angle in degrees
    """
    return functional.rotate(x, angle)
def flip(x, direction='horizontal'):
    """ Flip an image horizontally or vertically.
    Args:
        x: image (PIL or tensor)
        direction: 'horizontal' or 'vertical'
    Raises:
        ValueError: if `direction` is neither 'horizontal' nor 'vertical'
            (the original silently returned None in that case).
    """
    if direction == 'horizontal':
        return functional.hflip(x)
    elif direction == 'vertical':
        return functional.vflip(x)
    raise ValueError(f"flip: unknown direction {direction!r}")
def adjust_brightness(x, brightness_factor):
    """ Adjust brightness of an image
    Args:
        x: ImageNet-normalized image tensor
        brightness_factor: brightness factor
    """
    # Un-normalize, adjust in pixel space, re-normalize.
    pixel = unnormalize_img(x)
    return normalize_img(functional.adjust_brightness(pixel, brightness_factor))

def adjust_contrast(x, contrast_factor):
    """ Adjust contrast of an image
    Args:
        x: ImageNet-normalized image tensor
        contrast_factor: contrast factor
    """
    pixel = unnormalize_img(x)
    return normalize_img(functional.adjust_contrast(pixel, contrast_factor))

def adjust_saturation(x, saturation_factor):
    """ Adjust saturation of an image
    Args:
        x: ImageNet-normalized image tensor
        saturation_factor: saturation factor
    """
    pixel = unnormalize_img(x)
    return normalize_img(functional.adjust_saturation(pixel, saturation_factor))

def adjust_hue(x, hue_factor):
    """ Adjust hue of an image
    Args:
        x: ImageNet-normalized image tensor
        hue_factor: hue factor
    """
    pixel = unnormalize_img(x)
    return normalize_img(functional.adjust_hue(pixel, hue_factor))

def adjust_gamma(x, gamma, gain=1):
    """ Adjust gamma of an image
    Args:
        x: ImageNet-normalized image tensor
        gamma: gamma factor
        gain: gain factor
    """
    pixel = unnormalize_img(x)
    return normalize_img(functional.adjust_gamma(pixel, gamma, gain))

def adjust_sharpness(x, sharpness_factor):
    """ Adjust sharpness of an image
    Args:
        x: ImageNet-normalized image tensor
        sharpness_factor: sharpness factor
    """
    pixel = unnormalize_img(x)
    return normalize_img(functional.adjust_sharpness(pixel, sharpness_factor))
def overlay_text(x, text='Lorem Ipsum'):
    """ Overlay text on each image of a batch
    Args:
        x: batch of ImageNet-normalized image tensors (BxCxHxW)
        text: text to overlay
    """
    to_pil = transforms.ToPILImage()
    to_tensor = transforms.ToTensor()
    # Round-trip each image through PIL: un-normalize, draw, re-tensorize.
    out = torch.zeros_like(x, device=x.device)
    for idx, img in enumerate(x):
        pil_img = to_pil(unnormalize_img(img))
        out[idx] = to_tensor(aug_functional.overlay_text(pil_img, text=text))
    return normalize_img(out)
def jpeg_compress(x, quality_factor):
    """ Apply JPEG compression to each image of a batch
    Args:
        x: batch of ImageNet-normalized image tensors (BxCxHxW)
        quality_factor: JPEG quality factor
    """
    to_pil = transforms.ToPILImage()
    to_tensor = transforms.ToTensor()
    # Round-trip each image through PIL: un-normalize, compress, re-tensorize.
    out = torch.zeros_like(x, device=x.device)
    for idx, img in enumerate(x):
        pil_img = to_pil(unnormalize_img(img))
        out[idx] = to_tensor(aug_functional.encoding_quality(pil_img, quality=quality_factor))
    return normalize_img(out)
def gaussian_noise(input, stddev):
    """Add white Gaussian noise with variance ``stddev`` in pixel space.

    The input is un-normalized (ImageNet stats), noise with std
    ``sqrt(stddev)`` is added, and the result is re-normalized.
    NOTE(review): the sum is clamped to [-1, 1] although un-normalized
    images live in [0, 1] — confirm the lower bound is intended.
    """
    output = torch.clamp(unnormalize_img(input).clone() + (torch.randn(
        [input.shape[0], input.shape[1], input.shape[2], input.shape[3]]) * (stddev**0.5)).to(input.device), -1, 1)
    return normalize_img(output)
def adjust_gaussian_blur(img, ks):
    """Gaussian-blur an ImageNet-normalized image with kernel size ``ks``."""
    # Blur in pixel space, then restore the normalization.
    pixel_img = unnormalize_img(img)
    blurred = functional.gaussian_blur(pixel_img, kernel_size=ks)
    return normalize_img(blurred)
requirements.txt ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.1.0
2
+ accelerate==1.0.1
3
+ aiohappyeyeballs==2.4.0
4
+ aiohttp==3.10.5
5
+ aiosignal==1.3.1
6
+ antlr4-python3-runtime==4.9.3
7
+ async-timeout==4.0.3
8
+ attrs==24.2.0
9
+ augly==1.0.0
10
+ bm3d==4.0.1
11
+ bm4d==4.2.3
12
+ cachetools==5.5.0
13
+ certifi==2024.7.4
14
+ charset-normalizer==3.3.2
15
+ clip==0.2.0
16
+ cmake==3.30.2
17
+ coloredlogs==15.0.1
18
+ compressai==1.2.6
19
+ contourpy==1.1.1
20
+ cycler==0.12.1
21
+ datasets==3.0.1
22
+ diffusers==0.30.3
23
+ dill==0.3.8
24
+ einops==0.6.1
25
+ filelock==3.15.4
26
+ flatbuffers==24.3.25
27
+ fonttools==4.54.1
28
+ frozenlist==1.4.1
29
+ fsspec==2024.6.1
30
+ ftfy==6.2.3
31
+ future==1.0.0
32
+ google-auth==2.34.0
33
+ google-auth-oauthlib==1.0.0
34
+ grpcio==1.65.5
35
+ huggingface-hub==0.25.2
36
+ humanfriendly==10.0
37
+ idna==3.7
38
+ imageio==2.35.1
39
+ imhist==0.0.4
40
+ importlib_metadata==8.4.0
41
+ importlib_resources==6.4.5
42
+ invisible-watermark==0.2.0
43
+ iopath==0.1.10
44
+ Jinja2==3.1.4
45
+ kiwisolver==1.4.7
46
+ kornia==0.7.3
47
+ kornia_rs==0.1.5
48
+ lazy_loader==0.4
49
+ lightning-utilities==0.11.6
50
+ lit==18.1.8
51
+ lpips==0.1.4
52
+ Markdown==3.7
53
+ MarkupSafe==2.1.5
54
+ matplotlib==3.7.5
55
+ mpmath==1.3.0
56
+ multidict==6.0.5
57
+ multiprocess==0.70.16
58
+ networkx==3.1
59
+ numpy==1.24.4
60
+ nvidia-cublas-cu11==11.10.3.66
61
+ nvidia-cuda-cupti-cu11==11.7.101
62
+ nvidia-cuda-nvrtc-cu11==11.7.99
63
+ nvidia-cuda-runtime-cu11==11.7.99
64
+ nvidia-cudnn-cu11==8.5.0.96
65
+ nvidia-cufft-cu11==10.9.0.58
66
+ nvidia-curand-cu11==10.2.10.91
67
+ nvidia-cusolver-cu11==11.4.0.1
68
+ nvidia-cusparse-cu11==11.7.4.91
69
+ nvidia-nccl-cu11==2.14.3
70
+ nvidia-nvtx-cu11==11.7.91
71
+ oauthlib==3.2.2
72
+ omegaconf==2.3.0
73
+ onnxruntime==1.19.2
74
+ open_clip_torch==2.26.1
75
+ opencv-python==4.8.1.78
76
+ pandas==1.5.3
77
+ peft==0.13.2
78
+ pillow==10.4.0
79
+ portalocker==2.10.1
80
+ protobuf==5.27.3
81
+ pyarrow==17.0.0
82
+ pyasn1==0.6.0
83
+ pyasn1_modules==0.4.0
84
+ pyDeprecate==0.3.1
85
+ pyparsing==3.1.4
86
+ python-magic==0.4.27
87
+ pytorch-lightning==1.5.0
88
+ pytorch-msssim==1.0.0
89
+ pytz==2024.1
90
+ PyWavelets==1.4.1
91
+ PyYAML==6.0.2
92
+ regex==2024.7.24
93
+ requests==2.32.3
94
+ requests-oauthlib==2.0.0
95
+ rsa==4.9
96
+ safetensors==0.4.5
97
+ scikit-image==0.21.0
98
+ scipy==1.10.1
99
+ sympy==1.13.2
100
+ taming-transformers-rom1504==0.0.6
101
+ tensorboard==2.14.0
102
+ tensorboard-data-server==0.7.2
103
+ test_tube==0.7.5
104
+ tifffile==2023.7.10
105
+ timm==1.0.9
106
+ tokenizers==0.20.1
107
+ torch==2.0.1
108
+ torch-fidelity==0.3.0
109
+ torch-geometric==2.6.1
110
+ torchmetrics==1.4.1
111
+ torchvision==0.15.2
112
+ tqdm==4.66.5
113
+ transformers==4.45.2
114
+ triton==2.0.0
115
+ urllib3==2.2.2
116
+ Werkzeug==3.0.4
117
+ xxhash==3.5.0
118
+ yarl==1.9.4
119
+ zipp==3.20.0
watermarker/LaWa/configs/SD14_LaWa.yaml ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ target: models.modifiedAEDecoder.LaWa
3
+ params:
4
+ scale_factor: 1.0 #0.18215
5
+ extraction_resize: False
6
+ start_attack_acc_thresh: 0.5
7
+ watermark_addition_weight: 0.1
8
+ learning_rate: 0.00008
9
+ epoch_num: 100
10
+ dis_update_freq: 0
11
+ noise_block_size: 8
12
+ first_stage_config:
13
+ target: stable-diffusion.ldm.models.autoencoder.AutoencoderKL
14
+ params:
15
+ ckpt_path: /pubdata/ldd/models/first_stage_models/first_stage_KL-f8.ckpt
16
+ embed_dim: 4
17
+ monitor: val/rec_loss
18
+ ddconfig:
19
+ double_z: true
20
+ z_channels: 4
21
+ resolution: 256
22
+ in_channels: 3
23
+ out_ch: 3
24
+ ch: 128
25
+ ch_mult:
26
+ - 1
27
+ - 2
28
+ - 4
29
+ - 4
30
+ num_res_blocks: 2
31
+ attn_resolutions: []
32
+ dropout: 0.0
33
+ lossconfig:
34
+ target: torch.nn.Identity
35
+
36
+ decoder_config:
37
+ target: models.messageDecoder.MessageDecoder
38
+ params:
39
+ message_len: 48
40
+
41
+
42
+ discriminator_config:
43
+ target: models.modifiedAEDecoder.Discriminator1
44
+
45
+ # dlwt configs:
46
+ apply_dlwt: False
47
+ psnr_threshold: 30.0
48
+ bitacc_target: 0.95
49
+ delta: 1.0
50
+ # loss config: (set message_absolute_loss_weight=0 if dlwt is applied)
51
+ recon_type: rgb
52
+ recon_loss_weight: 0.1
53
+ adversarial_loss_weight: 1.0
54
+ perceptual_loss_weight: 1.0
55
+ message_absolute_loss_weight: 2.0
56
+
57
+ noise_config:
58
+ target: models.transformations.TransformNet
59
+ params:
60
+ ramp: 10000
61
+ apply_many_crops: False
62
+ apply_required_attacks: True
63
+ required_attack_list: ['none'] #['rotation', 'resize','random_crop', 'center_crop', 'blur', 'noise','contrast','brightness', 'jpeg']
64
+
65
+ data:
66
+ target: tools.dataset.DataModule
67
+ params:
68
+ batch_size: 8
69
+ num_workers: 8
70
+ use_worker_init_fn: true
71
+ train:
72
+ target: tools.dataset.dataset
73
+ params:
74
+ data_dir: /pubdata/ldd/Datasets/Flicker
75
+ data_list: /pubdata/ldd/projects/EW-LoRA/watermarker/LaWa/lawa_dataset/train_100k.csv
76
+ resize: 256
77
+ validation:
78
+ target: tools.dataset.dataset
79
+ params:
80
+ data_dir: /pubdata/ldd/Datasets/Flicker
81
+ data_list: /pubdata/ldd/projects/EW-LoRA/watermarker/LaWa/lawa_dataset/val_1k.csv
82
+ resize: 256
83
+ limit_samples: 100
84
+
85
+ lightning:
86
+ callbacks:
87
+ image_logger:
88
+ target: models.logger.ImageLogger
89
+ params:
90
+ batch_frequency: 1
91
+ max_images: 0
92
+ increase_log_steps: False
93
+ fixed_input: True
94
+ progress_bar:
95
+ target: pytorch_lightning.callbacks.ProgressBar
96
+ params:
97
+ refresh_rate: 4
98
+ checkpoint:
99
+ target: pytorch_lightning.callbacks.ModelCheckpoint
100
+ params:
101
+ verbose: true
102
+ filename: '{epoch:06}-{step:09}'
103
+ every_n_train_steps: 5000
104
+
105
+ trainer:
106
+ benchmark: True
107
+ base_learning_rate: 2e-5
108
+ accumulate_grad_batches: 1
watermarker/LaWa/configs/SD14_LaWa_dlwt.yaml ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ target: models.modifiedAEDecoder.LaWa
3
+ params:
4
+ scale_factor: 1.0 #0.18215
5
+ extraction_resize: False
6
+ start_attack_acc_thresh: 0.5
7
+ watermark_addition_weight: 0.1
8
+ learning_rate: 0.00008
9
+ epoch_num: 100
10
+ dis_update_freq: 0
11
+ noise_block_size: 8
12
+ first_stage_config:
13
+ target: stable-diffusion.ldm.models.autoencoder.AutoencoderKL
14
+ params:
15
+ ckpt_path: /pubdata/ldd/models/first_stage_models/first_stage_KL-f8.ckpt
16
+ embed_dim: 4
17
+ monitor: val/rec_loss
18
+ ddconfig:
19
+ double_z: true
20
+ z_channels: 4
21
+ resolution: 256
22
+ in_channels: 3
23
+ out_ch: 3
24
+ ch: 128
25
+ ch_mult:
26
+ - 1
27
+ - 2
28
+ - 4
29
+ - 4
30
+ num_res_blocks: 2
31
+ attn_resolutions: []
32
+ dropout: 0.0
33
+ lossconfig:
34
+ target: torch.nn.Identity
35
+
36
+ decoder_config:
37
+ target: models.messageDecoder.MessageDecoder
38
+ params:
39
+ message_len: 48
40
+
41
+
42
+ discriminator_config:
43
+ target: models.modifiedAEDecoder.Discriminator1
44
+
45
+ # dlwt configs:
46
+ apply_dlwt: True
47
+ psnr_threshold: 30.0
48
+ bitacc_target: 0.95
49
+ delta: 1.0
50
+ # loss config: (set message_absolute_loss_weight=0 if dlwt is applied)
51
+ recon_type: rgb
52
+ recon_loss_weight: 1.0
53
+ adversarial_loss_weight: 1.0
54
+ perceptual_loss_weight: 1.0
55
+ message_absolute_loss_weight: 0.0
56
+
57
+ noise_config:
58
+ target: models.transformations.TransformNet
59
+ params:
60
+ ramp: 10000
61
+ apply_many_crops: False
62
+ apply_required_attacks: True
63
+ required_attack_list: ['none'] #['rotation', 'resize','random_crop', 'center_crop', 'blur', 'noise','contrast','brightness', 'jpeg']
64
+
65
+ data:
66
+ target: tools.dataset.DataModule
67
+ params:
68
+ batch_size: 8
69
+ num_workers: 8
70
+ use_worker_init_fn: true
71
+ train:
72
+ target: tools.dataset.dataset
73
+ params:
74
+ data_dir: /pubdata/ldd/Datasets/Flicker
75
+ data_list: /pubdata/ldd/projects/EW-LoRA/watermarker/LaWa/lawa_dataset/train_100k.csv
76
+ resize: 256
77
+ validation:
78
+ target: tools.dataset.dataset
79
+ params:
80
+ data_dir: /pubdata/ldd/Datasets/Flicker
81
+ data_list: /pubdata/ldd/projects/EW-LoRA/watermarker/LaWa/lawa_dataset/val_1k.csv
82
+ resize: 256
83
+ limit_samples: 100
84
+
85
+ lightning:
86
+ callbacks:
87
+ image_logger:
88
+ target: models.logger.ImageLogger
89
+ params:
90
+ batch_frequency: 1
91
+ max_images: -1
92
+ increase_log_steps: False
93
+ fixed_input: True
94
+ progress_bar:
95
+ target: pytorch_lightning.callbacks.ProgressBar
96
+ params:
97
+ refresh_rate: 4
98
+ checkpoint:
99
+ target: pytorch_lightning.callbacks.ModelCheckpoint
100
+ params:
101
+ verbose: true
102
+ filename: '{epoch:06}-{step:09}'
103
+ every_n_train_steps: 5000
104
+
105
+ trainer:
106
+ benchmark: True
107
+ base_learning_rate: 2e-5
108
+ accumulate_grad_batches: 1
watermarker/LaWa/configs/SD14_LaWa_inference.yaml ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ target: models.modifiedAEDecoder.LaWa
3
+ params:
4
+ scale_factor: 0.18215 # 1.0
5
+ extraction_resize: False
6
+ start_attack_acc_thresh: 0.75
7
+ watermark_addition_weight: 0.1
8
+ learning_rate: 0.00008
9
+ epoch_num: 200
10
+ dis_update_freq: 0
11
+ noise_block_size: 8
12
+ first_stage_config:
13
+ target: stable-diffusion.ldm.models.autoencoder.AutoencoderKL
14
+ params:
15
+ ckpt_path: weights/first_stage_models/first_stage_KL-f8.ckpt
16
+ embed_dim: 4
17
+ monitor: val/rec_loss
18
+ ddconfig:
19
+ double_z: true
20
+ z_channels: 4
21
+ resolution: 256
22
+ in_channels: 3
23
+ out_ch: 3
24
+ ch: 128
25
+ ch_mult:
26
+ - 1
27
+ - 2
28
+ - 4
29
+ - 4
30
+ num_res_blocks: 2
31
+ attn_resolutions: []
32
+ dropout: 0.0
33
+ lossconfig:
34
+ target: torch.nn.Identity
35
+
36
+ decoder_config:
37
+ target: models.messageDecoder.MessageDecoder
38
+ params:
39
+ message_len: 48
40
+
41
+ discriminator_config:
42
+ target: models.modifiedAEDecoder.Discriminator1
43
+
44
+ # loss config:
45
+ recon_type: rgb
46
+ recon_loss_weight: 0.1
47
+ adversarial_loss_weight: 1.0
48
+ perceptual_loss_weight: 1.0
49
+ message_absolute_loss_weight: 2.0
50
+
51
+ noise_config:
52
+ target: models.transformations.TransformNet
53
+ params:
54
+ ramp: 10000
55
+ apply_many_crops: False
56
+ apply_required_attacks: True
57
+ required_attack_list: ['rotation', 'resize','random_crop', 'center_crop', 'blur', 'noise','contrast','brightness', 'jpeg']
watermarker/LaWa/configs/SD14_LaWa_inference_dlwt.yaml ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ target: models.modifiedAEDecoder.LaWa
3
+ params:
4
+ scale_factor: 0.18215 # 1.0
5
+ extraction_resize: False
6
+ start_attack_acc_thresh: 0.75
7
+ watermark_addition_weight: 0.1
8
+ learning_rate: 0.00008
9
+ epoch_num: 200
10
+ dis_update_freq: 0
11
+ noise_block_size: 8
12
+ first_stage_config:
13
+ target: stable-diffusion.ldm.models.autoencoder.AutoencoderKL
14
+ params:
15
+ ckpt_path: weights/first_stage_models/first_stage_KL-f8.ckpt
16
+ embed_dim: 4
17
+ monitor: val/rec_loss
18
+ ddconfig:
19
+ double_z: true
20
+ z_channels: 4
21
+ resolution: 256
22
+ in_channels: 3
23
+ out_ch: 3
24
+ ch: 128
25
+ ch_mult:
26
+ - 1
27
+ - 2
28
+ - 4
29
+ - 4
30
+ num_res_blocks: 2
31
+ attn_resolutions: []
32
+ dropout: 0.0
33
+ lossconfig:
34
+ target: torch.nn.Identity
35
+
36
+ decoder_config:
37
+ target: models.messageDecoder.HiDDeNDecoder #models.messageDecoder.MessageDecoder
38
+ params:
39
+ msg_decoder_dir: /pubdata/ldd/models/wm_encdec/hidden/ckpts/dec_48b_whit.torchscript.pt
40
+ message_len: 48
41
+
42
+ discriminator_config:
43
+ target: models.modifiedAEDecoder.Discriminator1
44
+
45
+ # loss config:
46
+ recon_type: rgb
47
+ recon_loss_weight: 0.1
48
+ adversarial_loss_weight: 1.0
49
+ perceptual_loss_weight: 1.0
50
+ message_absolute_loss_weight: 2.0
51
+
52
+ noise_config:
53
+ target: models.transformations.TransformNet
54
+ params:
55
+ ramp: 10000
56
+ apply_many_crops: False
57
+ apply_required_attacks: True
58
+ required_attack_list: ['rotation', 'resize','random_crop', 'center_crop', 'blur', 'noise','contrast','brightness', 'jpeg']
watermarker/LaWa/configs/SD14_LaWa_ldm.yaml ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ target: models.LaWaAEDecoder.LaWa
3
+ params:
4
+ scale_factor: 1.0 #0.18215
5
+ extraction_resize: False
6
+ start_attack_acc_thresh: 0.5
7
+ watermark_addition_weight: 0.1
8
+ learning_rate: 0.00008
9
+ epoch_num: 100
10
+ dis_update_freq: 0
11
+ noise_block_size: 8
12
+ first_stage_config:
13
+ target: stable-diffusion.ldm.models.autoencoder.AutoencoderKL
14
+ params:
15
+ ckpt_path: /pubdata/ldd/models/first_stage_models/first_stage_KL-f8.ckpt
16
+ embed_dim: 4
17
+ monitor: val/rec_loss
18
+ ddconfig:
19
+ double_z: true
20
+ z_channels: 4
21
+ resolution: 256
22
+ in_channels: 3
23
+ out_ch: 3
24
+ ch: 128
25
+ ch_mult:
26
+ - 1
27
+ - 2
28
+ - 4
29
+ - 4
30
+ num_res_blocks: 2
31
+ attn_resolutions: []
32
+ dropout: 0.0
33
+ lossconfig:
34
+ target: torch.nn.Identity
35
+
36
+ decoder_config:
37
+ target: models.messageDecoder.MessageDecoder
38
+ params:
39
+ message_len: 48
40
+
41
+
42
+ discriminator_config:
43
+ target: models.modifiedAEDecoder.Discriminator1
44
+
45
+ # dlwt configs:
46
+ apply_dlwt: False
47
+ psnr_threshold: 30
48
+ bitacc_target: 0.95
49
+ delta: 1.0
50
+ # loss config: (set message_absolute_loss_weight=0 if dlwt is applied)
51
+ recon_type: rgb
52
+ recon_loss_weight: 0.1
53
+ adversarial_loss_weight: 1.0
54
+ perceptual_loss_weight: 1.0
55
+ message_absolute_loss_weight: 2.0
56
+
57
+ noise_config:
58
+ target: models.transformations.TransformNet
59
+ params:
60
+ ramp: 10000
61
+ apply_many_crops: False
62
+ apply_required_attacks: True
63
+ required_attack_list: ['none'] #['rotation', 'resize','random_crop', 'center_crop', 'blur', 'noise','contrast','brightness', 'jpeg']
64
+
65
+ data:
66
+ target: tools.dataset.DataModule
67
+ params:
68
+ batch_size: 8
69
+ num_workers: 8
70
+ use_worker_init_fn: true
71
+ train:
72
+ target: tools.dataset.dataset
73
+ params:
74
+ data_dir: /pubdata/ldd/Datasets/Flicker
75
+ data_list: /pubdata/ldd/projects/EW-LoRA/watermarker/LaWa/lawa_dataset/train_100k.csv
76
+ resize: 256
77
+ validation:
78
+ target: tools.dataset.dataset
79
+ params:
80
+ data_dir: /pubdata/ldd/Datasets/Flicker
81
+ data_list: /pubdata/ldd/projects/EW-LoRA/watermarker/LaWa/lawa_dataset/val_1k.csv
82
+ resize: 256
83
+
84
+ lightning:
85
+ callbacks:
86
+ image_logger:
87
+ target: models.logger.ImageLogger
88
+ params:
89
+ batch_frequency: 1
90
+ max_images: 0
91
+ increase_log_steps: False
92
+ fixed_input: True
93
+ progress_bar:
94
+ target: pytorch_lightning.callbacks.ProgressBar
95
+ params:
96
+ refresh_rate: 4
97
+ checkpoint:
98
+ target: pytorch_lightning.callbacks.ModelCheckpoint
99
+ params:
100
+ verbose: true
101
+ filename: '{epoch:06}-{step:09}'
102
+ every_n_train_steps: 5000
103
+
104
+ trainer:
105
+ benchmark: True
106
+ base_learning_rate: 2e-5
107
+ accumulate_grad_batches: 1
watermarker/LaWa/configs/SD14_LaWa_modified.yaml ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ target: models.modifiedAEDecoder.LaWa
3
+ params:
4
+ scale_factor: 1.0 #0.18215
5
+ extraction_resize: False
6
+ start_attack_acc_thresh: 0.5
7
+ watermark_addition_weight: 0.1
8
+ learning_rate: 0.00008
9
+ epoch_num: 100
10
+ dis_update_freq: 0
11
+ noise_block_size: 8
12
+ first_stage_config:
13
+ target: stable-diffusion.ldm.models.autoencoder.AutoencoderKL
14
+ params:
15
+ ckpt_path: weights/first_stage_models/first_stage_KL-f8.ckpt
16
+ embed_dim: 4
17
+ monitor: val/rec_loss
18
+ ddconfig:
19
+ double_z: true
20
+ z_channels: 4
21
+ resolution: 256
22
+ in_channels: 3
23
+ out_ch: 3
24
+ ch: 128
25
+ ch_mult:
26
+ - 1
27
+ - 2
28
+ - 4
29
+ - 4
30
+ num_res_blocks: 2
31
+ attn_resolutions: []
32
+ dropout: 0.0
33
+ lossconfig:
34
+ target: torch.nn.Identity
35
+
36
+ decoder_config:
37
+ target: models.messageDecoder.HiDDeNDecoder #models.messageDecoder.MessageDecoder
38
+ params:
39
+ msg_decoder_dir: /pubdata/ldd/models/wm_encdec/hidden/ckpts/dec_48b_whit.torchscript.pt
40
+ message_len: 48
41
+
42
+
43
+ discriminator_config:
44
+ target: models.modifiedAEDecoder.Discriminator1
45
+
46
+ # loss config: (set message_absolute_loss_weight=0 if dlwt is applied)
47
+ recon_type: rgb
48
+ recon_loss_weight: 0.1
49
+ adversarial_loss_weight: 1.0
50
+ perceptual_loss_weight: 1.0
51
+ message_absolute_loss_weight: 0
52
+
53
+ noise_config:
54
+ target: models.transformations.TransformNet
55
+ params:
56
+ ramp: 10000
57
+ apply_many_crops: False
58
+ apply_required_attacks: True
59
+ required_attack_list: ['none'] #['rotation', 'resize','random_crop', 'center_crop', 'blur', 'noise','contrast','brightness', 'jpeg']
60
+
61
+ data:
62
+ target: tools.dataset.DataModule
63
+ params:
64
+ batch_size: 8
65
+ num_workers: 8
66
+ use_worker_init_fn: true
67
+ train:
68
+ target: tools.dataset.dataset
69
+ params:
70
+ data_dir: /pubdata/ldd/Datasets/Flicker
71
+ data_list: lawa_dataset/train_100k.csv
72
+ resize: 256
73
+ validation:
74
+ target: tools.dataset.dataset
75
+ params:
76
+ data_dir: /pubdata/ldd/Datasets/Flicker
77
+ data_list: lawa_dataset/val_1k.csv
78
+ resize: 256
79
+
80
+ lightning:
81
+ callbacks:
82
+ image_logger:
83
+ target: models.logger.ImageLogger
84
+ params:
85
+ batch_frequency: 1
86
+ max_images: 4
87
+ increase_log_steps: False
88
+ fixed_input: True
89
+ progress_bar:
90
+ target: pytorch_lightning.callbacks.ProgressBar
91
+ params:
92
+ refresh_rate: 4
93
+ checkpoint:
94
+ target: pytorch_lightning.callbacks.ModelCheckpoint
95
+ params:
96
+ verbose: true
97
+ filename: '{epoch:06}-{step:09}'
98
+ every_n_train_steps: 5000
99
+
100
+ trainer:
101
+ benchmark: True
102
+ base_learning_rate: 2e-5
103
+ accumulate_grad_batches: 1
watermarker/LaWa/dlwt.py ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+
3
+ def dynamic_lambda_scheduler(psnr, bitacc, lambda_i, lambda_w, psnr_threshold, bitacc_target, delta, min_increment=1e-6,
4
+ acc_increment=0.05, psnr_increment=1.0, max_psnr_threshold=50, max_bitacc_target=1.0, patience=5):
5
+ """
6
+ Dynamic Loss Weight Tuning with Adaptive Strategy for BitAcc and PSNR Adjustment.
7
+
8
+ Args:
9
+ psnr (float): Current PSNR value.
10
+ bitacc (float): Current BitAcc value.
11
+ lambda_i (float or list of float): Current weight(s) for image quality loss.
12
+ lambda_w (float): Current weight for watermark accuracy loss.
13
+ psnr_threshold (float): Current PSNR threshold.
14
+ bitacc_target (float): Current BitAcc target.
15
+ delta (float): Scaling factor for weight adjustment.
16
+ min_increment (float): Minimum increment to use when lambda_w is zero.
17
+ acc_increment (float): Increment for bitacc_target when targets are met.
18
+ psnr_increment (float): Increment for psnr_threshold when targets are met.
19
+ max_psnr_threshold (float): Maximum limit for psnr_threshold.
20
+ max_bitacc_target (float): Maximum limit for bitacc_target.
21
+ patience (int): Number of iterations to adjust based on unmet target.
22
+
23
+ Returns:
24
+ tuple: Updated values of lambda_i, lambda_w, psnr_threshold, bitacc_target.
25
+ """
26
+ # Static variables to track performance history
27
+ if not hasattr(dynamic_lambda_scheduler, "psnr_threshold_history"):
28
+ dynamic_lambda_scheduler.psnr_threshold_history = psnr_threshold
29
+ if not hasattr(dynamic_lambda_scheduler, "bitacc_target_history"):
30
+ dynamic_lambda_scheduler.bitacc_target_history = bitacc_target
31
+ if not hasattr(dynamic_lambda_scheduler, "success_i_counter"):
32
+ dynamic_lambda_scheduler.success_i_counter = 0
33
+ if not hasattr(dynamic_lambda_scheduler, "success_w_counter"):
34
+ dynamic_lambda_scheduler.success_w_counter = 0
35
+ if not hasattr(dynamic_lambda_scheduler, "success_counter"):
36
+ dynamic_lambda_scheduler.success_counter = 0
37
+
38
+ # Define proportional growth factors
39
+ bitacc_diff = bitacc_target - bitacc
40
+ bitacc_growth_factor = min(math.log(1 + abs(bitacc_diff) / bitacc_target), 1)
41
+ psnr_diff = psnr_threshold - psnr
42
+ psnr_growth_factor = min(math.log(1 + abs(psnr_diff) / psnr_threshold), 1)
43
+ # bitacc_growth_factor = 0.05 * 2
44
+ # psnr_growth_factor = 0.05
45
+
46
+ # Helper function to adjust lambda values
47
+ def adjust_lambda(lambda_value, growth_factor):
48
+ if isinstance(lambda_value, list):
49
+ return [lv + delta * growth_factor for lv in lambda_value]
50
+ else:
51
+ return lambda_value + delta * growth_factor
52
+
53
+ def decrease_lambda(lambda_value, decrease_step=0.001):
54
+ if isinstance(lambda_value, list):
55
+ return [max(lv - decrease_step, 0.0) for lv in lambda_value]
56
+ else:
57
+ return max(lambda_value - decrease_step, 0.0)
58
+
59
+ # Adjustment strategies
60
+ if bitacc < bitacc_target:
61
+ # Increase lambda_w when bitacc is below target
62
+ lambda_w = max(adjust_lambda(lambda_w, bitacc_growth_factor), min_increment)
63
+ # dynamic_lambda_scheduler.success_i_counter += 1
64
+ else: # Let bitacc meet target first, then do the rest of the adjustment
65
+        # bitacc meets its target: increment the success counter toward the patience window
66
+ dynamic_lambda_scheduler.success_i_counter += 1
67
+
68
+ if psnr < psnr_threshold:
69
+ # Increase lambda_i when psnr is below target
70
+ lambda_i = adjust_lambda(lambda_i, psnr_growth_factor)
71
+ # dynamic_lambda_scheduler.success_w_counter += 1
72
+ else:
73
+        # psnr meets its target: increment the success counter toward the patience window
74
+ dynamic_lambda_scheduler.success_w_counter += 1
75
+
76
+ # Increment targets if both bitacc and psnr meet their thresholds consistently
77
+ # if bitacc >= bitacc_target and psnr >= psnr_threshold:
78
+ # dynamic_lambda_scheduler.success_counter += 1
79
+ if dynamic_lambda_scheduler.success_i_counter >= patience:
80
+ # Increment targets and reset success counter
81
+ bitacc_target = min(bitacc_target + acc_increment, max_bitacc_target)
82
+ lambda_i = adjust_lambda(lambda_i, psnr_growth_factor)
83
+ dynamic_lambda_scheduler.success_i_counter = 0
84
+ elif dynamic_lambda_scheduler.success_w_counter >= patience:
85
+ # Increment targets and reset success counter
86
+ psnr_threshold = min(psnr_threshold + psnr_increment, max_psnr_threshold)
87
+ lambda_w = adjust_lambda(lambda_w, bitacc_growth_factor)
88
+ dynamic_lambda_scheduler.success_w_counter = 0
89
+
90
+ # # Revert thresholds if `patience` limit reached without meeting targets
91
+ # if dynamic_lambda_scheduler.success_i_counter >= patience:
92
+ # # Increase lambda_i when bitacc continuously misses target
93
+ # lambda_i = adjust_lambda(lambda_i, bitacc_growth_factor)
94
+ # dynamic_lambda_scheduler.success_i_counter = 0
95
+
96
+ # if dynamic_lambda_scheduler.success_w_counter >= patience:
97
+ # # Increase lambda_w when psnr continuously misses target
98
+ # lambda_w = adjust_lambda(lambda_w, psnr_growth_factor)
99
+ # dynamic_lambda_scheduler.success_w_counter = 0
100
+
101
+ # # Apply a small, fixed decrease to lambda_i and lambda_w when both targets are met
102
+ # if bitacc >= bitacc_target and psnr >= psnr_threshold:
103
+ # lambda_i = decrease_lambda(lambda_i)
104
+ # lambda_w = decrease_lambda(lambda_w)
105
+
106
+ return lambda_i, lambda_w, psnr_threshold, bitacc_target
107
+
108
+
109
+
110
+ # def adjust_multi_lambda_i(psnr, bitacc, lambda_i, lambda_w, psnr_threshold, bitacc_target, delta, min_increment=1e-6,
111
+ # psnr_increment=0.5, max_psnr_threshold=50, patience=5):
112
+ # """
113
+ # Dynamic Loss Weight Tuning with Logarithmic Proportional Increase for Fast BitAcc Adjustment
114
+
115
+ # Adjusts the weights of two loss components based on the performance metrics (PSNR and BitACC).
116
+ # Initially prioritizes increasing lambda_w for fast BitAcc convergence. Once BitAcc reaches its target,
117
+ # adjusts lambda_i or lambda_w based on PSNR and BitAcc conditions. Also dynamically adjusts psnr_threshold.
118
+
119
+ # Args:
120
+ # psnr (float): Current PSNR value.
121
+ # bitacc (float): Current BitACC value.
122
+ # lambda_i (float or list of float): Current weight(s) for image quality loss.
123
+ # lambda_w (float): Current weight for watermark accuracy loss.
124
+ # psnr_threshold (float): Current PSNR threshold.
125
+ # bitacc_target (float): Target BitACC threshold.
126
+ # delta (float): Scaling factor for weight adjustment.
127
+ # min_increment (float): Minimum increment to use when lambda_w is zero.
128
+ # psnr_increment (float): Increment for psnr_threshold when bitacc_target is met.
129
+ # max_psnr_threshold (float): Maximum limit for psnr_threshold.
130
+ # patience (int): Number of iterations to allow bitacc below target before reverting psnr_threshold.
131
+
132
+ # Returns:
133
+ # tuple: Updated values of lambda_i, lambda_w, psnr_threshold, patience_counter.
134
+ # """
135
+ # # Static variables to hold dynamic adjustment states
136
+ # if not hasattr(adjust_multi_lambda_i, "psnr_threshold_history"):
137
+ # adjust_multi_lambda_i.psnr_threshold_history = psnr_threshold
138
+ # if not hasattr(adjust_multi_lambda_i, "patience_counter"):
139
+ # adjust_multi_lambda_i.patience_counter = 0
140
+
141
+ # # Define logarithmic growth factors based on differences
142
+ # bitacc_diff = bitacc_target - bitacc
143
+ # bitacc_growth_factor = math.log(1 + abs(bitacc_diff) / bitacc_target)
144
+ # psnr_diff = psnr_threshold - psnr
145
+ # psnr_growth_factor = math.log(1 + abs(psnr_diff) / psnr_threshold)
146
+
147
+ # # Helper function to handle single float or list for lambda_i
148
+ # def adjust_lambda(lambda_value, growth_factor):
149
+ # if isinstance(lambda_value, list):
150
+ # return [lv + delta * growth_factor for lv in lambda_value]
151
+ # else:
152
+ # return lambda_value + delta * growth_factor
153
+
154
+ # def decrease_lambda(lambda_value, decrease_step=0.001):
155
+ # if isinstance(lambda_value, list):
156
+ # return [max(lv - decrease_step, 0.0) for lv in lambda_value]
157
+ # else:
158
+ # return max(lambda_value - decrease_step, 0.0)
159
+
160
+ # # Adjusting strategy
161
+ # if bitacc < bitacc_target:
162
+ # # Stage 1: Prioritize increasing lambda_w to quickly improve bitacc
163
+ # lambda_w = max(adjust_lambda(lambda_w, bitacc_growth_factor), min_increment)
164
+ # adjust_multi_lambda_i.patience_counter += 1
165
+ # else:
166
+ # # Reset patience counter when bitacc meets the target
167
+ # adjust_multi_lambda_i.patience_counter = 0
168
+
169
+ # # Stage 2: After bitacc reaches target, increase psnr_threshold and adjust weights
170
+ # if psnr < psnr_threshold:
171
+ # lambda_i = adjust_lambda(lambda_i, psnr_growth_factor)
172
+ # else:
173
+ # lambda_w = adjust_lambda(lambda_w, bitacc_growth_factor)
174
+
175
+ # # Attempt to increase psnr_threshold, but respect max threshold
176
+ # if psnr_threshold < max_psnr_threshold:
177
+ # psnr_threshold += psnr_increment
178
+ # adjust_multi_lambda_i.psnr_threshold_history = psnr_threshold
179
+
180
+ # # Stage 3: When bitacc and psnr meet targets, apply a small, fixed decrease
181
+ # if bitacc >= bitacc_target and psnr >= psnr_threshold:
182
+ # lambda_i = decrease_lambda(lambda_i)
183
+ # lambda_w = decrease_lambda(lambda_w)
184
+
185
+    # # Revert psnr_threshold if patience is exceeded: lower it when bitacc has missed its target for patience consecutive iterations
186
+ # if adjust_multi_lambda_i.patience_counter >= patience:
187
+ # psnr_threshold = adjust_multi_lambda_i.psnr_threshold_history - psnr_increment
188
+ # adjust_multi_lambda_i.patience_counter = 0 # Reset patience
189
+
190
+ # return lambda_i, lambda_w, psnr_threshold
191
+
192
+
193
+ # def adjust_multi_lambda_i(psnr, bitacc, lambda_i, lambda_w, psnr_threshold, bitacc_target, delta, min_increment=1e-6):
194
+ # """
195
+ # Dynamic Loss Weight Tuning with Logarithmic Proportional Increase for Fast BitAcc Adjustment
196
+
197
+ # Adjusts the weights of two loss components based on the performance metrics (PSNR and BitACC).
198
+ # Initially prioritizes increasing lambda_w for fast BitAcc convergence. Once BitAcc reaches its target,
199
+ # adjusts lambda_i or lambda_w based on PSNR and BitAcc conditions.
200
+
201
+ # Args:
202
+ # psnr (float): Current PSNR value.
203
+ # bitacc (float): Current BitACC value.
204
+ # lambda_i (float or list of float): Current weight(s) for image quality loss.
205
+ # lambda_w (float): Current weight for watermark accuracy loss.
206
+ # psnr_threshold (float): Target PSNR threshold.
207
+ # bitacc_target (float): Target BitACC threshold.
208
+ # delta (float): Scaling factor for weight adjustment.
209
+ # min_increment (float): Minimum increment to use when lambda_w is zero.
210
+
211
+ # Returns:
212
+ # tuple: Updated values of lambda_i (float or list) and lambda_w.
213
+ # """
214
+ # # Define logarithmic growth factors based on differences
215
+ # bitacc_diff = bitacc_target - bitacc
216
+ # bitacc_growth_factor = math.log(1 + abs(bitacc_diff) / bitacc_target)
217
+ # psnr_diff = psnr_threshold - psnr
218
+ # psnr_growth_factor = math.log(1 + min(abs(psnr_diff), 100) / psnr_threshold)
219
+
220
+ # # Helper function to handle single float or list for lambda_i
221
+ # def adjust_lambda(lambda_value, growth_factor):
222
+ # if isinstance(lambda_value, list):
223
+ # return [lv + delta * growth_factor for lv in lambda_value]
224
+ # else:
225
+ # return lambda_value + delta * growth_factor
226
+
227
+ # def decrease_lambda(lambda_value, decrease_step=0.001):
228
+ # if isinstance(lambda_value, list):
229
+ # return [max(lv - decrease_step, 0.0) for lv in lambda_value]
230
+ # else:
231
+ # return max(lambda_value - decrease_step, 0.0)
232
+
233
+ # # Adjusting strategy
234
+ # if bitacc < bitacc_target:
235
+ # # Stage 1: Prioritize increasing lambda_w to quickly improve bitacc
236
+ # lambda_w = max(adjust_lambda(lambda_w, bitacc_growth_factor), min_increment)
237
+ # else:
238
+ # # Stage 2: After bitacc reaches target, adjust based on psnr
239
+ # if psnr < psnr_threshold:
240
+ # # If psnr is below threshold, increase lambda_i to improve image quality
241
+ # lambda_i = adjust_lambda(lambda_i, psnr_growth_factor)
242
+ # else:
243
+ # # If psnr is above threshold, continue increasing lambda_w for better embedding
244
+ # lambda_w = adjust_lambda(lambda_w, bitacc_growth_factor)
245
+
246
+ # # Stage 3: When both bitacc and psnr meet targets, apply a small, fixed decrease
247
+ # if bitacc >= bitacc_target and psnr >= psnr_threshold:
248
+ # lambda_i = decrease_lambda(lambda_i)
249
+ # lambda_w = decrease_lambda(lambda_w)
250
+
251
+ # return lambda_i, lambda_w
watermarker/LaWa/ecc.py ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import bchlib
2
+ import numpy as np
3
+ from typing import List, Tuple
4
+ import random
5
+ from copy import deepcopy
6
+
7
+ class RSC(object):
8
+ def __init__(self, data_bytes=16, ecc_bytes=4, verbose=False, **kwargs):
9
+ from reedsolo import RSCodec
10
+ self.rs = RSCodec(ecc_bytes)
11
+ if verbose:
12
+ print(f'Reed-Solomon ECC len: {ecc_bytes*8} bits')
13
+ self.data_len = data_bytes
14
+ self.dlen = data_bytes * 8 # data length in bits
15
+ self.ecc_len = ecc_bytes * 8 # ecc length in bits
16
+
17
+ def get_total_len(self):
18
+ return self.dlen + self.ecc_len
19
+
20
+ def encode_text(self, text: List[str]):
21
+ return np.array([self._encode_text(t) for t in text])
22
+
23
+ def _encode_text(self, text: str):
24
+ text = text + ' ' * (self.dlen // 8 - len(text))
25
+ out = self.rs.encode(text.encode('utf-8')) # bytearray
26
+ out = ''.join(format(x, '08b') for x in out) # bit string
27
+ out = np.array([int(x) for x in out], dtype=np.float32)
28
+ return out
29
+
30
+ def decode_text(self, data: np.array):
31
+ assert len(data.shape)==2
32
+ return [self._decode_text(d) for d in data]
33
+
34
+ def _decode_text(self, data: np.array):
35
+ assert len(data.shape)==1
36
+ data = ''.join([str(int(bit)) for bit in data])
37
+ data = bytes(int(data[i: i + 8], 2) for i in range(0, len(data), 8))
38
+ data = bytearray(data)
39
+ try:
40
+ data = self.rs.decode(data)[0]
41
+ data = data.decode('utf-8').strip()
42
+ except:
43
+ print('Error: Decode failed')
44
+ data = get_random_unicode(self.get_total_len()//8)
45
+
46
+ return data
47
+
48
+ def get_random_unicode(length):
49
+ # Update this to include code point ranges to be sampled
50
+ include_ranges = [
51
+ ( 0x0021, 0x0021 ),
52
+ ( 0x0023, 0x0026 ),
53
+ ( 0x0028, 0x007E ),
54
+ ( 0x00A1, 0x00AC ),
55
+ ( 0x00AE, 0x00FF ),
56
+ ( 0x0100, 0x017F ),
57
+ ( 0x0180, 0x024F ),
58
+ ( 0x2C60, 0x2C7F ),
59
+ ( 0x16A0, 0x16F0 ),
60
+ ( 0x0370, 0x0377 ),
61
+ ( 0x037A, 0x037E ),
62
+ ( 0x0384, 0x038A ),
63
+ ( 0x038C, 0x038C ),
64
+ ]
65
+ alphabet = [
66
+ chr(code_point) for current_range in include_ranges
67
+ for code_point in range(current_range[0], current_range[1] + 1)
68
+ ]
69
+ return ''.join(random.choice(alphabet) for i in range(length))
70
+
71
+
72
+ class BCH(object):
73
+ def __init__(self, BCH_POLYNOMIAL = 137, BCH_BITS = 5, payload_len=100, verbose=True,**kwargs):
74
+ self.bch = bchlib.BCH(BCH_POLYNOMIAL, BCH_BITS)
75
+ self.payload_len = payload_len # in bits
76
+ self.data_len = (self.payload_len - self.bch.ecc_bytes*8)//7 # in ascii characters
77
+ assert self.data_len*7+self.bch.ecc_bytes*8 <= self.bch.n, f'Error! BCH with poly {BCH_POLYNOMIAL} and bits {BCH_BITS} can only encode max {self.bch.n//8} bytes of total payload'
78
+ if verbose:
79
+ print(f'BCH: POLYNOMIAL={BCH_POLYNOMIAL}, protected bits={BCH_BITS}, payload_len={payload_len} bits, data_len={self.data_len*7} bits ({self.data_len} ascii chars), ecc len={self.bch.ecc_bytes*8} bits')
80
+
81
+ def get_total_len(self):
82
+ return self.payload_len
83
+
84
+ def encode_text(self, text: List[str]):
85
+ return np.array([self._encode_text(t) for t in text])
86
+
87
+ def _encode_text(self, text: str):
88
+ text = text + ' ' * (self.data_len - len(text))
89
+ # data = text.encode('utf-8') # bytearray
90
+ data = encode_text_ascii(text) # bytearray
91
+ ecc = self.bch.encode(data) # bytearray
92
+ packet = data + ecc # payload in bytearray
93
+ packet = ''.join(format(x, '08b') for x in packet)
94
+ packet = [int(x) for x in packet]
95
+ packet.extend([0]*(self.payload_len - len(packet)))
96
+ packet = np.array(packet, dtype=np.float32)
97
+ return packet
98
+
99
+ def decode_text(self, data: np.array):
100
+ assert len(data.shape)==2
101
+ return [self._decode_text(d) for d in data]
102
+
103
+ def _decode_text(self, packet: np.array):
104
+ assert len(packet.shape)==1
105
+ packet = ''.join([str(int(bit)) for bit in packet]) # bit string
106
+ packet = packet[:(len(packet)//8*8)] # trim to multiple of 8 bits
107
+ packet = bytes(int(packet[i: i + 8], 2) for i in range(0, len(packet), 8))
108
+ packet = bytearray(packet)
109
+ # assert len(packet) == self.data_len + self.bch.ecc_bytes
110
+ data, ecc = packet[:-self.bch.ecc_bytes], packet[-self.bch.ecc_bytes:]
111
+ data0 = decode_text_ascii(deepcopy(data)).strip()
112
+ bitflips = self.bch.decode_inplace(data, ecc)
113
+ if bitflips == -1: # error, return random text
114
+ data = data0
115
+ else:
116
+ # data = data.decode('utf-8').strip()
117
+ data = decode_text_ascii(data).strip()
118
+ return data
119
+
120
+
121
+ def encode_text_ascii(text: str):
122
+ # encode text to 7-bit ascii
123
+ # input: text, str
124
+ # output: encoded text, bytearray
125
+ text_int7 = [ord(t) & 127 for t in text]
126
+ text_bitstr = ''.join(format(t,'07b') for t in text_int7)
127
+ if len(text_bitstr) % 8 != 0:
128
+ text_bitstr = '0'*(8-len(text_bitstr)%8) + text_bitstr # pad to multiple of 8
129
+ text_int8 = [int(text_bitstr[i:i+8], 2) for i in range(0, len(text_bitstr), 8)]
130
+ return bytearray(text_int8)
131
+
132
+
133
+ def decode_text_ascii(text: bytearray):
134
+ # decode text from 7-bit ascii
135
+ # input: text, bytearray
136
+ # output: decoded text, str
137
+ text_bitstr = ''.join(format(t,'08b') for t in text) # bit string
138
+ pad = len(text_bitstr) % 7
139
+ if pad != 0: # has padding, remove
140
+ text_bitstr = text_bitstr[pad:]
141
+ text_int7 = [int(text_bitstr[i:i+7], 2) for i in range(0, len(text_bitstr), 7)]
142
+ text_bytes = bytes(text_int7)
143
+ return text_bytes.decode('utf-8')
144
+
145
+
146
+ class ECC(object):
147
+ def __init__(self, BCH_POLYNOMIAL = 137, BCH_BITS = 5, **kwargs):
148
+ self.bch = bchlib.BCH(BCH_POLYNOMIAL, BCH_BITS)
149
+
150
+ def get_total_len(self):
151
+ return 100
152
+
153
+ def _encode(self, x):
154
+ # x: 56 bits, {0, 1}, np.array
155
+ # return: 100 bits, {0, 1}, np.array
156
+ dlen = len(x)
157
+ data_str = ''.join(str(x) for x in x.astype(int))
158
+ packet = bytes(int(data_str[i: i + 8], 2) for i in range(0, dlen, 8))
159
+ packet = bytearray(packet)
160
+ ecc = self.bch.encode(packet)
161
+ packet = packet + ecc # 96 bits
162
+ packet = ''.join(format(x, '08b') for x in packet)
163
+ packet = [int(x) for x in packet]
164
+ packet.extend([0, 0, 0, 0])
165
+ packet = np.array(packet, dtype=np.float32) # 100
166
+ return packet
167
+
168
+ def _decode(self, x):
169
+ # x: 100 bits, {0, 1}, np.array
170
+ # return: 56 bits, {0, 1}, np.array
171
+ packet_binary = "".join([str(int(bit)) for bit in x])
172
+ packet = bytes(int(packet_binary[i: i + 8], 2) for i in range(0, len(packet_binary), 8))
173
+ packet = bytearray(packet)
174
+
175
+ data, ecc = packet[:-self.bch.ecc_bytes], packet[-self.bch.ecc_bytes:]
176
+ bitflips = self.bch.decode_inplace(data, ecc)
177
+ if bitflips == -1: # error, return random data
178
+ data = np.random.binomial(1, .5, 56)
179
+ else:
180
+ data = ''.join(format(x, '08b') for x in data)
181
+ data = np.array([int(x) for x in data], dtype=np.float32)
182
+ return data # 56 bits
183
+
184
+ def _generate(self):
185
+ dlen = 56
186
+ data= np.random.binomial(1, .5, dlen)
187
+ packet = self._encode(data)
188
+ return packet, data
189
+
190
+ def generate(self, nsamples=1):
191
+ # generate random 56 bit secret
192
+ data = [self._generate() for _ in range(nsamples)]
193
+ data = (np.array([d[0] for d in data]), np.array([d[1] for d in data]))
194
+ return data # data with ecc, data org
195
+
196
+ def _to_text(self, data):
197
+ # data: {0, 1}, np.array
198
+ # return: str
199
+ data = ''.join([str(int(bit)) for bit in data])
200
+ all_bytes = [ data[i: i+8] for i in range(0, len(data), 8) ]
201
+ text = ''.join([chr(int(byte, 2)) for byte in all_bytes])
202
+ return text.strip()
203
+
204
+ def _to_binary(self, s):
205
+ if isinstance(s, str):
206
+ out = ''.join([ format(ord(i), "08b") for i in s ])
207
+ elif isinstance(s, bytes):
208
+ out = ''.join([ format(i, "08b") for i in s ])
209
+ elif isinstance(s, np.ndarray) and s.dtype is np.dtype(bool):
210
+ out = ''.join([chr(int(i)) for i in s])
211
+ elif isinstance(s, int) or isinstance(s, np.uint8):
212
+ out = format(s, "08b")
213
+ elif isinstance(s, np.ndarray):
214
+ out = [ format(i, "08b") for i in s ]
215
+ else:
216
+ raise TypeError("Type not supported.")
217
+
218
+ return np.array([float(i) for i in out], dtype=np.float32)
219
+
220
+ def _encode_text(self, s):
221
+ s = s + ' '*(7-len(s)) # 7 chars
222
+ s = self._to_binary(s) # 56 bits
223
+ packet = self._encode(s) # 100 bits
224
+ return packet, s
225
+
226
+ def encode_text(self, secret_list, return_pre_ecc=False):
227
+ """encode secret with BCH ECC.
228
+ Input: secret (list of strings)
229
+        Output: secret (np array) with shape (B, 100) type float32, val {0,1}"""
230
+ assert np.all(np.array([len(s) for s in secret_list]) <= 7), 'Error! all strings must be less than 7 characters'
231
+ secret_list = [self._encode_text(s) for s in secret_list]
232
+ ecc = np.array([s[0] for s in secret_list], dtype=np.float32)
233
+ if return_pre_ecc:
234
+ return ecc, np.array([s[1] for s in secret_list], dtype=np.float32)
235
+ return ecc
236
+
237
+ def decode_text(self, data):
238
+ """Decode secret with BCH ECC and convert to string.
239
+ Input: secret (torch.tensor) with shape (B, 100) type bool
240
+ Output: secret (B, 56)"""
241
+ data = self.decode(data)
242
+ data = [self._to_text(d) for d in data]
243
+ return data
244
+
245
+ def decode(self, data):
246
+ """Decode secret with BCH ECC and convert to string.
247
+ Input: secret (torch.tensor) with shape (B, 100) type bool
248
+ Output: secret (B, 56)"""
249
+ data = data[:, :96]
250
+ data = [self._decode(d) for d in data]
251
+ return np.array(data)
252
+
253
+ def test_ecc():
254
+ ecc = ECC()
255
+ batch_size = 10
256
+ secret_ecc, secret_org = ecc.generate(batch_size) # 10x100 ecc secret, 10x56 org secret
257
+ # modify secret_ecc
258
+ secret_pred = secret_ecc.copy()
259
+ secret_pred[:,3:6] = 1 - secret_pred[:,3:6]
260
+ # pass secret_ecc to model and get predicted as secret_pred
261
+ secret_pred_org = ecc.decode(secret_pred) # 10x56
262
+ assert np.all(secret_pred_org == secret_org) # 10
263
+
264
+
265
+ def test_bch():
266
+ # test 100 bit
267
+ def check(text, poly, k, l):
268
+ bch = BCH(poly, k, l)
269
+ # text = 'secrets'
270
+ encode = bch.encode_text([text])
271
+ for ind in np.random.choice(l, k):
272
+ encode[0, ind] = 1 - encode[0, ind]
273
+ text_recon = bch.decode_text(encode)[0]
274
+ assert text==text_recon
275
+
276
+ check('secrets', 137, 5, 100)
277
+ check('some secret', 285, 10, 160)
278
+
279
+ if __name__ == '__main__':
280
+ test_ecc()
281
+ test_bch()
watermarker/LaWa/examples/gen_wmimgs_EW-LoRA_dlwt.ipynb ADDED
@@ -0,0 +1,267 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/home/ldd/miniconda3/envs/ldm/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
13
+ " from .autonotebook import tqdm as notebook_tqdm\n"
14
+ ]
15
+ },
16
+ {
17
+ "name": "stdout",
18
+ "output_type": "stream",
19
+ "text": [
20
+ "No module 'xformers'. Proceeding without it.\n"
21
+ ]
22
+ },
23
+ {
24
+ "name": "stderr",
25
+ "output_type": "stream",
26
+ "text": [
27
+ "/home/ldd/miniconda3/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/utilities/distributed.py:258: LightningDeprecationWarning: `pytorch_lightning.utilities.distributed.rank_zero_only` has been deprecated in v1.8.1 and will be removed in v2.0.0. You can import it from `pytorch_lightning.utilities` instead.\n",
28
+ " rank_zero_deprecation(\n"
29
+ ]
30
+ },
31
+ {
32
+ "name": "stdout",
33
+ "output_type": "stream",
34
+ "text": [
35
+ ">>> Building LDM model with config /pubdata/ldd/projects/EW-LoRA/Watermarker/stable_signature/configs/stable-diffusion/v1-inference.yaml and weights from /pubdata/ldd/models/ldm_ckpts/sd-v1-4-full-ema.ckpt...\n",
36
+ "Loading model from /pubdata/ldd/models/ldm_ckpts/sd-v1-4-full-ema.ckpt\n",
37
+ "Global Step: 470000\n",
38
+ "LatentDiffusion: Running in eps-prediction mode\n",
39
+ "DiffusionWrapper has 859.52 M params.\n",
40
+ "making attention of type 'vanilla' with 512 in_channels\n",
41
+ "Working with z of shape (1, 4, 32, 32) = 4096 dimensions.\n",
42
+ "making attention of type 'vanilla' with 512 in_channels\n"
43
+ ]
44
+ },
45
+ {
46
+ "ename": "RuntimeError",
47
+ "evalue": "CUDA error: out of memory\nCUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.\nFor debugging consider passing CUDA_LAUNCH_BLOCKING=1.\nCompile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.\n",
48
+ "output_type": "error",
49
+ "traceback": [
50
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
51
+ "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)",
52
+ "Cell \u001b[0;32mIn[1], line 40\u001b[0m\n\u001b[1;32m 38\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m>>> Building LDM model with config \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mldm_config\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m and weights from \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mldm_ckpt\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m...\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 39\u001b[0m config \u001b[38;5;241m=\u001b[39m OmegaConf\u001b[38;5;241m.\u001b[39mload(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mldm_config\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 40\u001b[0m ldm_ae: LatentDiffusion \u001b[38;5;241m=\u001b[39m \u001b[43mutils_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_model_from_config\u001b[49m\u001b[43m(\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mldm_ckpt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 41\u001b[0m ldm_ae: AutoencoderKL \u001b[38;5;241m=\u001b[39m ldm_ae\u001b[38;5;241m.\u001b[39mfirst_stage_model\n\u001b[1;32m 42\u001b[0m ldm_ae\u001b[38;5;241m.\u001b[39meval()\n",
53
+ "File \u001b[0;32m/pubdata/ldd/projects/EW-LoRA/Watermarker/stable_signature/utils_model.py:149\u001b[0m, in \u001b[0;36mload_model_from_config\u001b[0;34m(config, ckpt, verbose)\u001b[0m\n\u001b[1;32m 146\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124munexpected keys:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28mprint\u001b[39m(u)\n\u001b[0;32m--> 149\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcuda\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 150\u001b[0m model\u001b[38;5;241m.\u001b[39meval()\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m model\n",
54
+ "File \u001b[0;32m~/miniconda3/envs/ldm/lib/python3.8/site-packages/lightning_fabric/utilities/device_dtype_mixin.py:73\u001b[0m, in \u001b[0;36m_DeviceDtypeModuleMixin.cuda\u001b[0;34m(self, device)\u001b[0m\n\u001b[1;32m 71\u001b[0m device \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mdevice(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcuda\u001b[39m\u001b[38;5;124m\"\u001b[39m, index\u001b[38;5;241m=\u001b[39mdevice)\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__update_properties(device\u001b[38;5;241m=\u001b[39mdevice)\n\u001b[0;32m---> 73\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcuda\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdevice\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdevice\u001b[49m\u001b[43m)\u001b[49m\n",
55
+ "File \u001b[0;32m~/miniconda3/envs/ldm/lib/python3.8/site-packages/torch/nn/modules/module.py:911\u001b[0m, in \u001b[0;36mModule.cuda\u001b[0;34m(self, device)\u001b[0m\n\u001b[1;32m 894\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcuda\u001b[39m(\u001b[38;5;28mself\u001b[39m: T, device: Optional[Union[\u001b[38;5;28mint\u001b[39m, device]] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m T:\n\u001b[1;32m 895\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"Move all model parameters and buffers to the GPU.\u001b[39;00m\n\u001b[1;32m 896\u001b[0m \n\u001b[1;32m 897\u001b[0m \u001b[38;5;124;03m This also makes associated parameters and buffers different objects. So\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 909\u001b[0m \u001b[38;5;124;03m Module: self\u001b[39;00m\n\u001b[1;32m 910\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 911\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_apply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcuda\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n",
56
+ "File \u001b[0;32m~/miniconda3/envs/ldm/lib/python3.8/site-packages/torch/nn/modules/module.py:802\u001b[0m, in \u001b[0;36mModule._apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 800\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m recurse:\n\u001b[1;32m 801\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mchildren():\n\u001b[0;32m--> 802\u001b[0m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_apply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfn\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 804\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompute_should_use_set_data\u001b[39m(tensor, tensor_applied):\n\u001b[1;32m 805\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m torch\u001b[38;5;241m.\u001b[39m_has_compatible_shallow_copy_type(tensor, tensor_applied):\n\u001b[1;32m 806\u001b[0m \u001b[38;5;66;03m# If the new tensor has compatible tensor type as the existing tensor,\u001b[39;00m\n\u001b[1;32m 807\u001b[0m \u001b[38;5;66;03m# the current behavior is to change the tensor in-place using `.data =`,\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 812\u001b[0m \u001b[38;5;66;03m# global flag to let the user control whether they want the future\u001b[39;00m\n\u001b[1;32m 813\u001b[0m \u001b[38;5;66;03m# behavior of overwriting the existing tensor or not.\u001b[39;00m\n",
57
+ "File \u001b[0;32m~/miniconda3/envs/ldm/lib/python3.8/site-packages/torch/nn/modules/module.py:802\u001b[0m, in \u001b[0;36mModule._apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 800\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m recurse:\n\u001b[1;32m 801\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mchildren():\n\u001b[0;32m--> 802\u001b[0m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_apply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfn\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 804\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompute_should_use_set_data\u001b[39m(tensor, tensor_applied):\n\u001b[1;32m 805\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m torch\u001b[38;5;241m.\u001b[39m_has_compatible_shallow_copy_type(tensor, tensor_applied):\n\u001b[1;32m 806\u001b[0m \u001b[38;5;66;03m# If the new tensor has compatible tensor type as the existing tensor,\u001b[39;00m\n\u001b[1;32m 807\u001b[0m \u001b[38;5;66;03m# the current behavior is to change the tensor in-place using `.data =`,\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 812\u001b[0m \u001b[38;5;66;03m# global flag to let the user control whether they want the future\u001b[39;00m\n\u001b[1;32m 813\u001b[0m \u001b[38;5;66;03m# behavior of overwriting the existing tensor or not.\u001b[39;00m\n",
58
+ " \u001b[0;31m[... skipping similar frames: Module._apply at line 802 (1 times)]\u001b[0m\n",
59
+ "File \u001b[0;32m~/miniconda3/envs/ldm/lib/python3.8/site-packages/torch/nn/modules/module.py:802\u001b[0m, in \u001b[0;36mModule._apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 800\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m recurse:\n\u001b[1;32m 801\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mchildren():\n\u001b[0;32m--> 802\u001b[0m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_apply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfn\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 804\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompute_should_use_set_data\u001b[39m(tensor, tensor_applied):\n\u001b[1;32m 805\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m torch\u001b[38;5;241m.\u001b[39m_has_compatible_shallow_copy_type(tensor, tensor_applied):\n\u001b[1;32m 806\u001b[0m \u001b[38;5;66;03m# If the new tensor has compatible tensor type as the existing tensor,\u001b[39;00m\n\u001b[1;32m 807\u001b[0m \u001b[38;5;66;03m# the current behavior is to change the tensor in-place using `.data =`,\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 812\u001b[0m \u001b[38;5;66;03m# global flag to let the user control whether they want the future\u001b[39;00m\n\u001b[1;32m 813\u001b[0m \u001b[38;5;66;03m# behavior of overwriting the existing tensor or not.\u001b[39;00m\n",
60
+ "File \u001b[0;32m~/miniconda3/envs/ldm/lib/python3.8/site-packages/torch/nn/modules/module.py:825\u001b[0m, in \u001b[0;36mModule._apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 821\u001b[0m \u001b[38;5;66;03m# Tensors stored in modules are graph leaves, and we don't want to\u001b[39;00m\n\u001b[1;32m 822\u001b[0m \u001b[38;5;66;03m# track autograd history of `param_applied`, so we have to use\u001b[39;00m\n\u001b[1;32m 823\u001b[0m \u001b[38;5;66;03m# `with torch.no_grad():`\u001b[39;00m\n\u001b[1;32m 824\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mno_grad():\n\u001b[0;32m--> 825\u001b[0m param_applied \u001b[38;5;241m=\u001b[39m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43mparam\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 826\u001b[0m should_use_set_data \u001b[38;5;241m=\u001b[39m compute_should_use_set_data(param, param_applied)\n\u001b[1;32m 827\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m should_use_set_data:\n",
61
+ "File \u001b[0;32m~/miniconda3/envs/ldm/lib/python3.8/site-packages/torch/nn/modules/module.py:911\u001b[0m, in \u001b[0;36mModule.cuda.<locals>.<lambda>\u001b[0;34m(t)\u001b[0m\n\u001b[1;32m 894\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcuda\u001b[39m(\u001b[38;5;28mself\u001b[39m: T, device: Optional[Union[\u001b[38;5;28mint\u001b[39m, device]] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m T:\n\u001b[1;32m 895\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"Move all model parameters and buffers to the GPU.\u001b[39;00m\n\u001b[1;32m 896\u001b[0m \n\u001b[1;32m 897\u001b[0m \u001b[38;5;124;03m This also makes associated parameters and buffers different objects. So\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 909\u001b[0m \u001b[38;5;124;03m Module: self\u001b[39;00m\n\u001b[1;32m 910\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 911\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_apply(\u001b[38;5;28;01mlambda\u001b[39;00m t: \u001b[43mt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcuda\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m)\u001b[49m)\n",
62
+ "\u001b[0;31mRuntimeError\u001b[0m: CUDA error: out of memory\nCUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.\nFor debugging consider passing CUDA_LAUNCH_BLOCKING=1.\nCompile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.\n"
63
+ ]
64
+ }
65
+ ],
66
+ "source": [
67
+ "from omegaconf import OmegaConf\n",
68
+ "from ldm.models.autoencoder import AutoencoderKL\n",
69
+ "from ldm.models.diffusion.ddpm import LatentDiffusion\n",
70
+ "\n",
71
+ "import os\n",
72
+ "import torch\n",
73
+ "import utils\n",
74
+ "import utils_model\n",
75
+ "import utils_img\n",
76
+ "import torch.nn as nn\n",
77
+ "import numpy as np\n",
78
+ "from copy import deepcopy\n",
79
+ "from torchvision import transforms\n",
80
+ "import os\n",
81
+ "import pandas as pd\n",
82
+ "from torchvision.utils import save_image\n",
83
+ "from accelerate import Accelerator\n",
84
+ "accelerator = Accelerator()\n",
85
+ "\n",
86
+ "\n",
87
+ "apply_dlwt = True\n",
88
+ "ckpt_prefix = \"EW-LoRA_dlwt\" if apply_dlwt else \"EW-LoRA_fix_weights\"\n",
89
+ "exps_num = \"003-exps\"\n",
90
+ "\n",
91
+ "img_size = 256\n",
92
+ "batch_size = 4\n",
93
+ "seed = 0\n",
94
+ "ldm_config = \"/pubdata/ldd/projects/EW-LoRA/Watermarker/stable_signature/configs/stable-diffusion/v1-inference.yaml\"\n",
95
+ "ldm_ckpt = \"/pubdata/ldd/models/ldm_ckpts/sd-v1-4-full-ema.ckpt\"\n",
96
+ "msg_decoder_path = \"/pubdata/ldd/models/wm_encdec/hidden/ckpts/dec_48b_whit.torchscript.pt\"\n",
97
+ "val_dir = \"/pubdata/ldd/Datasets/coco2017/val2017\"\n",
98
+ "\n",
99
+ "torch.manual_seed(seed)\n",
100
+ "torch.cuda.manual_seed_all(seed)\n",
101
+ "np.random.seed(seed)\n",
102
+ "\n",
103
+ "# Loads LDM auto-encoder models\n",
104
+ "print(f'>>> Building LDM model with config {ldm_config} and weights from {ldm_ckpt}...')\n",
105
+ "config = OmegaConf.load(f\"{ldm_config}\")\n",
106
+ "ldm_ae: LatentDiffusion = utils_model.load_model_from_config(config, ldm_ckpt)\n",
107
+ "ldm_ae: AutoencoderKL = ldm_ae.first_stage_model\n",
108
+ "ldm_ae.eval()\n",
109
+ "ldm_ae.to(accelerator.device)\n",
110
+ "\n",
111
+ "# Loads hidden decoder\n",
112
+ "print(f'>>> Building hidden decoder with weights from {msg_decoder_path}...')\n",
113
+ "if 'torchscript' in msg_decoder_path: \n",
114
+ " msg_decoder = torch.jit.load(msg_decoder_path)\n",
115
+ "\n",
116
+ "msg_decoder.eval()\n",
117
+ "nbit = msg_decoder(torch.zeros(1, 3, 128, 128).to(accelerator.device)).shape[-1]\n",
118
+ "\n",
119
+ "# Freeze LDM and hidden decoder\n",
120
+ "for param in [*msg_decoder.parameters(), *ldm_ae.parameters()]:\n",
121
+ " param.requires_grad = False\n",
122
+ "\n",
123
+ "vqgan_transform = transforms.Compose([\n",
124
+ " transforms.Resize(img_size),\n",
125
+ " transforms.CenterCrop(img_size),\n",
126
+ " transforms.ToTensor(),\n",
127
+ " utils_img.normalize_vqgan,\n",
128
+ "])\n",
129
+ "val_loader = utils.get_dataloader(val_dir, vqgan_transform, 1, num_imgs=1000, shuffle=False, num_workers=4, collate_fn=None)\n",
130
+ "vqgan_to_imnet = transforms.Compose([utils_img.unnormalize_vqgan, utils_img.normalize_img])\n",
131
+ "\n",
132
+ "key = torch.randint(0, 2, (1, nbit), dtype=torch.float32, device=accelerator.device)\n",
133
+ "key_str = \"\".join([ str(int(ii)) for ii in key.tolist()[0]])\n",
134
+ "print(f'Key: {key_str}')\n",
135
+ "\n",
136
+ "# Copy the LDM decoder and finetune the copy\n",
137
+ "ldm_decoder = deepcopy(ldm_ae)\n",
138
+ "ldm_decoder.encoder = nn.Identity()\n",
139
+ "ldm_decoder.quant_conv = nn.Identity()\n",
140
+ "# ldm_decoder.to(device)\n",
141
+ "for param in ldm_decoder.parameters():\n",
142
+ " param.requires_grad = False\n",
143
+ "\n",
144
+ "from peft import LoraConfig, get_peft_model\n",
145
+ "\n",
146
+ "wm_target = \"upsample.conv\"\n",
147
+ "rank = 4\n",
148
+ "lora_alpha = 4\n",
149
+ "\n",
150
+ "# Select the lora target model\n",
151
+ "def find_layers(model, wm_target=None):\n",
152
+ " layers = []\n",
153
+ " for name, layer in model.named_modules():\n",
154
+ " if any(wm_target in name.lower() for keyword in name):\n",
155
+ " layers.append(name)\n",
156
+ " all_layers = [name for name, _ in model.named_modules()]\n",
157
+ " return layers, all_layers\n",
158
+ "wm_target, _ = find_layers(ldm_decoder.decoder, wm_target)\n",
159
+ "\n",
160
+ "vae_lora_config = LoraConfig(\n",
161
+ " r=rank,\n",
162
+ " lora_alpha=lora_alpha,\n",
163
+ " init_lora_weights=\"gaussian\",\n",
164
+ " target_modules=wm_target,\n",
165
+ ")\n",
166
+ "vae_decoder_copy = get_peft_model(ldm_decoder.decoder, vae_lora_config)\n",
167
+ "trainable_params, all_param = vae_decoder_copy.get_nb_trainable_parameters()\n",
168
+ "print(f\"Parameters for PEFT watermarking: \"\n",
169
+ " f\"Trainable params: {trainable_params/1e6:.5f}M || \"\n",
170
+ " f\"PEFT Model size: {trainable_params*4/(1024*1024):.5f}M || \"\n",
171
+ " f\"All params: {all_param/1e6:.5f}M || \"\n",
172
+ " f\"Trainable%: {100 * trainable_params / all_param:.5f}\"\n",
173
+ ")\n",
174
+ "ldm_decoder.decoder = vae_decoder_copy\n",
175
+ "\n",
176
+ "saveimgs_dir = f'/pubdata/ldd/projects/EW-LoRA/experiments/evals/save_imgs_{ckpt_prefix}'\n",
177
+ "os.makedirs(saveimgs_dir, exist_ok=True)\n",
178
+ "vae_decoder_ckpt_dir = f'/pubdata/ldd/projects/EW-LoRA/watermarker/stable_signature/outputs/train_{ckpt_prefix}/{exps_num}/checkpoints/checkpoint-latest'\n",
179
+ "\n",
180
+ "msg_decoder, ldm_decoder, val_loader, key = accelerator.prepare(\n",
181
+ " msg_decoder, ldm_decoder, val_loader, key\n",
182
+ ")\n",
183
+ "accelerator.load_state(os.path.join(vae_decoder_ckpt_dir)) # Load the LoRA watermark checkpoint\n",
184
+ "print(f\"Loaded the Stable Signature checkpoint from {vae_decoder_ckpt_dir}\")\n",
185
+ "for param in ldm_decoder.parameters():\n",
186
+ " param.requires_grad = False\n",
187
+ "\n",
188
+ "df_EWLoRA = pd.DataFrame(columns=[\n",
189
+ " \"iteration\",\n",
190
+ " \"psnr\",\n",
191
+ " \"bit_acc_avg\",\n",
192
+ "])\n",
193
+ "attacks = {\n",
194
+ " 'none': lambda x: x,\n",
195
+ " 'crop_01': lambda x: utils_img.center_crop(x, 0.1),\n",
196
+ " 'crop_05': lambda x: utils_img.center_crop(x, 0.5),\n",
197
+ " 'rot_25': lambda x: utils_img.rotate(x, 25),\n",
198
+ " 'rot_90': lambda x: utils_img.rotate(x, 90),\n",
199
+ " 'resize_03': lambda x: utils_img.resize(x, 0.3),\n",
200
+ " 'resize_07': lambda x: utils_img.resize(x, 0.7),\n",
201
+ " 'brightness_1p5': lambda x: utils_img.adjust_brightness(x, 1.5),\n",
202
+ " 'brightness_2': lambda x: utils_img.adjust_brightness(x, 2),\n",
203
+ " 'contrast_1p5': lambda x: utils_img.adjust_contrast(x, 1.5),\n",
204
+ " 'contrast_2': lambda x: utils_img.adjust_contrast(x, 2),\n",
205
+ " 'sharpness_1p5': lambda x: utils_img.adjust_sharpness(x, 1.5),\n",
206
+ " 'sharpness_2': lambda x: utils_img.adjust_sharpness(x, 2),\n",
207
+ " 'jpeg_80': lambda x: utils_img.jpeg_compress(x, 80),\n",
208
+ " 'jpeg_50': lambda x: utils_img.jpeg_compress(x, 50),\n",
209
+ "}\n",
210
+ "for ii, imgs in enumerate(val_loader):\n",
211
+ " imgs = imgs.to(accelerator.device)\n",
212
+ " keys = key.repeat(imgs.shape[0], 1)\n",
213
+ "\n",
214
+ " imgs_z = ldm_ae.encode(imgs) # b c h w -> b z h/f w/f\n",
215
+ " imgs_z = imgs_z.mode()\n",
216
+ "\n",
217
+ " # decode latents with original and finetuned decoder\n",
218
+ " imgs_d0 = ldm_ae.decode(imgs_z) # b z h/f w/f -> b c h w\n",
219
+ " imgs_w = ldm_decoder.decode(imgs_z) # b z h/f w/f -> b c h w\n",
220
+ "\n",
221
+ " # extract watermark\n",
222
+ " decoded = msg_decoder(vqgan_to_imnet(imgs_w)) # b c h w -> b k\n",
223
+ " diff = (~torch.logical_xor(decoded>0, keys>0)) # b k -> b k\n",
224
+ " bit_accs = torch.sum(diff, dim=-1) / diff.shape[-1] # b k -> b\n",
225
+ "\n",
226
+ " log_stats = {\n",
227
+ " \"iteration\": ii,\n",
228
+ " \"psnr\": utils_img.psnr(imgs_w, imgs_d0).mean().item(),\n",
229
+ " \"bit_acc_avg\": torch.mean(bit_accs).item(),\n",
230
+ " }\n",
231
+ " \n",
232
+ " for name, attack in attacks.items():\n",
233
+ " imgs_aug = attack(vqgan_to_imnet(imgs_w))\n",
234
+ " decoded = msg_decoder(imgs_aug) # b c h w -> b k\n",
235
+ " diff = (~torch.logical_xor(decoded>0, keys>0)) # b k -> b k\n",
236
+ " bit_accs = torch.sum(diff, dim=-1) / diff.shape[-1] # b k -> b\n",
237
+ " word_accs = (bit_accs == 1) # b\n",
238
+ " log_stats[f'bit_acc_{name}'] = torch.mean(bit_accs).item()\n",
239
+ "\n",
240
+ " df_EWLoRA = df_EWLoRA._append(log_stats, ignore_index=True)\n",
241
+ " save_image(utils_img.unnormalize_vqgan(imgs_w), os.path.join(saveimgs_dir, f'{ii:03}_wm_orig.png'))\n",
242
+ "df_EWLoRA.to_csv(os.path.join(saveimgs_dir, 'bitacc.csv'), index=False)"
243
+ ]
244
+ }
245
+ ],
246
+ "metadata": {
247
+ "kernelspec": {
248
+ "display_name": "ldm",
249
+ "language": "python",
250
+ "name": "python3"
251
+ },
252
+ "language_info": {
253
+ "codemirror_mode": {
254
+ "name": "ipython",
255
+ "version": 3
256
+ },
257
+ "file_extension": ".py",
258
+ "mimetype": "text/x-python",
259
+ "name": "python",
260
+ "nbconvert_exporter": "python",
261
+ "pygments_lexer": "ipython3",
262
+ "version": "3.8.18"
263
+ }
264
+ },
265
+ "nbformat": 4,
266
+ "nbformat_minor": 2
267
+ }
watermarker/LaWa/examples/gen_wmimgs_EW-LoRA_fix_weights.ipynb ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/home/ldd/miniconda3/envs/ldm/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
13
+ " from .autonotebook import tqdm as notebook_tqdm\n"
14
+ ]
15
+ },
16
+ {
17
+ "name": "stdout",
18
+ "output_type": "stream",
19
+ "text": [
20
+ "No module 'xformers'. Proceeding without it.\n"
21
+ ]
22
+ },
23
+ {
24
+ "name": "stderr",
25
+ "output_type": "stream",
26
+ "text": [
27
+ "/home/ldd/miniconda3/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/utilities/distributed.py:258: LightningDeprecationWarning: `pytorch_lightning.utilities.distributed.rank_zero_only` has been deprecated in v1.8.1 and will be removed in v2.0.0. You can import it from `pytorch_lightning.utilities` instead.\n",
28
+ " rank_zero_deprecation(\n"
29
+ ]
30
+ },
31
+ {
32
+ "name": "stdout",
33
+ "output_type": "stream",
34
+ "text": [
35
+ ">>> Building LDM model with config /pubdata/ldd/projects/EW-LoRA/Watermarker/stable_signature/configs/stable-diffusion/v1-inference.yaml and weights from /pubdata/ldd/models/ldm_ckpts/sd-v1-4-full-ema.ckpt...\n",
36
+ "Loading model from /pubdata/ldd/models/ldm_ckpts/sd-v1-4-full-ema.ckpt\n",
37
+ "Global Step: 470000\n",
38
+ "LatentDiffusion: Running in eps-prediction mode\n",
39
+ "DiffusionWrapper has 859.52 M params.\n",
40
+ "making attention of type 'vanilla' with 512 in_channels\n",
41
+ "Working with z of shape (1, 4, 32, 32) = 4096 dimensions.\n",
42
+ "making attention of type 'vanilla' with 512 in_channels\n"
43
+ ]
44
+ },
45
+ {
46
+ "ename": "OutOfMemoryError",
47
+ "evalue": "CUDA out of memory. Tried to allocate 58.00 MiB. GPU 0 has a total capacity of 23.64 GiB of which 45.44 MiB is free. Process 3764900 has 3.92 GiB memory in use. Process 3831541 has 582.00 MiB memory in use. Process 3843346 has 4.55 GiB memory in use. Process 3844737 has 4.55 GiB memory in use. Process 3849706 has 4.55 GiB memory in use. Process 3850599 has 4.55 GiB memory in use. Including non-PyTorch memory, this process has 892.00 MiB memory in use. Of the allocated memory 482.41 MiB is allocated by PyTorch, and 25.59 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)",
48
+ "output_type": "error",
49
+ "traceback": [
50
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
51
+ "\u001b[0;31mOutOfMemoryError\u001b[0m Traceback (most recent call last)",
52
+ "Cell \u001b[0;32mIn[1], line 40\u001b[0m\n\u001b[1;32m 38\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m>>> Building LDM model with config \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mldm_config\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m and weights from \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mldm_ckpt\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m...\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 39\u001b[0m config \u001b[38;5;241m=\u001b[39m OmegaConf\u001b[38;5;241m.\u001b[39mload(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mldm_config\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 40\u001b[0m ldm_ae: LatentDiffusion \u001b[38;5;241m=\u001b[39m \u001b[43mutils_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_model_from_config\u001b[49m\u001b[43m(\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mldm_ckpt\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 41\u001b[0m ldm_ae: AutoencoderKL \u001b[38;5;241m=\u001b[39m ldm_ae\u001b[38;5;241m.\u001b[39mfirst_stage_model\n\u001b[1;32m 42\u001b[0m ldm_ae\u001b[38;5;241m.\u001b[39meval()\n",
53
+ "File \u001b[0;32m/pubdata/ldd/projects/EW-LoRA/Watermarker/stable_signature/utils_model.py:149\u001b[0m, in \u001b[0;36mload_model_from_config\u001b[0;34m(config, ckpt, verbose)\u001b[0m\n\u001b[1;32m 146\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124munexpected keys:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28mprint\u001b[39m(u)\n\u001b[0;32m--> 149\u001b[0m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcuda\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 150\u001b[0m model\u001b[38;5;241m.\u001b[39meval()\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m model\n",
54
+ "File \u001b[0;32m~/miniconda3/envs/ldm/lib/python3.8/site-packages/lightning_fabric/utilities/device_dtype_mixin.py:73\u001b[0m, in \u001b[0;36m_DeviceDtypeModuleMixin.cuda\u001b[0;34m(self, device)\u001b[0m\n\u001b[1;32m 71\u001b[0m device \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mdevice(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcuda\u001b[39m\u001b[38;5;124m\"\u001b[39m, index\u001b[38;5;241m=\u001b[39mdevice)\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__update_properties(device\u001b[38;5;241m=\u001b[39mdevice)\n\u001b[0;32m---> 73\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcuda\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdevice\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdevice\u001b[49m\u001b[43m)\u001b[49m\n",
55
+ "File \u001b[0;32m~/miniconda3/envs/ldm/lib/python3.8/site-packages/torch/nn/modules/module.py:911\u001b[0m, in \u001b[0;36mModule.cuda\u001b[0;34m(self, device)\u001b[0m\n\u001b[1;32m 894\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcuda\u001b[39m(\u001b[38;5;28mself\u001b[39m: T, device: Optional[Union[\u001b[38;5;28mint\u001b[39m, device]] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m T:\n\u001b[1;32m 895\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"Move all model parameters and buffers to the GPU.\u001b[39;00m\n\u001b[1;32m 896\u001b[0m \n\u001b[1;32m 897\u001b[0m \u001b[38;5;124;03m This also makes associated parameters and buffers different objects. So\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 909\u001b[0m \u001b[38;5;124;03m Module: self\u001b[39;00m\n\u001b[1;32m 910\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 911\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_apply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcuda\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n",
56
+ "File \u001b[0;32m~/miniconda3/envs/ldm/lib/python3.8/site-packages/torch/nn/modules/module.py:802\u001b[0m, in \u001b[0;36mModule._apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 800\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m recurse:\n\u001b[1;32m 801\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mchildren():\n\u001b[0;32m--> 802\u001b[0m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_apply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfn\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 804\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompute_should_use_set_data\u001b[39m(tensor, tensor_applied):\n\u001b[1;32m 805\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m torch\u001b[38;5;241m.\u001b[39m_has_compatible_shallow_copy_type(tensor, tensor_applied):\n\u001b[1;32m 806\u001b[0m \u001b[38;5;66;03m# If the new tensor has compatible tensor type as the existing tensor,\u001b[39;00m\n\u001b[1;32m 807\u001b[0m \u001b[38;5;66;03m# the current behavior is to change the tensor in-place using `.data =`,\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 812\u001b[0m \u001b[38;5;66;03m# global flag to let the user control whether they want the future\u001b[39;00m\n\u001b[1;32m 813\u001b[0m \u001b[38;5;66;03m# behavior of overwriting the existing tensor or not.\u001b[39;00m\n",
57
+ "File \u001b[0;32m~/miniconda3/envs/ldm/lib/python3.8/site-packages/torch/nn/modules/module.py:802\u001b[0m, in \u001b[0;36mModule._apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 800\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m recurse:\n\u001b[1;32m 801\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mchildren():\n\u001b[0;32m--> 802\u001b[0m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_apply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfn\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 804\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompute_should_use_set_data\u001b[39m(tensor, tensor_applied):\n\u001b[1;32m 805\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m torch\u001b[38;5;241m.\u001b[39m_has_compatible_shallow_copy_type(tensor, tensor_applied):\n\u001b[1;32m 806\u001b[0m \u001b[38;5;66;03m# If the new tensor has compatible tensor type as the existing tensor,\u001b[39;00m\n\u001b[1;32m 807\u001b[0m \u001b[38;5;66;03m# the current behavior is to change the tensor in-place using `.data =`,\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 812\u001b[0m \u001b[38;5;66;03m# global flag to let the user control whether they want the future\u001b[39;00m\n\u001b[1;32m 813\u001b[0m \u001b[38;5;66;03m# behavior of overwriting the existing tensor or not.\u001b[39;00m\n",
58
+ " \u001b[0;31m[... skipping similar frames: Module._apply at line 802 (4 times)]\u001b[0m\n",
59
+ "File \u001b[0;32m~/miniconda3/envs/ldm/lib/python3.8/site-packages/torch/nn/modules/module.py:802\u001b[0m, in \u001b[0;36mModule._apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 800\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m recurse:\n\u001b[1;32m 801\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mchildren():\n\u001b[0;32m--> 802\u001b[0m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_apply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfn\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 804\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompute_should_use_set_data\u001b[39m(tensor, tensor_applied):\n\u001b[1;32m 805\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m torch\u001b[38;5;241m.\u001b[39m_has_compatible_shallow_copy_type(tensor, tensor_applied):\n\u001b[1;32m 806\u001b[0m \u001b[38;5;66;03m# If the new tensor has compatible tensor type as the existing tensor,\u001b[39;00m\n\u001b[1;32m 807\u001b[0m \u001b[38;5;66;03m# the current behavior is to change the tensor in-place using `.data =`,\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 812\u001b[0m \u001b[38;5;66;03m# global flag to let the user control whether they want the future\u001b[39;00m\n\u001b[1;32m 813\u001b[0m \u001b[38;5;66;03m# behavior of overwriting the existing tensor or not.\u001b[39;00m\n",
60
+ "File \u001b[0;32m~/miniconda3/envs/ldm/lib/python3.8/site-packages/torch/nn/modules/module.py:825\u001b[0m, in \u001b[0;36mModule._apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 821\u001b[0m \u001b[38;5;66;03m# Tensors stored in modules are graph leaves, and we don't want to\u001b[39;00m\n\u001b[1;32m 822\u001b[0m \u001b[38;5;66;03m# track autograd history of `param_applied`, so we have to use\u001b[39;00m\n\u001b[1;32m 823\u001b[0m \u001b[38;5;66;03m# `with torch.no_grad():`\u001b[39;00m\n\u001b[1;32m 824\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mno_grad():\n\u001b[0;32m--> 825\u001b[0m param_applied \u001b[38;5;241m=\u001b[39m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43mparam\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 826\u001b[0m should_use_set_data \u001b[38;5;241m=\u001b[39m compute_should_use_set_data(param, param_applied)\n\u001b[1;32m 827\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m should_use_set_data:\n",
61
+ "File \u001b[0;32m~/miniconda3/envs/ldm/lib/python3.8/site-packages/torch/nn/modules/module.py:911\u001b[0m, in \u001b[0;36mModule.cuda.<locals>.<lambda>\u001b[0;34m(t)\u001b[0m\n\u001b[1;32m 894\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcuda\u001b[39m(\u001b[38;5;28mself\u001b[39m: T, device: Optional[Union[\u001b[38;5;28mint\u001b[39m, device]] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m T:\n\u001b[1;32m 895\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"Move all model parameters and buffers to the GPU.\u001b[39;00m\n\u001b[1;32m 896\u001b[0m \n\u001b[1;32m 897\u001b[0m \u001b[38;5;124;03m This also makes associated parameters and buffers different objects. So\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 909\u001b[0m \u001b[38;5;124;03m Module: self\u001b[39;00m\n\u001b[1;32m 910\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 911\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_apply(\u001b[38;5;28;01mlambda\u001b[39;00m t: \u001b[43mt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcuda\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m)\u001b[49m)\n",
62
+ "\u001b[0;31mOutOfMemoryError\u001b[0m: CUDA out of memory. Tried to allocate 58.00 MiB. GPU 0 has a total capacity of 23.64 GiB of which 45.44 MiB is free. Process 3764900 has 3.92 GiB memory in use. Process 3831541 has 582.00 MiB memory in use. Process 3843346 has 4.55 GiB memory in use. Process 3844737 has 4.55 GiB memory in use. Process 3849706 has 4.55 GiB memory in use. Process 3850599 has 4.55 GiB memory in use. Including non-PyTorch memory, this process has 892.00 MiB memory in use. Of the allocated memory 482.41 MiB is allocated by PyTorch, and 25.59 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)"
63
+ ]
64
+ },
65
+ {
66
+ "ename": "",
67
+ "evalue": "",
68
+ "output_type": "error",
69
+ "traceback": [
70
+ "\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
71
+ ]
72
+ }
73
+ ],
74
+ "source": [
75
+ "from omegaconf import OmegaConf\n",
76
+ "from ldm.models.autoencoder import AutoencoderKL\n",
77
+ "from ldm.models.diffusion.ddpm import LatentDiffusion\n",
78
+ "\n",
79
+ "import os\n",
80
+ "import torch\n",
81
+ "import utils\n",
82
+ "import utils_model\n",
83
+ "import utils_img\n",
84
+ "import torch.nn as nn\n",
85
+ "import numpy as np\n",
86
+ "from copy import deepcopy\n",
87
+ "from torchvision import transforms\n",
88
+ "import os\n",
89
+ "import pandas as pd\n",
90
+ "from torchvision.utils import save_image\n",
91
+ "from accelerate import Accelerator\n",
92
+ "accelerator = Accelerator()\n",
93
+ "\n",
94
+ "\n",
95
+ "apply_dlwt = False\n",
96
+ "ckpt_prefix = \"EW-LoRA_dlwt\" if apply_dlwt else \"EW-LoRA_fix_weights\"\n",
97
+ "exps_num = \"002-exps\"\n",
98
+ "\n",
99
+ "img_size = 256\n",
100
+ "batch_size = 4\n",
101
+ "seed = 0\n",
102
+ "ldm_config = \"/pubdata/ldd/projects/EW-LoRA/watermarker/stable_signature/configs/stable-diffusion/v1-inference.yaml\"\n",
103
+ "ldm_ckpt = \"/pubdata/ldd/models/ldm_ckpts/sd-v1-4-full-ema.ckpt\"\n",
104
+ "msg_decoder_path = \"/pubdata/ldd/models/wm_encdec/hidden/ckpts/dec_48b_whit.torchscript.pt\"\n",
105
+ "val_dir = \"/pubdata/ldd/Datasets/coco2017/val2017\"\n",
106
+ "\n",
107
+ "torch.manual_seed(seed)\n",
108
+ "torch.cuda.manual_seed_all(seed)\n",
109
+ "np.random.seed(seed)\n",
110
+ "\n",
111
+ "# Loads LDM auto-encoder models\n",
112
+ "print(f'>>> Building LDM model with config {ldm_config} and weights from {ldm_ckpt}...')\n",
113
+ "config = OmegaConf.load(f\"{ldm_config}\")\n",
114
+ "ldm_ae: LatentDiffusion = utils_model.load_model_from_config(config, ldm_ckpt)\n",
115
+ "ldm_ae: AutoencoderKL = ldm_ae.first_stage_model\n",
116
+ "ldm_ae.eval()\n",
117
+ "ldm_ae.to(accelerator.device)\n",
118
+ "\n",
119
+ "# Loads hidden decoder\n",
120
+ "print(f'>>> Building hidden decoder with weights from {msg_decoder_path}...')\n",
121
+ "if 'torchscript' in msg_decoder_path: \n",
122
+ " msg_decoder = torch.jit.load(msg_decoder_path)\n",
123
+ "\n",
124
+ "msg_decoder.eval()\n",
125
+ "nbit = msg_decoder(torch.zeros(1, 3, 128, 128).to(accelerator.device)).shape[-1]\n",
126
+ "\n",
127
+ "# Freeze LDM and hidden decoder\n",
128
+ "for param in [*msg_decoder.parameters(), *ldm_ae.parameters()]:\n",
129
+ " param.requires_grad = False\n",
130
+ "\n",
131
+ "vqgan_transform = transforms.Compose([\n",
132
+ " transforms.Resize(img_size),\n",
133
+ " transforms.CenterCrop(img_size),\n",
134
+ " transforms.ToTensor(),\n",
135
+ " utils_img.normalize_vqgan,\n",
136
+ "])\n",
137
+ "val_loader = utils.get_dataloader(val_dir, vqgan_transform, 1, num_imgs=1000, shuffle=False, num_workers=4, collate_fn=None)\n",
138
+ "vqgan_to_imnet = transforms.Compose([utils_img.unnormalize_vqgan, utils_img.normalize_img])\n",
139
+ "\n",
140
+ "key = torch.randint(0, 2, (1, nbit), dtype=torch.float32, device=accelerator.device)\n",
141
+ "key_str = \"\".join([ str(int(ii)) for ii in key.tolist()[0]])\n",
142
+ "print(f'Key: {key_str}')\n",
143
+ "\n",
144
+ "# Copy the LDM decoder and finetune the copy\n",
145
+ "ldm_decoder = deepcopy(ldm_ae)\n",
146
+ "ldm_decoder.encoder = nn.Identity()\n",
147
+ "ldm_decoder.quant_conv = nn.Identity()\n",
148
+ "# ldm_decoder.to(device)\n",
149
+ "for param in ldm_decoder.parameters():\n",
150
+ " param.requires_grad = False\n",
151
+ "\n",
152
+ "from peft import LoraConfig, get_peft_model\n",
153
+ "\n",
154
+ "wm_target = \"upsample.conv\"\n",
155
+ "rank = 4\n",
156
+ "lora_alpha = 4\n",
157
+ "\n",
158
+ "# Select the lora target model\n",
159
+ "def find_layers(model, wm_target=None):\n",
160
+ " layers = []\n",
161
+ " for name, layer in model.named_modules():\n",
162
+ " if wm_target in name.lower():\n",
163
+ " layers.append(name)\n",
164
+ " all_layers = [name for name, _ in model.named_modules()]\n",
165
+ " return layers, all_layers\n",
166
+ "wm_target, _ = find_layers(ldm_decoder.decoder, wm_target)\n",
167
+ "\n",
168
+ "vae_lora_config = LoraConfig(\n",
169
+ " r=rank,\n",
170
+ " lora_alpha=lora_alpha,\n",
171
+ " init_lora_weights=\"gaussian\",\n",
172
+ " target_modules=wm_target,\n",
173
+ ")\n",
174
+ "vae_decoder_copy = get_peft_model(ldm_decoder.decoder, vae_lora_config)\n",
175
+ "trainable_params, all_param = vae_decoder_copy.get_nb_trainable_parameters()\n",
176
+ "print(f\"Parameters for PEFT watermarking: \"\n",
177
+ " f\"Trainable params: {trainable_params/1e6:.5f}M || \"\n",
178
+ " f\"PEFT Model size: {trainable_params*4/(1024*1024):.5f}M || \"\n",
179
+ " f\"All params: {all_param/1e6:.5f}M || \"\n",
180
+ " f\"Trainable%: {100 * trainable_params / all_param:.5f}\"\n",
181
+ ")\n",
182
+ "ldm_decoder.decoder = vae_decoder_copy\n",
183
+ "\n",
184
+ "saveimgs_dir = f'/pubdata/ldd/projects/EW-LoRA/experiments/evals/save_imgs_{ckpt_prefix}'\n",
185
+ "os.makedirs(saveimgs_dir, exist_ok=True)\n",
186
+ "vae_decoder_ckpt_dir = f'/pubdata/ldd/projects/EW-LoRA/watermarker/stable_signature/outputs/train_{ckpt_prefix}/{exps_num}/checkpoints/checkpoint-latest'\n",
187
+ "\n",
188
+ "msg_decoder, ldm_decoder, val_loader, key = accelerator.prepare(\n",
189
+ " msg_decoder, ldm_decoder, val_loader, key\n",
190
+ ")\n",
191
+ "accelerator.load_state(os.path.join(vae_decoder_ckpt_dir)) # Load the LoRA watermark checkpoint\n",
192
+ "print(f\"Loaded the Stable Signature checkpoint from {vae_decoder_ckpt_dir}\")\n",
193
+ "for param in ldm_decoder.parameters():\n",
194
+ " param.requires_grad = False\n",
195
+ "\n",
196
+ "df_EWLoRA = pd.DataFrame(columns=[\n",
197
+ " \"iteration\",\n",
198
+ " \"psnr\",\n",
199
+ " \"bit_acc_avg\",\n",
200
+ "])\n",
201
+ "attacks = {\n",
202
+ " 'none': lambda x: x,\n",
203
+ " 'crop_01': lambda x: utils_img.center_crop(x, 0.1),\n",
204
+ " 'crop_05': lambda x: utils_img.center_crop(x, 0.5),\n",
205
+ " 'rot_25': lambda x: utils_img.rotate(x, 25),\n",
206
+ " 'rot_90': lambda x: utils_img.rotate(x, 90),\n",
207
+ " 'resize_03': lambda x: utils_img.resize(x, 0.3),\n",
208
+ " 'resize_07': lambda x: utils_img.resize(x, 0.7),\n",
209
+ " 'brightness_1p5': lambda x: utils_img.adjust_brightness(x, 1.5),\n",
210
+ " 'brightness_2': lambda x: utils_img.adjust_brightness(x, 2),\n",
211
+ " 'contrast_1p5': lambda x: utils_img.adjust_contrast(x, 1.5),\n",
212
+ " 'contrast_2': lambda x: utils_img.adjust_contrast(x, 2),\n",
213
+ " 'sharpness_1p5': lambda x: utils_img.adjust_sharpness(x, 1.5),\n",
214
+ " 'sharpness_2': lambda x: utils_img.adjust_sharpness(x, 2),\n",
215
+ " 'jpeg_80': lambda x: utils_img.jpeg_compress(x, 80),\n",
216
+ " 'jpeg_50': lambda x: utils_img.jpeg_compress(x, 50),\n",
217
+ "}\n",
218
+ "for ii, imgs in enumerate(val_loader):\n",
219
+ " imgs = imgs.to(accelerator.device)\n",
220
+ " keys = key.repeat(imgs.shape[0], 1)\n",
221
+ "\n",
222
+ " imgs_z = ldm_ae.encode(imgs) # b c h w -> b z h/f w/f\n",
223
+ " imgs_z = imgs_z.mode()\n",
224
+ "\n",
225
+ " # decode latents with original and finetuned decoder\n",
226
+ " imgs_d0 = ldm_ae.decode(imgs_z) # b z h/f w/f -> b c h w\n",
227
+ " imgs_w = ldm_decoder.decode(imgs_z) # b z h/f w/f -> b c h w\n",
228
+ "\n",
229
+ " # extract watermark\n",
230
+ " decoded = msg_decoder(vqgan_to_imnet(imgs_w)) # b c h w -> b k\n",
231
+ " diff = (~torch.logical_xor(decoded>0, keys>0)) # b k -> b k\n",
232
+ " bit_accs = torch.sum(diff, dim=-1) / diff.shape[-1] # b k -> b\n",
233
+ "\n",
234
+ " log_stats = {\n",
235
+ " \"iteration\": ii,\n",
236
+ " \"psnr\": utils_img.psnr(imgs_w, imgs_d0).mean().item(),\n",
237
+ " \"bit_acc_avg\": torch.mean(bit_accs).item(),\n",
238
+ " }\n",
239
+ " \n",
240
+ " for name, attack in attacks.items():\n",
241
+ " imgs_aug = attack(vqgan_to_imnet(imgs_w))\n",
242
+ " decoded = msg_decoder(imgs_aug) # b c h w -> b k\n",
243
+ " diff = (~torch.logical_xor(decoded>0, keys>0)) # b k -> b k\n",
244
+ " bit_accs = torch.sum(diff, dim=-1) / diff.shape[-1] # b k -> b\n",
245
+ " word_accs = (bit_accs == 1) # b\n",
246
+ " log_stats[f'bit_acc_{name}'] = torch.mean(bit_accs).item()\n",
247
+ "\n",
248
+ " df_EWLoRA = df_EWLoRA._append(log_stats, ignore_index=True)\n",
249
+ " save_image(utils_img.unnormalize_vqgan(imgs_w), os.path.join(saveimgs_dir, f'{ii:03}_wm_orig.png'))\n",
250
+ "df_EWLoRA.to_csv(os.path.join(saveimgs_dir, 'bitacc.csv'), index=False)"
251
+ ]
252
+ }
253
+ ],
254
+ "metadata": {
255
+ "kernelspec": {
256
+ "display_name": "ldm",
257
+ "language": "python",
258
+ "name": "python3"
259
+ },
260
+ "language_info": {
261
+ "codemirror_mode": {
262
+ "name": "ipython",
263
+ "version": 3
264
+ },
265
+ "file_extension": ".py",
266
+ "mimetype": "text/x-python",
267
+ "name": "python",
268
+ "nbconvert_exporter": "python",
269
+ "pygments_lexer": "ipython3",
270
+ "version": "3.8.18"
271
+ }
272
+ },
273
+ "nbformat": 4,
274
+ "nbformat_minor": 2
275
+ }
watermarker/LaWa/examples/gen_wmimgs_SS_dlwt.ipynb ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/home/ldd/miniconda3/envs/ldm/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
13
+ " from .autonotebook import tqdm as notebook_tqdm\n"
14
+ ]
15
+ },
16
+ {
17
+ "name": "stdout",
18
+ "output_type": "stream",
19
+ "text": [
20
+ "No module 'xformers'. Proceeding without it.\n"
21
+ ]
22
+ },
23
+ {
24
+ "name": "stderr",
25
+ "output_type": "stream",
26
+ "text": [
27
+ "/home/ldd/miniconda3/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/utilities/distributed.py:258: LightningDeprecationWarning: `pytorch_lightning.utilities.distributed.rank_zero_only` has been deprecated in v1.8.1 and will be removed in v2.0.0. You can import it from `pytorch_lightning.utilities` instead.\n",
28
+ " rank_zero_deprecation(\n"
29
+ ]
30
+ },
31
+ {
32
+ "name": "stdout",
33
+ "output_type": "stream",
34
+ "text": [
35
+ ">>> Building LDM model with config /pubdata/ldd/projects/EW-LoRA/Watermarker/stable_signature/configs/stable-diffusion/v1-inference.yaml and weights from /pubdata/ldd/models/ldm_ckpts/sd-v1-4-full-ema.ckpt...\n",
36
+ "Loading model from /pubdata/ldd/models/ldm_ckpts/sd-v1-4-full-ema.ckpt\n",
37
+ "Global Step: 470000\n",
38
+ "LatentDiffusion: Running in eps-prediction mode\n",
39
+ "DiffusionWrapper has 859.52 M params.\n",
40
+ "making attention of type 'vanilla' with 512 in_channels\n",
41
+ "Working with z of shape (1, 4, 32, 32) = 4096 dimensions.\n",
42
+ "making attention of type 'vanilla' with 512 in_channels\n",
43
+ ">>> Building hidden decoder with weights from /pubdata/ldd/models/wm_encdec/hidden/ckpts/dec_48b_whit.torchscript.pt...\n",
44
+ "Key: 111010110101000001010111010011010100010000100111\n",
45
+ "Loaded the Stable Signature checkpoint from /pubdata/ldd/projects/EW-LoRA/Watermarker/stable_signature/outputs/train_SS_dlwt/005-exps/checkpoints/checkpoint-latest\n"
46
+ ]
47
+ },
48
+ {
49
+ "ename": "",
50
+ "evalue": "",
51
+ "output_type": "error",
52
+ "traceback": [
53
+ "\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
54
+ ]
55
+ }
56
+ ],
57
+ "source": [
58
+ "from omegaconf import OmegaConf\n",
59
+ "from ldm.models.autoencoder import AutoencoderKL\n",
60
+ "from ldm.models.diffusion.ddpm import LatentDiffusion\n",
61
+ "\n",
62
+ "import os\n",
63
+ "import torch\n",
64
+ "import utils\n",
65
+ "import utils_model\n",
66
+ "import utils_img\n",
67
+ "import torch.nn as nn\n",
68
+ "import numpy as np\n",
69
+ "from copy import deepcopy\n",
70
+ "from torchvision import transforms\n",
71
+ "import os\n",
72
+ "import pandas as pd\n",
73
+ "from torchvision.utils import save_image\n",
74
+ "from accelerate import Accelerator\n",
75
+ "accelerator = Accelerator()\n",
76
+ "\n",
77
+ "\n",
78
+ "apply_dlwt = True\n",
79
+ "ckpt_prefix = \"SS_dlwt\" if apply_dlwt else \"SS_fix_weights\"\n",
80
+ "exps_num = \"005-exps\"\n",
81
+ "\n",
82
+ "img_size = 256\n",
83
+ "batch_size = 4\n",
84
+ "seed = 0\n",
85
+ "\n",
86
+ "ldm_config = \"/pubdata/ldd/projects/EW-LoRA/watermarker/stable_signature/configs/stable-diffusion/v1-inference.yaml\"\n",
87
+ "ldm_ckpt = \"/pubdata/ldd/models/ldm_ckpts/sd-v1-4-full-ema.ckpt\"\n",
88
+ "msg_decoder_path = \"/pubdata/ldd/models/wm_encdec/hidden/ckpts/dec_48b_whit.torchscript.pt\"\n",
89
+ "val_dir = \"/pubdata/ldd/Datasets/coco2017/val2017\"\n",
90
+ "\n",
91
+ "torch.manual_seed(seed)\n",
92
+ "torch.cuda.manual_seed_all(seed)\n",
93
+ "np.random.seed(seed)\n",
94
+ "\n",
95
+ "# Loads LDM auto-encoder models\n",
96
+ "print(f'>>> Building LDM model with config {ldm_config} and weights from {ldm_ckpt}...')\n",
97
+ "config = OmegaConf.load(f\"{ldm_config}\")\n",
98
+ "ldm_ae: LatentDiffusion = utils_model.load_model_from_config(config, ldm_ckpt)\n",
99
+ "ldm_ae: AutoencoderKL = ldm_ae.first_stage_model\n",
100
+ "ldm_ae.eval()\n",
101
+ "ldm_ae.to(accelerator.device)\n",
102
+ "\n",
103
+ "# Loads hidden decoder\n",
104
+ "print(f'>>> Building hidden decoder with weights from {msg_decoder_path}...')\n",
105
+ "if 'torchscript' in msg_decoder_path: \n",
106
+ " msg_decoder = torch.jit.load(msg_decoder_path)\n",
107
+ "\n",
108
+ "msg_decoder.eval()\n",
109
+ "nbit = msg_decoder(torch.zeros(1, 3, 128, 128).to(accelerator.device)).shape[-1]\n",
110
+ "\n",
111
+ "# Freeze LDM and hidden decoder\n",
112
+ "for param in [*msg_decoder.parameters(), *ldm_ae.parameters()]:\n",
113
+ " param.requires_grad = False\n",
114
+ "\n",
115
+ "vqgan_transform = transforms.Compose([\n",
116
+ " transforms.Resize(img_size),\n",
117
+ " transforms.CenterCrop(img_size),\n",
118
+ " transforms.ToTensor(),\n",
119
+ " utils_img.normalize_vqgan,\n",
120
+ "])\n",
121
+ "val_loader = utils.get_dataloader(val_dir, vqgan_transform, 1, num_imgs=1000, shuffle=False, num_workers=4, collate_fn=None)\n",
122
+ "vqgan_to_imnet = transforms.Compose([utils_img.unnormalize_vqgan, utils_img.normalize_img])\n",
123
+ "\n",
124
+ "key = torch.randint(0, 2, (1, nbit), dtype=torch.float32, device=accelerator.device)\n",
125
+ "key_str = \"\".join([ str(int(ii)) for ii in key.tolist()[0]])\n",
126
+ "print(f'Key: {key_str}')\n",
127
+ "\n",
128
+ "# Copy the LDM decoder and finetune the copy\n",
129
+ "ldm_decoder = deepcopy(ldm_ae)\n",
130
+ "ldm_decoder.encoder = nn.Identity()\n",
131
+ "ldm_decoder.quant_conv = nn.Identity()\n",
132
+ "# ldm_decoder.to(device)\n",
133
+ "for param in ldm_decoder.parameters():\n",
134
+ " param.requires_grad = False\n",
135
+ "\n",
136
+ "saveimgs_dir_SS = f'/pubdata/ldd/projects/EW-LoRA/experiments/evals/save_imgs_{ckpt_prefix}'\n",
137
+ "os.makedirs(saveimgs_dir_SS, exist_ok=True)\n",
138
+ "vae_decoder_ss_ckpt_dir = f'/pubdata/ldd/projects/EW-LoRA/watermarker/stable_signature/outputs/train_{ckpt_prefix}/{exps_num}/checkpoints/checkpoint-latest'\n",
139
+ "\n",
140
+ "msg_decoder, ldm_decoder, val_loader, key = accelerator.prepare(\n",
141
+ " msg_decoder, ldm_decoder, val_loader, key\n",
142
+ ")\n",
143
+ "accelerator.load_state(os.path.join(vae_decoder_ss_ckpt_dir)) # Load the LoRA watermark checkpoint\n",
144
+ "print(f\"Loaded the Stable Signature checkpoint from {vae_decoder_ss_ckpt_dir}\")\n",
145
+ "\n",
146
+ "df_SS = pd.DataFrame(columns=[\n",
147
+ " \"iteration\",\n",
148
+ " \"psnr\",\n",
149
+ " \"bit_acc_avg\",\n",
150
+ "])\n",
151
+ "attacks = {\n",
152
+ " 'none': lambda x: x,\n",
153
+ " 'crop_01': lambda x: utils_img.center_crop(x, 0.1),\n",
154
+ " 'crop_05': lambda x: utils_img.center_crop(x, 0.5),\n",
155
+ " 'rot_25': lambda x: utils_img.rotate(x, 25),\n",
156
+ " 'rot_90': lambda x: utils_img.rotate(x, 90),\n",
157
+ " 'resize_03': lambda x: utils_img.resize(x, 0.3),\n",
158
+ " 'resize_07': lambda x: utils_img.resize(x, 0.7),\n",
159
+ " 'brightness_1p5': lambda x: utils_img.adjust_brightness(x, 1.5),\n",
160
+ " 'brightness_2': lambda x: utils_img.adjust_brightness(x, 2),\n",
161
+ " 'contrast_1p5': lambda x: utils_img.adjust_contrast(x, 1.5),\n",
162
+ " 'contrast_2': lambda x: utils_img.adjust_contrast(x, 2),\n",
163
+ " 'sharpness_1p5': lambda x: utils_img.adjust_sharpness(x, 1.5),\n",
164
+ " 'sharpness_2': lambda x: utils_img.adjust_sharpness(x, 2),\n",
165
+ " 'jpeg_80': lambda x: utils_img.jpeg_compress(x, 80),\n",
166
+ " 'jpeg_50': lambda x: utils_img.jpeg_compress(x, 50),\n",
167
+ "}\n",
168
+ "\n",
169
+ "for ii, imgs in enumerate(val_loader):\n",
170
+ " imgs = imgs.to(accelerator.device)\n",
171
+ " keys = key.repeat(imgs.shape[0], 1)\n",
172
+ "\n",
173
+ " imgs_z = ldm_ae.encode(imgs) # b c h w -> b z h/f w/f\n",
174
+ " imgs_z = imgs_z.mode()\n",
175
+ "\n",
176
+ " # decode latents with original and finetuned decoder\n",
177
+ " imgs_d0 = ldm_ae.decode(imgs_z) # b z h/f w/f -> b c h w\n",
178
+ " imgs_w = ldm_decoder.decode(imgs_z) # b z h/f w/f -> b c h w\n",
179
+ "\n",
180
+ " # extract watermark\n",
181
+ " decoded = msg_decoder(vqgan_to_imnet(imgs_w)) # b c h w -> b k\n",
182
+ " diff = (~torch.logical_xor(decoded>0, keys>0)) # b k -> b k\n",
183
+ " bit_accs = torch.sum(diff, dim=-1) / diff.shape[-1] # b k -> b\n",
184
+ "\n",
185
+ " log_stats = {\n",
186
+ " \"iteration\": ii,\n",
187
+ " \"psnr\": utils_img.psnr(imgs_w, imgs_d0).mean().item(),\n",
188
+ " \"bit_acc_avg\": torch.mean(bit_accs).item(),\n",
189
+ " }\n",
190
+ " for name, attack in attacks.items():\n",
191
+ " imgs_aug = attack(vqgan_to_imnet(imgs_w))\n",
192
+ " decoded = msg_decoder(imgs_aug) # b c h w -> b k\n",
193
+ " diff = (~torch.logical_xor(decoded>0, keys>0)) # b k -> b k\n",
194
+ " bit_accs = torch.sum(diff, dim=-1) / diff.shape[-1] # b k -> b\n",
195
+ " word_accs = (bit_accs == 1) # b\n",
196
+ " log_stats[f'bit_acc_{name}'] = torch.mean(bit_accs).item()\n",
197
+ "\n",
198
+ " df_SS = df_SS._append(log_stats, ignore_index=True)\n",
199
+ " save_image(utils_img.unnormalize_vqgan(imgs_w), os.path.join(saveimgs_dir_SS, f'{ii:03}_wm_orig.png'))\n",
200
+ "df_SS.to_csv(os.path.join(saveimgs_dir_SS, 'bitacc.csv'), index=False)"
201
+ ]
202
+ }
203
+ ],
204
+ "metadata": {
205
+ "kernelspec": {
206
+ "display_name": "ldm",
207
+ "language": "python",
208
+ "name": "python3"
209
+ },
210
+ "language_info": {
211
+ "codemirror_mode": {
212
+ "name": "ipython",
213
+ "version": 3
214
+ },
215
+ "file_extension": ".py",
216
+ "mimetype": "text/x-python",
217
+ "name": "python",
218
+ "nbconvert_exporter": "python",
219
+ "pygments_lexer": "ipython3",
220
+ "version": "3.8.18"
221
+ }
222
+ },
223
+ "nbformat": 4,
224
+ "nbformat_minor": 2
225
+ }
watermarker/LaWa/examples/gen_wmimgs_SS_fix_weights.ipynb ADDED
@@ -0,0 +1,236 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/home/ldd/miniconda3/envs/ldm/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
13
+ " from .autonotebook import tqdm as notebook_tqdm\n"
14
+ ]
15
+ },
16
+ {
17
+ "name": "stdout",
18
+ "output_type": "stream",
19
+ "text": [
20
+ "No module 'xformers'. Proceeding without it.\n"
21
+ ]
22
+ },
23
+ {
24
+ "name": "stderr",
25
+ "output_type": "stream",
26
+ "text": [
27
+ "/home/ldd/miniconda3/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/utilities/distributed.py:258: LightningDeprecationWarning: `pytorch_lightning.utilities.distributed.rank_zero_only` has been deprecated in v1.8.1 and will be removed in v2.0.0. You can import it from `pytorch_lightning.utilities` instead.\n",
28
+ " rank_zero_deprecation(\n"
29
+ ]
30
+ },
31
+ {
32
+ "name": "stdout",
33
+ "output_type": "stream",
34
+ "text": [
35
+ ">>> Building LDM model with config /pubdata/ldd/projects/EW-LoRA/Watermarker/stable_signature/configs/stable-diffusion/v1-inference.yaml and weights from /pubdata/ldd/models/ldm_ckpts/sd-v1-4-full-ema.ckpt...\n",
36
+ "Loading model from /pubdata/ldd/models/ldm_ckpts/sd-v1-4-full-ema.ckpt\n",
37
+ "Global Step: 470000\n",
38
+ "LatentDiffusion: Running in eps-prediction mode\n",
39
+ "DiffusionWrapper has 859.52 M params.\n",
40
+ "making attention of type 'vanilla' with 512 in_channels\n",
41
+ "Working with z of shape (1, 4, 32, 32) = 4096 dimensions.\n",
42
+ "making attention of type 'vanilla' with 512 in_channels\n",
43
+ ">>> Building hidden decoder with weights from /pubdata/ldd/models/wm_encdec/hidden/ckpts/dec_48b_whit.torchscript.pt...\n",
44
+ "Key: 111010110101000001010111010011010100010000100111\n"
45
+ ]
46
+ },
47
+ {
48
+ "ename": "ValueError",
49
+ "evalue": "Tried to find /pubdata/ldd/projects/EW-LoRA/Watermarker/stable_signature/outputs/train_SS_fix_weights/005-exps/checkpoints/checkpoint-latest but folder does not exist",
50
+ "output_type": "error",
51
+ "traceback": [
52
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
53
+ "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
54
+ "Cell \u001b[0;32mIn[1], line 86\u001b[0m\n\u001b[1;32m 81\u001b[0m vae_decoder_ss_ckpt_dir \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/pubdata/ldd/projects/EW-LoRA/Watermarker/stable_signature/outputs/train_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mckpt_prefix\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mexps_num\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m/checkpoints/checkpoint-latest\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 83\u001b[0m msg_decoder, ldm_decoder, val_loader, key \u001b[38;5;241m=\u001b[39m accelerator\u001b[38;5;241m.\u001b[39mprepare(\n\u001b[1;32m 84\u001b[0m msg_decoder, ldm_decoder, val_loader, key\n\u001b[1;32m 85\u001b[0m )\n\u001b[0;32m---> 86\u001b[0m \u001b[43maccelerator\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_state\u001b[49m\u001b[43m(\u001b[49m\u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvae_decoder_ss_ckpt_dir\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m \u001b[38;5;66;03m# Load the LoRA watermark checkpoint\u001b[39;00m\n\u001b[1;32m 87\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLoaded the Stable Signature checkpoint from \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvae_decoder_ss_ckpt_dir\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 89\u001b[0m df_SS \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame(columns\u001b[38;5;241m=\u001b[39m[\n\u001b[1;32m 90\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124miteration\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 91\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpsnr\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 92\u001b[0m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbit_acc_avg\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 93\u001b[0m ])\n",
55
+ "File \u001b[0;32m~/miniconda3/envs/ldm/lib/python3.8/site-packages/accelerate/accelerator.py:2851\u001b[0m, in \u001b[0;36mAccelerator.load_state\u001b[0;34m(self, input_dir, **load_model_func_kwargs)\u001b[0m\n\u001b[1;32m 2849\u001b[0m input_dir \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mexpanduser(input_dir)\n\u001b[1;32m 2850\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39misdir(input_dir):\n\u001b[0;32m-> 2851\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTried to find \u001b[39m\u001b[38;5;132;01m{\u001b[39;00minput_dir\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m but folder does not exist\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 2852\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mproject_configuration\u001b[38;5;241m.\u001b[39mautomatic_checkpoint_naming:\n\u001b[1;32m 2853\u001b[0m \u001b[38;5;66;03m# Pick up from automatic checkpoint naming\u001b[39;00m\n\u001b[1;32m 2854\u001b[0m input_dir \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mproject_dir, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcheckpoints\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
56
+ "\u001b[0;31mValueError\u001b[0m: Tried to find /pubdata/ldd/projects/EW-LoRA/Watermarker/stable_signature/outputs/train_SS_fix_weights/005-exps/checkpoints/checkpoint-latest but folder does not exist"
57
+ ]
58
+ },
59
+ {
60
+ "ename": "",
61
+ "evalue": "",
62
+ "output_type": "error",
63
+ "traceback": [
64
+ "\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
65
+ ]
66
+ }
67
+ ],
68
+ "source": [
69
+ "from omegaconf import OmegaConf\n",
70
+ "from ldm.models.autoencoder import AutoencoderKL\n",
71
+ "from ldm.models.diffusion.ddpm import LatentDiffusion\n",
72
+ "\n",
73
+ "import os\n",
74
+ "import torch\n",
75
+ "import utils\n",
76
+ "import utils_model\n",
77
+ "import utils_img\n",
78
+ "import torch.nn as nn\n",
79
+ "import numpy as np\n",
80
+ "from copy import deepcopy\n",
81
+ "from torchvision import transforms\n",
82
+ "import os\n",
83
+ "import pandas as pd\n",
84
+ "from torchvision.utils import save_image\n",
85
+ "from accelerate import Accelerator\n",
86
+ "accelerator = Accelerator()\n",
87
+ "\n",
88
+ "\n",
89
+ "apply_dlwt = False\n",
90
+ "ckpt_prefix = \"SS_dlwt\" if apply_dlwt else \"SS_fix_weights\"\n",
91
+ "exps_num = \"002-exps\"\n",
92
+ "\n",
93
+ "img_size = 256\n",
94
+ "batch_size = 4\n",
95
+ "seed = 0\n",
96
+ "\n",
97
+ "ldm_config = \"/pubdata/ldd/projects/EW-LoRA/watermarker/stable_signature/configs/stable-diffusion/v1-inference.yaml\"\n",
98
+ "ldm_ckpt = \"/pubdata/ldd/models/ldm_ckpts/sd-v1-4-full-ema.ckpt\"\n",
99
+ "msg_decoder_path = \"/pubdata/ldd/models/wm_encdec/hidden/ckpts/dec_48b_whit.torchscript.pt\"\n",
100
+ "val_dir = \"/pubdata/ldd/Datasets/coco2017/val2017\"\n",
101
+ "\n",
102
+ "torch.manual_seed(seed)\n",
103
+ "torch.cuda.manual_seed_all(seed)\n",
104
+ "np.random.seed(seed)\n",
105
+ "\n",
106
+ "# Loads LDM auto-encoder models\n",
107
+ "print(f'>>> Building LDM model with config {ldm_config} and weights from {ldm_ckpt}...')\n",
108
+ "config = OmegaConf.load(f\"{ldm_config}\")\n",
109
+ "ldm_ae: LatentDiffusion = utils_model.load_model_from_config(config, ldm_ckpt)\n",
110
+ "ldm_ae: AutoencoderKL = ldm_ae.first_stage_model\n",
111
+ "ldm_ae.eval()\n",
112
+ "ldm_ae.to(accelerator.device)\n",
113
+ "\n",
114
+ "# Loads hidden decoder\n",
115
+ "print(f'>>> Building hidden decoder with weights from {msg_decoder_path}...')\n",
116
+ "if 'torchscript' in msg_decoder_path: \n",
117
+ " msg_decoder = torch.jit.load(msg_decoder_path)\n",
118
+ "\n",
119
+ "msg_decoder.eval()\n",
120
+ "nbit = msg_decoder(torch.zeros(1, 3, 128, 128).to(accelerator.device)).shape[-1]\n",
121
+ "\n",
122
+ "# Freeze LDM and hidden decoder\n",
123
+ "for param in [*msg_decoder.parameters(), *ldm_ae.parameters()]:\n",
124
+ " param.requires_grad = False\n",
125
+ "\n",
126
+ "vqgan_transform = transforms.Compose([\n",
127
+ " transforms.Resize(img_size),\n",
128
+ " transforms.CenterCrop(img_size),\n",
129
+ " transforms.ToTensor(),\n",
130
+ " utils_img.normalize_vqgan,\n",
131
+ "])\n",
132
+ "val_loader = utils.get_dataloader(val_dir, vqgan_transform, 1, num_imgs=1000, shuffle=False, num_workers=4, collate_fn=None)\n",
133
+ "vqgan_to_imnet = transforms.Compose([utils_img.unnormalize_vqgan, utils_img.normalize_img])\n",
134
+ "\n",
135
+ "key = torch.randint(0, 2, (1, nbit), dtype=torch.float32, device=accelerator.device)\n",
136
+ "key_str = \"\".join([ str(int(ii)) for ii in key.tolist()[0]])\n",
137
+ "print(f'Key: {key_str}')\n",
138
+ "\n",
139
+ "# Copy the LDM decoder and finetune the copy\n",
140
+ "ldm_decoder = deepcopy(ldm_ae)\n",
141
+ "ldm_decoder.encoder = nn.Identity()\n",
142
+ "ldm_decoder.quant_conv = nn.Identity()\n",
143
+ "# ldm_decoder.to(device)\n",
144
+ "for param in ldm_decoder.parameters():\n",
145
+ " param.requires_grad = False\n",
146
+ "\n",
147
+ "saveimgs_dir_SS = f'/pubdata/ldd/projects/EW-LoRA/experiments/evals/save_imgs_{ckpt_prefix}_fix_weights'\n",
148
+ "os.makedirs(saveimgs_dir_SS, exist_ok=True)\n",
149
+ "vae_decoder_ss_ckpt_dir = f'/pubdata/ldd/projects/EW-LoRA/watermarker/stable_signature/outputs/train_{ckpt_prefix}/{exps_num}/checkpoints/checkpoint-latest'\n",
150
+ "\n",
151
+ "msg_decoder, ldm_decoder, val_loader, key = accelerator.prepare(\n",
152
+ " msg_decoder, ldm_decoder, val_loader, key\n",
153
+ ")\n",
154
+ "accelerator.load_state(os.path.join(vae_decoder_ss_ckpt_dir)) # Load the LoRA watermark checkpoint\n",
155
+ "print(f\"Loaded the Stable Signature checkpoint from {vae_decoder_ss_ckpt_dir}\")\n",
156
+ "\n",
157
+ "df_SS = pd.DataFrame(columns=[\n",
158
+ " \"iteration\",\n",
159
+ " \"psnr\",\n",
160
+ " \"bit_acc_avg\",\n",
161
+ "])\n",
162
+ "attacks = {\n",
163
+ " 'none': lambda x: x,\n",
164
+ " 'crop_01': lambda x: utils_img.center_crop(x, 0.1),\n",
165
+ " 'crop_05': lambda x: utils_img.center_crop(x, 0.5),\n",
166
+ " 'rot_25': lambda x: utils_img.rotate(x, 25),\n",
167
+ " 'rot_90': lambda x: utils_img.rotate(x, 90),\n",
168
+ " 'resize_03': lambda x: utils_img.resize(x, 0.3),\n",
169
+ " 'resize_07': lambda x: utils_img.resize(x, 0.7),\n",
170
+ " 'brightness_1p5': lambda x: utils_img.adjust_brightness(x, 1.5),\n",
171
+ " 'brightness_2': lambda x: utils_img.adjust_brightness(x, 2),\n",
172
+ " 'contrast_1p5': lambda x: utils_img.adjust_contrast(x, 1.5),\n",
173
+ " 'contrast_2': lambda x: utils_img.adjust_contrast(x, 2),\n",
174
+ " 'sharpness_1p5': lambda x: utils_img.adjust_sharpness(x, 1.5),\n",
175
+ " 'sharpness_2': lambda x: utils_img.adjust_sharpness(x, 2),\n",
176
+ " 'jpeg_80': lambda x: utils_img.jpeg_compress(x, 80),\n",
177
+ " 'jpeg_50': lambda x: utils_img.jpeg_compress(x, 50),\n",
178
+ "}\n",
179
+ "\n",
180
+ "for ii, imgs in enumerate(val_loader):\n",
181
+ " imgs = imgs.to(accelerator.device)\n",
182
+ " keys = key.repeat(imgs.shape[0], 1)\n",
183
+ "\n",
184
+ " imgs_z = ldm_ae.encode(imgs) # b c h w -> b z h/f w/f\n",
185
+ " imgs_z = imgs_z.mode()\n",
186
+ "\n",
187
+ " # decode latents with original and finetuned decoder\n",
188
+ " imgs_d0 = ldm_ae.decode(imgs_z) # b z h/f w/f -> b c h w\n",
189
+ " imgs_w = ldm_decoder.decode(imgs_z) # b z h/f w/f -> b c h w\n",
190
+ "\n",
191
+ " # extract watermark\n",
192
+ " decoded = msg_decoder(vqgan_to_imnet(imgs_w)) # b c h w -> b k\n",
193
+ " diff = (~torch.logical_xor(decoded>0, keys>0)) # b k -> b k\n",
194
+ " bit_accs = torch.sum(diff, dim=-1) / diff.shape[-1] # b k -> b\n",
195
+ "\n",
196
+ " log_stats = {\n",
197
+ " \"iteration\": ii,\n",
198
+ " \"psnr\": utils_img.psnr(imgs_w, imgs_d0).mean().item(),\n",
199
+ " \"bit_acc_avg\": torch.mean(bit_accs).item(),\n",
200
+ " }\n",
201
+ " for name, attack in attacks.items():\n",
202
+ " imgs_aug = attack(vqgan_to_imnet(imgs_w))\n",
203
+ " decoded = msg_decoder(imgs_aug) # b c h w -> b k\n",
204
+ " diff = (~torch.logical_xor(decoded>0, keys>0)) # b k -> b k\n",
205
+ " bit_accs = torch.sum(diff, dim=-1) / diff.shape[-1] # b k -> b\n",
206
+ " word_accs = (bit_accs == 1) # b\n",
207
+ " log_stats[f'bit_acc_{name}'] = torch.mean(bit_accs).item()\n",
208
+ "\n",
209
+ " df_SS = df_SS._append(log_stats, ignore_index=True)\n",
210
+ " save_image(utils_img.unnormalize_vqgan(imgs_w), os.path.join(saveimgs_dir_SS, f'{ii:03}_wm_orig.png'))\n",
211
+ "df_SS.to_csv(os.path.join(saveimgs_dir_SS, 'bitacc.csv'), index=False)"
212
+ ]
213
+ }
214
+ ],
215
+ "metadata": {
216
+ "kernelspec": {
217
+ "display_name": "ldm",
218
+ "language": "python",
219
+ "name": "python3"
220
+ },
221
+ "language_info": {
222
+ "codemirror_mode": {
223
+ "name": "ipython",
224
+ "version": 3
225
+ },
226
+ "file_extension": ".py",
227
+ "mimetype": "text/x-python",
228
+ "name": "python",
229
+ "nbconvert_exporter": "python",
230
+ "pygments_lexer": "ipython3",
231
+ "version": "3.8.18"
232
+ }
233
+ },
234
+ "nbformat": 4,
235
+ "nbformat_minor": 2
236
+ }
watermarker/LaWa/examples/gen_wmimgs_WMA_dlwt.ipynb ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/home/ldd/miniconda3/envs/ldm/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
13
+ " from .autonotebook import tqdm as notebook_tqdm\n"
14
+ ]
15
+ },
16
+ {
17
+ "name": "stdout",
18
+ "output_type": "stream",
19
+ "text": [
20
+ "No module 'xformers'. Proceeding without it.\n"
21
+ ]
22
+ },
23
+ {
24
+ "name": "stderr",
25
+ "output_type": "stream",
26
+ "text": [
27
+ "/home/ldd/miniconda3/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/utilities/distributed.py:258: LightningDeprecationWarning: `pytorch_lightning.utilities.distributed.rank_zero_only` has been deprecated in v1.8.1 and will be removed in v2.0.0. You can import it from `pytorch_lightning.utilities` instead.\n",
28
+ " rank_zero_deprecation(\n"
29
+ ]
30
+ },
31
+ {
32
+ "name": "stdout",
33
+ "output_type": "stream",
34
+ "text": [
35
+ ">>> Building LDM model with config /pubdata/ldd/projects/EW-LoRA/Watermarker/stable_signature/configs/stable-diffusion/v1-inference.yaml and weights from /pubdata/ldd/models/ldm_ckpts/sd-v1-4-full-ema.ckpt...\n",
36
+ "Loading model from /pubdata/ldd/models/ldm_ckpts/sd-v1-4-full-ema.ckpt\n",
37
+ "Global Step: 470000\n",
38
+ "LatentDiffusion: Running in eps-prediction mode\n",
39
+ "DiffusionWrapper has 859.52 M params.\n",
40
+ "making attention of type 'vanilla' with 512 in_channels\n",
41
+ "Working with z of shape (1, 4, 32, 32) = 4096 dimensions.\n",
42
+ "making attention of type 'vanilla' with 512 in_channels\n",
43
+ ">>> Building hidden decoder with weights from /pubdata/ldd/models/wm_encdec/hidden/ckpts/dec_48b_whit.torchscript.pt...\n",
44
+ "Key: 111010110101000001010111010011010100010000100111\n",
45
+ "Loaded the Stable Signature checkpoint from /pubdata/ldd/projects/EW-LoRA/Watermarker/stable_signature/outputs/train_WMA_dlwt/004-exps/checkpoints/checkpoint-latest\n"
46
+ ]
47
+ },
48
+ {
49
+ "ename": "",
50
+ "evalue": "",
51
+ "output_type": "error",
52
+ "traceback": [
53
+ "\u001b[1;31mThe Kernel crashed while executing code in the the current cell or a previous cell. Please review the code in the cell(s) to identify a possible cause of the failure. Click <a href='https://aka.ms/vscodeJupyterKernelCrash'>here</a> for more info. View Jupyter <a href='command:jupyter.viewOutput'>log</a> for further details."
54
+ ]
55
+ }
56
+ ],
57
+ "source": [
58
+ "from omegaconf import OmegaConf\n",
59
+ "from ldm.models.autoencoder import AutoencoderKL\n",
60
+ "from ldm.models.diffusion.ddpm import LatentDiffusion\n",
61
+ "\n",
62
+ "import os\n",
63
+ "import torch\n",
64
+ "import utils\n",
65
+ "import utils_model\n",
66
+ "import utils_img\n",
67
+ "import torch.nn as nn\n",
68
+ "import numpy as np\n",
69
+ "from copy import deepcopy\n",
70
+ "from torchvision import transforms\n",
71
+ "import os\n",
72
+ "import pandas as pd\n",
73
+ "from torchvision.utils import save_image\n",
74
+ "from accelerate import Accelerator\n",
75
+ "accelerator = Accelerator()\n",
76
+ "\n",
77
+ "\n",
78
+ "apply_dlwt = True\n",
79
+ "ckpt_prefix = \"WMA_dlwt\" if apply_dlwt else \"WMA_fix_weights\"\n",
80
+ "exps_num = \"004-exps\"\n",
81
+ "\n",
82
+ "img_size = 256\n",
83
+ "batch_size = 4\n",
84
+ "seed = 0\n",
85
+ "ldm_config = \"/pubdata/ldd/projects/EW-LoRA/watermarker/stable_signature/configs/stable-diffusion/v1-inference.yaml\"\n",
86
+ "ldm_ckpt = \"/pubdata/ldd/models/ldm_ckpts/sd-v1-4-full-ema.ckpt\"\n",
87
+ "msg_decoder_path = \"/pubdata/ldd/models/wm_encdec/hidden/ckpts/dec_48b_whit.torchscript.pt\"\n",
88
+ "val_dir = \"/pubdata/ldd/Datasets/coco2017/val2017\"\n",
89
+ "\n",
90
+ "torch.manual_seed(seed)\n",
91
+ "torch.cuda.manual_seed_all(seed)\n",
92
+ "np.random.seed(seed)\n",
93
+ "\n",
94
+ "# Loads LDM auto-encoder models\n",
95
+ "print(f'>>> Building LDM model with config {ldm_config} and weights from {ldm_ckpt}...')\n",
96
+ "config = OmegaConf.load(f\"{ldm_config}\")\n",
97
+ "ldm_ae: LatentDiffusion = utils_model.load_model_from_config(config, ldm_ckpt)\n",
98
+ "ldm_ae: AutoencoderKL = ldm_ae.first_stage_model\n",
99
+ "ldm_ae.eval()\n",
100
+ "ldm_ae.to(accelerator.device)\n",
101
+ "\n",
102
+ "# Loads hidden decoder\n",
103
+ "print(f'>>> Building hidden decoder with weights from {msg_decoder_path}...')\n",
104
+ "if 'torchscript' in msg_decoder_path: \n",
105
+ " msg_decoder = torch.jit.load(msg_decoder_path)\n",
106
+ "\n",
107
+ "msg_decoder.eval()\n",
108
+ "nbit = msg_decoder(torch.zeros(1, 3, 128, 128).to(accelerator.device)).shape[-1]\n",
109
+ "\n",
110
+ "# Freeze LDM and hidden decoder\n",
111
+ "for param in [*msg_decoder.parameters(), *ldm_ae.parameters()]:\n",
112
+ " param.requires_grad = False\n",
113
+ "\n",
114
+ "vqgan_transform = transforms.Compose([\n",
115
+ " transforms.Resize(img_size),\n",
116
+ " transforms.CenterCrop(img_size),\n",
117
+ " transforms.ToTensor(),\n",
118
+ " utils_img.normalize_vqgan,\n",
119
+ "])\n",
120
+ "val_loader = utils.get_dataloader(val_dir, vqgan_transform, 1, num_imgs=1000, shuffle=False, num_workers=4, collate_fn=None)\n",
121
+ "vqgan_to_imnet = transforms.Compose([utils_img.unnormalize_vqgan, utils_img.normalize_img])\n",
122
+ "\n",
123
+ "key = torch.randint(0, 2, (1, nbit), dtype=torch.float32, device=accelerator.device)\n",
124
+ "key_str = \"\".join([ str(int(ii)) for ii in key.tolist()[0]])\n",
125
+ "print(f'Key: {key_str}')\n",
126
+ "\n",
127
+ "# Copy the LDM decoder and finetune the copy\n",
128
+ "ldm_decoder = deepcopy(ldm_ae)\n",
129
+ "ldm_decoder.encoder = nn.Identity()\n",
130
+ "ldm_decoder.quant_conv = nn.Identity()\n",
131
+ "# ldm_decoder.to(device)\n",
132
+ "for param in ldm_decoder.parameters():\n",
133
+ " param.requires_grad = False\n",
134
+ "\n",
135
+ "import wmadapter.wmadapter as wmadapter\n",
136
+ "\n",
137
+ "wm_adapter = wmadapter.Fuser(img_channels_list=[4, 512, 512, 256, 512, 512], watermark_bits=key)\n",
138
+ "vae_with_adapter = wmadapter.VAEWithAdapter(ldm_ae.decoder, wm_adapter)\n",
139
+ "ldm_decoder.decoder = vae_with_adapter\n",
140
+ "\n",
141
+ "for param in ldm_decoder.parameters():\n",
142
+ " param.requires_grad = False\n",
143
+ "\n",
144
+ "saveimgs_dir = f'/pubdata/ldd/projects/EW-LoRA/experiments/evals/save_imgs_{ckpt_prefix}'\n",
145
+ "os.makedirs(saveimgs_dir, exist_ok=True)\n",
146
+ "vae_decoder_ckpt_dir = f'/pubdata/ldd/projects/EW-LoRA/watermarker/stable_signature/outputs/train_{ckpt_prefix}/{exps_num}/checkpoints/checkpoint-latest'\n",
147
+ "\n",
148
+ "msg_decoder, ldm_decoder, val_loader, key = accelerator.prepare(\n",
149
+ " msg_decoder, ldm_decoder, val_loader, key\n",
150
+ ")\n",
151
+ "accelerator.load_state(os.path.join(vae_decoder_ckpt_dir)) # Load the LoRA watermark checkpoint\n",
152
+ "print(f\"Loaded the Stable Signature checkpoint from {vae_decoder_ckpt_dir}\")\n",
153
+ "\n",
154
+ "df_WMA = pd.DataFrame(columns=[\n",
155
+ " \"iteration\",\n",
156
+ " \"psnr\",\n",
157
+ " \"bit_acc_avg\",\n",
158
+ "])\n",
159
+ "attacks = {\n",
160
+ " 'none': lambda x: x,\n",
161
+ " 'crop_01': lambda x: utils_img.center_crop(x, 0.1),\n",
162
+ " 'crop_05': lambda x: utils_img.center_crop(x, 0.5),\n",
163
+ " 'rot_25': lambda x: utils_img.rotate(x, 25),\n",
164
+ " 'rot_90': lambda x: utils_img.rotate(x, 90),\n",
165
+ " 'resize_03': lambda x: utils_img.resize(x, 0.3),\n",
166
+ " 'resize_07': lambda x: utils_img.resize(x, 0.7),\n",
167
+ " 'brightness_1p5': lambda x: utils_img.adjust_brightness(x, 1.5),\n",
168
+ " 'brightness_2': lambda x: utils_img.adjust_brightness(x, 2),\n",
169
+ " 'contrast_1p5': lambda x: utils_img.adjust_contrast(x, 1.5),\n",
170
+ " 'contrast_2': lambda x: utils_img.adjust_contrast(x, 2),\n",
171
+ " 'sharpness_1p5': lambda x: utils_img.adjust_sharpness(x, 1.5),\n",
172
+ " 'sharpness_2': lambda x: utils_img.adjust_sharpness(x, 2),\n",
173
+ " 'jpeg_80': lambda x: utils_img.jpeg_compress(x, 80),\n",
174
+ " 'jpeg_50': lambda x: utils_img.jpeg_compress(x, 50),\n",
175
+ "}\n",
176
+ "for ii, imgs in enumerate(val_loader):\n",
177
+ " imgs = imgs.to(accelerator.device)\n",
178
+ " keys = key.repeat(imgs.shape[0], 1)\n",
179
+ "\n",
180
+ " imgs_z = ldm_ae.encode(imgs) # b c h w -> b z h/f w/f\n",
181
+ " imgs_z = imgs_z.mode()\n",
182
+ "\n",
183
+ " # decode latents with original and finetuned decoder\n",
184
+ " imgs_d0 = ldm_ae.decode(imgs_z) # b z h/f w/f -> b c h w\n",
185
+ " imgs_w = ldm_decoder.decode(imgs_z) # b z h/f w/f -> b c h w\n",
186
+ "\n",
187
+ " # extract watermark\n",
188
+ " decoded = msg_decoder(vqgan_to_imnet(imgs_w)) # b c h w -> b k\n",
189
+ " diff = (~torch.logical_xor(decoded>0, keys>0)) # b k -> b k\n",
190
+ " bit_accs = torch.sum(diff, dim=-1) / diff.shape[-1] # b k -> b\n",
191
+ "\n",
192
+ " log_stats = {\n",
193
+ " \"iteration\": ii,\n",
194
+ " \"psnr\": utils_img.psnr(imgs_w, imgs_d0).mean().item(),\n",
195
+ " \"bit_acc_avg\": torch.mean(bit_accs).item(),\n",
196
+ " }\n",
197
+ " \n",
198
+ " for name, attack in attacks.items():\n",
199
+ " imgs_aug = attack(vqgan_to_imnet(imgs_w))\n",
200
+ " decoded = msg_decoder(imgs_aug) # b c h w -> b k\n",
201
+ " diff = (~torch.logical_xor(decoded>0, keys>0)) # b k -> b k\n",
202
+ " bit_accs = torch.sum(diff, dim=-1) / diff.shape[-1] # b k -> b\n",
203
+ " word_accs = (bit_accs == 1) # b\n",
204
+ " log_stats[f'bit_acc_{name}'] = torch.mean(bit_accs).item()\n",
205
+ "\n",
206
+ " df_WMA = df_WMA._append(log_stats, ignore_index=True)\n",
207
+ " save_image(utils_img.unnormalize_vqgan(imgs_w), os.path.join(saveimgs_dir, f'{ii:03}_wm_orig.png'))\n",
208
+ "df_WMA.to_csv(os.path.join(saveimgs_dir, 'bitacc.csv'), index=False)"
209
+ ]
210
+ }
211
+ ],
212
+ "metadata": {
213
+ "kernelspec": {
214
+ "display_name": "ldm",
215
+ "language": "python",
216
+ "name": "python3"
217
+ },
218
+ "language_info": {
219
+ "codemirror_mode": {
220
+ "name": "ipython",
221
+ "version": 3
222
+ },
223
+ "file_extension": ".py",
224
+ "mimetype": "text/x-python",
225
+ "name": "python",
226
+ "nbconvert_exporter": "python",
227
+ "pygments_lexer": "ipython3",
228
+ "version": "3.8.18"
229
+ }
230
+ },
231
+ "nbformat": 4,
232
+ "nbformat_minor": 2
233
+ }
watermarker/LaWa/examples/gen_wmimgs_WMA_fix_weights.ipynb ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/home/ldd/miniconda3/envs/ldm/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
13
+ " from .autonotebook import tqdm as notebook_tqdm\n"
14
+ ]
15
+ },
16
+ {
17
+ "name": "stdout",
18
+ "output_type": "stream",
19
+ "text": [
20
+ "No module 'xformers'. Proceeding without it.\n"
21
+ ]
22
+ },
23
+ {
24
+ "name": "stderr",
25
+ "output_type": "stream",
26
+ "text": [
27
+ "/home/ldd/miniconda3/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/utilities/distributed.py:258: LightningDeprecationWarning: `pytorch_lightning.utilities.distributed.rank_zero_only` has been deprecated in v1.8.1 and will be removed in v2.0.0. You can import it from `pytorch_lightning.utilities` instead.\n",
28
+ " rank_zero_deprecation(\n"
29
+ ]
30
+ },
31
+ {
32
+ "name": "stdout",
33
+ "output_type": "stream",
34
+ "text": [
35
+ ">>> Building LDM model with config /pubdata/ldd/projects/EW-LoRA/watermarker/stable_signature/configs/stable-diffusion/v1-inference.yaml and weights from /pubdata/ldd/models/ldm_ckpts/sd-v1-4-full-ema.ckpt...\n",
36
+ "Loading model from /pubdata/ldd/models/ldm_ckpts/sd-v1-4-full-ema.ckpt\n",
37
+ "Global Step: 470000\n",
38
+ "LatentDiffusion: Running in eps-prediction mode\n",
39
+ "DiffusionWrapper has 859.52 M params.\n",
40
+ "making attention of type 'vanilla' with 512 in_channels\n",
41
+ "Working with z of shape (1, 4, 32, 32) = 4096 dimensions.\n",
42
+ "making attention of type 'vanilla' with 512 in_channels\n",
43
+ ">>> Building hidden decoder with weights from /pubdata/ldd/models/wm_encdec/hidden/ckpts/dec_48b_whit.torchscript.pt...\n",
44
+ "Key: 111010110101000001010111010011010100010000100111\n",
45
+ "Loaded the Stable Signature checkpoint from /pubdata/ldd/projects/EW-LoRA/watermarker/stable_signature/outputs/train_WMA_fix_weights/002-exps/checkpoints/checkpoint-latest\n"
46
+ ]
47
+ }
48
+ ],
49
+ "source": [
50
+ "from omegaconf import OmegaConf\n",
51
+ "from ldm.models.autoencoder import AutoencoderKL\n",
52
+ "from ldm.models.diffusion.ddpm import LatentDiffusion\n",
53
+ "\n",
54
+ "import os\n",
55
+ "import torch\n",
56
+ "import utils\n",
57
+ "import utils_model\n",
58
+ "import utils_img\n",
59
+ "import torch.nn as nn\n",
60
+ "import numpy as np\n",
61
+ "from copy import deepcopy\n",
62
+ "from torchvision import transforms\n",
63
+ "import os\n",
64
+ "import pandas as pd\n",
65
+ "from torchvision.utils import save_image\n",
66
+ "from accelerate import Accelerator\n",
67
+ "accelerator = Accelerator()\n",
68
+ "\n",
69
+ "\n",
70
+ "apply_dlwt = False\n",
71
+ "ckpt_prefix = \"WMA_dlwt\" if apply_dlwt else \"WMA_fix_weights\"\n",
72
+ "exps_num = \"002-exps\"\n",
73
+ "\n",
74
+ "img_size = 256\n",
75
+ "batch_size = 4\n",
76
+ "seed = 0\n",
77
+ "ldm_config = \"/pubdata/ldd/projects/EW-LoRA/Watermarker/stable_signature/configs/stable-diffusion/v1-inference.yaml\"\n",
78
+ "ldm_ckpt = \"/pubdata/ldd/models/ldm_ckpts/sd-v1-4-full-ema.ckpt\"\n",
79
+ "msg_decoder_path = \"/pubdata/ldd/models/wm_encdec/hidden/ckpts/dec_48b_whit.torchscript.pt\"\n",
80
+ "val_dir = \"/pubdata/ldd/Datasets/coco2017/val2017\"\n",
81
+ "\n",
82
+ "torch.manual_seed(seed)\n",
83
+ "torch.cuda.manual_seed_all(seed)\n",
84
+ "np.random.seed(seed)\n",
85
+ "\n",
86
+ "# Loads LDM auto-encoder models\n",
87
+ "print(f'>>> Building LDM model with config {ldm_config} and weights from {ldm_ckpt}...')\n",
88
+ "config = OmegaConf.load(f\"{ldm_config}\")\n",
89
+ "ldm_ae: LatentDiffusion = utils_model.load_model_from_config(config, ldm_ckpt)\n",
90
+ "ldm_ae: AutoencoderKL = ldm_ae.first_stage_model\n",
91
+ "ldm_ae.eval()\n",
92
+ "ldm_ae.to(accelerator.device)\n",
93
+ "\n",
94
+ "# Loads hidden decoder\n",
95
+ "print(f'>>> Building hidden decoder with weights from {msg_decoder_path}...')\n",
96
+ "if 'torchscript' in msg_decoder_path: \n",
97
+ " msg_decoder = torch.jit.load(msg_decoder_path)\n",
98
+ "\n",
99
+ "msg_decoder.eval()\n",
100
+ "nbit = msg_decoder(torch.zeros(1, 3, 128, 128).to(accelerator.device)).shape[-1]\n",
101
+ "\n",
102
+ "# Freeze LDM and hidden decoder\n",
103
+ "for param in [*msg_decoder.parameters(), *ldm_ae.parameters()]:\n",
104
+ " param.requires_grad = False\n",
105
+ "\n",
106
+ "vqgan_transform = transforms.Compose([\n",
107
+ " transforms.Resize(img_size),\n",
108
+ " transforms.CenterCrop(img_size),\n",
109
+ " transforms.ToTensor(),\n",
110
+ " utils_img.normalize_vqgan,\n",
111
+ "])\n",
112
+ "val_loader = utils.get_dataloader(val_dir, vqgan_transform, 1, num_imgs=1000, shuffle=False, num_workers=4, collate_fn=None)\n",
113
+ "vqgan_to_imnet = transforms.Compose([utils_img.unnormalize_vqgan, utils_img.normalize_img])\n",
114
+ "\n",
115
+ "key = torch.randint(0, 2, (1, nbit), dtype=torch.float32, device=accelerator.device)\n",
116
+ "key_str = \"\".join([ str(int(ii)) for ii in key.tolist()[0]])\n",
117
+ "print(f'Key: {key_str}')\n",
118
+ "\n",
119
+ "# Copy the LDM decoder and finetune the copy\n",
120
+ "ldm_decoder = deepcopy(ldm_ae)\n",
121
+ "ldm_decoder.encoder = nn.Identity()\n",
122
+ "ldm_decoder.quant_conv = nn.Identity()\n",
123
+ "# ldm_decoder.to(device)\n",
124
+ "for param in ldm_decoder.parameters():\n",
125
+ " param.requires_grad = False\n",
126
+ "\n",
127
+ "import wmadapter.wmadapter as wmadapter\n",
128
+ "\n",
129
+ "wm_adapter = wmadapter.Fuser(img_channels_list=[4, 512, 512, 256, 512, 512], watermark_bits=key)\n",
130
+ "vae_with_adapter = wmadapter.VAEWithAdapter(ldm_ae.decoder, wm_adapter)\n",
131
+ "ldm_decoder.decoder = vae_with_adapter\n",
132
+ "\n",
133
+ "for param in ldm_decoder.parameters():\n",
134
+ " param.requires_grad = False\n",
135
+ "\n",
136
+ "saveimgs_dir = f'/pubdata/ldd/projects/EW-LoRA/experiments/evals/save_imgs_{ckpt_prefix}'\n",
137
+ "os.makedirs(saveimgs_dir, exist_ok=True)\n",
138
+ "vae_decoder_ckpt_dir = f'/pubdata/ldd/projects/EW-LoRA/Watermarker/stable_signature/outputs/train_{ckpt_prefix}/{exps_num}/checkpoints/checkpoint-latest'\n",
139
+ "\n",
140
+ "msg_decoder, ldm_decoder, val_loader, key = accelerator.prepare(\n",
141
+ " msg_decoder, ldm_decoder, val_loader, key\n",
142
+ ")\n",
143
+ "accelerator.load_state(os.path.join(vae_decoder_ckpt_dir)) # Load the LoRA watermark checkpoint\n",
144
+ "print(f\"Loaded the Stable Signature checkpoint from {vae_decoder_ckpt_dir}\")\n",
145
+ "\n",
146
+ "df_WMA = pd.DataFrame(columns=[\n",
147
+ " \"iteration\",\n",
148
+ " \"psnr\",\n",
149
+ " \"bit_acc_avg\",\n",
150
+ "])\n",
151
+ "attacks = {\n",
152
+ " 'none': lambda x: x,\n",
153
+ " 'crop_01': lambda x: utils_img.center_crop(x, 0.1),\n",
154
+ " 'crop_05': lambda x: utils_img.center_crop(x, 0.5),\n",
155
+ " 'rot_25': lambda x: utils_img.rotate(x, 25),\n",
156
+ " 'rot_90': lambda x: utils_img.rotate(x, 90),\n",
157
+ " 'resize_03': lambda x: utils_img.resize(x, 0.3),\n",
158
+ " 'resize_07': lambda x: utils_img.resize(x, 0.7),\n",
159
+ " 'brightness_1p5': lambda x: utils_img.adjust_brightness(x, 1.5),\n",
160
+ " 'brightness_2': lambda x: utils_img.adjust_brightness(x, 2),\n",
161
+ " 'contrast_1p5': lambda x: utils_img.adjust_contrast(x, 1.5),\n",
162
+ " 'contrast_2': lambda x: utils_img.adjust_contrast(x, 2),\n",
163
+ " 'sharpness_1p5': lambda x: utils_img.adjust_sharpness(x, 1.5),\n",
164
+ " 'sharpness_2': lambda x: utils_img.adjust_sharpness(x, 2),\n",
165
+ " 'jpeg_80': lambda x: utils_img.jpeg_compress(x, 80),\n",
166
+ " 'jpeg_50': lambda x: utils_img.jpeg_compress(x, 50),\n",
167
+ "}\n",
168
+ "for ii, imgs in enumerate(val_loader):\n",
169
+ " imgs = imgs.to(accelerator.device)\n",
170
+ " keys = key.repeat(imgs.shape[0], 1)\n",
171
+ "\n",
172
+ " imgs_z = ldm_ae.encode(imgs) # b c h w -> b z h/f w/f\n",
173
+ " imgs_z = imgs_z.mode()\n",
174
+ "\n",
175
+ " # decode latents with original and finetuned decoder\n",
176
+ " imgs_d0 = ldm_ae.decode(imgs_z) # b z h/f w/f -> b c h w\n",
177
+ " imgs_w = ldm_decoder.decode(imgs_z) # b z h/f w/f -> b c h w\n",
178
+ "\n",
179
+ " # extract watermark\n",
180
+ " decoded = msg_decoder(vqgan_to_imnet(imgs_w)) # b c h w -> b k\n",
181
+ " diff = (~torch.logical_xor(decoded>0, keys>0)) # b k -> b k\n",
182
+ " bit_accs = torch.sum(diff, dim=-1) / diff.shape[-1] # b k -> b\n",
183
+ "\n",
184
+ " log_stats = {\n",
185
+ " \"iteration\": ii,\n",
186
+ " \"psnr\": utils_img.psnr(imgs_w, imgs_d0).mean().item(),\n",
187
+ " \"bit_acc_avg\": torch.mean(bit_accs).item(),\n",
188
+ " }\n",
189
+ " \n",
190
+ " for name, attack in attacks.items():\n",
191
+ " imgs_aug = attack(vqgan_to_imnet(imgs_w))\n",
192
+ " decoded = msg_decoder(imgs_aug) # b c h w -> b k\n",
193
+ " diff = (~torch.logical_xor(decoded>0, keys>0)) # b k -> b k\n",
194
+ " bit_accs = torch.sum(diff, dim=-1) / diff.shape[-1] # b k -> b\n",
195
+ " word_accs = (bit_accs == 1) # b\n",
196
+ " log_stats[f'bit_acc_{name}'] = torch.mean(bit_accs).item()\n",
197
+ "\n",
198
+ " df_WMA = df_WMA._append(log_stats, ignore_index=True)\n",
199
+ " save_image(utils_img.unnormalize_vqgan(imgs_w), os.path.join(saveimgs_dir, f'{ii:03}_wm_orig.png'))\n",
200
+ "df_WMA.to_csv(os.path.join(saveimgs_dir, 'bitacc.csv'), index=False)"
201
+ ]
202
+ }
203
+ ],
204
+ "metadata": {
205
+ "kernelspec": {
206
+ "display_name": "ldm",
207
+ "language": "python",
208
+ "name": "python3"
209
+ },
210
+ "language_info": {
211
+ "codemirror_mode": {
212
+ "name": "ipython",
213
+ "version": 3
214
+ },
215
+ "file_extension": ".py",
216
+ "mimetype": "text/x-python",
217
+ "name": "python",
218
+ "nbconvert_exporter": "python",
219
+ "pygments_lexer": "ipython3",
220
+ "version": "3.8.18"
221
+ }
222
+ },
223
+ "nbformat": 4,
224
+ "nbformat_minor": 2
225
+ }
watermarker/LaWa/gen_wm_imgs.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from omegaconf import OmegaConf
2
+ from ldm.models.autoencoder import AutoencoderKL
3
+ from ldm.models.diffusion.ddpm import LatentDiffusion
4
+ import utils as utils
5
+ import utils_model as utils_model
6
+ import utils_img as utils_img
7
+
8
+ import os
9
+ import torch
10
+ import torch.nn as nn
11
+ import numpy as np
12
+ import argparse
13
+ from copy import deepcopy
14
+ from torchvision import transforms
15
+ import os
16
+ import pandas as pd
17
+ from torchvision.utils import save_image
18
+ from accelerate import Accelerator
19
+ accelerator = Accelerator()
20
+
21
+ from ldm.util import instantiate_from_config
22
+
23
+
24
+ def main(args):
25
+ # args.apply_dlwt = True
26
+ # args.ckpt_prefix = "SS_dlwt" if args.apply_dlwt else "SS_fix_weights"
27
+ # args.exps_num = "005-exps"
28
+
29
+ # args.img_size = 256
30
+ # args.batch_size = 4
31
+ # args.seed = 0
32
+
33
+ # args.ldm_config = "/pubdata/ldd/projects/EW-LoRA/watermarker/stable_signature/configs/stable-diffusion/v1-inference.yaml"
34
+ # args.ldm_ckpt = "/pubdata/ldd/models/ldm_ckpts/sd-v1-4-full-ema.ckpt"
35
+ # args.msg_decoder_path = "/pubdata/ldd/models/wm_encdec/hidden/ckpts/dec_48b_whit.torchscript.pt"
36
+ # args.val_dir = "/pubdata/ldd/Datasets/coco2017/val2017"
37
+
38
+ # Loads LDM auto-encoder models
39
+ print(f'>>> Building LDM model with config {args.ldm_config} and weights from {args.ldm_ckpt}...')
40
+ config = OmegaConf.load(f"{args.ldm_config}")
41
+ ldm_ae: LatentDiffusion = utils_model.load_model_from_config(config, args.ldm_ckpt)
42
+ ldm_ae: AutoencoderKL = ldm_ae.first_stage_model
43
+ ldm_ae.eval()
44
+ ldm_ae.to(accelerator.device)
45
+
46
+ saveimgs_dir = f'/pubdata/ldd/projects/EW-LoRA/experiments/evals/save_imgs_{args.ckpt_prefix}'
47
+ os.makedirs(saveimgs_dir, exist_ok=True)
48
+ vae_decoder_ckpt_dir = f'/pubdata/ldd/projects/EW-LoRA/watermarker/LaWa/outputs/train_{args.ckpt_prefix}/checkpoints/epoch=000000-step=000024999.ckpt'
49
+
50
+ ### Load the pre-trained modified decoder model
51
+ config = OmegaConf.load(args.ldm_wm_config).model
52
+ message_len = config.params.decoder_config.params.message_len
53
+ if int(args.message_len) != message_len:
54
+ raise Exception(f"Provided message_len argument does not match the message length in the config file!")
55
+ ldm_decoder = instantiate_from_config(config)
56
+ # print(ldm_decoder.decoder)
57
+ state_dict = torch.load(vae_decoder_ckpt_dir, map_location=torch.device('cpu'))
58
+ if 'global_step' in state_dict:
59
+ print(f'Global step: {state_dict["global_step"]}, epoch: {state_dict["epoch"]}')
60
+ if 'state_dict' in state_dict:
61
+ state_dict = state_dict['state_dict']
62
+ misses, ignores = ldm_decoder.load_state_dict(state_dict, strict=False)
63
+ print(f'Missed keys: {misses}\nIgnore keys: {ignores}')
64
+ ldm_decoder.eval()
65
+ ldm_decoder.to(accelerator.device)
66
+
67
+ # Loads hidden decoder
68
+ print(f'>>> Building hidden decoder with weights from {args.msg_decoder_path}...')
69
+ if 'torchscript' in args.msg_decoder_path:
70
+ msg_decoder = torch.jit.load(args.msg_decoder_path)
71
+ msg_decoder.eval()
72
+ nbit = msg_decoder(torch.zeros(1, 3, 128, 128).to(accelerator.device)).shape[-1]
73
+ msg_decoder.to(accelerator.device)
74
+
75
+ # Freeze LDM and hidden decoder
76
+ for param in [*msg_decoder.parameters(), *ldm_ae.parameters(), *ldm_decoder.parameters()]:
77
+ param.requires_grad = False
78
+
79
+ vqgan_transform = transforms.Compose([
80
+ transforms.Resize(args.img_size),
81
+ transforms.CenterCrop(args.img_size),
82
+ transforms.ToTensor(),
83
+ utils_img.normalize_vqgan,
84
+ ])
85
+ val_loader = utils.get_dataloader(args.val_dir, vqgan_transform, 1, num_imgs=1000, shuffle=False, num_workers=4, collate_fn=None)
86
+ vqgan_to_imnet = transforms.Compose([utils_img.unnormalize_vqgan, utils_img.normalize_img])
87
+
88
+ torch.manual_seed(args.seed)
89
+ torch.cuda.manual_seed_all(args.seed)
90
+ np.random.seed(args.seed)
91
+
92
+ key = torch.randint(0, 2, (1, nbit), dtype=torch.float32, device=accelerator.device)
93
+ key_str = "".join([ str(int(ii)) for ii in key.tolist()[0]])
94
+ print(f'Key: {key_str}')
95
+
96
+ df_SS = pd.DataFrame(columns=[
97
+ "iteration",
98
+ "psnr",
99
+ "bit_acc_avg",
100
+ ])
101
+ attacks = {
102
+ 'none': lambda x: x,
103
+ 'crop_01': lambda x: utils_img.center_crop(x, 0.1),
104
+ 'crop_05': lambda x: utils_img.center_crop(x, 0.5),
105
+ 'rot_25': lambda x: utils_img.rotate(x, 25),
106
+ 'rot_90': lambda x: utils_img.rotate(x, 90),
107
+ 'resize_03': lambda x: utils_img.resize(x, 0.3),
108
+ 'resize_07': lambda x: utils_img.resize(x, 0.7),
109
+ 'brightness_1p5': lambda x: utils_img.adjust_brightness(x, 1.5),
110
+ 'brightness_2': lambda x: utils_img.adjust_brightness(x, 2),
111
+ 'contrast_1p5': lambda x: utils_img.adjust_contrast(x, 1.5),
112
+ 'contrast_2': lambda x: utils_img.adjust_contrast(x, 2),
113
+ 'sharpness_1p5': lambda x: utils_img.adjust_sharpness(x, 1.5),
114
+ 'sharpness_2': lambda x: utils_img.adjust_sharpness(x, 2),
115
+ 'jpeg_80': lambda x: utils_img.jpeg_compress(x, 80),
116
+ 'jpeg_50': lambda x: utils_img.jpeg_compress(x, 50),
117
+ }
118
+
119
+ for ii, imgs in enumerate(val_loader):
120
+ imgs = imgs.to(accelerator.device)
121
+ keys = key.repeat(imgs.shape[0], 1).to(accelerator.device)
122
+
123
+ imgs_z = ldm_ae.encode(imgs) # b c h w -> b z h/f w/f
124
+ imgs_z = imgs_z.mode()
125
+
126
+ # decode latents with original and finetuned decoder
127
+ imgs_d0 = ldm_ae.decode(imgs_z) # b z h/f w/f -> b c h w
128
+ post_quant_noise = ldm_decoder.ae.post_quant_conv(imgs_z)
129
+ _, imgs_w = ldm_decoder(post_quant_noise, None, (2*keys-1)) # b z h/f w/f -> b c h w #TODO: Must do the stupid op. to get the correct message
130
+
131
+ # extract watermark
132
+ decoded = ldm_decoder.decoder(imgs_w.to("cuda"))
133
+ # decoded = msg_decoder(vqgan_to_imnet(imgs_w)) # b c h w -> b k
134
+ diff = (~torch.logical_xor(decoded>0, keys>0)) # b k -> b k
135
+ bit_accs = torch.sum(diff, dim=-1) / diff.shape[-1] # b k -> b
136
+
137
+ log_stats = {
138
+ "iteration": ii,
139
+ "psnr": utils_img.psnr(imgs_w, imgs_d0).mean().item(),
140
+ "bit_acc_avg": torch.mean(bit_accs).item(),
141
+ }
142
+ for name, attack in attacks.items():
143
+ imgs_aug = attack(imgs_w)
144
+ # decoded = msg_decoder(imgs_aug) # b c h w -> b k
145
+ decoded = ldm_decoder.decoder(imgs_aug.to("cuda"))
146
+ diff = (~torch.logical_xor(decoded>0, keys>0)) # b k -> b k
147
+ bit_accs = torch.sum(diff, dim=-1) / diff.shape[-1] # b k -> b
148
+ word_accs = (bit_accs == 1) # b
149
+ log_stats[f'bit_acc_{name}'] = torch.mean(bit_accs).item()
150
+
151
+ df_SS = df_SS._append(log_stats, ignore_index=True)
152
+ save_image(utils_img.unnormalize_vqgan(imgs_w), os.path.join(saveimgs_dir, f'{ii:03}_wm_orig.png'))
153
+ df_SS.to_csv(os.path.join(saveimgs_dir, 'bitacc.csv'), index=False)
154
+
155
+ def get_parser():
156
+ parser = argparse.ArgumentParser(description="Simple example of a training script.")
157
+ parser.add_argument("--batch_size", type=int, default=4, help="Batch size.")
158
+ parser.add_argument("--img_size", type=int, default=256, help="Image size.")
159
+ parser.add_argument("--seed", type=int, default=0, help="Seed.")
160
+ parser.add_argument("--message_len", type=int, default=48, help="Message length.")
161
+ parser.add_argument("--ldm_config", type=str, default="/pubdata/ldd/projects/EW-LoRA/watermarker/stable_signature/configs/stable-diffusion/v1-inference.yaml", help="LDM config.")
162
+ parser.add_argument("--ldm_wm_config", type=str, default="/pubdata/ldd/projects/EW-LoRA/watermarker/LaWa/configs/SD14_LaWa.yaml", help="LDM config.")
163
+ parser.add_argument("--ldm_ckpt", type=str, default="/pubdata/ldd/models/ldm_ckpts/sd-v1-4-full-ema.ckpt", help="LDM checkpoint.")
164
+ parser.add_argument("--msg_decoder_path", type=str, default="/pubdata/ldd/models/wm_encdec/hidden/ckpts/dec_48b_whit.torchscript.pt", help="Message decoder path.")
165
+ parser.add_argument("--val_dir", type=str, default="/pubdata/ldd/Datasets/coco2017/val2017", help="Validation directory.")
166
+ # parser.add_argument("--apply_dlwt", action="store_true", help="Apply DLWT.")
167
+ parser.add_argument("--ckpt_prefix", type=str, default="SS_dlwt", help="Checkpoint prefix.")
168
+ parser.add_argument("--exps_num", type=str, default="005-exps", help="Experiments number.")
169
+ return parser
170
+
171
+ if __name__ == '__main__':
172
+ # generate parser / parse parameters
173
+ parser = get_parser()
174
+ args = parser.parse_args()
175
+
176
+ # run experiment
177
+ main(args)
watermarker/LaWa/lawa_dataset/train_100k.csv ADDED
The diff for this file is too large to render. See raw diff
 
watermarker/LaWa/lawa_dataset/train_200k.csv ADDED
The diff for this file is too large to render. See raw diff
 
watermarker/LaWa/lawa_dataset/val_10k.csv ADDED
The diff for this file is too large to render. See raw diff
 
watermarker/LaWa/lawa_dataset/val_1k.csv ADDED
@@ -0,0 +1,1001 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ id,path
2
+ 99000,9/99000.jpg
3
+ 99001,9/99001.jpg
4
+ 99002,9/99002.jpg
5
+ 99003,9/99003.jpg
6
+ 99004,9/99004.jpg
7
+ 99005,9/99005.jpg
8
+ 99006,9/99006.jpg
9
+ 99007,9/99007.jpg
10
+ 99008,9/99008.jpg
11
+ 99009,9/99009.jpg
12
+ 99010,9/99010.jpg
13
+ 99011,9/99011.jpg
14
+ 99012,9/99012.jpg
15
+ 99013,9/99013.jpg
16
+ 99014,9/99014.jpg
17
+ 99015,9/99015.jpg
18
+ 99016,9/99016.jpg
19
+ 99017,9/99017.jpg
20
+ 99018,9/99018.jpg
21
+ 99019,9/99019.jpg
22
+ 99020,9/99020.jpg
23
+ 99021,9/99021.jpg
24
+ 99022,9/99022.jpg
25
+ 99023,9/99023.jpg
26
+ 99024,9/99024.jpg
27
+ 99025,9/99025.jpg
28
+ 99026,9/99026.jpg
29
+ 99027,9/99027.jpg
30
+ 99028,9/99028.jpg
31
+ 99029,9/99029.jpg
32
+ 99030,9/99030.jpg
33
+ 99031,9/99031.jpg
34
+ 99032,9/99032.jpg
35
+ 99033,9/99033.jpg
36
+ 99034,9/99034.jpg
37
+ 99035,9/99035.jpg
38
+ 99036,9/99036.jpg
39
+ 99037,9/99037.jpg
40
+ 99038,9/99038.jpg
41
+ 99039,9/99039.jpg
42
+ 99040,9/99040.jpg
43
+ 99041,9/99041.jpg
44
+ 99042,9/99042.jpg
45
+ 99043,9/99043.jpg
46
+ 99044,9/99044.jpg
47
+ 99045,9/99045.jpg
48
+ 99046,9/99046.jpg
49
+ 99047,9/99047.jpg
50
+ 99048,9/99048.jpg
51
+ 99049,9/99049.jpg
52
+ 99050,9/99050.jpg
53
+ 99051,9/99051.jpg
54
+ 99052,9/99052.jpg
55
+ 99053,9/99053.jpg
56
+ 99054,9/99054.jpg
57
+ 99055,9/99055.jpg
58
+ 99056,9/99056.jpg
59
+ 99057,9/99057.jpg
60
+ 99058,9/99058.jpg
61
+ 99059,9/99059.jpg
62
+ 99060,9/99060.jpg
63
+ 99061,9/99061.jpg
64
+ 99062,9/99062.jpg
65
+ 99063,9/99063.jpg
66
+ 99064,9/99064.jpg
67
+ 99065,9/99065.jpg
68
+ 99066,9/99066.jpg
69
+ 99067,9/99067.jpg
70
+ 99068,9/99068.jpg
71
+ 99069,9/99069.jpg
72
+ 99070,9/99070.jpg
73
+ 99071,9/99071.jpg
74
+ 99072,9/99072.jpg
75
+ 99073,9/99073.jpg
76
+ 99074,9/99074.jpg
77
+ 99075,9/99075.jpg
78
+ 99076,9/99076.jpg
79
+ 99077,9/99077.jpg
80
+ 99078,9/99078.jpg
81
+ 99079,9/99079.jpg
82
+ 99080,9/99080.jpg
83
+ 99081,9/99081.jpg
84
+ 99082,9/99082.jpg
85
+ 99083,9/99083.jpg
86
+ 99084,9/99084.jpg
87
+ 99085,9/99085.jpg
88
+ 99086,9/99086.jpg
89
+ 99087,9/99087.jpg
90
+ 99088,9/99088.jpg
91
+ 99089,9/99089.jpg
92
+ 99090,9/99090.jpg
93
+ 99091,9/99091.jpg
94
+ 99092,9/99092.jpg
95
+ 99093,9/99093.jpg
96
+ 99094,9/99094.jpg
97
+ 99095,9/99095.jpg
98
+ 99096,9/99096.jpg
99
+ 99097,9/99097.jpg
100
+ 99098,9/99098.jpg
101
+ 99099,9/99099.jpg
102
+ 99100,9/99100.jpg
103
+ 99101,9/99101.jpg
104
+ 99102,9/99102.jpg
105
+ 99103,9/99103.jpg
106
+ 99104,9/99104.jpg
107
+ 99105,9/99105.jpg
108
+ 99106,9/99106.jpg
109
+ 99107,9/99107.jpg
110
+ 99108,9/99108.jpg
111
+ 99109,9/99109.jpg
112
+ 99110,9/99110.jpg
113
+ 99111,9/99111.jpg
114
+ 99112,9/99112.jpg
115
+ 99113,9/99113.jpg
116
+ 99114,9/99114.jpg
117
+ 99115,9/99115.jpg
118
+ 99116,9/99116.jpg
119
+ 99117,9/99117.jpg
120
+ 99118,9/99118.jpg
121
+ 99119,9/99119.jpg
122
+ 99120,9/99120.jpg
123
+ 99121,9/99121.jpg
124
+ 99122,9/99122.jpg
125
+ 99123,9/99123.jpg
126
+ 99124,9/99124.jpg
127
+ 99125,9/99125.jpg
128
+ 99126,9/99126.jpg
129
+ 99127,9/99127.jpg
130
+ 99128,9/99128.jpg
131
+ 99129,9/99129.jpg
132
+ 99130,9/99130.jpg
133
+ 99131,9/99131.jpg
134
+ 99132,9/99132.jpg
135
+ 99133,9/99133.jpg
136
+ 99134,9/99134.jpg
137
+ 99135,9/99135.jpg
138
+ 99136,9/99136.jpg
139
+ 99137,9/99137.jpg
140
+ 99138,9/99138.jpg
141
+ 99139,9/99139.jpg
142
+ 99140,9/99140.jpg
143
+ 99141,9/99141.jpg
144
+ 99142,9/99142.jpg
145
+ 99143,9/99143.jpg
146
+ 99144,9/99144.jpg
147
+ 99145,9/99145.jpg
148
+ 99146,9/99146.jpg
149
+ 99147,9/99147.jpg
150
+ 99148,9/99148.jpg
151
+ 99149,9/99149.jpg
152
+ 99150,9/99150.jpg
153
+ 99151,9/99151.jpg
154
+ 99152,9/99152.jpg
155
+ 99153,9/99153.jpg
156
+ 99154,9/99154.jpg
157
+ 99155,9/99155.jpg
158
+ 99156,9/99156.jpg
159
+ 99157,9/99157.jpg
160
+ 99158,9/99158.jpg
161
+ 99159,9/99159.jpg
162
+ 99160,9/99160.jpg
163
+ 99161,9/99161.jpg
164
+ 99162,9/99162.jpg
165
+ 99163,9/99163.jpg
166
+ 99164,9/99164.jpg
167
+ 99165,9/99165.jpg
168
+ 99166,9/99166.jpg
169
+ 99167,9/99167.jpg
170
+ 99168,9/99168.jpg
171
+ 99169,9/99169.jpg
172
+ 99170,9/99170.jpg
173
+ 99171,9/99171.jpg
174
+ 99172,9/99172.jpg
175
+ 99173,9/99173.jpg
176
+ 99174,9/99174.jpg
177
+ 99175,9/99175.jpg
178
+ 99176,9/99176.jpg
179
+ 99177,9/99177.jpg
180
+ 99178,9/99178.jpg
181
+ 99179,9/99179.jpg
182
+ 99180,9/99180.jpg
183
+ 99181,9/99181.jpg
184
+ 99182,9/99182.jpg
185
+ 99183,9/99183.jpg
186
+ 99184,9/99184.jpg
187
+ 99185,9/99185.jpg
188
+ 99186,9/99186.jpg
189
+ 99187,9/99187.jpg
190
+ 99188,9/99188.jpg
191
+ 99189,9/99189.jpg
192
+ 99190,9/99190.jpg
193
+ 99191,9/99191.jpg
194
+ 99192,9/99192.jpg
195
+ 99193,9/99193.jpg
196
+ 99194,9/99194.jpg
197
+ 99195,9/99195.jpg
198
+ 99196,9/99196.jpg
199
+ 99197,9/99197.jpg
200
+ 99198,9/99198.jpg
201
+ 99199,9/99199.jpg
202
+ 99200,9/99200.jpg
203
+ 99201,9/99201.jpg
204
+ 99202,9/99202.jpg
205
+ 99203,9/99203.jpg
206
+ 99204,9/99204.jpg
207
+ 99205,9/99205.jpg
208
+ 99206,9/99206.jpg
209
+ 99207,9/99207.jpg
210
+ 99208,9/99208.jpg
211
+ 99209,9/99209.jpg
212
+ 99210,9/99210.jpg
213
+ 99211,9/99211.jpg
214
+ 99212,9/99212.jpg
215
+ 99213,9/99213.jpg
216
+ 99214,9/99214.jpg
217
+ 99215,9/99215.jpg
218
+ 99216,9/99216.jpg
219
+ 99217,9/99217.jpg
220
+ 99218,9/99218.jpg
221
+ 99219,9/99219.jpg
222
+ 99220,9/99220.jpg
223
+ 99221,9/99221.jpg
224
+ 99222,9/99222.jpg
225
+ 99223,9/99223.jpg
226
+ 99224,9/99224.jpg
227
+ 99225,9/99225.jpg
228
+ 99226,9/99226.jpg
229
+ 99227,9/99227.jpg
230
+ 99228,9/99228.jpg
231
+ 99229,9/99229.jpg
232
+ 99230,9/99230.jpg
233
+ 99231,9/99231.jpg
234
+ 99232,9/99232.jpg
235
+ 99233,9/99233.jpg
236
+ 99234,9/99234.jpg
237
+ 99235,9/99235.jpg
238
+ 99236,9/99236.jpg
239
+ 99237,9/99237.jpg
240
+ 99238,9/99238.jpg
241
+ 99239,9/99239.jpg
242
+ 99240,9/99240.jpg
243
+ 99241,9/99241.jpg
244
+ 99242,9/99242.jpg
245
+ 99243,9/99243.jpg
246
+ 99244,9/99244.jpg
247
+ 99245,9/99245.jpg
248
+ 99246,9/99246.jpg
249
+ 99247,9/99247.jpg
250
+ 99248,9/99248.jpg
251
+ 99249,9/99249.jpg
252
+ 99250,9/99250.jpg
253
+ 99251,9/99251.jpg
254
+ 99252,9/99252.jpg
255
+ 99253,9/99253.jpg
256
+ 99254,9/99254.jpg
257
+ 99255,9/99255.jpg
258
+ 99256,9/99256.jpg
259
+ 99257,9/99257.jpg
260
+ 99258,9/99258.jpg
261
+ 99259,9/99259.jpg
262
+ 99260,9/99260.jpg
263
+ 99261,9/99261.jpg
264
+ 99262,9/99262.jpg
265
+ 99263,9/99263.jpg
266
+ 99264,9/99264.jpg
267
+ 99265,9/99265.jpg
268
+ 99266,9/99266.jpg
269
+ 99267,9/99267.jpg
270
+ 99268,9/99268.jpg
271
+ 99269,9/99269.jpg
272
+ 99270,9/99270.jpg
273
+ 99271,9/99271.jpg
274
+ 99272,9/99272.jpg
275
+ 99273,9/99273.jpg
276
+ 99274,9/99274.jpg
277
+ 99275,9/99275.jpg
278
+ 99276,9/99276.jpg
279
+ 99277,9/99277.jpg
280
+ 99278,9/99278.jpg
281
+ 99279,9/99279.jpg
282
+ 99280,9/99280.jpg
283
+ 99281,9/99281.jpg
284
+ 99282,9/99282.jpg
285
+ 99283,9/99283.jpg
286
+ 99284,9/99284.jpg
287
+ 99285,9/99285.jpg
288
+ 99286,9/99286.jpg
289
+ 99287,9/99287.jpg
290
+ 99288,9/99288.jpg
291
+ 99289,9/99289.jpg
292
+ 99290,9/99290.jpg
293
+ 99291,9/99291.jpg
294
+ 99292,9/99292.jpg
295
+ 99293,9/99293.jpg
296
+ 99294,9/99294.jpg
297
+ 99295,9/99295.jpg
298
+ 99296,9/99296.jpg
299
+ 99297,9/99297.jpg
300
+ 99298,9/99298.jpg
301
+ 99299,9/99299.jpg
302
+ 99300,9/99300.jpg
303
+ 99301,9/99301.jpg
304
+ 99302,9/99302.jpg
305
+ 99303,9/99303.jpg
306
+ 99304,9/99304.jpg
307
+ 99305,9/99305.jpg
308
+ 99306,9/99306.jpg
309
+ 99307,9/99307.jpg
310
+ 99308,9/99308.jpg
311
+ 99309,9/99309.jpg
312
+ 99310,9/99310.jpg
313
+ 99311,9/99311.jpg
314
+ 99312,9/99312.jpg
315
+ 99313,9/99313.jpg
316
+ 99314,9/99314.jpg
317
+ 99315,9/99315.jpg
318
+ 99316,9/99316.jpg
319
+ 99317,9/99317.jpg
320
+ 99318,9/99318.jpg
321
+ 99319,9/99319.jpg
322
+ 99320,9/99320.jpg
323
+ 99321,9/99321.jpg
324
+ 99322,9/99322.jpg
325
+ 99323,9/99323.jpg
326
+ 99324,9/99324.jpg
327
+ 99325,9/99325.jpg
328
+ 99326,9/99326.jpg
329
+ 99327,9/99327.jpg
330
+ 99328,9/99328.jpg
331
+ 99329,9/99329.jpg
332
+ 99330,9/99330.jpg
333
+ 99331,9/99331.jpg
334
+ 99332,9/99332.jpg
335
+ 99333,9/99333.jpg
336
+ 99334,9/99334.jpg
337
+ 99335,9/99335.jpg
338
+ 99336,9/99336.jpg
339
+ 99337,9/99337.jpg
340
+ 99338,9/99338.jpg
341
+ 99339,9/99339.jpg
342
+ 99340,9/99340.jpg
343
+ 99341,9/99341.jpg
344
+ 99342,9/99342.jpg
345
+ 99343,9/99343.jpg
346
+ 99344,9/99344.jpg
347
+ 99345,9/99345.jpg
348
+ 99346,9/99346.jpg
349
+ 99347,9/99347.jpg
350
+ 99348,9/99348.jpg
351
+ 99349,9/99349.jpg
352
+ 99350,9/99350.jpg
353
+ 99351,9/99351.jpg
354
+ 99352,9/99352.jpg
355
+ 99353,9/99353.jpg
356
+ 99354,9/99354.jpg
357
+ 99355,9/99355.jpg
358
+ 99356,9/99356.jpg
359
+ 99357,9/99357.jpg
360
+ 99358,9/99358.jpg
361
+ 99359,9/99359.jpg
362
+ 99360,9/99360.jpg
363
+ 99361,9/99361.jpg
364
+ 99362,9/99362.jpg
365
+ 99363,9/99363.jpg
366
+ 99364,9/99364.jpg
367
+ 99365,9/99365.jpg
368
+ 99366,9/99366.jpg
369
+ 99367,9/99367.jpg
370
+ 99368,9/99368.jpg
371
+ 99369,9/99369.jpg
372
+ 99370,9/99370.jpg
373
+ 99371,9/99371.jpg
374
+ 99372,9/99372.jpg
375
+ 99373,9/99373.jpg
376
+ 99374,9/99374.jpg
377
+ 99375,9/99375.jpg
378
+ 99376,9/99376.jpg
379
+ 99377,9/99377.jpg
380
+ 99378,9/99378.jpg
381
+ 99379,9/99379.jpg
382
+ 99380,9/99380.jpg
383
+ 99381,9/99381.jpg
384
+ 99382,9/99382.jpg
385
+ 99383,9/99383.jpg
386
+ 99384,9/99384.jpg
387
+ 99385,9/99385.jpg
388
+ 99386,9/99386.jpg
389
+ 99387,9/99387.jpg
390
+ 99388,9/99388.jpg
391
+ 99389,9/99389.jpg
392
+ 99390,9/99390.jpg
393
+ 99391,9/99391.jpg
394
+ 99392,9/99392.jpg
395
+ 99393,9/99393.jpg
396
+ 99394,9/99394.jpg
397
+ 99395,9/99395.jpg
398
+ 99396,9/99396.jpg
399
+ 99397,9/99397.jpg
400
+ 99398,9/99398.jpg
401
+ 99399,9/99399.jpg
402
+ 99400,9/99400.jpg
403
+ 99401,9/99401.jpg
404
+ 99402,9/99402.jpg
405
+ 99403,9/99403.jpg
406
+ 99404,9/99404.jpg
407
+ 99405,9/99405.jpg
408
+ 99406,9/99406.jpg
409
+ 99407,9/99407.jpg
410
+ 99408,9/99408.jpg
411
+ 99409,9/99409.jpg
412
+ 99410,9/99410.jpg
413
+ 99411,9/99411.jpg
414
+ 99412,9/99412.jpg
415
+ 99413,9/99413.jpg
416
+ 99414,9/99414.jpg
417
+ 99415,9/99415.jpg
418
+ 99416,9/99416.jpg
419
+ 99417,9/99417.jpg
420
+ 99418,9/99418.jpg
421
+ 99419,9/99419.jpg
422
+ 99420,9/99420.jpg
423
+ 99421,9/99421.jpg
424
+ 99422,9/99422.jpg
425
+ 99423,9/99423.jpg
426
+ 99424,9/99424.jpg
427
+ 99425,9/99425.jpg
428
+ 99426,9/99426.jpg
429
+ 99427,9/99427.jpg
430
+ 99428,9/99428.jpg
431
+ 99429,9/99429.jpg
432
+ 99430,9/99430.jpg
433
+ 99431,9/99431.jpg
434
+ 99432,9/99432.jpg
435
+ 99433,9/99433.jpg
436
+ 99434,9/99434.jpg
437
+ 99435,9/99435.jpg
438
+ 99436,9/99436.jpg
439
+ 99437,9/99437.jpg
440
+ 99438,9/99438.jpg
441
+ 99439,9/99439.jpg
442
+ 99440,9/99440.jpg
443
+ 99441,9/99441.jpg
444
+ 99442,9/99442.jpg
445
+ 99443,9/99443.jpg
446
+ 99444,9/99444.jpg
447
+ 99445,9/99445.jpg
448
+ 99446,9/99446.jpg
449
+ 99447,9/99447.jpg
450
+ 99448,9/99448.jpg
451
+ 99449,9/99449.jpg
452
+ 99450,9/99450.jpg
453
+ 99451,9/99451.jpg
454
+ 99452,9/99452.jpg
455
+ 99453,9/99453.jpg
456
+ 99454,9/99454.jpg
457
+ 99455,9/99455.jpg
458
+ 99456,9/99456.jpg
459
+ 99457,9/99457.jpg
460
+ 99458,9/99458.jpg
461
+ 99459,9/99459.jpg
462
+ 99460,9/99460.jpg
463
+ 99461,9/99461.jpg
464
+ 99462,9/99462.jpg
465
+ 99463,9/99463.jpg
466
+ 99464,9/99464.jpg
467
+ 99465,9/99465.jpg
468
+ 99466,9/99466.jpg
469
+ 99467,9/99467.jpg
470
+ 99468,9/99468.jpg
471
+ 99469,9/99469.jpg
472
+ 99470,9/99470.jpg
473
+ 99471,9/99471.jpg
474
+ 99472,9/99472.jpg
475
+ 99473,9/99473.jpg
476
+ 99474,9/99474.jpg
477
+ 99475,9/99475.jpg
478
+ 99476,9/99476.jpg
479
+ 99477,9/99477.jpg
480
+ 99478,9/99478.jpg
481
+ 99479,9/99479.jpg
482
+ 99480,9/99480.jpg
483
+ 99481,9/99481.jpg
484
+ 99482,9/99482.jpg
485
+ 99483,9/99483.jpg
486
+ 99484,9/99484.jpg
487
+ 99485,9/99485.jpg
488
+ 99486,9/99486.jpg
489
+ 99487,9/99487.jpg
490
+ 99488,9/99488.jpg
491
+ 99489,9/99489.jpg
492
+ 99490,9/99490.jpg
493
+ 99491,9/99491.jpg
494
+ 99492,9/99492.jpg
495
+ 99493,9/99493.jpg
496
+ 99494,9/99494.jpg
497
+ 99495,9/99495.jpg
498
+ 99496,9/99496.jpg
499
+ 99497,9/99497.jpg
500
+ 99498,9/99498.jpg
501
+ 99499,9/99499.jpg
502
+ 99500,9/99500.jpg
503
+ 99501,9/99501.jpg
504
+ 99502,9/99502.jpg
505
+ 99503,9/99503.jpg
506
+ 99504,9/99504.jpg
507
+ 99505,9/99505.jpg
508
+ 99506,9/99506.jpg
509
+ 99507,9/99507.jpg
510
+ 99508,9/99508.jpg
511
+ 99509,9/99509.jpg
512
+ 99510,9/99510.jpg
513
+ 99511,9/99511.jpg
514
+ 99512,9/99512.jpg
515
+ 99513,9/99513.jpg
516
+ 99514,9/99514.jpg
517
+ 99515,9/99515.jpg
518
+ 99516,9/99516.jpg
519
+ 99517,9/99517.jpg
520
+ 99518,9/99518.jpg
521
+ 99519,9/99519.jpg
522
+ 99520,9/99520.jpg
523
+ 99521,9/99521.jpg
524
+ 99522,9/99522.jpg
525
+ 99523,9/99523.jpg
526
+ 99524,9/99524.jpg
527
+ 99525,9/99525.jpg
528
+ 99526,9/99526.jpg
529
+ 99527,9/99527.jpg
530
+ 99528,9/99528.jpg
531
+ 99529,9/99529.jpg
532
+ 99530,9/99530.jpg
533
+ 99531,9/99531.jpg
534
+ 99532,9/99532.jpg
535
+ 99533,9/99533.jpg
536
+ 99534,9/99534.jpg
537
+ 99535,9/99535.jpg
538
+ 99536,9/99536.jpg
539
+ 99537,9/99537.jpg
540
+ 99538,9/99538.jpg
541
+ 99539,9/99539.jpg
542
+ 99540,9/99540.jpg
543
+ 99541,9/99541.jpg
544
+ 99542,9/99542.jpg
545
+ 99543,9/99543.jpg
546
+ 99544,9/99544.jpg
547
+ 99545,9/99545.jpg
548
+ 99546,9/99546.jpg
549
+ 99547,9/99547.jpg
550
+ 99548,9/99548.jpg
551
+ 99549,9/99549.jpg
552
+ 99550,9/99550.jpg
553
+ 99551,9/99551.jpg
554
+ 99552,9/99552.jpg
555
+ 99553,9/99553.jpg
556
+ 99554,9/99554.jpg
557
+ 99555,9/99555.jpg
558
+ 99556,9/99556.jpg
559
+ 99557,9/99557.jpg
560
+ 99558,9/99558.jpg
561
+ 99559,9/99559.jpg
562
+ 99560,9/99560.jpg
563
+ 99561,9/99561.jpg
564
+ 99562,9/99562.jpg
565
+ 99563,9/99563.jpg
566
+ 99564,9/99564.jpg
567
+ 99565,9/99565.jpg
568
+ 99566,9/99566.jpg
569
+ 99567,9/99567.jpg
570
+ 99568,9/99568.jpg
571
+ 99569,9/99569.jpg
572
+ 99570,9/99570.jpg
573
+ 99571,9/99571.jpg
574
+ 99572,9/99572.jpg
575
+ 99573,9/99573.jpg
576
+ 99574,9/99574.jpg
577
+ 99575,9/99575.jpg
578
+ 99576,9/99576.jpg
579
+ 99577,9/99577.jpg
580
+ 99578,9/99578.jpg
581
+ 99579,9/99579.jpg
582
+ 99580,9/99580.jpg
583
+ 99581,9/99581.jpg
584
+ 99582,9/99582.jpg
585
+ 99583,9/99583.jpg
586
+ 99584,9/99584.jpg
587
+ 99585,9/99585.jpg
588
+ 99586,9/99586.jpg
589
+ 99587,9/99587.jpg
590
+ 99588,9/99588.jpg
591
+ 99589,9/99589.jpg
592
+ 99590,9/99590.jpg
593
+ 99591,9/99591.jpg
594
+ 99592,9/99592.jpg
595
+ 99593,9/99593.jpg
596
+ 99594,9/99594.jpg
597
+ 99595,9/99595.jpg
598
+ 99596,9/99596.jpg
599
+ 99597,9/99597.jpg
600
+ 99598,9/99598.jpg
601
+ 99599,9/99599.jpg
602
+ 99600,9/99600.jpg
603
+ 99601,9/99601.jpg
604
+ 99602,9/99602.jpg
605
+ 99603,9/99603.jpg
606
+ 99604,9/99604.jpg
607
+ 99605,9/99605.jpg
608
+ 99606,9/99606.jpg
609
+ 99607,9/99607.jpg
610
+ 99608,9/99608.jpg
611
+ 99609,9/99609.jpg
612
+ 99610,9/99610.jpg
613
+ 99611,9/99611.jpg
614
+ 99612,9/99612.jpg
615
+ 99613,9/99613.jpg
616
+ 99614,9/99614.jpg
617
+ 99615,9/99615.jpg
618
+ 99616,9/99616.jpg
619
+ 99617,9/99617.jpg
620
+ 99618,9/99618.jpg
621
+ 99619,9/99619.jpg
622
+ 99620,9/99620.jpg
623
+ 99621,9/99621.jpg
624
+ 99622,9/99622.jpg
625
+ 99623,9/99623.jpg
626
+ 99624,9/99624.jpg
627
+ 99625,9/99625.jpg
628
+ 99626,9/99626.jpg
629
+ 99627,9/99627.jpg
630
+ 99628,9/99628.jpg
631
+ 99629,9/99629.jpg
632
+ 99630,9/99630.jpg
633
+ 99631,9/99631.jpg
634
+ 99632,9/99632.jpg
635
+ 99633,9/99633.jpg
636
+ 99634,9/99634.jpg
637
+ 99635,9/99635.jpg
638
+ 99636,9/99636.jpg
639
+ 99637,9/99637.jpg
640
+ 99638,9/99638.jpg
641
+ 99639,9/99639.jpg
642
+ 99640,9/99640.jpg
643
+ 99641,9/99641.jpg
644
+ 99642,9/99642.jpg
645
+ 99643,9/99643.jpg
646
+ 99644,9/99644.jpg
647
+ 99645,9/99645.jpg
648
+ 99646,9/99646.jpg
649
+ 99647,9/99647.jpg
650
+ 99648,9/99648.jpg
651
+ 99649,9/99649.jpg
652
+ 99650,9/99650.jpg
653
+ 99651,9/99651.jpg
654
+ 99652,9/99652.jpg
655
+ 99653,9/99653.jpg
656
+ 99654,9/99654.jpg
657
+ 99655,9/99655.jpg
658
+ 99656,9/99656.jpg
659
+ 99657,9/99657.jpg
660
+ 99658,9/99658.jpg
661
+ 99659,9/99659.jpg
662
+ 99660,9/99660.jpg
663
+ 99661,9/99661.jpg
664
+ 99662,9/99662.jpg
665
+ 99663,9/99663.jpg
666
+ 99664,9/99664.jpg
667
+ 99665,9/99665.jpg
668
+ 99666,9/99666.jpg
669
+ 99667,9/99667.jpg
670
+ 99668,9/99668.jpg
671
+ 99669,9/99669.jpg
672
+ 99670,9/99670.jpg
673
+ 99671,9/99671.jpg
674
+ 99672,9/99672.jpg
675
+ 99673,9/99673.jpg
676
+ 99674,9/99674.jpg
677
+ 99675,9/99675.jpg
678
+ 99676,9/99676.jpg
679
+ 99677,9/99677.jpg
680
+ 99678,9/99678.jpg
681
+ 99679,9/99679.jpg
682
+ 99680,9/99680.jpg
683
+ 99681,9/99681.jpg
684
+ 99682,9/99682.jpg
685
+ 99683,9/99683.jpg
686
+ 99684,9/99684.jpg
687
+ 99685,9/99685.jpg
688
+ 99686,9/99686.jpg
689
+ 99687,9/99687.jpg
690
+ 99688,9/99688.jpg
691
+ 99689,9/99689.jpg
692
+ 99690,9/99690.jpg
693
+ 99691,9/99691.jpg
694
+ 99692,9/99692.jpg
695
+ 99693,9/99693.jpg
696
+ 99694,9/99694.jpg
697
+ 99695,9/99695.jpg
698
+ 99696,9/99696.jpg
699
+ 99697,9/99697.jpg
700
+ 99698,9/99698.jpg
701
+ 99699,9/99699.jpg
702
+ 99700,9/99700.jpg
703
+ 99701,9/99701.jpg
704
+ 99702,9/99702.jpg
705
+ 99703,9/99703.jpg
706
+ 99704,9/99704.jpg
707
+ 99705,9/99705.jpg
708
+ 99706,9/99706.jpg
709
+ 99707,9/99707.jpg
710
+ 99708,9/99708.jpg
711
+ 99709,9/99709.jpg
712
+ 99710,9/99710.jpg
713
+ 99711,9/99711.jpg
714
+ 99712,9/99712.jpg
715
+ 99713,9/99713.jpg
716
+ 99714,9/99714.jpg
717
+ 99715,9/99715.jpg
718
+ 99716,9/99716.jpg
719
+ 99717,9/99717.jpg
720
+ 99718,9/99718.jpg
721
+ 99719,9/99719.jpg
722
+ 99720,9/99720.jpg
723
+ 99721,9/99721.jpg
724
+ 99722,9/99722.jpg
725
+ 99723,9/99723.jpg
726
+ 99724,9/99724.jpg
727
+ 99725,9/99725.jpg
728
+ 99726,9/99726.jpg
729
+ 99727,9/99727.jpg
730
+ 99728,9/99728.jpg
731
+ 99729,9/99729.jpg
732
+ 99730,9/99730.jpg
733
+ 99731,9/99731.jpg
734
+ 99732,9/99732.jpg
735
+ 99733,9/99733.jpg
736
+ 99734,9/99734.jpg
737
+ 99735,9/99735.jpg
738
+ 99736,9/99736.jpg
739
+ 99737,9/99737.jpg
740
+ 99738,9/99738.jpg
741
+ 99739,9/99739.jpg
742
+ 99740,9/99740.jpg
743
+ 99741,9/99741.jpg
744
+ 99742,9/99742.jpg
745
+ 99743,9/99743.jpg
746
+ 99744,9/99744.jpg
747
+ 99745,9/99745.jpg
748
+ 99746,9/99746.jpg
749
+ 99747,9/99747.jpg
750
+ 99748,9/99748.jpg
751
+ 99749,9/99749.jpg
752
+ 99750,9/99750.jpg
753
+ 99751,9/99751.jpg
754
+ 99752,9/99752.jpg
755
+ 99753,9/99753.jpg
756
+ 99754,9/99754.jpg
757
+ 99755,9/99755.jpg
758
+ 99756,9/99756.jpg
759
+ 99757,9/99757.jpg
760
+ 99758,9/99758.jpg
761
+ 99759,9/99759.jpg
762
+ 99760,9/99760.jpg
763
+ 99761,9/99761.jpg
764
+ 99762,9/99762.jpg
765
+ 99763,9/99763.jpg
766
+ 99764,9/99764.jpg
767
+ 99765,9/99765.jpg
768
+ 99766,9/99766.jpg
769
+ 99767,9/99767.jpg
770
+ 99768,9/99768.jpg
771
+ 99769,9/99769.jpg
772
+ 99770,9/99770.jpg
773
+ 99771,9/99771.jpg
774
+ 99772,9/99772.jpg
775
+ 99773,9/99773.jpg
776
+ 99774,9/99774.jpg
777
+ 99775,9/99775.jpg
778
+ 99776,9/99776.jpg
779
+ 99777,9/99777.jpg
780
+ 99778,9/99778.jpg
781
+ 99779,9/99779.jpg
782
+ 99780,9/99780.jpg
783
+ 99781,9/99781.jpg
784
+ 99782,9/99782.jpg
785
+ 99783,9/99783.jpg
786
+ 99784,9/99784.jpg
787
+ 99785,9/99785.jpg
788
+ 99786,9/99786.jpg
789
+ 99787,9/99787.jpg
790
+ 99788,9/99788.jpg
791
+ 99789,9/99789.jpg
792
+ 99790,9/99790.jpg
793
+ 99791,9/99791.jpg
794
+ 99792,9/99792.jpg
795
+ 99793,9/99793.jpg
796
+ 99794,9/99794.jpg
797
+ 99795,9/99795.jpg
798
+ 99796,9/99796.jpg
799
+ 99797,9/99797.jpg
800
+ 99798,9/99798.jpg
801
+ 99799,9/99799.jpg
802
+ 99800,9/99800.jpg
803
+ 99801,9/99801.jpg
804
+ 99802,9/99802.jpg
805
+ 99803,9/99803.jpg
806
+ 99804,9/99804.jpg
807
+ 99805,9/99805.jpg
808
+ 99806,9/99806.jpg
809
+ 99807,9/99807.jpg
810
+ 99808,9/99808.jpg
811
+ 99809,9/99809.jpg
812
+ 99810,9/99810.jpg
813
+ 99811,9/99811.jpg
814
+ 99812,9/99812.jpg
815
+ 99813,9/99813.jpg
816
+ 99814,9/99814.jpg
817
+ 99815,9/99815.jpg
818
+ 99816,9/99816.jpg
819
+ 99817,9/99817.jpg
820
+ 99818,9/99818.jpg
821
+ 99819,9/99819.jpg
822
+ 99820,9/99820.jpg
823
+ 99821,9/99821.jpg
824
+ 99822,9/99822.jpg
825
+ 99823,9/99823.jpg
826
+ 99824,9/99824.jpg
827
+ 99825,9/99825.jpg
828
+ 99826,9/99826.jpg
829
+ 99827,9/99827.jpg
830
+ 99828,9/99828.jpg
831
+ 99829,9/99829.jpg
832
+ 99830,9/99830.jpg
833
+ 99831,9/99831.jpg
834
+ 99832,9/99832.jpg
835
+ 99833,9/99833.jpg
836
+ 99834,9/99834.jpg
837
+ 99835,9/99835.jpg
838
+ 99836,9/99836.jpg
839
+ 99837,9/99837.jpg
840
+ 99838,9/99838.jpg
841
+ 99839,9/99839.jpg
842
+ 99840,9/99840.jpg
843
+ 99841,9/99841.jpg
844
+ 99842,9/99842.jpg
845
+ 99843,9/99843.jpg
846
+ 99844,9/99844.jpg
847
+ 99845,9/99845.jpg
848
+ 99846,9/99846.jpg
849
+ 99847,9/99847.jpg
850
+ 99848,9/99848.jpg
851
+ 99849,9/99849.jpg
852
+ 99850,9/99850.jpg
853
+ 99851,9/99851.jpg
854
+ 99852,9/99852.jpg
855
+ 99853,9/99853.jpg
856
+ 99854,9/99854.jpg
857
+ 99855,9/99855.jpg
858
+ 99856,9/99856.jpg
859
+ 99857,9/99857.jpg
860
+ 99858,9/99858.jpg
861
+ 99859,9/99859.jpg
862
+ 99860,9/99860.jpg
863
+ 99861,9/99861.jpg
864
+ 99862,9/99862.jpg
865
+ 99863,9/99863.jpg
866
+ 99864,9/99864.jpg
867
+ 99865,9/99865.jpg
868
+ 99866,9/99866.jpg
869
+ 99867,9/99867.jpg
870
+ 99868,9/99868.jpg
871
+ 99869,9/99869.jpg
872
+ 99870,9/99870.jpg
873
+ 99871,9/99871.jpg
874
+ 99872,9/99872.jpg
875
+ 99873,9/99873.jpg
876
+ 99874,9/99874.jpg
877
+ 99875,9/99875.jpg
878
+ 99876,9/99876.jpg
879
+ 99877,9/99877.jpg
880
+ 99878,9/99878.jpg
881
+ 99879,9/99879.jpg
882
+ 99880,9/99880.jpg
883
+ 99881,9/99881.jpg
884
+ 99882,9/99882.jpg
885
+ 99883,9/99883.jpg
886
+ 99884,9/99884.jpg
887
+ 99885,9/99885.jpg
888
+ 99886,9/99886.jpg
889
+ 99887,9/99887.jpg
890
+ 99888,9/99888.jpg
891
+ 99889,9/99889.jpg
892
+ 99890,9/99890.jpg
893
+ 99891,9/99891.jpg
894
+ 99892,9/99892.jpg
895
+ 99893,9/99893.jpg
896
+ 99894,9/99894.jpg
897
+ 99895,9/99895.jpg
898
+ 99896,9/99896.jpg
899
+ 99897,9/99897.jpg
900
+ 99898,9/99898.jpg
901
+ 99899,9/99899.jpg
902
+ 99900,9/99900.jpg
903
+ 99901,9/99901.jpg
904
+ 99902,9/99902.jpg
905
+ 99903,9/99903.jpg
906
+ 99904,9/99904.jpg
907
+ 99905,9/99905.jpg
908
+ 99906,9/99906.jpg
909
+ 99907,9/99907.jpg
910
+ 99908,9/99908.jpg
911
+ 99909,9/99909.jpg
912
+ 99910,9/99910.jpg
913
+ 99911,9/99911.jpg
914
+ 99912,9/99912.jpg
915
+ 99913,9/99913.jpg
916
+ 99914,9/99914.jpg
917
+ 99915,9/99915.jpg
918
+ 99916,9/99916.jpg
919
+ 99917,9/99917.jpg
920
+ 99918,9/99918.jpg
921
+ 99919,9/99919.jpg
922
+ 99920,9/99920.jpg
923
+ 99921,9/99921.jpg
924
+ 99922,9/99922.jpg
925
+ 99923,9/99923.jpg
926
+ 99924,9/99924.jpg
927
+ 99925,9/99925.jpg
928
+ 99926,9/99926.jpg
929
+ 99927,9/99927.jpg
930
+ 99928,9/99928.jpg
931
+ 99929,9/99929.jpg
932
+ 99930,9/99930.jpg
933
+ 99931,9/99931.jpg
934
+ 99932,9/99932.jpg
935
+ 99933,9/99933.jpg
936
+ 99934,9/99934.jpg
937
+ 99935,9/99935.jpg
938
+ 99936,9/99936.jpg
939
+ 99937,9/99937.jpg
940
+ 99938,9/99938.jpg
941
+ 99939,9/99939.jpg
942
+ 99940,9/99940.jpg
943
+ 99941,9/99941.jpg
944
+ 99942,9/99942.jpg
945
+ 99943,9/99943.jpg
946
+ 99944,9/99944.jpg
947
+ 99945,9/99945.jpg
948
+ 99946,9/99946.jpg
949
+ 99947,9/99947.jpg
950
+ 99948,9/99948.jpg
951
+ 99949,9/99949.jpg
952
+ 99950,9/99950.jpg
953
+ 99951,9/99951.jpg
954
+ 99952,9/99952.jpg
955
+ 99953,9/99953.jpg
956
+ 99954,9/99954.jpg
957
+ 99955,9/99955.jpg
958
+ 99956,9/99956.jpg
959
+ 99957,9/99957.jpg
960
+ 99958,9/99958.jpg
961
+ 99959,9/99959.jpg
962
+ 99960,9/99960.jpg
963
+ 99961,9/99961.jpg
964
+ 99962,9/99962.jpg
965
+ 99963,9/99963.jpg
966
+ 99964,9/99964.jpg
967
+ 99965,9/99965.jpg
968
+ 99966,9/99966.jpg
969
+ 99967,9/99967.jpg
970
+ 99968,9/99968.jpg
971
+ 99969,9/99969.jpg
972
+ 99970,9/99970.jpg
973
+ 99971,9/99971.jpg
974
+ 99972,9/99972.jpg
975
+ 99973,9/99973.jpg
976
+ 99974,9/99974.jpg
977
+ 99975,9/99975.jpg
978
+ 99976,9/99976.jpg
979
+ 99977,9/99977.jpg
980
+ 99978,9/99978.jpg
981
+ 99979,9/99979.jpg
982
+ 99980,9/99980.jpg
983
+ 99981,9/99981.jpg
984
+ 99982,9/99982.jpg
985
+ 99983,9/99983.jpg
986
+ 99984,9/99984.jpg
987
+ 99985,9/99985.jpg
988
+ 99986,9/99986.jpg
989
+ 99987,9/99987.jpg
990
+ 99988,9/99988.jpg
991
+ 99989,9/99989.jpg
992
+ 99990,9/99990.jpg
993
+ 99991,9/99991.jpg
994
+ 99992,9/99992.jpg
995
+ 99993,9/99993.jpg
996
+ 99994,9/99994.jpg
997
+ 99995,9/99995.jpg
998
+ 99996,9/99996.jpg
999
+ 99997,9/99997.jpg
1000
+ 99998,9/99998.jpg
1001
+ 99999,9/99999.jpg
watermarker/LaWa/ldm/__pycache__/util.cpython-38.pyc ADDED
Binary file (6.59 kB). View file
 
watermarker/LaWa/ldm/data/__init__.py ADDED
File without changes
watermarker/LaWa/ldm/data/util.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
+ from ldm.modules.midas.api import load_midas_transform
4
+
5
+
6
+ class AddMiDaS(object):
7
+ def __init__(self, model_type):
8
+ super().__init__()
9
+ self.transform = load_midas_transform(model_type)
10
+
11
+ def pt2np(self, x):
12
+ x = ((x + 1.0) * .5).detach().cpu().numpy()
13
+ return x
14
+
15
+ def np2pt(self, x):
16
+ x = torch.from_numpy(x) * 2 - 1.
17
+ return x
18
+
19
+ def __call__(self, sample):
20
+ # sample['jpg'] is tensor hwc in [-1, 1] at this point
21
+ x = self.pt2np(sample['jpg'])
22
+ x = self.transform({"image": x})["image"]
23
+ sample['midas_in'] = x
24
+ return sample
watermarker/LaWa/ldm/models/__pycache__/autoencoder.cpython-38.pyc ADDED
Binary file (14.8 kB). View file
 
watermarker/LaWa/ldm/models/autoencoder.py ADDED
@@ -0,0 +1,492 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import pytorch_lightning as pl
3
+ import torch.nn.functional as F
4
+ from contextlib import contextmanager
5
+
6
+ from ldm.modules.diffusionmodules.model import Encoder, Decoder
7
+ from ldm.modules.distributions.distributions import DiagonalGaussianDistribution
8
+
9
+ from ldm.util import instantiate_from_config
10
+ from ldm.modules.ema import LitEma
11
+
12
+
13
+ class AutoencoderKL(pl.LightningModule):
14
+ def __init__(self,
15
+ ddconfig,
16
+ lossconfig,
17
+ embed_dim,
18
+ ckpt_path=None,
19
+ ignore_keys=[],
20
+ image_key="image",
21
+ colorize_nlabels=None,
22
+ monitor=None,
23
+ ema_decay=None,
24
+ learn_logvar=False
25
+ ):
26
+ super().__init__()
27
+ self.learn_logvar = learn_logvar
28
+ self.image_key = image_key
29
+ self.encoder = Encoder(**ddconfig)
30
+ self.decoder = Decoder(**ddconfig)
31
+ self.loss = instantiate_from_config(lossconfig)
32
+ assert ddconfig["double_z"]
33
+ self.quant_conv = torch.nn.Conv2d(2*ddconfig["z_channels"], 2*embed_dim, 1)
34
+ self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1)
35
+ self.embed_dim = embed_dim
36
+ if colorize_nlabels is not None:
37
+ assert type(colorize_nlabels)==int
38
+ self.register_buffer("colorize", torch.randn(3, colorize_nlabels, 1, 1))
39
+ if monitor is not None:
40
+ self.monitor = monitor
41
+
42
+ self.use_ema = ema_decay is not None
43
+ if self.use_ema:
44
+ self.ema_decay = ema_decay
45
+ assert 0. < ema_decay < 1.
46
+ self.model_ema = LitEma(self, decay=ema_decay)
47
+ print(f"Keeping EMAs of {len(list(self.model_ema.buffers()))}.")
48
+
49
+ if ckpt_path is not None:
50
+ self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys)
51
+
52
+ def init_from_ckpt(self, path, ignore_keys=list()):
53
+ sd = torch.load(path, map_location="cpu")["state_dict"]
54
+ keys = list(sd.keys())
55
+ for k in keys:
56
+ for ik in ignore_keys:
57
+ if k.startswith(ik):
58
+ print("Deleting key {} from state_dict.".format(k))
59
+ del sd[k]
60
+ self.load_state_dict(sd, strict=False)
61
+ print(f"Restored from {path}")
62
+
63
+ @contextmanager
64
+ def ema_scope(self, context=None):
65
+ if self.use_ema:
66
+ self.model_ema.store(self.parameters())
67
+ self.model_ema.copy_to(self)
68
+ if context is not None:
69
+ print(f"{context}: Switched to EMA weights")
70
+ try:
71
+ yield None
72
+ finally:
73
+ if self.use_ema:
74
+ self.model_ema.restore(self.parameters())
75
+ if context is not None:
76
+ print(f"{context}: Restored training weights")
77
+
78
+ def on_train_batch_end(self, *args, **kwargs):
79
+ if self.use_ema:
80
+ self.model_ema(self)
81
+
82
+ def encode(self, x):
83
+ h = self.encoder(x)
84
+ moments = self.quant_conv(h)
85
+ posterior = DiagonalGaussianDistribution(moments)
86
+ return posterior
87
+
88
+ def decode(self, z):
89
+ z = self.post_quant_conv(z)
90
+ dec = self.decoder(z)
91
+ return dec
92
+
93
+ def forward(self, input, sample_posterior=True):
94
+ posterior = self.encode(input)
95
+ if sample_posterior:
96
+ z = posterior.sample()
97
+ else:
98
+ z = posterior.mode()
99
+ dec = self.decode(z)
100
+ return dec, posterior
101
+
102
+ def get_input(self, batch, k):
103
+ x = batch[k]
104
+ if len(x.shape) == 3:
105
+ x = x[..., None]
106
+ x = x.permute(0, 3, 1, 2).to(memory_format=torch.contiguous_format).float()
107
+ return x
108
+
109
+ def training_step(self, batch, batch_idx, optimizer_idx):
110
+ inputs = self.get_input(batch, self.image_key)
111
+ reconstructions, posterior = self(inputs)
112
+
113
+ if optimizer_idx == 0:
114
+ # train encoder+decoder+logvar
115
+ aeloss, log_dict_ae = self.loss(inputs, reconstructions, posterior, optimizer_idx, self.global_step,
116
+ last_layer=self.get_last_layer(), split="train")
117
+ self.log("aeloss", aeloss, prog_bar=True, logger=True, on_step=True, on_epoch=True)
118
+ self.log_dict(log_dict_ae, prog_bar=False, logger=True, on_step=True, on_epoch=False)
119
+ return aeloss
120
+
121
+ if optimizer_idx == 1:
122
+ # train the discriminator
123
+ discloss, log_dict_disc = self.loss(inputs, reconstructions, posterior, optimizer_idx, self.global_step,
124
+ last_layer=self.get_last_layer(), split="train")
125
+
126
+ self.log("discloss", discloss, prog_bar=True, logger=True, on_step=True, on_epoch=True)
127
+ self.log_dict(log_dict_disc, prog_bar=False, logger=True, on_step=True, on_epoch=False)
128
+ return discloss
129
+
130
+ def validation_step(self, batch, batch_idx):
131
+ log_dict = self._validation_step(batch, batch_idx)
132
+ with self.ema_scope():
133
+ log_dict_ema = self._validation_step(batch, batch_idx, postfix="_ema")
134
+ return log_dict
135
+
136
+ def _validation_step(self, batch, batch_idx, postfix=""):
137
+ inputs = self.get_input(batch, self.image_key)
138
+ reconstructions, posterior = self(inputs)
139
+ aeloss, log_dict_ae = self.loss(inputs, reconstructions, posterior, 0, self.global_step,
140
+ last_layer=self.get_last_layer(), split="val"+postfix)
141
+
142
+ discloss, log_dict_disc = self.loss(inputs, reconstructions, posterior, 1, self.global_step,
143
+ last_layer=self.get_last_layer(), split="val"+postfix)
144
+
145
+ self.log(f"val{postfix}/rec_loss", log_dict_ae[f"val{postfix}/rec_loss"])
146
+ self.log_dict(log_dict_ae)
147
+ self.log_dict(log_dict_disc)
148
+ return self.log_dict
149
+
150
+ def configure_optimizers(self):
151
+ lr = self.learning_rate
152
+ ae_params_list = list(self.encoder.parameters()) + list(self.decoder.parameters()) + list(
153
+ self.quant_conv.parameters()) + list(self.post_quant_conv.parameters())
154
+ if self.learn_logvar:
155
+ print(f"{self.__class__.__name__}: Learning logvar")
156
+ ae_params_list.append(self.loss.logvar)
157
+ opt_ae = torch.optim.Adam(ae_params_list,
158
+ lr=lr, betas=(0.5, 0.9))
159
+ opt_disc = torch.optim.Adam(self.loss.discriminator.parameters(),
160
+ lr=lr, betas=(0.5, 0.9))
161
+ return [opt_ae, opt_disc], []
162
+
163
+ def get_last_layer(self):
164
+ return self.decoder.conv_out.weight
165
+
166
+ @torch.no_grad()
167
+ def log_images(self, batch, only_inputs=False, log_ema=False, **kwargs):
168
+ log = dict()
169
+ x = self.get_input(batch, self.image_key)
170
+ x = x.to(self.device)
171
+ if not only_inputs:
172
+ xrec, posterior = self(x)
173
+ if x.shape[1] > 3:
174
+ # colorize with random projection
175
+ assert xrec.shape[1] > 3
176
+ x = self.to_rgb(x)
177
+ xrec = self.to_rgb(xrec)
178
+ log["samples"] = self.decode(torch.randn_like(posterior.sample()))
179
+ log["reconstructions"] = xrec
180
+ if log_ema or self.use_ema:
181
+ with self.ema_scope():
182
+ xrec_ema, posterior_ema = self(x)
183
+ if x.shape[1] > 3:
184
+ # colorize with random projection
185
+ assert xrec_ema.shape[1] > 3
186
+ xrec_ema = self.to_rgb(xrec_ema)
187
+ log["samples_ema"] = self.decode(torch.randn_like(posterior_ema.sample()))
188
+ log["reconstructions_ema"] = xrec_ema
189
+ log["inputs"] = x
190
+ return log
191
+
192
+ def to_rgb(self, x):
193
+ assert self.image_key == "segmentation"
194
+ if not hasattr(self, "colorize"):
195
+ self.register_buffer("colorize", torch.randn(3, x.shape[1], 1, 1).to(x))
196
+ x = F.conv2d(x, weight=self.colorize)
197
+ x = 2.*(x-x.min())/(x.max()-x.min()) - 1.
198
+ return x
199
+
200
+
201
+ class IdentityFirstStage(torch.nn.Module):
202
+ def __init__(self, *args, vq_interface=False, **kwargs):
203
+ self.vq_interface = vq_interface
204
+ super().__init__()
205
+
206
+ def encode(self, x, *args, **kwargs):
207
+ return x
208
+
209
+ def decode(self, x, *args, **kwargs):
210
+ return x
211
+
212
+ def quantize(self, x, *args, **kwargs):
213
+ if self.vq_interface:
214
+ return x, None, [None, None, None]
215
+ return x
216
+
217
+ def forward(self, x, *args, **kwargs):
218
+ return x
219
+
220
+
221
+ class VQModel(pl.LightningModule):
222
+ def __init__(self,
223
+ ddconfig,
224
+ lossconfig,
225
+ n_embed,
226
+ embed_dim,
227
+ ckpt_path=None,
228
+ ignore_keys=[],
229
+ image_key="image",
230
+ colorize_nlabels=None,
231
+ monitor=None,
232
+ batch_resize_range=None,
233
+ scheduler_config=None,
234
+ lr_g_factor=1.0,
235
+ remap=None,
236
+ sane_index_shape=False, # tell vector quantizer to return indices as bhw
237
+ use_ema=False
238
+ ):
239
+ super().__init__()
240
+
241
+ from taming.modules.vqvae.quantize import VectorQuantizer2 as VectorQuantizer
242
+
243
+ self.embed_dim = embed_dim
244
+ self.n_embed = n_embed
245
+ self.image_key = image_key
246
+ self.encoder = Encoder(**ddconfig)
247
+ self.decoder = Decoder(**ddconfig)
248
+ self.loss = instantiate_from_config(lossconfig)
249
+ self.quantize = VectorQuantizer(n_embed, embed_dim, beta=0.25,
250
+ remap=remap,
251
+ sane_index_shape=sane_index_shape)
252
+ self.quant_conv = torch.nn.Conv2d(ddconfig["z_channels"], embed_dim, 1)
253
+ self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1)
254
+ if colorize_nlabels is not None:
255
+ assert type(colorize_nlabels)==int
256
+ self.register_buffer("colorize", torch.randn(3, colorize_nlabels, 1, 1))
257
+ if monitor is not None:
258
+ self.monitor = monitor
259
+ self.batch_resize_range = batch_resize_range
260
+ if self.batch_resize_range is not None:
261
+ print(f"{self.__class__.__name__}: Using per-batch resizing in range {batch_resize_range}.")
262
+
263
+ self.use_ema = use_ema
264
+ if self.use_ema:
265
+ self.model_ema = LitEma(self)
266
+ print(f"Keeping EMAs of {len(list(self.model_ema.buffers()))}.")
267
+
268
+ if ckpt_path is not None:
269
+ self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys)
270
+ self.scheduler_config = scheduler_config
271
+ self.lr_g_factor = lr_g_factor
272
+
273
+ @contextmanager
274
+ def ema_scope(self, context=None):
275
+ if self.use_ema:
276
+ self.model_ema.store(self.parameters())
277
+ self.model_ema.copy_to(self)
278
+ if context is not None:
279
+ print(f"{context}: Switched to EMA weights")
280
+ try:
281
+ yield None
282
+ finally:
283
+ if self.use_ema:
284
+ self.model_ema.restore(self.parameters())
285
+ if context is not None:
286
+ print(f"{context}: Restored training weights")
287
+
288
+ def init_from_ckpt(self, path, ignore_keys=list()):
289
+ sd = torch.load(path, map_location="cpu")["state_dict"]
290
+ keys = list(sd.keys())
291
+ for k in keys:
292
+ for ik in ignore_keys:
293
+ if k.startswith(ik):
294
+ print("Deleting key {} from state_dict.".format(k))
295
+ del sd[k]
296
+ missing, unexpected = self.load_state_dict(sd, strict=False)
297
+ print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys")
298
+ if len(missing) > 0:
299
+ print(f"Missing Keys: {missing}")
300
+ print(f"Unexpected Keys: {unexpected}")
301
+
302
+ def on_train_batch_end(self, *args, **kwargs):
303
+ if self.use_ema:
304
+ self.model_ema(self)
305
+
306
+ def encode(self, x):
307
+ h = self.encoder(x)
308
+ h = self.quant_conv(h)
309
+ quant, emb_loss, info = self.quantize(h)
310
+ return quant, emb_loss, info
311
+
312
+ def encode_to_prequant(self, x):
313
+ h = self.encoder(x)
314
+ h = self.quant_conv(h)
315
+ return h
316
+
317
+ def decode(self, quant):
318
+ quant = self.post_quant_conv(quant)
319
+ dec = self.decoder(quant)
320
+ return dec
321
+
322
+ def decode_code(self, code_b):
323
+ quant_b = self.quantize.embed_code(code_b)
324
+ dec = self.decode(quant_b)
325
+ return dec
326
+
327
+ def forward(self, input, return_pred_indices=False):
328
+ quant, diff, (_,_,ind) = self.encode(input)
329
+ dec = self.decode(quant)
330
+ if return_pred_indices:
331
+ return dec, diff, ind
332
+ return dec, diff
333
+
334
+ def get_input(self, batch, k):
335
+ x = batch[k]
336
+ if len(x.shape) == 3:
337
+ x = x[..., None]
338
+ x = x.permute(0, 3, 1, 2).to(memory_format=torch.contiguous_format).float()
339
+ if self.batch_resize_range is not None:
340
+ lower_size = self.batch_resize_range[0]
341
+ upper_size = self.batch_resize_range[1]
342
+ if self.global_step <= 4:
343
+ # do the first few batches with max size to avoid later oom
344
+ new_resize = upper_size
345
+ else:
346
+ new_resize = np.random.choice(np.arange(lower_size, upper_size+16, 16))
347
+ if new_resize != x.shape[2]:
348
+ x = F.interpolate(x, size=new_resize, mode="bicubic")
349
+ x = x.detach()
350
+ return x
351
+
352
+ def training_step(self, batch, batch_idx, optimizer_idx):
353
+ # https://github.com/pytorch/pytorch/issues/37142
354
+ # try not to fool the heuristics
355
+ x = self.get_input(batch, self.image_key)
356
+ xrec, qloss, ind = self(x, return_pred_indices=True)
357
+
358
+ if optimizer_idx == 0:
359
+ # autoencode
360
+ aeloss, log_dict_ae = self.loss(qloss, x, xrec, optimizer_idx, self.global_step,
361
+ last_layer=self.get_last_layer(), split="train",
362
+ predicted_indices=ind)
363
+
364
+ self.log_dict(log_dict_ae, prog_bar=False, logger=True, on_step=True, on_epoch=True)
365
+ return aeloss
366
+
367
+ if optimizer_idx == 1:
368
+ # discriminator
369
+ discloss, log_dict_disc = self.loss(qloss, x, xrec, optimizer_idx, self.global_step,
370
+ last_layer=self.get_last_layer(), split="train")
371
+ self.log_dict(log_dict_disc, prog_bar=False, logger=True, on_step=True, on_epoch=True)
372
+ return discloss
373
+
374
+ def validation_step(self, batch, batch_idx):
375
+ log_dict = self._validation_step(batch, batch_idx)
376
+ with self.ema_scope():
377
+ log_dict_ema = self._validation_step(batch, batch_idx, suffix="_ema")
378
+ return log_dict
379
+
380
+ def _validation_step(self, batch, batch_idx, suffix=""):
381
+ x = self.get_input(batch, self.image_key)
382
+ xrec, qloss, ind = self(x, return_pred_indices=True)
383
+ aeloss, log_dict_ae = self.loss(qloss, x, xrec, 0,
384
+ self.global_step,
385
+ last_layer=self.get_last_layer(),
386
+ split="val"+suffix,
387
+ predicted_indices=ind
388
+ )
389
+
390
+ discloss, log_dict_disc = self.loss(qloss, x, xrec, 1,
391
+ self.global_step,
392
+ last_layer=self.get_last_layer(),
393
+ split="val"+suffix,
394
+ predicted_indices=ind
395
+ )
396
+ rec_loss = log_dict_ae[f"val{suffix}/rec_loss"]
397
+ self.log(f"val{suffix}/rec_loss", rec_loss,
398
+ prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True)
399
+ self.log(f"val{suffix}/aeloss", aeloss,
400
+ prog_bar=True, logger=True, on_step=False, on_epoch=True, sync_dist=True)
401
+ if version.parse(pl.__version__) >= version.parse('1.4.0'):
402
+ del log_dict_ae[f"val{suffix}/rec_loss"]
403
+ self.log_dict(log_dict_ae)
404
+ self.log_dict(log_dict_disc)
405
+ return self.log_dict
406
+
407
+ def configure_optimizers(self):
408
+ lr_d = self.learning_rate
409
+ lr_g = self.lr_g_factor*self.learning_rate
410
+ print("lr_d", lr_d)
411
+ print("lr_g", lr_g)
412
+ opt_ae = torch.optim.Adam(list(self.encoder.parameters())+
413
+ list(self.decoder.parameters())+
414
+ list(self.quantize.parameters())+
415
+ list(self.quant_conv.parameters())+
416
+ list(self.post_quant_conv.parameters()),
417
+ lr=lr_g, betas=(0.5, 0.9))
418
+ opt_disc = torch.optim.Adam(self.loss.discriminator.parameters(),
419
+ lr=lr_d, betas=(0.5, 0.9))
420
+
421
+ if self.scheduler_config is not None:
422
+ scheduler = instantiate_from_config(self.scheduler_config)
423
+
424
+ print("Setting up LambdaLR scheduler...")
425
+ scheduler = [
426
+ {
427
+ 'scheduler': LambdaLR(opt_ae, lr_lambda=scheduler.schedule),
428
+ 'interval': 'step',
429
+ 'frequency': 1
430
+ },
431
+ {
432
+ 'scheduler': LambdaLR(opt_disc, lr_lambda=scheduler.schedule),
433
+ 'interval': 'step',
434
+ 'frequency': 1
435
+ },
436
+ ]
437
+ return [opt_ae, opt_disc], scheduler
438
+ return [opt_ae, opt_disc], []
439
+
440
+ def get_last_layer(self):
441
+ return self.decoder.conv_out.weight
442
+
443
+ def log_images(self, batch, only_inputs=False, plot_ema=False, **kwargs):
444
+ log = dict()
445
+ x = self.get_input(batch, self.image_key)
446
+ x = x.to(self.device)
447
+ if only_inputs:
448
+ log["inputs"] = x
449
+ return log
450
+ xrec, _ = self(x)
451
+ if x.shape[1] > 3:
452
+ # colorize with random projection
453
+ assert xrec.shape[1] > 3
454
+ x = self.to_rgb(x)
455
+ xrec = self.to_rgb(xrec)
456
+ log["inputs"] = x
457
+ log["reconstructions"] = xrec
458
+ if plot_ema:
459
+ with self.ema_scope():
460
+ xrec_ema, _ = self(x)
461
+ if x.shape[1] > 3: xrec_ema = self.to_rgb(xrec_ema)
462
+ log["reconstructions_ema"] = xrec_ema
463
+ return log
464
+
465
+ def to_rgb(self, x):
466
+ assert self.image_key == "segmentation"
467
+ if not hasattr(self, "colorize"):
468
+ self.register_buffer("colorize", torch.randn(3, x.shape[1], 1, 1).to(x))
469
+ x = F.conv2d(x, weight=self.colorize)
470
+ x = 2.*(x-x.min())/(x.max()-x.min()) - 1.
471
+ return x
472
+
473
+
474
+ class VQModelInterface(VQModel):
475
+ def __init__(self, embed_dim, *args, **kwargs):
476
+ super().__init__(embed_dim=embed_dim, *args, **kwargs)
477
+ self.embed_dim = embed_dim
478
+
479
+ def encode(self, x):
480
+ h = self.encoder(x)
481
+ h = self.quant_conv(h)
482
+ return h
483
+
484
+ def decode(self, h, force_not_quantize=False):
485
+ # also go through quantization layer
486
+ if not force_not_quantize:
487
+ quant, emb_loss, info = self.quantize(h)
488
+ else:
489
+ quant = h
490
+ quant = self.post_quant_conv(quant)
491
+ dec = self.decoder(quant)
492
+ return dec
watermarker/LaWa/ldm/models/diffusion/__init__.py ADDED
File without changes
watermarker/LaWa/ldm/models/diffusion/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (170 Bytes). View file
 
watermarker/LaWa/ldm/models/diffusion/__pycache__/ddim.cpython-38.pyc ADDED
Binary file (9.38 kB). View file
 
watermarker/LaWa/ldm/models/diffusion/__pycache__/ddpm.cpython-38.pyc ADDED
Binary file (53 kB). View file
 
watermarker/LaWa/ldm/models/diffusion/__pycache__/plms.cpython-38.pyc ADDED
Binary file (7.53 kB). View file
 
watermarker/LaWa/ldm/models/diffusion/__pycache__/sampling_util.cpython-38.pyc ADDED
Binary file (1.07 kB). View file
 
watermarker/LaWa/ldm/models/diffusion/ddim.py ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SAMPLING ONLY."""
2
+
3
+ import torch
4
+ import numpy as np
5
+ from tqdm import tqdm
6
+
7
+ from ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, make_ddim_timesteps, noise_like, extract_into_tensor
8
+
9
+
10
+ class DDIMSampler(object):
11
+ def __init__(self, model, schedule="linear", **kwargs):
12
+ super().__init__()
13
+ self.model = model
14
+ self.ddpm_num_timesteps = model.num_timesteps
15
+ self.schedule = schedule
16
+
17
+ def register_buffer(self, name, attr):
18
+ if type(attr) == torch.Tensor:
19
+ if attr.device != torch.device("cuda"):
20
+ attr = attr.to(torch.device("cuda"))
21
+ setattr(self, name, attr)
22
+
23
+ def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
24
+ self.ddim_timesteps = make_ddim_timesteps(ddim_discr_method=ddim_discretize, num_ddim_timesteps=ddim_num_steps,
25
+ num_ddpm_timesteps=self.ddpm_num_timesteps,verbose=verbose)
26
+ alphas_cumprod = self.model.alphas_cumprod
27
+ assert alphas_cumprod.shape[0] == self.ddpm_num_timesteps, 'alphas have to be defined for each timestep'
28
+ to_torch = lambda x: x.clone().detach().to(torch.float32).to(self.model.device)
29
+
30
+ self.register_buffer('betas', to_torch(self.model.betas))
31
+ self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod))
32
+ self.register_buffer('alphas_cumprod_prev', to_torch(self.model.alphas_cumprod_prev))
33
+
34
+ # calculations for diffusion q(x_t | x_{t-1}) and others
35
+ self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod.cpu())))
36
+ self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod.cpu())))
37
+ self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod.cpu())))
38
+ self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod.cpu())))
39
+ self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod.cpu() - 1)))
40
+
41
+ # ddim sampling parameters
42
+ ddim_sigmas, ddim_alphas, ddim_alphas_prev = make_ddim_sampling_parameters(alphacums=alphas_cumprod.cpu(),
43
+ ddim_timesteps=self.ddim_timesteps,
44
+ eta=ddim_eta,verbose=verbose)
45
+ self.register_buffer('ddim_sigmas', ddim_sigmas)
46
+ self.register_buffer('ddim_alphas', ddim_alphas)
47
+ self.register_buffer('ddim_alphas_prev', ddim_alphas_prev)
48
+ self.register_buffer('ddim_sqrt_one_minus_alphas', np.sqrt(1. - ddim_alphas))
49
+ sigmas_for_original_sampling_steps = ddim_eta * torch.sqrt(
50
+ (1 - self.alphas_cumprod_prev) / (1 - self.alphas_cumprod) * (
51
+ 1 - self.alphas_cumprod / self.alphas_cumprod_prev))
52
+ self.register_buffer('ddim_sigmas_for_original_num_steps', sigmas_for_original_sampling_steps)
53
+
54
+ @torch.no_grad()
55
+ def sample(self,
56
+ S,
57
+ batch_size,
58
+ shape,
59
+ conditioning=None,
60
+ callback=None,
61
+ normals_sequence=None,
62
+ img_callback=None,
63
+ quantize_x0=False,
64
+ eta=0.,
65
+ mask=None,
66
+ x0=None,
67
+ temperature=1.,
68
+ noise_dropout=0.,
69
+ score_corrector=None,
70
+ corrector_kwargs=None,
71
+ verbose=True,
72
+ x_T=None,
73
+ log_every_t=100,
74
+ unconditional_guidance_scale=1.,
75
+ unconditional_conditioning=None, # this has to come in the same format as the conditioning, # e.g. as encoded tokens, ...
76
+ dynamic_threshold=None,
77
+ ucg_schedule=None,
78
+ **kwargs
79
+ ):
80
+ if conditioning is not None:
81
+ if isinstance(conditioning, dict):
82
+ ctmp = conditioning[list(conditioning.keys())[0]]
83
+ while isinstance(ctmp, list): ctmp = ctmp[0]
84
+ cbs = ctmp.shape[0]
85
+ if cbs != batch_size:
86
+ print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}")
87
+
88
+ elif isinstance(conditioning, list):
89
+ for ctmp in conditioning:
90
+ if ctmp.shape[0] != batch_size:
91
+ print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}")
92
+
93
+ else:
94
+ if conditioning.shape[0] != batch_size:
95
+ print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}")
96
+
97
+ self.make_schedule(ddim_num_steps=S, ddim_eta=eta, verbose=verbose)
98
+ # sampling
99
+ C, H, W = shape
100
+ size = (batch_size, C, H, W)
101
+ if verbose:
102
+ print(f'Data shape for DDIM sampling is {size}, eta {eta}')
103
+
104
+ samples, intermediates = self.ddim_sampling(conditioning, size,
105
+ callback=callback,
106
+ img_callback=img_callback,
107
+ quantize_denoised=quantize_x0,
108
+ mask=mask, x0=x0,
109
+ ddim_use_original_steps=False,
110
+ noise_dropout=noise_dropout,
111
+ temperature=temperature,
112
+ score_corrector=score_corrector,
113
+ corrector_kwargs=corrector_kwargs,
114
+ x_T=x_T,
115
+ log_every_t=log_every_t,
116
+ unconditional_guidance_scale=unconditional_guidance_scale,
117
+ unconditional_conditioning=unconditional_conditioning,
118
+ dynamic_threshold=dynamic_threshold,
119
+ ucg_schedule=ucg_schedule, verbose=verbose
120
+ )
121
+ return samples, intermediates
122
+
123
    @torch.no_grad()
    def ddim_sampling(self, cond, shape,
                      x_T=None, ddim_use_original_steps=False,
                      callback=None, timesteps=None, quantize_denoised=False,
                      mask=None, x0=None, img_callback=None, log_every_t=100,
                      temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None,
                      unconditional_guidance_scale=1., unconditional_conditioning=None, dynamic_threshold=None,
                      ucg_schedule=None, verbose=True, **kwargs):
        """Inner DDIM loop: iterate `p_sample_ddim` from x_T down to x_0.

        :param cond: conditioning passed through to `p_sample_ddim`.
        :param shape: full latent shape (B, C, H, W).
        :param x_T: starting latent; pure Gaussian noise if None.
        :param mask/x0: optional inpainting pair -- masked region is pinned to
            the forward-diffused x0 at every step.
        :return: (img, intermediates) where intermediates logs latents and
            x0-predictions every `log_every_t` indices.
        """
        device = self.model.betas.device
        b = shape[0]
        if x_T is None:
            img = torch.randn(shape, device=device)
        else:
            img = x_T

        if timesteps is None:
            timesteps = self.ddpm_num_timesteps if ddim_use_original_steps else self.ddim_timesteps
        elif timesteps is not None and not ddim_use_original_steps:
            # Truncate the precomputed DDIM schedule to at most `timesteps` entries.
            subset_end = int(min(timesteps / self.ddim_timesteps.shape[0], 1) * self.ddim_timesteps.shape[0]) - 1
            timesteps = self.ddim_timesteps[:subset_end]

        intermediates = {'x_inter': [img], 'pred_x0': [img]}
        # Walk the schedule from most-noised to least-noised.
        time_range = reversed(range(0, timesteps)) if ddim_use_original_steps else np.flip(timesteps)
        total_steps = timesteps if ddim_use_original_steps else timesteps.shape[0]
        if verbose:
            print(f"Running DDIM Sampling with {total_steps} timesteps")
            iterator = tqdm(time_range, desc='DDIM Sampler', total=total_steps, miniters=total_steps // 5, mininterval=300)
        else:
            iterator = time_range

        for i, step in enumerate(iterator):
            # `index` indexes the ddim_* buffers (0 = least noised).
            index = total_steps - i - 1
            ts = torch.full((b,), step, device=device, dtype=torch.long)

            if mask is not None:
                # Inpainting: re-noise x0 to the current step and keep the
                # masked region fixed to it.
                assert x0 is not None
                img_orig = self.model.q_sample(x0, ts)  # TODO: deterministic forward pass?
                img = img_orig * mask + (1. - mask) * img

            if ucg_schedule is not None:
                # Per-step classifier-free guidance scale.
                assert len(ucg_schedule) == len(time_range)
                unconditional_guidance_scale = ucg_schedule[i]

            outs = self.p_sample_ddim(img, cond, ts, index=index, use_original_steps=ddim_use_original_steps,
                                      quantize_denoised=quantize_denoised, temperature=temperature,
                                      noise_dropout=noise_dropout, score_corrector=score_corrector,
                                      corrector_kwargs=corrector_kwargs,
                                      unconditional_guidance_scale=unconditional_guidance_scale,
                                      unconditional_conditioning=unconditional_conditioning,
                                      dynamic_threshold=dynamic_threshold)
            img, pred_x0 = outs
            if callback: callback(i)
            if img_callback: img_callback(pred_x0, i)

            if index % log_every_t == 0 or index == total_steps - 1:
                intermediates['x_inter'].append(img)
                intermediates['pred_x0'].append(pred_x0)

        return img, intermediates
182
+
183
    @torch.no_grad()
    def p_sample_ddim(self, x, c, t, index, repeat_noise=False, use_original_steps=False, quantize_denoised=False,
                      temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None,
                      unconditional_guidance_scale=1., unconditional_conditioning=None,
                      dynamic_threshold=None):
        """Single DDIM update: map x_t at schedule position `index` to x_{t-1}.

        :param x: current latent batch.
        :param c: conditioning (tensor, list, or dict of tensors/lists).
        :param t: per-sample timestep tensor matching `index`.
        :param index: position into the (DDIM or original) schedule buffers.
        :return: (x_prev, pred_x0).
        """
        b, *_, device = *x.shape, x.device

        if unconditional_conditioning is None or unconditional_guidance_scale == 1.:
            model_output = self.model.apply_model(x, t, c)
        else:
            # Classifier-free guidance: run unconditional and conditional
            # branches as one doubled batch, then extrapolate between them.
            x_in = torch.cat([x] * 2)
            t_in = torch.cat([t] * 2)
            if isinstance(c, dict):
                assert isinstance(unconditional_conditioning, dict)
                c_in = dict()
                for k in c:
                    if isinstance(c[k], list):
                        c_in[k] = [torch.cat([
                            unconditional_conditioning[k][i],
                            c[k][i]]) for i in range(len(c[k]))]
                    else:
                        c_in[k] = torch.cat([
                            unconditional_conditioning[k],
                            c[k]])
            elif isinstance(c, list):
                c_in = list()
                assert isinstance(unconditional_conditioning, list)
                for i in range(len(c)):
                    c_in.append(torch.cat([unconditional_conditioning[i], c[i]]))
            else:
                c_in = torch.cat([unconditional_conditioning, c])
            model_uncond, model_t = self.model.apply_model(x_in, t_in, c_in).chunk(2)
            model_output = model_uncond + unconditional_guidance_scale * (model_t - model_uncond)

        # Convert the network output to an eps (noise) prediction.
        if self.model.parameterization == "v":
            e_t = self.model.predict_eps_from_z_and_v(x, t, model_output)
        else:
            e_t = model_output

        if score_corrector is not None:
            assert self.model.parameterization == "eps", 'not implemented'
            e_t = score_corrector.modify_score(self.model, e_t, x, t, c, **corrector_kwargs)

        alphas = self.model.alphas_cumprod if use_original_steps else self.ddim_alphas
        alphas_prev = self.model.alphas_cumprod_prev if use_original_steps else self.ddim_alphas_prev
        sqrt_one_minus_alphas = self.model.sqrt_one_minus_alphas_cumprod if use_original_steps else self.ddim_sqrt_one_minus_alphas
        sigmas = self.model.ddim_sigmas_for_original_num_steps if use_original_steps else self.ddim_sigmas
        # select parameters corresponding to the currently considered timestep
        a_t = torch.full((b, 1, 1, 1), alphas[index], device=device)
        a_prev = torch.full((b, 1, 1, 1), alphas_prev[index], device=device)
        sigma_t = torch.full((b, 1, 1, 1), sigmas[index], device=device)
        sqrt_one_minus_at = torch.full((b, 1, 1, 1), sqrt_one_minus_alphas[index], device=device)

        # current prediction for x_0
        if self.model.parameterization != "v":
            pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt()
        else:
            pred_x0 = self.model.predict_start_from_z_and_v(x, t, model_output)

        if quantize_denoised:
            pred_x0, _, *_ = self.model.first_stage_model.quantize(pred_x0)

        if dynamic_threshold is not None:
            raise NotImplementedError()

        # direction pointing to x_t
        dir_xt = (1. - a_prev - sigma_t**2).sqrt() * e_t
        noise = sigma_t * noise_like(x.shape, device, repeat_noise) * temperature
        if noise_dropout > 0.:
            noise = torch.nn.functional.dropout(noise, p=noise_dropout)
        # DDIM update: combine the x0 estimate, the deterministic direction,
        # and (for eta > 0) a stochastic noise term.
        x_prev = a_prev.sqrt() * pred_x0 + dir_xt + noise
        return x_prev, pred_x0
255
+
256
    @torch.no_grad()
    def encode(self, x0, c, t_enc, use_original_steps=False, return_intermediates=None,
               unconditional_guidance_scale=1.0, unconditional_conditioning=None, callback=None):
        """Deterministic DDIM inversion: push x0 forward for `t_enc` steps.

        :param x0: clean latent batch to invert.
        :param c: conditioning for the model.
        :param t_enc: number of encoding steps (<= reference schedule length).
        :param return_intermediates: if set, also collect roughly that many
            intermediate latents.
        :return: (x_next, out) with out['x_encoded'] and, optionally,
            out['intermediates'] / out['intermediate_steps'].
        """
        num_reference_steps = self.ddpm_num_timesteps if use_original_steps else self.ddim_timesteps.shape[0]

        assert t_enc <= num_reference_steps
        num_steps = t_enc

        if use_original_steps:
            alphas_next = self.alphas_cumprod[:num_steps]
            alphas = self.alphas_cumprod_prev[:num_steps]
        else:
            alphas_next = self.ddim_alphas[:num_steps]
            alphas = torch.tensor(self.ddim_alphas_prev[:num_steps])

        x_next = x0
        intermediates = []
        inter_steps = []
        for i in tqdm(range(num_steps), desc='Encoding Image'):
            t = torch.full((x0.shape[0],), i, device=self.model.device, dtype=torch.long)
            if unconditional_guidance_scale == 1.:
                noise_pred = self.model.apply_model(x_next, t, c)
            else:
                # Classifier-free guidance, batched into one forward pass.
                assert unconditional_conditioning is not None
                e_t_uncond, noise_pred = torch.chunk(
                    self.model.apply_model(torch.cat((x_next, x_next)), torch.cat((t, t)),
                                           torch.cat((unconditional_conditioning, c))), 2)
                noise_pred = e_t_uncond + unconditional_guidance_scale * (noise_pred - e_t_uncond)

            # Reversed DDIM update: re-weight the latent and add the scaled
            # noise-prediction delta to step toward higher noise levels.
            xt_weighted = (alphas_next[i] / alphas[i]).sqrt() * x_next
            weighted_noise_pred = alphas_next[i].sqrt() * (
                    (1 / alphas_next[i] - 1).sqrt() - (1 / alphas[i] - 1).sqrt()) * noise_pred
            x_next = xt_weighted + weighted_noise_pred
            # Log intermediates on a stride derived from return_intermediates,
            # plus the final two steps.
            if return_intermediates and i % (
                    num_steps // return_intermediates) == 0 and i < num_steps - 1:
                intermediates.append(x_next)
                inter_steps.append(i)
            elif return_intermediates and i >= num_steps - 2:
                intermediates.append(x_next)
                inter_steps.append(i)
            if callback: callback(i)

        out = {'x_encoded': x_next, 'intermediate_steps': inter_steps}
        if return_intermediates:
            out.update({'intermediates': intermediates})
        return x_next, out
302
+
303
+ @torch.no_grad()
304
+ def stochastic_encode(self, x0, t, use_original_steps=False, noise=None):
305
+ # fast, but does not allow for exact reconstruction
306
+ # t serves as an index to gather the correct alphas
307
+ if use_original_steps:
308
+ sqrt_alphas_cumprod = self.sqrt_alphas_cumprod
309
+ sqrt_one_minus_alphas_cumprod = self.sqrt_one_minus_alphas_cumprod
310
+ else:
311
+ sqrt_alphas_cumprod = torch.sqrt(self.ddim_alphas)
312
+ sqrt_one_minus_alphas_cumprod = self.ddim_sqrt_one_minus_alphas
313
+
314
+ if noise is None:
315
+ noise = torch.randn_like(x0)
316
+ return (extract_into_tensor(sqrt_alphas_cumprod, t, x0.shape) * x0 +
317
+ extract_into_tensor(sqrt_one_minus_alphas_cumprod, t, x0.shape) * noise)
318
+
319
    @torch.no_grad()
    def decode(self, x_latent, cond, t_start, unconditional_guidance_scale=1.0, unconditional_conditioning=None,
               use_original_steps=False, callback=None):
        """Decode a (noised/inverted) latent back toward x_0.

        Runs `t_start` reverse DDIM steps of `p_sample_ddim` starting from
        `x_latent` and returns the final latent.
        """

        timesteps = np.arange(self.ddpm_num_timesteps) if use_original_steps else self.ddim_timesteps
        timesteps = timesteps[:t_start]

        # Iterate from the most-noised retained step down to 0.
        time_range = np.flip(timesteps)
        total_steps = timesteps.shape[0]
        print(f"Running DDIM Sampling with {total_steps} timesteps")

        iterator = tqdm(time_range, desc='Decoding image', total=total_steps)
        x_dec = x_latent
        for i, step in enumerate(iterator):
            index = total_steps - i - 1
            ts = torch.full((x_latent.shape[0],), step, device=x_latent.device, dtype=torch.long)
            x_dec, _ = self.p_sample_ddim(x_dec, cond, ts, index=index, use_original_steps=use_original_steps,
                                          unconditional_guidance_scale=unconditional_guidance_scale,
                                          unconditional_conditioning=unconditional_conditioning)
            if callback: callback(i)
        return x_dec
watermarker/LaWa/ldm/models/diffusion/ddpm.py ADDED
@@ -0,0 +1,1798 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ wild mixture of
3
+ https://github.com/lucidrains/denoising-diffusion-pytorch/blob/7706bdfc6f527f58d33f84b7b522e61e6e3164b3/denoising_diffusion_pytorch/denoising_diffusion_pytorch.py
4
+ https://github.com/openai/improved-diffusion/blob/e94489283bb876ac1477d5dd7709bbbd2d9902ce/improved_diffusion/gaussian_diffusion.py
5
+ https://github.com/CompVis/taming-transformers
6
+ -- merci
7
+ """
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+ import numpy as np
12
+ import pytorch_lightning as pl
13
+ from torch.optim.lr_scheduler import LambdaLR
14
+ from einops import rearrange, repeat
15
+ from contextlib import contextmanager, nullcontext
16
+ from functools import partial
17
+ import itertools
18
+ from tqdm import tqdm
19
+ from torchvision.utils import make_grid
20
+ from pytorch_lightning.utilities.distributed import rank_zero_only
21
+ # from pytorch_lightning.utilities.rank_zero import rank_zero_only
22
+ from omegaconf import ListConfig
23
+
24
+ from ldm.util import log_txt_as_img, exists, default, ismap, isimage, mean_flat, count_params, instantiate_from_config
25
+ from ldm.modules.ema import LitEma
26
+ from ldm.modules.distributions.distributions import normal_kl, DiagonalGaussianDistribution
27
+ from ldm.models.autoencoder import IdentityFirstStage, AutoencoderKL
28
+ from ldm.modules.diffusionmodules.util import make_beta_schedule, extract_into_tensor, noise_like
29
+ from ldm.models.diffusion.ddim import DDIMSampler
30
+
31
+
32
# Maps a conditioning mode to the batch key under which its tensor is passed.
__conditioning_keys__ = {
    'concat': 'c_concat',
    'crossattn': 'c_crossattn',
    'adm': 'y',
}
35
+
36
+
37
def disabled_train(self, mode=True):
    """Overwrite model.train with this function to make sure train/eval mode
    does not change anymore."""
    # Deliberately ignore `mode`: the module is frozen in its current state.
    return self
41
+
42
+
43
def uniform_on_device(r1, r2, shape, device):
    """Sample a tensor of `shape` uniformly between `r2` and `r1` on `device`."""
    u = torch.rand(*shape, device=device)
    return r2 + (r1 - r2) * u
45
+
46
+
47
+ class DDPM(pl.LightningModule):
48
+ # classic DDPM with Gaussian diffusion, in image space
49
+ def __init__(self,
50
+ unet_config,
51
+ timesteps=1000,
52
+ beta_schedule="linear",
53
+ loss_type="l2",
54
+ ckpt_path=None,
55
+ ignore_keys=[],
56
+ load_only_unet=False,
57
+ monitor="val/loss",
58
+ use_ema=True,
59
+ first_stage_key="image",
60
+ image_size=256,
61
+ channels=3,
62
+ log_every_t=100,
63
+ clip_denoised=True,
64
+ linear_start=1e-4,
65
+ linear_end=2e-2,
66
+ cosine_s=8e-3,
67
+ given_betas=None,
68
+ original_elbo_weight=0.,
69
+ v_posterior=0., # weight for choosing posterior variance as sigma = (1-v) * beta_tilde + v * beta
70
+ l_simple_weight=1.,
71
+ conditioning_key=None,
72
+ parameterization="eps", # all assuming fixed variance schedules
73
+ scheduler_config=None,
74
+ use_positional_encodings=False,
75
+ learn_logvar=False,
76
+ logvar_init=0.,
77
+ make_it_fit=False,
78
+ ucg_training=None,
79
+ reset_ema=False,
80
+ reset_num_ema_updates=False,
81
+ ):
82
+ super().__init__()
83
+ assert parameterization in ["eps", "x0", "v"], 'currently only supporting "eps" and "x0" and "v"'
84
+ self.parameterization = parameterization
85
+ print(f"{self.__class__.__name__}: Running in {self.parameterization}-prediction mode")
86
+ self.cond_stage_model = None
87
+ self.clip_denoised = clip_denoised
88
+ self.log_every_t = log_every_t
89
+ self.first_stage_key = first_stage_key
90
+ self.image_size = image_size # try conv?
91
+ self.channels = channels
92
+ self.use_positional_encodings = use_positional_encodings
93
+ self.model = DiffusionWrapper(unet_config, conditioning_key)
94
+ count_params(self.model, verbose=True)
95
+ self.use_ema = use_ema
96
+ if self.use_ema:
97
+ self.model_ema = LitEma(self.model)
98
+ print(f"Keeping EMAs of {len(list(self.model_ema.buffers()))}.")
99
+
100
+ self.use_scheduler = scheduler_config is not None
101
+ if self.use_scheduler:
102
+ self.scheduler_config = scheduler_config
103
+
104
+ self.v_posterior = v_posterior
105
+ self.original_elbo_weight = original_elbo_weight
106
+ self.l_simple_weight = l_simple_weight
107
+
108
+ if monitor is not None:
109
+ self.monitor = monitor
110
+ self.make_it_fit = make_it_fit
111
+ if reset_ema: assert exists(ckpt_path)
112
+ if ckpt_path is not None:
113
+ self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys, only_model=load_only_unet)
114
+ if reset_ema:
115
+ assert self.use_ema
116
+ print(f"Resetting ema to pure model weights. This is useful when restoring from an ema-only checkpoint.")
117
+ self.model_ema = LitEma(self.model)
118
+ if reset_num_ema_updates:
119
+ print(" +++++++++++ WARNING: RESETTING NUM_EMA UPDATES TO ZERO +++++++++++ ")
120
+ assert self.use_ema
121
+ self.model_ema.reset_num_updates()
122
+
123
+ self.register_schedule(given_betas=given_betas, beta_schedule=beta_schedule, timesteps=timesteps,
124
+ linear_start=linear_start, linear_end=linear_end, cosine_s=cosine_s)
125
+
126
+ self.loss_type = loss_type
127
+
128
+ self.learn_logvar = learn_logvar
129
+ logvar = torch.full(fill_value=logvar_init, size=(self.num_timesteps,))
130
+ if self.learn_logvar:
131
+ self.logvar = nn.Parameter(self.logvar, requires_grad=True)
132
+ else:
133
+ self.register_buffer('logvar', logvar)
134
+
135
+ self.ucg_training = ucg_training or dict()
136
+ if self.ucg_training:
137
+ self.ucg_prng = np.random.RandomState()
138
+
139
    def register_schedule(self, given_betas=None, beta_schedule="linear", timesteps=1000,
                          linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3):
        """Build the beta/alpha noise schedule and register derived buffers.

        All derived quantities (cumulative alphas, posterior coefficients,
        per-timestep VLB weights) become non-trainable buffers on the module.
        """
        if exists(given_betas):
            betas = given_betas
        else:
            betas = make_beta_schedule(beta_schedule, timesteps, linear_start=linear_start, linear_end=linear_end,
                                       cosine_s=cosine_s)
        alphas = 1. - betas
        alphas_cumprod = np.cumprod(alphas, axis=0)
        alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1])

        timesteps, = betas.shape
        self.num_timesteps = int(timesteps)
        self.linear_start = linear_start
        self.linear_end = linear_end
        assert alphas_cumprod.shape[0] == self.num_timesteps, 'alphas have to be defined for each timestep'

        to_torch = partial(torch.tensor, dtype=torch.float32)

        self.register_buffer('betas', to_torch(betas))
        self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod))
        self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev))

        # calculations for diffusion q(x_t | x_{t-1}) and others
        self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod)))
        self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod)))
        self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod)))
        self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod)))
        self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod - 1)))

        # calculations for posterior q(x_{t-1} | x_t, x_0)
        posterior_variance = (1 - self.v_posterior) * betas * (1. - alphas_cumprod_prev) / (
                1. - alphas_cumprod) + self.v_posterior * betas
        # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t)
        self.register_buffer('posterior_variance', to_torch(posterior_variance))
        # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain
        self.register_buffer('posterior_log_variance_clipped', to_torch(np.log(np.maximum(posterior_variance, 1e-20))))
        self.register_buffer('posterior_mean_coef1', to_torch(
            betas * np.sqrt(alphas_cumprod_prev) / (1. - alphas_cumprod)))
        self.register_buffer('posterior_mean_coef2', to_torch(
            (1. - alphas_cumprod_prev) * np.sqrt(alphas) / (1. - alphas_cumprod)))

        # Per-timestep weights for the VLB term of the loss.
        if self.parameterization == "eps":
            lvlb_weights = self.betas ** 2 / (
                    2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod))
        elif self.parameterization == "x0":
            # NOTE(review): `2. * 1 - ...` evaluates as `(2. * 1) - ...`;
            # kept verbatim to preserve the original weighting -- verify intent.
            lvlb_weights = 0.5 * np.sqrt(torch.Tensor(alphas_cumprod)) / (2. * 1 - torch.Tensor(alphas_cumprod))
        elif self.parameterization == "v":
            lvlb_weights = torch.ones_like(self.betas ** 2 / (
                    2 * self.posterior_variance * to_torch(alphas) * (1 - self.alphas_cumprod)))
        else:
            raise NotImplementedError("mu not supported")
        # The t=0 weight is degenerate (posterior variance 0); reuse t=1's.
        lvlb_weights[0] = lvlb_weights[1]
        self.register_buffer('lvlb_weights', lvlb_weights, persistent=False)
        # NOTE(review): `.all()` only rejects an all-NaN tensor; `.any()` would
        # be stricter -- kept as-is to preserve original behavior.
        assert not torch.isnan(self.lvlb_weights).all()
194
+
195
    @contextmanager
    def ema_scope(self, context=None):
        """Context manager that temporarily swaps in the EMA weights.

        On entry, the live model parameters are stashed and replaced with the
        EMA shadow copies; on exit they are restored. No-op when use_ema=False.
        `context` is an optional label printed on switch/restore.
        """
        if self.use_ema:
            self.model_ema.store(self.model.parameters())
            self.model_ema.copy_to(self.model)
            if context is not None:
                print(f"{context}: Switched to EMA weights")
        try:
            yield None
        finally:
            # Restore even if the body raised, so training weights are never lost.
            if self.use_ema:
                self.model_ema.restore(self.model.parameters())
                if context is not None:
                    print(f"{context}: Restored training weights")
209
+
210
    @torch.no_grad()
    def init_from_ckpt(self, path, ignore_keys=list(), only_model=False):
        """Load weights from a checkpoint, with optional key filtering.

        :param path: checkpoint path, loaded via torch.load (map_location='cpu').
        :param ignore_keys: drop any state_dict entry whose name starts with
            one of these prefixes.
        :param only_model: if True, load into self.model only rather than the
            whole LightningModule.
        """
        sd = torch.load(path, map_location="cpu")
        if "state_dict" in list(sd.keys()):
            sd = sd["state_dict"]
        keys = list(sd.keys())
        for k in keys:
            for ik in ignore_keys:
                if k.startswith(ik):
                    print("Deleting key {} from state_dict.".format(k))
                    del sd[k]
        if self.make_it_fit:
            # Adapt checkpoint tensors whose shapes mismatch the current model
            # by tiling the old values cyclically along the first two axes and
            # renormalizing by how often each old column was reused.
            n_params = len([name for name, _ in
                            itertools.chain(self.named_parameters(),
                                            self.named_buffers())])
            for name, param in tqdm(
                    itertools.chain(self.named_parameters(),
                                    self.named_buffers()),
                    desc="Fitting old weights to new weights",
                    total=n_params
            ):
                if not name in sd:
                    continue
                old_shape = sd[name].shape
                new_shape = param.shape
                assert len(old_shape) == len(new_shape)
                if len(new_shape) > 2:
                    # we only modify first two axes
                    assert new_shape[2:] == old_shape[2:]
                # assumes first axis corresponds to output dim
                if not new_shape == old_shape:
                    new_param = param.clone()
                    old_param = sd[name]
                    if len(new_shape) == 1:
                        for i in range(new_param.shape[0]):
                            new_param[i] = old_param[i % old_shape[0]]
                    elif len(new_shape) >= 2:
                        for i in range(new_param.shape[0]):
                            for j in range(new_param.shape[1]):
                                new_param[i, j] = old_param[i % old_shape[0], j % old_shape[1]]

                        # Count how many times each old input column was reused
                        # so the tiled weights can be rescaled to keep the
                        # effective fan-in contribution comparable.
                        n_used_old = torch.ones(old_shape[1])
                        for j in range(new_param.shape[1]):
                            n_used_old[j % old_shape[1]] += 1
                        n_used_new = torch.zeros(new_shape[1])
                        for j in range(new_param.shape[1]):
                            n_used_new[j] = n_used_old[j % old_shape[1]]

                        n_used_new = n_used_new[None, :]
                        while len(n_used_new.shape) < len(new_shape):
                            n_used_new = n_used_new.unsqueeze(-1)
                        new_param /= n_used_new

                    sd[name] = new_param

        missing, unexpected = self.load_state_dict(sd, strict=False) if not only_model else self.model.load_state_dict(
            sd, strict=False)
        print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys")
        if len(missing) > 0:
            print(f"Missing Keys:\n {missing}")
        if len(unexpected) > 0:
            print(f"\nUnexpected Keys:\n {unexpected}")
272
+
273
+ def q_mean_variance(self, x_start, t):
274
+ """
275
+ Get the distribution q(x_t | x_0).
276
+ :param x_start: the [N x C x ...] tensor of noiseless inputs.
277
+ :param t: the number of diffusion steps (minus 1). Here, 0 means one step.
278
+ :return: A tuple (mean, variance, log_variance), all of x_start's shape.
279
+ """
280
+ mean = (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start)
281
+ variance = extract_into_tensor(1.0 - self.alphas_cumprod, t, x_start.shape)
282
+ log_variance = extract_into_tensor(self.log_one_minus_alphas_cumprod, t, x_start.shape)
283
+ return mean, variance, log_variance
284
+
285
+ def predict_start_from_noise(self, x_t, t, noise):
286
+ return (
287
+ extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t -
288
+ extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise
289
+ )
290
+
291
+ def predict_start_from_z_and_v(self, x_t, t, v):
292
+ # self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod)))
293
+ # self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod)))
294
+ return (
295
+ extract_into_tensor(self.sqrt_alphas_cumprod, t, x_t.shape) * x_t -
296
+ extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_t.shape) * v
297
+ )
298
+
299
+ def predict_eps_from_z_and_v(self, x_t, t, v):
300
+ return (
301
+ extract_into_tensor(self.sqrt_alphas_cumprod, t, x_t.shape) * v +
302
+ extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_t.shape) * x_t
303
+ )
304
+
305
+ def q_posterior(self, x_start, x_t, t):
306
+ posterior_mean = (
307
+ extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start +
308
+ extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t
309
+ )
310
+ posterior_variance = extract_into_tensor(self.posterior_variance, t, x_t.shape)
311
+ posterior_log_variance_clipped = extract_into_tensor(self.posterior_log_variance_clipped, t, x_t.shape)
312
+ return posterior_mean, posterior_variance, posterior_log_variance_clipped
313
+
314
+ def p_mean_variance(self, x, t, clip_denoised: bool):
315
+ model_out = self.model(x, t)
316
+ if self.parameterization == "eps":
317
+ x_recon = self.predict_start_from_noise(x, t=t, noise=model_out)
318
+ elif self.parameterization == "x0":
319
+ x_recon = model_out
320
+ if clip_denoised:
321
+ x_recon.clamp_(-1., 1.)
322
+
323
+ model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t)
324
+ return model_mean, posterior_variance, posterior_log_variance
325
+
326
+ @torch.no_grad()
327
+ def p_sample(self, x, t, clip_denoised=True, repeat_noise=False):
328
+ b, *_, device = *x.shape, x.device
329
+ model_mean, _, model_log_variance = self.p_mean_variance(x=x, t=t, clip_denoised=clip_denoised)
330
+ noise = noise_like(x.shape, device, repeat_noise)
331
+ # no noise when t == 0
332
+ nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1)))
333
+ return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise
334
+
335
+ @torch.no_grad()
336
+ def p_sample_loop(self, shape, return_intermediates=False):
337
+ device = self.betas.device
338
+ b = shape[0]
339
+ img = torch.randn(shape, device=device)
340
+ intermediates = [img]
341
+ for i in tqdm(reversed(range(0, self.num_timesteps)), desc='Sampling t', total=self.num_timesteps):
342
+ img = self.p_sample(img, torch.full((b,), i, device=device, dtype=torch.long),
343
+ clip_denoised=self.clip_denoised)
344
+ if i % self.log_every_t == 0 or i == self.num_timesteps - 1:
345
+ intermediates.append(img)
346
+ if return_intermediates:
347
+ return img, intermediates
348
+ return img
349
+
350
+ @torch.no_grad()
351
+ def sample(self, batch_size=16, return_intermediates=False):
352
+ image_size = self.image_size
353
+ channels = self.channels
354
+ return self.p_sample_loop((batch_size, channels, image_size, image_size),
355
+ return_intermediates=return_intermediates)
356
+
357
+ def q_sample(self, x_start, t, noise=None):
358
+ noise = default(noise, lambda: torch.randn_like(x_start))
359
+ return (extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start +
360
+ extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise)
361
+
362
+ def get_v(self, x, noise, t):
363
+ return (
364
+ extract_into_tensor(self.sqrt_alphas_cumprod, t, x.shape) * noise -
365
+ extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x.shape) * x
366
+ )
367
+
368
+ def get_loss(self, pred, target, mean=True):
369
+ if self.loss_type == 'l1':
370
+ loss = (target - pred).abs()
371
+ if mean:
372
+ loss = loss.mean()
373
+ elif self.loss_type == 'l2':
374
+ if mean:
375
+ loss = torch.nn.functional.mse_loss(target, pred)
376
+ else:
377
+ loss = torch.nn.functional.mse_loss(target, pred, reduction='none')
378
+ else:
379
+ raise NotImplementedError("unknown loss type '{loss_type}'")
380
+
381
+ return loss
382
+
383
    def p_losses(self, x_start, t, noise=None):
        """Training loss for a batch of clean inputs at timesteps `t`.

        Diffuses x_start to x_t, runs the model, and combines the simple
        regression loss with the ELBO (VLB) term. Returns (loss, loss_dict).
        """
        noise = default(noise, lambda: torch.randn_like(x_start))
        x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise)
        model_out = self.model(x_noisy, t)

        loss_dict = {}
        # The regression target depends on the parameterization.
        if self.parameterization == "eps":
            target = noise
        elif self.parameterization == "x0":
            target = x_start
        elif self.parameterization == "v":
            target = self.get_v(x_start, noise, t)
        else:
            raise NotImplementedError(f"Parameterization {self.parameterization} not yet supported")

        # Per-sample loss, averaged over channel/spatial dims only.
        loss = self.get_loss(model_out, target, mean=False).mean(dim=[1, 2, 3])

        log_prefix = 'train' if self.training else 'val'

        loss_dict.update({f'{log_prefix}/loss_simple': loss.mean()})
        loss_simple = loss.mean() * self.l_simple_weight

        # Timestep-weighted ELBO term.
        loss_vlb = (self.lvlb_weights[t] * loss).mean()
        loss_dict.update({f'{log_prefix}/loss_vlb': loss_vlb})

        loss = loss_simple + self.original_elbo_weight * loss_vlb

        loss_dict.update({f'{log_prefix}/loss': loss})

        return loss, loss_dict
413
+
414
def forward(self, x, *args, **kwargs):
    """Draw one uniform random timestep per sample and evaluate the diffusion loss."""
    batch_size = x.shape[0]
    t = torch.randint(0, self.num_timesteps, (batch_size,), device=self.device).long()
    return self.p_losses(x, t, *args, **kwargs)
419
+
420
def get_input(self, batch, k):
    """Fetch key ``k`` from ``batch`` and return it as a contiguous float BCHW tensor."""
    x = batch[k]
    if len(x.shape) == 3:
        # (b, h, w) input: append a trailing channel axis
        x = x[..., None]
    # channels-last -> channels-first: (b, h, w, c) -> (b, c, h, w)
    x = x.permute(0, 3, 1, 2)
    x = x.to(memory_format=torch.contiguous_format).float()
    return x
427
+
428
def shared_step(self, batch):
    """Common train/val step: fetch the image tensor and score it with forward()."""
    inputs = self.get_input(batch, self.first_stage_key)
    return self(inputs)
432
+
433
def training_step(self, batch, batch_idx):
    """Lightning training step with optional conditioning dropout.

    For each key in ``self.ucg_training``, batch entries are replaced by
    ``val`` with probability ``p`` (classifier-free-guidance-style dropout,
    driven by ``self.ucg_prng``).  Logs the loss dict, global step and
    (if a scheduler is used) the current learning rate.
    """
    for k in self.ucg_training:
        p = self.ucg_training[k]["p"]
        val = self.ucg_training[k]["val"]
        if val is None:
            val = ""
        for i in range(len(batch[k])):
            # choice(2, p=[1-p, p]) returns 1 with probability p
            if self.ucg_prng.choice(2, p=[1 - p, p]):
                batch[k][i] = val

    loss, loss_dict = self.shared_step(batch)

    self.log_dict(loss_dict, prog_bar=True,
                  logger=True, on_step=True, on_epoch=True)

    self.log("global_step", self.global_step,
             prog_bar=True, logger=True, on_step=True, on_epoch=False)

    if self.use_scheduler:
        lr = self.optimizers().param_groups[0]['lr']
        self.log('lr_abs', lr, prog_bar=True, logger=True, on_step=True, on_epoch=False)

    return loss
456
+
457
@torch.no_grad()
def validation_step(self, batch, batch_idx):
    """Validation step, logged twice: with raw weights and with EMA weights."""
    _, loss_dict_no_ema = self.shared_step(batch)
    with self.ema_scope():
        _, loss_dict_ema = self.shared_step(batch)
        # suffix the keys so EMA metrics don't collide with the raw ones
        loss_dict_ema = {key + '_ema': loss_dict_ema[key] for key in loss_dict_ema}
    self.log_dict(loss_dict_no_ema, prog_bar=False, logger=True, on_step=False, on_epoch=True)
    self.log_dict(loss_dict_ema, prog_bar=False, logger=True, on_step=False, on_epoch=True)
465
+
466
def on_train_batch_end(self, *args, **kwargs):
    """After each optimizer step, fold the current weights into the EMA copy."""
    if not self.use_ema:
        return
    self.model_ema(self.model)
469
+
470
def _get_rows_from_list(self, samples):
    """Stack a list of image batches into one grid with a column per list entry."""
    n_per_row = len(samples)
    grid = rearrange(samples, 'n b c h w -> b n c h w')
    grid = rearrange(grid, 'b n c h w -> (b n) c h w')
    return make_grid(grid, nrow=n_per_row)
476
+
477
@torch.no_grad()
def log_images(self, batch, N=8, n_row=2, sample=True, return_keys=None, **kwargs):
    """Build a dict of visualization images: inputs, a forward-diffusion row
    and (optionally) EMA samples with their denoising row.

    :param N: max number of images to log.
    :param n_row: number of rows in the diffusion grid.
    :param return_keys: if given, restrict the returned dict to these keys
        (when any of them are present).
    """
    log = dict()
    x = self.get_input(batch, self.first_stage_key)
    N = min(x.shape[0], N)
    n_row = min(x.shape[0], n_row)
    x = x.to(self.device)[:N]
    log["inputs"] = x

    # get diffusion row
    diffusion_row = list()
    x_start = x[:n_row]

    for t in range(self.num_timesteps):
        if t % self.log_every_t == 0 or t == self.num_timesteps - 1:
            # broadcast the scalar timestep over the row
            t = repeat(torch.tensor([t]), '1 -> b', b=n_row)
            t = t.to(self.device).long()
            noise = torch.randn_like(x_start)
            x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise)
            diffusion_row.append(x_noisy)

    log["diffusion_row"] = self._get_rows_from_list(diffusion_row)

    if sample:
        # get denoise row (sampled under EMA weights)
        with self.ema_scope("Plotting"):
            samples, denoise_row = self.sample(batch_size=N, return_intermediates=True)

        log["samples"] = samples
        log["denoise_row"] = self._get_rows_from_list(denoise_row)

    if return_keys:
        if np.intersect1d(list(log.keys()), return_keys).shape[0] == 0:
            return log
        else:
            return {key: log[key] for key in return_keys}
    return log
514
+
515
def configure_optimizers(self):
    """Build an AdamW optimizer over the diffusion model (plus logvar if learned)."""
    trainable = list(self.model.parameters())
    if self.learn_logvar:
        trainable.append(self.logvar)
    return torch.optim.AdamW(trainable, lr=self.learning_rate)
522
+
523
+
524
+ class LatentDiffusion(DDPM):
525
+ """main class"""
526
+
527
def __init__(self,
             first_stage_config,
             cond_stage_config,
             num_timesteps_cond=None,
             cond_stage_key="image",
             cond_stage_trainable=False,
             concat_mode=True,
             cond_stage_forward=None,
             conditioning_key=None,
             scale_factor=1.0,
             scale_by_std=False,
             force_null_conditioning=False,
             *args, **kwargs):
    """Latent diffusion model: DDPM operating in a first-stage latent space.

    :param first_stage_config: config for the (frozen) autoencoder.
    :param cond_stage_config: config for the conditioning model, or the
        sentinels '__is_first_stage__' / '__is_unconditional__'.
    :param scale_factor: multiplier applied to first-stage latents.
    :param scale_by_std: if True, ``scale_factor`` becomes a buffer set from
        the latent std of the first training batch.
    """
    self.force_null_conditioning = force_null_conditioning
    self.num_timesteps_cond = default(num_timesteps_cond, 1)
    self.scale_by_std = scale_by_std
    assert self.num_timesteps_cond <= kwargs['timesteps']
    # for backwards compatibility after implementation of DiffusionWrapper
    if conditioning_key is None:
        conditioning_key = 'concat' if concat_mode else 'crossattn'
    if cond_stage_config == '__is_unconditional__' and not self.force_null_conditioning:
        conditioning_key = None
    ckpt_path = kwargs.pop("ckpt_path", None)
    reset_ema = kwargs.pop("reset_ema", False)
    reset_num_ema_updates = kwargs.pop("reset_num_ema_updates", False)
    ignore_keys = kwargs.pop("ignore_keys", [])
    super().__init__(conditioning_key=conditioning_key, *args, **kwargs)
    self.concat_mode = concat_mode
    self.cond_stage_trainable = cond_stage_trainable
    self.cond_stage_key = cond_stage_key
    try:
        self.num_downs = len(first_stage_config.params.ddconfig.ch_mult) - 1
    except Exception:
        # Bug fix: was a bare ``except:``, which would also swallow
        # KeyboardInterrupt/SystemExit. Configs without ddconfig fall back to 0.
        self.num_downs = 0
    if not scale_by_std:
        self.scale_factor = scale_factor
    else:
        self.register_buffer('scale_factor', torch.tensor(scale_factor))
    self.instantiate_first_stage(first_stage_config)
    self.instantiate_cond_stage(cond_stage_config)
    self.cond_stage_forward = cond_stage_forward
    self.clip_denoised = False
    self.bbox_tokenizer = None

    self.restarted_from_ckpt = False
    if ckpt_path is not None:
        self.init_from_ckpt(ckpt_path, ignore_keys)
        self.restarted_from_ckpt = True
        if reset_ema:
            assert self.use_ema
            print(
                "Resetting ema to pure model weights. This is useful when restoring from an ema-only checkpoint.")
            self.model_ema = LitEma(self.model)
    if reset_num_ema_updates:
        print(" +++++++++++ WARNING: RESETTING NUM_EMA UPDATES TO ZERO +++++++++++ ")
        assert self.use_ema
        self.model_ema.reset_num_updates()
584
+
585
def make_cond_schedule(self):
    """Map each diffusion timestep to a conditioning timestep id.

    The first ``num_timesteps_cond`` entries are evenly spaced over the full
    range; the rest all point at the final timestep.
    """
    ids = torch.full((self.num_timesteps,), self.num_timesteps - 1, dtype=torch.long)
    spaced = torch.round(torch.linspace(0, self.num_timesteps - 1, self.num_timesteps_cond)).long()
    ids[:self.num_timesteps_cond] = spaced
    self.cond_ids = ids
589
+
590
@rank_zero_only
@torch.no_grad()
def on_train_batch_start(self, batch, batch_idx, dataloader_idx):
    """On the very first training batch, optionally rescale latents to unit std.

    Active only when ``scale_by_std`` is set and training starts from scratch
    (not restored from a checkpoint).
    """
    # only for very first batch
    if self.scale_by_std and self.current_epoch == 0 and self.global_step == 0 and batch_idx == 0 and not self.restarted_from_ckpt:
        assert self.scale_factor == 1., 'rather not use custom rescaling and std-rescaling simultaneously'
        # set rescale weight to 1./std of encodings
        print("### USING STD-RESCALING ###")
        x = super().get_input(batch, self.first_stage_key)
        x = x.to(self.device)
        encoder_posterior = self.encode_first_stage(x)
        z = self.get_first_stage_encoding(encoder_posterior).detach()
        # replace the buffer registered in __init__ with the measured value
        del self.scale_factor
        self.register_buffer('scale_factor', 1. / z.flatten().std())
        print(f"setting self.scale_factor to {self.scale_factor}")
        print("### USING STD-RESCALING ###")
606
+
607
def register_schedule(self,
                      given_betas=None, beta_schedule="linear", timesteps=1000,
                      linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3):
    """Register the beta schedule, then build the shortened conditioning schedule if needed."""
    super().register_schedule(given_betas, beta_schedule, timesteps, linear_start, linear_end, cosine_s)

    # only shorten when fewer conditioning steps than diffusion steps are requested
    self.shorten_cond_schedule = self.num_timesteps_cond > 1
    if self.shorten_cond_schedule:
        self.make_cond_schedule()
615
+
616
def instantiate_first_stage(self, config):
    """Build the first-stage autoencoder and freeze it permanently."""
    model = instantiate_from_config(config)
    self.first_stage_model = model.eval()
    # make .train() a no-op so the model stays in eval mode forever
    self.first_stage_model.train = disabled_train
    for p in self.first_stage_model.parameters():
        p.requires_grad = False
622
+
623
def instantiate_cond_stage(self, config):
    """Build the conditioning model; freeze it unless it is trainable."""
    if self.cond_stage_trainable:
        # a trainable cond stage must be a real config, not a sentinel
        assert config != '__is_first_stage__'
        assert config != '__is_unconditional__'
        self.cond_stage_model = instantiate_from_config(config)
        return
    if config == "__is_first_stage__":
        print("Using first stage also as cond stage.")
        self.cond_stage_model = self.first_stage_model
    elif config == "__is_unconditional__":
        print(f"Training {self.__class__.__name__} as an unconditional model.")
        self.cond_stage_model = None
        # self.be_unconditional = True
    else:
        model = instantiate_from_config(config)
        self.cond_stage_model = model.eval()
        self.cond_stage_model.train = disabled_train
        for p in self.cond_stage_model.parameters():
            p.requires_grad = False
643
+
644
def _get_denoise_row_from_list(self, samples, desc='', force_no_decoder_quantization=False):
    """Decode a list of latent batches and arrange them as a single image grid."""
    denoise_row = []
    for zd in tqdm(samples, desc=desc):
        denoise_row.append(self.decode_first_stage(zd.to(self.device),
                                                   force_not_quantize=force_no_decoder_quantization))
    n_imgs_per_row = len(denoise_row)
    denoise_row = torch.stack(denoise_row)  # n_log_step, n_row, C, H, W
    denoise_grid = rearrange(denoise_row, 'n b c h w -> b n c h w')
    denoise_grid = rearrange(denoise_grid, 'b n c h w -> (b n) c h w')
    denoise_grid = make_grid(denoise_grid, nrow=n_imgs_per_row)
    return denoise_grid
655
+
656
def get_first_stage_encoding(self, encoder_posterior):
    """Turn an encoder output (distribution or tensor) into a scaled latent."""
    if isinstance(encoder_posterior, DiagonalGaussianDistribution):
        z = encoder_posterior.sample()
    elif torch.is_tensor(encoder_posterior):
        z = encoder_posterior
    else:
        raise NotImplementedError(
            f"encoder_posterior of type '{type(encoder_posterior)}' not yet implemented")
    return self.scale_factor * z
664
+
665
def get_learned_conditioning(self, c):
    """Encode raw conditioning ``c`` with the conditioning model.

    If ``cond_stage_forward`` names a method, that method is used; otherwise
    ``encode`` is preferred when available, falling back to calling the model.
    """
    model = self.cond_stage_model
    if self.cond_stage_forward is not None:
        # an explicit forward method name was configured
        assert hasattr(model, self.cond_stage_forward)
        return getattr(model, self.cond_stage_forward)(c)
    if hasattr(model, 'encode') and callable(model.encode):
        encoded = model.encode(c)
        if isinstance(encoded, DiagonalGaussianDistribution):
            encoded = encoded.mode()
        return encoded
    return model(c)
677
+
678
def meshgrid(self, h, w):
    """Return an (h, w, 2) integer grid of (row, col) coordinates."""
    rows = torch.arange(0, h).view(h, 1, 1).repeat(1, w, 1)
    cols = torch.arange(0, w).view(1, w, 1).repeat(h, 1, 1)
    return torch.cat([rows, cols], dim=-1)
684
+
685
def delta_border(self, h, w):
    """Normalized distance to the nearest image border.

    :param h: height
    :param w: width
    :return: (h, w) tensor with min distance = 0 at the border and
        max dist = 0.5 at the image center.
    """
    corner = torch.tensor([h - 1, w - 1]).view(1, 1, 2)
    grid = self.meshgrid(h, w) / corner
    d_topleft = torch.min(grid, dim=-1, keepdims=True)[0]
    d_bottomright = torch.min(1 - grid, dim=-1, keepdims=True)[0]
    return torch.min(torch.cat([d_topleft, d_bottomright], dim=-1), dim=-1)[0]
698
+
699
def get_weighting(self, h, w, Ly, Lx, device):
    """Per-pixel blending weights for overlapping crops (used by get_fold_unfold).

    Pixels near a crop border get lower weight so overlapping crops blend
    smoothly; weights are clipped to configured min/max values.
    """
    weighting = self.delta_border(h, w)
    weighting = torch.clip(weighting, self.split_input_params["clip_min_weight"],
                           self.split_input_params["clip_max_weight"], )
    weighting = weighting.view(1, h * w, 1).repeat(1, 1, Ly * Lx).to(device)

    if self.split_input_params["tie_braker"]:
        # extra weighting over the crop grid itself to break ties between crops
        L_weighting = self.delta_border(Ly, Lx)
        L_weighting = torch.clip(L_weighting,
                                 self.split_input_params["clip_min_tie_weight"],
                                 self.split_input_params["clip_max_tie_weight"])

        L_weighting = L_weighting.view(1, 1, Ly * Lx).to(device)
        weighting = weighting * L_weighting
    return weighting
714
+
715
def get_fold_unfold(self, x, kernel_size, stride, uf=1, df=1):  # todo load once not every time, shorten code
    """Build fold/unfold operators plus blending weights for tiled processing.

    :param x: img of size (bs, c, h, w)
    :param uf: upscale factor applied between unfold and fold (>1 for decoding).
    :param df: downscale factor applied between unfold and fold (>1 for encoding).
    :return: (fold, unfold, normalization, weighting); ``normalization``
        divides out overlapping-crop weight sums.
    Exactly one of ``uf`` / ``df`` may be > 1.
    """
    bs, nc, h, w = x.shape

    # number of crops in image
    Ly = (h - kernel_size[0]) // stride[0] + 1
    Lx = (w - kernel_size[1]) // stride[1] + 1

    if uf == 1 and df == 1:
        fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride)
        unfold = torch.nn.Unfold(**fold_params)

        fold = torch.nn.Fold(output_size=x.shape[2:], **fold_params)

        weighting = self.get_weighting(kernel_size[0], kernel_size[1], Ly, Lx, x.device).to(x.dtype)
        normalization = fold(weighting).view(1, 1, h, w)  # normalizes the overlap
        weighting = weighting.view((1, 1, kernel_size[0], kernel_size[1], Ly * Lx))

    elif uf > 1 and df == 1:
        fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride)
        unfold = torch.nn.Unfold(**fold_params)

        # NOTE(review): kernel_size[0] is used for both dims here — for
        # non-square kernels this looks wrong; confirm against upstream.
        fold_params2 = dict(kernel_size=(kernel_size[0] * uf, kernel_size[0] * uf),
                            dilation=1, padding=0,
                            stride=(stride[0] * uf, stride[1] * uf))
        fold = torch.nn.Fold(output_size=(x.shape[2] * uf, x.shape[3] * uf), **fold_params2)

        weighting = self.get_weighting(kernel_size[0] * uf, kernel_size[1] * uf, Ly, Lx, x.device).to(x.dtype)
        normalization = fold(weighting).view(1, 1, h * uf, w * uf)  # normalizes the overlap
        weighting = weighting.view((1, 1, kernel_size[0] * uf, kernel_size[1] * uf, Ly * Lx))

    elif df > 1 and uf == 1:
        fold_params = dict(kernel_size=kernel_size, dilation=1, padding=0, stride=stride)
        unfold = torch.nn.Unfold(**fold_params)

        # NOTE(review): same kernel_size[0]-twice pattern as the uf branch.
        fold_params2 = dict(kernel_size=(kernel_size[0] // df, kernel_size[0] // df),
                            dilation=1, padding=0,
                            stride=(stride[0] // df, stride[1] // df))
        fold = torch.nn.Fold(output_size=(x.shape[2] // df, x.shape[3] // df), **fold_params2)

        weighting = self.get_weighting(kernel_size[0] // df, kernel_size[1] // df, Ly, Lx, x.device).to(x.dtype)
        normalization = fold(weighting).view(1, 1, h // df, w // df)  # normalizes the overlap
        weighting = weighting.view((1, 1, kernel_size[0] // df, kernel_size[1] // df, Ly * Lx))

    else:
        raise NotImplementedError

    return fold, unfold, normalization, weighting
766
+
767
@torch.no_grad()
def get_input(self, batch, k, return_first_stage_outputs=False, force_c_encode=False,
              cond_key=None, return_original_cond=False, bs=None, return_x=False):
    """Encode the batch image into a latent ``z`` and gather conditioning ``c``.

    :param k: batch key of the image tensor.
    :param bs: optional batch-size cap applied to inputs and conditioning.
    :return: ``[z, c]`` plus, depending on flags, the raw input ``x``, its
        first-stage reconstruction ``xrec`` and the raw conditioning ``xc``.
    """
    x = super().get_input(batch, k)
    if bs is not None:
        x = x[:bs]
    x = x.to(self.device)
    encoder_posterior = self.encode_first_stage(x)
    z = self.get_first_stage_encoding(encoder_posterior).detach()

    if self.model.conditioning_key is not None and not self.force_null_conditioning:
        if cond_key is None:
            cond_key = self.cond_stage_key
        if cond_key != self.first_stage_key:
            if cond_key in ['caption', 'coordinates_bbox', "txt"]:
                xc = batch[cond_key]
            elif cond_key in ['class_label', 'cls']:
                # class-conditional models consume the whole batch dict
                xc = batch
            else:
                xc = super().get_input(batch, cond_key).to(self.device)
        else:
            # conditioning on the image itself
            xc = x
        if not self.cond_stage_trainable or force_c_encode:
            if isinstance(xc, dict) or isinstance(xc, list):
                c = self.get_learned_conditioning(xc)
            else:
                c = self.get_learned_conditioning(xc.to(self.device))
        else:
            # trainable cond stage: encode later, inside forward()
            c = xc
        if bs is not None:
            c = c[:bs]

        if self.use_positional_encodings:
            pos_x, pos_y = self.compute_latent_shifts(batch)
            ckey = __conditioning_keys__[self.model.conditioning_key]
            c = {ckey: c, 'pos_x': pos_x, 'pos_y': pos_y}

    else:
        c = None
        xc = None
        if self.use_positional_encodings:
            pos_x, pos_y = self.compute_latent_shifts(batch)
            c = {'pos_x': pos_x, 'pos_y': pos_y}
    out = [z, c]
    if return_first_stage_outputs:
        xrec = self.decode_first_stage(z)
        out.extend([x, xrec])
    if return_x:
        out.extend([x])
    if return_original_cond:
        out.append(xc)
    return out
819
+
820
@torch.no_grad()
def decode_first_stage(self, z, predict_cids=False, force_not_quantize=False):
    """Map a latent ``z`` back to image space via the first-stage decoder."""
    if predict_cids:
        if z.dim() == 4:
            # logits over codebook entries -> hard codebook indices
            z = torch.argmax(z.exp(), dim=1).long()
        z = self.first_stage_model.quantize.get_codebook_entry(z, shape=None)
        z = rearrange(z, 'b h w c -> b c h w').contiguous()

    # undo the latent scaling applied by get_first_stage_encoding
    z = 1. / self.scale_factor * z
    return self.first_stage_model.decode(z)
830
+
831
@torch.no_grad()
def encode_first_stage(self, x):
    """Encode an image batch with the frozen first-stage model."""
    posterior = self.first_stage_model.encode(x)
    return posterior
834
+
835
def shared_step(self, batch, **kwargs):
    """Common train/val step: fetch latent + conditioning, then compute the loss."""
    z, cond = self.get_input(batch, self.first_stage_key)
    return self(z, cond)
839
+
840
def forward(self, x, c, *args, **kwargs):
    """Sample a random timestep, (optionally) encode conditioning, compute the loss."""
    t = torch.randint(0, self.num_timesteps, (x.shape[0],), device=self.device).long()
    if self.model.conditioning_key is not None:
        assert c is not None
        if self.cond_stage_trainable:
            # conditioning was passed through raw from get_input; encode it now
            c = self.get_learned_conditioning(c)
        if self.shorten_cond_schedule:  # TODO: drop this option
            # noise the conditioning to its mapped conditioning timestep
            tc = self.cond_ids[t].to(self.device)
            c = self.q_sample(x_start=c, t=tc, noise=torch.randn_like(c.float()))
    return self.p_losses(x, c, t, *args, **kwargs)
850
+
851
def apply_model(self, x_noisy, t, cond, return_ids=False):
    """Run the wrapped diffusion model after normalizing ``cond`` to a dict.

    Non-dict conditioning is wrapped in a list and keyed as 'c_concat' or
    'c_crossattn' based on the model's conditioning key.
    """
    if isinstance(cond, dict):
        # hybrid case, cond is expected to be a dict
        pass
    else:
        if not isinstance(cond, list):
            cond = [cond]
        key = 'c_concat' if self.model.conditioning_key == 'concat' else 'c_crossattn'
        cond = {key: cond}

    x_recon = self.model(x_noisy, t, **cond)

    if isinstance(x_recon, tuple) and not return_ids:
        # some wrappers return (prediction, ids); keep only the prediction
        return x_recon[0]
    else:
        return x_recon
867
+
868
def _predict_eps_from_xstart(self, x_t, t, pred_xstart):
    """Recover the implied noise eps from x_t and a predicted x_0."""
    recip = extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape)
    recipm1 = extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape)
    return (recip * x_t - pred_xstart) / recipm1
871
+
872
def _prior_bpd(self, x_start):
    """
    Get the prior KL term for the variational lower-bound, measured in
    bits-per-dim.
    This term can't be optimized, as it only depends on the encoder.
    :param x_start: the [N x C x ...] tensor of inputs.
    :return: a batch of [N] KL values (in bits), one per batch element.
    """
    batch_size = x_start.shape[0]
    t = torch.tensor([self.num_timesteps - 1] * batch_size, device=x_start.device)
    qt_mean, _, qt_log_variance = self.q_mean_variance(x_start, t)
    kl_prior = normal_kl(mean1=qt_mean, logvar1=qt_log_variance, mean2=0.0, logvar2=0.0)
    # divide by log(2) to convert nats to bits
    return mean_flat(kl_prior) / np.log(2.0)
885
+
886
def p_losses(self, x_start, cond, t, noise=None):
    """Conditional diffusion loss in latent space; returns ``(loss, loss_dict)``.

    Combines the simple reconstruction loss (optionally reweighted by a
    learned per-timestep log-variance) with the VLB term.
    """
    noise = default(noise, lambda: torch.randn_like(x_start))
    x_noisy = self.q_sample(x_start=x_start, t=t, noise=noise)
    model_output = self.apply_model(x_noisy, t, cond)

    loss_dict = {}
    prefix = 'train' if self.training else 'val'

    if self.parameterization == "x0":
        target = x_start
    elif self.parameterization == "eps":
        target = noise
    elif self.parameterization == "v":
        target = self.get_v(x_start, noise, t)
    else:
        raise NotImplementedError()

    # per-sample loss over channel/spatial dims
    loss_simple = self.get_loss(model_output, target, mean=False).mean([1, 2, 3])
    loss_dict.update({f'{prefix}/loss_simple': loss_simple.mean()})

    # per-timestep learned (or fixed) log-variance reweighting
    logvar_t = self.logvar[t].to(self.device)
    loss = loss_simple / torch.exp(logvar_t) + logvar_t
    # loss = loss_simple / torch.exp(self.logvar) + self.logvar
    if self.learn_logvar:
        loss_dict.update({f'{prefix}/loss_gamma': loss.mean()})
        loss_dict.update({'logvar': self.logvar.data.mean()})

    loss = self.l_simple_weight * loss.mean()

    loss_vlb = self.get_loss(model_output, target, mean=False).mean(dim=(1, 2, 3))
    loss_vlb = (self.lvlb_weights[t] * loss_vlb).mean()
    loss_dict.update({f'{prefix}/loss_vlb': loss_vlb})
    loss += (self.original_elbo_weight * loss_vlb)
    loss_dict.update({f'{prefix}/loss': loss})

    return loss, loss_dict
922
+
923
def p_mean_variance(self, x, c, t, clip_denoised: bool, return_codebook_ids=False, quantize_denoised=False,
                    return_x0=False, score_corrector=None, corrector_kwargs=None):
    """Posterior mean/variance of p(x_{t-1} | x_t) under the conditional model."""
    t_in = t
    model_out = self.apply_model(x, t_in, c, return_ids=return_codebook_ids)

    if score_corrector is not None:
        assert self.parameterization == "eps"
        model_out = score_corrector.modify_score(self, model_out, x, t, c, **corrector_kwargs)

    if return_codebook_ids:
        model_out, logits = model_out

    if self.parameterization == "eps":
        x_recon = self.predict_start_from_noise(x, t=t, noise=model_out)
    elif self.parameterization == "x0":
        x_recon = model_out
    else:
        # NOTE(review): the "v" parameterization handled in p_losses is not
        # supported here — sampling with it would raise.
        raise NotImplementedError()

    if clip_denoised:
        x_recon.clamp_(-1., 1.)
    if quantize_denoised:
        x_recon, _, [_, _, indices] = self.first_stage_model.quantize(x_recon)
    model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t)
    if return_codebook_ids:
        return model_mean, posterior_variance, posterior_log_variance, logits
    elif return_x0:
        return model_mean, posterior_variance, posterior_log_variance, x_recon
    else:
        return model_mean, posterior_variance, posterior_log_variance
953
+
954
@torch.no_grad()
def p_sample(self, x, c, t, clip_denoised=False, repeat_noise=False,
             return_codebook_ids=False, quantize_denoised=False, return_x0=False,
             temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None):
    """Draw x_{t-1} from p(x_{t-1} | x_t) — a single reverse diffusion step."""
    b, *_, device = *x.shape, x.device
    outputs = self.p_mean_variance(x=x, c=c, t=t, clip_denoised=clip_denoised,
                                   return_codebook_ids=return_codebook_ids,
                                   quantize_denoised=quantize_denoised,
                                   return_x0=return_x0,
                                   score_corrector=score_corrector, corrector_kwargs=corrector_kwargs)
    if return_codebook_ids:
        raise DeprecationWarning("Support dropped.")
        # NOTE(review): unreachable after the raise above.
        model_mean, _, model_log_variance, logits = outputs
    elif return_x0:
        model_mean, _, model_log_variance, x0 = outputs
    else:
        model_mean, _, model_log_variance = outputs

    noise = noise_like(x.shape, device, repeat_noise) * temperature
    if noise_dropout > 0.:
        noise = torch.nn.functional.dropout(noise, p=noise_dropout)
    # no noise when t == 0
    nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1)))

    if return_codebook_ids:
        return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise, logits.argmax(dim=1)
    if return_x0:
        return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise, x0
    else:
        return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise
984
+
985
@torch.no_grad()
def progressive_denoising(self, cond, shape, verbose=True, callback=None, quantize_denoised=False,
                          img_callback=None, mask=None, x0=None, temperature=1., noise_dropout=0.,
                          score_corrector=None, corrector_kwargs=None, batch_size=None, x_T=None, start_T=None,
                          log_every_t=None):
    """Ancestral sampling loop that also collects intermediate x0 predictions.

    Returns ``(img, intermediates)`` where ``intermediates`` holds the
    predicted x0 at every ``log_every_t`` steps.
    """
    if not log_every_t:
        log_every_t = self.log_every_t
    timesteps = self.num_timesteps
    if batch_size is not None:
        b = batch_size if batch_size is not None else shape[0]
        # shape is per-sample here; prepend the batch dim
        shape = [batch_size] + list(shape)
    else:
        b = batch_size = shape[0]
    if x_T is None:
        img = torch.randn(shape, device=self.device)
    else:
        img = x_T
    intermediates = []
    if cond is not None:
        if isinstance(cond, dict):
            # trim every conditioning entry to the batch size
            cond = {key: cond[key][:batch_size] if not isinstance(cond[key], list) else
            list(map(lambda x: x[:batch_size], cond[key])) for key in cond}
        else:
            cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size]

    if start_T is not None:
        timesteps = min(timesteps, start_T)
    iterator = tqdm(reversed(range(0, timesteps)), desc='Progressive Generation',
                    total=timesteps) if verbose else reversed(
        range(0, timesteps))
    if type(temperature) == float:
        # broadcast a single temperature over all timesteps
        temperature = [temperature] * timesteps

    for i in iterator:
        ts = torch.full((b,), i, device=self.device, dtype=torch.long)
        if self.shorten_cond_schedule:
            assert self.model.conditioning_key != 'hybrid'
            tc = self.cond_ids[ts].to(cond.device)
            cond = self.q_sample(x_start=cond, t=tc, noise=torch.randn_like(cond))

        img, x0_partial = self.p_sample(img, cond, ts,
                                        clip_denoised=self.clip_denoised,
                                        quantize_denoised=quantize_denoised, return_x0=True,
                                        temperature=temperature[i], noise_dropout=noise_dropout,
                                        score_corrector=score_corrector, corrector_kwargs=corrector_kwargs)
        if mask is not None:
            # inpainting: keep the masked region pinned to the noised original
            assert x0 is not None
            img_orig = self.q_sample(x0, ts)
            img = img_orig * mask + (1. - mask) * img

        if i % log_every_t == 0 or i == timesteps - 1:
            intermediates.append(x0_partial)
        if callback: callback(i)
        if img_callback: img_callback(img, i)
    return img, intermediates
1040
+
1041
@torch.no_grad()
def p_sample_loop(self, cond, shape, return_intermediates=False,
                  x_T=None, verbose=True, callback=None, timesteps=None, quantize_denoised=False,
                  mask=None, x0=None, img_callback=None, start_T=None,
                  log_every_t=None):
    """Full ancestral DDPM sampling loop in latent space.

    :param x_T: optional starting noise; drawn fresh if None.
    :param mask: optional inpainting mask (1 = keep original ``x0`` region).
    :return: the final sample, plus intermediates if requested.
    """
    if not log_every_t:
        log_every_t = self.log_every_t
    device = self.betas.device
    b = shape[0]
    if x_T is None:
        img = torch.randn(shape, device=device)
    else:
        img = x_T

    intermediates = [img]
    if timesteps is None:
        timesteps = self.num_timesteps

    if start_T is not None:
        timesteps = min(timesteps, start_T)
    iterator = tqdm(reversed(range(0, timesteps)), desc='Sampling t', total=timesteps) if verbose else reversed(
        range(0, timesteps))

    if mask is not None:
        assert x0 is not None
        # NOTE(review): [2:3] compares only the height dim, though the comment
        # says "spatial size" — confirm whether [2:] was intended.
        assert x0.shape[2:3] == mask.shape[2:3]  # spatial size has to match

    for i in iterator:
        ts = torch.full((b,), i, device=device, dtype=torch.long)
        if self.shorten_cond_schedule:
            assert self.model.conditioning_key != 'hybrid'
            tc = self.cond_ids[ts].to(cond.device)
            cond = self.q_sample(x_start=cond, t=tc, noise=torch.randn_like(cond))

        img = self.p_sample(img, cond, ts,
                            clip_denoised=self.clip_denoised,
                            quantize_denoised=quantize_denoised)
        if mask is not None:
            # keep the masked region pinned to the (noised) original
            img_orig = self.q_sample(x0, ts)
            img = img_orig * mask + (1. - mask) * img

        if i % log_every_t == 0 or i == timesteps - 1:
            intermediates.append(img)
        if callback: callback(i)
        if img_callback: img_callback(img, i)

    if return_intermediates:
        return img, intermediates
    return img
1091
+
1092
@torch.no_grad()
def sample(self, cond, batch_size=16, return_intermediates=False, x_T=None,
           verbose=True, timesteps=None, quantize_denoised=False,
           mask=None, x0=None, shape=None, **kwargs):
    """Convenience wrapper around p_sample_loop with batch-trimmed conditioning."""
    if shape is None:
        shape = (batch_size, self.channels, self.image_size, self.image_size)
    if cond is not None:
        if isinstance(cond, dict):
            # trim every conditioning entry to the batch size
            cond = {key: cond[key][:batch_size] if not isinstance(cond[key], list) else
            list(map(lambda x: x[:batch_size], cond[key])) for key in cond}
        else:
            cond = [c[:batch_size] for c in cond] if isinstance(cond, list) else cond[:batch_size]
    return self.p_sample_loop(cond,
                              shape,
                              return_intermediates=return_intermediates, x_T=x_T,
                              verbose=verbose, timesteps=timesteps, quantize_denoised=quantize_denoised,
                              mask=mask, x0=x0)
1109
+
1110
@torch.no_grad()
def sample_log(self, cond, batch_size, ddim, ddim_steps, **kwargs):
    """Sample latents either with DDIM or with the ancestral DDPM loop."""
    if ddim:
        sampler = DDIMSampler(self)
        latent_shape = (self.channels, self.image_size, self.image_size)
        samples, intermediates = sampler.sample(ddim_steps, batch_size,
                                                latent_shape, cond, verbose=False, **kwargs)
    else:
        samples, intermediates = self.sample(cond=cond, batch_size=batch_size,
                                             return_intermediates=True, **kwargs)
    return samples, intermediates
1123
+
1124
@torch.no_grad()
def get_unconditional_conditioning(self, batch_size, null_label=None):
    """Build the unconditional ("null") conditioning used for classifier-free guidance.

    With a ``null_label``, it is encoded and broadcast to ``batch_size``;
    otherwise only class-conditional models are supported.
    """
    if null_label is not None:
        xc = null_label
        if isinstance(xc, ListConfig):
            xc = list(xc)
        if isinstance(xc, dict) or isinstance(xc, list):
            c = self.get_learned_conditioning(xc)
        else:
            if hasattr(xc, "to"):
                xc = xc.to(self.device)
            c = self.get_learned_conditioning(xc)
    else:
        if self.cond_stage_key in ["class_label", "cls"]:
            xc = self.cond_stage_model.get_unconditional_conditioning(batch_size, device=self.device)
            return self.get_learned_conditioning(xc)
        else:
            raise NotImplementedError("todo")
    if isinstance(c, list):  # in case the encoder gives us a list
        for i in range(len(c)):
            c[i] = repeat(c[i], '1 ... -> b ...', b=batch_size).to(self.device)
    else:
        c = repeat(c, '1 ... -> b ...', b=batch_size).to(self.device)
    return c
1148
+
1149
@torch.no_grad()
def log_images(self, batch, N=8, n_row=4, sample=True, ddim_steps=50, ddim_eta=0., return_keys=None,
               quantize_denoised=True, inpaint=True, plot_denoise_rows=False, plot_progressive_rows=True,
               plot_diffusion_rows=True, unconditional_guidance_scale=1., unconditional_guidance_label=None,
               use_ema_scope=True,
               **kwargs):
    """Assemble a dict of tensors for image logging callbacks.

    Depending on the flags, the returned dict can contain: the inputs and
    their first-stage reconstruction, a rendering of the conditioning, a
    forward-diffusion row, plain samples, x0-quantized samples,
    classifier-free-guided samples, center-square inpainting/outpainting
    results and a progressive denoising row.

    Args:
        batch: raw dataloader batch.
        N: maximum number of examples to visualize.
        n_row: maximum number of examples per grid row.
        sample: draw samples from the model.
        ddim_steps: number of DDIM steps; None disables DDIM sampling.
        ddim_eta: DDIM eta parameter.
        return_keys: if given and present in the log, restrict output to these keys.
        quantize_denoised: additionally sample with quantized x0 predictions
            (skipped for AutoencoderKL / IdentityFirstStage first stages).
        inpaint: additionally log inpainting and outpainting of a center square.
        plot_denoise_rows / plot_progressive_rows / plot_diffusion_rows:
            toggle the corresponding grid visualizations.
        unconditional_guidance_scale: CFG scale; values > 1 add a guided sample.
        unconditional_guidance_label: label used for the unconditional branch.
        use_ema_scope: run sampling under EMA weights when True.

    Returns:
        dict mapping log names to image tensors (or grids).
    """
    ema_scope = self.ema_scope if use_ema_scope else nullcontext
    use_ddim = ddim_steps is not None

    log = dict()
    z, c, x, xrec, xc = self.get_input(batch, self.first_stage_key,
                                       return_first_stage_outputs=True,
                                       force_c_encode=True,
                                       return_original_cond=True,
                                       bs=N)
    # Clamp to the actual batch size in case the batch is smaller than N.
    N = min(x.shape[0], N)
    n_row = min(x.shape[0], n_row)
    log["inputs"] = x
    log["reconstruction"] = xrec
    if self.model.conditioning_key is not None:
        # Render the conditioning into an image form suitable for logging.
        if hasattr(self.cond_stage_model, "decode"):
            xc = self.cond_stage_model.decode(c)
            log["conditioning"] = xc
        elif self.cond_stage_key in ["caption", "txt"]:
            # Text conditioning: rasterize the caption strings into images.
            xc = log_txt_as_img((x.shape[2], x.shape[3]), batch[self.cond_stage_key], size=x.shape[2] // 25)
            log["conditioning"] = xc
        elif self.cond_stage_key in ['class_label', "cls"]:
            try:
                xc = log_txt_as_img((x.shape[2], x.shape[3]), batch["human_label"], size=x.shape[2] // 25)
                log['conditioning'] = xc
            except KeyError:
                # probably no "human_label" in batch
                pass
        elif isimage(xc):
            log["conditioning"] = xc
        if ismap(xc):
            log["original_conditioning"] = self.to_rgb(xc)

    if plot_diffusion_rows:
        # get diffusion row: decode progressively noisier latents at logged timesteps
        diffusion_row = list()
        z_start = z[:n_row]
        for t in range(self.num_timesteps):
            if t % self.log_every_t == 0 or t == self.num_timesteps - 1:
                t = repeat(torch.tensor([t]), '1 -> b', b=n_row)
                t = t.to(self.device).long()
                noise = torch.randn_like(z_start)
                z_noisy = self.q_sample(x_start=z_start, t=t, noise=noise)
                diffusion_row.append(self.decode_first_stage(z_noisy))

        diffusion_row = torch.stack(diffusion_row)  # n_log_step, n_row, C, H, W
        diffusion_grid = rearrange(diffusion_row, 'n b c h w -> b n c h w')
        diffusion_grid = rearrange(diffusion_grid, 'b n c h w -> (b n) c h w')
        diffusion_grid = make_grid(diffusion_grid, nrow=diffusion_row.shape[0])
        log["diffusion_row"] = diffusion_grid

    if sample:
        # get denoise row
        with ema_scope("Sampling"):
            samples, z_denoise_row = self.sample_log(cond=c, batch_size=N, ddim=use_ddim,
                                                     ddim_steps=ddim_steps, eta=ddim_eta)
        # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True)
        x_samples = self.decode_first_stage(samples)
        log["samples"] = x_samples
        if plot_denoise_rows:
            denoise_grid = self._get_denoise_row_from_list(z_denoise_row)
            log["denoise_row"] = denoise_grid

        if quantize_denoised and not isinstance(self.first_stage_model, AutoencoderKL) and not isinstance(
                self.first_stage_model, IdentityFirstStage):
            # also display when quantizing x0 while sampling
            with ema_scope("Plotting Quantized Denoised"):
                samples, z_denoise_row = self.sample_log(cond=c, batch_size=N, ddim=use_ddim,
                                                         ddim_steps=ddim_steps, eta=ddim_eta,
                                                         quantize_denoised=True)
            # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True,
            #                                      quantize_denoised=True)
            x_samples = self.decode_first_stage(samples.to(self.device))
            log["samples_x0_quantized"] = x_samples

    if unconditional_guidance_scale > 1.0:
        uc = self.get_unconditional_conditioning(N, unconditional_guidance_label)
        if self.model.conditioning_key == "crossattn-adm":
            # keep the adm conditioning from c; only the cross-attn part is "unconditional"
            uc = {"c_crossattn": [uc], "c_adm": c["c_adm"]}
        with ema_scope("Sampling with classifier-free guidance"):
            samples_cfg, _ = self.sample_log(cond=c, batch_size=N, ddim=use_ddim,
                                             ddim_steps=ddim_steps, eta=ddim_eta,
                                             unconditional_guidance_scale=unconditional_guidance_scale,
                                             unconditional_conditioning=uc,
                                             )
            x_samples_cfg = self.decode_first_stage(samples_cfg)
            log[f"samples_cfg_scale_{unconditional_guidance_scale:.2f}"] = x_samples_cfg

    if inpaint:
        # make a simple center square
        b, h, w = z.shape[0], z.shape[2], z.shape[3]
        mask = torch.ones(N, h, w).to(self.device)
        # zeros will be filled in
        mask[:, h // 4:3 * h // 4, w // 4:3 * w // 4] = 0.
        mask = mask[:, None, ...]
        with ema_scope("Plotting Inpaint"):
            samples, _ = self.sample_log(cond=c, batch_size=N, ddim=use_ddim, eta=ddim_eta,
                                         ddim_steps=ddim_steps, x0=z[:N], mask=mask)
        x_samples = self.decode_first_stage(samples.to(self.device))
        log["samples_inpainting"] = x_samples
        log["mask"] = mask

        # outpaint: invert the mask so the border is regenerated instead
        mask = 1. - mask
        with ema_scope("Plotting Outpaint"):
            samples, _ = self.sample_log(cond=c, batch_size=N, ddim=use_ddim, eta=ddim_eta,
                                         ddim_steps=ddim_steps, x0=z[:N], mask=mask)
        x_samples = self.decode_first_stage(samples.to(self.device))
        log["samples_outpainting"] = x_samples

    if plot_progressive_rows:
        with ema_scope("Plotting Progressives"):
            img, progressives = self.progressive_denoising(c,
                                                           shape=(self.channels, self.image_size, self.image_size),
                                                           batch_size=N)
        prog_row = self._get_denoise_row_from_list(progressives, desc="Progressive Generation")
        log["progressive_row"] = prog_row

    if return_keys:
        # If none of the requested keys exist, return everything instead.
        if np.intersect1d(list(log.keys()), return_keys).shape[0] == 0:
            return log
        else:
            return {key: log[key] for key in return_keys}
    return log
1278
+
1279
def configure_optimizers(self):
    """Build the AdamW optimizer (and optional LambdaLR scheduler).

    Optimizes the diffusion model parameters, optionally extended with the
    conditioning-stage parameters and the learned per-timestep log-variance.

    Returns:
        Either the optimizer alone, or ``([optimizer], [scheduler_dict])``
        when ``self.use_scheduler`` is set (PyTorch Lightning convention).
    """
    base_lr = self.learning_rate
    trainable = list(self.model.parameters())
    if self.cond_stage_trainable:
        print(f"{self.__class__.__name__}: Also optimizing conditioner params!")
        trainable = trainable + list(self.cond_stage_model.parameters())
    if self.learn_logvar:
        print('Diffusion model optimizing logvar')
        trainable.append(self.logvar)
    optimizer = torch.optim.AdamW(trainable, lr=base_lr)

    if not self.use_scheduler:
        return optimizer

    assert 'target' in self.scheduler_config
    schedule = instantiate_from_config(self.scheduler_config)

    print("Setting up LambdaLR scheduler...")
    lr_schedulers = [
        {
            'scheduler': LambdaLR(optimizer, lr_lambda=schedule.schedule),
            'interval': 'step',
            'frequency': 1
        }]
    return [optimizer], lr_schedulers
1302
+
1303
@torch.no_grad()
def to_rgb(self, x):
    """Project a multi-channel feature map to a 3-channel image in [-1, 1].

    A fixed random 1x1 convolution is created lazily on first use and cached
    on ``self.colorize`` so later calls produce consistent colors.
    """
    x = x.float()
    if not hasattr(self, "colorize"):
        # Lazily create the random projection; reused across calls.
        self.colorize = torch.randn(3, x.shape[1], 1, 1).to(x)
    projected = nn.functional.conv2d(x, weight=self.colorize)
    lo, hi = projected.min(), projected.max()
    # Min-max normalize to [-1, 1] for visualization.
    return 2. * (projected - lo) / (hi - lo) - 1.
1311
+
1312
+
1313
class DiffusionWrapper(pl.LightningModule):
    """Routes conditioning inputs into the wrapped diffusion model.

    ``conditioning_key`` selects how ``c_concat`` / ``c_crossattn`` / ``c_adm``
    are combined with the noisy input before calling the UNet.
    """

    def __init__(self, diff_model_config, conditioning_key):
        super().__init__()
        # "sequential_crossattn" is consumed here and must not reach the UNet config.
        self.sequential_cross_attn = diff_model_config.pop("sequential_crossattn", False)
        self.diffusion_model = instantiate_from_config(diff_model_config)
        self.conditioning_key = conditioning_key
        assert self.conditioning_key in [None, 'concat', 'crossattn', 'hybrid', 'adm', 'hybrid-adm', 'crossattn-adm']

    def forward(self, x, t, c_concat: list = None, c_crossattn: list = None, c_adm=None):
        key = self.conditioning_key
        if key is None:
            # Unconditional model.
            return self.diffusion_model(x, t)
        if key == 'concat':
            return self.diffusion_model(torch.cat([x] + c_concat, dim=1), t)
        if key == 'crossattn':
            if self.sequential_cross_attn:
                context = c_crossattn
            else:
                context = torch.cat(c_crossattn, 1)
            return self.diffusion_model(x, t, context=context)
        if key == 'hybrid':
            # Channel-concat conditioning combined with cross-attention context.
            stacked = torch.cat([x] + c_concat, dim=1)
            return self.diffusion_model(stacked, t, context=torch.cat(c_crossattn, 1))
        if key == 'hybrid-adm':
            assert c_adm is not None
            stacked = torch.cat([x] + c_concat, dim=1)
            return self.diffusion_model(stacked, t, context=torch.cat(c_crossattn, 1), y=c_adm)
        if key == 'crossattn-adm':
            assert c_adm is not None
            return self.diffusion_model(x, t, context=torch.cat(c_crossattn, 1), y=c_adm)
        if key == 'adm':
            # Class-conditional style: first cross-attn entry is the label embedding.
            return self.diffusion_model(x, t, y=c_crossattn[0])
        raise NotImplementedError()
1353
+
1354
+
1355
class LatentUpscaleDiffusion(LatentDiffusion):
    """Latent diffusion conditioned on a (noise-augmented) low-resolution image.

    A frozen low-scale model encodes the LR image and reports a noise level,
    which is fed to the UNet as channel-concat plus adm conditioning.
    """

    def __init__(self, *args, low_scale_config, low_scale_key="LR", noise_level_key=None, **kwargs):
        super().__init__(*args, **kwargs)
        # assumes that neither the cond_stage nor the low_scale_model contain trainable params
        assert not self.cond_stage_trainable
        self.instantiate_low_stage(low_scale_config)
        self.low_scale_key = low_scale_key
        self.noise_level_key = noise_level_key

    def instantiate_low_stage(self, config):
        """Build the low-scale model and freeze it (eval mode, no grads)."""
        model = instantiate_from_config(config)
        self.low_scale_model = model.eval()
        # Override .train so Lightning cannot flip it back to training mode.
        self.low_scale_model.train = disabled_train
        for param in self.low_scale_model.parameters():
            param.requires_grad = False

    @torch.no_grad()
    def get_input(self, batch, k, cond_key=None, bs=None, log_mode=False):
        """Fetch latents plus the LR conditioning dict.

        Returns ``(z, all_conds)`` normally; in ``log_mode`` additionally
        returns inputs, reconstructions, raw conditioning, the LR image,
        its reconstruction from the low-scale model, and the noise level.
        """
        if not log_mode:
            z, c = super().get_input(batch, k, force_c_encode=True, bs=bs)
        else:
            z, c, x, xrec, xc = super().get_input(batch, self.first_stage_key, return_first_stage_outputs=True,
                                                  force_c_encode=True, return_original_cond=True, bs=bs)
        x_low = batch[self.low_scale_key][:bs]
        # Dataloader delivers HWC; the models expect CHW.
        x_low = rearrange(x_low, 'b h w c -> b c h w')
        x_low = x_low.to(memory_format=torch.contiguous_format).float()
        zx, noise_level = self.low_scale_model(x_low)
        if self.noise_level_key is not None:
            # get noise level from batch instead, e.g. when extracting a custom noise level for bsr
            raise NotImplementedError('TODO')

        all_conds = {"c_concat": [zx], "c_crossattn": [c], "c_adm": noise_level}
        if log_mode:
            # TODO: maybe disable if too expensive
            x_low_rec = self.low_scale_model.decode(zx)
            return z, all_conds, x, xrec, xc, x_low, x_low_rec, noise_level
        return z, all_conds

    @torch.no_grad()
    def log_images(self, batch, N=8, n_row=4, sample=True, ddim_steps=200, ddim_eta=1., return_keys=None,
                   plot_denoise_rows=False, plot_progressive_rows=True, plot_diffusion_rows=True,
                   unconditional_guidance_scale=1., unconditional_guidance_label=None, use_ema_scope=True,
                   **kwargs):
        """Log inputs, reconstructions, LR conditioning, diffusion rows,
        samples and (optionally) CFG samples and progressive rows."""
        ema_scope = self.ema_scope if use_ema_scope else nullcontext
        use_ddim = ddim_steps is not None

        log = dict()
        z, c, x, xrec, xc, x_low, x_low_rec, noise_level = self.get_input(batch, self.first_stage_key, bs=N,
                                                                          log_mode=True)
        N = min(x.shape[0], N)
        n_row = min(x.shape[0], n_row)
        log["inputs"] = x
        log["reconstruction"] = xrec
        log["x_lr"] = x_low
        # Embed the per-sample noise levels into the log key for traceability.
        log[f"x_lr_rec_@noise_levels{'-'.join(map(lambda x: str(x), list(noise_level.cpu().numpy())))}"] = x_low_rec
        if self.model.conditioning_key is not None:
            if hasattr(self.cond_stage_model, "decode"):
                xc = self.cond_stage_model.decode(c)
                log["conditioning"] = xc
            elif self.cond_stage_key in ["caption", "txt"]:
                xc = log_txt_as_img((x.shape[2], x.shape[3]), batch[self.cond_stage_key], size=x.shape[2] // 25)
                log["conditioning"] = xc
            elif self.cond_stage_key in ['class_label', 'cls']:
                xc = log_txt_as_img((x.shape[2], x.shape[3]), batch["human_label"], size=x.shape[2] // 25)
                log['conditioning'] = xc
            elif isimage(xc):
                log["conditioning"] = xc
            if ismap(xc):
                log["original_conditioning"] = self.to_rgb(xc)

        if plot_diffusion_rows:
            # get diffusion row: decode progressively noisier latents
            diffusion_row = list()
            z_start = z[:n_row]
            for t in range(self.num_timesteps):
                if t % self.log_every_t == 0 or t == self.num_timesteps - 1:
                    t = repeat(torch.tensor([t]), '1 -> b', b=n_row)
                    t = t.to(self.device).long()
                    noise = torch.randn_like(z_start)
                    z_noisy = self.q_sample(x_start=z_start, t=t, noise=noise)
                    diffusion_row.append(self.decode_first_stage(z_noisy))

            diffusion_row = torch.stack(diffusion_row)  # n_log_step, n_row, C, H, W
            diffusion_grid = rearrange(diffusion_row, 'n b c h w -> b n c h w')
            diffusion_grid = rearrange(diffusion_grid, 'b n c h w -> (b n) c h w')
            diffusion_grid = make_grid(diffusion_grid, nrow=diffusion_row.shape[0])
            log["diffusion_row"] = diffusion_grid

        if sample:
            # get denoise row
            with ema_scope("Sampling"):
                samples, z_denoise_row = self.sample_log(cond=c, batch_size=N, ddim=use_ddim,
                                                         ddim_steps=ddim_steps, eta=ddim_eta)
            # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True)
            x_samples = self.decode_first_stage(samples)
            log["samples"] = x_samples
            if plot_denoise_rows:
                denoise_grid = self._get_denoise_row_from_list(z_denoise_row)
                log["denoise_row"] = denoise_grid

        if unconditional_guidance_scale > 1.0:
            uc_tmp = self.get_unconditional_conditioning(N, unconditional_guidance_label)
            # TODO explore better "unconditional" choices for the other keys
            # maybe guide away from empty text label and highest noise level and maximally degraded zx?
            uc = dict()
            for k in c:
                if k == "c_crossattn":
                    assert isinstance(c[k], list) and len(c[k]) == 1
                    uc[k] = [uc_tmp]
                elif k == "c_adm":  # todo: only run with text-based guidance?
                    assert isinstance(c[k], torch.Tensor)
                    #uc[k] = torch.ones_like(c[k]) * self.low_scale_model.max_noise_level
                    uc[k] = c[k]
                elif isinstance(c[k], list):
                    uc[k] = [c[k][i] for i in range(len(c[k]))]
                else:
                    uc[k] = c[k]

            with ema_scope("Sampling with classifier-free guidance"):
                samples_cfg, _ = self.sample_log(cond=c, batch_size=N, ddim=use_ddim,
                                                 ddim_steps=ddim_steps, eta=ddim_eta,
                                                 unconditional_guidance_scale=unconditional_guidance_scale,
                                                 unconditional_conditioning=uc,
                                                 )
                x_samples_cfg = self.decode_first_stage(samples_cfg)
                log[f"samples_cfg_scale_{unconditional_guidance_scale:.2f}"] = x_samples_cfg

        if plot_progressive_rows:
            with ema_scope("Plotting Progressives"):
                img, progressives = self.progressive_denoising(c,
                                                               shape=(self.channels, self.image_size, self.image_size),
                                                               batch_size=N)
            prog_row = self._get_denoise_row_from_list(progressives, desc="Progressive Generation")
            log["progressive_row"] = prog_row

        return log
1491
+
1492
+
1493
class LatentFinetuneDiffusion(LatentDiffusion):
    """
    Basis for different finetunes, such as inpainting or depth2image
    To disable finetuning mode, set finetune_keys to None
    """

    def __init__(self,
                 concat_keys: tuple,
                 # Default finetune keys: the UNet's first conv weight, in both the
                 # live model and its EMA copy (EMA key names have dots stripped).
                 finetune_keys=("model.diffusion_model.input_blocks.0.0.weight",
                                "model_ema.diffusion_modelinput_blocks00weight"
                                ),
                 keep_finetune_dims=4,
                 # if model was trained without concat mode before and we would like to keep these channels
                 c_concat_log_start=None, # to log reconstruction of c_concat codes
                 c_concat_log_end=None,
                 *args, **kwargs
                 ):
        ckpt_path = kwargs.pop("ckpt_path", None)
        ignore_keys = kwargs.pop("ignore_keys", list())
        super().__init__(*args, **kwargs)
        self.finetune_keys = finetune_keys
        self.concat_keys = concat_keys
        self.keep_dims = keep_finetune_dims
        self.c_concat_log_start = c_concat_log_start
        self.c_concat_log_end = c_concat_log_end
        if exists(self.finetune_keys): assert exists(ckpt_path), 'can only finetune from a given checkpoint'
        if exists(ckpt_path):
            self.init_from_ckpt(ckpt_path, ignore_keys)

    # NOTE(review): mutable default `ignore_keys=list()` is shared across calls;
    # harmless here because it is never mutated, but worth confirming.
    def init_from_ckpt(self, path, ignore_keys=list(), only_model=False):
        """Load a checkpoint, dropping ignored keys and zero-padding the
        finetune keys with extra input channels beyond ``self.keep_dims``."""
        sd = torch.load(path, map_location="cpu")
        if "state_dict" in list(sd.keys()):
            sd = sd["state_dict"]
        keys = list(sd.keys())
        for k in keys:
            for ik in ignore_keys:
                if k.startswith(ik):
                    print("Deleting key {} from state_dict.".format(k))
                    del sd[k]

            # make it explicit, finetune by including extra input channels
            if exists(self.finetune_keys) and k in self.finetune_keys:
                new_entry = None
                for name, param in self.named_parameters():
                    if name in self.finetune_keys:
                        print(
                            f"modifying key '{name}' and keeping its original {self.keep_dims} (channels) dimensions only")
                        new_entry = torch.zeros_like(param)  # zero init
                assert exists(new_entry), 'did not find matching parameter to modify'
                # Copy the pretrained channels; the extra channels stay zero.
                new_entry[:, :self.keep_dims, ...] = sd[k]
                sd[k] = new_entry

        missing, unexpected = self.load_state_dict(sd, strict=False) if not only_model else self.model.load_state_dict(
            sd, strict=False)
        print(f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys")
        if len(missing) > 0:
            print(f"Missing Keys: {missing}")
        if len(unexpected) > 0:
            print(f"Unexpected Keys: {unexpected}")

    @torch.no_grad()
    def log_images(self, batch, N=8, n_row=4, sample=True, ddim_steps=200, ddim_eta=1., return_keys=None,
                   quantize_denoised=True, inpaint=True, plot_denoise_rows=False, plot_progressive_rows=True,
                   plot_diffusion_rows=True, unconditional_guidance_scale=1., unconditional_guidance_label=None,
                   use_ema_scope=True,
                   **kwargs):
        """Log inputs, reconstructions, conditioning, diffusion rows, samples
        and optional CFG samples for concat-conditioned finetuned models."""
        ema_scope = self.ema_scope if use_ema_scope else nullcontext
        use_ddim = ddim_steps is not None

        log = dict()
        z, c, x, xrec, xc = self.get_input(batch, self.first_stage_key, bs=N, return_first_stage_outputs=True)
        # Split the conditioning dict into its concat and cross-attn parts.
        c_cat, c = c["c_concat"][0], c["c_crossattn"][0]
        N = min(x.shape[0], N)
        n_row = min(x.shape[0], n_row)
        log["inputs"] = x
        log["reconstruction"] = xrec
        if self.model.conditioning_key is not None:
            if hasattr(self.cond_stage_model, "decode"):
                xc = self.cond_stage_model.decode(c)
                log["conditioning"] = xc
            elif self.cond_stage_key in ["caption", "txt"]:
                xc = log_txt_as_img((x.shape[2], x.shape[3]), batch[self.cond_stage_key], size=x.shape[2] // 25)
                log["conditioning"] = xc
            elif self.cond_stage_key in ['class_label', 'cls']:
                xc = log_txt_as_img((x.shape[2], x.shape[3]), batch["human_label"], size=x.shape[2] // 25)
                log['conditioning'] = xc
            elif isimage(xc):
                log["conditioning"] = xc
            if ismap(xc):
                log["original_conditioning"] = self.to_rgb(xc)

        if not (self.c_concat_log_start is None and self.c_concat_log_end is None):
            # Decode a slice of the concat conditioning for inspection.
            log["c_concat_decoded"] = self.decode_first_stage(c_cat[:, self.c_concat_log_start:self.c_concat_log_end])

        if plot_diffusion_rows:
            # get diffusion row
            diffusion_row = list()
            z_start = z[:n_row]
            for t in range(self.num_timesteps):
                if t % self.log_every_t == 0 or t == self.num_timesteps - 1:
                    t = repeat(torch.tensor([t]), '1 -> b', b=n_row)
                    t = t.to(self.device).long()
                    noise = torch.randn_like(z_start)
                    z_noisy = self.q_sample(x_start=z_start, t=t, noise=noise)
                    diffusion_row.append(self.decode_first_stage(z_noisy))

            diffusion_row = torch.stack(diffusion_row)  # n_log_step, n_row, C, H, W
            diffusion_grid = rearrange(diffusion_row, 'n b c h w -> b n c h w')
            diffusion_grid = rearrange(diffusion_grid, 'b n c h w -> (b n) c h w')
            diffusion_grid = make_grid(diffusion_grid, nrow=diffusion_row.shape[0])
            log["diffusion_row"] = diffusion_grid

        if sample:
            # get denoise row
            with ema_scope("Sampling"):
                samples, z_denoise_row = self.sample_log(cond={"c_concat": [c_cat], "c_crossattn": [c]},
                                                         batch_size=N, ddim=use_ddim,
                                                         ddim_steps=ddim_steps, eta=ddim_eta)
            # samples, z_denoise_row = self.sample(cond=c, batch_size=N, return_intermediates=True)
            x_samples = self.decode_first_stage(samples)
            log["samples"] = x_samples
            if plot_denoise_rows:
                denoise_grid = self._get_denoise_row_from_list(z_denoise_row)
                log["denoise_row"] = denoise_grid

        if unconditional_guidance_scale > 1.0:
            uc_cross = self.get_unconditional_conditioning(N, unconditional_guidance_label)
            # Keep the concat conditioning in the unconditional branch.
            uc_cat = c_cat
            uc_full = {"c_concat": [uc_cat], "c_crossattn": [uc_cross]}
            with ema_scope("Sampling with classifier-free guidance"):
                samples_cfg, _ = self.sample_log(cond={"c_concat": [c_cat], "c_crossattn": [c]},
                                                 batch_size=N, ddim=use_ddim,
                                                 ddim_steps=ddim_steps, eta=ddim_eta,
                                                 unconditional_guidance_scale=unconditional_guidance_scale,
                                                 unconditional_conditioning=uc_full,
                                                 )
                x_samples_cfg = self.decode_first_stage(samples_cfg)
                log[f"samples_cfg_scale_{unconditional_guidance_scale:.2f}"] = x_samples_cfg

        return log
1633
+
1634
+
1635
class LatentInpaintDiffusion(LatentFinetuneDiffusion):
    """
    can either run as pure inpainting model (only concat mode) or with mixed conditionings,
    e.g. mask as concat and text via cross-attn.
    To disable finetuning mode, set finetune_keys to None
    """

    def __init__(self,
                 concat_keys=("mask", "masked_image"),
                 masked_image_key="masked_image",
                 *args, **kwargs
                 ):
        super().__init__(concat_keys, *args, **kwargs)
        self.masked_image_key = masked_image_key
        assert self.masked_image_key in concat_keys

    @torch.no_grad()
    def get_input(self, batch, k, cond_key=None, bs=None, return_first_stage_outputs=False):
        """Fetch latents plus the inpainting conditioning.

        Masks are resized to the latent resolution; the masked image is
        encoded through the first stage. Both are channel-concatenated into
        ``c_concat``.
        """
        # note: restricted to non-trainable encoders currently
        assert not self.cond_stage_trainable, 'trainable cond stages not yet supported for inpainting'
        z, c, x, xrec, xc = super().get_input(batch, self.first_stage_key, return_first_stage_outputs=True,
                                              force_c_encode=True, return_original_cond=True, bs=bs)

        assert exists(self.concat_keys)
        c_cat = list()
        for ck in self.concat_keys:
            cc = rearrange(batch[ck], 'b h w c -> b c h w').to(memory_format=torch.contiguous_format).float()
            if bs is not None:
                cc = cc[:bs]
            cc = cc.to(self.device)
            bchw = z.shape
            if ck != self.masked_image_key:
                # Mask: just resize to the latent spatial size.
                cc = torch.nn.functional.interpolate(cc, size=bchw[-2:])
            else:
                # Masked image: encode into the latent space.
                cc = self.get_first_stage_encoding(self.encode_first_stage(cc))
            c_cat.append(cc)
        c_cat = torch.cat(c_cat, dim=1)
        all_conds = {"c_concat": [c_cat], "c_crossattn": [c]}
        if return_first_stage_outputs:
            return z, all_conds, x, xrec, xc
        return z, all_conds

    @torch.no_grad()
    def log_images(self, *args, **kwargs):
        """Extend the base logging with the raw masked image from the batch."""
        log = super(LatentInpaintDiffusion, self).log_images(*args, **kwargs)
        log["masked_image"] = rearrange(args[0]["masked_image"],
                                        'b h w c -> b c h w').to(memory_format=torch.contiguous_format).float()
        return log
1683
+
1684
+
1685
class LatentDepth2ImageDiffusion(LatentFinetuneDiffusion):
    """
    condition on monocular depth estimation
    """

    def __init__(self, depth_stage_config, concat_keys=("midas_in",), *args, **kwargs):
        super().__init__(concat_keys=concat_keys, *args, **kwargs)
        # Depth estimator (e.g. MiDaS-style) producing the concat conditioning.
        self.depth_model = instantiate_from_config(depth_stage_config)
        self.depth_stage_key = concat_keys[0]

    @torch.no_grad()
    def get_input(self, batch, k, cond_key=None, bs=None, return_first_stage_outputs=False):
        """Fetch latents plus a depth map resized and min-max normalized to
        [-1, 1], used as channel-concat conditioning."""
        # note: restricted to non-trainable encoders currently
        assert not self.cond_stage_trainable, 'trainable cond stages not yet supported for depth2img'
        z, c, x, xrec, xc = super().get_input(batch, self.first_stage_key, return_first_stage_outputs=True,
                                              force_c_encode=True, return_original_cond=True, bs=bs)

        assert exists(self.concat_keys)
        assert len(self.concat_keys) == 1
        c_cat = list()
        for ck in self.concat_keys:
            cc = batch[ck]
            if bs is not None:
                cc = cc[:bs]
            cc = cc.to(self.device)
            cc = self.depth_model(cc)
            # Resize the predicted depth to the latent resolution.
            cc = torch.nn.functional.interpolate(
                cc,
                size=z.shape[2:],
                mode="bicubic",
                align_corners=False,
            )

            depth_min, depth_max = torch.amin(cc, dim=[1, 2, 3], keepdim=True), torch.amax(cc, dim=[1, 2, 3],
                                                                                           keepdim=True)
            # Per-sample min-max normalization to [-1, 1] (0.001 guards div-by-zero).
            cc = 2. * (cc - depth_min) / (depth_max - depth_min + 0.001) - 1.
            c_cat.append(cc)
        c_cat = torch.cat(c_cat, dim=1)
        all_conds = {"c_concat": [c_cat], "c_crossattn": [c]}
        if return_first_stage_outputs:
            return z, all_conds, x, xrec, xc
        return z, all_conds

    @torch.no_grad()
    def log_images(self, *args, **kwargs):
        """Extend the base logging with the normalized predicted depth map."""
        log = super().log_images(*args, **kwargs)
        depth = self.depth_model(args[0][self.depth_stage_key])
        depth_min, depth_max = torch.amin(depth, dim=[1, 2, 3], keepdim=True), \
            torch.amax(depth, dim=[1, 2, 3], keepdim=True)
        # NOTE(review): unlike get_input, no epsilon here — a constant depth
        # map would produce NaNs; presumably never happens in practice.
        log["depth"] = 2. * (depth - depth_min) / (depth_max - depth_min) - 1.
        return log
1736
+
1737
+
1738
class LatentUpscaleFinetuneDiffusion(LatentFinetuneDiffusion):
    """
    condition on low-res image (and optionally on some spatial noise augmentation)
    """
    def __init__(self, concat_keys=("lr",), reshuffle_patch_size=None,
                 low_scale_config=None, low_scale_key=None, *args, **kwargs):
        super().__init__(concat_keys=concat_keys, *args, **kwargs)
        # Optional pixel-unshuffle patch size applied to the LR conditioning.
        self.reshuffle_patch_size = reshuffle_patch_size
        self.low_scale_model = None
        if low_scale_config is not None:
            print("Initializing a low-scale model")
            assert exists(low_scale_key)
            self.instantiate_low_stage(low_scale_config)
            self.low_scale_key = low_scale_key

    def instantiate_low_stage(self, config):
        """Build the low-scale (noise augmentation) model and freeze it."""
        model = instantiate_from_config(config)
        self.low_scale_model = model.eval()
        # Override .train so Lightning cannot flip it back to training mode.
        self.low_scale_model.train = disabled_train
        for param in self.low_scale_model.parameters():
            param.requires_grad = False

    @torch.no_grad()
    def get_input(self, batch, k, cond_key=None, bs=None, return_first_stage_outputs=False):
        """Fetch latents plus LR-image concat conditioning, optionally
        noise-augmented by the low-scale model (which then also contributes
        a ``c_adm`` noise level)."""
        # note: restricted to non-trainable encoders currently
        assert not self.cond_stage_trainable, 'trainable cond stages not yet supported for upscaling-ft'
        z, c, x, xrec, xc = super().get_input(batch, self.first_stage_key, return_first_stage_outputs=True,
                                              force_c_encode=True, return_original_cond=True, bs=bs)

        assert exists(self.concat_keys)
        assert len(self.concat_keys) == 1
        # optionally make spatial noise_level here
        c_cat = list()
        noise_level = None
        for ck in self.concat_keys:
            cc = batch[ck]
            cc = rearrange(cc, 'b h w c -> b c h w')
            if exists(self.reshuffle_patch_size):
                assert isinstance(self.reshuffle_patch_size, int)
                # Pixel-unshuffle: trade spatial resolution for channels.
                cc = rearrange(cc, 'b c (p1 h) (p2 w) -> b (p1 p2 c) h w',
                               p1=self.reshuffle_patch_size, p2=self.reshuffle_patch_size)
            if bs is not None:
                cc = cc[:bs]
            cc = cc.to(self.device)
            if exists(self.low_scale_model) and ck == self.low_scale_key:
                cc, noise_level = self.low_scale_model(cc)
            c_cat.append(cc)
        c_cat = torch.cat(c_cat, dim=1)
        if exists(noise_level):
            all_conds = {"c_concat": [c_cat], "c_crossattn": [c], "c_adm": noise_level}
        else:
            all_conds = {"c_concat": [c_cat], "c_crossattn": [c]}
        if return_first_stage_outputs:
            return z, all_conds, x, xrec, xc
        return z, all_conds

    @torch.no_grad()
    def log_images(self, *args, **kwargs):
        """Extend the base logging with the raw low-res conditioning image."""
        log = super().log_images(*args, **kwargs)
        log["lr"] = rearrange(args[0]["lr"], 'b h w c -> b c h w')
        return log
watermarker/LaWa/ldm/models/diffusion/dpm_solver/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .sampler import DPMSolverSampler
watermarker/LaWa/ldm/models/diffusion/dpm_solver/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (211 Bytes). View file
 
watermarker/LaWa/ldm/models/diffusion/dpm_solver/__pycache__/dpm_solver.cpython-38.pyc ADDED
Binary file (51.6 kB). View file
 
watermarker/LaWa/ldm/models/diffusion/dpm_solver/__pycache__/sampler.cpython-38.pyc ADDED
Binary file (2.79 kB). View file
 
watermarker/LaWa/ldm/models/diffusion/dpm_solver/dpm_solver.py ADDED
@@ -0,0 +1,1154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn.functional as F
3
+ import math
4
+ from tqdm import tqdm
5
+
6
+
7
+ class NoiseScheduleVP:
8
+ def __init__(
9
+ self,
10
+ schedule='discrete',
11
+ betas=None,
12
+ alphas_cumprod=None,
13
+ continuous_beta_0=0.1,
14
+ continuous_beta_1=20.,
15
+ ):
16
+ """Create a wrapper class for the forward SDE (VP type).
17
+ ***
18
+ Update: We support discrete-time diffusion models by implementing a picewise linear interpolation for log_alpha_t.
19
+ We recommend to use schedule='discrete' for the discrete-time diffusion models, especially for high-resolution images.
20
+ ***
21
+ The forward SDE ensures that the condition distribution q_{t|0}(x_t | x_0) = N ( alpha_t * x_0, sigma_t^2 * I ).
22
+ We further define lambda_t = log(alpha_t) - log(sigma_t), which is the half-logSNR (described in the DPM-Solver paper).
23
+ Therefore, we implement the functions for computing alpha_t, sigma_t and lambda_t. For t in [0, T], we have:
24
+ log_alpha_t = self.marginal_log_mean_coeff(t)
25
+ sigma_t = self.marginal_std(t)
26
+ lambda_t = self.marginal_lambda(t)
27
+ Moreover, as lambda(t) is an invertible function, we also support its inverse function:
28
+ t = self.inverse_lambda(lambda_t)
29
+ ===============================================================
30
+ We support both discrete-time DPMs (trained on n = 0, 1, ..., N-1) and continuous-time DPMs (trained on t in [t_0, T]).
31
+ 1. For discrete-time DPMs:
32
+ For discrete-time DPMs trained on n = 0, 1, ..., N-1, we convert the discrete steps to continuous time steps by:
33
+ t_i = (i + 1) / N
34
+ e.g. for N = 1000, we have t_0 = 1e-3 and T = t_{N-1} = 1.
35
+ We solve the corresponding diffusion ODE from time T = 1 to time t_0 = 1e-3.
36
+ Args:
37
+ betas: A `torch.Tensor`. The beta array for the discrete-time DPM. (See the original DDPM paper for details)
38
+ alphas_cumprod: A `torch.Tensor`. The cumprod alphas for the discrete-time DPM. (See the original DDPM paper for details)
39
+ Note that we always have alphas_cumprod = cumprod(betas). Therefore, we only need to set one of `betas` and `alphas_cumprod`.
40
+ **Important**: Please pay special attention for the args for `alphas_cumprod`:
41
+ The `alphas_cumprod` is the \hat{alpha_n} arrays in the notations of DDPM. Specifically, DDPMs assume that
42
+ q_{t_n | 0}(x_{t_n} | x_0) = N ( \sqrt{\hat{alpha_n}} * x_0, (1 - \hat{alpha_n}) * I ).
43
+ Therefore, the notation \hat{alpha_n} is different from the notation alpha_t in DPM-Solver. In fact, we have
44
+ alpha_{t_n} = \sqrt{\hat{alpha_n}},
45
+ and
46
+ log(alpha_{t_n}) = 0.5 * log(\hat{alpha_n}).
47
+ 2. For continuous-time DPMs:
48
+ We support two types of VPSDEs: linear (DDPM) and cosine (improved-DDPM). The hyperparameters for the noise
49
+ schedule are the default settings in DDPM and improved-DDPM:
50
+ Args:
51
+ beta_min: A `float` number. The smallest beta for the linear schedule.
52
+ beta_max: A `float` number. The largest beta for the linear schedule.
53
+ cosine_s: A `float` number. The hyperparameter in the cosine schedule.
54
+ cosine_beta_max: A `float` number. The hyperparameter in the cosine schedule.
55
+ T: A `float` number. The ending time of the forward process.
56
+ ===============================================================
57
+ Args:
58
+ schedule: A `str`. The noise schedule of the forward SDE. 'discrete' for discrete-time DPMs,
59
+ 'linear' or 'cosine' for continuous-time DPMs.
60
+ Returns:
61
+ A wrapper object of the forward SDE (VP type).
62
+
63
+ ===============================================================
64
+ Example:
65
+ # For discrete-time DPMs, given betas (the beta array for n = 0, 1, ..., N - 1):
66
+ >>> ns = NoiseScheduleVP('discrete', betas=betas)
67
+ # For discrete-time DPMs, given alphas_cumprod (the \hat{alpha_n} array for n = 0, 1, ..., N - 1):
68
+ >>> ns = NoiseScheduleVP('discrete', alphas_cumprod=alphas_cumprod)
69
+ # For continuous-time DPMs (VPSDE), linear schedule:
70
+ >>> ns = NoiseScheduleVP('linear', continuous_beta_0=0.1, continuous_beta_1=20.)
71
+ """
72
+
73
+ if schedule not in ['discrete', 'linear', 'cosine']:
74
+ raise ValueError(
75
+ "Unsupported noise schedule {}. The schedule needs to be 'discrete' or 'linear' or 'cosine'".format(
76
+ schedule))
77
+
78
+ self.schedule = schedule
79
+ if schedule == 'discrete':
80
+ if betas is not None:
81
+ log_alphas = 0.5 * torch.log(1 - betas).cumsum(dim=0)
82
+ else:
83
+ assert alphas_cumprod is not None
84
+ log_alphas = 0.5 * torch.log(alphas_cumprod)
85
+ self.total_N = len(log_alphas)
86
+ self.T = 1.
87
+ self.t_array = torch.linspace(0., 1., self.total_N + 1)[1:].reshape((1, -1))
88
+ self.log_alpha_array = log_alphas.reshape((1, -1,))
89
+ else:
90
+ self.total_N = 1000
91
+ self.beta_0 = continuous_beta_0
92
+ self.beta_1 = continuous_beta_1
93
+ self.cosine_s = 0.008
94
+ self.cosine_beta_max = 999.
95
+ self.cosine_t_max = math.atan(self.cosine_beta_max * (1. + self.cosine_s) / math.pi) * 2. * (
96
+ 1. + self.cosine_s) / math.pi - self.cosine_s
97
+ self.cosine_log_alpha_0 = math.log(math.cos(self.cosine_s / (1. + self.cosine_s) * math.pi / 2.))
98
+ self.schedule = schedule
99
+ if schedule == 'cosine':
100
+ # For the cosine schedule, T = 1 will have numerical issues. So we manually set the ending time T.
101
+ # Note that T = 0.9946 may be not the optimal setting. However, we find it works well.
102
+ self.T = 0.9946
103
+ else:
104
+ self.T = 1.
105
+
106
def marginal_log_mean_coeff(self, t):
    """Compute log(alpha_t) for a continuous-time label t in [0, T].

    For the 'discrete' schedule this piecewise-linearly interpolates the
    precomputed log-alpha table; for 'linear' and 'cosine' it evaluates
    the closed-form VPSDE expressions.
    """
    schedule = self.schedule
    if schedule == 'discrete':
        t_col = t.reshape((-1, 1))
        return interpolate_fn(t_col,
                              self.t_array.to(t.device),
                              self.log_alpha_array.to(t.device)).reshape((-1))
    elif schedule == 'linear':
        # Integral of -0.5 * beta(t) with beta(t) = beta_0 + t * (beta_1 - beta_0).
        return -0.25 * t ** 2 * (self.beta_1 - self.beta_0) - 0.5 * t * self.beta_0
    elif schedule == 'cosine':
        log_cos = torch.log(torch.cos((t + self.cosine_s) / (1. + self.cosine_s) * math.pi / 2.))
        # Normalize so that log(alpha_0) = 0.
        return log_cos - self.cosine_log_alpha_0
119
+
120
def marginal_alpha(self, t):
    """Compute alpha_t = exp(log(alpha_t)) for a continuous-time label t in [0, T]."""
    log_alpha = self.marginal_log_mean_coeff(t)
    return log_alpha.exp()
125
+
126
def marginal_std(self, t):
    """Compute sigma_t = sqrt(1 - alpha_t^2) for a continuous-time label t in [0, T]."""
    two_log_alpha = 2. * self.marginal_log_mean_coeff(t)
    return torch.sqrt(1. - torch.exp(two_log_alpha))
131
+
132
def marginal_lambda(self, t):
    """Compute the half-logSNR lambda_t = log(alpha_t) - log(sigma_t) for t in [0, T]."""
    log_alpha = self.marginal_log_mean_coeff(t)
    # log(sigma_t) = 0.5 * log(1 - alpha_t^2)
    log_sigma = 0.5 * torch.log(1. - torch.exp(2. * log_alpha))
    return log_alpha - log_sigma
139
+
140
def inverse_lambda(self, lamb):
    """Compute the time t in [0, T] whose half-logSNR equals `lamb`.

    This is the inverse of `marginal_lambda`; the branch taken depends on
    the configured schedule ('linear', 'discrete' or cosine).
    """
    if self.schedule == 'linear':
        # Solve the quadratic in t implied by the linear beta schedule,
        # written in a numerically stable rationalized form.
        tmp = 2. * (self.beta_1 - self.beta_0) * torch.logaddexp(-2. * lamb, torch.zeros((1,)).to(lamb))
        Delta = self.beta_0 ** 2 + tmp
        return tmp / (torch.sqrt(Delta) + self.beta_0) / (self.beta_1 - self.beta_0)
    elif self.schedule == 'discrete':
        # Invert the piecewise-linear log-alpha table (flipped so it is increasing).
        log_alpha = -0.5 * torch.logaddexp(torch.zeros((1,)).to(lamb.device), -2. * lamb)
        t = interpolate_fn(log_alpha.reshape((-1, 1)),
                           torch.flip(self.log_alpha_array.to(lamb.device), [1]),
                           torch.flip(self.t_array.to(lamb.device), [1]))
        return t.reshape((-1,))
    else:
        # Cosine schedule: invert the closed-form cosine expression.
        log_alpha = -0.5 * torch.logaddexp(-2. * lamb, torch.zeros((1,)).to(lamb))
        t = torch.arccos(torch.exp(log_alpha + self.cosine_log_alpha_0)) * 2. * (
                1. + self.cosine_s) / math.pi - self.cosine_s
        return t
159
+
160
+
161
def model_wrapper(
        model,
        noise_schedule,
        model_type="noise",
        model_kwargs=None,
        guidance_type="uncond",
        condition=None,
        unconditional_condition=None,
        guidance_scale=1.,
        classifier_fn=None,
        classifier_kwargs=None,
):
    """Create a wrapper function for the noise prediction model.

    DPM-Solver solves continuous-time diffusion ODEs, so the raw model is wrapped
    into a function that accepts the continuous time `t_continuous` (in [epsilon, T])
    and always returns the predicted noise.

    Supported `model_type` parameterizations:
        - "noise":   the model predicts epsilon directly.
        - "x_start": the model predicts x_0; converted via (x - alpha_t * x0) / sigma_t.
        - "v":       velocity prediction (Progressive Distillation [1], Imagen-Video [2]).
        - "score":   marginal score function; noise(x_t, t) = -sigma_t * score(x_t, t).

    Supported `guidance_type` values:
        - "uncond":          unconditional sampling; model(x, t_input, **model_kwargs).
        - "classifier":      classifier guidance [3]; requires `classifier_fn`, whose
                             log-probability gradient steers the predicted noise.
        - "classifier-free": classifier-free guidance [4]; the conditional model is
                             evaluated on both `condition` and `unconditional_condition`
                             and the two outputs are mixed by `guidance_scale`.

    [1] Salimans & Ho, "Progressive distillation for fast sampling of diffusion models", 2022.
    [2] Ho et al., "Imagen Video", 2022.
    [3] Dhariwal & Nichol, "Diffusion models beat GANs on image synthesis", NeurIPS 2021.
    [4] Ho & Salimans, "Classifier-free diffusion guidance", 2022.

    Args:
        model: the diffusion model; call signature depends on `guidance_type`.
        noise_schedule: a noise schedule object, such as NoiseScheduleVP.
        model_type: "noise" | "x_start" | "v" | "score".
        model_kwargs: extra keyword arguments forwarded to `model` (default: {}).
        guidance_type: "uncond" | "classifier" | "classifier-free".
        condition: conditioning tensor for the guided sampling types.
        unconditional_condition: the "null" condition for classifier-free guidance.
        guidance_scale: guidance strength for the guided sampling types.
        classifier_fn: classifier used for "classifier" guidance.
        classifier_kwargs: extra keyword arguments for `classifier_fn` (default: {}).

    Returns:
        A noise prediction function `model_fn(x, t_continuous) -> noise` for DPM-Solver.
    """
    # Use None sentinels instead of mutable {} defaults (shared-state pitfall);
    # behavior is unchanged for every caller.
    if model_kwargs is None:
        model_kwargs = {}
    if classifier_kwargs is None:
        classifier_kwargs = {}

    def get_model_input_time(t_continuous):
        """
        Convert the continuous-time `t_continuous` (in [epsilon, T]) to the model input time.
        For discrete-time DPMs, we convert `t_continuous` in [1 / N, 1] to `t_input` in [0, 1000 * (N - 1) / N].
        For continuous-time DPMs, we just use `t_continuous`.
        """
        if noise_schedule.schedule == 'discrete':
            return (t_continuous - 1. / noise_schedule.total_N) * 1000.
        else:
            return t_continuous

    def noise_pred_fn(x, t_continuous, cond=None):
        # Broadcast a scalar time to the batch dimension.
        if t_continuous.reshape((-1,)).shape[0] == 1:
            t_continuous = t_continuous.expand((x.shape[0]))
        t_input = get_model_input_time(t_continuous)
        if cond is None:
            output = model(x, t_input, **model_kwargs)
        else:
            output = model(x, t_input, cond, **model_kwargs)
        # Convert every parameterization to a noise prediction.
        if model_type == "noise":
            return output
        elif model_type == "x_start":
            alpha_t, sigma_t = noise_schedule.marginal_alpha(t_continuous), noise_schedule.marginal_std(t_continuous)
            dims = x.dim()
            return (x - expand_dims(alpha_t, dims) * output) / expand_dims(sigma_t, dims)
        elif model_type == "v":
            alpha_t, sigma_t = noise_schedule.marginal_alpha(t_continuous), noise_schedule.marginal_std(t_continuous)
            dims = x.dim()
            return expand_dims(alpha_t, dims) * output + expand_dims(sigma_t, dims) * x
        elif model_type == "score":
            sigma_t = noise_schedule.marginal_std(t_continuous)
            dims = x.dim()
            return -expand_dims(sigma_t, dims) * output

    def cond_grad_fn(x, t_input):
        """
        Compute the gradient of the classifier, i.e. nabla_{x} log p_t(cond | x_t).
        """
        with torch.enable_grad():
            x_in = x.detach().requires_grad_(True)
            log_prob = classifier_fn(x_in, t_input, condition, **classifier_kwargs)
            return torch.autograd.grad(log_prob.sum(), x_in)[0]

    def model_fn(x, t_continuous):
        """
        The noise prediction model function that is used for DPM-Solver.
        """
        if t_continuous.reshape((-1,)).shape[0] == 1:
            t_continuous = t_continuous.expand((x.shape[0]))
        if guidance_type == "uncond":
            return noise_pred_fn(x, t_continuous)
        elif guidance_type == "classifier":
            assert classifier_fn is not None
            t_input = get_model_input_time(t_continuous)
            cond_grad = cond_grad_fn(x, t_input)
            sigma_t = noise_schedule.marginal_std(t_continuous)
            noise = noise_pred_fn(x, t_continuous)
            return noise - guidance_scale * expand_dims(sigma_t, dims=cond_grad.dim()) * cond_grad
        elif guidance_type == "classifier-free":
            if guidance_scale == 1. or unconditional_condition is None:
                return noise_pred_fn(x, t_continuous, cond=condition)
            else:
                # Batch the conditional and unconditional passes into one forward call.
                x_in = torch.cat([x] * 2)
                t_in = torch.cat([t_continuous] * 2)
                c_in = torch.cat([unconditional_condition, condition])
                noise_uncond, noise = noise_pred_fn(x_in, t_in, cond=c_in).chunk(2)
                return noise_uncond + guidance_scale * (noise - noise_uncond)

    # Bug fix: "score" is documented in the docstring and implemented in
    # noise_pred_fn above, but the original assert rejected it.
    assert model_type in ["noise", "x_start", "v", "score"]
    assert guidance_type in ["uncond", "classifier", "classifier-free"]
    return model_fn
317
+
318
+
319
+ class DPM_Solver:
320
def __init__(self, model_fn, noise_schedule, predict_x0=False, thresholding=False, max_val=1.):
    """Construct a DPM-Solver.

    Supports both the noise prediction model ("predicting epsilon",
    `predict_x0=False`, classic DPM-Solver) and the data prediction model
    ("predicting x0", `predict_x0=True`, DPM-Solver++). In the latter case,
    the "dynamic thresholding" of Imagen [1] may additionally be enabled via
    `thresholding`, which greatly improves sample quality for pixel-space
    DPMs with large guidance scales.

    Args:
        model_fn: a noise prediction function taking (x, t_continuous) with
            t in [epsilon, T] and returning the predicted noise.
        noise_schedule: a noise schedule object, such as NoiseScheduleVP.
        predict_x0: use the data prediction formulation if True, otherwise
            the noise prediction formulation.
        thresholding: apply dynamic thresholding (only meaningful when
            `predict_x0` is True).
        max_val: clamp magnitude used by the thresholding (only meaningful
            when both `predict_x0` and `thresholding` are True).

    [1] Saharia et al., "Photorealistic text-to-image diffusion models with
        deep language understanding", arXiv:2205.11487, 2022.
    """
    self.noise_schedule = noise_schedule
    self.model = model_fn
    self.predict_x0 = predict_x0
    self.thresholding = thresholding
    self.max_val = max_val
345
+
346
def noise_prediction_fn(self, x, t):
    """Evaluate the wrapped noise prediction model at (x, t) and return its output."""
    return self.model(x, t)
351
+
352
def data_prediction_fn(self, x, t):
    """Return the x0 prediction derived from the noise model, with optional
    dynamic thresholding applied when `self.thresholding` is set."""
    ns = self.noise_schedule
    noise = self.noise_prediction_fn(x, t)
    dims = x.dim()
    alpha_t = ns.marginal_alpha(t)
    sigma_t = ns.marginal_std(t)
    # x = alpha_t * x0 + sigma_t * noise  =>  solve for x0.
    x0 = (x - expand_dims(sigma_t, dims) * noise) / expand_dims(alpha_t, dims)
    if self.thresholding:
        p = 0.995  # dynamic-thresholding percentile from the "Imagen" paper
        s = torch.quantile(torch.abs(x0).reshape((x0.shape[0], -1)), p, dim=1)
        # Clamp to at least max_val, then rescale into [-1, 1].
        s = expand_dims(torch.maximum(s, self.max_val * torch.ones_like(s).to(s.device)), dims)
        x0 = torch.clamp(x0, -s, s) / s
    return x0
366
+
367
def model_fn(self, x, t):
    """Dispatch to the data-prediction or noise-prediction parameterization."""
    if self.predict_x0:
        return self.data_prediction_fn(x, t)
    return self.noise_prediction_fn(x, t)
375
+
376
def get_time_steps(self, skip_type, t_T, t_0, N, device):
    """Compute the N+1 intermediate sampling times from t_T down to t_0.

    Args:
        skip_type: spacing of the time steps:
            - 'logSNR': uniform in logSNR (via the schedule's lambda function).
            - 'time_uniform': uniform in time (recommended for high-resolution data).
            - 'time_quadratic': quadratic in time (used by DDIM for low-resolution data).
        t_T: starting time of the sampling (default T).
        t_0: ending time of the sampling (default epsilon).
        N: number of intervals between time steps.
        device: torch device for the returned tensor.

    Returns:
        A tensor of shape (N + 1,) with the sampling times.

    Raises:
        ValueError: if `skip_type` is not one of the supported values.
    """
    if skip_type == 'logSNR':
        # Space uniformly in half-logSNR, then map back to time.
        lambda_T = self.noise_schedule.marginal_lambda(torch.tensor(t_T).to(device))
        lambda_0 = self.noise_schedule.marginal_lambda(torch.tensor(t_0).to(device))
        logSNR_steps = torch.linspace(lambda_T.cpu().item(), lambda_0.cpu().item(), N + 1).to(device)
        return self.noise_schedule.inverse_lambda(logSNR_steps)
    if skip_type == 'time_uniform':
        return torch.linspace(t_T, t_0, N + 1).to(device)
    if skip_type == 'time_quadratic':
        t_order = 2
        return torch.linspace(t_T ** (1. / t_order), t_0 ** (1. / t_order), N + 1).pow(t_order).to(device)
    raise ValueError(
        "Unsupported skip_type {}, need to be 'logSNR' or 'time_uniform' or 'time_quadratic'".format(skip_type))
404
+
405
def get_orders_and_timesteps_for_singlestep_solver(self, steps, order, skip_type, t_T, t_0, device):
    """
    Get the order of each step for sampling by the singlestep DPM-Solver.
    We combine both DPM-Solver-1,2,3 to use all the function evaluations, which is named as "DPM-Solver-fast".
    Given a fixed number of function evaluations by `steps`, the sampling procedure by DPM-Solver-fast is:
        - If order == 1:
            We take `steps` of DPM-Solver-1 (i.e. DDIM).
        - If order == 2:
            - Denote K = (steps // 2). We take K or (K + 1) intermediate time steps for sampling.
            - If steps % 2 == 0, we use K steps of DPM-Solver-2.
            - If steps % 2 == 1, we use K steps of DPM-Solver-2 and 1 step of DPM-Solver-1.
        - If order == 3:
            - Denote K = (steps // 3 + 1). We take K intermediate time steps for sampling.
            - If steps % 3 == 0, we use (K - 2) steps of DPM-Solver-3, and 1 step of DPM-Solver-2 and 1 step of DPM-Solver-1.
            - If steps % 3 == 1, we use (K - 1) steps of DPM-Solver-3 and 1 step of DPM-Solver-1.
            - If steps % 3 == 2, we use (K - 1) steps of DPM-Solver-3 and 1 step of DPM-Solver-2.

    Args:
        steps: A `int`. The total number of function evaluations (NFE).
        order: A `int`. The max order for the solver (1, 2 or 3).
        skip_type: A `str`. Spacing of the time steps: 'logSNR', 'time_uniform' or 'time_quadratic'.
        t_T: A `float`. The starting time of the sampling (default is T).
        t_0: A `float`. The ending time of the sampling (default is epsilon).
        device: A torch device.
    Returns:
        timesteps_outer: the outer time steps, one boundary per solver step.
        orders: A list of the solver order of each step.
    Raises:
        ValueError: if `order` is not 1, 2 or 3.
    """
    if order == 3:
        K = steps // 3 + 1
        if steps % 3 == 0:
            orders = [3, ] * (K - 2) + [2, 1]
        elif steps % 3 == 1:
            orders = [3, ] * (K - 1) + [1]
        else:
            orders = [3, ] * (K - 1) + [2]
    elif order == 2:
        if steps % 2 == 0:
            K = steps // 2
            orders = [2, ] * K
        else:
            K = steps // 2 + 1
            orders = [2, ] * (K - 1) + [1]
    elif order == 1:
        K = 1
        orders = [1, ] * steps
    else:
        raise ValueError("'order' must be '1' or '2' or '3'.")
    if skip_type == 'logSNR':
        # To reproduce the results in the DPM-Solver paper.
        timesteps_outer = self.get_time_steps(skip_type, t_T, t_0, K, device)
    else:
        # Bug fix: torch.cumsum requires an explicit `dim` argument; the original
        # call omitted it and raised TypeError for any non-logSNR skip_type.
        timesteps_outer = self.get_time_steps(skip_type, t_T, t_0, steps, device)[
            torch.cumsum(torch.tensor([0, ] + orders), 0).to(device)]
    return timesteps_outer, orders
462
+
463
def denoise_to_zero_fn(self, x, s):
    """Denoise at the final step: equivalent to solving the ODE from lambda_s
    to +infinity with a first-order discretization (returns the x0 prediction)."""
    return self.data_prediction_fn(x, s)
468
+
469
def dpm_solver_first_update(self, x, s, t, model_s=None, return_intermediate=False):
    """
    DPM-Solver-1 (equivalent to DDIM) from time `s` to time `t`.

    Args:
        x: A pytorch tensor. The initial value at time `s`.
        s: A pytorch tensor. The starting time, with the shape (x.shape[0],).
        t: A pytorch tensor. The ending time, with the shape (x.shape[0],).
        model_s: A pytorch tensor. The model function evaluated at time `s`.
            If `model_s` is None, we evaluate the model by `x` and `s`; otherwise we directly use it.
        return_intermediate: A `bool`. If true, also return the model value at time `s`.
    Returns:
        x_t: A pytorch tensor. The approximated solution at time `t`.
    """
    ns = self.noise_schedule
    dims = x.dim()
    lambda_s, lambda_t = ns.marginal_lambda(s), ns.marginal_lambda(t)
    h = lambda_t - lambda_s
    log_alpha_s, log_alpha_t = ns.marginal_log_mean_coeff(s), ns.marginal_log_mean_coeff(t)
    sigma_s, sigma_t = ns.marginal_std(s), ns.marginal_std(t)
    alpha_t = torch.exp(log_alpha_t)

    if model_s is None:
        model_s = self.model_fn(x, s)

    if self.predict_x0:
        # Data-prediction (DPM-Solver++) first-order update.
        phi_1 = torch.expm1(-h)
        x_t = (
            expand_dims(sigma_t / sigma_s, dims) * x
            - expand_dims(alpha_t * phi_1, dims) * model_s
        )
    else:
        # Noise-prediction (classic DPM-Solver) first-order update.
        phi_1 = torch.expm1(h)
        x_t = (
            expand_dims(torch.exp(log_alpha_t - log_alpha_s), dims) * x
            - expand_dims(sigma_t * phi_1, dims) * model_s
        )

    if return_intermediate:
        return x_t, {'model_s': model_s}
    return x_t
514
+
515
def singlestep_dpm_solver_second_update(self, x, s, t, r1=0.5, model_s=None, return_intermediate=False,
                                        solver_type='dpm_solver'):
    """
    Singlestep solver DPM-Solver-2 from time `s` to time `t`.
    Args:
        x: A pytorch tensor. The initial value at time `s`.
        s: A pytorch tensor. The starting time, with the shape (x.shape[0],).
        t: A pytorch tensor. The ending time, with the shape (x.shape[0],).
        r1: A `float`. The hyperparameter of the second-order solver.
        model_s: A pytorch tensor. The model function evaluated at time `s`.
            If `model_s` is None, we evaluate the model by `x` and `s`; otherwise we directly use it.
        return_intermediate: A `bool`. If true, also return the model value at time `s` and `s1` (the intermediate time).
        solver_type: either 'dpm_solver' or 'taylor'. The type for the high-order solvers.
            The type slightly impacts the performance. We recommend to use 'dpm_solver' type.
    Returns:
        x_t: A pytorch tensor. The approximated solution at time `t`.
    Raises:
        ValueError: if `solver_type` is not 'dpm_solver' or 'taylor'.
    """
    if solver_type not in ['dpm_solver', 'taylor']:
        raise ValueError("'solver_type' must be either 'dpm_solver' or 'taylor', got {}".format(solver_type))
    if r1 is None:
        r1 = 0.5
    ns = self.noise_schedule
    dims = x.dim()
    # Step size in half-logSNR space; s1 is the intermediate time at lambda_s + r1 * h.
    lambda_s, lambda_t = ns.marginal_lambda(s), ns.marginal_lambda(t)
    h = lambda_t - lambda_s
    lambda_s1 = lambda_s + r1 * h
    s1 = ns.inverse_lambda(lambda_s1)
    log_alpha_s, log_alpha_s1, log_alpha_t = ns.marginal_log_mean_coeff(s), ns.marginal_log_mean_coeff(
        s1), ns.marginal_log_mean_coeff(t)
    sigma_s, sigma_s1, sigma_t = ns.marginal_std(s), ns.marginal_std(s1), ns.marginal_std(t)
    alpha_s1, alpha_t = torch.exp(log_alpha_s1), torch.exp(log_alpha_t)

    if self.predict_x0:
        # Data-prediction (DPM-Solver++) branch: phi functions use exp(-h).
        phi_11 = torch.expm1(-r1 * h)
        phi_1 = torch.expm1(-h)

        if model_s is None:
            model_s = self.model_fn(x, s)
        # First-order step to the intermediate time s1, then re-evaluate the model there.
        x_s1 = (
            expand_dims(sigma_s1 / sigma_s, dims) * x
            - expand_dims(alpha_s1 * phi_11, dims) * model_s
        )
        model_s1 = self.model_fn(x_s1, s1)
        if solver_type == 'dpm_solver':
            x_t = (
                expand_dims(sigma_t / sigma_s, dims) * x
                - expand_dims(alpha_t * phi_1, dims) * model_s
                - (0.5 / r1) * expand_dims(alpha_t * phi_1, dims) * (model_s1 - model_s)
            )
        elif solver_type == 'taylor':
            x_t = (
                expand_dims(sigma_t / sigma_s, dims) * x
                - expand_dims(alpha_t * phi_1, dims) * model_s
                + (1. / r1) * expand_dims(alpha_t * ((torch.exp(-h) - 1.) / h + 1.), dims) * (
                    model_s1 - model_s)
            )
    else:
        # Noise-prediction (classic DPM-Solver) branch: phi functions use exp(h).
        phi_11 = torch.expm1(r1 * h)
        phi_1 = torch.expm1(h)

        if model_s is None:
            model_s = self.model_fn(x, s)
        # First-order step to the intermediate time s1, then re-evaluate the model there.
        x_s1 = (
            expand_dims(torch.exp(log_alpha_s1 - log_alpha_s), dims) * x
            - expand_dims(sigma_s1 * phi_11, dims) * model_s
        )
        model_s1 = self.model_fn(x_s1, s1)
        if solver_type == 'dpm_solver':
            x_t = (
                expand_dims(torch.exp(log_alpha_t - log_alpha_s), dims) * x
                - expand_dims(sigma_t * phi_1, dims) * model_s
                - (0.5 / r1) * expand_dims(sigma_t * phi_1, dims) * (model_s1 - model_s)
            )
        elif solver_type == 'taylor':
            x_t = (
                expand_dims(torch.exp(log_alpha_t - log_alpha_s), dims) * x
                - expand_dims(sigma_t * phi_1, dims) * model_s
                - (1. / r1) * expand_dims(sigma_t * ((torch.exp(h) - 1.) / h - 1.), dims) * (model_s1 - model_s)
            )
    if return_intermediate:
        return x_t, {'model_s': model_s, 'model_s1': model_s1}
    else:
        return x_t
598
+
599
def singlestep_dpm_solver_third_update(self, x, s, t, r1=1. / 3., r2=2. / 3., model_s=None, model_s1=None,
                                       return_intermediate=False, solver_type='dpm_solver'):
    """
    Singlestep solver DPM-Solver-3 from time `s` to time `t`.
    Args:
        x: A pytorch tensor. The initial value at time `s`.
        s: A pytorch tensor. The starting time, with the shape (x.shape[0],).
        t: A pytorch tensor. The ending time, with the shape (x.shape[0],).
        r1: A `float`. The hyperparameter of the third-order solver.
        r2: A `float`. The hyperparameter of the third-order solver.
        model_s: A pytorch tensor. The model function evaluated at time `s`.
            If `model_s` is None, we evaluate the model by `x` and `s`; otherwise we directly use it.
        model_s1: A pytorch tensor. The model function evaluated at time `s1` (the intermediate time given by `r1`).
            If `model_s1` is None, we evaluate the model at `s1`; otherwise we directly use it.
        return_intermediate: A `bool`. If true, also return the model value at time `s`, `s1` and `s2` (the intermediate times).
        solver_type: either 'dpm_solver' or 'taylor'. The type for the high-order solvers.
            The type slightly impacts the performance. We recommend to use 'dpm_solver' type.
    Returns:
        x_t: A pytorch tensor. The approximated solution at time `t`.
    Raises:
        ValueError: if `solver_type` is not 'dpm_solver' or 'taylor'.
    """
    if solver_type not in ['dpm_solver', 'taylor']:
        raise ValueError("'solver_type' must be either 'dpm_solver' or 'taylor', got {}".format(solver_type))
    if r1 is None:
        r1 = 1. / 3.
    if r2 is None:
        r2 = 2. / 3.
    ns = self.noise_schedule
    dims = x.dim()
    # Step size in half-logSNR space; s1 and s2 are the intermediate times at
    # lambda_s + r1 * h and lambda_s + r2 * h respectively.
    lambda_s, lambda_t = ns.marginal_lambda(s), ns.marginal_lambda(t)
    h = lambda_t - lambda_s
    lambda_s1 = lambda_s + r1 * h
    lambda_s2 = lambda_s + r2 * h
    s1 = ns.inverse_lambda(lambda_s1)
    s2 = ns.inverse_lambda(lambda_s2)
    log_alpha_s, log_alpha_s1, log_alpha_s2, log_alpha_t = ns.marginal_log_mean_coeff(
        s), ns.marginal_log_mean_coeff(s1), ns.marginal_log_mean_coeff(s2), ns.marginal_log_mean_coeff(t)
    sigma_s, sigma_s1, sigma_s2, sigma_t = ns.marginal_std(s), ns.marginal_std(s1), ns.marginal_std(
        s2), ns.marginal_std(t)
    alpha_s1, alpha_s2, alpha_t = torch.exp(log_alpha_s1), torch.exp(log_alpha_s2), torch.exp(log_alpha_t)

    if self.predict_x0:
        # Data-prediction (DPM-Solver++) branch: phi functions use exp(-h).
        phi_11 = torch.expm1(-r1 * h)
        phi_12 = torch.expm1(-r2 * h)
        phi_1 = torch.expm1(-h)
        phi_22 = torch.expm1(-r2 * h) / (r2 * h) + 1.
        phi_2 = phi_1 / h + 1.
        phi_3 = phi_2 / h - 0.5

        if model_s is None:
            model_s = self.model_fn(x, s)
        if model_s1 is None:
            # First-order step to s1, then re-evaluate the model there.
            x_s1 = (
                expand_dims(sigma_s1 / sigma_s, dims) * x
                - expand_dims(alpha_s1 * phi_11, dims) * model_s
            )
            model_s1 = self.model_fn(x_s1, s1)
        # Second-order step to s2 using the s and s1 evaluations.
        x_s2 = (
            expand_dims(sigma_s2 / sigma_s, dims) * x
            - expand_dims(alpha_s2 * phi_12, dims) * model_s
            + r2 / r1 * expand_dims(alpha_s2 * phi_22, dims) * (model_s1 - model_s)
        )
        model_s2 = self.model_fn(x_s2, s2)
        if solver_type == 'dpm_solver':
            x_t = (
                expand_dims(sigma_t / sigma_s, dims) * x
                - expand_dims(alpha_t * phi_1, dims) * model_s
                + (1. / r2) * expand_dims(alpha_t * phi_2, dims) * (model_s2 - model_s)
            )
        elif solver_type == 'taylor':
            # Finite-difference estimates of the first and second derivatives
            # of the model output with respect to lambda.
            D1_0 = (1. / r1) * (model_s1 - model_s)
            D1_1 = (1. / r2) * (model_s2 - model_s)
            D1 = (r2 * D1_0 - r1 * D1_1) / (r2 - r1)
            D2 = 2. * (D1_1 - D1_0) / (r2 - r1)
            x_t = (
                expand_dims(sigma_t / sigma_s, dims) * x
                - expand_dims(alpha_t * phi_1, dims) * model_s
                + expand_dims(alpha_t * phi_2, dims) * D1
                - expand_dims(alpha_t * phi_3, dims) * D2
            )
    else:
        # Noise-prediction (classic DPM-Solver) branch: phi functions use exp(h).
        phi_11 = torch.expm1(r1 * h)
        phi_12 = torch.expm1(r2 * h)
        phi_1 = torch.expm1(h)
        phi_22 = torch.expm1(r2 * h) / (r2 * h) - 1.
        phi_2 = phi_1 / h - 1.
        phi_3 = phi_2 / h - 0.5

        if model_s is None:
            model_s = self.model_fn(x, s)
        if model_s1 is None:
            # First-order step to s1, then re-evaluate the model there.
            x_s1 = (
                expand_dims(torch.exp(log_alpha_s1 - log_alpha_s), dims) * x
                - expand_dims(sigma_s1 * phi_11, dims) * model_s
            )
            model_s1 = self.model_fn(x_s1, s1)
        # Second-order step to s2 using the s and s1 evaluations.
        x_s2 = (
            expand_dims(torch.exp(log_alpha_s2 - log_alpha_s), dims) * x
            - expand_dims(sigma_s2 * phi_12, dims) * model_s
            - r2 / r1 * expand_dims(sigma_s2 * phi_22, dims) * (model_s1 - model_s)
        )
        model_s2 = self.model_fn(x_s2, s2)
        if solver_type == 'dpm_solver':
            x_t = (
                expand_dims(torch.exp(log_alpha_t - log_alpha_s), dims) * x
                - expand_dims(sigma_t * phi_1, dims) * model_s
                - (1. / r2) * expand_dims(sigma_t * phi_2, dims) * (model_s2 - model_s)
            )
        elif solver_type == 'taylor':
            # Finite-difference estimates of the first and second derivatives
            # of the model output with respect to lambda.
            D1_0 = (1. / r1) * (model_s1 - model_s)
            D1_1 = (1. / r2) * (model_s2 - model_s)
            D1 = (r2 * D1_0 - r1 * D1_1) / (r2 - r1)
            D2 = 2. * (D1_1 - D1_0) / (r2 - r1)
            x_t = (
                expand_dims(torch.exp(log_alpha_t - log_alpha_s), dims) * x
                - expand_dims(sigma_t * phi_1, dims) * model_s
                - expand_dims(sigma_t * phi_2, dims) * D1
                - expand_dims(sigma_t * phi_3, dims) * D2
            )

    if return_intermediate:
        return x_t, {'model_s': model_s, 'model_s1': model_s1, 'model_s2': model_s2}
    else:
        return x_t
722
+
723
    def multistep_dpm_solver_second_update(self, x, model_prev_list, t_prev_list, t, solver_type="dpm_solver"):
        """
        Multistep solver DPM-Solver-2 from time `t_prev_list[-1]` to time `t`.
        Args:
            x: A pytorch tensor. The initial value at time `s`.
            model_prev_list: A list of pytorch tensor. The previous computed model values.
            t_prev_list: A list of pytorch tensor. The previous times, each time has the shape (x.shape[0],)
            t: A pytorch tensor. The ending time, with the shape (x.shape[0],).
            solver_type: either 'dpm_solver' or 'taylor'. The type for the high-order solvers.
                The type slightly impacts the performance. We recommend to use 'dpm_solver' type.
        Returns:
            x_t: A pytorch tensor. The approximated solution at time `t`.
        """
        if solver_type not in ['dpm_solver', 'taylor']:
            raise ValueError("'solver_type' must be either 'dpm_solver' or 'taylor', got {}".format(solver_type))
        ns = self.noise_schedule
        dims = x.dim()
        model_prev_1, model_prev_0 = model_prev_list
        t_prev_1, t_prev_0 = t_prev_list
        # Half-logSNR (lambda) values at the two previous steps and the target step.
        lambda_prev_1, lambda_prev_0, lambda_t = ns.marginal_lambda(t_prev_1), ns.marginal_lambda(
            t_prev_0), ns.marginal_lambda(t)
        log_alpha_prev_0, log_alpha_t = ns.marginal_log_mean_coeff(t_prev_0), ns.marginal_log_mean_coeff(t)
        sigma_prev_0, sigma_t = ns.marginal_std(t_prev_0), ns.marginal_std(t)
        alpha_t = torch.exp(log_alpha_t)

        # Step sizes in lambda space; D1_0 is the first-order finite difference of
        # the model output w.r.t. lambda at the most recent step.
        h_0 = lambda_prev_0 - lambda_prev_1
        h = lambda_t - lambda_prev_0
        r0 = h_0 / h
        D1_0 = expand_dims(1. / r0, dims) * (model_prev_0 - model_prev_1)
        if self.predict_x0:
            # Data-prediction (x0) parameterization of the exponential integrator.
            if solver_type == 'dpm_solver':
                x_t = (
                    expand_dims(sigma_t / sigma_prev_0, dims) * x
                    - expand_dims(alpha_t * (torch.exp(-h) - 1.), dims) * model_prev_0
                    - 0.5 * expand_dims(alpha_t * (torch.exp(-h) - 1.), dims) * D1_0
                )
            elif solver_type == 'taylor':
                x_t = (
                    expand_dims(sigma_t / sigma_prev_0, dims) * x
                    - expand_dims(alpha_t * (torch.exp(-h) - 1.), dims) * model_prev_0
                    + expand_dims(alpha_t * ((torch.exp(-h) - 1.) / h + 1.), dims) * D1_0
                )
        else:
            # Noise-prediction (epsilon) parameterization.
            if solver_type == 'dpm_solver':
                x_t = (
                    expand_dims(torch.exp(log_alpha_t - log_alpha_prev_0), dims) * x
                    - expand_dims(sigma_t * (torch.exp(h) - 1.), dims) * model_prev_0
                    - 0.5 * expand_dims(sigma_t * (torch.exp(h) - 1.), dims) * D1_0
                )
            elif solver_type == 'taylor':
                x_t = (
                    expand_dims(torch.exp(log_alpha_t - log_alpha_prev_0), dims) * x
                    - expand_dims(sigma_t * (torch.exp(h) - 1.), dims) * model_prev_0
                    - expand_dims(sigma_t * ((torch.exp(h) - 1.) / h - 1.), dims) * D1_0
                )
        return x_t
779
+
780
    def multistep_dpm_solver_third_update(self, x, model_prev_list, t_prev_list, t, solver_type='dpm_solver'):
        """
        Multistep solver DPM-Solver-3 from time `t_prev_list[-1]` to time `t`.
        Args:
            x: A pytorch tensor. The initial value at time `s`.
            model_prev_list: A list of pytorch tensor. The previous computed model values.
            t_prev_list: A list of pytorch tensor. The previous times, each time has the shape (x.shape[0],)
            t: A pytorch tensor. The ending time, with the shape (x.shape[0],).
            solver_type: either 'dpm_solver' or 'taylor'. The type for the high-order solvers.
                The type slightly impacts the performance. We recommend to use 'dpm_solver' type.
        Returns:
            x_t: A pytorch tensor. The approximated solution at time `t`.
        """
        ns = self.noise_schedule
        dims = x.dim()
        model_prev_2, model_prev_1, model_prev_0 = model_prev_list
        t_prev_2, t_prev_1, t_prev_0 = t_prev_list
        # Half-logSNR (lambda) values for the three previous steps and the target step.
        lambda_prev_2, lambda_prev_1, lambda_prev_0, lambda_t = ns.marginal_lambda(t_prev_2), ns.marginal_lambda(
            t_prev_1), ns.marginal_lambda(t_prev_0), ns.marginal_lambda(t)
        log_alpha_prev_0, log_alpha_t = ns.marginal_log_mean_coeff(t_prev_0), ns.marginal_log_mean_coeff(t)
        sigma_prev_0, sigma_t = ns.marginal_std(t_prev_0), ns.marginal_std(t)
        alpha_t = torch.exp(log_alpha_t)

        # Lambda-space step sizes; D1_0/D1_1 are first-order finite differences of
        # the model output, combined into a first (D1) and second (D2) derivative
        # estimate at the most recent step.
        h_1 = lambda_prev_1 - lambda_prev_2
        h_0 = lambda_prev_0 - lambda_prev_1
        h = lambda_t - lambda_prev_0
        r0, r1 = h_0 / h, h_1 / h
        D1_0 = expand_dims(1. / r0, dims) * (model_prev_0 - model_prev_1)
        D1_1 = expand_dims(1. / r1, dims) * (model_prev_1 - model_prev_2)
        D1 = D1_0 + expand_dims(r0 / (r0 + r1), dims) * (D1_0 - D1_1)
        D2 = expand_dims(1. / (r0 + r1), dims) * (D1_0 - D1_1)
        # NOTE(review): `solver_type` is accepted for API symmetry with the other
        # multistep updates but does not affect the third-order formula below.
        if self.predict_x0:
            # Data-prediction (x0) parameterization.
            x_t = (
                expand_dims(sigma_t / sigma_prev_0, dims) * x
                - expand_dims(alpha_t * (torch.exp(-h) - 1.), dims) * model_prev_0
                + expand_dims(alpha_t * ((torch.exp(-h) - 1.) / h + 1.), dims) * D1
                - expand_dims(alpha_t * ((torch.exp(-h) - 1. + h) / h ** 2 - 0.5), dims) * D2
            )
        else:
            # Noise-prediction (epsilon) parameterization.
            x_t = (
                expand_dims(torch.exp(log_alpha_t - log_alpha_prev_0), dims) * x
                - expand_dims(sigma_t * (torch.exp(h) - 1.), dims) * model_prev_0
                - expand_dims(sigma_t * ((torch.exp(h) - 1.) / h - 1.), dims) * D1
                - expand_dims(sigma_t * ((torch.exp(h) - 1. - h) / h ** 2 - 0.5), dims) * D2
            )
        return x_t
826
+
827
+ def singlestep_dpm_solver_update(self, x, s, t, order, return_intermediate=False, solver_type='dpm_solver', r1=None,
828
+ r2=None):
829
+ """
830
+ Singlestep DPM-Solver with the order `order` from time `s` to time `t`.
831
+ Args:
832
+ x: A pytorch tensor. The initial value at time `s`.
833
+ s: A pytorch tensor. The starting time, with the shape (x.shape[0],).
834
+ t: A pytorch tensor. The ending time, with the shape (x.shape[0],).
835
+ order: A `int`. The order of DPM-Solver. We only support order == 1 or 2 or 3.
836
+ return_intermediate: A `bool`. If true, also return the model value at time `s`, `s1` and `s2` (the intermediate times).
837
+ solver_type: either 'dpm_solver' or 'taylor'. The type for the high-order solvers.
838
+ The type slightly impacts the performance. We recommend to use 'dpm_solver' type.
839
+ r1: A `float`. The hyperparameter of the second-order or third-order solver.
840
+ r2: A `float`. The hyperparameter of the third-order solver.
841
+ Returns:
842
+ x_t: A pytorch tensor. The approximated solution at time `t`.
843
+ """
844
+ if order == 1:
845
+ return self.dpm_solver_first_update(x, s, t, return_intermediate=return_intermediate)
846
+ elif order == 2:
847
+ return self.singlestep_dpm_solver_second_update(x, s, t, return_intermediate=return_intermediate,
848
+ solver_type=solver_type, r1=r1)
849
+ elif order == 3:
850
+ return self.singlestep_dpm_solver_third_update(x, s, t, return_intermediate=return_intermediate,
851
+ solver_type=solver_type, r1=r1, r2=r2)
852
+ else:
853
+ raise ValueError("Solver order must be 1 or 2 or 3, got {}".format(order))
854
+
855
+ def multistep_dpm_solver_update(self, x, model_prev_list, t_prev_list, t, order, solver_type='dpm_solver'):
856
+ """
857
+ Multistep DPM-Solver with the order `order` from time `t_prev_list[-1]` to time `t`.
858
+ Args:
859
+ x: A pytorch tensor. The initial value at time `s`.
860
+ model_prev_list: A list of pytorch tensor. The previous computed model values.
861
+ t_prev_list: A list of pytorch tensor. The previous times, each time has the shape (x.shape[0],)
862
+ t: A pytorch tensor. The ending time, with the shape (x.shape[0],).
863
+ order: A `int`. The order of DPM-Solver. We only support order == 1 or 2 or 3.
864
+ solver_type: either 'dpm_solver' or 'taylor'. The type for the high-order solvers.
865
+ The type slightly impacts the performance. We recommend to use 'dpm_solver' type.
866
+ Returns:
867
+ x_t: A pytorch tensor. The approximated solution at time `t`.
868
+ """
869
+ if order == 1:
870
+ return self.dpm_solver_first_update(x, t_prev_list[-1], t, model_s=model_prev_list[-1])
871
+ elif order == 2:
872
+ return self.multistep_dpm_solver_second_update(x, model_prev_list, t_prev_list, t, solver_type=solver_type)
873
+ elif order == 3:
874
+ return self.multistep_dpm_solver_third_update(x, model_prev_list, t_prev_list, t, solver_type=solver_type)
875
+ else:
876
+ raise ValueError("Solver order must be 1 or 2 or 3, got {}".format(order))
877
+
878
    def dpm_solver_adaptive(self, x, order, t_T, t_0, h_init=0.05, atol=0.0078, rtol=0.05, theta=0.9, t_err=1e-5,
                            solver_type='dpm_solver'):
        """
        The adaptive step size solver based on singlestep DPM-Solver.
        Args:
            x: A pytorch tensor. The initial value at time `t_T`.
            order: A `int`. The (higher) order of the solver. We only support order == 2 or 3.
            t_T: A `float`. The starting time of the sampling (default is T).
            t_0: A `float`. The ending time of the sampling (default is epsilon).
            h_init: A `float`. The initial step size (for logSNR).
            atol: A `float`. The absolute tolerance of the solver. For image data, the default setting is 0.0078, followed [1].
            rtol: A `float`. The relative tolerance of the solver. The default setting is 0.05.
            theta: A `float`. The safety hyperparameter for adapting the step size. The default setting is 0.9, followed [1].
            t_err: A `float`. The tolerance for the time. We solve the diffusion ODE until the absolute error between the
                current time and `t_0` is less than `t_err`. The default setting is 1e-5.
            solver_type: either 'dpm_solver' or 'taylor'. The type for the high-order solvers.
                The type slightly impacts the performance. We recommend to use 'dpm_solver' type.
        Returns:
            x_0: A pytorch tensor. The approximated solution at time `t_0`.
        [1] A. Jolicoeur-Martineau, K. Li, R. Piché-Taillefer, T. Kachman, and I. Mitliagkas, "Gotta go fast when generating data with score-based models," arXiv preprint arXiv:2105.14080, 2021.
        """
        ns = self.noise_schedule
        s = t_T * torch.ones((x.shape[0],)).to(x)
        lambda_s = ns.marginal_lambda(s)
        lambda_0 = ns.marginal_lambda(t_0 * torch.ones_like(s).to(x))
        h = h_init * torch.ones_like(s).to(x)
        x_prev = x
        nfe = 0  # cumulative number of model function evaluations
        # Pair a lower-order and a higher-order update; their difference drives
        # the local error estimate used to accept/reject each step.
        if order == 2:
            r1 = 0.5
            lower_update = lambda x, s, t: self.dpm_solver_first_update(x, s, t, return_intermediate=True)
            higher_update = lambda x, s, t, **kwargs: self.singlestep_dpm_solver_second_update(x, s, t, r1=r1,
                                                                                              solver_type=solver_type,
                                                                                              **kwargs)
        elif order == 3:
            r1, r2 = 1. / 3., 2. / 3.
            lower_update = lambda x, s, t: self.singlestep_dpm_solver_second_update(x, s, t, r1=r1,
                                                                                   return_intermediate=True,
                                                                                   solver_type=solver_type)
            higher_update = lambda x, s, t, **kwargs: self.singlestep_dpm_solver_third_update(x, s, t, r1=r1, r2=r2,
                                                                                             solver_type=solver_type,
                                                                                             **kwargs)
        else:
            raise ValueError("For adaptive step size solver, order must be 2 or 3, got {}".format(order))
        while torch.abs((s - t_0)).mean() > t_err:
            t = ns.inverse_lambda(lambda_s + h)
            # The lower-order update also returns its intermediate model
            # evaluations so the higher-order update can reuse them.
            x_lower, lower_noise_kwargs = lower_update(x, s, t)
            x_higher = higher_update(x, s, t, **lower_noise_kwargs)
            # Mixed absolute/relative tolerance, per [1].
            delta = torch.max(torch.ones_like(x).to(x) * atol, rtol * torch.max(torch.abs(x_lower), torch.abs(x_prev)))
            norm_fn = lambda v: torch.sqrt(torch.square(v.reshape((v.shape[0], -1))).mean(dim=-1, keepdim=True))
            E = norm_fn((x_higher - x_lower) / delta).max()
            if torch.all(E <= 1.):
                # Step accepted: advance with the higher-order solution.
                x = x_higher
                s = t
                x_prev = x_lower
                lambda_s = ns.marginal_lambda(s)
            # Shrink/grow the step size (clamped so we never overshoot lambda_0);
            # on rejection the step is simply retried with the smaller h.
            h = torch.min(theta * h * torch.float_power(E, -1. / order).float(), lambda_0 - lambda_s)
            nfe += order
        print('adaptive solver nfe', nfe)
        return x
938
+
939
+ def sample(self, x, steps=20, t_start=None, t_end=None, order=3, skip_type='time_uniform',
940
+ method='singlestep', lower_order_final=True, denoise_to_zero=False, solver_type='dpm_solver',
941
+ atol=0.0078, rtol=0.05,
942
+ ):
943
+ """
944
+ Compute the sample at time `t_end` by DPM-Solver, given the initial `x` at time `t_start`.
945
+ =====================================================
946
+ We support the following algorithms for both noise prediction model and data prediction model:
947
+ - 'singlestep':
948
+ Singlestep DPM-Solver (i.e. "DPM-Solver-fast" in the paper), which combines different orders of singlestep DPM-Solver.
949
+ We combine all the singlestep solvers with order <= `order` to use up all the function evaluations (steps).
950
+ The total number of function evaluations (NFE) == `steps`.
951
+ Given a fixed NFE == `steps`, the sampling procedure is:
952
+ - If `order` == 1:
953
+ - Denote K = steps. We use K steps of DPM-Solver-1 (i.e. DDIM).
954
+ - If `order` == 2:
955
+ - Denote K = (steps // 2) + (steps % 2). We take K intermediate time steps for sampling.
956
+ - If steps % 2 == 0, we use K steps of singlestep DPM-Solver-2.
957
+ - If steps % 2 == 1, we use (K - 1) steps of singlestep DPM-Solver-2 and 1 step of DPM-Solver-1.
958
+ - If `order` == 3:
959
+ - Denote K = (steps // 3 + 1). We take K intermediate time steps for sampling.
960
+ - If steps % 3 == 0, we use (K - 2) steps of singlestep DPM-Solver-3, and 1 step of singlestep DPM-Solver-2 and 1 step of DPM-Solver-1.
961
+ - If steps % 3 == 1, we use (K - 1) steps of singlestep DPM-Solver-3 and 1 step of DPM-Solver-1.
962
+ - If steps % 3 == 2, we use (K - 1) steps of singlestep DPM-Solver-3 and 1 step of singlestep DPM-Solver-2.
963
+ - 'multistep':
964
+ Multistep DPM-Solver with the order of `order`. The total number of function evaluations (NFE) == `steps`.
965
+ We initialize the first `order` values by lower order multistep solvers.
966
+ Given a fixed NFE == `steps`, the sampling procedure is:
967
+ Denote K = steps.
968
+ - If `order` == 1:
969
+ - We use K steps of DPM-Solver-1 (i.e. DDIM).
970
+ - If `order` == 2:
971
+ - We firstly use 1 step of DPM-Solver-1, then use (K - 1) step of multistep DPM-Solver-2.
972
+ - If `order` == 3:
973
+ - We firstly use 1 step of DPM-Solver-1, then 1 step of multistep DPM-Solver-2, then (K - 2) step of multistep DPM-Solver-3.
974
+ - 'singlestep_fixed':
975
+ Fixed order singlestep DPM-Solver (i.e. DPM-Solver-1 or singlestep DPM-Solver-2 or singlestep DPM-Solver-3).
976
+ We use singlestep DPM-Solver-`order` for `order`=1 or 2 or 3, with total [`steps` // `order`] * `order` NFE.
977
+ - 'adaptive':
978
+ Adaptive step size DPM-Solver (i.e. "DPM-Solver-12" and "DPM-Solver-23" in the paper).
979
+ We ignore `steps` and use adaptive step size DPM-Solver with a higher order of `order`.
980
+ You can adjust the absolute tolerance `atol` and the relative tolerance `rtol` to balance the computatation costs
981
+ (NFE) and the sample quality.
982
+ - If `order` == 2, we use DPM-Solver-12 which combines DPM-Solver-1 and singlestep DPM-Solver-2.
983
+ - If `order` == 3, we use DPM-Solver-23 which combines singlestep DPM-Solver-2 and singlestep DPM-Solver-3.
984
+ =====================================================
985
+ Some advices for choosing the algorithm:
986
+ - For **unconditional sampling** or **guided sampling with small guidance scale** by DPMs:
987
+ Use singlestep DPM-Solver ("DPM-Solver-fast" in the paper) with `order = 3`.
988
+ e.g.
989
+ >>> dpm_solver = DPM_Solver(model_fn, noise_schedule, predict_x0=False)
990
+ >>> x_sample = dpm_solver.sample(x, steps=steps, t_start=t_start, t_end=t_end, order=3,
991
+ skip_type='time_uniform', method='singlestep')
992
+ - For **guided sampling with large guidance scale** by DPMs:
993
+ Use multistep DPM-Solver with `predict_x0 = True` and `order = 2`.
994
+ e.g.
995
+ >>> dpm_solver = DPM_Solver(model_fn, noise_schedule, predict_x0=True)
996
+ >>> x_sample = dpm_solver.sample(x, steps=steps, t_start=t_start, t_end=t_end, order=2,
997
+ skip_type='time_uniform', method='multistep')
998
+ We support three types of `skip_type`:
999
+ - 'logSNR': uniform logSNR for the time steps. **Recommended for low-resolutional images**
1000
+ - 'time_uniform': uniform time for the time steps. **Recommended for high-resolutional images**.
1001
+ - 'time_quadratic': quadratic time for the time steps.
1002
+ =====================================================
1003
+ Args:
1004
+ x: A pytorch tensor. The initial value at time `t_start`
1005
+ e.g. if `t_start` == T, then `x` is a sample from the standard normal distribution.
1006
+ steps: A `int`. The total number of function evaluations (NFE).
1007
+ t_start: A `float`. The starting time of the sampling.
1008
+ If `T` is None, we use self.noise_schedule.T (default is 1.0).
1009
+ t_end: A `float`. The ending time of the sampling.
1010
+ If `t_end` is None, we use 1. / self.noise_schedule.total_N.
1011
+ e.g. if total_N == 1000, we have `t_end` == 1e-3.
1012
+ For discrete-time DPMs:
1013
+ - We recommend `t_end` == 1. / self.noise_schedule.total_N.
1014
+ For continuous-time DPMs:
1015
+ - We recommend `t_end` == 1e-3 when `steps` <= 15; and `t_end` == 1e-4 when `steps` > 15.
1016
+ order: A `int`. The order of DPM-Solver.
1017
+ skip_type: A `str`. The type for the spacing of the time steps. 'time_uniform' or 'logSNR' or 'time_quadratic'.
1018
+ method: A `str`. The method for sampling. 'singlestep' or 'multistep' or 'singlestep_fixed' or 'adaptive'.
1019
+ denoise_to_zero: A `bool`. Whether to denoise to time 0 at the final step.
1020
+ Default is `False`. If `denoise_to_zero` is `True`, the total NFE is (`steps` + 1).
1021
+ This trick is firstly proposed by DDPM (https://arxiv.org/abs/2006.11239) and
1022
+ score_sde (https://arxiv.org/abs/2011.13456). Such trick can improve the FID
1023
+ for diffusion models sampling by diffusion SDEs for low-resolutional images
1024
+ (such as CIFAR-10). However, we observed that such trick does not matter for
1025
+ high-resolutional images. As it needs an additional NFE, we do not recommend
1026
+ it for high-resolutional images.
1027
+ lower_order_final: A `bool`. Whether to use lower order solvers at the final steps.
1028
+ Only valid for `method=multistep` and `steps < 15`. We empirically find that
1029
+ this trick is a key to stabilizing the sampling by DPM-Solver with very few steps
1030
+ (especially for steps <= 10). So we recommend to set it to be `True`.
1031
+ solver_type: A `str`. The taylor expansion type for the solver. `dpm_solver` or `taylor`. We recommend `dpm_solver`.
1032
+ atol: A `float`. The absolute tolerance of the adaptive step size solver. Valid when `method` == 'adaptive'.
1033
+ rtol: A `float`. The relative tolerance of the adaptive step size solver. Valid when `method` == 'adaptive'.
1034
+ Returns:
1035
+ x_end: A pytorch tensor. The approximated solution at time `t_end`.
1036
+ """
1037
+ t_0 = 1. / self.noise_schedule.total_N if t_end is None else t_end
1038
+ t_T = self.noise_schedule.T if t_start is None else t_start
1039
+ device = x.device
1040
+ if method == 'adaptive':
1041
+ with torch.no_grad():
1042
+ x = self.dpm_solver_adaptive(x, order=order, t_T=t_T, t_0=t_0, atol=atol, rtol=rtol,
1043
+ solver_type=solver_type)
1044
+ elif method == 'multistep':
1045
+ assert steps >= order
1046
+ timesteps = self.get_time_steps(skip_type=skip_type, t_T=t_T, t_0=t_0, N=steps, device=device)
1047
+ assert timesteps.shape[0] - 1 == steps
1048
+ with torch.no_grad():
1049
+ vec_t = timesteps[0].expand((x.shape[0]))
1050
+ model_prev_list = [self.model_fn(x, vec_t)]
1051
+ t_prev_list = [vec_t]
1052
+ # Init the first `order` values by lower order multistep DPM-Solver.
1053
+ for init_order in tqdm(range(1, order), desc="DPM init order"):
1054
+ vec_t = timesteps[init_order].expand(x.shape[0])
1055
+ x = self.multistep_dpm_solver_update(x, model_prev_list, t_prev_list, vec_t, init_order,
1056
+ solver_type=solver_type)
1057
+ model_prev_list.append(self.model_fn(x, vec_t))
1058
+ t_prev_list.append(vec_t)
1059
+ # Compute the remaining values by `order`-th order multistep DPM-Solver.
1060
+ for step in tqdm(range(order, steps + 1), desc="DPM multistep"):
1061
+ vec_t = timesteps[step].expand(x.shape[0])
1062
+ if lower_order_final and steps < 15:
1063
+ step_order = min(order, steps + 1 - step)
1064
+ else:
1065
+ step_order = order
1066
+ x = self.multistep_dpm_solver_update(x, model_prev_list, t_prev_list, vec_t, step_order,
1067
+ solver_type=solver_type)
1068
+ for i in range(order - 1):
1069
+ t_prev_list[i] = t_prev_list[i + 1]
1070
+ model_prev_list[i] = model_prev_list[i + 1]
1071
+ t_prev_list[-1] = vec_t
1072
+ # We do not need to evaluate the final model value.
1073
+ if step < steps:
1074
+ model_prev_list[-1] = self.model_fn(x, vec_t)
1075
+ elif method in ['singlestep', 'singlestep_fixed']:
1076
+ if method == 'singlestep':
1077
+ timesteps_outer, orders = self.get_orders_and_timesteps_for_singlestep_solver(steps=steps, order=order,
1078
+ skip_type=skip_type,
1079
+ t_T=t_T, t_0=t_0,
1080
+ device=device)
1081
+ elif method == 'singlestep_fixed':
1082
+ K = steps // order
1083
+ orders = [order, ] * K
1084
+ timesteps_outer = self.get_time_steps(skip_type=skip_type, t_T=t_T, t_0=t_0, N=K, device=device)
1085
+ for i, order in enumerate(orders):
1086
+ t_T_inner, t_0_inner = timesteps_outer[i], timesteps_outer[i + 1]
1087
+ timesteps_inner = self.get_time_steps(skip_type=skip_type, t_T=t_T_inner.item(), t_0=t_0_inner.item(),
1088
+ N=order, device=device)
1089
+ lambda_inner = self.noise_schedule.marginal_lambda(timesteps_inner)
1090
+ vec_s, vec_t = t_T_inner.tile(x.shape[0]), t_0_inner.tile(x.shape[0])
1091
+ h = lambda_inner[-1] - lambda_inner[0]
1092
+ r1 = None if order <= 1 else (lambda_inner[1] - lambda_inner[0]) / h
1093
+ r2 = None if order <= 2 else (lambda_inner[2] - lambda_inner[0]) / h
1094
+ x = self.singlestep_dpm_solver_update(x, vec_s, vec_t, order, solver_type=solver_type, r1=r1, r2=r2)
1095
+ if denoise_to_zero:
1096
+ x = self.denoise_to_zero_fn(x, torch.ones((x.shape[0],)).to(device) * t_0)
1097
+ return x
1098
+
1099
+
1100
+ #############################################################
1101
+ # other utility functions
1102
+ #############################################################
1103
+
1104
def interpolate_fn(x, xp, yp):
    """
    Piecewise linear interpolation y = f(x) with keypoints (xp, yp).

    Implemented with differentiable torch ops so it can be used under autograd.
    For x outside the range of xp, the outermost segment is linearly extended
    (i.e. f is defined on the whole axis).
    Args:
        x: PyTorch tensor with shape [N, C], where N is the batch size, C is the number of channels (we use C = 1 for DPM-Solver).
        xp: PyTorch tensor with shape [C, K], where K is the number of keypoints.
        yp: PyTorch tensor with shape [C, K].
    Returns:
        The function values f(x), with shape [N, C].
    """
    num_queries, num_keypoints = x.shape[0], xp.shape[1]
    # Sort each query together with the keypoints; the rank of x tells which
    # segment it falls into without any data-dependent python branching.
    combined = torch.cat([x.unsqueeze(2), xp.unsqueeze(0).repeat((num_queries, 1, 1))], dim=2)
    sorted_combined, sort_order = torch.sort(combined, dim=2)
    rank_of_x = torch.argmin(sort_order, dim=2)
    prev_kp = rank_of_x - 1
    # Segment start index inside the *sorted* array, clamped so the first/last
    # keypoint segments are used for out-of-range queries.
    seg_start = torch.where(
        torch.eq(rank_of_x, 0),
        torch.tensor(1, device=x.device),
        torch.where(
            torch.eq(rank_of_x, num_keypoints), torch.tensor(num_keypoints - 2, device=x.device), prev_kp,
        ),
    )
    seg_end = torch.where(torch.eq(seg_start, prev_kp), seg_start + 2, seg_start + 1)
    seg_x0 = torch.gather(sorted_combined, dim=2, index=seg_start.unsqueeze(2)).squeeze(2)
    seg_x1 = torch.gather(sorted_combined, dim=2, index=seg_end.unsqueeze(2)).squeeze(2)
    # Segment start index inside the *original* keypoint array for the y values.
    y_start_idx = torch.where(
        torch.eq(rank_of_x, 0),
        torch.tensor(0, device=x.device),
        torch.where(
            torch.eq(rank_of_x, num_keypoints), torch.tensor(num_keypoints - 2, device=x.device), prev_kp,
        ),
    )
    yp_expanded = yp.unsqueeze(0).expand(num_queries, -1, -1)
    seg_y0 = torch.gather(yp_expanded, dim=2, index=y_start_idx.unsqueeze(2)).squeeze(2)
    seg_y1 = torch.gather(yp_expanded, dim=2, index=(y_start_idx + 1).unsqueeze(2)).squeeze(2)
    # Standard two-point line through (x0, y0) and (x1, y1).
    return seg_y0 + (x - seg_x0) * (seg_y1 - seg_y0) / (seg_x1 - seg_x0)
1143
+
1144
+
1145
def expand_dims(v, dims):
    """
    Append trailing singleton axes to `v` until it has `dims` dimensions.
    Args:
        `v`: a PyTorch tensor with shape [N].
        `dims`: an `int`, the target number of dimensions.
    Returns:
        a PyTorch tensor with shape [N, 1, 1, ..., 1] and the total dimension is `dims`.
    """
    out = v
    for _ in range(dims - 1):
        out = out.unsqueeze(-1)
    return out
watermarker/LaWa/ldm/models/diffusion/dpm_solver/sampler.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SAMPLING ONLY."""
2
+ import torch
3
+
4
+ from .dpm_solver import NoiseScheduleVP, model_wrapper, DPM_Solver
5
+
6
+
7
# Map the LDM model's `parameterization` name to the `model_type` string that
# `model_wrapper` expects ("noise" = epsilon-prediction, "v" = v-prediction).
MODEL_TYPES = {
    "eps": "noise",
    "v": "v"
}
11
+
12
+
13
class DPMSolverSampler(object):
    """Draw samples from a latent diffusion model with the multistep DPM-Solver."""

    def __init__(self, model, **kwargs):
        """
        Args:
            model: an LDM model exposing `device`, `alphas_cumprod`, `betas`,
                `parameterization` and `apply_model`.
        """
        super().__init__()
        self.model = model
        to_torch = lambda x: x.clone().detach().to(torch.float32).to(model.device)
        self.register_buffer('alphas_cumprod', to_torch(model.alphas_cumprod))

    def register_buffer(self, name, attr):
        """Attach `attr` to self as an attribute, moving tensors to CUDA when possible.

        Fix: the previous version unconditionally called `.to("cuda")`, which
        raised on CPU-only hosts; we now only move when CUDA is available.
        """
        if type(attr) == torch.Tensor:
            if torch.cuda.is_available() and attr.device != torch.device("cuda"):
                attr = attr.to(torch.device("cuda"))
        setattr(self, name, attr)

    @torch.no_grad()
    def sample(self,
               S,
               batch_size,
               shape,
               conditioning=None,
               callback=None,
               normals_sequence=None,
               img_callback=None,
               quantize_x0=False,
               eta=0.,
               mask=None,
               x0=None,
               temperature=1.,
               noise_dropout=0.,
               score_corrector=None,
               corrector_kwargs=None,
               verbose=True,
               x_T=None,
               log_every_t=100,
               unconditional_guidance_scale=1.,
               unconditional_conditioning=None,
               # this has to come in the same format as the conditioning, # e.g. as encoded tokens, ...
               **kwargs
               ):
        """Sample `batch_size` latents of `shape` (C, H, W) with `S` DPM-Solver steps.

        Returns:
            (samples, None) — the second element mirrors other samplers'
            `(samples, intermediates)` API; intermediates are not collected here.
        Note: most keyword arguments are accepted only for API compatibility with
        the DDIM/PLMS samplers and are not used by DPM-Solver.
        """
        if conditioning is not None:
            if isinstance(conditioning, dict):
                cbs = conditioning[list(conditioning.keys())[0]].shape[0]
                if cbs != batch_size:
                    print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}")
            else:
                if conditioning.shape[0] != batch_size:
                    print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}")

        # sampling
        C, H, W = shape
        size = (batch_size, C, H, W)

        print(f'Data shape for DPM-Solver sampling is {size}, sampling steps {S}')

        device = self.model.betas.device
        if x_T is None:
            img = torch.randn(size, device=device)
        else:
            img = x_T

        # Discrete VP noise schedule built from the model's alphas_cumprod.
        ns = NoiseScheduleVP('discrete', alphas_cumprod=self.alphas_cumprod)

        # Wrap the LDM UNet as a noise-prediction function with classifier-free guidance.
        model_fn = model_wrapper(
            lambda x, t, c: self.model.apply_model(x, t, c),
            ns,
            model_type=MODEL_TYPES[self.model.parameterization],
            guidance_type="classifier-free",
            condition=conditioning,
            unconditional_condition=unconditional_conditioning,
            guidance_scale=unconditional_guidance_scale,
        )

        dpm_solver = DPM_Solver(model_fn, ns, predict_x0=True, thresholding=False)
        x = dpm_solver.sample(img, steps=S, skip_type="time_uniform", method="multistep", order=2, lower_order_final=True)

        return x.to(device), None
watermarker/LaWa/ldm/models/diffusion/plms.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SAMPLING ONLY."""
2
+
3
+ import torch
4
+ import numpy as np
5
+ from tqdm import tqdm
6
+ from functools import partial
7
+
8
+ from ldm.modules.diffusionmodules.util import make_ddim_sampling_parameters, make_ddim_timesteps, noise_like
9
+ from ldm.models.diffusion.sampling_util import norm_thresholding
10
+
11
+
12
+ class PLMSSampler(object):
13
    def __init__(self, model, schedule="linear", **kwargs):
        """Store the diffusion model and schedule name for later PLMS sampling.

        Args:
            model: an LDM diffusion model exposing `num_timesteps`, `betas`,
                `alphas_cumprod`, etc.
            schedule: beta-schedule name; stored but not otherwise used here.
        """
        super().__init__()
        self.model = model
        self.ddpm_num_timesteps = model.num_timesteps
        self.schedule = schedule
18
+
19
+ def register_buffer(self, name, attr):
20
+ if type(attr) == torch.Tensor:
21
+ if attr.device != torch.device("cuda"):
22
+ attr = attr.to(torch.device("cuda"))
23
+ setattr(self, name, attr)
24
+
25
    def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddim_eta=0., verbose=True):
        """Precompute and register all DDIM/PLMS sampling coefficients as buffers.

        Args:
            ddim_num_steps: number of sampling timesteps to discretize to.
            ddim_discretize: timestep discretization method name.
            ddim_eta: DDIM eta; must be 0 for PLMS (deterministic sampling).
            verbose: forwarded to the schedule helpers for logging.
        Raises:
            ValueError: if `ddim_eta` is nonzero (invalid for PLMS).
        """
        if ddim_eta != 0:
            raise ValueError('ddim_eta must be 0 for PLMS')
        self.ddim_timesteps = make_ddim_timesteps(ddim_discr_method=ddim_discretize, num_ddim_timesteps=ddim_num_steps,
                                                  num_ddpm_timesteps=self.ddpm_num_timesteps,verbose=verbose)
        alphas_cumprod = self.model.alphas_cumprod
        assert alphas_cumprod.shape[0] == self.ddpm_num_timesteps, 'alphas have to be defined for each timestep'
        to_torch = lambda x: x.clone().detach().to(torch.float32).to(self.model.device)

        self.register_buffer('betas', to_torch(self.model.betas))
        self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod))
        self.register_buffer('alphas_cumprod_prev', to_torch(self.model.alphas_cumprod_prev))

        # calculations for diffusion q(x_t | x_{t-1}) and others
        # (computed on CPU via numpy, then converted back with to_torch)
        self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod.cpu())))
        self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod.cpu())))
        self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod.cpu())))
        self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod.cpu())))
        self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod.cpu() - 1)))

        # ddim sampling parameters
        ddim_sigmas, ddim_alphas, ddim_alphas_prev = make_ddim_sampling_parameters(alphacums=alphas_cumprod.cpu(),
                                                                                   ddim_timesteps=self.ddim_timesteps,
                                                                                   eta=ddim_eta,verbose=verbose)
        self.register_buffer('ddim_sigmas', ddim_sigmas)
        self.register_buffer('ddim_alphas', ddim_alphas)
        self.register_buffer('ddim_alphas_prev', ddim_alphas_prev)
        self.register_buffer('ddim_sqrt_one_minus_alphas', np.sqrt(1. - ddim_alphas))
        # With ddim_eta forced to 0 above, these sigmas evaluate to zero; kept
        # for interface parity with the DDIM sampler.
        sigmas_for_original_sampling_steps = ddim_eta * torch.sqrt(
            (1 - self.alphas_cumprod_prev) / (1 - self.alphas_cumprod) * (
                    1 - self.alphas_cumprod / self.alphas_cumprod_prev))
        self.register_buffer('ddim_sigmas_for_original_num_steps', sigmas_for_original_sampling_steps)
57
+
58
    @torch.no_grad()
    def sample(self,
               S,
               batch_size,
               shape,
               conditioning=None,
               callback=None,
               normals_sequence=None,
               img_callback=None,
               quantize_x0=False,
               eta=0.,
               mask=None,
               x0=None,
               temperature=1.,
               noise_dropout=0.,
               score_corrector=None,
               corrector_kwargs=None,
               verbose=True,
               x_T=None,
               log_every_t=100,
               unconditional_guidance_scale=1.,
               unconditional_conditioning=None,
               # this has to come in the same format as the conditioning, # e.g. as encoded tokens, ...
               dynamic_threshold=None,
               **kwargs
               ):
        """Run PLMS sampling: build the schedule, then delegate to `plms_sampling`.

        Args:
            S: number of sampling steps.
            batch_size: number of samples to draw.
            shape: latent shape (C, H, W) without the batch dimension.
            conditioning: conditioning tensor or dict of tensors (batch-first);
                a mismatch with `batch_size` only triggers a warning.
            eta: DDIM eta, forwarded to `make_schedule` (must be 0 for PLMS).
            x_T: optional starting noise; sampled internally if None.
            unconditional_guidance_scale / unconditional_conditioning:
                classifier-free guidance parameters.
        Returns:
            (samples, intermediates) as produced by `plms_sampling`.
        """
        if conditioning is not None:
            if isinstance(conditioning, dict):
                # Use the first entry's batch dimension as representative.
                cbs = conditioning[list(conditioning.keys())[0]].shape[0]
                if cbs != batch_size:
                    print(f"Warning: Got {cbs} conditionings but batch-size is {batch_size}")
            else:
                if conditioning.shape[0] != batch_size:
                    print(f"Warning: Got {conditioning.shape[0]} conditionings but batch-size is {batch_size}")

        self.make_schedule(ddim_num_steps=S, ddim_eta=eta, verbose=verbose)
        # sampling
        C, H, W = shape
        size = (batch_size, C, H, W)
        print(f'Data shape for PLMS sampling is {size}')

        samples, intermediates = self.plms_sampling(conditioning, size,
                                                    callback=callback,
                                                    img_callback=img_callback,
                                                    quantize_denoised=quantize_x0,
                                                    mask=mask, x0=x0,
                                                    ddim_use_original_steps=False,
                                                    noise_dropout=noise_dropout,
                                                    temperature=temperature,
                                                    score_corrector=score_corrector,
                                                    corrector_kwargs=corrector_kwargs,
                                                    x_T=x_T,
                                                    log_every_t=log_every_t,
                                                    unconditional_guidance_scale=unconditional_guidance_scale,
                                                    unconditional_conditioning=unconditional_conditioning,
                                                    dynamic_threshold=dynamic_threshold,
                                                    )
        return samples, intermediates
116
+
117
    @torch.no_grad()
    def plms_sampling(self, cond, shape,
                      x_T=None, ddim_use_original_steps=False,
                      callback=None, timesteps=None, quantize_denoised=False,
                      mask=None, x0=None, img_callback=None, log_every_t=100,
                      temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None,
                      unconditional_guidance_scale=1., unconditional_conditioning=None,
                      dynamic_threshold=None):
        """Run the full PLMS (pseudo linear multistep) denoising loop.

        Starts from Gaussian noise (or `x_T` when provided) and walks the
        timestep schedule in reverse, calling `p_sample_plms` once per step and
        carrying a short history of epsilon predictions (`old_eps`) that the
        multistep update reuses.

        Returns (img, intermediates): the final latent, and a dict with
        periodic snapshots of x_t ('x_inter') and the x0 prediction ('pred_x0').
        """
        device = self.model.betas.device
        b = shape[0]
        if x_T is None:
            img = torch.randn(shape, device=device)
        else:
            img = x_T

        if timesteps is None:
            timesteps = self.ddpm_num_timesteps if ddim_use_original_steps else self.ddim_timesteps
        elif timesteps is not None and not ddim_use_original_steps:
            # Truncate the precomputed DDIM schedule to the requested step count.
            subset_end = int(min(timesteps / self.ddim_timesteps.shape[0], 1) * self.ddim_timesteps.shape[0]) - 1
            timesteps = self.ddim_timesteps[:subset_end]

        intermediates = {'x_inter': [img], 'pred_x0': [img]}
        # Iterate from the noisiest timestep down to the cleanest.
        time_range = list(reversed(range(0,timesteps))) if ddim_use_original_steps else np.flip(timesteps)
        total_steps = timesteps if ddim_use_original_steps else timesteps.shape[0]
        print(f"Running PLMS Sampling with {total_steps} timesteps")

        iterator = tqdm(time_range, desc='PLMS Sampler', total=total_steps)
        old_eps = []  # history of eps predictions consumed by the multistep formula

        for i, step in enumerate(iterator):
            index = total_steps - i - 1
            ts = torch.full((b,), step, device=device, dtype=torch.long)
            # t_next feeds the 2nd-order bootstrap step inside p_sample_plms;
            # on the last iteration it is clamped to the final timestep.
            ts_next = torch.full((b,), time_range[min(i + 1, len(time_range) - 1)], device=device, dtype=torch.long)

            if mask is not None:
                # Inpainting: pin the masked region to the (re-noised) reference x0.
                assert x0 is not None
                img_orig = self.model.q_sample(x0, ts)  # TODO: deterministic forward pass?
                img = img_orig * mask + (1. - mask) * img

            outs = self.p_sample_plms(img, cond, ts, index=index, use_original_steps=ddim_use_original_steps,
                                      quantize_denoised=quantize_denoised, temperature=temperature,
                                      noise_dropout=noise_dropout, score_corrector=score_corrector,
                                      corrector_kwargs=corrector_kwargs,
                                      unconditional_guidance_scale=unconditional_guidance_scale,
                                      unconditional_conditioning=unconditional_conditioning,
                                      old_eps=old_eps, t_next=ts_next,
                                      dynamic_threshold=dynamic_threshold)
            img, pred_x0, e_t = outs
            # Keep at most the 3 most recent eps values; the 4th-order PLMS
            # update needs 3 past predictions plus the current one.
            old_eps.append(e_t)
            if len(old_eps) >= 4:
                old_eps.pop(0)
            if callback: callback(i)
            if img_callback: img_callback(pred_x0, i)

            if index % log_every_t == 0 or index == total_steps - 1:
                intermediates['x_inter'].append(img)
                intermediates['pred_x0'].append(pred_x0)

        return img, intermediates
176
+
177
    @torch.no_grad()
    def p_sample_plms(self, x, c, t, index, repeat_noise=False, use_original_steps=False, quantize_denoised=False,
                      temperature=1., noise_dropout=0., score_corrector=None, corrector_kwargs=None,
                      unconditional_guidance_scale=1., unconditional_conditioning=None, old_eps=None, t_next=None,
                      dynamic_threshold=None):
        """Perform one PLMS update: x_t -> x_{t-1}.

        Combines the current eps prediction with the history in `old_eps` using
        Adams-Bashforth-style linear multistep coefficients; with an empty
        history it bootstraps via a 2nd-order pseudo improved-Euler step that
        evaluates the model a second time at `t_next`.

        Returns (x_prev, pred_x0, e_t): the next latent, the current x0
        estimate, and the raw eps prediction (to be appended to `old_eps`).
        """
        b, *_, device = *x.shape, x.device

        def get_model_output(x, t):
            # eps prediction, with classifier-free guidance when an
            # unconditional conditioning and a scale != 1 are supplied.
            if unconditional_conditioning is None or unconditional_guidance_scale == 1.:
                e_t = self.model.apply_model(x, t, c)
            else:
                # Run conditional and unconditional passes in one batch.
                x_in = torch.cat([x] * 2)
                t_in = torch.cat([t] * 2)
                c_in = torch.cat([unconditional_conditioning, c])
                e_t_uncond, e_t = self.model.apply_model(x_in, t_in, c_in).chunk(2)
                e_t = e_t_uncond + unconditional_guidance_scale * (e_t - e_t_uncond)

            if score_corrector is not None:
                # Score correction is only defined for eps-parameterized models.
                assert self.model.parameterization == "eps"
                e_t = score_corrector.modify_score(self.model, e_t, x, t, c, **corrector_kwargs)

            return e_t

        # Schedule tensors: full DDPM schedule or the subsampled DDIM one.
        alphas = self.model.alphas_cumprod if use_original_steps else self.ddim_alphas
        alphas_prev = self.model.alphas_cumprod_prev if use_original_steps else self.ddim_alphas_prev
        sqrt_one_minus_alphas = self.model.sqrt_one_minus_alphas_cumprod if use_original_steps else self.ddim_sqrt_one_minus_alphas
        sigmas = self.model.ddim_sigmas_for_original_num_steps if use_original_steps else self.ddim_sigmas

        def get_x_prev_and_pred_x0(e_t, index):
            # select parameters corresponding to the currently considered timestep
            a_t = torch.full((b, 1, 1, 1), alphas[index], device=device)
            a_prev = torch.full((b, 1, 1, 1), alphas_prev[index], device=device)
            sigma_t = torch.full((b, 1, 1, 1), sigmas[index], device=device)
            sqrt_one_minus_at = torch.full((b, 1, 1, 1), sqrt_one_minus_alphas[index],device=device)

            # current prediction for x_0
            pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt()
            if quantize_denoised:
                pred_x0, _, *_ = self.model.first_stage_model.quantize(pred_x0)
            if dynamic_threshold is not None:
                pred_x0 = norm_thresholding(pred_x0, dynamic_threshold)
            # direction pointing to x_t
            dir_xt = (1. - a_prev - sigma_t**2).sqrt() * e_t
            noise = sigma_t * noise_like(x.shape, device, repeat_noise) * temperature
            if noise_dropout > 0.:
                noise = torch.nn.functional.dropout(noise, p=noise_dropout)
            x_prev = a_prev.sqrt() * pred_x0 + dir_xt + noise
            return x_prev, pred_x0

        e_t = get_model_output(x, t)
        if len(old_eps) == 0:
            # Pseudo Improved Euler (2nd order): extra model evaluation at t_next.
            x_prev, pred_x0 = get_x_prev_and_pred_x0(e_t, index)
            e_t_next = get_model_output(x_prev, t_next)
            e_t_prime = (e_t + e_t_next) / 2
        elif len(old_eps) == 1:
            # 2nd order Pseudo Linear Multistep (Adams-Bashforth)
            e_t_prime = (3 * e_t - old_eps[-1]) / 2
        elif len(old_eps) == 2:
            # 3rd order Pseudo Linear Multistep (Adams-Bashforth)
            e_t_prime = (23 * e_t - 16 * old_eps[-1] + 5 * old_eps[-2]) / 12
        elif len(old_eps) >= 3:
            # 4th order Pseudo Linear Multistep (Adams-Bashforth)
            e_t_prime = (55 * e_t - 59 * old_eps[-1] + 37 * old_eps[-2] - 9 * old_eps[-3]) / 24

        x_prev, pred_x0 = get_x_prev_and_pred_x0(e_t_prime, index)

        return x_prev, pred_x0, e_t
watermarker/LaWa/ldm/models/diffusion/sampling_util.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import numpy as np
3
+
4
+
5
def append_dims(x, target_dims):
    """Right-pad a tensor's shape with singleton dims until it has `target_dims` dims.

    Adapted from https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/utils.py
    """
    extra = target_dims - x.ndim
    if extra < 0:
        raise ValueError(f'input has {x.ndim} dims but target_dims is {target_dims}, which is less')
    # Indexing with trailing None adds size-1 axes without copying data.
    idx = (Ellipsis,) + (None,) * extra
    return x[idx]
12
+
13
+
14
def norm_thresholding(x0, value):
    """Dynamic thresholding: rescale `x0` so each sample's RMS norm is at most `value`."""
    # Per-sample RMS over all non-batch dims, floored at `value`, then
    # broadcast back to x0's rank so the division is elementwise per sample.
    rms = x0.pow(2).flatten(1).mean(1).sqrt()
    s = append_dims(rms.clamp(min=value), x0.ndim)
    return x0 * (value / s)
17
+
18
+
19
def spatial_norm_thresholding(x0, value):
    """Per-pixel dynamic thresholding on a (b, c, h, w) tensor."""
    # Channel-wise RMS at every spatial location, floored at `value`.
    norm = torch.sqrt(x0.pow(2).mean(1, keepdim=True)).clamp(min=value)
    return x0 * (value / norm)
watermarker/LaWa/ldm/modules/__pycache__/attention.cpython-38.pyc ADDED
Binary file (10.5 kB). View file
 
watermarker/LaWa/ldm/modules/__pycache__/ema.cpython-38.pyc ADDED
Binary file (3.2 kB). View file
 
watermarker/LaWa/ldm/modules/__pycache__/x_transformer.cpython-38.pyc ADDED
Binary file (18.3 kB). View file