Commit 7d28380
Parent: b0afe49
add cond stage to trainable parameters

ControlNet/ControlNet.ipynb  CHANGED

The diff for this file is too large to render. See raw diff.

ControlNet/cldm/cldm.py  CHANGED

@@ -2,6 +2,8 @@ import einops
 import torch
 import torch as th
 import torch.nn as nn
+import random
+import bitsandbytes as bnb
 from torchvision.transforms import Resize
 
 from ldm.modules.diffusionmodules.util import (
@@ -305,12 +307,15 @@ class ControlNet(nn.Module):
 
 class ControlInpaintLDM(LatentDiffusion):
 
-    def __init__(self, control_stage_config, control_key, only_mid_control, *args, **kwargs):
+    def __init__(self, control_stage_config, control_key, u_cond_percent, only_mid_control, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.control_model = instantiate_from_config(control_stage_config)
         self.control_key = control_key
         self.only_mid_control = only_mid_control
         self.control_scales = [1.0] * 13
+        self.learnable_vector = nn.Parameter(torch.randn((1, 1, 768)), requires_grad=True)
+        self.proj_out = nn.Linear(1024, 768)
+        self.u_cond_percent = u_cond_percent
 
     @torch.no_grad()
     def get_input(self, batch, k, bs=None, *args, **kwargs):
@@ -380,6 +385,7 @@ class ControlInpaintLDM(LatentDiffusion):
 
         if self.cond_stage_trainable:
             c = self.get_learned_conditioning(c)
+            c = self.proj_out(c)
 
         if sample:
             # get denoise row
@@ -412,15 +418,38 @@ class ControlInpaintLDM(LatentDiffusion):
         shape = (self.channels, h // 8, w // 8)
         samples, intermediates = ddim_sampler.sample(ddim_steps, batch_size, shape, cond, verbose=False, **kwargs)
         return samples, intermediates
-
+
     def configure_optimizers(self):
         lr = self.learning_rate
         params = list(self.control_model.parameters())
+        if self.cond_stage_trainable:
+            print(f"{self.__class__.__name__}: Also optimizing conditioner params!")
+            params = params + list(self.cond_stage_model.final_ln.parameters()) + list(self.cond_stage_model.mapper.parameters()) + list(self.proj_out.parameters())
+        self.params = params
+        self.params_with_white = params + list(self.learnable_vector)
         if not self.sd_locked:
             params += list(self.model.diffusion_model.output_blocks.parameters())
             params += list(self.model.diffusion_model.out.parameters())
-        opt = torch.optim.AdamW(params, lr=lr)
+        # opt = torch.optim.AdamW(params, lr=lr)
+        opt = bnb.optim.Adam8bit(params, lr=lr)
+        self.opt = opt
         return opt
+
+    def forward(self, x, c, *args, **kwargs):
+        self.opt.params = self.params
+        t = torch.randint(0, self.num_timesteps, (x.shape[0],), device=self.device).long()
+        if self.model.conditioning_key is not None:
+            assert c is not None
+            if self.cond_stage_trainable:
+                c['c_crossattn'][0] = self.get_learned_conditioning(c['c_crossattn'][0])
+                c['c_crossattn'][0] = self.proj_out(c['c_crossattn'][0])
+            u_cond_prop = random.uniform(0, 1)
+            if u_cond_prop < self.u_cond_percent:
+                self.opt.params = self.params_with_white
+                c['c_crossattn'][0] = self.learnable_vector.repeat(x.shape[0], 1, 1)
+                return self.p_losses(x, c, t, *args, **kwargs)
+        return self.p_losses(x, c, t, *args, **kwargs)
+
 
     def low_vram_shift(self, is_diffusing):
         if is_diffusing:
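
The configure_optimizers hunk swaps the stock AdamW for bitsandbytes' 8-bit Adam and widens the trainable set to the conditioner's mapper, final_ln, and the new proj_out. Below is a minimal sketch of the same parameter-collection pattern with stand-in modules (the Linear layers are assumptions, not the repo's classes). Note that a single nn.Parameter joins a parameter list as [p]; list(p) iterates the tensor into non-leaf views, which torch optimizers reject.

import torch
import torch.nn as nn

# Stand-ins for the real modules (assumptions, for illustration only).
control_model = nn.Linear(4, 4)                           # the ControlNet branch
proj_out = nn.Linear(1024, 768)                           # CLIP-image width -> cross-attn width
learnable_vector = nn.Parameter(torch.randn(1, 1, 768))   # learned "null" conditioning

params = list(control_model.parameters()) + list(proj_out.parameters())
# Wrap a bare Parameter in a list literal; list(learnable_vector) would
# yield non-leaf slices and AdamW/Adam8bit would raise
# "can't optimize a non-leaf Tensor".
params_with_null = params + [learnable_vector]

opt = torch.optim.AdamW(params_with_null, lr=1e-5)  # bnb.optim.Adam8bit takes the same arguments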
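The new forward() implements classifier-free-guidance-style conditioning dropout: with probability u_cond_percent the projected CLIP embedding is replaced by the learned unconditional vector. A hedged, self-contained sketch of that one step (the 0.2 rate and the tensor shapes are assumptions):

import random
import torch
import torch.nn as nn

u_cond_percent = 0.2                                      # assumed dropout rate
learnable_vector = nn.Parameter(torch.randn(1, 1, 768))

def maybe_drop_condition(c_crossattn):
    # With probability u_cond_percent, swap the conditioning for the
    # learned unconditional token, broadcast over the batch.
    if random.uniform(0, 1) < u_cond_percent:
        return learnable_vector.repeat(c_crossattn.shape[0], 1, 1)
    return c_crossattn

cond = torch.randn(4, 1, 768)                             # projected CLIP image embedding
cond = maybe_drop_condition(cond)
assert cond.shape == (4, 1, 768)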

ControlNet/environment.yaml  CHANGED

@@ -1,12 +1,13 @@
 name: control
 channels:
   - pytorch
+  - anaconda
   - defaults
 dependencies:
   - python=3.8.5
   - pip=20.3
   - cudatoolkit=11.3
-  - pytorch=1.12.1
+  - pytorch=1.13.1
   - torchvision=0.13.1
   - numpy=1.23.1
   - pip:
@@ -36,4 +37,5 @@ dependencies:
     - ipdb==0.13.11
     - ipython==8.11.0
    - ipykernel==6.21.2
+    - bitsandbytes==0.37.1
 
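The environment now pins pytorch=1.13.1 and pulls in bitsandbytes 0.37.1 for the 8-bit optimizer used in cldm.py. A quick sanity check, assuming a CUDA build is available (bitsandbytes' 8-bit optimizer states only exist for GPU tensors):

import torch
import bitsandbytes as bnb

# 8-bit optimizers require the parameters to live on the GPU.
p = torch.nn.Parameter(torch.randn(8, 8, device="cuda"))
opt = bnb.optim.Adam8bit([p], lr=1e-4)
(p ** 2).sum().backward()
opt.step()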

ControlNet/ldm/models/diffusion/ddpm.py  CHANGED

@@ -552,8 +552,6 @@ class LatentDiffusion(DDPM):
         reset_num_ema_updates = kwargs.pop("reset_num_ema_updates", False)
         ignore_keys = kwargs.pop("ignore_keys", [])
         super().__init__(conditioning_key=conditioning_key, *args, **kwargs)
-        self.learnable_vector = nn.Parameter(torch.randn((1, 1, 768)), requires_grad=True)
-        self.u_cond_percent = u_cond_percent
         self.concat_mode = concat_mode
         self.cond_stage_trainable = cond_stage_trainable
         self.cond_stage_key = cond_stage_key
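This removal deduplicates state that the commit now keeps on ControlInpaintLDM: an nn.Parameter is registered on whichever module owns the attribute, so defining learnable_vector only on the subclass keeps it (and its checkpoint entry) out of the base LatentDiffusion. A toy illustration, not repo code:

import torch
import torch.nn as nn

class Base(nn.Module):
    def __init__(self):
        super().__init__()

class Child(Base):
    def __init__(self):
        super().__init__()
        self.learnable_vector = nn.Parameter(torch.randn(1, 1, 768))

print("learnable_vector" in dict(Base().named_parameters()))   # False
print("learnable_vector" in dict(Child().named_parameters()))  # True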

ControlNet/ldm/modules/encoders/modules.py  CHANGED

@@ -137,7 +137,6 @@ class FrozenCLIPImageEmbedder(AbstractEncoder):
         super().__init__()
         self.transformer = CLIPVisionModel.from_pretrained(version)
         self.final_ln = LayerNorm(1024)
-        self.proj_out = nn.Linear(1024, 768)
         self.mapper = Transformer(
             1,
             1024,
@@ -162,7 +161,6 @@ class FrozenCLIPImageEmbedder(AbstractEncoder):
         z = z.unsqueeze(1)
         z = self.mapper(z)
         z = self.final_ln(z)
-        z = self.proj_out(z)
         return z
 
     def encode(self, image):
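With proj_out gone from FrozenCLIPImageEmbedder, the image encoder stops at the 1024-dim CLIP width and the trainable 1024 -> 768 projection now happens inside ControlInpaintLDM. A sketch of the resulting shape flow with stand-in layers (assumptions, not the repo's Transformer/LayerNorm):

import torch
import torch.nn as nn

final_ln = nn.LayerNorm(1024)
proj_out = nn.Linear(1024, 768)    # now owned by the LDM, per this commit

z = torch.randn(4, 1, 1024)        # mapper output: one token per image
z = final_ln(z)                    # encoder output stays 1024-dim
c = proj_out(z)                    # projected to the 768-dim cross-attention width
assert c.shape == (4, 1, 768)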