Commit d62b4c3
Parent(s): 76a0a2e
Add ControlNet and Scheduler

- model_blocks/controlnet.py +187 -0
- scheduler/linear_scheduler.py +91 -0
model_blocks/controlnet.py
ADDED
@@ -0,0 +1,187 @@
import enum
import logging
import os
from re import UNICODE

import torch
import torch.nn as nn
from unet_base import UNet, get_time_embedding

logger = logging.getLogger(__name__)


def make_zero_module(module):
    # Zero-initialize every parameter so the module initially contributes nothing
    for p in module.parameters():
        p.detach().zero_()
    return module


class ControlNet(nn.Module):
    r"""
    ControlNet for a trained DDPM
    """

    def __init__(
        self, device, model_config, trained_ckpt_path=None, model_locked=True
    ) -> None:
        super().__init__()

        # Trained DDPM
        self.model = UNet(model_config)
        self.model_locked = model_locked

        if trained_ckpt_path is not None:
            print("Loading Checkpoint")
            self.model = torch.load(trained_ckpt_path).to(device)

        # Drop the up blocks (decoder blocks) of the DDPM and use only the encoder
        self.control_copy = UNet(model_config, use_up=False)
        if trained_ckpt_path is not None:
            self.control_copy.load_state_dict(self.model.state_dict(), strict=False)

        # Hint block for ControlNet:
        # a stack of Conv + activation layers with a zero convolution at the end
        self.hint_block = nn.Sequential(
            nn.Conv2d(model_config["hint_channels"], 64, kernel_size=3, padding=1),
            nn.SiLU(),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.SiLU(),
            nn.Conv2d(128, self.model.down_channels[0], kernel_size=3, padding=1),
            nn.SiLU(),
            make_zero_module(
                nn.Conv2d(
                    self.model.down_channels[0],
                    self.model.down_channels[0],
                    kernel_size=1,
                    padding=0,
                )
            ),
        )

        # Zero convolutions applied to the ControlNet encoder (down block) outputs
        self.control_copy_down_blocks = nn.ModuleList(
            [
                make_zero_module(
                    nn.Conv2d(
                        self.model.down_channels[i],
                        self.model.down_channels[i],
                        kernel_size=1,
                        padding=0,
                    )
                )
                for i in range(len(self.model.down_channels) - 1)
            ]
        )

        # Zero convolutions applied to the ControlNet mid block outputs
        self.control_copy_mid_blocks = nn.ModuleList(
            [
                make_zero_module(
                    nn.Conv2d(
                        self.model.mid_channels[i],
                        self.model.mid_channels[i],
                        kernel_size=1,
                        padding=0,
                    )
                )
                for i in range(1, len(self.model.mid_channels) - 1)
            ]
        )

    def get_params(self):
        # Get all the trainable ControlNet params
        params = list(self.control_copy.parameters())
        params += list(self.hint_block.parameters())
        params += list(self.control_copy_down_blocks.parameters())
        params += list(self.control_copy_mid_blocks.parameters())

        return params

    def forward(self, x, t, hint):
        time_embedding = get_time_embedding(
            torch.as_tensor(t).long(), self.model.t_emb_dim
        )
        time_embedding = self.model.t_proj(time_embedding)
        logger.debug(f"Got time embeddings for the original copy: {time_embedding.shape}")

        model_down_outs = []

        # Run the locked DDPM encoder without gradients
        with torch.no_grad():
            model_out = self.model.conv_in(x)
            for idx, down in enumerate(self.model.downs):
                model_down_outs.append(model_out)
                model_out = down(model_out, time_embedding)
                logger.debug(
                    f"Got output of down layer {idx} from the original copy: {model_out.shape}"
                )

        logger.debug("Passing into ControlNet")

        controlnet_time_embedding = get_time_embedding(
            torch.as_tensor(t).long(), self.control_copy.t_emb_dim
        )
        controlnet_time_embedding = self.control_copy.t_proj(controlnet_time_embedding)
        logger.debug(
            f"Got time embedding for ControlNet: {controlnet_time_embedding.shape}"
        )

        # Hint block output
        controlnet_hint_output = self.hint_block(hint)
        logger.debug(
            f"Got output of the hint block of the ControlNet: {controlnet_hint_output.shape}"
        )

        controlnet_out = self.control_copy.conv_in(x)
        logger.debug(
            f"Got output of the input conv of the ControlNet: {controlnet_out.shape}"
        )

        controlnet_out += controlnet_hint_output
        logger.debug(f"Added hint to the conv input: {controlnet_out.shape}")

        controlnet_down_outs = []
        # Get all the outputs of the ControlNet down blocks
        for idx, down in enumerate(self.control_copy.downs):
            # Save the zero-convolved skip output, then advance through the down block
            down_out = self.control_copy_down_blocks[idx](controlnet_out)
            controlnet_down_outs.append(down_out)
            controlnet_out = down(controlnet_out, controlnet_time_embedding)
            logger.debug(
                f"Got output of down block {idx} of the ControlNet: {down_out.shape}"
            )

        # Now run the mid blocks and add their outputs to the original copy
        for idx in range(len(self.control_copy.mids)):
            controlnet_out = self.control_copy.mids[idx](
                controlnet_out, controlnet_time_embedding
            )
            logger.debug(
                f"Got the output of mid block {idx} in ControlNet: {controlnet_out.shape}"
            )

            model_out = self.model.mids[idx](model_out, time_embedding)
            logger.debug(
                f"Got the output of mid block {idx} from the original model: {model_out.shape}"
            )

            model_out += self.control_copy_mid_blocks[idx](controlnet_out)
            logger.debug(
                f"Added the ControlNet mid block {idx} output to the original copy: {model_out.shape}"
            )

        # Call the up blocks now
        for idx, up in enumerate(self.model.ups):
            model_down_out = model_down_outs.pop()
            logger.debug(
                f"Got the output from the down blocks of the original model: {model_down_out.shape}"
            )
            controlnet_down_out = controlnet_down_outs.pop()
            logger.debug(
                f"Got the output from the down blocks of the ControlNet copy: {controlnet_down_out.shape}"
            )

            model_out = up(
                model_out, controlnet_down_out + model_down_out, time_embedding
            )

        model_out = self.model.norm_out(model_out)
        model_out = nn.SiLU()(model_out)
        model_out = self.model.conv_out(model_out)

        return model_out
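For context, a minimal usage sketch of how this module could be wired into a training step (not part of the commit; the model_config contents beyond "hint_channels", the checkpoint path, shapes and hyperparameters are illustrative assumptions, since unet_base is not shown here). Only the parameters returned by get_params() — the trainable encoder copy, the hint block and the zero convolutions — are optimized, while the locked DDPM encoder runs under no_grad inside forward:

# Hedged sketch: model_config keys, checkpoint path, shapes and learning rate
# below are assumptions for illustration only.
import torch
from model_blocks.controlnet import ControlNet

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_config = {"hint_channels": 3}  # plus whatever keys unet_base.UNet expects

controlnet = ControlNet(
    device=device,
    model_config=model_config,
    trained_ckpt_path="ddpm_ckpt.pth",  # hypothetical path to a torch.save-d DDPM
    model_locked=True,
).to(device)

# Optimize only the ControlNet-side parameters; the locked DDPM stays frozen
optimizer = torch.optim.Adam(controlnet.get_params(), lr=1e-5)

x_t = torch.randn(4, 3, 64, 64, device=device)   # noised images (e.g. from add_noise below)
hint = torch.randn(4, 3, 64, 64, device=device)  # conditioning hint, e.g. edge maps
t = torch.randint(0, 1000, (4,), device=device)
noise = torch.randn_like(x_t)                    # target noise for the MSE loss

noise_pred = controlnet(x_t, t, hint)            # same shape as x_t
loss = torch.nn.functional.mse_loss(noise_pred, noise)
loss.backward()
optimizer.step()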
scheduler/linear_scheduler.py
ADDED
@@ -0,0 +1,91 @@
import torch


class LinearNoiseScheduler:
    r"""
    Class for the linear noise scheduler that is used in DDPM.
    """

    def __init__(self, num_timesteps, beta_start, beta_end, ldm_scheduler=False):
        self.num_timesteps = num_timesteps
        self.beta_start = beta_start
        self.beta_end = beta_end

        if ldm_scheduler:
            # Mimicking how the CompVis repo creates the schedule
            self.betas = (
                torch.linspace(beta_start**0.5, beta_end**0.5, num_timesteps) ** 2
            )
        else:
            self.betas = torch.linspace(beta_start, beta_end, num_timesteps)
        self.alphas = 1.0 - self.betas
        self.alpha_cum_prod = torch.cumprod(self.alphas, dim=0)
        self.sqrt_alpha_cum_prod = torch.sqrt(self.alpha_cum_prod)
        self.sqrt_one_minus_alpha_cum_prod = torch.sqrt(1 - self.alpha_cum_prod)

    def add_noise(self, original, noise, t):
        r"""
        Forward method for diffusion
        :param original: Image on which noise is to be applied
        :param noise: Random noise tensor (from a normal distribution)
        :param t: timestep of the forward process of shape -> (B,)
        :return:
        """
        original_shape = original.shape
        batch_size = original_shape[0]

        sqrt_alpha_cum_prod = self.sqrt_alpha_cum_prod.to(original.device)[t].reshape(
            batch_size
        )
        sqrt_one_minus_alpha_cum_prod = self.sqrt_one_minus_alpha_cum_prod.to(
            original.device
        )[t].reshape(batch_size)

        # Reshape till (B,) becomes (B,1,1,1) if image is (B,C,H,W)
        for _ in range(len(original_shape) - 1):
            sqrt_alpha_cum_prod = sqrt_alpha_cum_prod.unsqueeze(-1)
        for _ in range(len(original_shape) - 1):
            sqrt_one_minus_alpha_cum_prod = sqrt_one_minus_alpha_cum_prod.unsqueeze(-1)

        # Apply and return the forward process equation
        return (
            sqrt_alpha_cum_prod.to(original.device) * original
            + sqrt_one_minus_alpha_cum_prod.to(original.device) * noise
        )

    def sample_prev_timestep(self, xt, noise_pred, t):
        r"""
        Use the noise prediction by the model to get
        xt-1 using xt and the noise predicted
        :param xt: current timestep sample
        :param noise_pred: model noise prediction
        :param t: current timestep we are at
        :return:
        """
        x0 = (
            xt - (self.sqrt_one_minus_alpha_cum_prod.to(xt.device)[t] * noise_pred)
        ) / torch.sqrt(self.alpha_cum_prod.to(xt.device)[t])
        x0 = torch.clamp(x0, -1.0, 1.0)

        mean = (
            xt
            - ((self.betas.to(xt.device)[t]) * noise_pred)
            / (self.sqrt_one_minus_alpha_cum_prod.to(xt.device)[t])
        )
        mean = mean / torch.sqrt(self.alphas.to(xt.device)[t])

        if t == 0:
            return mean, x0
        else:
            variance = (1 - self.alpha_cum_prod.to(xt.device)[t - 1]) / (
                1.0 - self.alpha_cum_prod.to(xt.device)[t]
            )
            variance = variance * self.betas.to(xt.device)[t]
            sigma = variance**0.5
            z = torch.randn(xt.shape).to(xt.device)

            # OR
            # variance = self.betas[t]
            # sigma = variance ** 0.5
            # z = torch.randn(xt.shape).to(xt.device)
            return mean + sigma * z, x0
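A short usage sketch of the two entry points (the 1000 timesteps, beta range and tensor shapes are common DDPM defaults used only for illustration, not part of the commit): add_noise implements the closed-form forward process x_t = sqrt(alpha_bar_t) * x_0 + sqrt(1 - alpha_bar_t) * noise, and sample_prev_timestep uses a model's noise prediction to step from x_t back to x_{t-1}, also returning the clamped x_0 estimate. Passing ldm_scheduler=True switches to the squared square-root beta spacing used by the CompVis latent-diffusion repo.

# Hedged sketch; shapes and hyperparameters are illustrative.
import torch
from scheduler.linear_scheduler import LinearNoiseScheduler

scheduler = LinearNoiseScheduler(num_timesteps=1000, beta_start=1e-4, beta_end=0.02)

# Training direction: noise a clean batch x0 at random timesteps
x0 = torch.randn(4, 3, 64, 64)
noise = torch.randn_like(x0)
t = torch.randint(0, 1000, (4,))
xt = scheduler.add_noise(x0, noise, t)           # same shape as x0

# Sampling direction: step back one timestep given a noise prediction
noise_pred = torch.randn_like(xt)                # stand-in for model(xt, t)
xt_prev, x0_pred = scheduler.sample_prev_timestep(xt, noise_pred, t=999)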